# Provenance: introduced as module/regex.m by commit
# 46439007cf417cbd9ac8049bb4122c890097a0fa ("20060303-partial"),
# Charles.Forsyth, Fri, 22 Dec 2006 20:52:35 +0000.
#
# Regex: interface declaration for the regular-expression module.
# Only declarations live here; the implementation is the Dis file
# named by PATH.

Regex: module {

	# Location of the compiled module (the string a client would hand to `load`).
	PATH:	con "/dis/lib/regex.dis";

# normally imported identifiers

	# A compiled expression is a reference to the Arena that holds its nodes.
	Re: type ref Arena;

	# compile: takes a string and an int and returns a (Re, string) pair.
	# NOTE(review): presumably the string argument is the pattern, the int is
	# an option flag, and the returned string is a diagnostic -- confirm
	# against the implementation; none of that is established by this file.
	compile:	fn(nil:string,nil:int): (Re, string);

	# execute: applies a compiled Re to a string, yielding an array of int pairs.
	# NOTE(review): the pairs are presumably match offsets -- confirm.
	execute:	fn(nil:Re, nil:string): array of (int, int);

	# executese: as execute, with an extra (int, int) pair `se` and two int
	# arguments `bol` and `eol`.
	# NOTE(review): names suggest a start/end range and beginning-/end-of-line
	# flags -- confirm against the implementation.
	executese:	fn(nil:Re, nil:string, se: (int, int), bol: int, eol: int): array of (int, int);

# internal identifiers, not normally imported

	# Node kinds. iota numbers the identifiers in order starting at 0, so
	# ALT is (1<<16)+0 and RPN is (1<<16)+11; the order here therefore fixes
	# the values. All of them lie above the 16-bit range, which lets Rex.kind
	# carry either a character or one of these codes (see Rex.kind).
	ALT, CAT, DOT, SET, HAT, DOL, NUL, PCLO, CLO, OPT, LPN, RPN : con (1<<16)+iota;

	# NOTE(review): presumably an index into Arena.rex -- confirm.
	refRex : type int;	# used instead of ref Rex to avoid circularity

	Set: adt {				# character class
		neg: int;			# 0 or 1
		ascii : array of int;		# ascii members, bit array
		unicode : list of (int,int);	# non-ascii char ranges
	};

	Rex: adt {		# node in parse of regex, or state of fsm
		kind : int;	# kind of node: char or ALT, CAT, etc
		left : refRex;	# left descendant
		right : refRex;	# right descendant, or next state
		set : ref Set;	# character class
		pno : int;	# NOTE(review): undocumented; presumably a parenthesis number -- confirm
	};

	Arena: adt {		# free store from which nodes are allocated
		rex : array of Rex;	# the node storage itself
		ptr : refRex;		# next available space
		start : refRex;	# root of parse, or start of fsm
		pno : int;	# NOTE(review): undocumented; presumably a parenthesis count -- confirm
	};
};