summary | refs | log | tree | commit | diff
path: root/module/regex.m
diff options
context:
space:
mode:
author: Charles.Forsyth <devnull@localhost> 2006-12-22 20:52:35 +0000
committer: Charles.Forsyth <devnull@localhost> 2006-12-22 20:52:35 +0000
commit: 46439007cf417cbd9ac8049bb4122c890097a0fa (patch)
tree: 6fdb25e5f3a2b6d5657eb23b35774b631d4d97e4 /module/regex.m
parent: 37da2899f40661e3e9631e497da8dc59b971cbd0 (diff)
20060303-partial
Diffstat (limited to 'module/regex.m')
-rw-r--r-- module/regex.m 38
1 files changed, 38 insertions, 0 deletions
diff --git a/module/regex.m b/module/regex.m
new file mode 100644
index 00000000..f285a524
--- /dev/null
+++ b/module/regex.m
@@ -0,0 +1,38 @@
+Regex: module { # module interface: compile/execute entry points plus the node, set and arena types they use
+
+ PATH: con "/dis/lib/regex.dis"; # string constant naming a .dis file; presumably the path callers pass to load -- confirm
+
+# normally imported identifiers
+
+ Re: type ref Arena; # Re is an alias for a reference to the Arena adt declared below
+ compile: fn(nil:string,nil:int): (Re, string); # takes a string and an int, returns (Re, string); presumably (compiled regex, error text) -- TODO confirm
+ execute: fn(nil:Re, nil:string): array of (int, int); # takes an Re and a string, returns an array of (int, int) pairs; presumably match offsets -- TODO confirm
+ executese: fn(nil:Re, nil:string, se: (int, int), bol: int, eol: int): array of (int, int); # execute's arguments plus se, bol, eol; presumably a start/end range and line-boundary flags -- TODO confirm
+
+# internal identifiers, not normally imported
+
+ ALT, CAT, DOT, SET, HAT, DOL, NUL, PCLO, CLO, OPT, LPN, RPN : con (1<<16)+iota; # 12 consecutive constants: ALT = (1<<16)+0 ... RPN = (1<<16)+11; presumably placed above the char values Rex.kind can also hold -- confirm
+
+ refRex : type int; # used instead of ref Rex to avoid circularity; presumably an index into Arena.rex -- confirm
+
+ Set: adt { # character class
+ neg: int; # 0 or 1
+ ascii : array of int; # ascii members, bit array
+ unicode : list of (int,int); # non-ascii char ranges
+ };
+
+ Rex: adt { # node in parse of regex, or state of fsm
+ kind : int; # kind of node: char or ALT, CAT, etc
+ left : refRex; # left descendant
+ right : refRex; # right descendant, or next state
+ set : ref Set; # character class
+ pno : int; # NOTE(review): undocumented here; possibly a parenthesis (subexpression) number -- confirm
+ };
+
+ Arena: adt { # free store from which nodes are allocated
+ rex : array of Rex; # the Rex nodes themselves
+ ptr : refRex; # next available space
+ start : refRex; # root of parse, or start of fsm
+ pno : int; # NOTE(review): undocumented here; possibly a count of parenthesised subexpressions -- confirm
+ };
+};