summaryrefslogtreecommitdiff
path: root/module/regex.m
blob: f285a52417bee81775078b5c3b9b397a195baf4c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
Regex: module {

	PATH:	con "/dis/lib/regex.dis";

# normally imported identifiers

	Re: type ref Arena;
	compile:	fn(nil:string,nil:int): (Re, string);
	execute:	fn(nil:Re, nil:string): array of (int, int);
	executese:	fn(nil:Re, nil:string, se: (int, int), bol: int, eol: int): array of (int, int);

# internal identifiers, not normally imported

	ALT, CAT, DOT, SET, HAT, DOL, NUL, PCLO, CLO, OPT, LPN, RPN : con (1<<16)+iota;

	refRex : type int;	# used instead of ref Rex to avoid circularity

	Set: adt {				# character class
		neg: int;			# 0 or 1
		ascii : array of int;		# ascii members, bit array
		unicode : list of (int,int);	# non-ascii char ranges
	};

	Rex: adt {		# node in parse of regex, or state of fsm
		kind : int;	# kind of node: char or ALT, CAT, etc
		left : refRex;	# left descendant
		right : refRex;	# right descendant, or next state
		set : ref Set;	# character class
		pno : int;
	};

	Arena: adt {		# free store from which nodes are allocated
		rex : array of Rex;		
		ptr : refRex;	# next available space
		start : refRex;	# root of parse, or start of fsm
		pno : int;
	};
};