summaryrefslogtreecommitdiff
path: root/appl/acme/regx.b
diff options
context:
space:
mode:
Diffstat (limited to 'appl/acme/regx.b')
-rw-r--r--appl/acme/regx.b1050
1 files changed, 1050 insertions, 0 deletions
diff --git a/appl/acme/regx.b b/appl/acme/regx.b
new file mode 100644
index 00000000..2bb571c0
--- /dev/null
+++ b/appl/acme/regx.b
@@ -0,0 +1,1050 @@
+implement Regx;
+
+include "common.m";
+
+sys : Sys;
+utils : Utils;
+textm : Textm;
+
+FALSE, TRUE, XXX : import Dat;
+NRange : import Dat;
+Range, Rangeset : import Dat;
+error, warning, tgetc, rgetc : import utils;
+Text : import textm;
+
+init(mods : ref Dat->Mods)
+{
+ sys = mods.sys;
+ utils = mods.utils;
+ textm = mods.textm;
+}
+
+None : con 0;
+Fore : con '+';
+Back : con '-';
+
+Char : con 0;
+Line : con 1;
+
+isaddrc(r : int) : int
+{
+ if (utils->strchr("0123456789+-/$.#", r) >= 0)
+ return TRUE;
+ return FALSE;
+}
+
+#
+# quite hard: could be almost anything but white space, but we are a little conservative,
+# aiming for regular expressions of alphanumerics and no white space
+#
+isregexc(r : int) : int
+{
+ if(r == 0)
+ return FALSE;
+ if(utils->isalnum(r))
+ return TRUE;
+ if(utils->strchr("^+-.*?#,;[]()$", r)>=0)
+ return TRUE;
+ return FALSE;
+}
+
+number(md: ref Dat->Mntdir, t : ref Text, r : Range, line : int, dir : int, size : int) : (int, Range)
+{
+ q0, q1 : int;
+
+ {
+ if(size == Char){
+ if(dir == Fore)
+ line = r.q1+line; # was t.file.buf.nc+line;
+ else if(dir == Back){
+ if(r.q0==0 && line > 0)
+ r.q0 = t.file.buf.nc;
+ line = r.q0-line; # was t.file.buf.nc - line;
+ }
+ if(line<0 || line>t.file.buf.nc)
+ raise "e";
+ return (TRUE, (line, line));
+ }
+ (q0, q1) = r;
+ case(dir){
+ None =>
+ q0 = 0;
+ q1 = 0;
+ while(line>0 && q1<t.file.buf.nc)
+ if(t.readc(q1++) == '\n')
+ if(--line > 0)
+ q0 = q1;
+ if(line==1 && t.readc(q1-1)!='\n') # no newline at end - count it
+ ;
+ else if(line > 0)
+ raise "e";
+ Fore =>
+ if(q1 > 0)
+ while(t.readc(q1-1) != '\n')
+ q1++;
+ q0 = q1;
+ while(line>0 && q1<t.file.buf.nc)
+ if(t.readc(q1++) == '\n')
+ if(--line > 0)
+ q0 = q1;
+ if(line > 0)
+ raise "e";
+ Back =>
+ if(q0 < t.file.buf.nc)
+ while(q0>0 && t.readc(q0-1)!='\n')
+ q0--;
+ q1 = q0;
+ while(line>0 && q0>0){
+ if(t.readc(q0-1) == '\n'){
+ if(--line >= 0)
+ q1 = q0;
+ }
+ --q0;
+ }
+ if(line > 0)
+ raise "e";
+ while(q0>0 && t.readc(q0-1)!='\n')
+ --q0;
+ }
+ return (TRUE, (q0, q1));
+ }
+ exception{
+ * =>
+ if(md != nil)
+ warning(nil, "address out of range\n");
+ return (FALSE, r);
+ }
+ return (FALSE, r);
+}
+
+regexp(md: ref Dat->Mntdir, t : ref Text, lim : Range, r : Range, pat : string, dir : int) : (int, Range)
+{
+ found : int;
+ sel : Rangeset;
+ q : int;
+
+ if(pat == nil && rxnull()){
+ warning(md, "no previous regular expression");
+ return (FALSE, r);
+ }
+ if(pat == nil || !rxcompile(pat))
+ return (FALSE, r);
+ if(dir == Back)
+ (found, sel) = rxbexecute(t, r.q0);
+ else{
+ if(lim.q0 < 0)
+ q = Dat->Infinity;
+ else
+ q = lim.q1;
+ (found, sel) = rxexecute(t, nil, r.q1, q);
+ }
+ if(!found && md == nil)
+ warning(nil, "no match for regexp\n");
+ return (found, sel[0]);
+}
+
+xgetc(a0 : ref Text, a1 : string, n : int) : int
+{
+ if (a0 == nil)
+ return rgetc(a1, n);
+ return tgetc(a0, n);
+}
+
+address(md: ref Dat->Mntdir, t : ref Text, lim : Range, ar : Range, a0 : ref Text, a1 : string, q0 : int, q1 : int, eval : int) : (int, int, Range)
+{
+ dir, size : int;
+ prevc, c, n : int;
+ q : int;
+ pat : string;
+ r, nr : Range;
+
+ r = ar;
+ q = q0;
+ dir = None;
+ size = Line;
+ c = 0;
+ while(q < q1){
+ prevc = c;
+ c = xgetc(a0, a1, q++);
+ case(c){
+ ';' =>
+ ar = r;
+ if(prevc == 0) # lhs defaults to 0
+ r.q0 = 0;
+ if(q>=q1 && t!=nil && t.file!=nil) # rhs defaults to $
+ r.q1 = t.file.buf.nc;
+ else{
+ (eval, q, nr) = address(md, t, lim, ar, a0, a1, q, q1, eval);
+ r.q1 = nr.q1;
+ }
+ return (eval, q, r);
+ ',' =>
+ if(prevc == 0) # lhs defaults to 0
+ r.q0 = 0;
+ if(q>=q1 && t!=nil && t.file!=nil) # rhs defaults to $
+ r.q1 = t.file.buf.nc;
+ else{
+ (eval, q, nr) = address(md, t, lim, ar, a0, a1, q, q1, eval);
+ r.q1 = nr.q1;
+ }
+ return (eval, q, r);
+ '+' or '-' =>
+ if(eval && (prevc=='+' || prevc=='-')){
+ if((nc := xgetc(a0, a1, q)) != '#' && nc != '/' && nc != '?')
+ (eval, r) = number(md, t, r, 1, prevc, Line); # do previous one
+ }
+ dir = c;
+ '.' or '$' =>
+ if(q != q0+1)
+ return (eval, q-1, r);
+ if(eval)
+ if(c == '.')
+ r = ar;
+ else
+ r = (t.file.buf.nc, t.file.buf.nc);
+ if(q < q1)
+ dir = Fore;
+ else
+ dir = None;
+ '#' =>
+ if(q==q1 || (c=xgetc(a0, a1, q++))<'0' || '9'<c)
+ return (eval, q-1, r);
+ size = Char;
+ n = c -'0';
+ while(q<q1){
+ c = xgetc(a0, a1, q++);
+ if(c<'0' || '9'<c){
+ q--;
+ break;
+ }
+ n = n*10+(c-'0');
+ }
+ if(eval)
+ (eval, r) = number(md, t, r, n, dir, size);
+ dir = None;
+ size = Line;
+ '0' to '9' =>
+ n = c -'0';
+ while(q<q1){
+ c = xgetc(a0, a1, q++);
+ if(c<'0' || '9'<c){
+ q--;
+ break;
+ }
+ n = n*10+(c-'0');
+ }
+ if(eval)
+ (eval, r) = number(md, t, r, n, dir, size);
+ dir = None;
+ size = Line;
+ '/' =>
+ pat = nil;
+ break2 := 0; # Ow !
+ while(q<q1){
+ c = xgetc(a0, a1, q++);
+ case(c){
+ '\n' =>
+ --q;
+ break2 = 1;
+ '\\' =>
+ pat[len pat] = c;
+ if(q == q1)
+ break2 = 1;
+ else
+ c = xgetc(a0, a1, q++);
+ '/' =>
+ break2 = 1;
+ }
+ if (break2)
+ break;
+ pat[len pat] = c;
+ }
+ if(eval)
+ (eval, r) = regexp(md, t, lim, r, pat, dir);
+ pat = nil;
+ dir = None;
+ size = Line;
+ * =>
+ return (eval, q-1, r);
+ }
+ }
+ if(eval && dir != None)
+ (eval, r) = number(md, t, r, 1, dir, Line); # do previous one
+ return (eval, q, r);
+}
+
+sel : Rangeset = array[NRange] of Range;
+lastregexp : string;
+
+# Machine Information
+
+Inst : adt {
+ typex : int; # < 16r10000 ==> literal, otherwise action
+ # sid : int;
+ subid : int;
+ class : int;
+ # other : cyclic ref Inst;
+ right : cyclic ref Inst;
+ # left : cyclic ref Inst;
+ next : cyclic ref Inst;
+};
+
+NPROG : con 1024;
+program := array[NPROG] of ref Inst;
+progp : int;
+startinst : ref Inst; # First inst. of program; might not be program[0]
+bstartinst : ref Inst; # same for backwards machine
+
+Ilist : adt {
+ inst : ref Inst; # Instruction of the thread
+ se : Rangeset;
+ startp : int; # first char of match
+};
+
+NLIST : con 128;
+
+thl, nl : array of Ilist; # This list, next list
+listx := array[2] of array of Ilist;
+sempty : Rangeset = array[NRange] of Range;
+
+#
+# Actions and Tokens
+#
+# 0x100xx are operators, value == precedence
+# 0x200xx are tokens, i.e. operands for operators
+#
+
+OPERATOR : con 16r10000; # Bitmask of all operators
+START : con 16r10000; # Start, used for marker on stack
+RBRA : con 16r10001; # Right bracket, )
+LBRA : con 16r10002; # Left bracket, (
+OR : con 16r10003; # Alternation, |
+CAT : con 16r10004; # Concatentation, implicit operator
+STAR : con 16r10005; # Closure, *
+PLUS : con 16r10006; # a+ == aa*
+QUEST : con 16r10007; # a? == a|nothing, i.e. 0 or 1 a's
+ANY : con 16r20000; # Any character but newline, .
+NOP : con 16r20001; # No operation, internal use only
+BOL : con 16r20002; # Beginning of line, ^
+EOL : con 16r20003; # End of line, $
+CCLASS : con 16r20004; # Character class, []
+NCCLASS : con 16r20005; # Negated character class, [^]
+END : con 16r20077; # Terminate: match found
+
+ISATOR : con 16r10000;
+ISAND : con 16r20000;
+
+# Parser Information
+
+Node : adt {
+ first : ref Inst;
+ last : ref Inst;
+};
+
+NSTACK : con 20;
+andstack := array[NSTACK] of ref Node;
+andp : int;
+atorstack := array[NSTACK] of int;
+atorp : int;
+lastwasand : int; # Last token was operand
+cursubid : int;
+subidstack := array[NSTACK] of int;
+subidp : int;
+backwards : int;
+nbra : int;
+exprs : string;
+exprp : int; # pointer to next character in source expression
+DCLASS : con 10; # allocation increment
+nclass : int; # number active
+Nclass : int = 0; # high water mark
+class : array of string;
+negateclass : int;
+
+nilnode : Node;
+nilinst : Inst;
+
+rxinit()
+{
+ lastregexp = nil;
+ for (k := 0; k < NPROG; k++)
+ program[k] = ref nilinst;
+ for (k = 0; k < NSTACK; k++)
+ andstack[k] = ref nilnode;
+ for (k = 0; k < 2; k++) {
+ listx[k] = array[NLIST] of Ilist;
+ for (i := 0; i < NLIST; i++) {
+ listx[k][i].inst = nil;
+ listx[k][i].startp = 0;
+ listx[k][i].se = array[NRange] of Range;
+ for (j := 0; j < NRange; j++)
+ listx[k][i].se[j].q0 = listx[k][i].se[j].q1 = 0;
+ }
+ }
+}
+
+regerror(e : string)
+{
+ lastregexp = nil;
+ buf := sys->sprint("regexp: %s\n", e);
+ warning(nil, buf);
+ raise "regerror";
+}
+
+newinst(t : int) : ref Inst
+{
+ if(progp >= NPROG)
+ regerror("expression too long");
+ program[progp].typex = t;
+ program[progp].next = nil; # next was left
+ program[progp].right = nil;
+ return program[progp++];
+}
+
+realcompile(s : string) : ref Inst
+{
+ token : int;
+
+ {
+ startlex(s);
+ atorp = 0;
+ andp = 0;
+ subidp = 0;
+ cursubid = 0;
+ lastwasand = FALSE;
+ # Start with a low priority operator to prime parser
+ pushator(START-1);
+ while((token=lex()) != END){
+ if((token&ISATOR) == OPERATOR)
+ operator(token);
+ else
+ operand(token);
+ }
+ # Close with a low priority operator
+ evaluntil(START);
+ # Force END
+ operand(END);
+ evaluntil(START);
+ if(nbra)
+ regerror("unmatched `('");
+ --andp; # points to first and only operand
+ return andstack[andp].first;
+ }
+ exception{
+ "regerror" =>
+ return nil;
+ }
+ return nil;
+}
+
+rxcompile(r : string) : int
+{
+ oprogp : int;
+
+ if(lastregexp == r)
+ return TRUE;
+ lastregexp = nil;
+ for (i := 0; i < nclass; i++)
+ class[i] = nil;
+ nclass = 0;
+ progp = 0;
+ backwards = FALSE;
+ bstartinst = nil;
+ startinst = realcompile(r);
+ if(startinst == nil)
+ return FALSE;
+ optimize(0);
+ oprogp = progp;
+ backwards = TRUE;
+ bstartinst = realcompile(r);
+ if(bstartinst == nil)
+ return FALSE;
+ optimize(oprogp);
+ lastregexp = r;
+ return TRUE;
+}
+
+operand(t : int)
+{
+ i : ref Inst;
+
+ if(lastwasand)
+ operator(CAT); # catenate is implicit
+ i = newinst(t);
+ if(t == CCLASS){
+ if(negateclass)
+ i.typex = NCCLASS; # UGH
+ i.class = nclass-1; # UGH
+ }
+ pushand(i, i);
+ lastwasand = TRUE;
+}
+
+operator(t : int)
+{
+ if(t==RBRA && --nbra<0)
+ regerror("unmatched `)'");
+ if(t==LBRA){
+ cursubid++; # silently ignored
+ nbra++;
+ if(lastwasand)
+ operator(CAT);
+ }else
+ evaluntil(t);
+ if(t!=RBRA)
+ pushator(t);
+ lastwasand = FALSE;
+ if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
+ lastwasand = TRUE; # these look like operands
+}
+
+pushand(f : ref Inst, l : ref Inst)
+{
+ if(andp >= NSTACK)
+ error("operand stack overflow");
+ andstack[andp].first = f;
+ andstack[andp].last = l;
+ andp++;
+}
+
+pushator(t : int)
+{
+ if(atorp >= NSTACK)
+ error("operator stack overflow");
+ atorstack[atorp++]=t;
+ if(cursubid >= NRange)
+ subidstack[subidp++]= -1;
+ else
+ subidstack[subidp++]=cursubid;
+}
+
+popand(op : int) : ref Node
+{
+ if(andp <= 0)
+ if(op){
+ buf := sys->sprint("missing operand for %c", op);
+ regerror(buf);
+ }else
+ regerror("malformed regexp");
+ return andstack[--andp];
+}
+
+popator() : int
+{
+ if(atorp <= 0)
+ error("operator stack underflow");
+ --subidp;
+ return atorstack[--atorp];
+}
+
+evaluntil(pri : int)
+{
+ op1, op2 : ref Node;
+ inst1, inst2 : ref Inst;
+
+ while(pri==RBRA || atorstack[atorp-1]>=pri){
+ case(popator()){
+ LBRA =>
+ op1 = popand('(');
+ inst2 = newinst(RBRA);
+ inst2.subid = subidstack[subidp];
+ op1.last.next = inst2;
+ inst1 = newinst(LBRA);
+ inst1.subid = subidstack[subidp];
+ inst1.next = op1.first;
+ pushand(inst1, inst2);
+ return; # must have been RBRA
+ OR =>
+ op2 = popand('|');
+ op1 = popand('|');
+ inst2 = newinst(NOP);
+ op2.last.next = inst2;
+ op1.last.next = inst2;
+ inst1 = newinst(OR);
+ inst1.right = op1.first;
+ inst1.next = op2.first; # next was left
+ pushand(inst1, inst2);
+ CAT =>
+ op2 = popand(0);
+ op1 = popand(0);
+ if(backwards && op2.first.typex!=END)
+ (op1, op2) = (op2, op1);
+ op1.last.next = op2.first;
+ pushand(op1.first, op2.last);
+ STAR =>
+ op2 = popand('*');
+ inst1 = newinst(OR);
+ op2.last.next = inst1;
+ inst1.right = op2.first;
+ pushand(inst1, inst1);
+ PLUS =>
+ op2 = popand('+');
+ inst1 = newinst(OR);
+ op2.last.next = inst1;
+ inst1.right = op2.first;
+ pushand(op2.first, inst1);
+ QUEST =>
+ op2 = popand('?');
+ inst1 = newinst(OR);
+ inst2 = newinst(NOP);
+ inst1.next = inst2; # next was left
+ inst1.right = op2.first;
+ op2.last.next = inst2;
+ pushand(inst1, inst2);
+ * =>
+ error("unknown regexp operator");
+ }
+ }
+}
+
+optimize(start : int)
+{
+ inst : int;
+ target : ref Inst;
+
+ for(inst=start; program[inst].typex!=END; inst++){
+ target = program[inst].next;
+ while(target.typex == NOP)
+ target = target.next;
+ program[inst].next = target;
+ }
+}
+
+startlex(s : string)
+{
+ exprs = s;
+ exprp = 0;
+ nbra = 0;
+}
+
+lex() : int
+{
+ c : int;
+
+ if (exprp == len exprs)
+ return END;
+ c = exprs[exprp++];
+ case(c){
+ '\\' =>
+ if(exprp < len exprs)
+ if((c= exprs[exprp++])=='n')
+ c='\n';
+ '*' =>
+ c = STAR;
+ '?' =>
+ c = QUEST;
+ '+' =>
+ c = PLUS;
+ '|' =>
+ c = OR;
+ '.' =>
+ c = ANY;
+ '(' =>
+ c = LBRA;
+ ')' =>
+ c = RBRA;
+ '^' =>
+ c = BOL;
+ '$' =>
+ c = EOL;
+ '[' =>
+ c = CCLASS;
+ bldcclass();
+ }
+ return c;
+}
+
+nextrec() : int
+{
+ if(exprp == len exprs || (exprp == len exprs-1 && exprs[exprp]=='\\'))
+ regerror("malformed `[]'");
+ if(exprs[exprp] == '\\'){
+ exprp++;
+ if(exprs[exprp]=='n'){
+ exprp++;
+ return '\n';
+ }
+ return exprs[exprp++] | 16r10000;
+ }
+ return exprs[exprp++];
+}
+
+bldcclass()
+{
+ c1, c2 : int;
+ classp : string;
+
+ # we have already seen the '['
+ if(exprp < len exprs && exprs[exprp] == '^'){
+ classp[len classp] = '\n'; # don't match newline in negate case
+ negateclass = TRUE;
+ exprp++;
+ }else
+ negateclass = FALSE;
+ while((c1 = nextrec()) != ']'){
+ if(c1 == '-'){
+ classp = nil;
+ regerror("malformed `[]'");
+ }
+ if(exprp < len exprs && exprs[exprp] == '-'){
+ exprp++; # eat '-'
+ if((c2 = nextrec()) == ']') {
+ classp = nil;
+ regerror("malformed '[]'");
+ }
+ classp[len classp] = 16rFFFF;
+ classp[len classp] = c1;
+ classp[len classp] = c2;
+ }else
+ classp[len classp] = c1;
+ }
+ if(nclass == Nclass){
+ Nclass += DCLASS;
+ oc := class;
+ class = array[Nclass] of string;
+ if (oc != nil) {
+ class[0:] = oc[0:Nclass-DCLASS];
+ oc = nil;
+ }
+ }
+ class[nclass++] = classp;
+}
+
+classmatch(classno : int, c : int, negate : int) : int
+{
+ p : string;
+
+ p = class[classno];
+ for (i := 0; i < len p; ) {
+ if(p[i] == 16rFFFF){
+ if(p[i+1]<=c && c<=p[i+2])
+ return !negate;
+ i += 3;
+ }else if(p[i++] == c)
+ return !negate;
+ }
+ return negate;
+}
+
+#
+# Note optimization in addinst:
+# *l must be pending when addinst called; if *l has been looked
+# at already, the optimization is a bug.
+#
+addinst(l : array of Ilist, inst : ref Inst, sep : Rangeset)
+{
+ p : int;
+
+ for(p = 0; l[p].inst != nil; p++){
+ if(l[p].inst==inst){
+ if(sep[0].q0 < l[p].se[0].q0)
+ l[p].se[0:] = sep[0:NRange]; # this would be bug
+ return; # It's already there
+ }
+ }
+ l[p].inst = inst;
+ l[p].se[0:]= sep[0:NRange];
+ l[p+1].inst = nil;
+}
+
+rxnull() : int
+{
+ return startinst==nil || bstartinst==nil;
+}
+
+OVERFLOW : con "overflow";
+
+# either t!=nil or r!=nil, and we match the string in the appropriate place
+rxexecute(t : ref Text, r: string, startp : int, eof : int) : (int, Rangeset)
+{
+ flag : int;
+ inst : ref Inst;
+ tlp : int;
+ p : int;
+ nnl, ntl : int;
+ nc, c : int;
+ wrapped : int;
+ startchar : int;
+
+ flag = 0;
+ p = startp;
+ startchar = 0;
+ wrapped = 0;
+ nnl = 0;
+ if(startinst.typex<OPERATOR)
+ startchar = startinst.typex;
+ listx[0][0].inst = listx[1][0].inst = nil;
+ sel[0].q0 = -1;
+
+ {
+ if(t != nil)
+ nc = t.file.buf.nc;
+ else
+ nc = len r;
+ # Execute machine once for each character
+ for(;;p++){
+ if(p>=eof || p>=nc){
+ case(wrapped++){
+ 0 or 2 => # let loop run one more click
+ ;
+ 1 => # expired; wrap to beginning
+ if(sel[0].q0>=0 || eof!=Dat->Infinity)
+ return (sel[0].q0>=0, sel);
+ listx[0][0].inst = listx[1][0].inst = nil;
+ p = -1;
+ continue;
+ * =>
+ return (sel[0].q0>=0, sel);
+ }
+ c = 0;
+ }else{
+ if(((wrapped && p>=startp) || sel[0].q0>0) && nnl==0)
+ break;
+ if(t != nil)
+ c = t.readc(p);
+ else
+ c = r[p];
+ }
+ # fast check for first char
+ if(startchar && nnl==0 && c!=startchar)
+ continue;
+ thl = listx[flag];
+ nl = listx[flag^=1];
+ nl[0].inst = nil;
+ ntl = nnl;
+ nnl = 0;
+ if(sel[0].q0<0 && (!wrapped || p<startp || startp==eof)){
+ # Add first instruction to this list
+ if(++ntl >= NLIST)
+ raise OVERFLOW;
+ sempty[0].q0 = p;
+ addinst(thl, startinst, sempty);
+ }
+ # Execute machine until this list is empty
+ tlp = 0;
+ inst = thl[0].inst;
+ while(inst != nil){ # assignment =
+ case(inst.typex){
+ LBRA =>
+ if(inst.subid>=0)
+ thl[tlp].se[inst.subid].q0 = p;
+ inst = inst.next;
+ continue;
+ RBRA =>
+ if(inst.subid>=0)
+ thl[tlp].se[inst.subid].q1 = p;
+ inst = inst.next;
+ continue;
+ ANY =>
+ if(c!='\n') {
+ if(++nnl >= NLIST)
+ raise OVERFLOW;
+ addinst(nl, inst.next, thl[tlp].se);
+ }
+ BOL =>
+ if(p==0 || (t != nil && t.readc(p-1)=='\n') || (r != nil && r[p-1] == '\n')){
+ inst = inst.next;
+ continue;
+ }
+ EOL =>
+ if(c == '\n') {
+ inst = inst.next;
+ continue;
+ }
+ CCLASS =>
+ if(c>=0 && classmatch(inst.class, c, 0)) {
+ if(++nnl >= NLIST)
+ raise OVERFLOW;
+ addinst(nl, inst.next, thl[tlp].se);
+ }
+ NCCLASS =>
+ if(c>=0 && classmatch(inst.class, c, 1)) {
+ if(++nnl >= NLIST)
+ raise OVERFLOW;
+ addinst(nl, inst.next, thl[tlp].se);
+ }
+ OR =>
+ # evaluate right choice later
+ if(++ntl >= NLIST)
+ raise OVERFLOW;
+ addinst(thl[tlp:], inst.right, thl[tlp].se);
+ # efficiency: advance and re-evaluate
+ inst = inst.next; # next was left
+ continue;
+ END => # Match!
+ thl[tlp].se[0].q1 = p;
+ newmatch(thl[tlp].se);
+ * => # regular character
+ if(inst.typex==c){
+ if(++nnl >= NLIST)
+ raise OVERFLOW;
+ addinst(nl, inst.next, thl[tlp].se);
+ }
+ }
+ tlp++;
+ inst = thl[tlp].inst;
+ }
+ }
+ return (sel[0].q0>=0, sel);
+ }
+ exception{
+ OVERFLOW =>
+ error("regexp list overflow");
+ sel[0].q0 = -1;
+ return (0, sel);
+ }
+ return (0, sel);
+}
+
+newmatch(sp : Rangeset)
+{
+ if(sel[0].q0<0 || sp[0].q0<sel[0].q0 ||
+ (sp[0].q0==sel[0].q0 && sp[0].q1>sel[0].q1))
+ sel[0:] = sp[0:NRange];
+}
+
+rxbexecute(t : ref Text, startp : int) : (int, Rangeset)
+{
+ flag : int;
+ inst : ref Inst;
+ tlp : int;
+ p : int;
+ nnl, ntl : int;
+ c : int;
+ wrapped : int;
+ startchar : int;
+
+ flag = 0;
+ nnl = 0;
+ wrapped = 0;
+ p = startp;
+ startchar = 0;
+ if(bstartinst.typex<OPERATOR)
+ startchar = bstartinst.typex;
+ listx[0][0].inst = listx[1][0].inst = nil;
+ sel[0].q0= -1;
+
+ {
+ # Execute machine once for each character, including terminal NUL
+ for(;;--p){
+ if(p <= 0){
+ case(wrapped++){
+ 0 or 2 => # let loop run one more click
+ ;
+ 1 => # expired; wrap to end
+ if(sel[0].q0>=0)
+ return (sel[0].q0>=0, sel);
+ listx[0][0].inst = listx[1][0].inst = nil;
+ p = t.file.buf.nc+1;
+ continue;
+ 3 or * =>
+ return (sel[0].q0>=0, sel);
+ }
+ c = 0;
+ }else{
+ if(((wrapped && p<=startp) || sel[0].q0>0) && nnl==0)
+ break;
+ c = t.readc(p-1);
+ }
+ # fast check for first char
+ if(startchar && nnl==0 && c!=startchar)
+ continue;
+ thl = listx[flag];
+ nl = listx[flag^=1];
+ nl[0].inst = nil;
+ ntl = nnl;
+ nnl = 0;
+ if(sel[0].q0<0 && (!wrapped || p>startp)){
+ # Add first instruction to this list
+ if(++ntl >= NLIST)
+ raise OVERFLOW;
+ # the minus is so the optimizations in addinst work
+ sempty[0].q0 = -p;
+ addinst(thl, bstartinst, sempty);
+ }
+ # Execute machine until this list is empty
+ tlp = 0;
+ inst = thl[0].inst;
+ while(inst != nil){ # assignment =
+ case(inst.typex){
+ LBRA =>
+ if(inst.subid>=0)
+ thl[tlp].se[inst.subid].q0 = p;
+ inst = inst.next;
+ continue;
+ RBRA =>
+ if(inst.subid >= 0)
+ thl[tlp].se[inst.subid].q1 = p;
+ inst = inst.next;
+ continue;
+ ANY =>
+ if(c != '\n') {
+ if(++nnl >= NLIST)
+ raise OVERFLOW;
+ addinst(nl, inst.next, thl[tlp].se);
+ }
+ BOL =>
+ if(c=='\n' || p==0){
+ inst = inst.next;
+ continue;
+ }
+ EOL =>
+ if(p<t.file.buf.nc && t.readc(p)=='\n') {
+ inst = inst.next;
+ continue;
+ }
+ CCLASS =>
+ if(c>0 && classmatch(inst.class, c, 0)) {
+ if(++nnl >= NLIST)
+ raise OVERFLOW;
+ addinst(nl, inst.next, thl[tlp].se);
+ }
+ NCCLASS =>
+ if(c>0 && classmatch(inst.class, c, 1)) {
+ if(++nnl >= NLIST)
+ raise OVERFLOW;
+ addinst(nl, inst.next, thl[tlp].se);
+ }
+ OR =>
+ # evaluate right choice later
+ if(++ntl >= NLIST)
+ raise OVERFLOW;
+ addinst(thl[tlp:], inst.right, thl[tlp].se);
+ # efficiency: advance and re-evaluate
+ inst = inst.next; # next was left
+ continue;
+ END => # Match!
+ thl[tlp].se[0].q0 = -thl[tlp].se[0].q0; # minus sign
+ thl[tlp].se[0].q1 = p;
+ bnewmatch(thl[tlp].se);
+ * => # regular character
+ if(inst.typex == c){
+ if(++nnl >= NLIST)
+ raise OVERFLOW;
+ addinst(nl, inst.next, thl[tlp].se);
+ }
+ }
+ tlp++;
+ inst = thl[tlp].inst;
+ }
+ }
+ return (sel[0].q0>=0, sel);
+ }
+ exception{
+ OVERFLOW =>
+ error("regexp list overflow");
+ sel[0].q0 = -1;
+ return (0, sel);
+ }
+ return (0, sel);
+}
+
+bnewmatch(sp : Rangeset)
+{
+ i : int;
+
+ if(sel[0].q0<0 || sp[0].q0>sel[0].q1 || (sp[0].q0==sel[0].q1 && sp[0].q1<sel[0].q0))
+ for(i = 0; i<NRange; i++){ # note the reversal; q0<=q1
+ sel[i].q0 = sp[i].q1;
+ sel[i].q1 = sp[i].q0;
+ }
+}