diff options
Diffstat (limited to 'appl/acme/regx.b')
| -rw-r--r-- | appl/acme/regx.b | 1050 |
1 files changed, 1050 insertions, 0 deletions
diff --git a/appl/acme/regx.b b/appl/acme/regx.b new file mode 100644 index 00000000..2bb571c0 --- /dev/null +++ b/appl/acme/regx.b @@ -0,0 +1,1050 @@ +implement Regx; + +include "common.m"; + +sys : Sys; +utils : Utils; +textm : Textm; + +FALSE, TRUE, XXX : import Dat; +NRange : import Dat; +Range, Rangeset : import Dat; +error, warning, tgetc, rgetc : import utils; +Text : import textm; + +init(mods : ref Dat->Mods) +{ + sys = mods.sys; + utils = mods.utils; + textm = mods.textm; +} + +None : con 0; +Fore : con '+'; +Back : con '-'; + +Char : con 0; +Line : con 1; + +isaddrc(r : int) : int +{ + if (utils->strchr("0123456789+-/$.#", r) >= 0) + return TRUE; + return FALSE; +} + +# +# quite hard: could be almost anything but white space, but we are a little conservative, +# aiming for regular expressions of alphanumerics and no white space +# +isregexc(r : int) : int +{ + if(r == 0) + return FALSE; + if(utils->isalnum(r)) + return TRUE; + if(utils->strchr("^+-.*?#,;[]()$", r)>=0) + return TRUE; + return FALSE; +} + +number(md: ref Dat->Mntdir, t : ref Text, r : Range, line : int, dir : int, size : int) : (int, Range) +{ + q0, q1 : int; + + { + if(size == Char){ + if(dir == Fore) + line = r.q1+line; # was t.file.buf.nc+line; + else if(dir == Back){ + if(r.q0==0 && line > 0) + r.q0 = t.file.buf.nc; + line = r.q0-line; # was t.file.buf.nc - line; + } + if(line<0 || line>t.file.buf.nc) + raise "e"; + return (TRUE, (line, line)); + } + (q0, q1) = r; + case(dir){ + None => + q0 = 0; + q1 = 0; + while(line>0 && q1<t.file.buf.nc) + if(t.readc(q1++) == '\n') + if(--line > 0) + q0 = q1; + if(line==1 && t.readc(q1-1)!='\n') # no newline at end - count it + ; + else if(line > 0) + raise "e"; + Fore => + if(q1 > 0) + while(t.readc(q1-1) != '\n') + q1++; + q0 = q1; + while(line>0 && q1<t.file.buf.nc) + if(t.readc(q1++) == '\n') + if(--line > 0) + q0 = q1; + if(line > 0) + raise "e"; + Back => + if(q0 < t.file.buf.nc) + while(q0>0 && t.readc(q0-1)!='\n') + q0--; + q1 = q0; + while(line>0 && q0>0){ + if(t.readc(q0-1) == '\n'){ + if(--line >= 0) + q1 = q0; + } + --q0; + } + if(line > 0) + raise "e"; + while(q0>0 && t.readc(q0-1)!='\n') + --q0; + } + return (TRUE, (q0, q1)); + } + exception{ + * => + if(md != nil) + warning(nil, "address out of range\n"); + return (FALSE, r); + } + return (FALSE, r); +} + +regexp(md: ref Dat->Mntdir, t : ref Text, lim : Range, r : Range, pat : string, dir : int) : (int, Range) +{ + found : int; + sel : Rangeset; + q : int; + + if(pat == nil && rxnull()){ + warning(md, "no previous regular expression"); + return (FALSE, r); + } + if(pat == nil || !rxcompile(pat)) + return (FALSE, r); + if(dir == Back) + (found, sel) = rxbexecute(t, r.q0); + else{ + if(lim.q0 < 0) + q = Dat->Infinity; + else + q = lim.q1; + (found, sel) = rxexecute(t, nil, r.q1, q); + } + if(!found && md == nil) + warning(nil, "no match for regexp\n"); + return (found, sel[0]); +} + +xgetc(a0 : ref Text, a1 : string, n : int) : int +{ + if (a0 == nil) + return rgetc(a1, n); + return tgetc(a0, n); +} + +address(md: ref Dat->Mntdir, t : ref Text, lim : Range, ar : Range, a0 : ref Text, a1 : string, q0 : int, q1 : int, eval : int) : (int, int, Range) +{ + dir, size : int; + prevc, c, n : int; + q : int; + pat : string; + r, nr : Range; + + r = ar; + q = q0; + dir = None; + size = Line; + c = 0; + while(q < q1){ + prevc = c; + c = xgetc(a0, a1, q++); + case(c){ + ';' => + ar = r; + if(prevc == 0) # lhs defaults to 0 + r.q0 = 0; + if(q>=q1 && t!=nil && t.file!=nil) # rhs defaults to $ + r.q1 = t.file.buf.nc; + else{ + (eval, q, nr) = address(md, t, lim, ar, a0, a1, q, q1, eval); + r.q1 = nr.q1; + } + return (eval, q, r); + ',' => + if(prevc == 0) # lhs defaults to 0 + r.q0 = 0; + if(q>=q1 && t!=nil && t.file!=nil) # rhs defaults to $ + r.q1 = t.file.buf.nc; + else{ + (eval, q, nr) = address(md, t, lim, ar, a0, a1, q, q1, eval); + r.q1 = nr.q1; + } + return (eval, q, r); + '+' or '-' => + if(eval && (prevc=='+' || prevc=='-')){ + if((nc := xgetc(a0, a1, q)) != '#' && nc != '/' && nc != '?') + (eval, r) = number(md, t, r, 1, prevc, Line); # do previous one + } + dir = c; + '.' or '$' => + if(q != q0+1) + return (eval, q-1, r); + if(eval) + if(c == '.') + r = ar; + else + r = (t.file.buf.nc, t.file.buf.nc); + if(q < q1) + dir = Fore; + else + dir = None; + '#' => + if(q==q1 || (c=xgetc(a0, a1, q++))<'0' || '9'<c) + return (eval, q-1, r); + size = Char; + n = c -'0'; + while(q<q1){ + c = xgetc(a0, a1, q++); + if(c<'0' || '9'<c){ + q--; + break; + } + n = n*10+(c-'0'); + } + if(eval) + (eval, r) = number(md, t, r, n, dir, size); + dir = None; + size = Line; + '0' to '9' => + n = c -'0'; + while(q<q1){ + c = xgetc(a0, a1, q++); + if(c<'0' || '9'<c){ + q--; + break; + } + n = n*10+(c-'0'); + } + if(eval) + (eval, r) = number(md, t, r, n, dir, size); + dir = None; + size = Line; + '/' => + pat = nil; + break2 := 0; # Ow ! + while(q<q1){ + c = xgetc(a0, a1, q++); + case(c){ + '\n' => + --q; + break2 = 1; + '\\' => + pat[len pat] = c; + if(q == q1) + break2 = 1; + else + c = xgetc(a0, a1, q++); + '/' => + break2 = 1; + } + if (break2) + break; + pat[len pat] = c; + } + if(eval) + (eval, r) = regexp(md, t, lim, r, pat, dir); + pat = nil; + dir = None; + size = Line; + * => + return (eval, q-1, r); + } + } + if(eval && dir != None) + (eval, r) = number(md, t, r, 1, dir, Line); # do previous one + return (eval, q, r); +} + +sel : Rangeset = array[NRange] of Range; +lastregexp : string; + +# Machine Information + +Inst : adt { + typex : int; # < 16r10000 ==> literal, otherwise action + # sid : int; + subid : int; + class : int; + # other : cyclic ref Inst; + right : cyclic ref Inst; + # left : cyclic ref Inst; + next : cyclic ref Inst; +}; + +NPROG : con 1024; +program := array[NPROG] of ref Inst; +progp : int; +startinst : ref Inst; # First inst. of program; might not be program[0] +bstartinst : ref Inst; # same for backwards machine + +Ilist : adt { + inst : ref Inst; # Instruction of the thread + se : Rangeset; + startp : int; # first char of match +}; + +NLIST : con 128; + +thl, nl : array of Ilist; # This list, next list +listx := array[2] of array of Ilist; +sempty : Rangeset = array[NRange] of Range; + +# +# Actions and Tokens +# +# 0x100xx are operators, value == precedence +# 0x200xx are tokens, i.e. operands for operators +# + +OPERATOR : con 16r10000; # Bitmask of all operators +START : con 16r10000; # Start, used for marker on stack +RBRA : con 16r10001; # Right bracket, ) +LBRA : con 16r10002; # Left bracket, ( +OR : con 16r10003; # Alternation, | +CAT : con 16r10004; # Concatentation, implicit operator +STAR : con 16r10005; # Closure, * +PLUS : con 16r10006; # a+ == aa* +QUEST : con 16r10007; # a? == a|nothing, i.e. 0 or 1 a's +ANY : con 16r20000; # Any character but newline, . +NOP : con 16r20001; # No operation, internal use only +BOL : con 16r20002; # Beginning of line, ^ +EOL : con 16r20003; # End of line, $ +CCLASS : con 16r20004; # Character class, [] +NCCLASS : con 16r20005; # Negated character class, [^] +END : con 16r20077; # Terminate: match found + +ISATOR : con 16r10000; +ISAND : con 16r20000; + +# Parser Information + +Node : adt { + first : ref Inst; + last : ref Inst; +}; + +NSTACK : con 20; +andstack := array[NSTACK] of ref Node; +andp : int; +atorstack := array[NSTACK] of int; +atorp : int; +lastwasand : int; # Last token was operand +cursubid : int; +subidstack := array[NSTACK] of int; +subidp : int; +backwards : int; +nbra : int; +exprs : string; +exprp : int; # pointer to next character in source expression +DCLASS : con 10; # allocation increment +nclass : int; # number active +Nclass : int = 0; # high water mark +class : array of string; +negateclass : int; + +nilnode : Node; +nilinst : Inst; + +rxinit() +{ + lastregexp = nil; + for (k := 0; k < NPROG; k++) + program[k] = ref nilinst; + for (k = 0; k < NSTACK; k++) + andstack[k] = ref nilnode; + for (k = 0; k < 2; k++) { + listx[k] = array[NLIST] of Ilist; + for (i := 0; i < NLIST; i++) { + listx[k][i].inst = nil; + listx[k][i].startp = 0; + listx[k][i].se = array[NRange] of Range; + for (j := 0; j < NRange; j++) + listx[k][i].se[j].q0 = listx[k][i].se[j].q1 = 0; + } + } +} + +regerror(e : string) +{ + lastregexp = nil; + buf := sys->sprint("regexp: %s\n", e); + warning(nil, buf); + raise "regerror"; +} + +newinst(t : int) : ref Inst +{ + if(progp >= NPROG) + regerror("expression too long"); + program[progp].typex = t; + program[progp].next = nil; # next was left + program[progp].right = nil; + return program[progp++]; +} + +realcompile(s : string) : ref Inst +{ + token : int; + + { + startlex(s); + atorp = 0; + andp = 0; + subidp = 0; + cursubid = 0; + lastwasand = FALSE; + # Start with a low priority operator to prime parser + pushator(START-1); + while((token=lex()) != END){ + if((token&ISATOR) == OPERATOR) + operator(token); + else + operand(token); + } + # Close with a low priority operator + evaluntil(START); + # Force END + operand(END); + evaluntil(START); + if(nbra) + regerror("unmatched `('"); + --andp; # points to first and only operand + return andstack[andp].first; + } + exception{ + "regerror" => + return nil; + } + return nil; +} + +rxcompile(r : string) : int +{ + oprogp : int; + + if(lastregexp == r) + return TRUE; + lastregexp = nil; + for (i := 0; i < nclass; i++) + class[i] = nil; + nclass = 0; + progp = 0; + backwards = FALSE; + bstartinst = nil; + startinst = realcompile(r); + if(startinst == nil) + return FALSE; + optimize(0); + oprogp = progp; + backwards = TRUE; + bstartinst = realcompile(r); + if(bstartinst == nil) + return FALSE; + optimize(oprogp); + lastregexp = r; + return TRUE; +} + +operand(t : int) +{ + i : ref Inst; + + if(lastwasand) + operator(CAT); # catenate is implicit + i = newinst(t); + if(t == CCLASS){ + if(negateclass) + i.typex = NCCLASS; # UGH + i.class = nclass-1; # UGH + } + pushand(i, i); + lastwasand = TRUE; +} + +operator(t : int) +{ + if(t==RBRA && --nbra<0) + regerror("unmatched `)'"); + if(t==LBRA){ + cursubid++; # silently ignored + nbra++; + if(lastwasand) + operator(CAT); + }else + evaluntil(t); + if(t!=RBRA) + pushator(t); + lastwasand = FALSE; + if(t==STAR || t==QUEST || t==PLUS || t==RBRA) + lastwasand = TRUE; # these look like operands +} + +pushand(f : ref Inst, l : ref Inst) +{ + if(andp >= NSTACK) + error("operand stack overflow"); + andstack[andp].first = f; + andstack[andp].last = l; + andp++; +} + +pushator(t : int) +{ + if(atorp >= NSTACK) + error("operator stack overflow"); + atorstack[atorp++]=t; + if(cursubid >= NRange) + subidstack[subidp++]= -1; + else + subidstack[subidp++]=cursubid; +} + +popand(op : int) : ref Node +{ + if(andp <= 0) + if(op){ + buf := sys->sprint("missing operand for %c", op); + regerror(buf); + }else + regerror("malformed regexp"); + return andstack[--andp]; +} + +popator() : int +{ + if(atorp <= 0) + error("operator stack underflow"); + --subidp; + return atorstack[--atorp]; +} + +evaluntil(pri : int) +{ + op1, op2 : ref Node; + inst1, inst2 : ref Inst; + + while(pri==RBRA || atorstack[atorp-1]>=pri){ + case(popator()){ + LBRA => + op1 = popand('('); + inst2 = newinst(RBRA); + inst2.subid = subidstack[subidp]; + op1.last.next = inst2; + inst1 = newinst(LBRA); + inst1.subid = subidstack[subidp]; + inst1.next = op1.first; + pushand(inst1, inst2); + return; # must have been RBRA + OR => + op2 = popand('|'); + op1 = popand('|'); + inst2 = newinst(NOP); + op2.last.next = inst2; + op1.last.next = inst2; + inst1 = newinst(OR); + inst1.right = op1.first; + inst1.next = op2.first; # next was left + pushand(inst1, inst2); + CAT => + op2 = popand(0); + op1 = popand(0); + if(backwards && op2.first.typex!=END) + (op1, op2) = (op2, op1); + op1.last.next = op2.first; + pushand(op1.first, op2.last); + STAR => + op2 = popand('*'); + inst1 = newinst(OR); + op2.last.next = inst1; + inst1.right = op2.first; + pushand(inst1, inst1); + PLUS => + op2 = popand('+'); + inst1 = newinst(OR); + op2.last.next = inst1; + inst1.right = op2.first; + pushand(op2.first, inst1); + QUEST => + op2 = popand('?'); + inst1 = newinst(OR); + inst2 = newinst(NOP); + inst1.next = inst2; # next was left + inst1.right = op2.first; + op2.last.next = inst2; + pushand(inst1, inst2); + * => + error("unknown regexp operator"); + } + } +} + +optimize(start : int) +{ + inst : int; + target : ref Inst; + + for(inst=start; program[inst].typex!=END; inst++){ + target = program[inst].next; + while(target.typex == NOP) + target = target.next; + program[inst].next = target; + } +} + +startlex(s : string) +{ + exprs = s; + exprp = 0; + nbra = 0; +} + +lex() : int +{ + c : int; + + if (exprp == len exprs) + return END; + c = exprs[exprp++]; + case(c){ + '\\' => + if(exprp < len exprs) + if((c= exprs[exprp++])=='n') + c='\n'; + '*' => + c = STAR; + '?' => + c = QUEST; + '+' => + c = PLUS; + '|' => + c = OR; + '.' => + c = ANY; + '(' => + c = LBRA; + ')' => + c = RBRA; + '^' => + c = BOL; + '$' => + c = EOL; + '[' => + c = CCLASS; + bldcclass(); + } + return c; +} + +nextrec() : int +{ + if(exprp == len exprs || (exprp == len exprs-1 && exprs[exprp]=='\\')) + regerror("malformed `[]'"); + if(exprs[exprp] == '\\'){ + exprp++; + if(exprs[exprp]=='n'){ + exprp++; + return '\n'; + } + return exprs[exprp++] | 16r10000; + } + return exprs[exprp++]; +} + +bldcclass() +{ + c1, c2 : int; + classp : string; + + # we have already seen the '[' + if(exprp < len exprs && exprs[exprp] == '^'){ + classp[len classp] = '\n'; # don't match newline in negate case + negateclass = TRUE; + exprp++; + }else + negateclass = FALSE; + while((c1 = nextrec()) != ']'){ + if(c1 == '-'){ + classp = nil; + regerror("malformed `[]'"); + } + if(exprp < len exprs && exprs[exprp] == '-'){ + exprp++; # eat '-' + if((c2 = nextrec()) == ']') { + classp = nil; + regerror("malformed '[]'"); + } + classp[len classp] = 16rFFFF; + classp[len classp] = c1; + classp[len classp] = c2; + }else + classp[len classp] = c1; + } + if(nclass == Nclass){ + Nclass += DCLASS; + oc := class; + class = array[Nclass] of string; + if (oc != nil) { + class[0:] = oc[0:Nclass-DCLASS]; + oc = nil; + } + } + class[nclass++] = classp; +} + +classmatch(classno : int, c : int, negate : int) : int +{ + p : string; + + p = class[classno]; + for (i := 0; i < len p; ) { + if(p[i] == 16rFFFF){ + if(p[i+1]<=c && c<=p[i+2]) + return !negate; + i += 3; + }else if(p[i++] == c) + return !negate; + } + return negate; +} + +# +# Note optimization in addinst: +# *l must be pending when addinst called; if *l has been looked +# at already, the optimization is a bug. +# +addinst(l : array of Ilist, inst : ref Inst, sep : Rangeset) +{ + p : int; + + for(p = 0; l[p].inst != nil; p++){ + if(l[p].inst==inst){ + if(sep[0].q0 < l[p].se[0].q0) + l[p].se[0:] = sep[0:NRange]; # this would be bug + return; # It's already there + } + } + l[p].inst = inst; + l[p].se[0:]= sep[0:NRange]; + l[p+1].inst = nil; +} + +rxnull() : int +{ + return startinst==nil || bstartinst==nil; +} + +OVERFLOW : con "overflow"; + +# either t!=nil or r!=nil, and we match the string in the appropriate place +rxexecute(t : ref Text, r: string, startp : int, eof : int) : (int, Rangeset) +{ + flag : int; + inst : ref Inst; + tlp : int; + p : int; + nnl, ntl : int; + nc, c : int; + wrapped : int; + startchar : int; + + flag = 0; + p = startp; + startchar = 0; + wrapped = 0; + nnl = 0; + if(startinst.typex<OPERATOR) + startchar = startinst.typex; + listx[0][0].inst = listx[1][0].inst = nil; + sel[0].q0 = -1; + + { + if(t != nil) + nc = t.file.buf.nc; + else + nc = len r; + # Execute machine once for each character + for(;;p++){ + if(p>=eof || p>=nc){ + case(wrapped++){ + 0 or 2 => # let loop run one more click + ; + 1 => # expired; wrap to beginning + if(sel[0].q0>=0 || eof!=Dat->Infinity) + return (sel[0].q0>=0, sel); + listx[0][0].inst = listx[1][0].inst = nil; + p = -1; + continue; + * => + return (sel[0].q0>=0, sel); + } + c = 0; + }else{ + if(((wrapped && p>=startp) || sel[0].q0>0) && nnl==0) + break; + if(t != nil) + c = t.readc(p); + else + c = r[p]; + } + # fast check for first char + if(startchar && nnl==0 && c!=startchar) + continue; + thl = listx[flag]; + nl = listx[flag^=1]; + nl[0].inst = nil; + ntl = nnl; + nnl = 0; + if(sel[0].q0<0 && (!wrapped || p<startp || startp==eof)){ + # Add first instruction to this list + if(++ntl >= NLIST) + raise OVERFLOW; + sempty[0].q0 = p; + addinst(thl, startinst, sempty); + } + # Execute machine until this list is empty + tlp = 0; + inst = thl[0].inst; + while(inst != nil){ # assignment = + case(inst.typex){ + LBRA => + if(inst.subid>=0) + thl[tlp].se[inst.subid].q0 = p; + inst = inst.next; + continue; + RBRA => + if(inst.subid>=0) + thl[tlp].se[inst.subid].q1 = p; + inst = inst.next; + continue; + ANY => + if(c!='\n') { + if(++nnl >= NLIST) + raise OVERFLOW; + addinst(nl, inst.next, thl[tlp].se); + } + BOL => + if(p==0 || (t != nil && t.readc(p-1)=='\n') || (r != nil && r[p-1] == '\n')){ + inst = inst.next; + continue; + } + EOL => + if(c == '\n') { + inst = inst.next; + continue; + } + CCLASS => + if(c>=0 && classmatch(inst.class, c, 0)) { + if(++nnl >= NLIST) + raise OVERFLOW; + addinst(nl, inst.next, thl[tlp].se); + } + NCCLASS => + if(c>=0 && classmatch(inst.class, c, 1)) { + if(++nnl >= NLIST) + raise OVERFLOW; + addinst(nl, inst.next, thl[tlp].se); + } + OR => + # evaluate right choice later + if(++ntl >= NLIST) + raise OVERFLOW; + addinst(thl[tlp:], inst.right, thl[tlp].se); + # efficiency: advance and re-evaluate + inst = inst.next; # next was left + continue; + END => # Match! + thl[tlp].se[0].q1 = p; + newmatch(thl[tlp].se); + * => # regular character + if(inst.typex==c){ + if(++nnl >= NLIST) + raise OVERFLOW; + addinst(nl, inst.next, thl[tlp].se); + } + } + tlp++; + inst = thl[tlp].inst; + } + } + return (sel[0].q0>=0, sel); + } + exception{ + OVERFLOW => + error("regexp list overflow"); + sel[0].q0 = -1; + return (0, sel); + } + return (0, sel); +} + +newmatch(sp : Rangeset) +{ + if(sel[0].q0<0 || sp[0].q0<sel[0].q0 || + (sp[0].q0==sel[0].q0 && sp[0].q1>sel[0].q1)) + sel[0:] = sp[0:NRange]; +} + +rxbexecute(t : ref Text, startp : int) : (int, Rangeset) +{ + flag : int; + inst : ref Inst; + tlp : int; + p : int; + nnl, ntl : int; + c : int; + wrapped : int; + startchar : int; + + flag = 0; + nnl = 0; + wrapped = 0; + p = startp; + startchar = 0; + if(bstartinst.typex<OPERATOR) + startchar = bstartinst.typex; + listx[0][0].inst = listx[1][0].inst = nil; + sel[0].q0= -1; + + { + # Execute machine once for each character, including terminal NUL + for(;;--p){ + if(p <= 0){ + case(wrapped++){ + 0 or 2 => # let loop run one more click + ; + 1 => # expired; wrap to end + if(sel[0].q0>=0) + return (sel[0].q0>=0, sel); + listx[0][0].inst = listx[1][0].inst = nil; + p = t.file.buf.nc+1; + continue; + 3 or * => + return (sel[0].q0>=0, sel); + } + c = 0; + }else{ + if(((wrapped && p<=startp) || sel[0].q0>0) && nnl==0) + break; + c = t.readc(p-1); + } + # fast check for first char + if(startchar && nnl==0 && c!=startchar) + continue; + thl = listx[flag]; + nl = listx[flag^=1]; + nl[0].inst = nil; + ntl = nnl; + nnl = 0; + if(sel[0].q0<0 && (!wrapped || p>startp)){ + # Add first instruction to this list + if(++ntl >= NLIST) + raise OVERFLOW; + # the minus is so the optimizations in addinst work + sempty[0].q0 = -p; + addinst(thl, bstartinst, sempty); + } + # Execute machine until this list is empty + tlp = 0; + inst = thl[0].inst; + while(inst != nil){ # assignment = + case(inst.typex){ + LBRA => + if(inst.subid>=0) + thl[tlp].se[inst.subid].q0 = p; + inst = inst.next; + continue; + RBRA => + if(inst.subid >= 0) + thl[tlp].se[inst.subid].q1 = p; + inst = inst.next; + continue; + ANY => + if(c != '\n') { + if(++nnl >= NLIST) + raise OVERFLOW; + addinst(nl, inst.next, thl[tlp].se); + } + BOL => + if(c=='\n' || p==0){ + inst = inst.next; + continue; + } + EOL => + if(p<t.file.buf.nc && t.readc(p)=='\n') { + inst = inst.next; + continue; + } + CCLASS => + if(c>0 && classmatch(inst.class, c, 0)) { + if(++nnl >= NLIST) + raise OVERFLOW; + addinst(nl, inst.next, thl[tlp].se); + } + NCCLASS => + if(c>0 && classmatch(inst.class, c, 1)) { + if(++nnl >= NLIST) + raise OVERFLOW; + addinst(nl, inst.next, thl[tlp].se); + } + OR => + # evaluate right choice later + if(++ntl >= NLIST) + raise OVERFLOW; + addinst(thl[tlp:], inst.right, thl[tlp].se); + # efficiency: advance and re-evaluate + inst = inst.next; # next was left + continue; + END => # Match! + thl[tlp].se[0].q0 = -thl[tlp].se[0].q0; # minus sign + thl[tlp].se[0].q1 = p; + bnewmatch(thl[tlp].se); + * => # regular character + if(inst.typex == c){ + if(++nnl >= NLIST) + raise OVERFLOW; + addinst(nl, inst.next, thl[tlp].se); + } + } + tlp++; + inst = thl[tlp].inst; + } + } + return (sel[0].q0>=0, sel); + } + exception{ + OVERFLOW => + error("regexp list overflow"); + sel[0].q0 = -1; + return (0, sel); + } + return (0, sel); +} + +bnewmatch(sp : Rangeset) +{ + i : int; + + if(sel[0].q0<0 || sp[0].q0>sel[0].q1 || (sp[0].q0==sel[0].q1 && sp[0].q1<sel[0].q0)) + for(i = 0; i<NRange; i++){ # note the reversal; q0<=q1 + sel[i].q0 = sp[i].q1; + sel[i].q1 = sp[i].q0; + } +} |
