diff options
Diffstat (limited to 'appl/cmd/yacc.b')
| -rw-r--r-- | appl/cmd/yacc.b | 2810 |
1 files changed, 2810 insertions, 0 deletions
diff --git a/appl/cmd/yacc.b b/appl/cmd/yacc.b new file mode 100644 index 00000000..97ef87cf --- /dev/null +++ b/appl/cmd/yacc.b @@ -0,0 +1,2810 @@ +implement Yacc; + +include "sys.m"; + sys: Sys; + print, fprint, sprint: import sys; + UTFmax: import Sys; + +include "bufio.m"; + bufio: Bufio; + Iobuf: import bufio; + +include "draw.m"; + +Yacc: module +{ + init: fn(ctxt: ref Draw->Context, argv: list of string); +}; + +Arg: adt +{ + argv: list of string; + c: int; + opts: string; + + init: fn(argv: list of string): ref Arg; + opt: fn(arg: self ref Arg): int; + arg: fn(arg: self ref Arg): string; +}; + +PARSER: con "/lib/yaccpar"; +OFILE: con "tab.b"; +FILEU: con "output"; +FILED: con "tab.m"; +FILEDEBUG: con "debug"; + +# the following are adjustable +# according to memory size +ACTSIZE: con 30000; +NSTATES: con 2000; +TEMPSIZE: con 2000; + +SYMINC: con 50; # increase for non-term or term +RULEINC: con 50; # increase for max rule length prodptr[i] +PRODINC: con 100; # increase for productions prodptr +WSETINC: con 50; # increase for working sets wsets +STATEINC: con 200; # increase for states statemem + +NAMESIZE: con 50; +NTYPES: con 63; +ISIZE: con 400; + +PRIVATE: con 16rE000; # unicode private use + +# relationships which must hold: +# TEMPSIZE >= NTERMS + NNONTERM + 1 +# TEMPSIZE >= NSTATES +# + +NTBASE: con 8r10000; +ERRCODE: con 8190; +ACCEPTCODE: con 8191; +YYLEXUNK: con 3; +TOKSTART: con 4; #index of first defined token + +# no, left, right, binary assoc. +NOASC, LASC, RASC, BASC: con iota; + +# flags for state generation +DONE, MUSTDO, MUSTLOOKAHEAD: con iota; + +# flags for a rule having an action, and being reduced +ACTFLAG: con 16r4; +REDFLAG: con 16r8; + +# output parser flags +YYFLAG1: con -1000; + +# parse tokens +IDENTIFIER, MARK, TERM, LEFT, RIGHT, BINARY, PREC, LCURLY, IDENTCOLON, NUMBER, START, TYPEDEF, TYPENAME, MODULE: con PRIVATE+iota; + +ENDFILE: con 0; + +EMPTY: con 1; +WHOKNOWS: con 0; +OK: con 1; +NOMORE: con -1000; + +# macros for getting associativity and precedence levels +ASSOC(i: int): int +{ + return i & 3; +} + +PLEVEL(i: int): int +{ + return (i >> 4) & 16r3f; +} + +TYPE(i: int): int +{ + return (i >> 10) & 16r3f; +} + +# macros for setting associativity and precedence levels +SETASC(i, j: int): int +{ + return i | j; +} + +SETPLEV(i, j: int): int +{ + return i | (j << 4); +} + +SETTYPE(i, j: int): int +{ + return i | (j << 10); +} + +# I/O descriptors +stderr: ref Sys->FD; +fdefine: ref Iobuf; # file for module definition +fdebug: ref Iobuf; # y.debug for strings for debugging +ftable: ref Iobuf; # y.tab.c file +finput: ref Iobuf; # input file +foutput: ref Iobuf; # y.output file + +CodeData, CodeMod, CodeAct: con iota; +NCode: con 8192; + +Code: adt +{ + kind: int; + data: array of byte; + ndata: int; + next: cyclic ref Code; +}; + +codehead: ref Code; +codetail: ref Code; + +modname: string; # name of module +suppressmod: int; # suppress module definition +stacksize := 200; + +# communication variables between various I/O routines +infile: string; # input file name +numbval: int; # value of an input number +tokname: string; # input token name, slop for runes and 0 + +# structure declarations +Lkset: type array of int; + +Pitem: adt +{ + prod: array of int; + off: int; # offset within the production + first: int; # first term or non-term in item + prodno: int; # production number for sorting +}; + +Item: adt +{ + pitem: Pitem; + look: Lkset; +}; + +Symb: adt +{ + name: string; + value: int; +}; + +Wset: adt +{ + pitem: Pitem; + flag: int; + ws: Lkset; +}; + + # storage of names + +parser := PARSER; +yydebug: string; + + # storage of types +ntypes: int; # number of types defined +typeset := array[NTYPES] of string; # pointers to type tags + + # token information + +ntokens := 0; # number of tokens +tokset: array of Symb; +toklev: array of int; # vector with the precedence of the terminals + + # nonterminal information + +nnonter := -1; # the number of nonterminals +nontrst: array of Symb; +start: int; # start symbol + + # state information + +nstate := 0; # number of states +pstate := array[NSTATES+2] of int; # index into statemem to the descriptions of the states +statemem : array of Item; +tystate := array[NSTATES] of int; # contains type information about the states +tstates : array of int; # states generated by terminal gotos +ntstates : array of int; # states generated by nonterminal gotos +mstates := array[NSTATES] of {* => 0}; # chain of overflows of term/nonterm generation lists +lastred: int; # number of last reduction of a state +defact := array[NSTATES] of int; # default actions of states + + # lookahead set information + +lkst: array of Lkset; +nolook := 0; # flag to turn off lookahead computations +tbitset := 0; # size of lookahead sets +clset: Lkset; # temporary storage for lookahead computations + + # working set information + +wsets: array of Wset; +cwp: int; + + # storage for action table + +amem: array of int; # action table storage +memp: int; # next free action table position +indgo := array[NSTATES] of int; # index to the stored goto table + + # temporary vector, indexable by states, terms, or ntokens + +temp1 := array[TEMPSIZE] of int; # temporary storage, indexed by terms + ntokens or states +lineno := 1; # current input line number +fatfl := 1; # if on, error is fatal +nerrors := 0; # number of errors + + # assigned token type values +extval := 0; + +ytabc := OFILE; # name of y.tab.c + + # grammar rule information + +nprod := 1; # number of productions +prdptr: array of array of int; # pointers to descriptions of productions +levprd: array of int; # precedence levels for the productions +rlines: array of int; # line number for this rule + + + # statistics collection variables + +zzgoent := 0; +zzgobest := 0; +zzacent := 0; +zzexcp := 0; +zzclose := 0; +zzrrconf := 0; +zzsrconf := 0; +zzstate := 0; + + # optimizer arrays +yypgo: array of array of int; +optst: array of array of int; +ggreed: array of int; +pgo: array of int; + +maxspr: int; # maximum spread of any entry +maxoff: int; # maximum offset into a array +maxa: int; + + # storage for information about the nonterminals + +pres: array of array of array of int; # vector of pointers to productions yielding each nonterminal +pfirst: array of Lkset; +pempty: array of int; # vector of nonterminals nontrivially deriving e + # random stuff picked out from between functions + +indebug := 0; # debugging flag for cpfir +pidebug := 0; # debugging flag for putitem +gsdebug := 0; # debugging flag for stagen +cldebug := 0; # debugging flag for closure +pkdebug := 0; # debugging flag for apack +g2debug := 0; # debugging for go2gen +adb := 0; # debugging for callopt + +Resrv : adt +{ + name: string; + value: int; +}; + +resrv := array[] of { + Resrv("binary", BINARY), + Resrv("module", MODULE), + Resrv("left", LEFT), + Resrv("nonassoc", BINARY), + Resrv("prec", PREC), + Resrv("right", RIGHT), + Resrv("start", START), + Resrv("term", TERM), + Resrv("token", TERM), + Resrv("type", TYPEDEF),}; + +zznewstate := 0; + +init(nil: ref Draw->Context, argv: list of string) +{ + sys = load Sys Sys->PATH; + bufio = load Bufio Bufio->PATH; + + stderr = sys->fildes(2); + + setup(argv); # initialize and read productions + + tbitset = (ntokens+32)/32; + cpres(); # make table of which productions yield a given nonterminal + cempty(); # make a table of which nonterminals can match the empty string + cpfir(); # make a table of firsts of nonterminals + + stagen(); # generate the states + + yypgo = array[nnonter+1] of array of int; + optst = array[nstate] of array of int; + output(); # write the states and the tables + go2out(); + + hideprod(); + summary(); + + callopt(); + + others(); + + if(fdefine != nil) + fdefine.close(); + if(fdebug != nil) + fdebug.close(); + if(ftable != nil) + ftable.close(); + if(foutput != nil) + foutput.close(); +} + +setup(argv: list of string) +{ + j, ty: int; + + ytab := 0; + vflag := 0; + dflag := 0; + stem := 0; + stemc := "y"; + foutput = nil; + fdefine = nil; + fdebug = nil; + arg := Arg.init(argv); + while(c := arg.opt()){ + case c{ + 'v' or 'V' => + vflag++; + 'D' => + yydebug = arg.arg(); + 'd' => + dflag++; + 'n' => + stacksize = int arg.arg(); + 'o' => + ytab++; + ytabc = arg.arg(); + 's' => + stem++; + stemc = arg.arg(); + 'm' => + suppressmod++; + * => + usage(); + } + } + argv = arg.argv; + if(len argv != 1) + usage(); + if (suppressmod && dflag) { + sys->fprint(stderr, "yacc: -m and -d are exclusive\n"); + usage(); + } + if (stacksize < 1) { + sys->fprint(stderr, "yacc: stack size too small\n"); + usage(); + } + infile = hd argv; + finput = bufio->open(infile, Bufio->OREAD); + if(finput == nil) + error("cannot open '"+infile+"'"); + + openup(stemc, dflag, vflag, ytab, ytabc); + + defin(0, "$end"); + extval = PRIVATE; # tokens start in unicode 'private use' + defin(0, "error"); + defin(1, "$accept"); + defin(0, "$unk"); + i := 0; + + for(t := gettok(); t != MARK && t != ENDFILE; ) + case t { + ';' => + t = gettok(); + + START => + if(gettok() != IDENTIFIER) + error("bad %%start construction"); + start = chfind(1, tokname); + t = gettok(); + + TYPEDEF => + if(gettok() != TYPENAME) + error("bad syntax in %%type"); + ty = numbval; + for(;;) { + t = gettok(); + case t { + IDENTIFIER => + if((t=chfind(1, tokname)) < NTBASE) { + j = TYPE(toklev[t]); + if(j != 0 && j != ty) + error("type redeclaration of token "+ + tokset[t].name); + else + toklev[t] = SETTYPE(toklev[t], ty); + } else { + j = nontrst[t-NTBASE].value; + if(j != 0 && j != ty) + error("type redeclaration of nonterminal "+ + nontrst[t-NTBASE].name); + else + nontrst[t-NTBASE].value = ty; + } + continue; + ',' => + continue; + ';' => + t = gettok(); + } + break; + } + + MODULE => + cpymodule(); + t = gettok(); + + LEFT or BINARY or RIGHT or TERM => + # nonzero means new prec. and assoc. + lev := t-TERM; + if(lev) + i++; + ty = 0; + + # get identifiers so defined + t = gettok(); + + # there is a type defined + if(t == TYPENAME) { + ty = numbval; + t = gettok(); + } + for(;;) { + case t { + ',' => + t = gettok(); + continue; + + ';' => + break; + + IDENTIFIER => + j = chfind(0, tokname); + if(j >= NTBASE) + error(tokname+" defined earlier as nonterminal"); + if(lev) { + if(ASSOC(toklev[j])) + error("redeclaration of precedence of "+tokname); + toklev[j] = SETASC(toklev[j], lev); + toklev[j] = SETPLEV(toklev[j], i); + } + if(ty) { + if(TYPE(toklev[j])) + error("redeclaration of type of "+tokname); + toklev[j] = SETTYPE(toklev[j],ty); + } + t = gettok(); + if(t == NUMBER) { + tokset[j].value = numbval; + t = gettok(); + } + continue; + } + break; + } + + LCURLY => + cpycode(); + t = gettok(); + + * => + error("syntax error"); + } + if(t == ENDFILE) + error("unexpected EOF before %%"); + if(modname == nil) + error("missing %module specification"); + + moreprod(); + prdptr[0] = array[4] of { + NTBASE, # added production + start, # if start is 0, we will overwrite with the lhs of the first rule + 1, + 0 + }; + nprod = 1; + curprod := array[RULEINC] of int; + t = gettok(); + if(t != IDENTCOLON) + error("bad syntax on first rule"); + + if(!start) + prdptr[0][1] = chfind(1, tokname); + + # read rules + # put into prdptr array in the format + # target + # followed by id's of terminals and non-terminals + # followd by -nprod + while(t != MARK && t != ENDFILE) { + mem := 0; + # process a rule + rlines[nprod] = lineno; + if(t == '|') + curprod[mem++] = prdptr[nprod-1][0]; + else if(t == IDENTCOLON) { + curprod[mem] = chfind(1, tokname); + if(curprod[mem] < NTBASE) + error("token illegal on LHS of grammar rule"); + mem++; + } else + error("illegal rule: missing semicolon or | ?"); + + # read rule body + t = gettok(); + + for(;;){ + while(t == IDENTIFIER) { + curprod[mem] = chfind(1, tokname); + if(curprod[mem] < NTBASE) + levprd[nprod] = toklev[curprod[mem]]; + mem++; + if(mem >= len curprod){ + ncurprod := array[mem+RULEINC] of int; + ncurprod[0:] = curprod; + curprod = ncurprod; + } + t = gettok(); + } + if(t == PREC) { + if(gettok() != IDENTIFIER) + error("illegal %%prec syntax"); + j = chfind(2, tokname); + if(j >= NTBASE) + error("nonterminal "+nontrst[j-NTBASE].name+" illegal after %%prec"); + levprd[nprod] = toklev[j]; + t = gettok(); + } + if(t != '=') + break; + levprd[nprod] |= ACTFLAG; + addcode(CodeAct, "\n"+string nprod+"=>"); + cpyact(curprod, mem); + + # action within rule... + if((t=gettok()) == IDENTIFIER) { + # make it a nonterminal + j = chfind(1, "$$"+string nprod); + + # + # the current rule will become rule number nprod+1 + # enter null production for action + # + prdptr[nprod] = array[2] of {j, -nprod}; + + # update the production information + nprod++; + moreprod(); + levprd[nprod] = levprd[nprod-1] & ~ACTFLAG; + levprd[nprod-1] = ACTFLAG; + rlines[nprod] = lineno; + + # make the action appear in the original rule + curprod[mem++] = j; + if(mem >= len curprod){ + ncurprod := array[mem+RULEINC] of int; + ncurprod[0:] = curprod; + curprod = ncurprod; + } + } + } + + while(t == ';') + t = gettok(); + curprod[mem++] = -nprod; + + # check that default action is reasonable + if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[curprod[0]-NTBASE].value) { + # no explicit action, LHS has value + + tempty := curprod[1]; + if(tempty < 0) + error("must return a value, since LHS has a type"); + else + if(tempty >= NTBASE) + tempty = nontrst[tempty-NTBASE].value; + else + tempty = TYPE(toklev[tempty]); + if(tempty != nontrst[curprod[0]-NTBASE].value) + error("default action causes potential type clash"); + else{ + addcodec(CodeAct, '\n'); + addcode(CodeAct, string nprod); + addcode(CodeAct, "=>\nyyval."); + addcode(CodeAct, typeset[tempty]); + addcode(CodeAct, " = yys[yyp+1].yyv."); + addcode(CodeAct, typeset[tempty]); + addcodec(CodeAct, ';'); + } + } + moreprod(); + prdptr[nprod] = array[mem] of int; + prdptr[nprod][0:] = curprod[:mem]; + nprod++; + moreprod(); + levprd[nprod] = 0; + } + + # + # end of all rules + # dump out the prefix code + # + ftable.puts("implement "); + ftable.puts(modname); + ftable.puts(";\n"); + + dumpcode(CodeMod); + dumpmod(); + dumpcode(CodeAct); + + ftable.puts("YYEOFCODE: con 1;\n"); + ftable.puts("YYERRCODE: con 2;\n"); + ftable.puts("YYMAXDEPTH: con " + string stacksize + ";\n"); # was 150 + #ftable.puts("yyval: YYSTYPE;\n"); + + # + # copy any postfix code + # + if(t == MARK) { + ftable.puts("\n#line\t"); + ftable.puts(string lineno); + ftable.puts("\t\""); + ftable.puts(infile); + ftable.puts("\"\n"); + while((c=finput.getc()) != Bufio->EOF) + ftable.putc(c); + } + finput.close(); +} + +# +# allocate enough room to hold another production +# +moreprod() +{ + n := len prdptr; + if(nprod < n) + return; + n += PRODINC; + aprod := array[n] of array of int; + aprod[0:] = prdptr; + prdptr = aprod; + + alevprd := array[n] of int; + alevprd[0:] = levprd; + levprd = alevprd; + + arlines := array[n] of int; + arlines[0:] = rlines; + rlines = arlines; +} + +# +# define s to be a terminal if t=0 +# or a nonterminal if t=1 +# +defin(nt: int, s: string): int +{ + val := 0; + if(nt) { + nnonter++; + if(nnonter >= len nontrst){ + anontrst := array[nnonter + SYMINC] of Symb; + anontrst[0:] = nontrst; + nontrst = anontrst; + } + nontrst[nnonter] = Symb(s, 0); + return NTBASE + nnonter; + } + + # must be a token + ntokens++; + if(ntokens >= len tokset){ + atokset := array[ntokens + SYMINC] of Symb; + atokset[0:] = tokset; + tokset = atokset; + + atoklev := array[ntokens + SYMINC] of int; + atoklev[0:] = toklev; + toklev = atoklev; + } + tokset[ntokens].name = s; + toklev[ntokens] = 0; + + # establish value for token + # single character literal + if(s[0] == ' ' && len s == 1+1){ + val = s[1]; + }else if(s[0] == ' ' && s[1] == '\\') { # escape sequence + if(len s == 2+1) { + # single character escape sequence + case s[2] { + '\'' => val = '\''; + '"' => val = '"'; + '\\' => val = '\\'; + 'a' => val = '\a'; + 'b' => val = '\b'; + 'n' => val = '\n'; + 'r' => val = '\r'; + 't' => val = '\t'; + 'v' => val = '\v'; + * => + error("invalid escape "+s[1:3]); + } + }else if(s[2] == 'u' && len s == 2+1+4) { # \unnnn sequence + val = 0; + s = s[3:]; + while(s != ""){ + c := s[0]; + if(c >= '0' && c <= '9') + c -= '0'; + else if(c >= 'a' && c <= 'f') + c -= 'a' - 10; + else if(c >= 'A' && c <= 'F') + c -= 'A' - 10; + else + error("illegal \\unnnn construction"); + val = val * 16 + c; + s = s[1:]; + } + if(val == 0) + error("'\\u0000' is illegal"); + }else + error("unknown escape"); + }else + val = extval++; + + tokset[ntokens].value = val; + return ntokens; +} + +peekline := 0; +gettok(): int +{ + i, match, c: int; + + tokname = ""; + for(;;){ + reserve := 0; + lineno += peekline; + peekline = 0; + c = finput.getc(); + while(c == ' ' || c == '\n' || c == '\t' || c == '\v' || c == '\r') { + if(c == '\n') + lineno++; + c = finput.getc(); + } + + # skip comment + if(c != '#') + break; + lineno += skipcom(); + } + case c { + Bufio->EOF => + return ENDFILE; + + '{' => + finput.ungetc(); + return '='; + + '<' => + # get, and look up, a type name (union member name) + i = 0; + while((c=finput.getc()) != '>' && c != Bufio->EOF && c != '\n') + tokname[i++] = c; + if(c != '>') + error("unterminated < ... > clause"); + for(i=1; i<=ntypes; i++) + if(typeset[i] == tokname) { + numbval = i; + return TYPENAME; + } + ntypes++; + numbval = ntypes; + typeset[numbval] = tokname; + return TYPENAME; + + '"' or '\'' => + match = c; + tokname[0] = ' '; + i = 1; + for(;;) { + c = finput.getc(); + if(c == '\n' || c == Bufio->EOF) + error("illegal or missing ' or \"" ); + if(c == '\\') { + tokname[i++] = '\\'; + c = finput.getc(); + } else if(c == match) + return IDENTIFIER; + tokname[i++] = c; + } + + '%' => + case c = finput.getc(){ + '%' => return MARK; + '=' => return PREC; + '{' => return LCURLY; + } + + getword(c); + # find a reserved word + for(c=0; c < len resrv; c++) + if(tokname == resrv[c].name) + return resrv[c].value; + error("invalid escape, or illegal reserved word: "+tokname); + + '0' to '9' => + numbval = c - '0'; + while(isdigit(c = finput.getc())) + numbval = numbval*10 + c-'0'; + finput.ungetc(); + return NUMBER; + + * => + if(isword(c) || c=='.' || c=='$') + getword(c); + else + return c; + } + + # look ahead to distinguish IDENTIFIER from IDENTCOLON + c = finput.getc(); + while(c == ' ' || c == '\t'|| c == '\n' || c == '\v' || c == '\r' || c == '#') { + if(c == '\n') + peekline++; + # look for comments + if(c == '#') + peekline += skipcom(); + c = finput.getc(); + } + if(c == ':') + return IDENTCOLON; + finput.ungetc(); + return IDENTIFIER; +} + +getword(c: int) +{ + i := 0; + while(isword(c) || isdigit(c) || c == '_' || c=='.' || c=='$') { + tokname[i++] = c; + c = finput.getc(); + } + finput.ungetc(); +} + +# +# determine the type of a symbol +# +fdtype(t: int): int +{ + v : int; + s: string; + + if(t >= NTBASE) { + v = nontrst[t-NTBASE].value; + s = nontrst[t-NTBASE].name; + } else { + v = TYPE(toklev[t]); + s = tokset[t].name; + } + if(v <= 0) + error("must specify type for "+s); + return v; +} + +chfind(t: int, s: string): int +{ + if(s[0] == ' ') + t = 0; + for(i:=0; i<=ntokens; i++) + if(s == tokset[i].name) + return i; + for(i=0; i<=nnonter; i++) + if(s == nontrst[i].name) + return NTBASE+i; + + # cannot find name + if(t > 1) + error(s+" should have been defined earlier"); + return defin(t, s); +} + +# +# saves module definition in Code +# +cpymodule() +{ + if(gettok() != IDENTIFIER) + error("bad %%module construction"); + if(modname != nil) + error("duplicate %%module construction"); + modname = tokname; + + level := 0; + for(;;) { + if((c:=finput.getc()) == Bufio->EOF) + error("EOF encountered while processing %%module"); + case c { + '\n' => + lineno++; + '{' => + level++; + if(level == 1) + continue; + '}' => + level--; + + # we are finished copying + if(level == 0) + return; + } + addcodec(CodeMod, c); + } + if(codehead == nil || codetail.kind != CodeMod) + addcodec(CodeMod, '\n'); # ensure we add something +} + +# +# saves code between %{ and %} +# +cpycode() +{ + c := finput.getc(); + if(c == '\n') { + c = finput.getc(); + lineno++; + } + addcode(CodeData, "\n#line\t" + string lineno + "\t\"" + infile + "\"\n"); + while(c != Bufio->EOF) { + if(c == '%') { + if((c=finput.getc()) == '}') + return; + addcodec(CodeData, '%'); + } + addcodec(CodeData, c); + if(c == '\n') + lineno++; + c = finput.getc(); + } + error("eof before %%}"); +} + +addcode(k: int, s: string) +{ + for(i := 0; i < len s; i++) + addcodec(k, s[i]); +} + +addcodec(k, c: int) +{ + if(codehead == nil + || k != codetail.kind + || codetail.ndata >= NCode){ + cd := ref Code(k, array[NCode+UTFmax] of byte, 0, nil); + if(codehead == nil) + codehead = cd; + else + codetail.next = cd; + codetail = cd; + } + + codetail.ndata += sys->char2byte(c, codetail.data, codetail.ndata); +} + +dumpcode(til: int) +{ + for(; codehead != nil; codehead = codehead.next){ + if(codehead.kind == til) + return; + if(ftable.write(codehead.data, codehead.ndata) != codehead.ndata) + error("can't write output file"); + } +} + +# +# write out the module declaration and any token info +# +dumpmod() +{ + if(fdefine != nil) { + fdefine.puts(modname); + fdefine.puts(": module {\n"); + } + if (!suppressmod) { + ftable.puts(modname); + ftable.puts(": module {\n"); + } + + for(; codehead != nil; codehead = codehead.next){ + if(codehead.kind != CodeMod) + break; + if(ftable.write(codehead.data, codehead.ndata) != codehead.ndata) + error("can't write output file"); + if(fdefine != nil && fdefine.write(codehead.data, codehead.ndata) != codehead.ndata) + error("can't write define file"); + } + + for(i:=TOKSTART; i<=ntokens; i++) { + # non-literals + c := tokset[i].name[0]; + if(c != ' ' && c != '$') { + s := tokset[i].name+": con "+string tokset[i].value+";\n"; + ftable.puts(s); + if(fdefine != nil) + fdefine.puts(s); + } + } + + if(fdefine != nil) + fdefine.puts("};\n"); + if (!suppressmod) + ftable.puts("\n};\n"); + + if(fdebug != nil) { + fdebug.puts("yytoknames = array[] of {\n"); + for(i=1; i<=ntokens; i++) { + if(tokset[i].name != nil) + fdebug.puts("\t\""+chcopy(tokset[i].name)+"\",\n"); + else + fdebug.puts("\t\"\",\n"); + } + fdebug.puts("};\n"); + } +} + +# +# skip over comments +# skipcom is called after reading a '#' +# +skipcom(): int +{ + c := finput.getc(); + while(c != Bufio->EOF) { + if(c == '\n') + return 1; + c = finput.getc(); + } + error("EOF inside comment"); + return 0; +} + +# +# copy limbo action to the next ; or closing } +# +cpyact(curprod: array of int, max: int) +{ + addcode(CodeAct, "\n#line\t"); + addcode(CodeAct, string lineno); + addcode(CodeAct, "\t\""); + addcode(CodeAct, infile); + addcode(CodeAct, "\"\n"); + + brac := 0; + +loop: for(;;){ + c := finput.getc(); + swt: case c { + ';' => + if(brac == 0) { + addcodec(CodeAct, c); + return; + } + + '{' => + brac++; + + '$' => + s := 1; + tok := -1; + c = finput.getc(); + + # type description + if(c == '<') { + finput.ungetc(); + if(gettok() != TYPENAME) + error("bad syntax on $<ident> clause"); + tok = numbval; + c = finput.getc(); + } + if(c == '$') { + addcode(CodeAct, "yyval"); + + # put out the proper tag... + if(ntypes) { + if(tok < 0) + tok = fdtype(curprod[0]); + addcode(CodeAct, "."+typeset[tok]); + } + continue loop; + } + if(c == '-') { + s = -s; + c = finput.getc(); + } + j := 0; + if(isdigit(c)) { + while(isdigit(c)) { + j = j*10 + c-'0'; + c = finput.getc(); + } + finput.ungetc(); + j = j*s; + if(j >= max) + error("Illegal use of $" + string j); + }else if(isword(c) || c == '_' || c == '.') { + # look for $name + finput.ungetc(); + if(gettok() != IDENTIFIER) + error("$ must be followed by an identifier"); + tokn := chfind(2, tokname); + fnd := -1; + if((c = finput.getc()) != '@') + finput.ungetc(); + else if(gettok() != NUMBER) + error("@ must be followed by number"); + else + fnd = numbval; + for(j=1; j<max; j++){ + if(tokn == curprod[j]) { + fnd--; + if(fnd <= 0) + break; + } + } + if(j >= max) + error("$name or $name@number not found"); + }else{ + addcodec(CodeAct, '$'); + if(s < 0) + addcodec(CodeAct, '-'); + finput.ungetc(); + continue loop; + } + addcode(CodeAct, "yys[yypt-" + string(max-j-1) + "].yyv"); + + # put out the proper tag + if(ntypes) { + if(j <= 0 && tok < 0) + error("must specify type of $" + string j); + if(tok < 0) + tok = fdtype(curprod[j]); + addcodec(CodeAct, '.'); + addcode(CodeAct, typeset[tok]); + } + continue loop; + + '}' => + brac--; + if(brac) + break; + addcodec(CodeAct, c); + return; + + '#' => + # a comment + addcodec(CodeAct, c); + c = finput.getc(); + while(c != Bufio->EOF) { + if(c == '\n') { + lineno++; + break swt; + } + addcodec(CodeAct, c); + c = finput.getc(); + } + error("EOF inside comment"); + + '\''or '"' => + # character string or constant + match := c; + addcodec(CodeAct, c); + while(c = finput.getc()) { + if(c == '\\') { + addcodec(CodeAct, c); + c = finput.getc(); + if(c == '\n') + lineno++; + } else if(c == match) + break swt; + if(c == '\n') + error("newline in string or char const."); + addcodec(CodeAct, c); + } + error("EOF in string or character constant"); + + Bufio->EOF => + error("action does not terminate"); + + '\n' => + lineno++; + } + + addcodec(CodeAct, c); + } +} + +openup(stem: string, dflag, vflag, ytab: int, ytabc: string) +{ + buf: string; + if(vflag) { + buf = stem + "." + FILEU; + foutput = bufio->create(buf, Bufio->OWRITE, 8r666); + if(foutput == nil) + error("can't create " + buf); + } + if(yydebug != nil) { + buf = stem + "." + FILEDEBUG; + fdebug = bufio->create(buf, Bufio->OWRITE, 8r666); + if(fdebug == nil) + error("can't create " + buf); + } + if(dflag) { + buf = stem + "." + FILED; + fdefine = bufio->create(buf, Bufio->OWRITE, 8r666); + if(fdefine == nil) + error("can't create " + buf); + } + if(ytab == 0) + buf = stem + "." + OFILE; + else + buf = ytabc; + ftable = bufio->create(buf, Bufio->OWRITE, 8r666); + if(ftable == nil) + error("can't create file " + buf); +} + +# +# return a pointer to the name of symbol i +# +symnam(i: int): string +{ + s: string; + if(i >= NTBASE) + s = nontrst[i-NTBASE].name; + else + s = tokset[i].name; + if(s[0] == ' ') + s = s[1:]; + return s; +} + +# +# write out error comment +# +error(s: string) +{ + nerrors++; + fprint(stderr, "yacc: fatal error: %s, %s:%d\n", s, infile, lineno); + if(!fatfl) + return; + summary(); + raise "fail:error"; +} + +# +# set elements 0 through n-1 to c +# +aryfil(v: array of int, n, c: int) +{ + for(i:=0; i<n; i++) + v[i] = c; +} + +# +# compute an array with the beginnings of productions yielding given nonterminals +# The array pres points to these lists +# the array pyield has the lists: the total size is only NPROD+1 +# +cpres() +{ + pres = array[nnonter+1] of array of array of int; + curres := array[nprod] of array of int; + for(i:=0; i<=nnonter; i++) { + n := 0; + c := i+NTBASE; + fatfl = 0; # make undefined symbols nonfatal + for(j:=0; j<nprod; j++) + if(prdptr[j][0] == c) + curres[n++] = prdptr[j][1:]; + if(n == 0) + error("nonterminal " + nontrst[i].name + " not defined!"); + else{ + pres[i] = array[n] of array of int; + pres[i][0:] = curres[:n]; + } + } + fatfl = 1; + if(nerrors) { + summary(); + raise "fail:error"; + } +} + +dumppres() +{ + for(i := 0; i <= nnonter; i++){ + print("nonterm %d\n", i); + curres := pres[i]; + for(j := 0; j < len curres; j++){ + print("\tproduction %d:", j); + prd := curres[j]; + for(k := 0; k < len prd; k++) + print(" %d", prd[k]); + print("\n"); + } + } +} + +# +# mark nonterminals which derive the empty string +# also, look for nonterminals which don't derive any token strings +# +cempty() +{ + i, p, np: int; + prd: array of int; + + pempty = array[nnonter+1] of int; + + # first, use the array pempty to detect productions that can never be reduced + # set pempty to WHONOWS + aryfil(pempty, nnonter+1, WHOKNOWS); + + # now, look at productions, marking nonterminals which derive something +more: for(;;){ + for(i=0; i<nprod; i++) { + prd = prdptr[i]; + if(pempty[prd[0] - NTBASE]) + continue; + np = len prd - 1; + for(p = 1; p < np; p++) + if(prd[p] >= NTBASE && pempty[prd[p]-NTBASE] == WHOKNOWS) + break; + # production can be derived + if(p == np) { + pempty[prd[0]-NTBASE] = OK; + continue more; + } + } + break; + } + + # now, look at the nonterminals, to see if they are all OK + for(i=0; i<=nnonter; i++) { + # the added production rises or falls as the start symbol ... + if(i == 0) + continue; + if(pempty[i] != OK) { + fatfl = 0; + error("nonterminal " + nontrst[i].name + " never derives any token string"); + } + } + + if(nerrors) { + summary(); + raise "fail:error"; + } + + # now, compute the pempty array, to see which nonterminals derive the empty string + # set pempty to WHOKNOWS + aryfil(pempty, nnonter+1, WHOKNOWS); + + # loop as long as we keep finding empty nonterminals + +again: for(;;){ + next: for(i=1; i<nprod; i++) { + # not known to be empty + prd = prdptr[i]; + if(pempty[prd[0]-NTBASE] != WHOKNOWS) + continue; + np = len prd - 1; + for(p = 1; p < np; p++) + if(prd[p] < NTBASE || pempty[prd[p]-NTBASE] != EMPTY) + continue next; + + # we have a nontrivially empty nonterminal + pempty[prd[0]-NTBASE] = EMPTY; + # got one ... try for another + continue again; + } + return; + } +} + +dumpempty() +{ + for(i := 0; i <= nnonter; i++) + if(pempty[i] == EMPTY) + print("non-term %d %s matches empty\n", i, symnam(i+NTBASE)); +} + +# +# compute an array with the first of nonterminals +# +cpfir() +{ + s, n, p, np, ch: int; + curres: array of array of int; + prd: array of int; + + wsets = array[nnonter+WSETINC] of Wset; + pfirst = array[nnonter+1] of Lkset; + for(i:=0; i<=nnonter; i++) { + wsets[i].ws = mkset(); + pfirst[i] = mkset(); + curres = pres[i]; + n = len curres; + # initially fill the sets + for(s = 0; s < n; s++) { + prd = curres[s]; + np = len prd - 1; + for(p = 0; p < np; p++) { + ch = prd[p]; + if(ch < NTBASE) { + setbit(pfirst[i], ch); + break; + } + if(!pempty[ch-NTBASE]) + break; + } + } + } + + # now, reflect transitivity + changes := 1; + while(changes) { + changes = 0; + for(i=0; i<=nnonter; i++) { + curres = pres[i]; + n = len curres; + for(s = 0; s < n; s++) { + prd = curres[s]; + np = len prd - 1; + for(p = 0; p < np; p++) { + ch = prd[p] - NTBASE; + if(ch < 0) + break; + changes |= setunion(pfirst[i], pfirst[ch]); + if(!pempty[ch]) + break; + } + } + } + } + + if(!indebug) + return; + if(foutput != nil){ + for(i=0; i<=nnonter; i++) { + foutput.putc('\n'); + foutput.puts(nontrst[i].name); + foutput.puts(": "); + prlook(pfirst[i]); + foutput.putc(' '); + foutput.puts(string pempty[i]); + foutput.putc('\n'); + } + } +} + +# +# generate the states +# +stagen() +{ + # initialize + nstate = 0; + tstates = array[ntokens+1] of {* => 0}; # states generated by terminal gotos + ntstates = array[nnonter+1] of {* => 0};# states generated by nonterminal gotos + amem = array[ACTSIZE] of {* => 0}; + memp = 0; + + clset = mkset(); + pstate[0] = pstate[1] = 0; + aryfil(clset, tbitset, 0); + putitem(Pitem(prdptr[0], 0, 0, 0), clset); + tystate[0] = MUSTDO; + nstate = 1; + pstate[2] = pstate[1]; + + # + # now, the main state generation loop + # first pass generates all of the states + # later passes fix up lookahead + # could be sped up a lot by remembering + # results of the first pass rather than recomputing + # + first := 1; + for(more := 1; more; first = 0){ + more = 0; + for(i:=0; i<nstate; i++) { + if(tystate[i] != MUSTDO) + continue; + + tystate[i] = DONE; + aryfil(temp1, nnonter+1, 0); + + # take state i, close it, and do gotos + closure(i); + + # generate goto's + for(p:=0; p<cwp; p++) { + pi := wsets[p]; + if(pi.flag) + continue; + wsets[p].flag = 1; + c := pi.pitem.first; + if(c <= 1) { + if(pstate[i+1]-pstate[i] <= p) + tystate[i] = MUSTLOOKAHEAD; + continue; + } + # do a goto on c + putitem(wsets[p].pitem, wsets[p].ws); + for(q:=p+1; q<cwp; q++) { + # this item contributes to the goto + if(c == wsets[q].pitem.first) { + putitem(wsets[q].pitem, wsets[q].ws); + wsets[q].flag = 1; + } + } + + if(c < NTBASE) + state(c); # register new state + else + temp1[c-NTBASE] = state(c); + } + + if(gsdebug && foutput != nil) { + foutput.puts(string i + ": "); + for(j:=0; j<=nnonter; j++) + if(temp1[j]) + foutput.puts(nontrst[j].name + " " + string temp1[j] + ", "); + foutput.putc('\n'); + } + + if(first) + indgo[i] = apack(temp1[1:], nnonter-1) - 1; + + more++; + } + } +} + +# +# generate the closure of state i +# +closure(i: int) +{ + zzclose++; + + # first, copy kernel of state i to wsets + cwp = 0; + q := pstate[i+1]; + for(p:=pstate[i]; p<q; p++) { + wsets[cwp].pitem = statemem[p].pitem; + wsets[cwp].flag = 1; # this item must get closed + wsets[cwp].ws[0:] = statemem[p].look; + cwp++; + } + + # now, go through the loop, closing each item + work := 1; + while(work) { + work = 0; + for(u:=0; u<cwp; u++) { + if(wsets[u].flag == 0) + continue; + # dot is before c + c := wsets[u].pitem.first; + if(c < NTBASE) { + wsets[u].flag = 0; + # only interesting case is where . is before nonterminal + continue; + } + + # compute the lookahead + aryfil(clset, tbitset, 0); + + # find items involving c + for(v:=u; v<cwp; v++) { + if(wsets[v].flag != 1 + || wsets[v].pitem.first != c) + continue; + pi := wsets[v].pitem.prod; + ipi := wsets[v].pitem.off + 1; + + wsets[v].flag = 0; + if(nolook) + continue; + while((ch := pi[ipi++]) > 0) { + # terminal symbol + if(ch < NTBASE) { + setbit(clset, ch); + break; + } + # nonterminal symbol + setunion(clset, pfirst[ch-NTBASE]); + if(!pempty[ch-NTBASE]) + break; + } + if(ch <= 0) + setunion(clset, wsets[v].ws); + } + + # + # now loop over productions derived from c + # + curres := pres[c - NTBASE]; + n := len curres; + # initially fill the sets + nexts: for(s := 0; s < n; s++) { + prd := curres[s]; + # + # put these items into the closure + # is the item there + # + for(v=0; v<cwp; v++) { + # yes, it is there + if(wsets[v].pitem.off == 0 + && wsets[v].pitem.prod == prd) { + if(!nolook && setunion(wsets[v].ws, clset)) + wsets[v].flag = work = 1; + continue nexts; + } + } + + # not there; make a new entry + if(cwp >= len wsets){ + awsets := array[cwp + WSETINC] of Wset; + awsets[0:] = wsets; + wsets = awsets; + } + wsets[cwp].pitem = Pitem(prd, 0, prd[0], -prd[len prd-1]); + wsets[cwp].flag = 1; + wsets[cwp].ws = mkset(); + if(!nolook) { + work = 1; + wsets[cwp].ws[0:] = clset; + } + cwp++; + } + } + } + + # have computed closure; flags are reset; return + if(cldebug && foutput != nil) { + foutput.puts("\nState " + string i + ", nolook = " + string nolook + "\n"); + for(u:=0; u<cwp; u++) { + if(wsets[u].flag) + foutput.puts("flag set!\n"); + wsets[u].flag = 0; + foutput.putc('\t'); + foutput.puts(writem(wsets[u].pitem)); + prlook(wsets[u].ws); + foutput.putc('\n'); + } + } +} + +# +# sorts last state,and sees if it equals earlier ones. returns state number +# +state(c: int): int +{ + zzstate++; + p1 := pstate[nstate]; + p2 := pstate[nstate+1]; + if(p1 == p2) + return 0; # null state + # sort the items + k, l: int; + for(k = p1+1; k < p2; k++) { # make k the biggest + for(l = k; l > p1; l--) { + if(statemem[l].pitem.prodno < statemem[l-1].pitem.prodno + || statemem[l].pitem.prodno == statemem[l-1].pitem.prodno + && statemem[l].pitem.off < statemem[l-1].pitem.off) { + s := statemem[l]; + statemem[l] = statemem[l-1]; + statemem[l-1] = s; + }else + break; + } + } + + size1 := p2 - p1; # size of state + + if(c >= NTBASE) + i := ntstates[c-NTBASE]; + else + i = tstates[c]; + +look: for(; i != 0; i = mstates[i]) { + # get ith state + q1 := pstate[i]; + q2 := pstate[i+1]; + size2 := q2 - q1; + if(size1 != size2) + continue; + k = p1; + for(l = q1; l < q2; l++) { + if(statemem[l].pitem.prod != statemem[k].pitem.prod + || statemem[l].pitem.off != statemem[k].pitem.off) + continue look; + k++; + } + + # found it + pstate[nstate+1] = pstate[nstate]; # delete last state + # fix up lookaheads + if(nolook) + return i; + k = p1; + for(l = q1; l < q2; l++) { + if(setunion(statemem[l].look, statemem[k].look)) + tystate[i] = MUSTDO; + k++; + } + return i; + } + # state is new + zznewstate++; + if(nolook) + error("yacc state/nolook error"); + pstate[nstate+2] = p2; + if(nstate+1 >= NSTATES) + error("too many states"); + if(c >= NTBASE) { + mstates[nstate] = ntstates[c-NTBASE]; + ntstates[c-NTBASE] = nstate; + } else { + mstates[nstate] = tstates[c]; + tstates[c] = nstate; + } + tystate[nstate] = MUSTDO; + return nstate++; +} + +putitem(p: Pitem, set: Lkset) +{ + p.off++; + p.first = p.prod[p.off]; + + if(pidebug && foutput != nil) + foutput.puts("putitem(" + writem(p) + "), state " + string nstate + "\n"); + j := pstate[nstate+1]; + if(j >= len statemem){ + asm := array[j + STATEINC] of Item; + asm[0:] = statemem; + statemem = asm; + } + statemem[j].pitem = p; + if(!nolook){ + s := mkset(); + s[0:] = set; + statemem[j].look = s; + } + j++; + pstate[nstate+1] = j; +} + +# +# creates output string for item pointed to by pp +# +writem(pp: Pitem): string +{ + i: int; + p := pp.prod; + q := chcopy(nontrst[prdptr[pp.prodno][0]-NTBASE].name) + ": "; + npi := pp.off; + pi := p == prdptr[pp.prodno]; + for(;;){ + c := ' '; + if(pi == npi) + c = '.'; + q[len q] = c; + i = p[pi++]; + if(i <= 0) + break; + q += chcopy(symnam(i)); + } + + # an item calling for a reduction + i = p[npi]; + if(i < 0) + q += " (" + string -i + ")"; + return q; +} + +# +# pack state i from temp1 into amem +# +apack(p: array of int, n: int): int +{ + # + # we don't need to worry about checking because + # we will only look at entries known to be there... + # eliminate leading and trailing 0's + # + off := 0; + for(pp := 0; pp <= n && p[pp] == 0; pp++) + off--; + # no actions + if(pp > n) + return 0; + for(; n > pp && p[n] == 0; n--) + ; + p = p[pp:n+1]; + + # now, find a place for the elements from p to q, inclusive + r := len amem - len p; +nextk: for(rr := 0; rr <= r; rr++) { + qq := rr; + for(pp = 0; pp < len p; pp++) { + if(p[pp] != 0) + if(p[pp] != amem[qq] && amem[qq] != 0) + continue nextk; + qq++; + } + + # we have found an acceptable k + if(pkdebug && foutput != nil) + foutput.puts("off = " + string(off+rr) + ", k = " + string rr + "\n"); + qq = rr; + for(pp = 0; pp < len p; pp++) { + if(p[pp]) { + if(qq > memp) + memp = qq; + amem[qq] = p[pp]; + } + qq++; + } + if(pkdebug && foutput != nil) { + for(pp = 0; pp <= memp; pp += 10) { + foutput.putc('\t'); + for(qq = pp; qq <= pp+9; qq++) + foutput.puts(string amem[qq] + " "); + foutput.putc('\n'); + } + } + return off + rr; + } + error("no space in action table"); + return 0; +} + +# +# print the output for the states +# +output() +{ + c, u, v: int; + + ftable.puts("yyexca := array[] of {"); + if(fdebug != nil) + fdebug.puts("yystates = array [] of {\n"); + + noset := mkset(); + + # output the stuff for state i + for(i:=0; i<nstate; i++) { + nolook = tystate[i]!=MUSTLOOKAHEAD; + closure(i); + + # output actions + nolook = 1; + aryfil(temp1, ntokens+nnonter+1, 0); + for(u=0; u<cwp; u++) { + c = wsets[u].pitem.first; + if(c > 1 && c < NTBASE && temp1[c] == 0) { + for(v=u; v<cwp; v++) + if(c == wsets[v].pitem.first) + putitem(wsets[v].pitem, noset); + temp1[c] = state(c); + } else + if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0) + temp1[c+ntokens] = amem[indgo[i]+c]; + } + if(i == 1) + temp1[1] = ACCEPTCODE; + + # now, we have the shifts; look at the reductions + lastred = 0; + for(u=0; u<cwp; u++) { + c = wsets[u].pitem.first; + + # reduction + if(c > 0) + continue; + lastred = -c; + us := wsets[u].ws; + for(k:=0; k<=ntokens; k++) { + if(!bitset(us, k)) + continue; + if(temp1[k] == 0) + temp1[k] = c; + else + if(temp1[k] < 0) { # reduce/reduce conflict + if(foutput != nil) + foutput.puts( + "\n" + string i + ": reduce/reduce conflict (red'ns " + + string -temp1[k] + " and " + string lastred + " ) on " + symnam(k)); + if(-temp1[k] > lastred) + temp1[k] = -lastred; + zzrrconf++; + } else + # potential shift/reduce conflict + precftn(lastred, k, i); + } + } + wract(i); + } + + if(fdebug != nil) + fdebug.puts("};\n"); + ftable.puts("};\n"); + ftable.puts("YYNPROD: con " + string nprod + ";\n"); + ftable.puts("YYPRIVATE: con " + string PRIVATE + ";\n"); + ftable.puts("yytoknames: array of string;\n"); + ftable.puts("yystates: array of string;\n"); + if(yydebug != nil){ + ftable.puts("include \"y.debug\";\n"); + ftable.puts("yydebug: con " + yydebug + ";\n"); + }else{ + ftable.puts("yydebug: con 0;\n"); + } +} + +# +# decide a shift/reduce conflict by precedence. +# r is a rule number, t a token number +# the conflict is in state s +# temp1[t] is changed to reflect the action +# +precftn(r, t, s: int) +{ + action: int; + + lp := levprd[r]; + lt := toklev[t]; + if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) { + + # conflict + if(foutput != nil) + foutput.puts( + "\n" + string s + ": shift/reduce conflict (shift " + + string temp1[t] + "(" + string PLEVEL(lt) + "), red'n " + + string r + "(" + string PLEVEL(lp) + ")) on " + symnam(t)); + zzsrconf++; + return; + } + if(PLEVEL(lt) == PLEVEL(lp)) + action = ASSOC(lt); + else if(PLEVEL(lt) > PLEVEL(lp)) + action = RASC; # shift + else + action = LASC; # reduce + case action{ + BASC => # error action + temp1[t] = ERRCODE; + LASC => # reduce + temp1[t] = -r; + } +} + +# +# output state i +# temp1 has the actions, lastred the default +# +wract(i: int) +{ + p, p1: int; + + # find the best choice for lastred + lastred = 0; + ntimes := 0; + for(j:=0; j<=ntokens; j++) { + if(temp1[j] >= 0) + continue; + if(temp1[j]+lastred == 0) + continue; + # count the number of appearances of temp1[j] + count := 0; + tred := -temp1[j]; + levprd[tred] |= REDFLAG; + for(p=0; p<=ntokens; p++) + if(temp1[p]+tred == 0) + count++; + if(count > ntimes) { + lastred = tred; + ntimes = count; + } + } + + # + # for error recovery, arrange that, if there is a shift on the + # error recovery token, `error', that the default be the error action + # + if(temp1[2] > 0) + lastred = 0; + + # clear out entries in temp1 which equal lastred + # count entries in optst table + n := 0; + for(p=0; p<=ntokens; p++) { + p1 = temp1[p]; + if(p1+lastred == 0) + temp1[p] = p1 = 0; + if(p1 > 0 && p1 != ACCEPTCODE && p1 != ERRCODE) + n++; + } + + wrstate(i); + defact[i] = lastred; + flag := 0; + os := array[n*2] of int; + n = 0; + for(p=0; p<=ntokens; p++) { + if((p1=temp1[p]) != 0) { + if(p1 < 0) { + p1 = -p1; + } else if(p1 == ACCEPTCODE) { + p1 = -1; + } else if(p1 == ERRCODE) { + p1 = 0; + } else { + os[n++] = p; + os[n++] = p1; + zzacent++; + continue; + } + if(flag++ == 0) + ftable.puts("-1, " + string i + ",\n"); + ftable.puts("\t" + string p + ", " + string p1 + ",\n"); + zzexcp++; + } + } + if(flag) { + defact[i] = -2; + ftable.puts("\t-2, " + string lastred + ",\n"); + } + optst[i] = os; +} + +# +# writes state i +# +wrstate(i: int) +{ + j0, j1, u: int; + pp, qq: int; + + if(fdebug != nil) { + if(lastred) { + fdebug.puts(" nil, #" + string i + "\n"); + } else { + fdebug.puts(" \""); + qq = pstate[i+1]; + for(pp=pstate[i]; pp<qq; pp++){ + fdebug.puts(writem(statemem[pp].pitem)); + fdebug.puts("\\n"); + } + if(tystate[i] == MUSTLOOKAHEAD) + for(u = pstate[i+1] - pstate[i]; u < cwp; u++) + if(wsets[u].pitem.first < 0){ + fdebug.puts(writem(wsets[u].pitem)); + fdebug.puts("\\n"); + } + fdebug.puts("\", #" + string i + "/\n"); + } + } + if(foutput == nil) + return; + foutput.puts("\nstate " + string i + "\n"); + qq = pstate[i+1]; + for(pp=pstate[i]; pp<qq; pp++){ + foutput.putc('\t'); + foutput.puts(writem(statemem[pp].pitem)); + foutput.putc('\n'); + } + if(tystate[i] == MUSTLOOKAHEAD) { + # print out empty productions in closure + for(u = pstate[i+1] - pstate[i]; u < cwp; u++) { + if(wsets[u].pitem.first < 0) { + foutput.putc('\t'); + foutput.puts(writem(wsets[u].pitem)); + foutput.putc('\n'); + } + } + } + + # check for state equal to another + for(j0=0; j0<=ntokens; j0++) + if((j1=temp1[j0]) != 0) { + foutput.puts("\n\t" + symnam(j0) + " "); + # shift, error, or accept + if(j1 > 0) { + if(j1 == ACCEPTCODE) + foutput.puts("accept"); + else if(j1 == ERRCODE) + foutput.puts("error"); + else + foutput.puts("shift "+string j1); + } else + foutput.puts("reduce " + string -j1 + " (src line " + string rlines[-j1] + ")"); + } + + # output the final production + if(lastred) + foutput.puts("\n\t. reduce " + string lastred + " (src line " + string rlines[lastred] + ")\n\n"); + else + foutput.puts("\n\t. error\n\n"); + + # now, output nonterminal actions + j1 = ntokens; + for(j0 = 1; j0 <= nnonter; j0++) { + j1++; + if(temp1[j1]) + foutput.puts("\t" + symnam(j0+NTBASE) + " goto " + string temp1[j1] + "\n"); + } +} + +# +# output the gotos for the nontermninals +# +go2out() +{ + for(i := 1; i <= nnonter; i++) { + go2gen(i); + + # find the best one to make default + best := -1; + times := 0; + + # is j the most frequent + for(j := 0; j < nstate; j++) { + if(tystate[j] == 0) + continue; + if(tystate[j] == best) + continue; + + # is tystate[j] the most frequent + count := 0; + cbest := tystate[j]; + for(k := j; k < nstate; k++) + if(tystate[k] == cbest) + count++; + if(count > times) { + best = cbest; + times = count; + } + } + + # best is now the default entry + zzgobest += times-1; + n := 0; + for(j = 0; j < nstate; j++) + if(tystate[j] != 0 && tystate[j] != best) + n++; + goent := array[2*n+1] of int; + n = 0; + for(j = 0; j < nstate; j++) + if(tystate[j] != 0 && tystate[j] != best) { + goent[n++] = j; + goent[n++] = tystate[j]; + zzgoent++; + } + + # now, the default + if(best == -1) + best = 0; + zzgoent++; + goent[n] = best; + yypgo[i] = goent; + } +} + +# +# output the gotos for nonterminal c +# +go2gen(c: int) +{ + i, cc, p, q: int; + + # first, find nonterminals with gotos on c + aryfil(temp1, nnonter+1, 0); + temp1[c] = 1; + work := 1; + while(work) { + work = 0; + for(i=0; i<nprod; i++) { + # cc is a nonterminal with a goto on c + cc = prdptr[i][1]-NTBASE; + if(cc >= 0 && temp1[cc] != 0) { + # thus, the left side of production i does too + cc = prdptr[i][0]-NTBASE; + if(temp1[cc] == 0) { + work = 1; + temp1[cc] = 1; + } + } + } + } + + # now, we have temp1[c] = 1 if a goto on c in closure of cc + if(g2debug && foutput != nil) { + foutput.puts(nontrst[c].name); + foutput.puts(": gotos on "); + for(i=0; i<=nnonter; i++) + if(temp1[i]){ + foutput.puts(nontrst[i].name); + foutput.putc(' '); + } + foutput.putc('\n'); + } + + # now, go through and put gotos into tystate + aryfil(tystate, nstate, 0); + for(i=0; i<nstate; i++) { + q = pstate[i+1]; + for(p=pstate[i]; p<q; p++) { + if((cc = statemem[p].pitem.first) >= NTBASE) { + # goto on c is possible + if(temp1[cc-NTBASE]) { + tystate[i] = amem[indgo[i]+c]; + break; + } + } + } + } +} + +# +# in order to free up the mem and amem arrays for the optimizer, +# and still be able to output yyr1, etc., after the sizes of +# the action array is known, we hide the nonterminals +# derived by productions in levprd. +# +hideprod() +{ + j := 0; + levprd[0] = 0; + for(i:=1; i<nprod; i++) { + if(!(levprd[i] & REDFLAG)) { + j++; + if(foutput != nil) { + foutput.puts("Rule not reduced: "); + foutput.puts(writem(Pitem(prdptr[i], 0, 0, i))); + foutput.putc('\n'); + } + } + levprd[i] = prdptr[i][0] - NTBASE; + } + if(j) + print("%d rules never reduced\n", j); +} + +callopt() +{ + j, k, p, q: int; + v: array of int; + + pgo = array[nnonter+1] of int; + pgo[0] = 0; + maxoff = 0; + maxspr = 0; + for(i := 0; i < nstate; i++) { + k = 32000; + j = 0; + v = optst[i]; + q = len v; + for(p = 0; p < q; p += 2) { + if(v[p] > j) + j = v[p]; + if(v[p] < k) + k = v[p]; + } + # nontrivial situation + if(k <= j) { + # j is now the range +# j -= k; # call scj + if(k > maxoff) + maxoff = k; + } + tystate[i] = q + 2*j; + if(j > maxspr) + maxspr = j; + } + + # initialize ggreed table + ggreed = array[nnonter+1] of int; + for(i = 1; i <= nnonter; i++) { + ggreed[i] = 1; + j = 0; + + # minimum entry index is always 0 + v = yypgo[i]; + q = len v - 1; + for(p = 0; p < q ; p += 2) { + ggreed[i] += 2; + if(v[p] > j) + j = v[p]; + } + ggreed[i] = ggreed[i] + 2*j; + if(j > maxoff) + maxoff = j; + } + + # now, prepare to put the shift actions into the amem array + for(i = 0; i < ACTSIZE; i++) + amem[i] = 0; + maxa = 0; + for(i = 0; i < nstate; i++) { + if(tystate[i] == 0 && adb > 1) + ftable.puts("State " + string i + ": null\n"); + indgo[i] = YYFLAG1; + } + while((i = nxti()) != NOMORE) + if(i >= 0) + stin(i); + else + gin(-i); + + # print amem array + if(adb > 2) + for(p = 0; p <= maxa; p += 10) { + ftable.puts(string p + " "); + for(i = 0; i < 10; i++) + ftable.puts(string amem[p+i] + " "); + ftable.putc('\n'); + } + + aoutput(); + osummary(); +} + +# +# finds the next i +# +nxti(): int +{ + max := 0; + maxi := 0; + for(i := 1; i <= nnonter; i++) + if(ggreed[i] >= max) { + max = ggreed[i]; + maxi = -i; + } + for(i = 0; i < nstate; i++) + if(tystate[i] >= max) { + max = tystate[i]; + maxi = i; + } + if(max == 0) + return NOMORE; + return maxi; +} + +gin(i: int) +{ + s: int; + + # enter gotos on nonterminal i into array amem + ggreed[i] = 0; + + q := yypgo[i]; + nq := len q - 1; + # now, find amem place for it +nextgp: for(p := 0; p < ACTSIZE; p++) { + if(amem[p]) + continue; + for(r := 0; r < nq; r += 2) { + s = p + q[r] + 1; + if(s > maxa){ + maxa = s; + if(maxa >= ACTSIZE) + error("a array overflow"); + } + if(amem[s]) + continue nextgp; + } + # we have found amem spot + amem[p] = q[nq]; + if(p > maxa) + maxa = p; + for(r = 0; r < nq; r += 2) { + s = p + q[r] + 1; + amem[s] = q[r+1]; + } + pgo[i] = p; + if(adb > 1) + ftable.puts("Nonterminal " + string i + ", entry at " + string pgo[i] + "\n"); + return; + } + error("cannot place goto " + string i + "\n"); +} + +stin(i: int) +{ + s: int; + + tystate[i] = 0; + + # enter state i into the amem array + q := optst[i]; + nq := len q; + # find an acceptable place +nextn: for(n := -maxoff; n < ACTSIZE; n++) { + flag := 0; + for(r := 0; r < nq; r += 2) { + s = q[r] + n; + if(s < 0 || s > ACTSIZE) + continue nextn; + if(amem[s] == 0) + flag++; + else if(amem[s] != q[r+1]) + continue nextn; + } + + # check the position equals another only if the states are identical + for(j:=0; j<nstate; j++) { + if(indgo[j] == n) { + + # we have some disagreement + if(flag) + continue nextn; + if(nq == len optst[j]) { + + # states are equal + indgo[i] = n; + if(adb > 1) + ftable.puts("State " + string i + ": entry at " + + string n + " equals state " + string j + "\n"); + return; + } + + # we have some disagreement + continue nextn; + } + } + + for(r = 0; r < nq; r += 2) { + s = q[r] + n; + if(s > maxa) + maxa = s; + if(amem[s] != 0 && amem[s] != q[r+1]) + error("clobber of a array, pos'n " + string s + ", by " + string q[r+1] + ""); + amem[s] = q[r+1]; + } + indgo[i] = n; + if(adb > 1) + ftable.puts("State " + string i + ": entry at " + string indgo[i] + "\n"); + return; + } + error("Error; failure to place state " + string i + "\n"); +} + +# +# this version is for limbo +# write out the optimized parser +# +aoutput() +{ + ftable.puts("YYLAST:\tcon "+string (maxa+1)+";\n"); + arout("yyact", amem, maxa+1); + arout("yypact", indgo, nstate); + arout("yypgo", pgo, nnonter+1); +} + +# +# put out other arrays, copy the parsers +# +others() +{ + finput = bufio->open(parser, Bufio->OREAD); + if(finput == nil) + error("cannot find parser " + parser); + arout("yyr1", levprd, nprod); + aryfil(temp1, nprod, 0); + + # + #yyr2 is the number of rules for each production + # + for(i:=1; i<nprod; i++) + temp1[i] = len prdptr[i] - 2; + arout("yyr2", temp1, nprod); + + aryfil(temp1, nstate, -1000); + for(i=0; i<=ntokens; i++) + for(j:=tstates[i]; j!=0; j=mstates[j]) + temp1[j] = i; + for(i=0; i<=nnonter; i++) + for(j=ntstates[i]; j!=0; j=mstates[j]) + temp1[j] = -i; + arout("yychk", temp1, nstate); + arout("yydef", defact, nstate); + + # put out token translation tables + # table 1 has 0-256 + aryfil(temp1, 256, 0); + c := 0; + for(i=1; i<=ntokens; i++) { + j = tokset[i].value; + if(j >= 0 && j < 256) { + if(temp1[j]) { + print("yacc bug -- cant have 2 different Ts with same value\n"); + print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); + nerrors++; + } + temp1[j] = i; + if(j > c) + c = j; + } + } + for(i = 0; i <= c; i++) + if(temp1[i] == 0) + temp1[i] = YYLEXUNK; + arout("yytok1", temp1, c+1); + + # table 2 has PRIVATE-PRIVATE+256 + aryfil(temp1, 256, 0); + c = 0; + for(i=1; i<=ntokens; i++) { + j = tokset[i].value - PRIVATE; + if(j >= 0 && j < 256) { + if(temp1[j]) { + print("yacc bug -- cant have 2 different Ts with same value\n"); + print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); + nerrors++; + } + temp1[j] = i; + if(j > c) + c = j; + } + } + arout("yytok2", temp1, c+1); + + # table 3 has everything else + ftable.puts("yytok3 := array[] of {\n"); + c = 0; + for(i=1; i<=ntokens; i++) { + j = tokset[i].value; + if(j >= 0 && j < 256) + continue; + if(j >= PRIVATE && j < 256+PRIVATE) + continue; + + ftable.puts(sprint("%4d,%4d,", j, i)); + c++; + if(c%5 == 0) + ftable.putc('\n'); + } + ftable.puts(sprint("%4d\n};\n", 0)); + + # copy parser text + while((c=finput.getc()) != Bufio->EOF) { + if(c == '$') { + if((c = finput.getc()) != 'A') + ftable.putc('$'); + else { # copy actions + if(codehead == nil) + ftable.puts("* => ;"); + else + dumpcode(-1); + c = finput.getc(); + } + } + ftable.putc(c); + } + ftable.close(); +} + +arout(s: string, v: array of int, n: int) +{ + ftable.puts(s+" := array[] of {"); + for(i := 0; i < n; i++) { + if(i%10 == 0) + ftable.putc('\n'); + ftable.puts(sprint("%4d", v[i])); + ftable.putc(','); + } + ftable.puts("\n};\n"); +} + +# +# output the summary on y.output +# +summary() +{ + if(foutput != nil) { + foutput.puts("\n" + string ntokens + " terminals, " + string(nnonter + 1) + " nonterminals\n"); + foutput.puts("" + string nprod + " grammar rules, " + string nstate + "/" + string NSTATES + " states\n"); + foutput.puts("" + string zzsrconf + " shift/reduce, " + string zzrrconf + " reduce/reduce conflicts reported\n"); + foutput.puts("" + string len wsets + " working sets used\n"); + foutput.puts("memory: parser " + string memp + "/" + string ACTSIZE + "\n"); + foutput.puts(string (zzclose - 2*nstate) + " extra closures\n"); + foutput.puts(string zzacent + " shift entries, " + string zzexcp + " exceptions\n"); + foutput.puts(string zzgoent + " goto entries\n"); + foutput.puts(string zzgobest + " entries saved by goto default\n"); + } + if(zzsrconf != 0 || zzrrconf != 0) { + print("\nconflicts: "); + if(zzsrconf) + print("%d shift/reduce", zzsrconf); + if(zzsrconf && zzrrconf) + print(", "); + if(zzrrconf) + print("%d reduce/reduce", zzrrconf); + print("\n"); + } + if(fdefine != nil) + fdefine.close(); +} + +# +# write optimizer summary +# +osummary() +{ + if(foutput == nil) + return; + i := 0; + for(p := maxa; p >= 0; p--) + if(amem[p] == 0) + i++; + + foutput.puts("Optimizer space used: output " + string (maxa+1) + "/" + string ACTSIZE + "\n"); + foutput.puts(string(maxa+1) + " table entries, " + string i + " zero\n"); + foutput.puts("maximum spread: " + string maxspr + ", maximum offset: " + string maxoff + "\n"); +} + +# +# copies and protects "'s in q +# +chcopy(q: string): string +{ + s := ""; + j := 0; + for(i := 0; i < len q; i++) { + if(q[i] == '"') { + s += q[j:i] + "\\"; + j = i; + } + } + return s + q[j:i]; +} + +usage() +{ + fprint(stderr, "usage: yacc [-vdm] [-Dn] [-o output] [-s stem] file\n"); + raise "fail:usage"; +} + +bitset(set: Lkset, bit: int): int +{ + return set[bit>>5] & (1<<(bit&31)); +} + +setbit(set: Lkset, bit: int): int +{ + return set[bit>>5] |= (1<<(bit&31)); +} + +mkset(): Lkset +{ + return array[tbitset] of {* => 0}; +} + +# +# set a to the union of a and b +# return 1 if b is not a subset of a, 0 otherwise +# +setunion(a, b: array of int): int +{ + sub := 0; + for(i:=0; i<tbitset; i++) { + x := a[i]; + y := x | b[i]; + a[i] = y; + if(y != x) + sub = 1; + } + return sub; +} + +prlook(p: Lkset) +{ + if(p == nil){ + foutput.puts("\tNULL"); + return; + } + foutput.puts(" { "); + for(j:=0; j<=ntokens; j++){ + if(bitset(p, j)){ + foutput.puts(symnam(j)); + foutput.putc(' '); + } + } + foutput.putc('}'); +} + +# +# utility routines +# +isdigit(c: int): int +{ + return c >= '0' && c <= '9'; +} + +isword(c: int): int +{ + return c >= 16ra0 || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'; +} + +mktemp(t: string): string +{ + return t; +} + +# +# arg processing +# +Arg.init(argv: list of string): ref Arg +{ + if(argv != nil) + argv = tl argv; + return ref Arg(argv, 0, ""); +} + +Arg.opt(arg: self ref Arg): int +{ + opts := arg.opts; + if(opts != ""){ + arg.c = opts[0]; + arg.opts = opts[1:]; + return arg.c; + } + argv := arg.argv; + if(argv == nil) + return arg.c = 0; + opts = hd argv; + if(len opts < 2 || opts[0] != '-') + return arg.c = 0; + arg.argv = tl argv; + if(opts == "--") + return arg.c = 0; + arg.opts = opts[2:]; + return arg.c = opts[1]; +} + +Arg.arg(arg: self ref Arg): string +{ + s := arg.opts; + arg.opts = ""; + if(s != "") + return s; + argv := arg.argv; + if(argv == nil) + return ""; + arg.argv = tl argv; + return hd argv; +} |
