diff options
Diffstat (limited to 'appl/cmd/mash/lex.b')
| -rw-r--r-- | appl/cmd/mash/lex.b | 547 |
1 files changed, 547 insertions, 0 deletions
diff --git a/appl/cmd/mash/lex.b b/appl/cmd/mash/lex.b new file mode 100644 index 00000000..c9c3789b --- /dev/null +++ b/appl/cmd/mash/lex.b @@ -0,0 +1,547 @@ +# +# Lexical analyzer. +# + +lexdebug : con 0; + +# +# Import tokens from parser. +# +Land, +Lat, +Lbackq, +Lcaret, +Lcase, +Lcolon, +Lcolonmatch, +Lcons, +Ldefeq, +Lelse, +Leof, +Leq, +Leqeq, +Lerror, +Lfn, +Lfor, +Lgreat, +Lgreatgreat, +Lhd, +Lif, +Lin, +Llen, +Lless, +Llessgreat, +Lmatch, +Lmatched, +Lnot, +Lnoteq, +Loffcurly, +Loffparen, +Loncurly, +Lonparen, +Lpipe, +Lquote, +Lrescue, +Lsemi, +Ltl, +Lwhile, +Lword + : import Mashparse; + +KWSIZE: con 31; # keyword hashtable size +NCTYPE: con 128; # character class array size + +ALPHA, +NUMERIC, +ONE, +WS, +META + : con 1 << iota; + +keywords := array[] of +{ + ("case", Lcase), + ("else", Lelse), + ("fn", Lfn), + ("for", Lfor), + ("hd", Lhd), + ("if", Lif), + ("in", Lin), + ("len", Llen), + ("rescue", Lrescue), + ("tl", Ltl), + ("while", Lwhile) +}; + +ctype := array[NCTYPE] of +{ + 0 or ' ' or '\t' or '\n' or '\r' or '\v' => WS, + ':' or '#' or ';' or '&' or '|' or '^' or '$' or '=' or '@' + or '~' or '`'or '{' or '}' or '(' or ')' or '<' or '>' => ONE, + 'a' to 'z' or 'A' to 'Z' or '_' => ALPHA, + '0' to '9' => NUMERIC, + '*' or '[' or ']' or '?' => META, + * => 0 +}; + +keytab: ref HashTable; + +# +# Initialize hashtable. +# +initlex() +{ + keytab = hash->new(KWSIZE); + for (i := 0; i < len keywords; i++) { + (s, v) := keywords[i]; + keytab.insert(s, HashVal(v, 0.0, nil)); + } +} + +# +# Keyword value, or -1. +# +keyval(i: ref Item): int +{ + if (i.op != Iword) + return -1; + w := i.word; + if (w.flags & Wquoted) + return -1; + v := keytab.find(w.text); + if (v == nil) + return -1; + return v.i; +} + +# +# Attach a source file to an environment. +# +Env.fopen(e: self ref Env, fd: ref Sys->FD, s: string) +{ + in := bufio->fopen(fd, Bufio->OREAD); + if (in == nil) + e.error(sys->sprint("could not fopen %s: %r\n", s)); + e.file = ref File(in, s, 1, 0); +} + +# +# Attach a source string to an environment. +# +Env.sopen(e: self ref Env, s: string) +{ + in := bufio->sopen(s); + if (in == nil) + e.error(sys->sprint("Bufio->sopen failed: %r\n")); + e.file = ref File(in, "<string>", 1, 0); +} + +# +# Close source file. +# +fclose(e: ref Env, c: int) +{ + if (c == Bufio->ERROR) + readerror(e, e.file); + e.file.in.close(); + e.file = nil; +} + +# +# Character class routines. +# + +isalpha(c: int): int +{ + return c >= NCTYPE || (c >= 0 && (ctype[c] & ALPHA) != 0); +} + +isalnum(c: int): int +{ + return c >= NCTYPE || (c >= 0 && (ctype[c] & (ALPHA | NUMERIC)) != 0); +} + +isdigit(c: int): int +{ + return c >= 0 && c < NCTYPE && (ctype[c] & NUMERIC) != 0; +} + +isquote(c: int): int +{ + return c < NCTYPE && (c < 0 || (ctype[c] & (ONE | WS | META)) != 0); +} + +isspace(c: int): int +{ + return c >= 0 && c < NCTYPE && (ctype[c] & WS) != 0; +} + +isterm(c: int): int +{ + return c < NCTYPE && (c < 0 || (ctype[c] & (ONE | WS)) != 0); +} + +# +# Test for an identifier. +# +ident(s: string): int +{ + if (s == nil || !isalpha(s[0])) + return 0; + n := len s; + for (x := 1; x < n; x++) { + if (!isalnum(s[x])) + return 0; + } + return 1; +} + +# +# Quote text. +# +enquote(s: string): string +{ + r := "'"; + j := 1; + n := len s; + for (i := 0; i < n; i++) { + c := s[i]; + if (c == '\'' || c == '\\') + r[j++] = '\\'; + r[j++] = c; + } + r[j] = '\''; + return r; +} + +# +# Quote text if needed. +# +quote(s: string): string +{ + n := len s; + for (i := 0; i < n; i++) { + if (isquote(s[i])) + return enquote(s); + } + return s; +} + +# +# Test for single word and identifier. +# +Item.sword(i: self ref Item, e: ref Env): ref Item +{ + if (i.op == Iword && ident(i.word.text)) + return i; + e.report("malformed identifier: " + i.text()); + return nil; +} + +readerror(e: ref Env, f: ref File) +{ + sys->fprint(e.stderr, "error reading %s: %r\n", f.name); +} + +where(e: ref Env): string +{ + if ((e.flags & EInter) || e.file == nil) + return nil; + return e.file.name + ":" + string e.file.line + ": "; +} + +# +# Suck input (on error). +# +Env.suck(e: self ref Env) +{ + if (e.file == nil) + return; + in := e.file.in; + while ((c := in.getc()) >= 0 && c != '\n') + ; +} + +# +# Lexical analyzer. +# +Env.lex(e: self ref Env, yylval: ref Mashparse->YYSTYPE): int +{ + i, r: ref Item; +reader: + for (;;) { + if (e.file == nil) + return -1; + f := e.file; + in := f.in; + while (isspace(c := in.getc())) { + if (c == '\n') + f.line++; + } + if (c < 0) { + fclose(e, c); + return Leof; + } + case c { + ':' => + if ((d := in.getc()) == ':') + return Lcons; + if (d == '=') + return Ldefeq; + if (d == '~') + return Lcolonmatch; + if (d >= 0) + in.ungetc(); + return Lcolon; + '#' => + for (;;) { + if ((c = in.getc()) < 0) { + fclose(e, c); + return Leof; + } + if (c == '\n') { + f.line++; + continue reader; + } + } + ';' => + return Lsemi; + '&' => + return Land; + '|' => + return Lpipe; + '^' => + return Lcaret; + '@' => + return Lat; + '!' => + if ((d := in.getc()) == '=') + return Lnoteq; + if (d >= 0) + in.ungetc(); + return Lnot; + '~' => + return Lmatch; + '=' => + if ((d := in.getc()) == '>') + return Lmatched; + if (d == '=') + return Leqeq; + if (d >= 0) + in.ungetc(); + return Leq; + '`' => + return Lbackq; + '"' => + return Lquote; + '{' => + return Loncurly; + '}' => + return Loffcurly; + '(' => + return Lonparen; + ')' => + return Loffparen; + '<' => + if ((d := in.getc()) == '>') + return Llessgreat; + if (d >= 0) + in.ungetc(); + return Lless; + '>' => + if ((d := in.getc()) == '>') + return Lgreatgreat; + if (d >= 0) + in.ungetc(); + return Lgreat; + '\\' => + if ((d := in.getc()) == '\n') { + f.line++; + continue reader; + } + if (d >= 0) + in.ungetc(); + } + # Loop over "carets for free". + for (;;) { + if (c == '$') + (i, c) = getdollar(f); + else + (i, c) = getword(e, f, c); + if (i == nil) + return Lerror; + if (isterm(c) && c != '$') + break; + if (r != nil) + r = ref Item(Iicaret, nil, r, i, nil, nil); + else + r = i; + } + if (c >= 0) + in.ungetc(); + if (r != nil) + yylval.item = ref Item(Iicaret, nil, r, i, nil, nil); + else if ((c = keyval(i)) >= 0) + return c; + else + yylval.item = i; + return Lword; + } +} + +# +# Get $n or $word. +# +getdollar(f: ref File): (ref Item, int) +{ + s: string; + in := f.in; + l := f.line; + o := Idollar; + if (isdigit(c := in.getc())) { + s[0] = c; + n := 1; + while (isdigit(c = in.getc())) + s[n++] = c; + o = Imatch; + } else { + if (c == '"') { + o = Idollarq; + c = in.getc(); + } + if (isalpha(c)) { + s[0] = c; + n := 1; + while (isalnum(c = in.getc())) + s[n++] = c; + } else { + if (o == Idollar) + s = "$"; + else + s = "$\""; + o = Iword; + } + } + return (ref Item(o, ref Word(s, 0, Src(l, f.name)), nil, nil, nil, nil), c); +} + +# +# Get word with quoting. +# +getword(e: ref Env, f: ref File, c: int): (ref Item, int) +{ + s: string; + in := f.in; + l := f.line; + wf := 0; + n := 0; + if (c == '\'') { + wf = Wquoted; + collect: + while ((c = in.getc()) >= 0) { + case c { + '\'' => + c = in.getc(); + break collect; + '\\' => + c = in.getc(); + if (c != '\'' && c != '\\') { + if (c == '\n') + continue collect; + if (c >= 0) + in.ungetc(); + c = '\\'; + } + '\n' => + f.line++; + e.report("newline in quoted word"); + return (nil, 0); + } + s[n++] = c; + } + } else { + do { + case c { + '*' or '[' or '?' => + wf |= Wexpand; + } + s[n++] = c; + } while (!isterm(c = in.getc()) && c != '\''); + } + if (lexdebug && s == "exit") + exit; + return (ref Item(Iword, ref Word(s, wf, Src(l, f.name)), nil, nil, nil, nil), c); +} + +# +# Get a line, mapping escape newline to space newline. +# +getline(in: ref Bufio->Iobuf): string +{ + if (inchan != nil) { + alt { + b := <-inchan => + if (inchan == nil) + return nil; + s := string b; + n := len s; + if (n > 1) { + while (s[n - 2] == '\\' && s[n - 1] == '\n') { + s[n - 2] = ' '; + s[n - 1] = ' '; + prprompt(1); + b = <-inchan; + if (b == nil) + break; + s += string b; + n = len s; + } + } + return s; + b := <-servechan => + s := string b; + sys->print("%s", s); + return s; + } + } else { + s := in.gets('\n'); + if (s == nil) + return nil; + n := len s; + if (n > 1) { + while (s[n - 2] == '\\' && s[n - 1] == '\n') { + s[n - 2] = ' '; + s[n - 1] = ' '; + prprompt(1); + t := in.gets('\n'); + if (t == nil) + break; + s += t; + n = len s; + } + } + return s; + } +} + +# +# Interactive shell loop. +# +Env.interactive(e: self ref Env, fd: ref Sys->FD) +{ + in := bufio->fopen(fd, Sys->OREAD); + if (in == nil) + e.error(sys->sprint("could not fopen stdin: %r\n")); + e.flags |= EInter; + for (;;) { + prprompt(0); + if (startserve) + e.serve(); + if ((s := getline(in)) == nil) + exitmash(); + e.sopen(s); + parse->parse(e); + if (histchan != nil) + histchan <-= array of byte s; + } +} |
