diff options
| author | Charles.Forsyth <devnull@localhost> | 2006-12-22 21:39:35 +0000 |
|---|---|---|
| committer | Charles.Forsyth <devnull@localhost> | 2006-12-22 21:39:35 +0000 |
| commit | 74a4d8c26dd3c1e9febcb717cfd6cb6512991a7a (patch) | |
| tree | c6e220ba61db3a6ea4052e6841296d829654e664 /utils/acid/lex.c | |
| parent | 46439007cf417cbd9ac8049bb4122c890097a0fa (diff) | |
20060303
Diffstat (limited to 'utils/acid/lex.c')
| -rw-r--r-- | utils/acid/lex.c | 636 |
1 files changed, 636 insertions, 0 deletions
diff --git a/utils/acid/lex.c b/utils/acid/lex.c new file mode 100644 index 00000000..364bcae9 --- /dev/null +++ b/utils/acid/lex.c @@ -0,0 +1,636 @@ +#include <lib9.h> +#include <bio.h> +#include <ctype.h> +#include "mach.h" +#define Extern extern +#include "acid.h" +#include "y.tab.h" + +struct keywd +{ + char *name; + int terminal; +} +keywds[] = +{ + "do", Tdo, + "if", Tif, + "then", Tthen, + "else", Telse, + "while", Twhile, + "loop", Tloop, + "head", Thead, + "tail", Ttail, + "append", Tappend, + "defn", Tfn, + "return", Tret, + "local", Tlocal, + "aggr", Tcomplex, + "union", Tcomplex, + "adt", Tcomplex, + "complex", Tcomplex, + "delete", Tdelete, + "whatis", Twhat, + "eval", Teval, + 0, 0 +}; + +char cmap[256] = +{ + 0,0,0,0,0,0,0,0, /* 0-7 */ + 0,0,0,0,0,0,0,0, /* 8-15 */ + 0,0,0,0,0,0,0,0, /* 16-23 */ + 0,0,0,0,0,0,0,0, /* 24-31 */ + 0,0, /* 32-33 */ + '"'+1, /* 34 ['"'] '"'+1, */ + 0,0,0,0,0, /* 35-39 */ + 0,0,0,0,0,0,0,0, /* 40-47 */ + '\0'+1, /* 48 ['0'] '\0'+1, */ + 0,0,0,0,0,0,0, /* 49-55 */ + 0,0,0,0,0,0,0,0, /* 56-63 */ + 0,0,0,0,0,0,0,0, /* 64-71 */ + 0,0,0,0,0,0,0,0, /* 72-79 */ + 0,0,0,0,0,0,0,0, /* 80-87 */ + 0,0,0,0, /* 88-91 */ + '\\'+1, /* 92 ['\\'] '\\'+1, */ + 0,0,0,0, /* 93-96 */ + '\a'+1, /* 97 ['a'] '\a'+1, */ + '\b'+1, /* 98 ['b'] '\b'+1, */ + 0,0,0, /* 99-101 */ + '\f'+1, /* 102 ['f'] '\f'+1, */ + 0,0,0,0,0,0,0, /* 103-109 */ + '\n'+1, /* 110 ['n'] '\n'+1, */ + 0,0,0, /* 111-113 */ + '\r'+1, /* 114 ['r'] '\r'+1, */ + 0, /* 115 */ + '\t'+1, /* 116 ['t'] '\t'+1, */ + 0, /* 117 */ + '\v'+1, /* 118 ['v'] '\v'+1, */ +}; + +void +kinit(void) +{ + int i; + + for(i = 0; keywds[i].name; i++) + enter(keywds[i].name, keywds[i].terminal); +} + +typedef struct IOstack IOstack; +struct IOstack +{ + char *name; + int line; + char *text; + char *ip; + Biobuf *fin; + IOstack *prev; +}; +IOstack *lexio; + +void +pushfile(char *file) +{ + Biobuf *b; + IOstack *io; + char buf[512]; + extern char *acidlib; + + if(file){ + b = Bopen(file, OREAD); + if(b == nil && strchr(file, '/') == 0){ + snprint(buf, sizeof(buf)-1, "%s/%s", acidlib, file); + b = Bopen(buf, OREAD); + } + } + else{ + b = gmalloc(sizeof(Biobuf)); + if (Binit(b, 0, OREAD) == Beof) { + free(b); + b = 0; + } + file = "<stdin>"; + } + + if(b == 0) + error("pushfile: %s: %r", file); + + io = gmalloc(sizeof(IOstack)); + if(io == 0) + fatal("no memory"); + io->name = strdup(file); + if(io->name == 0) + fatal("no memory"); + io->line = line; + line = 1; + io->text = 0; + io->fin = b; + io->prev = lexio; + lexio = io; +} + +void +pushstr(Node *s) +{ + IOstack *io; + + io = gmalloc(sizeof(IOstack)); + if(io == 0) + fatal("no memory"); + io->line = line; + line = 1; + io->name = strdup("<string>"); + if(io->name == 0) + fatal("no memory"); + io->line = line; + line = 1; + io->text = strdup(s->nstore.u0.sstring->string); + if(io->text == 0) + fatal("no memory"); + io->ip = io->text; + io->fin = 0; + io->prev = lexio; + lexio = io; +} + +void +restartio(void) +{ + Bflush(lexio->fin); + Binit(lexio->fin, 0, OREAD); +} + +int +popio(void) +{ + IOstack *s; + + if(lexio == 0) + return 0; + + if(lexio->prev == 0){ + if(lexio->fin) + restartio(); + return 0; + } + + if(lexio->fin) + Bterm(lexio->fin); + else + free(lexio->text); + free(lexio->name); + line = lexio->line; + s = lexio; + lexio = s->prev; + free(s); + return 1; +} + +int +Lfmt(Fmt *f) +{ + int i; + char buf[1024]; + IOstack *e; + + e = lexio; + if(e) { + i = sprint(buf, "%s:%d", e->name, line); + while(e->prev) { + e = e->prev; + if(initialising && e->prev == 0) + break; + i += sprint(buf+i, " [%s:%d]", e->name, e->line); + } + } else + sprint(buf, "no file:0"); + return fmtstrcpy(f, buf); +} + +void +unlexc(int s) +{ + if(s == '\n') + line--; + + if(lexio->fin) + Bungetc(lexio->fin); + else + lexio->ip--; +} + +int +lexc(void) +{ + int c; + + if(lexio->fin) { + c = Bgetc(lexio->fin); + if(gotint) + error("interrupt"); + return c; + } + + c = *lexio->ip++; + if(c == 0) + return -1; + return c; +} + +int +escchar(char c) +{ + int n; + char buf[Strsize]; + + if(c >= '0' && c <= '9') { + n = 1; + buf[0] = c; + for(;;) { + c = lexc(); + if(c == Eof) + error("%d: <eof> in escape sequence", line); + if(strchr("0123456789xX", c) == 0) { + unlexc(c); + break; + } + buf[n++] = c; + } + buf[n] = '\0'; + return strtol(buf, 0, 0); + } + + n = cmap[c]; + if(n == 0) + return c; + return n-1; +} + +void +eatstring(void) +{ + int esc, c, cnt; + char buf[Strsize]; + + esc = 0; + for(cnt = 0;;) { + c = lexc(); + switch(c) { + case Eof: + error("%d: <eof> in string constant", line); + + case '\r': + case '\n': + error("newline in string constant"); + goto done; + + case '\\': + if(esc) + goto Default; + esc = 1; + break; + + case '"': + if(esc == 0) + goto done; + + /* Fall through */ + default: + Default: + if(esc) { + c = escchar(c); + esc = 0; + } + buf[cnt++] = c; + break; + } + if(cnt >= Strsize) + error("string token too long"); + } +done: + buf[cnt] = '\0'; + yylval.string = strnode(buf); +} + +void +eatnl(void) +{ + int c; + + line++; + for(;;) { + c = lexc(); + if(c == Eof) + error("eof in comment"); + if(c == '\n') + return; + } +} + +int +yylex(void) +{ + int c; + extern char vfmt[]; + +loop: + Bflush(bout); + c = lexc(); + switch(c) { + case Eof: + if(gotint) { + gotint = 0; + stacked = 0; + Bprint(bout, "\nacid: "); + goto loop; + } + return Eof; + + case '"': + eatstring(); + return Tstring; + + case ' ': + case '\r': + case '\t': + goto loop; + + case '\n': + line++; + if(interactive == 0) + goto loop; + if(stacked) { + print("\t"); + goto loop; + } + return ';'; + + case '.': + c = lexc(); + unlexc(c); + if(isdigit(c)) + return numsym('.'); + + return '.'; + + case '(': + case ')': + case '[': + case ']': + case ';': + case ':': + case ',': + case '~': + case '?': + case '*': + case '@': + case '^': + case '%': + return c; + case '{': + stacked++; + return c; + case '}': + stacked--; + return c; + + case '\\': + c = lexc(); + if(strchr(vfmt, c) == 0) { + unlexc(c); + return '\\'; + } + yylval.ival = c; + return Tfmt; + + case '!': + c = lexc(); + if(c == '=') + return Tneq; + unlexc(c); + return '!'; + + case '+': + c = lexc(); + if(c == '+') + return Tinc; + unlexc(c); + return '+'; + + case '/': + c = lexc(); + if(c == '/') { + eatnl(); + goto loop; + } + unlexc(c); + return '/'; + + case '\'': + c = lexc(); + if(c == '\\') + yylval.ival = escchar(lexc()); + else + yylval.ival = c; + c = lexc(); + if(c != '\'') { + error("missing '"); + unlexc(c); + } + return Tconst; + + case '&': + c = lexc(); + if(c == '&') + return Tandand; + unlexc(c); + return '&'; + + case '=': + c = lexc(); + if(c == '=') + return Teq; + unlexc(c); + return '='; + + case '|': + c = lexc(); + if(c == '|') + return Toror; + unlexc(c); + return '|'; + + case '<': + c = lexc(); + if(c == '=') + return Tleq; + if(c == '<') + return Tlsh; + unlexc(c); + return '<'; + + case '>': + c = lexc(); + if(c == '=') + return Tgeq; + if(c == '>') + return Trsh; + unlexc(c); + return '>'; + + case '-': + c = lexc(); + + if(c == '>') + return Tindir; + + if(c == '-') + return Tdec; + unlexc(c); + return '-'; + + default: + return numsym(c); + } +} + +int +numsym(char first) +{ + int c, isbin, isfloat, ishex; + char *sel, *p; + Lsym *s; + + symbol[0] = first; + p = symbol; + + ishex = 0; + isbin = 0; + isfloat = 0; + if(first == '.') + isfloat = 1; + + if(isdigit(*p++) || isfloat) { + for(;;) { + c = lexc(); + if(c < 0) + error("%d: <eof> eating symbols", line); + + if(c == '\r') + continue; + if(c == '\n') + line++; + sel = "01234567890.xb"; + if(ishex) + sel = "01234567890abcdefABCDEF"; + else if(isbin) + sel = "01"; + else if(isfloat) + sel = "01234567890eE-+"; + + if(strchr(sel, c) == 0) { + unlexc(c); + break; + } + if(c == '.') + isfloat = 1; + if(!isbin && c == 'x') + ishex = 1; + if(!ishex && c == 'b') + isbin = 1; + *p++ = c; + } + *p = '\0'; + if(isfloat) { + yylval.fval = atof(symbol); + return Tfconst; + } + + if(isbin) + yylval.ival = strtoul(symbol+2, 0, 2); + else + yylval.ival = strtoul(symbol, 0, 0); + return Tconst; + } + + for(;;) { + c = lexc(); + if(c < 0) + error("%d <eof> eating symbols", line); + if(c == '\n') + line++; + if(c != '_' && c != '$' && c <= '~' && !isalnum(c)) { /* checking against ~ lets UTF names through */ + unlexc(c); + break; + } + *p++ = c; + } + + *p = '\0'; + + s = look(symbol); + if(s == 0) + s = enter(symbol, Tid); + + yylval.sym = s; + return s->lexval; +} + +Lsym* +enter(char *name, int t) +{ + Lsym *s; + ulong h; + char *p; + Value *v; + + h = 0; + for(p = name; *p; p++) + h = h*3 + *p; + h %= Hashsize; + + s = gmalloc(sizeof(Lsym)); + s->name = strdup(name); + + s->hash = hash[h]; + hash[h] = s; + s->lexval = t; + + v = gmalloc(sizeof(Value)); + s->v = v; + + v->vstore.fmt = 'X'; + v->type = TINT; + + return s; +} + +Lsym* +look(char *name) +{ + Lsym *s; + ulong h; + char *p; + + h = 0; + for(p = name; *p; p++) + h = h*3 + *p; + h %= Hashsize; + + for(s = hash[h]; s; s = s->hash) + if(strcmp(name, s->name) == 0) + return s; + return 0; +} + +Lsym* +mkvar(char *s) +{ + Lsym *l; + + l = look(s); + if(l == 0) + l = enter(s, Tid); + return l; +} |
