diff options
| author | Charles.Forsyth <devnull@localhost> | 2006-12-22 21:39:35 +0000 |
|---|---|---|
| committer | Charles.Forsyth <devnull@localhost> | 2006-12-22 21:39:35 +0000 |
| commit | 74a4d8c26dd3c1e9febcb717cfd6cb6512991a7a (patch) | |
| tree | c6e220ba61db3a6ea4052e6841296d829654e664 /utils/rcsh/lex.c | |
| parent | 46439007cf417cbd9ac8049bb4122c890097a0fa (diff) | |
20060303
Diffstat (limited to 'utils/rcsh/lex.c')
| -rw-r--r-- | utils/rcsh/lex.c | 398 |
1 files changed, 398 insertions, 0 deletions
diff --git a/utils/rcsh/lex.c b/utils/rcsh/lex.c new file mode 100644 index 00000000..d31a94d8 --- /dev/null +++ b/utils/rcsh/lex.c @@ -0,0 +1,398 @@ +#include "rc.h" +#include "y.tab.h" + +#define NTOK 8192 + +int getnext(void); + +int future=EOF; +int doprompt=1; +int inquote; +int nerror; +char *promptstr; + +char tok[NTOK]; + +int lastdol; /* was the last token read '$' or '$#' or '"'? */ +int lastword; /* was the last token read a word or compound word terminator? */ +int lastc; + +void +kinit(void) +{ + kenter(FOR, "for"); + kenter(IN, "in"); + kenter(WHILE, "while"); + kenter(IF, "if"); + kenter(NOT, "not"); + kenter(TWIDDLE, "~"); + kenter(BANG, "!"); + kenter(SUBSHELL, "@"); + kenter(SWITCH, "switch"); + kenter(FN, "fn"); +} + +int +wordchr(int c) +{ + return !strchr("\n \t\r#;&|^$=`'{}()<>", c) && c!=EOF; +} + +int +idchr(int c) +{ + /* + * Formerly: + * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9' + * || c=='_' || c=='*'; + */ + return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c); +} + +/* + * Look ahead in the input stream + */ +int +nextc(void) +{ + if(future==EOF) + future=getnext(); + return future; +} + +/* + * Consume the lookahead character. + */ +int +advance(void) +{ + int c=nextc(); + lastc=future; + future=EOF; + return c; +} + +/* + * read a character from the input stream + */ +int +getnext(void) +{ + register int c; + static peekc=EOF; + if(peekc!=EOF){ + c=peekc; + peekc=EOF; + return c; + } + if(runq->eof) return EOF; + if(doprompt) + pprompt(); + c=rchr(runq->cmdfd); + if(!inquote && c=='\\'){ + c=rchr(runq->cmdfd); + if(c=='\n'){ + doprompt=1; + c=' '; + } + else{ + peekc=c; + c='\\'; + } + } + doprompt=doprompt || c=='\n' || c==EOF; + if(c==EOF) runq->eof++; + else if(flag['V'] || ndot>=2 && flag['v']) + pchr(err, c); + return c; +} + +void +pprompt(void) +{ + Var *prompt; + + if(runq->iflag){ + pstr(err, promptstr); + flush(err); + prompt=vlook("prompt"); + if(prompt->val && prompt->val->next) + promptstr=prompt->val->next->word; + else + promptstr="\t"; + } + runq->lineno++; + doprompt=0; +} + +void +skipwhite(void) +{ + int c; + for(;;){ + c=nextc(); + if(c=='#'){ /* Why did this used to be if(!inquote && c=='#') ?? */ + for(;;){ + c=nextc(); + if(c=='\n' || c==EOF) break; + advance(); + } + } + if(c==' ' || c=='\t' || c=='\r') advance(); + else return; + } +} + +void +skipnl(void) +{ + int c; + for(;;){ + skipwhite(); + c=nextc(); + if(c!='\n') return; + advance(); + } +} + +int +nextis(int c) +{ + if(nextc()==c){ + advance(); + return 1; + } + return 0; +} + +char * +addtok(char *p, int val) +{ + if(p==0) return 0; + if(p==&tok[NTOK]){ + *p=0; + yyerror("token buffer too short"); + return 0; + } + *p++=val; + return p; +} + +char * +addutf(char *p, int c) +{ + p=addtok(p, c); + if(twobyte(c)) /* 2-byte escape */ + return addtok(p, advance()); + if(threebyte(c)){ /* 3-byte escape */ + p=addtok(p, advance()); + return addtok(p, advance()); + } + return p; +} + +int +yylex(void) +{ + int c, d=nextc(); + char *w=tok; + Tree *t; + + yylval.tree=0; + /* + * Embarassing sneakiness: if the last token read was a quoted or unquoted + * WORD then we alter the meaning of what follows. If the next character + * is `(', we return SUB (a subscript paren) and consume the `('. Otherwise, + * if the next character is the first character of a simple or compound word, + * we insert a `^' before it. + */ + if(lastword){ + lastword=0; + if(d=='('){ + advance(); + strcpy(tok, "( [SUB]"); + return SUB; + } + if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){ + strcpy(tok, "^"); + return '^'; + } + } + inquote=0; + skipwhite(); + switch(c=advance()){ + case EOF: + lastdol=0; + strcpy(tok, "EOF"); + return EOF; + case '$': + lastdol=1; + if(nextis('#')){ + strcpy(tok, "$#"); + return COUNT; + } + if(nextis('"')){ + strcpy(tok, "$\""); + return '"'; + } + strcpy(tok, "$"); + return '$'; + case '&': + lastdol=0; + if(nextis('&')){ + skipnl(); + strcpy(tok, "&&"); + return ANDAND; + } + strcpy(tok, "&"); + return '&'; + case '|': + lastdol=0; + if(nextis(c)){ + skipnl(); + strcpy(tok, "||"); + return OROR; + } + case '<': + case '>': + lastdol=0; + /* + * funny redirection tokens: + * redir: arrow | arrow '[' fd ']' + * arrow: '<' | '<<' | '>' | '>>' | '|' + * fd: digit | digit '=' | digit '=' digit + * digit: '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' + * some possibilities are nonsensical and get a message. + */ + *w++=c; + t=newtree(); + switch(c){ + case '|': + t->type=PIPE; + t->fd0=1; + t->fd1=0; + break; + case '>': + t->type=REDIR; + if(nextis(c)){ + t->rtype=APPEND; + *w++=c; + } + else t->rtype=WRITE; + t->fd0=1; + break; + case '<': + t->type=REDIR; + if(nextis(c)){ + t->rtype=HERE; + *w++=c; + } + else t->rtype=READ; + t->fd0=0; + break; + } + if(nextis('[')){ + *w++='['; + c=advance(); + *w++=c; + if(c<'0' || '9'<c){ + RedirErr: + *w=0; + yyerror(t->type==PIPE?"pipe syntax" + :"redirection syntax"); + return EOF; + } + t->fd0=0; + do{ + t->fd0=t->fd0*10+c-'0'; + *w++=c; + c=advance(); + }while('0'<=c && c<='9'); + if(c=='='){ + *w++='='; + if(t->type==REDIR) + t->type=DUP; + c=advance(); + if('0'<=c && c<='9'){ + t->rtype=DUPFD; + t->fd1=t->fd0; + t->fd0=0; + do{ + t->fd0=t->fd0*10+c-'0'; + *w++=c; + c=advance(); + }while('0'<=c && c<='9'); + } + else{ + if(t->type==PIPE) goto RedirErr; + t->rtype=CLOSE; + } + } + if(c!=']' || t->type==DUP && (t->rtype==HERE || t->rtype==APPEND)) + goto RedirErr; + *w++=']'; + } + *w='\0'; + yylval.tree=t; + if(t->type==PIPE) skipnl(); + return t->type; + case '\'': + lastdol=0; + lastword=1; + inquote=1; + for(;;){ + c=advance(); + if(c==EOF) break; + if(c=='\''){ + if(nextc()!='\'') + break; + advance(); + } + w=addutf(w, c); + } + if(w!=0) *w='\0'; + t=token(tok, WORD); + t->quoted=1; + yylval.tree=t; + return t->type; + } + if(!wordchr(c)){ + lastdol=0; + tok[0]=c; + tok[1]='\0'; + return c; + } + for(;;){ + /* next line should have (char)c==GLOB, but ken's compiler is broken */ + if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB) + w=addtok(w, GLOB); + w=addutf(w, c); + c=nextc(); + if(lastdol?!idchr(c):!wordchr(c)) break; + advance(); + } +Out: + lastword=1; + lastdol=0; + if(w!=0) *w='\0'; + t=klook(tok); + if(t->type!=WORD) lastword=0; + t->quoted=0; + yylval.tree=t; + return t->type; +} + +void +yyerror(char *m) +{ + pfmt(err, "rc: "); + if(runq->cmdfile) pfmt(err, "file %s: ", runq->cmdfile); + if(!runq->iflag) pfmt(err, "line %d: ", runq->lineno); + if(tok[0] && tok[0]!='\n') pfmt(err, "token %q: ", tok); + pfmt(err, "%s\n", m); + flush(err); + lastword=0; + lastdol=0; + while(lastc!='\n' && lastc!=EOF) advance(); + nerror++; +} |
