summaryrefslogtreecommitdiff
path: root/utils/rcsh/lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'utils/rcsh/lex.c')
-rw-r--r--utils/rcsh/lex.c398
1 files changed, 398 insertions, 0 deletions
diff --git a/utils/rcsh/lex.c b/utils/rcsh/lex.c
new file mode 100644
index 00000000..d31a94d8
--- /dev/null
+++ b/utils/rcsh/lex.c
@@ -0,0 +1,398 @@
+#include "rc.h"
+#include "y.tab.h"
+
+#define NTOK 8192 /* size in bytes of the token text buffer tok[] */
+
+int getnext(void);
+
+int future=EOF; /* one character of lookahead held by nextc(); EOF means nothing is buffered */
+int doprompt=1; /* nonzero: getnext() calls pprompt() before reading more input */
+int inquote; /* set by yylex while inside '...'; getnext() then leaves backslashes alone */
+int nerror; /* incremented by every call to yyerror() */
+char *promptstr; /* string pprompt() writes to err when the input is interactive */
+
+char tok[NTOK]; /* text of the current token; quoted in yyerror() messages */
+
+int lastdol; /* was the last token read '$' or '$#' or '"'? */
+int lastword; /* was the last token read a word or compound word terminator? */
+int lastc; /* last character consumed by advance() */
+
+/*
+ * Hand every reserved word and operator keyword, together with its
+ * token type, to kenter().  The entries are registered in table order.
+ */
+void
+kinit(void)
+{
+	static struct{
+		int type;
+		char *name;
+	}kw[]={
+		{FOR, "for"},
+		{IN, "in"},
+		{WHILE, "while"},
+		{IF, "if"},
+		{NOT, "not"},
+		{TWIDDLE, "~"},
+		{BANG, "!"},
+		{SUBSHELL, "@"},
+		{SWITCH, "switch"},
+		{FN, "fn"},
+	};
+	int i;
+
+	for(i=0; i<(int)(sizeof kw/sizeof kw[0]); i++)
+		kenter(kw[i].type, kw[i].name);
+}
+
+/*
+ * Report whether c may appear in an unquoted word.
+ * Returns 1 for every character except EOF, NUL and the delimiters
+ * listed below; returns 0 for those.
+ */
+int
+wordchr(int c)
+{
+	static char delims[]="\n \t\r#;&|^$=`'{}()<>";
+
+	if(c==EOF)
+		return 0;
+	/* strchr also matches the terminating NUL, so c==0 is not a word character */
+	return strchr(delims, c)==0;
+}
+
+/*
+ * Report whether c may appear in a variable name (used by yylex for the
+ * word that follows '$').  Anything above space that is not one of the
+ * punctuation characters below qualifies; this is wider than the
+ * letters, digits, '_' and '*' that were once accepted.
+ */
+int
+idchr(int c)
+{
+	/* control characters, space, NUL and EOF are never part of a name */
+	if(c<=' ')
+		return 0;
+	return strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c)==0;
+}
+
+/*
+ * Peek at the next input character without consuming it.
+ * The character is fetched with getnext() the first time it is asked
+ * for and then kept in future until advance() takes it.
+ */
+int
+nextc(void)
+{
+	if(future!=EOF)
+		return future;
+	future=getnext();
+	return future;
+}
+
+/*
+ * Consume the lookahead character and return it.
+ * The character is also remembered in lastc.
+ */
+int
+advance(void)
+{
+	nextc();	/* fills future if nothing is buffered yet */
+	lastc=future;
+	future=EOF;
+	return lastc;
+}
+
+/*
+ * Read the next character of the command input.
+ *
+ * Outside quotes, backslash-newline is turned into a single space.  A
+ * backslash followed by anything else is returned unchanged, and the
+ * character after it is held in peekc and returned by the next call.
+ * When EOF is read, runq->eof is set and every later call returns EOF
+ * without reading.  pprompt() is called first whenever doprompt is set.
+ * Each character is echoed to err when flag['V'] is set, or when
+ * flag['v'] is set and ndot>=2.
+ */
+int
+getnext(void)
+{
+	int c;
+	static int peekc=EOF;	/* character read after a backslash but not yet returned */
+
+	if(peekc!=EOF){
+		c=peekc;
+		peekc=EOF;
+		return c;
+	}
+	if(runq->eof)
+		return EOF;
+	if(doprompt)
+		pprompt();
+	c=rchr(runq->cmdfd);
+	if(!inquote && c=='\\'){
+		c=rchr(runq->cmdfd);
+		if(c=='\n'){
+			/* line continuation: hand back a blank and prompt for the next line */
+			doprompt=1;
+			c=' ';
+		}
+		else{
+			peekc=c;
+			c='\\';
+		}
+	}
+	/* a newline or EOF means a prompt is due before the next read */
+	doprompt=doprompt || c=='\n' || c==EOF;
+	if(c==EOF)
+		runq->eof++;
+	else if(flag['V'] || (ndot>=2 && flag['v']))
+		pchr(err, c);
+	return c;
+}
+
+/*
+ * Called by getnext() when a prompt is due.
+ * For interactive input (runq->iflag) the current promptstr is written
+ * to err, then promptstr is reloaded for the next call from the second
+ * word of the variable "prompt", or a tab when there is none.
+ * In every case the line counter is advanced and doprompt is cleared.
+ */
+void
+pprompt(void)
+{
+	Var *pv;
+
+	if(runq->iflag){
+		pstr(err, promptstr);
+		flush(err);
+		pv=vlook("prompt");
+		promptstr=(pv->val && pv->val->next) ? pv->val->next->word : "\t";
+	}
+	runq->lineno++;
+	doprompt=0;
+}
+
+/*
+ * Skip blanks, tabs and carriage returns, plus a '#' comment if one
+ * follows.  A comment runs up to, but not including, the newline or EOF
+ * that ends it.  Comments are recognised here regardless of inquote.
+ */
+void
+skipwhite(void)
+{
+	int ch;
+
+	for(;;){
+		ch=nextc();
+		if(ch=='#'){
+			/* the first pass of this loop consumes the '#' itself */
+			while((ch=nextc())!='\n' && ch!=EOF)
+				advance();
+			return;
+		}
+		switch(ch){
+		case ' ':
+		case '\t':
+		case '\r':
+			advance();
+			break;
+		default:
+			return;
+		}
+	}
+}
+
+/*
+ * Skip white space and comments as skipwhite() does, and any newlines
+ * between them, stopping at the first character that is neither.
+ */
+void
+skipnl(void)
+{
+	skipwhite();
+	while(nextc()=='\n'){
+		advance();
+		skipwhite();
+	}
+}
+
+/*
+ * If the lookahead character is c, consume it and return 1.
+ * Otherwise leave the input alone and return 0.
+ */
+int
+nextis(int c)
+{
+	if(nextc()!=c)
+		return 0;
+	advance();
+	return 1;
+}
+
+/*
+ * Append the byte val to the token being assembled in tok[].
+ *
+ * p is the current end of the token.  Returns the new end, or 0 once
+ * the buffer is full.  A null p (an earlier overflow) is passed straight
+ * through, so callers can chain calls and test the result only once.
+ *
+ * The last byte of tok[] is reserved for the terminating NUL.  When p
+ * reaches it the token is terminated there and "token buffer too short"
+ * is reported through yyerror().
+ */
+char *
+addtok(char *p, int val)
+{
+	if(p==0)
+		return 0;
+	/*
+	 * Stop one byte short of the end of tok[]: both the NUL stored
+	 * here and the caller's closing *w='\0' must land inside the array.
+	 */
+	if(p>=&tok[NTOK-1]){
+		*p=0;
+		yyerror("token buffer too short");
+		return 0;
+	}
+	*p++=val;
+	return p;
+}
+
+/*
+ * Append the character that starts with byte c to the token at p.
+ * When c is the lead byte of a two- or three-byte sequence (as decided
+ * by twobyte() and threebyte()), the one or two bytes that follow are
+ * taken from the input with advance() and appended as well.
+ * Returns the new end of the token, or 0 after an overflow in addtok().
+ */
+char *
+addutf(char *p, int c)
+{
+	int more;
+
+	p=addtok(p, c);
+	more=twobyte(c) ? 1 : threebyte(c) ? 2 : 0;
+	while(more-->0)
+		p=addtok(p, advance());
+	return p;
+}
+
+/*
+ * Lexical analyser: read one token and return its type.
+ *
+ * The token's text is left in tok[] for use in error messages.
+ * yylval.tree is set to a tree node for words, quoted words, pipes and
+ * redirections, and to 0 for every other token.  Returns EOF at end of
+ * input and after a redirection syntax error, which is reported through
+ * yyerror().
+ *
+ * lastword and lastdol carry context from one call to the next:
+ * lastword drives the free-caret and subscript rule described below,
+ * and lastdol restricts the word after '$', '$#' or '$"' to the
+ * characters accepted by idchr().
+ */
+int
+yylex(void)
+{
+	enum{ NFDDIG=9 };	/* most digits accepted in one fd number; 9 digits fit a 32-bit int */
+	int c, d=nextc();	/* d is peeked before any white space is skipped */
+	int ndig;
+	char *w=tok;
+	Tree *t;
+
+	yylval.tree=0;
+	/*
+	 * Embarrassing sneakiness: if the last token read was a quoted or unquoted
+	 * WORD then we alter the meaning of what follows. If the next character
+	 * is `(', we return SUB (a subscript paren) and consume the `('. Otherwise,
+	 * if the next character is the first character of a simple or compound word,
+	 * we insert a `^' before it.
+	 */
+	if(lastword){
+		lastword=0;
+		if(d=='('){
+			advance();
+			strcpy(tok, "( [SUB]");
+			return SUB;
+		}
+		if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
+			/* nothing is consumed: the word itself is read by the next call */
+			strcpy(tok, "^");
+			return '^';
+		}
+	}
+	inquote=0;
+	skipwhite();
+	switch(c=advance()){
+	case EOF:
+		lastdol=0;
+		strcpy(tok, "EOF");
+		return EOF;
+	case '$':
+		lastdol=1;
+		if(nextis('#')){
+			strcpy(tok, "$#");
+			return COUNT;
+		}
+		if(nextis('"')){
+			strcpy(tok, "$\"");
+			return '"';
+		}
+		strcpy(tok, "$");
+		return '$';
+	case '&':
+		lastdol=0;
+		if(nextis('&')){
+			skipnl();
+			strcpy(tok, "&&");
+			return ANDAND;
+		}
+		strcpy(tok, "&");
+		return '&';
+	case '|':
+		lastdol=0;
+		if(nextis(c)){
+			skipnl();
+			strcpy(tok, "||");
+			return OROR;
+		}
+		/* fall through: a single '|' is parsed like a redirection arrow */
+	case '<':
+	case '>':
+		lastdol=0;
+		/*
+		 * funny redirection tokens:
+		 *	redir:	arrow | arrow '[' fd ']'
+		 *	arrow:	'<' | '<<' | '>' | '>>' | '|'
+		 *	fd:	digit | digit '=' | digit '=' digit
+		 *	digit:	'0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
+		 * some possibilities are nonsensical and get a message.
+		 */
+		*w++=c;
+		t=newtree();
+		switch(c){
+		case '|':
+			t->type=PIPE;
+			t->fd0=1;
+			t->fd1=0;
+			break;
+		case '>':
+			t->type=REDIR;
+			if(nextis(c)){
+				t->rtype=APPEND;
+				*w++=c;
+			}
+			else t->rtype=WRITE;
+			t->fd0=1;
+			break;
+		case '<':
+			t->type=REDIR;
+			if(nextis(c)){
+				t->rtype=HERE;
+				*w++=c;
+			}
+			else t->rtype=READ;
+			t->fd0=0;
+			break;
+		}
+		if(nextis('[')){
+			*w++='[';
+			c=advance();
+			if(c<'0' || '9'<c){
+				/* keep the offending character so the message shows it */
+				if(c!=EOF)
+					*w++=c;
+			RedirErr:
+				*w=0;
+				yyerror(t->type==PIPE?"pipe syntax"
+						:"redirection syntax");
+				return EOF;
+			}
+			t->fd0=0;
+			ndig=0;
+			do{
+				/* the digit cap keeps fd0 from overflowing and tok[] from overrunning */
+				if(++ndig>NFDDIG)
+					goto RedirErr;
+				t->fd0=t->fd0*10+c-'0';
+				*w++=c;	/* each digit is recorded exactly once */
+				c=advance();
+			}while('0'<=c && c<='9');
+			if(c=='='){
+				*w++='=';
+				if(t->type==REDIR)
+					t->type=DUP;
+				c=advance();
+				if('0'<=c && c<='9'){
+					/* [a=b]: the number read first becomes fd1, the second fd0 */
+					t->rtype=DUPFD;
+					t->fd1=t->fd0;
+					t->fd0=0;
+					ndig=0;
+					do{
+						if(++ndig>NFDDIG)
+							goto RedirErr;
+						t->fd0=t->fd0*10+c-'0';
+						*w++=c;
+						c=advance();
+					}while('0'<=c && c<='9');
+				}
+				else{
+					/* [a=] is only accepted on a redirection, not on a pipe */
+					if(t->type==PIPE)
+						goto RedirErr;
+					t->rtype=CLOSE;
+				}
+			}
+			if(c!=']' || (t->type==DUP && (t->rtype==HERE || t->rtype==APPEND)))
+				goto RedirErr;
+			*w++=']';
+		}
+		*w='\0';
+		yylval.tree=t;
+		if(t->type==PIPE)
+			skipnl();
+		return t->type;
+	case '\'':
+		lastdol=0;
+		lastword=1;
+		inquote=1;	/* getnext() must not interpret backslashes in here */
+		for(;;){
+			c=advance();
+			if(c==EOF)
+				break;
+			if(c=='\''){
+				/* '' inside the quotes stands for one literal quote */
+				if(nextc()!='\'')
+					break;
+				advance();
+			}
+			w=addutf(w, c);
+		}
+		if(w!=0)
+			*w='\0';
+		t=token(tok, WORD);
+		t->quoted=1;
+		yylval.tree=t;
+		return t->type;
+	}
+	if(!wordchr(c)){
+		/* any other single character is its own token */
+		lastdol=0;
+		tok[0]=c;
+		tok[1]='\0';
+		return c;
+	}
+	for(;;){
+		/* next line should have (char)c==GLOB, but ken's compiler is broken */
+		if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB)
+			w=addtok(w, GLOB);	/* a GLOB byte goes in front of each of these */
+		w=addutf(w, c);
+		c=nextc();
+		if(lastdol?!idchr(c):!wordchr(c))
+			break;
+		advance();
+	}
+	lastword=1;
+	lastdol=0;
+	if(w!=0)
+		*w='\0';
+	t=klook(tok);
+	/* anything klook() returns that is not a WORD gets no free caret after it */
+	if(t->type!=WORD)
+		lastword=0;
+	t->quoted=0;
+	yylval.tree=t;
+	return t->type;
+}
+
+/*
+ * Report the syntax error m on err.
+ * The message is prefixed with "rc: ", then the command file name if
+ * there is one, the line number unless the input is interactive, and
+ * the current token unless it is empty or a newline.  The lexer's word
+ * and dollar context is then reset, the rest of the input line is
+ * discarded, and nerror is incremented.
+ */
+void
+yyerror(char *m)
+{
+	pfmt(err, "rc: ");
+	if(runq->cmdfile)
+		pfmt(err, "file %s: ", runq->cmdfile);
+	if(!runq->iflag)
+		pfmt(err, "line %d: ", runq->lineno);
+	if(tok[0]!='\0' && tok[0]!='\n')
+		pfmt(err, "token %q: ", tok);
+	pfmt(err, "%s\n", m);
+	flush(err);
+	lastword=0;
+	lastdol=0;
+	/* skip to the end of the offending line */
+	for(;;){
+		if(lastc=='\n' || lastc==EOF)
+			break;
+		advance();
+	}
+	nerror++;
+}