20060303

author: Charles.Forsyth <devnull@localhost> 2006-12-22 21:39:35 +0000
committer: Charles.Forsyth <devnull@localhost> 2006-12-22 21:39:35 +0000
commit: 74a4d8c26dd3c1e9febcb717cfd6cb6512991a7a (patch)
tree: c6e220ba61db3a6ea4052e6841296d829654e664 /utils/rcsh/lex.c
parent: 46439007cf417cbd9ac8049bb4122c890097a0fa (diff)
1 files changed, 398 insertions, 0 deletions
diff --git a/utils/rcsh/lex.c b/utils/rcsh/lex.c
new file mode 100644
index 00000000..d31a94d8
--- /dev/null
+++ b/utils/rcsh/lex.c
@@ -0,0 +1,398 @@
+#include "rc.h"
+#include "y.tab.h"
+
+#define	NTOK	8192	/* size of tok[], the token text buffer */
+
+int getnext(void);
+
+int	future=EOF;	/* one-character lookahead filled by nextc(); EOF means the slot is empty */
+int	doprompt=1;	/* nonzero: getnext() calls pprompt() before reading the next character */
+int	inquote;	/* set to 1 by yylex() when it starts a single-quoted word, cleared at the start of the next yylex() call */
+int	nerror;	/* incremented by every yyerror() call */
+char	*promptstr;	/* string pprompt() prints when input is interactive */
+
+char	tok[NTOK];	/* text of the current token; yyerror() includes it in its message */
+
+int	lastdol;	/* was the last token read '$' or '$#' or '"'? */
+int	lastword;	/* was the last token read a word or compound word terminator? */
+int	lastc;	/* last character consumed by advance() */
+
+void
+kinit(void)	/* hands each reserved spelling and its token code to kenter() (defined elsewhere) */
+{
+	kenter(FOR, "for");	/* presumably kenter() fills the table that klook() in yylex() consults -- confirm */
+	kenter(IN, "in");
+	kenter(WHILE, "while");
+	kenter(IF, "if");
+	kenter(NOT, "not");
+	kenter(TWIDDLE, "~");	/* the three punctuation keywords are entered the same way as the alphabetic ones */
+	kenter(BANG, "!");
+	kenter(SUBSHELL, "@");
+	kenter(SWITCH, "switch");
+	kenter(FN, "fn");
+}
+
+int
+wordchr(int c)	/* nonzero iff c may appear in an unquoted word: not EOF and not one of the delimiters listed below */
+{
+	return !strchr("\n \t\r#;&|^$=`'{}()<>", c) && c!=EOF;	/* strchr also matches the terminating NUL, so c==0 yields 0 */
+}
+
+int
+idchr(int c)	/* nonzero iff c may continue a name; yylex() uses this instead of wordchr() while lastdol is set */
+{
+	/*
+	 * Formerly:
+	 * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
+	 *	|| c=='_' || c=='*';
+	 */
+	return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c);	/* first test also rejects blanks, control characters and negative values such as EOF; * and _ are not in the excluded set */
+}
+
+/*
+ * Peek at the next input character without consuming it.  The character
+ * is kept in future (fetched with getnext() if the slot is empty).
+ */
+int
+nextc(void)
+{
+	if(future!=EOF) return future;
+	return future=getnext();
+}
+
+/*
+ * Consume the lookahead character: record it in lastc, empty the
+ * lookahead slot and return the character to the caller.
+ */
+int
+advance(void)
+{
+	lastc=nextc();	/* nextc() returns the value it leaves in future */
+	future=EOF;
+	return lastc;
+}
+
+/*
+ * Read one character from runq->cmdfd; outside quotes, backslash-newline is returned as a single blank.
+ */
+int
+getnext(void)
+{
+	int c;
+	static int peekc=EOF;	/* character read after a lone backslash; handed out by the next call */
+	if(peekc!=EOF){
+		c=peekc;
+		peekc=EOF;
+		return c;
+	}
+	if(runq->eof) return EOF;	/* end of input already seen: neither prompt nor read again */
+	if(doprompt)
+		pprompt();
+	c=rchr(runq->cmdfd);
+	if(!inquote && c=='\\'){	/* backslash is only examined outside single quotes */
+		c=rchr(runq->cmdfd);
+		if(c=='\n'){
+			doprompt=1;	/* a continued line still gets a prompt */
+			c=' ';
+		}
+		else{
+			peekc=c;	/* not a continuation: return the backslash now, this character next time */
+			c='\\';
+		}
+	}
+	doprompt=doprompt || c=='\n' || c==EOF;
+	if(c==EOF) runq->eof++;
+	else if(flag['V'] || (ndot>=2 && flag['v']))
+		pchr(err, c);	/* echo the character to err when the V flag, or v with ndot>=2, is set */
+	return c;
+}
+
+void
+pprompt(void)
+{
+	Var *v;
+
+	/*
+	 * Interactive input: print the current promptstr, then choose the next one (second word of the prompt variable, else a tab).
+	 */
+	if(runq->iflag){
+		pstr(err, promptstr);
+		flush(err);
+		v=vlook("prompt");
+		promptstr=(v->val && v->val->next)? v->val->next->word: "\t";
+	}
+	runq->lineno++;	/* counted on every call, interactive or not */
+	doprompt=0;
+}
+
+void
+skipwhite(void)	/* skip blanks, tabs, carriage returns and #-comments; a newline or EOF ending a comment is left unread */
+{
+	int c;
+	for(;;){
+		c=nextc();
+		if(c=='#'){	/* Why did this used to be  if(!inquote && c=='#') ?? */
+			for(;;){	/* discard the comment body one character at a time */
+				c=nextc();
+				if(c=='\n' || c==EOF) break;	/* c keeps the newline or EOF, so the test below returns */
+				advance();
+			}
+		}
+		if(c==' ' || c=='\t' || c=='\r') advance();
+		else return;	/* first character that is not white space stays in the lookahead */
+	}
+}
+
+void
+skipnl(void)
+{
+	/*
+	 * Swallow any run of newlines, together with the blanks and
+	 * comments that skipwhite() removes before and between them.
+	 * Stops with the first other character still in the lookahead.
+	 */
+	for(skipwhite(); nextc()=='\n'; skipwhite())
+		advance();
+}
+
+int
+nextis(int c)
+{
+	/* consume the lookahead only when it equals c; return 1 if it was consumed, else 0 */
+	if(nextc()!=c)
+		return 0;
+	advance();
+	return 1;
+}
+
+char *
+addtok(char *p, int val)	/* store val at p inside tok[]; returns the next free slot, or 0 once the token has overflowed */
+{
+	if(p==0) return 0;	/* overflow was already reported: stay in the failed state */
+	if(p>=&tok[NTOK-1]){	/* last slot is reserved for the NUL written here and by callers, so nothing is stored past tok[NTOK-1] */
+		tok[NTOK-1]=0;
+		yyerror("token buffer too short");
+		return 0;
+	}
+	*p++=val;
+	return p;
+}
+
+char *
+addutf(char *p, int c)	/* append c to the token via addtok(), then the one or two following input bytes when twobyte(c) or threebyte(c) holds */
+{
+	p=addtok(p, c);
+	if(twobyte(c))	 /* 2-byte escape */
+		return addtok(p, advance());	/* NOTE(review): the byte comes straight from advance(); an EOF here would be stored as data -- confirm */
+	if(threebyte(c)){	/* 3-byte escape */
+		p=addtok(p, advance());
+		return addtok(p, advance());
+	}
+	return p;	/* may be 0 if addtok() reported overflow */
+}
+
+int
+yylex(void)	/* scan the next token: its text goes to tok[], its tree (if any) to yylval.tree, and its token code is returned */
+{
+	int c, d=nextc();	/* d is the lookahead seen before any white space is skipped */
+	char *w=tok;	/* write position in tok[]; becomes 0 after addtok() reports overflow */
+	Tree *t;
+
+	yylval.tree=0;
+	/*
+	 * Embarrassing sneakiness:  if the last token read was a quoted or unquoted
+	 * WORD then we alter the meaning of what follows.  If the next character
+	 * is `(', we return SUB (a subscript paren) and consume the `('.  Otherwise,
+	 * if the next character is the first character of a simple or compound word,
+	 * we insert a `^' before it.
+	 */
+	if(lastword){
+		lastword=0;
+		if(d=='('){
+			advance();
+			strcpy(tok, "( [SUB]");
+			return SUB;
+		}
+		if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
+			strcpy(tok, "^");	/* d is not consumed here; it starts the token returned by the next call */
+			return '^';
+		}
+	}
+	inquote=0;
+	skipwhite();
+	switch(c=advance()){
+	case EOF:
+		lastdol=0;
+		strcpy(tok, "EOF");
+		return EOF;
+	case '$':
+		lastdol=1;	/* the following word is delimited with idchr() instead of wordchr() */
+		if(nextis('#')){
+			strcpy(tok, "$#");
+			return COUNT;
+		}
+		if(nextis('"')){
+			strcpy(tok, "$\"");
+			return '"';
+		}
+		strcpy(tok, "$");
+		return '$';
+	case '&':
+		lastdol=0;
+		if(nextis('&')){
+			skipnl();	/* newlines after the operator are skipped */
+			strcpy(tok, "&&");
+			return ANDAND;
+		}
+		strcpy(tok, "&");
+		return '&';
+	case '|':
+		lastdol=0;
+		if(nextis(c)){
+			skipnl();
+			strcpy(tok, "||");
+			return OROR;
+		}	/* a single bar falls through and is scanned as a pipe by the code below */
+	case '<':
+	case '>':
+		lastdol=0;
+		/*
+		 * funny redirection tokens:
+		 *	redir:	arrow | arrow '[' fd ']'
+		 *	arrow:	'<' | '<<' | '>' | '>>' | '|'
+		 *	fd:	digit | digit '=' | digit '=' digit
+		 *	digit:	'0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
+		 * some possibilities are nonsensical and get a message.
+		 */
+		*w++=c;
+		t=newtree();
+		switch(c){	/* defaults that apply when no bracketed fd list follows */
+		case '|':
+			t->type=PIPE;
+			t->fd0=1;
+			t->fd1=0;
+			break;
+		case '>':
+			t->type=REDIR;
+			if(nextis(c)){
+				t->rtype=APPEND;
+				*w++=c;
+			}
+			else t->rtype=WRITE;
+			t->fd0=1;
+			break;
+		case '<':
+			t->type=REDIR;
+			if(nextis(c)){
+				t->rtype=HERE;
+				*w++=c;
+			}
+			else t->rtype=READ;
+			t->fd0=0;
+			break;
+		}
+		if(nextis('[')){
+			*w++='[';
+			c=advance();
+			*w++=c;	/* NOTE(review): a leading digit is stored again by the loop below, so tok shows it twice */
+			if(c<'0' || '9'<c){
+			RedirErr:
+				*w=0;
+				yyerror(t->type==PIPE?"pipe syntax"
+						:"redirection syntax");
+				return EOF;	/* after the message the error is handed to the parser as EOF */
+			}
+			t->fd0=0;
+			do{	/* NOTE(review): digits are stored with *w++ rather than addtok(), so nothing visible here bounds w -- confirm */
+				t->fd0=t->fd0*10+c-'0';
+				*w++=c;
+				c=advance();
+			}while('0'<=c && c<='9');
+			if(c=='='){
+				*w++='=';
+				if(t->type==REDIR)
+					t->type=DUP;
+				c=advance();
+				if('0'<=c && c<='9'){
+					t->rtype=DUPFD;
+					t->fd1=t->fd0;	/* the first number moves to fd1; fd0 is re-read from the digits after the equals sign */
+					t->fd0=0;
+					do{
+						t->fd0=t->fd0*10+c-'0';
+						*w++=c;
+						c=advance();
+					}while('0'<=c && c<='9');
+				}
+				else{
+					if(t->type==PIPE) goto RedirErr;
+					t->rtype=CLOSE;	/* a number followed by a bare equals sign */
+				}
+			}
+			if(c!=']' || t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))	/* NOTE(review): type becomes DUP only in the branch above, which then sets rtype to DUPFD or CLOSE, so the HERE/APPEND test looks unreachable -- confirm */
+				goto RedirErr;
+			*w++=']';
+		}
+		*w='\0';
+		yylval.tree=t;
+		if(t->type==PIPE) skipnl();	/* newlines after a pipe are skipped */
+		return t->type;
+	case '\'':
+		lastdol=0;
+		lastword=1;
+		inquote=1;	/* getnext() now passes backslashes through untouched */
+		for(;;){
+			c=advance();
+			if(c==EOF) break;	/* unterminated quote: the word ends at EOF */
+			if(c=='\''){
+				if(nextc()!='\'')
+					break;
+				advance();	/* doubled quote: consume the second one and store a single quote character */
+			}
+			w=addutf(w, c);
+		}
+		if(w!=0) *w='\0';	/* on overflow addtok() has already terminated tok */
+		t=token(tok, WORD);
+		t->quoted=1;
+		yylval.tree=t;
+		return t->type;
+	}
+	if(!wordchr(c)){	/* any remaining non-word character is returned as its own token code */
+		lastdol=0;
+		tok[0]=c;
+		tok[1]='\0';
+		return c;
+	}
+	for(;;){
+		/* next line should have (char)c==GLOB, but ken's compiler is broken */
+		if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB)
+			w=addtok(w, GLOB);	/* a GLOB byte is stored in front of each pattern character and of GLOB itself */
+		w=addutf(w, c);
+		c=nextc();
+		if(lastdol?!idchr(c):!wordchr(c)) break;	/* while lastdol is set only idchr() characters continue the word */
+		advance();
+	}
+Out:	/* this label is not referenced anywhere in the visible code */
+	lastword=1;
+	lastdol=0;
+	if(w!=0) *w='\0';
+	t=klook(tok);	/* klook() is defined elsewhere; presumably it returns the keyword tree for reserved spellings -- confirm */
+	if(t->type!=WORD) lastword=0;	/* only a WORD result leaves lastword set for the next call */
+	t->quoted=0;
+	yylval.tree=t;
+	return t->type;
+}
+
+void
+yyerror(char *m)
+{
+	/* report m on err with file/line/token context, reset lexer state, then discard input up to newline or EOF */
+	pfmt(err, "rc: ");
+	if(runq->cmdfile!=0) pfmt(err, "file %s: ", runq->cmdfile);
+	if(runq->iflag==0) pfmt(err, "line %d: ", runq->lineno);
+	if(tok[0]!='\0' && tok[0]!='\n') pfmt(err, "token %q: ", tok);
+	pfmt(err, "%s\n", m);
+	flush(err);
+	lastword=lastdol=0;
+	while(!(lastc=='\n' || lastc==EOF)) advance();
+	nerror++;
+}
author	Charles.Forsyth <devnull@localhost>	2006-12-22 21:39:35 +0000
committer	Charles.Forsyth <devnull@localhost>	2006-12-22 21:39:35 +0000
commit	74a4d8c26dd3c1e9febcb717cfd6cb6512991a7a (patch)
tree	c6e220ba61db3a6ea4052e6841296d829654e664 /utils/rcsh/lex.c
parent	46439007cf417cbd9ac8049bb4122c890097a0fa (diff)