diff options
| author | Charles.Forsyth <devnull@localhost> | 2006-12-22 17:07:39 +0000 |
|---|---|---|
| committer | Charles.Forsyth <devnull@localhost> | 2006-12-22 17:07:39 +0000 |
| commit | 37da2899f40661e3e9631e497da8dc59b971cbd0 (patch) | |
| tree | cbc6d4680e347d906f5fa7fca73214418741df72 /appl/lib/w3c | |
| parent | 54bc8ff236ac10b3eaa928fd6bcfc0cdb2ba46ae (diff) | |
20060303a
Diffstat (limited to 'appl/lib/w3c')
| -rw-r--r-- | appl/lib/w3c/css.b | 1019 | ||||
| -rw-r--r-- | appl/lib/w3c/mkfile | 17 | ||||
| -rw-r--r-- | appl/lib/w3c/xpointers.b | 858 |
3 files changed, 1894 insertions, 0 deletions
diff --git a/appl/lib/w3c/css.b b/appl/lib/w3c/css.b new file mode 100644 index 00000000..9b1475b5 --- /dev/null +++ b/appl/lib/w3c/css.b @@ -0,0 +1,1019 @@ +implement CSS; + +# +# CSS2 parsing module +# +# CSS2.1 style sheets +# +# Copyright © 2001, 2005 Vita Nuova Holdings Limited. All rights reserved. +# + +include "sys.m"; + sys: Sys; + +include "css.m"; + +B, NUMBER, IDENT, STRING, URL, PERCENTAGE, UNIT, + HASH, ATKEYWORD, IMPORTANT, IMPORT, PSEUDO, CLASS, INCLUDES, + DASHMATCH, FUNCTION: con 16rE000+iota; + +toknames := array[] of{ + B-B => "Zero", + NUMBER-B => "NUMBER", + IDENT-B => "IDENT", + STRING-B => "STRING", + URL-B => "URL", + PERCENTAGE-B => "PERCENTAGE", + UNIT-B => "UNIT", + HASH-B => "HASH", + ATKEYWORD-B => "ATKEYWORD", + IMPORTANT-B => "IMPORTANT", + CLASS-B => "CLASS", + INCLUDES-B => "INCLUDES", + DASHMATCH-B => "DASHMATCH", + PSEUDO-B => "PSEUDO", + FUNCTION-B => "FUNCTION", +}; + +printdiag := 0; + +init(d: int) +{ + sys = load Sys Sys->PATH; + printdiag = d; +} + +parse(s: string): (ref Stylesheet, string) +{ + return stylesheet(ref Cparse(-1, 0, nil, nil, Clex.new(s,1))); +} + +parsedecl(s: string): (list of ref Decl, string) +{ + return (declarations(ref Cparse(-1, 0, nil, nil, Clex.new(s,0))), nil); +} + +ptok(c: int): string +{ + if(c < 0) + return "eof"; + if(c == 0) + return "zero?"; + if(c >= B) + return sys->sprint("%s", toknames[c-B]); + return sys->sprint("%c", c); +} + +Cparse: adt { + lookahead: int; + eof: int; + value: string; + suffix: string; + cs: ref Clex; + + get: fn(nil: self ref Cparse): int; + look: fn(nil: self ref Cparse): int; + unget: fn(nil: self ref Cparse, tok: int); + skipto: fn(nil: self ref Cparse, followset: string): int; + synerr: fn(nil: self ref Cparse, s: string); +}; + +Cparse.get(p: self ref Cparse): int +{ + if((c := p.lookahead) >= 0){ + p.lookahead = -1; + return c; + } + if(p.eof) + return -1; + (c, p.value, p.suffix) = csslex(p.cs); + if(c < 0) + p.eof = 1; + if(printdiag > 1) + sys->print("lex: %s v=%s s=%s\n", ptok(c), p.value, p.suffix); + return c; +} + +Cparse.look(p: self ref Cparse): int +{ + c := p.get(); + p.unget(c); + return c; +} + +Cparse.unget(p: self ref Cparse, c: int) +{ + if(p.lookahead >= 0) + raise "css: internal error: Cparse.unget"; + p.lookahead = c; # note that p.value and p.suffix are assumed to be those of c +} + +Cparse.skipto(p: self ref Cparse, followset: string): int +{ + while((c := p.get()) >= 0) + for(i := 0; i < len followset; i++) + if(followset[i] == c){ + p.unget(c); + return c; + } + return -1; +} + +Cparse.synerr(p: self ref Cparse, s: string) +{ + p.cs.synerr(s); +} + +# +# stylesheet: +# ["@charset" STRING ';']? +# [CDO|CDC]* [import [CDO|CDC]*]* +# [[ruleset | media | page ] [CDO|CDC]*]* +# import: +# "@import" [STRING|URL] [ medium [',' medium]*]? ';' +# media: +# "@media" medium [',' medium]* '{' ruleset* '}' +# medium: +# IDENT +# page: +# "@page" pseudo_page? '{' declaration [';' declaration]* '}' +# pseudo_page: +# ':' IDENT +# + +stylesheet(p: ref Cparse): (ref Stylesheet, string) +{ + charset: string; + if(atkeywd(p, "@charset")){ + if(itisa(p, STRING)){ + charset = p.value; + itisa(p, ';'); + }else + p.synerr("bad @charset declaration"); + } + imports: list of ref Import; + while(atkeywd(p, "@import")){ + c := p.get(); + if(c == STRING || c == URL){ + name := p.value; + media: list of string; + c = p.get(); + if(c == IDENT){ # optional medium [, ...] + p.unget(c); + media = medialist(p); + } + imports = ref Import(name, media) :: imports; + }else + p.synerr("bad @import"); + if(c != ';'){ + p.synerr("missing ; in @import"); + p.unget(c); + if(p.skipto(";}") < 0) + break; + } + } + imports = rev(imports); + + stmts: list of ref Statement; + do{ + while((c := p.get()) == ATKEYWORD) + case p.value { + "@media" => # medium[,medium]* { ruleset*} + media := medialist(p); + if(!itisa(p, '{')){ + p.synerr("bad @media"); + skipatrule("@media", p); + continue; + } + rules: list of ref Statement.Ruleset; + do{ + rule := checkrule(p); + if(rule != nil) + rules = rule :: rules; + }while(!itisa(p, '}') && !p.eof); + stmts = ref Statement.Media(media, rev(rules)) :: stmts; + "@page" => # [:ident]? { declaration [; declaration]* } + pseudo: string; + if(itisa(p, PSEUDO)) + pseudo = p.value; + if(!itisa(p, '{')){ + p.synerr("bad @page"); + skipatrule("@page", p); + continue; + } + decls := declarations(p); + if(!itisa(p, '}')){ + p.synerr("unclosed @page declaration block"); + skipatrule("@page", p); + continue; + } + stmts = ref Statement.Page(pseudo, decls) :: stmts; + * => + skipatrule(p.value, p); # skip unknown or misplaced at-rule + } + p.unget(c); + rule := checkrule(p); + if(rule != nil) + stmts = rule :: stmts; + }while(!p.eof); + rl := stmts; + stmts = nil; + for(; rl != nil; rl = tl rl) + stmts = hd rl :: stmts; + return (ref Stylesheet(charset, imports, stmts), nil); +} + +checkrule(p: ref Cparse): ref Statement.Ruleset +{ + (rule, err) := ruleset(p); + if(rule == nil){ + if(err != nil){ + p.synerr(sys->sprint("bad ruleset: %s", err)); + p.get(); # make some progress + } + } + return rule; +} + +medialist(p: ref Cparse): list of string +{ + media: list of string; + do{ + c := p.get(); + if(c != IDENT){ + p.unget(c); + p.synerr("missing medium identifier"); + break; + } + media = p.value :: media; + }while(itisa(p, ',')); + return rev(media); +} + +itisa(p: ref Cparse, expect: int): int +{ + if((c := p.get()) == expect) + return 1; + p.unget(c); + return 0; +} + +atkeywd(p: ref Cparse, expect: string): int +{ + if((c := p.get()) == ATKEYWORD && p.value == expect) + return 1; + p.unget(c); + return 0; +} + +skipatrule(name: string, p: ref Cparse) +{ + if(printdiag) + sys->print("skip unimplemented or misplaced %s\n", name); + if((c := p.get()) == '{'){ # block + for(nesting := '}' :: nil; nesting != nil && c >= 0; nesting = tl nesting){ + while((c = p.cs.getc()) >= 0 && c != hd nesting) + case c { + '{' => + nesting = '}' :: nesting; + '(' => + nesting = ')' :: nesting; + '[' => + nesting = ']' :: nesting; + '"' or '\'' => + quotedstring(p.cs, c); + } + } + }else{ + while(c >= 0 && c != ';') + c = p.get(); + } +} + +# ruleset: +# selector [',' S* selector]* '{' S* declaration [';' S* declaration]* '}' S* + +ruleset(p: ref Cparse): (ref Statement.Ruleset, string) +{ + selectors: list of list of (int, list of ref Select); + c := -1; + do{ + s := selector(p); + if(s == nil){ + if(p.eof) + return (nil, nil); + p.synerr("expected selector"); + if(p.skipto(",{}") < 0) + return (nil, nil); + c = p.look(); + }else + selectors = s :: selectors; + }while((c = p.get()) == ','); + if(c != '{') + return (nil, "expected declaration block"); + sl := selectors; + selectors = nil; + for(; sl != nil; sl = tl sl) + selectors = hd sl :: selectors; + decls := declarations(p); + if(!itisa(p, '}')){ + p.synerr("unclosed declaration block"); + } + return (ref Statement.Ruleset(selectors, decls), nil); +} + +declarations(p: ref Cparse): list of ref Decl +{ + decls: list of ref Decl; + c: int; + do{ + (d, e) := declaration(p); + if(d != nil) + decls = d :: decls; + else if(e != nil){ + p.synerr("ruleset declaration: "+e); + if((c = p.skipto(";}")) < 0) + break; + } + }while((c = p.get()) == ';'); + p.unget(c); + l := decls; + for(decls = nil; l != nil; l = tl l) + decls = hd l :: decls; + return decls; +} + +# selector: +# simple_selector [combinator simple_selector]* +# combinator: +# '+' S* | '>' S* | /* empty */ +# + +selector(p: ref Cparse): list of (int, list of ref Select) +{ + sel: list of (int, list of ref Select); + op := ' '; + while((s := selector1(p)) != nil){ + sel = (op, s) :: sel; + if((c := p.look()) == '+' || c == '>') + op = p.get(); + else + op = ' '; + } + l: list of (int, list of ref Select); + for(; sel != nil; sel = tl sel) + l = hd sel :: l; + return l; +} + +# +# simple_selector: +# element_name? [HASH | class | attrib | pseudo]* S* +# element_name: +# IDENT | '*' +# class: +# '.' IDENT +# attrib: +# '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S* [IDENT | STRING] S* ]? ']' +# pseudo +# ':' [ IDENT | FUNCTION S* IDENT? S* ')' ] + +selector1(p: ref Cparse): list of ref Select +{ + sel: list of ref Select; + c := p.get(); + if(c == IDENT) + sel = ref Select.Element(p.value) :: sel; + else if(c== '*') + sel = ref Select.Any("*") :: sel; + else + p.unget(c); +Sel: + for(;;){ + c = p.get(); + case c { + HASH => + sel = ref Select.ID(p.value) :: sel; + CLASS => + sel = ref Select.Class(p.value) :: sel; + '[' => + if(!itisa(p, IDENT)) + break; + name := p.value; + case c = p.get() { + '=' => + sel = ref Select.Attrib(name, "=", optaval(p)) :: sel; + INCLUDES => + sel = ref Select.Attrib(name, "~=", optaval(p)) :: sel; + DASHMATCH => + sel = ref Select.Attrib(name, "|=", optaval(p)) :: sel; + * => + sel = ref Select.Attrib(name, nil, nil) :: sel; + p.unget(c); + } + if((c = p.get()) != ']'){ + p.synerr("bad attribute syntax"); + p.unget(c); + break Sel; + } + PSEUDO => + case c = p.get() { + IDENT => + sel = ref Select.Pseudo(p.value) :: sel; + FUNCTION => + name := p.value; + case c = p.get() { + IDENT => + sel = ref Select.Pseudofn(name, lowercase(p.value)) :: sel; + ')' => + p.unget(c); + sel = ref Select.Pseudofn(name, nil) :: sel; + * => + p.synerr("bad pseudo-function syntax"); + p.unget(c); + break Sel; + } + if((c = p.get()) != ')'){ + p.synerr("missing ')' for pseudo-function"); + p.unget(c); + break Sel; + } + * => + p.synerr(sys->sprint("unexpected :pseudo: %s:%s", ptok(c), p.value)); + p.unget(c); + break Sel; + } + * => + p.unget(c); + break Sel; + } + # qualifiers must be adjacent to the first item, and each other + c = p.cs.getc(); + p.cs.ungetc(c); + if(isspace(c)) + break; + } + sl := sel; + for(sel = nil; sl != nil; sl = tl sl) + sel = hd sl :: sel; + return sel; +} + +optaval(p: ref Cparse): ref Value +{ + case c := p.get() { + IDENT => + return ref Value.Ident(' ', p.value); + STRING => + return ref Value.String(' ', p.value); + * => + p.unget(c); + return nil; + } +} + +# declaration: +# property ':' S* expr prio? +# | /* empty */ +# property: +# IDENT +# prio: +# IMPORTANT S* /* ! important */ + +declaration(p: ref Cparse): (ref Decl, string) +{ + c := p.get(); + if(c != IDENT){ + p.unget(c); + return (nil, nil); + } + prop := lowercase(p.value); + c = p.get(); + if(c != ':'){ + p.unget(c); + return (nil, "missing :"); + } + values := expr(p); + if(values == nil) + return (nil, "missing expression(s)"); + prio := 0; + if(p.look() == IMPORTANT){ + p.get(); + prio = 1; + } + return (ref Decl(prop, values, prio), nil); +} + +# expr: +# term [operator term]* +# operator: +# '/' | ',' | /* empty */ + +expr(p: ref Cparse): list of ref Value +{ + values: list of ref Value; + sep := ' '; + while((t := term(p, sep)) != nil){ + values = t :: values; + if((c := p.look()) == '/' || c == ',') + sep = p.get(); # need something fancier here? + else + sep = ' '; + } + vl := values; + for(values = nil; vl != nil; vl = tl vl) + values = hd vl :: values; + return values; +} + +# +# term: +# unary_operator? [NUMBER | PERCENTAGE | LENGTH | EMS | EXS | ANGLE | TIME | FREQ | function] +# | STRING | IDENT | URI | RGB | UNICODERANGE | hexcolour +# function: +# FUNCTION expr ')' +# unary_operator: +# '-' | '+' +# hexcolour: +# HASH S* +# +# LENGTH, EMS, ... FREQ have been combined into UNIT here +# +# TO DO: UNICODERANGE + +term(p: ref Cparse, sep: int): ref Value +{ + prefix: string; + case p.look(){ + '+' or '-' => + prefix[0] = p.get(); + } + c := p.get(); + case c { + NUMBER => + return ref Value.Number(sep, prefix+p.value); + PERCENTAGE => + return ref Value.Percentage(sep, prefix+p.value); + UNIT => + return ref Value.Unit(sep, prefix+p.value, p.suffix); + } + if(prefix != nil) + p.synerr("+/- before non-numeric"); + case c { + STRING => + return ref Value.String(sep, p.value); + IDENT => + return ref Value.Ident(sep, lowercase(p.value)); + URL => + return ref Value.Url(sep, p.value); + HASH => + # could check value: 3 or 6 hex digits + (r, g, b) := torgb(p.value); + if(r < 0) + return nil; + return ref Value.Hexcolour(sep, p.value, (r,g,b)); + FUNCTION => + name := p.value; + args := expr(p); + c = p.get(); + if(c != ')'){ + p.synerr(sys->sprint("missing ')' for function %s", name)); + return nil; + } + if(name == "rgb"){ + if(len args != 3){ + p.synerr("wrong number of arguments to rgb()"); + return nil; + } + r := colourof(hd args); + g := colourof(hd tl args); + b := colourof(hd tl tl args); + if(r < 0 || g < 0 || b < 0){ + p.synerr("invalid rgb() parameters"); + return nil; + } + return ref Value.RGB(sep, args, (r,g,b)); + } + return ref Value.Function(sep, name, args); + * => + p.unget(c); + return nil; + } +} + +torgb(s: string): (int, int, int) +{ + case len s { + 3 => + r := hex(s[0]); + g := hex(s[1]); + b := hex(s[2]); + if(r >= 0 && g >= 0 && b >= 0) + return ((r<<4)|r, (g<<4)|g, (b<<4)|b); + 6 => + v := 0; + for(i := 0; i < 6; i++){ + n := hex(s[i]); + if(n < 0) + return (-1, 0, 0); + v = (v<<4) | n; + } + return (v>>16, (v>>8)&16rFF, v&16rFF); + } + return (-1, 0, 0); +} + +colourof(v: ref Value): int +{ + pick r := v { + Number => + return clip(int r.value, 0, 255); + Percentage => + # just the integer part + return clip((int r.value*255 + 50)/100, 0, 255); + * => + return -1; + } +} + +clip(v: int, l: int, u: int): int +{ + if(v < l) + return l; + if(v > u) + return u; + return v; +} + +rev[T](l: list of T): list of T +{ + t: list of T; + for(; l != nil; l = tl l) + t = hd l :: t; + return t; +} + +Clex: adt { + context: list of int; # characters + input: string; + lim: int; + n: int; + lineno: int; + + new: fn(s: string, lno: int): ref Clex; + getc: fn(cs: self ref Clex): int; + ungetc: fn(cs: self ref Clex, c: int); + synerr: fn(nil: self ref Clex, s: string); +}; + +Clex.new(s: string, lno: int): ref Clex +{ + return ref Clex(nil, s, len s, 0, lno); +} + +Clex.getc(cs: self ref Clex): int +{ + if(cs.context != nil){ + c := hd cs.context; + cs.context = tl cs.context; + return c; + } + if(cs.n >= cs.lim) + return -1; + c := cs.input[cs.n++]; + if(c == '\n') + cs.lineno++; + return c; +} + +Clex.ungetc(cs: self ref Clex, c: int) +{ + cs.context = c :: cs.context; +} + +Clex.synerr(cs: self ref Clex, s: string) +{ + if(printdiag) + sys->fprint(sys->fildes(2), "%d: err: %s\n", cs.lineno, s); +} + +csslex(cs: ref Clex): (int, string, string) +{ + for(;;){ + c := skipws(cs); + if(c < 0) + return (-1, nil, nil); + case c { + '<' => + if(seq(cs, "!--")) + break; # <!-- ignore HTML comment start (CDO) + return (c, nil, nil); + '-' => + if(seq(cs, "->")) + break; # --> ignore HTML comment end (CDC) + return (c, nil, nil); + ':' => + c = cs.getc(); + cs.ungetc(c); + if(isnamec(c, 0)) + return (PSEUDO, nil, nil); + return (':', nil, nil); + '#' => + c = cs.getc(); + if(isnamec(c, 1)) + return (HASH, name(cs, c), nil); + cs.ungetc(c); + return ('#', nil, nil); + '/' => + if(subseq(cs, '*', 1, 0)){ + comment(cs); + break; + } + return (c, nil, nil); + '\'' or '"' => + return (STRING, quotedstring(cs, c), nil); + '0' to '9' or '.' => + if(c == '.'){ + d := cs.getc(); + cs.ungetc(d); + if(!isdigit(d)){ + if(isnamec(d, 1)) + return (CLASS, name(cs, cs.getc()), nil); + return ('.', nil, nil); + } + # apply CSS2 treatment: .55 is a number not a class + } + val := number(cs, c); + c = cs.getc(); + if(c == '%') + return (PERCENTAGE, val, "%"); + if(isnamec(c, 0)) # use CSS2 interpetation + return (UNIT, val, lowercase(name(cs, c))); + cs.ungetc(c); + return (NUMBER, val, nil); + '\\' => + d := cs.getc(); + if(d >= ' ' && d <= '~' || islatin1(d)){ # probably should handle it in name + wd := name(cs, d); + return (IDENT, "\\"+wd, nil); + } + cs.ungetc(d); + return ('\\', nil, nil); + '@' => + c = cs.getc(); + if(isnamec(c, 0)) # @something + return (ATKEYWORD, "@"+lowercase(name(cs,c)), nil); + cs.ungetc(c); + return ('@', nil, nil); + '!' => + c = skipws(cs); + if(isnamec(c, 0)){ # !something + wd := name(cs, c); + if(lowercase(wd) == "important") + return (IMPORTANT, nil, nil); + pushback(cs, wd); + }else + cs.ungetc(c); + return ('!', nil, nil); + '~' => + if(subseq(cs, '=', 1, 0)) + return (INCLUDES, "~=", nil); + return ('~', nil, nil); + '|' => + if(subseq(cs, '=', 1, 0)) + return (DASHMATCH, "|=", nil); + return ('|', nil, nil); + * => + if(isnamec(c, 0)){ + wd := name(cs, c); + d := cs.getc(); + if(d != '('){ + cs.ungetc(d); + return (IDENT, wd, nil); + } + val := lowercase(wd); + if(val == "url") + return (URL, url(cs), nil); # bizarre special case + return (FUNCTION, val, nil); + } + return (c, nil, nil); + } + + } +} + +skipws(cs: ref Clex): int +{ + for(;;){ + while((c := cs.getc()) == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f') + ; + if(c != '/') + return c; + c = cs.getc(); + if(c != '*'){ + cs.ungetc(c); + return '/'; + } + comment(cs); + } +} + +seq(cs: ref Clex, s: string): int +{ + for(i := 0; i < len s; i++) + if((c := cs.getc()) != s[i]) + break; + if(i == len s) + return 1; + cs.ungetc(c); + while(i > 0) + cs.ungetc(s[--i]); + if(c < 0) + return -1; + return 0; +} + +subseq(cs: ref Clex, a: int, t: int, e: int): int +{ + if((c := cs.getc()) != a){ + cs.ungetc(c); + return e; + } + return t; +} + +pushback(cs: ref Clex, wd: string) +{ + for(i := len wd; --i >= 0;) + cs.ungetc(wd[i]); +} + +comment(cs: ref Clex) +{ + while((c := cs.getc()) != '*' || (c = cs.getc()) != '/') + if(c < 0) { + # end of file in comment + break; + } +} + +number(cs: ref Clex, c: int): string +{ + s: string; + for(; isdigit(c); c = cs.getc()) + s[len s] = c; + if(c != '.'){ + cs.ungetc(c); + return s; + } + if(!isdigit(c = cs.getc())){ + cs.ungetc(c); + cs.ungetc('.'); + return s; + } + s[len s] = '.'; + do{ + s[len s] = c; + }while(isdigit(c = cs.getc())); + cs.ungetc(c); + return s; +} + +name(cs: ref Clex, c: int): string +{ + s: string; + for(; isnamec(c, 1); c = cs.getc()){ + s[len s] = c; + if(c == '\\'){ + c = cs.getc(); + if(isescapable(c)) + s[len s] = c; + } + } + cs.ungetc(c); + return s; +} + +isescapable(c: int): int +{ + return c >= ' ' && c <= '~' || isnamec(c, 1); +} + +islatin1(c: int): int +{ + return c >= 16rA1 && c <= 16rFF; # printable latin-1 +} + +isnamec(c: int, notfirst: int): int +{ + return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '\\' || + notfirst && (c >= '0' && c <= '9' || c == '-') || + c >= 16rA1 && c <= 16rFF; # printable latin-1 +} + +isxdigit(c: int): int +{ + return c>='0' && c<='9' || c>='a'&&c<='f' || c>='A'&&c<='F'; +} + +isdigit(c: int): int +{ + return c >= '0' && c <= '9'; +} + +isspace(c: int): int +{ + return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; +} + +hex(c: int): int +{ + if(c >= '0' && c <= '9') + return c-'0'; + if(c >= 'A' && c <= 'F') + return c-'A' + 10; + if(c >= 'a' && c <= 'f') + return c-'a' + 10; + return -1; +} + +quotedstring(cs: ref Clex, delim: int): string +{ + s: string; + while((c := cs.getc()) != delim){ + if(c < 0){ + cs.synerr("end-of-file in string"); + return s; + } + if(c == '\\'){ + c = cs.getc(); + if(c < 0){ + cs.synerr("end-of-file in string"); + return s; + } + if(isxdigit(c)){ + # unicode escape + n := 0; + for(i := 0;;){ + n = (n<<4) | hex(c); + c = cs.getc(); + if(!isxdigit(c) || ++i >= 6){ + if(!isspace(c)) + cs.ungetc(c); # CSS2 ignores the first white space following + break; + } + } + s[len s] = n; + }else if(c == '\n'){ + ; # escaped newline + }else if(isescapable(c)) + s[len s] = c; + }else if(c) + s[len s] = c; + } + return s; +} + +url(cs: ref Clex): string +{ + s: string; + c := skipws(cs); + if(c != '"' && c != '\''){ # not a quoted string + while(c != ' ' && c != '\n' && c != '\'' && c != '"' && c != ')'){ + s[len s] = c; + c = cs.getc(); + if(c == '\\'){ + c = cs.getc(); + if(c < 0){ + cs.synerr("end of file in url parameter"); + break; + } + if(c == ' ' || c == '\'' || c == '"' || c == ')') + s[len s] = c; + else{ + cs.synerr("invalid escape sequence in url"); + s[len s] = '\\'; + s[len s] = c; + } + c = cs.getc(); + } + } + cs.ungetc(c); +# if(s == nil) +# p.synerr("empty parameter to url"); + }else + s = quotedstring(cs, c); + if((c = skipws(cs)) != ')'){ + cs.synerr("unclosed parameter to url"); + cs.ungetc(c); + } + return s; +} + +lowercase(s: string): string +{ + for(i := 0; i < len s; i++) + if((c := s[i]) >= 'A' && c <= 'Z') + s[i] = c-'A' + 'a'; + return s; +} diff --git a/appl/lib/w3c/mkfile b/appl/lib/w3c/mkfile new file mode 100644 index 00000000..90f4d043 --- /dev/null +++ b/appl/lib/w3c/mkfile @@ -0,0 +1,17 @@ +<../../../mkconfig + +TARG=\ + css.dis\ + xpointers.dis\ + +MODULES= + +SYSMODULES= \ + sys.m\ + bufio.m\ + css.m\ + xpointers.m\ + +DISBIN=$ROOT/dis/lib/w3c + +<$ROOT/mkfiles/mkdis diff --git a/appl/lib/w3c/xpointers.b b/appl/lib/w3c/xpointers.b new file mode 100644 index 00000000..0d7c231a --- /dev/null +++ b/appl/lib/w3c/xpointers.b @@ -0,0 +1,858 @@ +implement Xpointers; + +# +# Copyright © 2005 Vita Nuova Holdings Oimited +# + +include "sys.m"; + sys: Sys; + +include "xpointers.m"; + +init() +{ + sys = load Sys Sys->PATH; +} + +# +# XPointer framework syntax +# +# Pointer ::= Shorthand | SchemeBased +# Shorthand ::= NCName # from [XML-Names] +# SchemeBased ::= PointerPart (S? PointerPart)* +# PointerPart ::= SchemeName '(' SchemeData ')' +# SchemeName ::= QName # from [XML-Names] +# SchemeData ::= EscapedData* +# EscapedData ::= NormalChar | '^(' | '^)' | '^^' | '(' SchemeData ')' +# NormalChar ::= UnicodeChar - [()^] +# UnicodeChar ::= [#x0 - #x10FFFF] + +framework(s: string): (string, list of (string, string, string), string) +{ + (q, nm, i) := name(s, 0); + if(i >= len s){ # Shorthand + if(q != nil) + return (nil, nil, "shorthand pointer must be unqualified name"); + if(nm == nil) + return (nil, nil, "missing pointer name"); + return (nm, nil, nil); + } + # must be SchemeBased + l: list of (string, string, string); + for(;;){ + if(nm == nil){ + if(q != nil) + return (nil, nil, sys->sprint("prefix but no local part in name at %d", i)); + return (nil, nil, sys->sprint("expected name at %d", i)); + } + if(i >= len s || s[i] != '(') + return (nil, nil, sys->sprint("expected '(' at %d", i)); + o := i++; + a := ""; + nesting := 0; + for(; i < len s && ((c := s[i]) != ')' || nesting); i++){ + case c { + '^' => + if(i+1 >= len s) + return (nil, nil, "unexpected eof after ^"); + c = s[++i]; + if(c != '(' && c != ')' && c != '^') + return (nil, nil, sys->sprint("invalid escape ^%c at %d", c, i)); + '(' => + nesting++; + ')' => + if(--nesting < 0) + return (nil, nil, sys->sprint("unbalanced ) at %d", i)); + } + a[len a] = c; + } + if(i >= len s) + return (nil, nil, sys->sprint("unbalanced ( at %d", o)); + l = (q, nm, a) :: l; + if(++i == len s) + break; + while(i < len s && isspace(s[i])) + i++; + (q, nm, i) = name(s, i); + } + rl: list of (string, string, string); + for(; l != nil; l = tl l) + rl = hd l :: rl; + return (nil, rl, nil); +} + +isspace(c: int): int +{ + return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\v' || c == '\f'; +} + +# +# QName ::= (Prefix ':')? LocalPart +# Prefix ::= NCName +# LocalPart ::= NCName +# +#NCName :: (Oetter | '_') NCNameChar* +#NCNameChar :: Oetter | Digit | '.' | '-' | '_' | CombiningChar | Extender + +name(s: string, o: int): (string, string, int) +{ + (ns, i) := ncname(s, o); + if(i >= len s || s[i] != ':') + return (nil, ns, i); + (nm, j) := ncname(s, i+1); + if(j == i+1) + return (nil, ns, i); # assume it's a LocalPart followed by ':' + return (ns, nm, j); +} + +ncname(s: string, o: int): (string, int) +{ + if(o >= len s || !isalnum(c := s[o]) && c != '_' || c >= '0' && c <= '9') + return (nil, o); # missing or invalid start character + for(i := o; i < len s && isnamec(s[i]); i++) + ; + return (s[o:i], i); +} + +isnamec(c: int): int +{ + return isalnum(c) || c == '_' || c == '-' || c == '.'; +} + +isalnum(c: int): int +{ + # + # Hard to get absolutely right without silly amount of character data. + # Use what we know about ASCII + # and assume anything above the Oatin control characters is + # potentially an alphanumeric. + # + if(c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9') + return 1; # usual case + if(c <= ' ') + return 0; + if(c > 16rA0) + return 1; # non-ASCII + return 0; +} + +# schemes: xpointer(), xmlns(), element() + +# xmlns() +# XmlnsSchemeData ::= NCName S? '=' S? EscapedNamespaceName +# EscapedNamespaceName ::= EscapedData* + +xmlns(s: string): (string, string, string) +{ + (nm, i) := ncname(s, 0); + if(nm == nil) + return (nil, nil, "illegal namespace name"); + while(i < len s && isspace(s[i])) + i++; + if(i >= len s || s[i++] != '=') + return (nil, nil, "illegal xmlns declaration"); + while(i < len s && isspace(s[i])) + i++; + return (nm, s[i:], nil); +} + +# element() +# ElementSchemeData ::= (NCName ChildSequence?) | ChildSequence +# ChildSequence ::= ('/' [1-9] [0-9]*)+ + +element(s: string): (string, list of int, string) +{ + nm: string; + i := 0; + if(s != nil && s[0] != '/'){ + (nm, i) = ncname(s, 0); + if(nm == nil) + return (nil, nil, "illegal element name"); + } + l: list of int; + do{ + if(i >= len s || s[i++] != '/') + return (nil, nil, "illegal child sequence (expected '/')"); + v := 0; + do{ + if(i >= len s || !isdigit(s[i])) + return (nil, nil, "illegal child sequence (expected integer)"); + v = v*10 + s[i]-'0'; + }while(++i < len s && s[i] != '/'); + l = v :: l; + }while(i < len s); + rl: list of int; + for(; l != nil; l = tl l) + rl = hd l :: rl; + return (nm, rl, nil); +} + +# xpointer() +# XpointerSchemeData ::= Expr # from Xpath, with new functions and data types + +xpointer(s: string): (ref Xpath, string) +{ + p := ref Parse(ref Rd(s, 0, 0), nil); + { + e := expr(p, 0); + if(p.r.i < len s) + synerr("missing operator"); + return (e, nil); + }exception e{ + "syntax error*" => + return (nil, e); + * => + raise; + } +} + +Lerror, Ldslash, Lint, Lreal, Llit, Lvar, Ldotdot, Lop, Laxis, Lfn: con 'a'+iota; # internal lexical items + +Keywd: adt { + name: string; + val: int; +}; + +axes: array of Keywd = array[] of { + ("ancestor", Aancestor), + ("ancestor-or-self", Aancestor_or_self), + ("attribute", Aattribute), + ("child", Achild), + ("descendant", Adescendant), + ("descendant-or-self", Adescendant_or_self), + ("following", Afollowing), + ("following-sibling", Afollowing_sibling), + ("namespace", Anamespace), + ("parent", Aparent), + ("preceding", Apreceding), + ("preceding-sibling", Apreceding_sibling), + ("self", Aself), +}; + +keywds: array of Keywd = array[] of { + ("and", Oand), + ("comment", Onodetype), + ("div", Odiv), + ("mod", Omod), + ("node", Onodetype), + ("or", Oor), + ("processing-instruction", Onodetype), + ("text", Onodetype), +}; + +iskeywd(s: string): int +{ + return look(keywds, s); +} + +look(k: array of Keywd, s: string): int +{ + for(i := 0; i < len k; i++) + if(k[i].name == s) + return k[i].val; + return 0; +} + +lookname(k: array of Keywd, v: int): string +{ + for(i := 0; i < len k; i++) + if(k[i].val == v) + return k[i].name; + return nil; +} + +prectab := array[] of { + array[] of {Oor}, + array[] of {Oand}, + array[] of {'=', One}, + array[] of {'<', Ole, '>', Oge}, + array[] of {'+', '-'}, + array[] of {Omul, Odiv, Omod}, + array[] of {Oneg}, # unary '-' + array[] of {'|'}, # UnionExpr +}; + +isop(t: int, p: array of int): int +{ + if(t >= 0) + for(j := 0; j < len p; j++) + if(t == p[j]) + return 1; + return 0; +} + +# Expr ::= OrExpr +# UnionExpr ::= PathExpr | UnionExpr '|' PathExpr +# PathExpr ::= LocationPath | FilterExpr | FilterExpr '/' RelativeLocationPath | +# FilterExpr '//' RelativeLocationPath +# OrExpr ::= AndExpr | OrExpr 'or' AndExpr +# AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr +# EqualityExpr ::= RelationalExpr | EqualityExpr '=' RelationalExpr | EqualityExpr '!=' RelationalExpr +# RelationalExpr ::= AdditiveExpr | RelationalExpr '<' AdditiveExpr | RelationalExpr '>' AdditiveExpr | +# RelationalExpr '<=' AdditiveExpr | RelationalExpr '>=' AdditiveExpr +# AdditiveExpr ::= MultiplicativeExpr | AdditiveExpr '+' MultiplicativeExpr | AdditiveExpr '-' MultiplicativeExpr +# MultiplicativeExpr ::= UnaryExpr | MultiplicativeExpr MultiplyOperator UnaryExpr | +# MultiplicativeExpr 'div' UnaryExpr | MultiplicativeExpr 'mod' UnaryExpr +# UnaryExpr ::= UnionExpr | '-' UnaryExpr + +expr(p: ref Parse, k: int): ref Xpath +{ + if(k >= len prectab) + return pathexpr(p); + if(prectab[k][0] == Oneg){ # unary '-' + if(p.look() == '-'){ + p.get(); + return ref Xpath.E(Oneg, expr(p,k+1), nil); + } + # must be UnionExpr + k++; + } + e := expr(p, k+1); + while(isop(p.look(), prectab[k])){ + o := p.get().t0; + e = ref Xpath.E(o, e, expr(p, k+1)); # +assoc[k] + } + return e; +} + +# PathExpr ::= LocationPath | FilterExpr ( ('/' | '//') RelativeLocationPath ) +# FilterExpr ::= PrimaryExpr | FilterExpr Predicate => PrimaryExpr Predicate* + +pathexpr(p: ref Parse): ref Xpath +{ + # LocationPath? + case p.look() { + '.' or Ldotdot or Laxis or '@' or Onametest or Onodetype or '*' => + return locationpath(p, 0); + '/' or Ldslash => + return locationpath(p, 1); + } + # FilterExpr + e := primary(p); + while(p.look() == '[') + e = ref Xpath.E(Ofilter, e, predicate(p)); + if((o := p.look()) == '/' || o == Ldslash) + e = ref Xpath.E(Opath, e, locationpath(p, 0)); + return e; +} + +# LocationPath ::= RelativeLocationPath | AbsoluteLocationPath +# AbsoluteLocationPath ::= '/' RelativeLocationPath? | AbbreviatedAbsoluteLocationPath +# RelativeLocationPath ::= Step | RelativeLocationPath '/' Step +# AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath +# AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step + +locationpath(p: ref Parse, abs: int): ref Xpath +{ + # // => /descendent-or-self::node()/ + pl: list of ref Xstep; + o := p.look(); + if(o != '/' && o != Ldslash){ + s := step(p); + if(s == nil) + synerr("expected Step in LocationPath"); + pl = s :: pl; + } + while((o = p.look()) == '/' || o == Ldslash){ + p.get(); + if(o == Ldslash) + pl = ref Xstep(Adescendant_or_self, Onodetype, nil, "node", nil, nil) :: pl; + s := step(p); + if(s == nil){ + if(abs && pl == nil) + break; # it's just an initial '/' + synerr("expected Step in LocationPath"); + } + pl = s :: pl; + } + return ref Xpath.Path(abs, rev(pl)); +} + +# Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep +# AxisSpecifier ::= AxisName '::' | AbbreviatedAxisSpecifier +# AxisName := ... # long list +# NodeTest ::= NameTest | NodeType '(' ')' +# Predicate ::= '[' PredicateExpr ']' +# PredicateExpr ::= Expr +# AbbreviatedStep ::= '.' | '..' +# AbbreviatedAxisSpecifier ::= '@'? + +step(p: ref Parse): ref Xstep +{ + # AxisSpecifier ... | AbbreviatedStep + (o, ns, nm) := p.get(); + axis := Achild; + case o { + '.' => + return ref Xstep(Aself, Onodetype, nil, "node", nil, nil); # self::node() + Ldotdot => + return ref Xstep(Aparent, Onodetype, nil, "node", nil, nil); # parent::node() + Laxis => + axis = look(axes, ns); + (o, ns, nm) = p.get(); + '@' => + axis = Aattribute; + (o, ns, nm) = p.get(); + * => + ; + } + + if(o == '*'){ + o = Onametest; + nm = "*"; + ns = nil; + } + + # NodeTest ::= NameTest | NodeType '(' ')' + if(o != Onametest && o != Onodetype){ + p.unget((o, ns, nm)); + return nil; + } + + arg: string; + if(o == Onodetype){ # '(' ... ')' + expect(p, '('); + # grammar is wrong: processing-instruction can have optional literal + if(nm == "processing-instruction" && p.look() == Llit) + arg = p.get().t1; + expect(p, ')'); + } + + # Predicate* + pl: list of ref Xpath; + while((pe := predicate(p)) != nil) + pl = pe :: pl; + return ref Xstep(axis, o, ns, nm, arg, rev(pl)); +} + +# PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall +# FunctionCall ::= FunctionName '(' (Argument ( ',' Argument)*)? ')' +# Argument ::= Expr + +primary(p: ref Parse): ref Xpath +{ + (o, ns, nm) := p.get(); + case o { + Lvar => + return ref Xpath.Var(ns, nm); + '(' => + e := expr(p, 0); + expect(p, ')'); + return e; + Llit => + return ref Xpath.Str(ns); + Lint => + return ref Xpath.Int(big ns); + Lreal => + return ref Xpath.Real(real ns); + Lfn => + expect(p, '('); + al: list of ref Xpath; + if(p.look() != ')'){ + for(;;){ + al = expr(p, 0) :: al; + if(p.look() != ',') + break; + p.get(); + } + al = rev(al); + } + expect(p, ')'); + return ref Xpath.Fn(ns, nm, al); + * => + synerr("invalid PrimaryExpr"); + return nil; + } +} + +# Predicate ::= '[' PredicateExpr ']' +# PredicateExpr ::= Expr + +predicate(p: ref Parse): ref Xpath +{ + l := p.get(); + if(l.t0 != '['){ + p.unget(l); + return nil; + } + e := expr(p, 0); + expect(p, ']'); + return e; +} + +expect(p: ref Parse, t: int) +{ + l := p.get(); + if(l.t0 != t) + synerr(sys->sprint("expected '%c'", t)); +} + +Xpath.text(e: self ref Xpath): string +{ + if(e == nil) + return "nil"; + pick r := e { + E => + if(r.r == nil) + return sys->sprint("(%s%s)", opname(r.op), r.l.text()); + if(r.op == Ofilter) + return sys->sprint("%s[%s]", r.l.text(), r.r.text()); + return sys->sprint("(%s%s%s)", r.l.text(), opname(r.op), r.r.text()); + Fn => + a := ""; + for(l := r.args; l != nil; l = tl l) + a += sys->sprint(",%s", (hd l).text()); + if(a != "") + a = a[1:]; + return sys->sprint("%s(%s)", qual(r.ns, r.name), a); + Var => + return sys->sprint("$%s", qual(r.ns, r.name)); + Path => + if(r.abs) + t := "/"; + else + t = ""; + for(l := r.steps; l != nil; l = tl l){ + if(t != nil && t != "/") + t += "/"; + t += (hd l).text(); + } + return t; + Int => + return sys->sprint("%bd", r.val); + Real => + return sys->sprint("%g", r.val); + Str => + return sys->sprint("%s", str(r.s)); + } +} + +qual(ns: string, nm: string): string +{ + if(ns != nil) + return ns+":"+nm; + return nm; +} + +str(s: string): string +{ + for(i := 0; i < len s; i++) + if(s[i] == '\'') + return sys->sprint("\"%s\"", s); + return sys->sprint("'%s'", s); +} + +opname(o: int): string +{ + case o { + One => return "!="; + Ole => return "<="; + Oge => return ">="; + Omul => return "*"; + Odiv => return " div "; + Omod => return " mod "; + Oand => return " and "; + Oor => return " or "; + Oneg => return "-"; + Ofilter => return " op_filter "; + Opath => return "/"; + * => return sys->sprint(" %c ", o); + } +} + +Xstep.text(s: self ref Xstep): string +{ + t := sys->sprint("%s::", Xstep.axisname(s.axis)); + case s.op { + Onametest => + if(s.ns == "*" && s.name == "*") + t += "*"; + else + t += qual(s.ns, s.name); + Onodetype => + if(s.arg != nil) + t += sys->sprint("%s(%s)", s.name, str(s.arg)); + else + t += sys->sprint("%s()", s.name); + } + for(l := s.preds; l != nil; l = tl l) + t += sys->sprint("[%s]", (hd l).text()); + return t; +} + +Xstep.axisname(n: int): string +{ + return lookname(axes, n); +} + +# ExprToken ::= '(' | ')' | '[' | ']' | '.' | '..' | '@' | ',' | '::' | +# NameTest | NodeType | Operator | FunctionName | AxisName | +# Literal | Number | VariableReference +# Operator ::= OperatorName | MultiplyOperator | '/' | '//' | '|' | '+' | '' | '=' | '!=' | '<' | '<=' | '>' | '>=' +# MultiplyOperator ::= '*' +# FunctionName ::= QName - NodeType +# VariableReference ::= '$' QName +# NameTest ::= '*' | NCName ':' '*' | QName +# NodeType ::= 'comment' | 'text' | 'processing-instruction' | 'node' +# + +Lex: type (int, string, string); + +Parse: adt { + r: ref Rd; + pb: list of Lex; # push back + + look: fn(p: self ref Parse): int; + get: fn(p: self ref Parse): Lex; + unget: fn(p: self ref Parse, t: Lex); +}; + +Parse.get(p: self ref Parse): Lex +{ + if(p.pb != nil){ + h := hd p.pb; + p.pb = tl p.pb; + return h; + } + return lex(p.r); +} + +Parse.look(p: self ref Parse): int +{ + t := p.get(); + p.unget(t); + return t.t0; +} + +Parse.unget(p: self ref Parse, t: Lex) +{ + p.pb = t :: p.pb; +} + +lex(r: ref Rd): Lex +{ + l := lex0(r); + r.prev = l.t0; + return l; +} + +# disambiguating rules are D1 to D3 + +# D1. preceding token p && p not in {'@', '::', '(', '[', ',', Operator} then '*' is MultiplyOperator +# and NCName must be OperatorName + +xop(t: int): int +{ + case t { + -1 or 0 or '@' or '(' or '[' or ',' or Lop or Omul or + '/' or Ldslash or '|' or '+' or '-' or '=' or One or '<' or Ole or '>' or Oge or + Oand or Oor or Omod or Odiv or Laxis => + return 0; + } + return 1; +} + +# UnaryExpr ::= UnionExpr | '-' UnaryExpr +# ExprToken ::= ... | +# NameTest | NodeType | Operator | FunctionName | AxisName | +# Literal | Number | VariableReference +# Operator ::= OperatorName | MultiplyOperator | '/' | '//' | '|' | '+' | '' | '=' | '!=' | '<' | '<=' | '>' | '>=' +# MultiplyOperator ::= '*' + +lex0(r: ref Rd): Lex +{ + while(isspace(r.look())) + r.get(); + case c := r.get() { + -1 or + '(' or ')' or '[' or ']' or '@' or ',' or '+' or '-' or '|' or '=' or ':' => + # singletons ('::' only valid after name, see below) + return (c, nil, nil); + '/' => + return subseq(r, '/', Ldslash, '/'); + '!' => + return subseq(r, '=', One, '!'); + '<' => + return subseq(r, '=', Ole, '<'); + '>' => + return subseq(r, '=', Oge, '>'); + '*' => + if(xop(r.prev)) + return (Omul, nil, nil); + return (c, nil, nil); + '.' => + case r.look() { + '0' to '9' => + (v, nil) := number(r, r.get()); + return (Lreal, v, nil); + '.' => + r.get(); + return (Ldotdot, nil, nil); + * => + return ('.', nil, nil); + } + '$' => + # variable reference + (ns, nm, i) := name(r.s, r.i); + if(ns == nil && nm == nil) + return (Lerror, nil, nil); + r.i = i; + return (Lvar, ns, nm); + '0' to '9' => + (v, f) := number(r, c); + if(f) + return (Lreal, v, nil); + return (Lint, v, nil); + '"' or '\'' => + return (Llit, literal(r, c), nil); + * => + if(isalnum(c) || c == '_'){ + # QName/NCName + r.unget(); + (ns, nm, i) := name(r.s, r.i); + if(ns == nil && nm == nil) + return (Lerror, nil, nil); + r.i = i; + if(xop(r.prev)){ + if(ns == nil){ + o := iskeywd(nm); + if(o != Laxis && o != Onodetype) + return (o, nil, nil); + } + return (Lop, ns, nm); + } + while(isspace(r.look())) + r.get(); + case r.look() { + '(' => # D2: NCName '(' =>NodeType or FunctionName + if(ns == nil && iskeywd(nm) == Onodetype) + return (Onodetype, nil, nm); + return (Lfn, ns, nm); # possibly NodeTest + ':' => # D3: NCName '::' => AxisName + r.get(); + case r.look() { + ':' => + if(ns == nil && look(axes, nm) != 0){ + r.get(); + return (Laxis, nm, nil); + } + '*' => + # NameTest ::= ... | NCName ':' '*' + if(ns == nil){ + r.get(); + return (Onametest, nm, "*"); + } + } + r.unget(); # put back the ':' + # NameTest ::= '*' | NCName ':' '*' | QName + } + return (Onametest, ns, nm); # actually NameTest + } + # unexpected character + } + return (Lerror, nil, nil); +} + +subseq(r: ref Rd, a: int, t: int, e: int): Lex +{ + if(r.look() != a) + return (e, nil, nil); + r.get(); + return (t, nil, nil); +} + +# Literal ::= '"'[^"]*'"' | "'"[^']* "'" + +literal(r: ref Rd, delim: int): string +{ + s: string; + while((c := r.get()) != delim){ + if(c < 0){ + synerr("missing string terminator"); + return s; + } + if(c) + s[len s] = c; # could slice r.s + } + return s; +} + +# +# Number ::= Digits('.' Digits?)? | '.' Digits +# Digits ::= [0-9]+ +# +number(r: ref Rd, c: int): (string, int) +{ + s: string; + for(; isdigit(c); c = r.get()) + s[len s] = c; + if(c != '.'){ + if(c >= 0) + r.unget(); + return (s, 0); + } + if(!isdigit(c = r.get())){ + if(c >= 0) + r.unget(); + r.unget(); # the '.' + return (s, 0); + } + s[len s] = '.'; + do{ + s[len s] = c; + }while(isdigit(c = r.get())); + if(c >= 0) + r.unget(); + return (s, 1); +} + +isdigit(c: int): int +{ + return c>='0' && c<='9'; +} + +Rd: adt{ + s: string; + i: int; + prev: int; # previous token + + get: fn(r: self ref Rd): int; + look: fn(r: self ref Rd): int; + unget: fn(r: self ref Rd); +}; + +Rd.get(r: self ref Rd): int +{ + if(r.i >= len r.s) + return -1; + return r.s[r.i++]; +} + +Rd.look(r: self ref Rd): int +{ + if(r.i >= len r.s) + return -1; + return r.s[r.i]; +} + +Rd.unget(r: self ref Rd) +{ + if(r.i > 0) + r.i--; +} + +rev[T](l: list of T): list of T +{ + rl: list of T; + for(; l != nil; l = tl l) + rl = hd l :: rl; + return rl; +} + +synerr(s: string) +{ + raise "syntax error: "+s; +} + +# to do: +# dictionary? |
