summaryrefslogtreecommitdiff
path: root/appl/lib/w3c
diff options
context:
space:
mode:
Diffstat (limited to 'appl/lib/w3c')
-rw-r--r--appl/lib/w3c/css.b1019
-rw-r--r--appl/lib/w3c/mkfile17
-rw-r--r--appl/lib/w3c/xpointers.b858
3 files changed, 1894 insertions, 0 deletions
diff --git a/appl/lib/w3c/css.b b/appl/lib/w3c/css.b
new file mode 100644
index 00000000..9b1475b5
--- /dev/null
+++ b/appl/lib/w3c/css.b
@@ -0,0 +1,1019 @@
+implement CSS;
+
+#
+# CSS2 parsing module
+#
+# CSS2.1 style sheets
+#
+# Copyright © 2001, 2005 Vita Nuova Holdings Limited. All rights reserved.
+#
+
+include "sys.m";
+ sys: Sys;
+
+include "css.m";
+
+B, NUMBER, IDENT, STRING, URL, PERCENTAGE, UNIT,
+ HASH, ATKEYWORD, IMPORTANT, IMPORT, PSEUDO, CLASS, INCLUDES,
+ DASHMATCH, FUNCTION: con 16rE000+iota;
+
+toknames := array[] of{
+ B-B => "Zero",
+ NUMBER-B => "NUMBER",
+ IDENT-B => "IDENT",
+ STRING-B => "STRING",
+ URL-B => "URL",
+ PERCENTAGE-B => "PERCENTAGE",
+ UNIT-B => "UNIT",
+ HASH-B => "HASH",
+ ATKEYWORD-B => "ATKEYWORD",
+ IMPORTANT-B => "IMPORTANT",
+ CLASS-B => "CLASS",
+ INCLUDES-B => "INCLUDES",
+ DASHMATCH-B => "DASHMATCH",
+ PSEUDO-B => "PSEUDO",
+ FUNCTION-B => "FUNCTION",
+};
+
+printdiag := 0;
+
+init(d: int)
+{
+ sys = load Sys Sys->PATH;
+ printdiag = d;
+}
+
+parse(s: string): (ref Stylesheet, string)
+{
+ return stylesheet(ref Cparse(-1, 0, nil, nil, Clex.new(s,1)));
+}
+
+parsedecl(s: string): (list of ref Decl, string)
+{
+ return (declarations(ref Cparse(-1, 0, nil, nil, Clex.new(s,0))), nil);
+}
+
+ptok(c: int): string
+{
+ if(c < 0)
+ return "eof";
+ if(c == 0)
+ return "zero?";
+ if(c >= B)
+ return sys->sprint("%s", toknames[c-B]);
+ return sys->sprint("%c", c);
+}
+
+Cparse: adt {
+ lookahead: int;
+ eof: int;
+ value: string;
+ suffix: string;
+ cs: ref Clex;
+
+ get: fn(nil: self ref Cparse): int;
+ look: fn(nil: self ref Cparse): int;
+ unget: fn(nil: self ref Cparse, tok: int);
+ skipto: fn(nil: self ref Cparse, followset: string): int;
+ synerr: fn(nil: self ref Cparse, s: string);
+};
+
+Cparse.get(p: self ref Cparse): int
+{
+ if((c := p.lookahead) >= 0){
+ p.lookahead = -1;
+ return c;
+ }
+ if(p.eof)
+ return -1;
+ (c, p.value, p.suffix) = csslex(p.cs);
+ if(c < 0)
+ p.eof = 1;
+ if(printdiag > 1)
+ sys->print("lex: %s v=%s s=%s\n", ptok(c), p.value, p.suffix);
+ return c;
+}
+
+Cparse.look(p: self ref Cparse): int
+{
+ c := p.get();
+ p.unget(c);
+ return c;
+}
+
+Cparse.unget(p: self ref Cparse, c: int)
+{
+ if(p.lookahead >= 0)
+ raise "css: internal error: Cparse.unget";
+ p.lookahead = c; # note that p.value and p.suffix are assumed to be those of c
+}
+
+Cparse.skipto(p: self ref Cparse, followset: string): int
+{
+ while((c := p.get()) >= 0)
+ for(i := 0; i < len followset; i++)
+ if(followset[i] == c){
+ p.unget(c);
+ return c;
+ }
+ return -1;
+}
+
+Cparse.synerr(p: self ref Cparse, s: string)
+{
+ p.cs.synerr(s);
+}
+
+#
+# stylesheet:
+# ["@charset" STRING ';']?
+# [CDO|CDC]* [import [CDO|CDC]*]*
+# [[ruleset | media | page ] [CDO|CDC]*]*
+# import:
+# "@import" [STRING|URL] [ medium [',' medium]*]? ';'
+# media:
+# "@media" medium [',' medium]* '{' ruleset* '}'
+# medium:
+# IDENT
+# page:
+# "@page" pseudo_page? '{' declaration [';' declaration]* '}'
+# pseudo_page:
+# ':' IDENT
+#
+
+stylesheet(p: ref Cparse): (ref Stylesheet, string)
+{
+ charset: string;
+ if(atkeywd(p, "@charset")){
+ if(itisa(p, STRING)){
+ charset = p.value;
+ itisa(p, ';');
+ }else
+ p.synerr("bad @charset declaration");
+ }
+ imports: list of ref Import;
+ while(atkeywd(p, "@import")){
+ c := p.get();
+ if(c == STRING || c == URL){
+ name := p.value;
+ media: list of string;
+ c = p.get();
+ if(c == IDENT){ # optional medium [, ...]
+ p.unget(c);
+ media = medialist(p);
+ }
+ imports = ref Import(name, media) :: imports;
+ }else
+ p.synerr("bad @import");
+ if(c != ';'){
+ p.synerr("missing ; in @import");
+ p.unget(c);
+ if(p.skipto(";}") < 0)
+ break;
+ }
+ }
+ imports = rev(imports);
+
+ stmts: list of ref Statement;
+ do{
+ while((c := p.get()) == ATKEYWORD)
+ case p.value {
+ "@media" => # medium[,medium]* { ruleset*}
+ media := medialist(p);
+ if(!itisa(p, '{')){
+ p.synerr("bad @media");
+ skipatrule("@media", p);
+ continue;
+ }
+ rules: list of ref Statement.Ruleset;
+ do{
+ rule := checkrule(p);
+ if(rule != nil)
+ rules = rule :: rules;
+ }while(!itisa(p, '}') && !p.eof);
+ stmts = ref Statement.Media(media, rev(rules)) :: stmts;
+ "@page" => # [:ident]? { declaration [; declaration]* }
+ pseudo: string;
+ if(itisa(p, PSEUDO))
+ pseudo = p.value;
+ if(!itisa(p, '{')){
+ p.synerr("bad @page");
+ skipatrule("@page", p);
+ continue;
+ }
+ decls := declarations(p);
+ if(!itisa(p, '}')){
+ p.synerr("unclosed @page declaration block");
+ skipatrule("@page", p);
+ continue;
+ }
+ stmts = ref Statement.Page(pseudo, decls) :: stmts;
+ * =>
+ skipatrule(p.value, p); # skip unknown or misplaced at-rule
+ }
+ p.unget(c);
+ rule := checkrule(p);
+ if(rule != nil)
+ stmts = rule :: stmts;
+ }while(!p.eof);
+ rl := stmts;
+ stmts = nil;
+ for(; rl != nil; rl = tl rl)
+ stmts = hd rl :: stmts;
+ return (ref Stylesheet(charset, imports, stmts), nil);
+}
+
+checkrule(p: ref Cparse): ref Statement.Ruleset
+{
+ (rule, err) := ruleset(p);
+ if(rule == nil){
+ if(err != nil){
+ p.synerr(sys->sprint("bad ruleset: %s", err));
+ p.get(); # make some progress
+ }
+ }
+ return rule;
+}
+
+medialist(p: ref Cparse): list of string
+{
+ media: list of string;
+ do{
+ c := p.get();
+ if(c != IDENT){
+ p.unget(c);
+ p.synerr("missing medium identifier");
+ break;
+ }
+ media = p.value :: media;
+ }while(itisa(p, ','));
+ return rev(media);
+}
+
+itisa(p: ref Cparse, expect: int): int
+{
+ if((c := p.get()) == expect)
+ return 1;
+ p.unget(c);
+ return 0;
+}
+
+atkeywd(p: ref Cparse, expect: string): int
+{
+ if((c := p.get()) == ATKEYWORD && p.value == expect)
+ return 1;
+ p.unget(c);
+ return 0;
+}
+
+skipatrule(name: string, p: ref Cparse)
+{
+ if(printdiag)
+ sys->print("skip unimplemented or misplaced %s\n", name);
+ if((c := p.get()) == '{'){ # block
+ for(nesting := '}' :: nil; nesting != nil && c >= 0; nesting = tl nesting){
+ while((c = p.cs.getc()) >= 0 && c != hd nesting)
+ case c {
+ '{' =>
+ nesting = '}' :: nesting;
+ '(' =>
+ nesting = ')' :: nesting;
+ '[' =>
+ nesting = ']' :: nesting;
+ '"' or '\'' =>
+ quotedstring(p.cs, c);
+ }
+ }
+ }else{
+ while(c >= 0 && c != ';')
+ c = p.get();
+ }
+}
+
+# ruleset:
+# selector [',' S* selector]* '{' S* declaration [';' S* declaration]* '}' S*
+
+ruleset(p: ref Cparse): (ref Statement.Ruleset, string)
+{
+ selectors: list of list of (int, list of ref Select);
+ c := -1;
+ do{
+ s := selector(p);
+ if(s == nil){
+ if(p.eof)
+ return (nil, nil);
+ p.synerr("expected selector");
+ if(p.skipto(",{}") < 0)
+ return (nil, nil);
+ c = p.look();
+ }else
+ selectors = s :: selectors;
+ }while((c = p.get()) == ',');
+ if(c != '{')
+ return (nil, "expected declaration block");
+ sl := selectors;
+ selectors = nil;
+ for(; sl != nil; sl = tl sl)
+ selectors = hd sl :: selectors;
+ decls := declarations(p);
+ if(!itisa(p, '}')){
+ p.synerr("unclosed declaration block");
+ }
+ return (ref Statement.Ruleset(selectors, decls), nil);
+}
+
+declarations(p: ref Cparse): list of ref Decl
+{
+ decls: list of ref Decl;
+ c: int;
+ do{
+ (d, e) := declaration(p);
+ if(d != nil)
+ decls = d :: decls;
+ else if(e != nil){
+ p.synerr("ruleset declaration: "+e);
+ if((c = p.skipto(";}")) < 0)
+ break;
+ }
+ }while((c = p.get()) == ';');
+ p.unget(c);
+ l := decls;
+ for(decls = nil; l != nil; l = tl l)
+ decls = hd l :: decls;
+ return decls;
+}
+
+# selector:
+# simple_selector [combinator simple_selector]*
+# combinator:
+# '+' S* | '>' S* | /* empty */
+#
+
+selector(p: ref Cparse): list of (int, list of ref Select)
+{
+ sel: list of (int, list of ref Select);
+ op := ' ';
+ while((s := selector1(p)) != nil){
+ sel = (op, s) :: sel;
+ if((c := p.look()) == '+' || c == '>')
+ op = p.get();
+ else
+ op = ' ';
+ }
+ l: list of (int, list of ref Select);
+ for(; sel != nil; sel = tl sel)
+ l = hd sel :: l;
+ return l;
+}
+
+#
+# simple_selector:
+# element_name? [HASH | class | attrib | pseudo]* S*
+# element_name:
+# IDENT | '*'
+# class:
+# '.' IDENT
+# attrib:
+# '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S* [IDENT | STRING] S* ]? ']'
+# pseudo
+# ':' [ IDENT | FUNCTION S* IDENT? S* ')' ]
+
+selector1(p: ref Cparse): list of ref Select
+{
+ sel: list of ref Select;
+ c := p.get();
+ if(c == IDENT)
+ sel = ref Select.Element(p.value) :: sel;
+ else if(c== '*')
+ sel = ref Select.Any("*") :: sel;
+ else
+ p.unget(c);
+Sel:
+ for(;;){
+ c = p.get();
+ case c {
+ HASH =>
+ sel = ref Select.ID(p.value) :: sel;
+ CLASS =>
+ sel = ref Select.Class(p.value) :: sel;
+ '[' =>
+ if(!itisa(p, IDENT))
+ break;
+ name := p.value;
+ case c = p.get() {
+ '=' =>
+ sel = ref Select.Attrib(name, "=", optaval(p)) :: sel;
+ INCLUDES =>
+ sel = ref Select.Attrib(name, "~=", optaval(p)) :: sel;
+ DASHMATCH =>
+ sel = ref Select.Attrib(name, "|=", optaval(p)) :: sel;
+ * =>
+ sel = ref Select.Attrib(name, nil, nil) :: sel;
+ p.unget(c);
+ }
+ if((c = p.get()) != ']'){
+ p.synerr("bad attribute syntax");
+ p.unget(c);
+ break Sel;
+ }
+ PSEUDO =>
+ case c = p.get() {
+ IDENT =>
+ sel = ref Select.Pseudo(p.value) :: sel;
+ FUNCTION =>
+ name := p.value;
+ case c = p.get() {
+ IDENT =>
+ sel = ref Select.Pseudofn(name, lowercase(p.value)) :: sel;
+ ')' =>
+ p.unget(c);
+ sel = ref Select.Pseudofn(name, nil) :: sel;
+ * =>
+ p.synerr("bad pseudo-function syntax");
+ p.unget(c);
+ break Sel;
+ }
+ if((c = p.get()) != ')'){
+ p.synerr("missing ')' for pseudo-function");
+ p.unget(c);
+ break Sel;
+ }
+ * =>
+ p.synerr(sys->sprint("unexpected :pseudo: %s:%s", ptok(c), p.value));
+ p.unget(c);
+ break Sel;
+ }
+ * =>
+ p.unget(c);
+ break Sel;
+ }
+ # qualifiers must be adjacent to the first item, and each other
+ c = p.cs.getc();
+ p.cs.ungetc(c);
+ if(isspace(c))
+ break;
+ }
+ sl := sel;
+ for(sel = nil; sl != nil; sl = tl sl)
+ sel = hd sl :: sel;
+ return sel;
+}
+
+optaval(p: ref Cparse): ref Value
+{
+ case c := p.get() {
+ IDENT =>
+ return ref Value.Ident(' ', p.value);
+ STRING =>
+ return ref Value.String(' ', p.value);
+ * =>
+ p.unget(c);
+ return nil;
+ }
+}
+
+# declaration:
+# property ':' S* expr prio?
+# | /* empty */
+# property:
+# IDENT
+# prio:
+# IMPORTANT S* /* ! important */
+
+declaration(p: ref Cparse): (ref Decl, string)
+{
+ c := p.get();
+ if(c != IDENT){
+ p.unget(c);
+ return (nil, nil);
+ }
+ prop := lowercase(p.value);
+ c = p.get();
+ if(c != ':'){
+ p.unget(c);
+ return (nil, "missing :");
+ }
+ values := expr(p);
+ if(values == nil)
+ return (nil, "missing expression(s)");
+ prio := 0;
+ if(p.look() == IMPORTANT){
+ p.get();
+ prio = 1;
+ }
+ return (ref Decl(prop, values, prio), nil);
+}
+
+# expr:
+# term [operator term]*
+# operator:
+# '/' | ',' | /* empty */
+
+expr(p: ref Cparse): list of ref Value
+{
+ values: list of ref Value;
+ sep := ' ';
+ while((t := term(p, sep)) != nil){
+ values = t :: values;
+ if((c := p.look()) == '/' || c == ',')
+ sep = p.get(); # need something fancier here?
+ else
+ sep = ' ';
+ }
+ vl := values;
+ for(values = nil; vl != nil; vl = tl vl)
+ values = hd vl :: values;
+ return values;
+}
+
+#
+# term:
+# unary_operator? [NUMBER | PERCENTAGE | LENGTH | EMS | EXS | ANGLE | TIME | FREQ | function]
+# | STRING | IDENT | URI | RGB | UNICODERANGE | hexcolour
+# function:
+# FUNCTION expr ')'
+# unary_operator:
+# '-' | '+'
+# hexcolour:
+# HASH S*
+#
+# LENGTH, EMS, ... FREQ have been combined into UNIT here
+#
+# TO DO: UNICODERANGE
+
+term(p: ref Cparse, sep: int): ref Value
+{
+ prefix: string;
+ case p.look(){
+ '+' or '-' =>
+ prefix[0] = p.get();
+ }
+ c := p.get();
+ case c {
+ NUMBER =>
+ return ref Value.Number(sep, prefix+p.value);
+ PERCENTAGE =>
+ return ref Value.Percentage(sep, prefix+p.value);
+ UNIT =>
+ return ref Value.Unit(sep, prefix+p.value, p.suffix);
+ }
+ if(prefix != nil)
+ p.synerr("+/- before non-numeric");
+ case c {
+ STRING =>
+ return ref Value.String(sep, p.value);
+ IDENT =>
+ return ref Value.Ident(sep, lowercase(p.value));
+ URL =>
+ return ref Value.Url(sep, p.value);
+ HASH =>
+ # could check value: 3 or 6 hex digits
+ (r, g, b) := torgb(p.value);
+ if(r < 0)
+ return nil;
+ return ref Value.Hexcolour(sep, p.value, (r,g,b));
+ FUNCTION =>
+ name := p.value;
+ args := expr(p);
+ c = p.get();
+ if(c != ')'){
+ p.synerr(sys->sprint("missing ')' for function %s", name));
+ return nil;
+ }
+ if(name == "rgb"){
+ if(len args != 3){
+ p.synerr("wrong number of arguments to rgb()");
+ return nil;
+ }
+ r := colourof(hd args);
+ g := colourof(hd tl args);
+ b := colourof(hd tl tl args);
+ if(r < 0 || g < 0 || b < 0){
+ p.synerr("invalid rgb() parameters");
+ return nil;
+ }
+ return ref Value.RGB(sep, args, (r,g,b));
+ }
+ return ref Value.Function(sep, name, args);
+ * =>
+ p.unget(c);
+ return nil;
+ }
+}
+
+torgb(s: string): (int, int, int)
+{
+ case len s {
+ 3 =>
+ r := hex(s[0]);
+ g := hex(s[1]);
+ b := hex(s[2]);
+ if(r >= 0 && g >= 0 && b >= 0)
+ return ((r<<4)|r, (g<<4)|g, (b<<4)|b);
+ 6 =>
+ v := 0;
+ for(i := 0; i < 6; i++){
+ n := hex(s[i]);
+ if(n < 0)
+ return (-1, 0, 0);
+ v = (v<<4) | n;
+ }
+ return (v>>16, (v>>8)&16rFF, v&16rFF);
+ }
+ return (-1, 0, 0);
+}
+
+colourof(v: ref Value): int
+{
+ pick r := v {
+ Number =>
+ return clip(int r.value, 0, 255);
+ Percentage =>
+ # just the integer part
+ return clip((int r.value*255 + 50)/100, 0, 255);
+ * =>
+ return -1;
+ }
+}
+
+clip(v: int, l: int, u: int): int
+{
+ if(v < l)
+ return l;
+ if(v > u)
+ return u;
+ return v;
+}
+
+rev[T](l: list of T): list of T
+{
+ t: list of T;
+ for(; l != nil; l = tl l)
+ t = hd l :: t;
+ return t;
+}
+
+Clex: adt {
+ context: list of int; # characters
+ input: string;
+ lim: int;
+ n: int;
+ lineno: int;
+
+ new: fn(s: string, lno: int): ref Clex;
+ getc: fn(cs: self ref Clex): int;
+ ungetc: fn(cs: self ref Clex, c: int);
+ synerr: fn(nil: self ref Clex, s: string);
+};
+
+Clex.new(s: string, lno: int): ref Clex
+{
+ return ref Clex(nil, s, len s, 0, lno);
+}
+
+Clex.getc(cs: self ref Clex): int
+{
+ if(cs.context != nil){
+ c := hd cs.context;
+ cs.context = tl cs.context;
+ return c;
+ }
+ if(cs.n >= cs.lim)
+ return -1;
+ c := cs.input[cs.n++];
+ if(c == '\n')
+ cs.lineno++;
+ return c;
+}
+
+Clex.ungetc(cs: self ref Clex, c: int)
+{
+ cs.context = c :: cs.context;
+}
+
+Clex.synerr(cs: self ref Clex, s: string)
+{
+ if(printdiag)
+ sys->fprint(sys->fildes(2), "%d: err: %s\n", cs.lineno, s);
+}
+
+csslex(cs: ref Clex): (int, string, string)
+{
+ for(;;){
+ c := skipws(cs);
+ if(c < 0)
+ return (-1, nil, nil);
+ case c {
+ '<' =>
+ if(seq(cs, "!--"))
+ break; # <!-- ignore HTML comment start (CDO)
+ return (c, nil, nil);
+ '-' =>
+ if(seq(cs, "->"))
+ break; # --> ignore HTML comment end (CDC)
+ return (c, nil, nil);
+ ':' =>
+ c = cs.getc();
+ cs.ungetc(c);
+ if(isnamec(c, 0))
+ return (PSEUDO, nil, nil);
+ return (':', nil, nil);
+ '#' =>
+ c = cs.getc();
+ if(isnamec(c, 1))
+ return (HASH, name(cs, c), nil);
+ cs.ungetc(c);
+ return ('#', nil, nil);
+ '/' =>
+ if(subseq(cs, '*', 1, 0)){
+ comment(cs);
+ break;
+ }
+ return (c, nil, nil);
+ '\'' or '"' =>
+ return (STRING, quotedstring(cs, c), nil);
+ '0' to '9' or '.' =>
+ if(c == '.'){
+ d := cs.getc();
+ cs.ungetc(d);
+ if(!isdigit(d)){
+ if(isnamec(d, 1))
+ return (CLASS, name(cs, cs.getc()), nil);
+ return ('.', nil, nil);
+ }
+ # apply CSS2 treatment: .55 is a number not a class
+ }
+ val := number(cs, c);
+ c = cs.getc();
+ if(c == '%')
+ return (PERCENTAGE, val, "%");
+ if(isnamec(c, 0)) # use CSS2 interpetation
+ return (UNIT, val, lowercase(name(cs, c)));
+ cs.ungetc(c);
+ return (NUMBER, val, nil);
+ '\\' =>
+ d := cs.getc();
+ if(d >= ' ' && d <= '~' || islatin1(d)){ # probably should handle it in name
+ wd := name(cs, d);
+ return (IDENT, "\\"+wd, nil);
+ }
+ cs.ungetc(d);
+ return ('\\', nil, nil);
+ '@' =>
+ c = cs.getc();
+ if(isnamec(c, 0)) # @something
+ return (ATKEYWORD, "@"+lowercase(name(cs,c)), nil);
+ cs.ungetc(c);
+ return ('@', nil, nil);
+ '!' =>
+ c = skipws(cs);
+ if(isnamec(c, 0)){ # !something
+ wd := name(cs, c);
+ if(lowercase(wd) == "important")
+ return (IMPORTANT, nil, nil);
+ pushback(cs, wd);
+ }else
+ cs.ungetc(c);
+ return ('!', nil, nil);
+ '~' =>
+ if(subseq(cs, '=', 1, 0))
+ return (INCLUDES, "~=", nil);
+ return ('~', nil, nil);
+ '|' =>
+ if(subseq(cs, '=', 1, 0))
+ return (DASHMATCH, "|=", nil);
+ return ('|', nil, nil);
+ * =>
+ if(isnamec(c, 0)){
+ wd := name(cs, c);
+ d := cs.getc();
+ if(d != '('){
+ cs.ungetc(d);
+ return (IDENT, wd, nil);
+ }
+ val := lowercase(wd);
+ if(val == "url")
+ return (URL, url(cs), nil); # bizarre special case
+ return (FUNCTION, val, nil);
+ }
+ return (c, nil, nil);
+ }
+
+ }
+}
+
+skipws(cs: ref Clex): int
+{
+ for(;;){
+ while((c := cs.getc()) == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f')
+ ;
+ if(c != '/')
+ return c;
+ c = cs.getc();
+ if(c != '*'){
+ cs.ungetc(c);
+ return '/';
+ }
+ comment(cs);
+ }
+}
+
+seq(cs: ref Clex, s: string): int
+{
+ for(i := 0; i < len s; i++)
+ if((c := cs.getc()) != s[i])
+ break;
+ if(i == len s)
+ return 1;
+ cs.ungetc(c);
+ while(i > 0)
+ cs.ungetc(s[--i]);
+ if(c < 0)
+ return -1;
+ return 0;
+}
+
+subseq(cs: ref Clex, a: int, t: int, e: int): int
+{
+ if((c := cs.getc()) != a){
+ cs.ungetc(c);
+ return e;
+ }
+ return t;
+}
+
+pushback(cs: ref Clex, wd: string)
+{
+ for(i := len wd; --i >= 0;)
+ cs.ungetc(wd[i]);
+}
+
+comment(cs: ref Clex)
+{
+ while((c := cs.getc()) != '*' || (c = cs.getc()) != '/')
+ if(c < 0) {
+ # end of file in comment
+ break;
+ }
+}
+
+number(cs: ref Clex, c: int): string
+{
+ s: string;
+ for(; isdigit(c); c = cs.getc())
+ s[len s] = c;
+ if(c != '.'){
+ cs.ungetc(c);
+ return s;
+ }
+ if(!isdigit(c = cs.getc())){
+ cs.ungetc(c);
+ cs.ungetc('.');
+ return s;
+ }
+ s[len s] = '.';
+ do{
+ s[len s] = c;
+ }while(isdigit(c = cs.getc()));
+ cs.ungetc(c);
+ return s;
+}
+
+name(cs: ref Clex, c: int): string
+{
+ s: string;
+ for(; isnamec(c, 1); c = cs.getc()){
+ s[len s] = c;
+ if(c == '\\'){
+ c = cs.getc();
+ if(isescapable(c))
+ s[len s] = c;
+ }
+ }
+ cs.ungetc(c);
+ return s;
+}
+
+isescapable(c: int): int
+{
+ return c >= ' ' && c <= '~' || isnamec(c, 1);
+}
+
+islatin1(c: int): int
+{
+ return c >= 16rA1 && c <= 16rFF; # printable latin-1
+}
+
+isnamec(c: int, notfirst: int): int
+{
+ return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '\\' ||
+ notfirst && (c >= '0' && c <= '9' || c == '-') ||
+ c >= 16rA1 && c <= 16rFF; # printable latin-1
+}
+
+isxdigit(c: int): int
+{
+ return c>='0' && c<='9' || c>='a'&&c<='f' || c>='A'&&c<='F';
+}
+
+isdigit(c: int): int
+{
+ return c >= '0' && c <= '9';
+}
+
+isspace(c: int): int
+{
+ return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f';
+}
+
+hex(c: int): int
+{
+ if(c >= '0' && c <= '9')
+ return c-'0';
+ if(c >= 'A' && c <= 'F')
+ return c-'A' + 10;
+ if(c >= 'a' && c <= 'f')
+ return c-'a' + 10;
+ return -1;
+}
+
+quotedstring(cs: ref Clex, delim: int): string
+{
+ s: string;
+ while((c := cs.getc()) != delim){
+ if(c < 0){
+ cs.synerr("end-of-file in string");
+ return s;
+ }
+ if(c == '\\'){
+ c = cs.getc();
+ if(c < 0){
+ cs.synerr("end-of-file in string");
+ return s;
+ }
+ if(isxdigit(c)){
+ # unicode escape
+ n := 0;
+ for(i := 0;;){
+ n = (n<<4) | hex(c);
+ c = cs.getc();
+ if(!isxdigit(c) || ++i >= 6){
+ if(!isspace(c))
+ cs.ungetc(c); # CSS2 ignores the first white space following
+ break;
+ }
+ }
+ s[len s] = n;
+ }else if(c == '\n'){
+ ; # escaped newline
+ }else if(isescapable(c))
+ s[len s] = c;
+ }else if(c)
+ s[len s] = c;
+ }
+ return s;
+}
+
+url(cs: ref Clex): string
+{
+ s: string;
+ c := skipws(cs);
+ if(c != '"' && c != '\''){ # not a quoted string
+ while(c != ' ' && c != '\n' && c != '\'' && c != '"' && c != ')'){
+ s[len s] = c;
+ c = cs.getc();
+ if(c == '\\'){
+ c = cs.getc();
+ if(c < 0){
+ cs.synerr("end of file in url parameter");
+ break;
+ }
+ if(c == ' ' || c == '\'' || c == '"' || c == ')')
+ s[len s] = c;
+ else{
+ cs.synerr("invalid escape sequence in url");
+ s[len s] = '\\';
+ s[len s] = c;
+ }
+ c = cs.getc();
+ }
+ }
+ cs.ungetc(c);
+# if(s == nil)
+# p.synerr("empty parameter to url");
+ }else
+ s = quotedstring(cs, c);
+ if((c = skipws(cs)) != ')'){
+ cs.synerr("unclosed parameter to url");
+ cs.ungetc(c);
+ }
+ return s;
+}
+
+lowercase(s: string): string
+{
+ for(i := 0; i < len s; i++)
+ if((c := s[i]) >= 'A' && c <= 'Z')
+ s[i] = c-'A' + 'a';
+ return s;
+}
diff --git a/appl/lib/w3c/mkfile b/appl/lib/w3c/mkfile
new file mode 100644
index 00000000..90f4d043
--- /dev/null
+++ b/appl/lib/w3c/mkfile
@@ -0,0 +1,17 @@
+<../../../mkconfig
+
+TARG=\
+ css.dis\
+ xpointers.dis\
+
+MODULES=
+
+SYSMODULES= \
+ sys.m\
+ bufio.m\
+ css.m\
+ xpointers.m\
+
+DISBIN=$ROOT/dis/lib/w3c
+
+<$ROOT/mkfiles/mkdis
diff --git a/appl/lib/w3c/xpointers.b b/appl/lib/w3c/xpointers.b
new file mode 100644
index 00000000..0d7c231a
--- /dev/null
+++ b/appl/lib/w3c/xpointers.b
@@ -0,0 +1,858 @@
+implement Xpointers;
+
+#
+# Copyright © 2005 Vita Nuova Holdings Oimited
+#
+
+include "sys.m";
+ sys: Sys;
+
+include "xpointers.m";
+
+init()
+{
+ sys = load Sys Sys->PATH;
+}
+
+#
+# XPointer framework syntax
+#
+# Pointer ::= Shorthand | SchemeBased
+# Shorthand ::= NCName # from [XML-Names]
+# SchemeBased ::= PointerPart (S? PointerPart)*
+# PointerPart ::= SchemeName '(' SchemeData ')'
+# SchemeName ::= QName # from [XML-Names]
+# SchemeData ::= EscapedData*
+# EscapedData ::= NormalChar | '^(' | '^)' | '^^' | '(' SchemeData ')'
+# NormalChar ::= UnicodeChar - [()^]
+# UnicodeChar ::= [#x0 - #x10FFFF]
+
+framework(s: string): (string, list of (string, string, string), string)
+{
+ (q, nm, i) := name(s, 0);
+ if(i >= len s){ # Shorthand
+ if(q != nil)
+ return (nil, nil, "shorthand pointer must be unqualified name");
+ if(nm == nil)
+ return (nil, nil, "missing pointer name");
+ return (nm, nil, nil);
+ }
+ # must be SchemeBased
+ l: list of (string, string, string);
+ for(;;){
+ if(nm == nil){
+ if(q != nil)
+ return (nil, nil, sys->sprint("prefix but no local part in name at %d", i));
+ return (nil, nil, sys->sprint("expected name at %d", i));
+ }
+ if(i >= len s || s[i] != '(')
+ return (nil, nil, sys->sprint("expected '(' at %d", i));
+ o := i++;
+ a := "";
+ nesting := 0;
+ for(; i < len s && ((c := s[i]) != ')' || nesting); i++){
+ case c {
+ '^' =>
+ if(i+1 >= len s)
+ return (nil, nil, "unexpected eof after ^");
+ c = s[++i];
+ if(c != '(' && c != ')' && c != '^')
+ return (nil, nil, sys->sprint("invalid escape ^%c at %d", c, i));
+ '(' =>
+ nesting++;
+ ')' =>
+ if(--nesting < 0)
+ return (nil, nil, sys->sprint("unbalanced ) at %d", i));
+ }
+ a[len a] = c;
+ }
+ if(i >= len s)
+ return (nil, nil, sys->sprint("unbalanced ( at %d", o));
+ l = (q, nm, a) :: l;
+ if(++i == len s)
+ break;
+ while(i < len s && isspace(s[i]))
+ i++;
+ (q, nm, i) = name(s, i);
+ }
+ rl: list of (string, string, string);
+ for(; l != nil; l = tl l)
+ rl = hd l :: rl;
+ return (nil, rl, nil);
+}
+
+isspace(c: int): int
+{
+ return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\v' || c == '\f';
+}
+
+#
+# QName ::= (Prefix ':')? LocalPart
+# Prefix ::= NCName
+# LocalPart ::= NCName
+#
+#NCName :: (Oetter | '_') NCNameChar*
+#NCNameChar :: Oetter | Digit | '.' | '-' | '_' | CombiningChar | Extender
+
+name(s: string, o: int): (string, string, int)
+{
+ (ns, i) := ncname(s, o);
+ if(i >= len s || s[i] != ':')
+ return (nil, ns, i);
+ (nm, j) := ncname(s, i+1);
+ if(j == i+1)
+ return (nil, ns, i); # assume it's a LocalPart followed by ':'
+ return (ns, nm, j);
+}
+
+ncname(s: string, o: int): (string, int)
+{
+ if(o >= len s || !isalnum(c := s[o]) && c != '_' || c >= '0' && c <= '9')
+ return (nil, o); # missing or invalid start character
+ for(i := o; i < len s && isnamec(s[i]); i++)
+ ;
+ return (s[o:i], i);
+}
+
+isnamec(c: int): int
+{
+ return isalnum(c) || c == '_' || c == '-' || c == '.';
+}
+
+isalnum(c: int): int
+{
+ #
+ # Hard to get absolutely right without silly amount of character data.
+ # Use what we know about ASCII
+ # and assume anything above the Oatin control characters is
+ # potentially an alphanumeric.
+ #
+ if(c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9')
+ return 1; # usual case
+ if(c <= ' ')
+ return 0;
+ if(c > 16rA0)
+ return 1; # non-ASCII
+ return 0;
+}
+
+# schemes: xpointer(), xmlns(), element()
+
+# xmlns()
+# XmlnsSchemeData ::= NCName S? '=' S? EscapedNamespaceName
+# EscapedNamespaceName ::= EscapedData*
+
+xmlns(s: string): (string, string, string)
+{
+ (nm, i) := ncname(s, 0);
+ if(nm == nil)
+ return (nil, nil, "illegal namespace name");
+ while(i < len s && isspace(s[i]))
+ i++;
+ if(i >= len s || s[i++] != '=')
+ return (nil, nil, "illegal xmlns declaration");
+ while(i < len s && isspace(s[i]))
+ i++;
+ return (nm, s[i:], nil);
+}
+
+# element()
+# ElementSchemeData ::= (NCName ChildSequence?) | ChildSequence
+# ChildSequence ::= ('/' [1-9] [0-9]*)+
+
+element(s: string): (string, list of int, string)
+{
+ nm: string;
+ i := 0;
+ if(s != nil && s[0] != '/'){
+ (nm, i) = ncname(s, 0);
+ if(nm == nil)
+ return (nil, nil, "illegal element name");
+ }
+ l: list of int;
+ do{
+ if(i >= len s || s[i++] != '/')
+ return (nil, nil, "illegal child sequence (expected '/')");
+ v := 0;
+ do{
+ if(i >= len s || !isdigit(s[i]))
+ return (nil, nil, "illegal child sequence (expected integer)");
+ v = v*10 + s[i]-'0';
+ }while(++i < len s && s[i] != '/');
+ l = v :: l;
+ }while(i < len s);
+ rl: list of int;
+ for(; l != nil; l = tl l)
+ rl = hd l :: rl;
+ return (nm, rl, nil);
+}
+
+# xpointer()
+# XpointerSchemeData ::= Expr # from Xpath, with new functions and data types
+
+xpointer(s: string): (ref Xpath, string)
+{
+ p := ref Parse(ref Rd(s, 0, 0), nil);
+ {
+ e := expr(p, 0);
+ if(p.r.i < len s)
+ synerr("missing operator");
+ return (e, nil);
+ }exception e{
+ "syntax error*" =>
+ return (nil, e);
+ * =>
+ raise;
+ }
+}
+
+Lerror, Ldslash, Lint, Lreal, Llit, Lvar, Ldotdot, Lop, Laxis, Lfn: con 'a'+iota; # internal lexical items
+
+Keywd: adt {
+ name: string;
+ val: int;
+};
+
+axes: array of Keywd = array[] of {
+ ("ancestor", Aancestor),
+ ("ancestor-or-self", Aancestor_or_self),
+ ("attribute", Aattribute),
+ ("child", Achild),
+ ("descendant", Adescendant),
+ ("descendant-or-self", Adescendant_or_self),
+ ("following", Afollowing),
+ ("following-sibling", Afollowing_sibling),
+ ("namespace", Anamespace),
+ ("parent", Aparent),
+ ("preceding", Apreceding),
+ ("preceding-sibling", Apreceding_sibling),
+ ("self", Aself),
+};
+
+keywds: array of Keywd = array[] of {
+ ("and", Oand),
+ ("comment", Onodetype),
+ ("div", Odiv),
+ ("mod", Omod),
+ ("node", Onodetype),
+ ("or", Oor),
+ ("processing-instruction", Onodetype),
+ ("text", Onodetype),
+};
+
+iskeywd(s: string): int
+{
+ return look(keywds, s);
+}
+
+look(k: array of Keywd, s: string): int
+{
+ for(i := 0; i < len k; i++)
+ if(k[i].name == s)
+ return k[i].val;
+ return 0;
+}
+
+lookname(k: array of Keywd, v: int): string
+{
+ for(i := 0; i < len k; i++)
+ if(k[i].val == v)
+ return k[i].name;
+ return nil;
+}
+
+prectab := array[] of {
+ array[] of {Oor},
+ array[] of {Oand},
+ array[] of {'=', One},
+ array[] of {'<', Ole, '>', Oge},
+ array[] of {'+', '-'},
+ array[] of {Omul, Odiv, Omod},
+ array[] of {Oneg}, # unary '-'
+ array[] of {'|'}, # UnionExpr
+};
+
+isop(t: int, p: array of int): int
+{
+ if(t >= 0)
+ for(j := 0; j < len p; j++)
+ if(t == p[j])
+ return 1;
+ return 0;
+}
+
+# Expr ::= OrExpr
+# UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
+# PathExpr ::= LocationPath | FilterExpr | FilterExpr '/' RelativeLocationPath |
+# FilterExpr '//' RelativeLocationPath
+# OrExpr ::= AndExpr | OrExpr 'or' AndExpr
+# AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
+# EqualityExpr ::= RelationalExpr | EqualityExpr '=' RelationalExpr | EqualityExpr '!=' RelationalExpr
+# RelationalExpr ::= AdditiveExpr | RelationalExpr '<' AdditiveExpr | RelationalExpr '>' AdditiveExpr |
+# RelationalExpr '<=' AdditiveExpr | RelationalExpr '>=' AdditiveExpr
+# AdditiveExpr ::= MultiplicativeExpr | AdditiveExpr '+' MultiplicativeExpr | AdditiveExpr '-' MultiplicativeExpr
+# MultiplicativeExpr ::= UnaryExpr | MultiplicativeExpr MultiplyOperator UnaryExpr |
+# MultiplicativeExpr 'div' UnaryExpr | MultiplicativeExpr 'mod' UnaryExpr
+# UnaryExpr ::= UnionExpr | '-' UnaryExpr
+
+expr(p: ref Parse, k: int): ref Xpath
+{
+ if(k >= len prectab)
+ return pathexpr(p);
+ if(prectab[k][0] == Oneg){ # unary '-'
+ if(p.look() == '-'){
+ p.get();
+ return ref Xpath.E(Oneg, expr(p,k+1), nil);
+ }
+ # must be UnionExpr
+ k++;
+ }
+ e := expr(p, k+1);
+ while(isop(p.look(), prectab[k])){
+ o := p.get().t0;
+ e = ref Xpath.E(o, e, expr(p, k+1)); # +assoc[k]
+ }
+ return e;
+}
+
+# PathExpr ::= LocationPath | FilterExpr ( ('/' | '//') RelativeLocationPath )
+# FilterExpr ::= PrimaryExpr | FilterExpr Predicate => PrimaryExpr Predicate*
+
+pathexpr(p: ref Parse): ref Xpath
+{
+ # LocationPath?
+ case p.look() {
+ '.' or Ldotdot or Laxis or '@' or Onametest or Onodetype or '*' =>
+ return locationpath(p, 0);
+ '/' or Ldslash =>
+ return locationpath(p, 1);
+ }
+ # FilterExpr
+ e := primary(p);
+ while(p.look() == '[')
+ e = ref Xpath.E(Ofilter, e, predicate(p));
+ if((o := p.look()) == '/' || o == Ldslash)
+ e = ref Xpath.E(Opath, e, locationpath(p, 0));
+ return e;
+}
+
+# LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
+# AbsoluteLocationPath ::= '/' RelativeLocationPath? | AbbreviatedAbsoluteLocationPath
+# RelativeLocationPath ::= Step | RelativeLocationPath '/' Step
+# AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath
+# AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step
+
+locationpath(p: ref Parse, abs: int): ref Xpath
+{
+ # // => /descendent-or-self::node()/
+ pl: list of ref Xstep;
+ o := p.look();
+ if(o != '/' && o != Ldslash){
+ s := step(p);
+ if(s == nil)
+ synerr("expected Step in LocationPath");
+ pl = s :: pl;
+ }
+ while((o = p.look()) == '/' || o == Ldslash){
+ p.get();
+ if(o == Ldslash)
+ pl = ref Xstep(Adescendant_or_self, Onodetype, nil, "node", nil, nil) :: pl;
+ s := step(p);
+ if(s == nil){
+ if(abs && pl == nil)
+ break; # it's just an initial '/'
+ synerr("expected Step in LocationPath");
+ }
+ pl = s :: pl;
+ }
+ return ref Xpath.Path(abs, rev(pl));
+}
+
+# Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
+# AxisSpecifier ::= AxisName '::' | AbbreviatedAxisSpecifier
+# AxisName := ... # long list
+# NodeTest ::= NameTest | NodeType '(' ')'
+# Predicate ::= '[' PredicateExpr ']'
+# PredicateExpr ::= Expr
+# AbbreviatedStep ::= '.' | '..'
+# AbbreviatedAxisSpecifier ::= '@'?
+
+step(p: ref Parse): ref Xstep
+{
+ # AxisSpecifier ... | AbbreviatedStep
+ (o, ns, nm) := p.get();
+ axis := Achild;
+ case o {
+ '.' =>
+ return ref Xstep(Aself, Onodetype, nil, "node", nil, nil); # self::node()
+ Ldotdot =>
+ return ref Xstep(Aparent, Onodetype, nil, "node", nil, nil); # parent::node()
+ Laxis =>
+ axis = look(axes, ns);
+ (o, ns, nm) = p.get();
+ '@' =>
+ axis = Aattribute;
+ (o, ns, nm) = p.get();
+ * =>
+ ;
+ }
+
+ if(o == '*'){
+ o = Onametest;
+ nm = "*";
+ ns = nil;
+ }
+
+ # NodeTest ::= NameTest | NodeType '(' ')'
+ if(o != Onametest && o != Onodetype){
+ p.unget((o, ns, nm));
+ return nil;
+ }
+
+ arg: string;
+ if(o == Onodetype){ # '(' ... ')'
+ expect(p, '(');
+ # grammar is wrong: processing-instruction can have optional literal
+ if(nm == "processing-instruction" && p.look() == Llit)
+ arg = p.get().t1;
+ expect(p, ')');
+ }
+
+ # Predicate*
+ pl: list of ref Xpath;
+ while((pe := predicate(p)) != nil)
+ pl = pe :: pl;
+ return ref Xstep(axis, o, ns, nm, arg, rev(pl));
+}
+
+# PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
+# FunctionCall ::= FunctionName '(' (Argument ( ',' Argument)*)? ')'
+# Argument ::= Expr
+
+primary(p: ref Parse): ref Xpath
+{
+ (o, ns, nm) := p.get();
+ case o {
+ Lvar =>
+ return ref Xpath.Var(ns, nm);
+ '(' =>
+ e := expr(p, 0);
+ expect(p, ')');
+ return e;
+ Llit =>
+ return ref Xpath.Str(ns);
+ Lint =>
+ return ref Xpath.Int(big ns);
+ Lreal =>
+ return ref Xpath.Real(real ns);
+ Lfn =>
+ expect(p, '(');
+ al: list of ref Xpath;
+ if(p.look() != ')'){
+ for(;;){
+ al = expr(p, 0) :: al;
+ if(p.look() != ',')
+ break;
+ p.get();
+ }
+ al = rev(al);
+ }
+ expect(p, ')');
+ return ref Xpath.Fn(ns, nm, al);
+ * =>
+ synerr("invalid PrimaryExpr");
+ return nil;
+ }
+}
+
+# Predicate ::= '[' PredicateExpr ']'
+# PredicateExpr ::= Expr
+
+predicate(p: ref Parse): ref Xpath
+{
+ l := p.get();
+ if(l.t0 != '['){
+ p.unget(l);
+ return nil;
+ }
+ e := expr(p, 0);
+ expect(p, ']');
+ return e;
+}
+
+expect(p: ref Parse, t: int)
+{
+ l := p.get();
+ if(l.t0 != t)
+ synerr(sys->sprint("expected '%c'", t));
+}
+
+Xpath.text(e: self ref Xpath): string
+{
+ if(e == nil)
+ return "nil";
+ pick r := e {
+ E =>
+ if(r.r == nil)
+ return sys->sprint("(%s%s)", opname(r.op), r.l.text());
+ if(r.op == Ofilter)
+ return sys->sprint("%s[%s]", r.l.text(), r.r.text());
+ return sys->sprint("(%s%s%s)", r.l.text(), opname(r.op), r.r.text());
+ Fn =>
+ a := "";
+ for(l := r.args; l != nil; l = tl l)
+ a += sys->sprint(",%s", (hd l).text());
+ if(a != "")
+ a = a[1:];
+ return sys->sprint("%s(%s)", qual(r.ns, r.name), a);
+ Var =>
+ return sys->sprint("$%s", qual(r.ns, r.name));
+ Path =>
+ if(r.abs)
+ t := "/";
+ else
+ t = "";
+ for(l := r.steps; l != nil; l = tl l){
+ if(t != nil && t != "/")
+ t += "/";
+ t += (hd l).text();
+ }
+ return t;
+ Int =>
+ return sys->sprint("%bd", r.val);
+ Real =>
+ return sys->sprint("%g", r.val);
+ Str =>
+ return sys->sprint("%s", str(r.s));
+ }
+}
+
+qual(ns: string, nm: string): string
+{
+ if(ns != nil)
+ return ns+":"+nm;
+ return nm;
+}
+
+str(s: string): string
+{
+ for(i := 0; i < len s; i++)
+ if(s[i] == '\'')
+ return sys->sprint("\"%s\"", s);
+ return sys->sprint("'%s'", s);
+}
+
+opname(o: int): string
+{
+ case o {
+ One => return "!=";
+ Ole => return "<=";
+ Oge => return ">=";
+ Omul => return "*";
+ Odiv => return " div ";
+ Omod => return " mod ";
+ Oand => return " and ";
+ Oor => return " or ";
+ Oneg => return "-";
+ Ofilter => return " op_filter ";
+ Opath => return "/";
+ * => return sys->sprint(" %c ", o);
+ }
+}
+
+Xstep.text(s: self ref Xstep): string
+{
+ t := sys->sprint("%s::", Xstep.axisname(s.axis));
+ case s.op {
+ Onametest =>
+ if(s.ns == "*" && s.name == "*")
+ t += "*";
+ else
+ t += qual(s.ns, s.name);
+ Onodetype =>
+ if(s.arg != nil)
+ t += sys->sprint("%s(%s)", s.name, str(s.arg));
+ else
+ t += sys->sprint("%s()", s.name);
+ }
+ for(l := s.preds; l != nil; l = tl l)
+ t += sys->sprint("[%s]", (hd l).text());
+ return t;
+}
+
+Xstep.axisname(n: int): string
+{
+ return lookname(axes, n);
+}
+
+# ExprToken ::= '(' | ')' | '[' | ']' | '.' | '..' | '@' | ',' | '::' |
+# NameTest | NodeType | Operator | FunctionName | AxisName |
+# Literal | Number | VariableReference
+# Operator ::= OperatorName | MultiplyOperator | '/' | '//' | '|' | '+' | '' | '=' | '!=' | '<' | '<=' | '>' | '>='
+# MultiplyOperator ::= '*'
+# FunctionName ::= QName - NodeType
+# VariableReference ::= '$' QName
+# NameTest ::= '*' | NCName ':' '*' | QName
+# NodeType ::= 'comment' | 'text' | 'processing-instruction' | 'node'
+#
+
+Lex: type (int, string, string);
+
+Parse: adt {
+ r: ref Rd;
+ pb: list of Lex; # push back
+
+ look: fn(p: self ref Parse): int;
+ get: fn(p: self ref Parse): Lex;
+ unget: fn(p: self ref Parse, t: Lex);
+};
+
+Parse.get(p: self ref Parse): Lex
+{
+ if(p.pb != nil){
+ h := hd p.pb;
+ p.pb = tl p.pb;
+ return h;
+ }
+ return lex(p.r);
+}
+
+Parse.look(p: self ref Parse): int
+{
+ t := p.get();
+ p.unget(t);
+ return t.t0;
+}
+
+Parse.unget(p: self ref Parse, t: Lex)
+{
+ p.pb = t :: p.pb;
+}
+
+lex(r: ref Rd): Lex
+{
+ l := lex0(r);
+ r.prev = l.t0;
+ return l;
+}
+
+# disambiguating rules are D1 to D3
+
+# D1. preceding token p && p not in {'@', '::', '(', '[', ',', Operator} then '*' is MultiplyOperator
+# and NCName must be OperatorName
+
+xop(t: int): int
+{
+ case t {
+ -1 or 0 or '@' or '(' or '[' or ',' or Lop or Omul or
+ '/' or Ldslash or '|' or '+' or '-' or '=' or One or '<' or Ole or '>' or Oge or
+ Oand or Oor or Omod or Odiv or Laxis =>
+ return 0;
+ }
+ return 1;
+}
+
+# UnaryExpr ::= UnionExpr | '-' UnaryExpr
+# ExprToken ::= ... |
+# NameTest | NodeType | Operator | FunctionName | AxisName |
+# Literal | Number | VariableReference
+# Operator ::= OperatorName | MultiplyOperator | '/' | '//' | '|' | '+' | '' | '=' | '!=' | '<' | '<=' | '>' | '>='
+# MultiplyOperator ::= '*'
+
+lex0(r: ref Rd): Lex
+{
+ while(isspace(r.look()))
+ r.get();
+ case c := r.get() {
+ -1 or
+ '(' or ')' or '[' or ']' or '@' or ',' or '+' or '-' or '|' or '=' or ':' =>
+ # singletons ('::' only valid after name, see below)
+ return (c, nil, nil);
+ '/' =>
+ return subseq(r, '/', Ldslash, '/');
+ '!' =>
+ return subseq(r, '=', One, '!');
+ '<' =>
+ return subseq(r, '=', Ole, '<');
+ '>' =>
+ return subseq(r, '=', Oge, '>');
+ '*' =>
+ if(xop(r.prev))
+ return (Omul, nil, nil);
+ return (c, nil, nil);
+ '.' =>
+ case r.look() {
+ '0' to '9' =>
+ (v, nil) := number(r, r.get());
+ return (Lreal, v, nil);
+ '.' =>
+ r.get();
+ return (Ldotdot, nil, nil);
+ * =>
+ return ('.', nil, nil);
+ }
+ '$' =>
+ # variable reference
+ (ns, nm, i) := name(r.s, r.i);
+ if(ns == nil && nm == nil)
+ return (Lerror, nil, nil);
+ r.i = i;
+ return (Lvar, ns, nm);
+ '0' to '9' =>
+ (v, f) := number(r, c);
+ if(f)
+ return (Lreal, v, nil);
+ return (Lint, v, nil);
+ '"' or '\'' =>
+ return (Llit, literal(r, c), nil);
+ * =>
+ if(isalnum(c) || c == '_'){
+ # QName/NCName
+ r.unget();
+ (ns, nm, i) := name(r.s, r.i);
+ if(ns == nil && nm == nil)
+ return (Lerror, nil, nil);
+ r.i = i;
+ if(xop(r.prev)){
+ if(ns == nil){
+ o := iskeywd(nm);
+ if(o != Laxis && o != Onodetype)
+ return (o, nil, nil);
+ }
+ return (Lop, ns, nm);
+ }
+ while(isspace(r.look()))
+ r.get();
+ case r.look() {
+ '(' => # D2: NCName '(' =>NodeType or FunctionName
+ if(ns == nil && iskeywd(nm) == Onodetype)
+ return (Onodetype, nil, nm);
+ return (Lfn, ns, nm); # possibly NodeTest
+ ':' => # D3: NCName '::' => AxisName
+ r.get();
+ case r.look() {
+ ':' =>
+ if(ns == nil && look(axes, nm) != 0){
+ r.get();
+ return (Laxis, nm, nil);
+ }
+ '*' =>
+ # NameTest ::= ... | NCName ':' '*'
+ if(ns == nil){
+ r.get();
+ return (Onametest, nm, "*");
+ }
+ }
+ r.unget(); # put back the ':'
+ # NameTest ::= '*' | NCName ':' '*' | QName
+ }
+ return (Onametest, ns, nm); # actually NameTest
+ }
+ # unexpected character
+ }
+ return (Lerror, nil, nil);
+}
+
+subseq(r: ref Rd, a: int, t: int, e: int): Lex
+{
+ if(r.look() != a)
+ return (e, nil, nil);
+ r.get();
+ return (t, nil, nil);
+}
+
+# Literal ::= '"'[^"]*'"' | "'"[^']* "'"
+
+literal(r: ref Rd, delim: int): string
+{
+ s: string;
+ while((c := r.get()) != delim){
+ if(c < 0){
+ synerr("missing string terminator");
+ return s;
+ }
+ if(c)
+ s[len s] = c; # could slice r.s
+ }
+ return s;
+}
+
+#
+# Number ::= Digits('.' Digits?)? | '.' Digits
+# Digits ::= [0-9]+
+#
+number(r: ref Rd, c: int): (string, int)
+{
+ s: string;
+ for(; isdigit(c); c = r.get())
+ s[len s] = c;
+ if(c != '.'){
+ if(c >= 0)
+ r.unget();
+ return (s, 0);
+ }
+ if(!isdigit(c = r.get())){
+ if(c >= 0)
+ r.unget();
+ r.unget(); # the '.'
+ return (s, 0);
+ }
+ s[len s] = '.';
+ do{
+ s[len s] = c;
+ }while(isdigit(c = r.get()));
+ if(c >= 0)
+ r.unget();
+ return (s, 1);
+}
+
+isdigit(c: int): int
+{
+ return c>='0' && c<='9';
+}
+
+Rd: adt{
+ s: string;
+ i: int;
+ prev: int; # previous token
+
+ get: fn(r: self ref Rd): int;
+ look: fn(r: self ref Rd): int;
+ unget: fn(r: self ref Rd);
+};
+
+Rd.get(r: self ref Rd): int
+{
+ if(r.i >= len r.s)
+ return -1;
+ return r.s[r.i++];
+}
+
+Rd.look(r: self ref Rd): int
+{
+ if(r.i >= len r.s)
+ return -1;
+ return r.s[r.i];
+}
+
+Rd.unget(r: self ref Rd)
+{
+ if(r.i > 0)
+ r.i--;
+}
+
+rev[T](l: list of T): list of T
+{
+ rl: list of T;
+ for(; l != nil; l = tl l)
+ rl = hd l :: rl;
+ return rl;
+}
+
+synerr(s: string)
+{
+ raise "syntax error: "+s;
+}
+
+# to do:
+# dictionary?