diff options
Diffstat (limited to 'appl/lib/sexprs.b')
| -rw-r--r-- | appl/lib/sexprs.b | 638 |
1 files changed, 638 insertions, 0 deletions
diff --git a/appl/lib/sexprs.b b/appl/lib/sexprs.b new file mode 100644 index 00000000..7c42dd35 --- /dev/null +++ b/appl/lib/sexprs.b @@ -0,0 +1,638 @@ +implement Sexprs; + +# +# full SDSI/SPKI S-expression reader +# +# Copyright © 2003-2004 Vita Nuova Holdings Limited +# + +include "sys.m"; + sys: Sys; + +include "encoding.m"; + base64: Encoding; + base16: Encoding; + +include "bufio.m"; + bufio: Bufio; + Iobuf: import bufio; + +include "sexprs.m"; + +Maxtoken: con 1024*1024; # should be more than enough + +Syntax: exception(string, big); +Here: con big -1; + +Rd: adt[T] + for { + T => + getb: fn(nil: self T): int; + ungetb: fn(nil: self T): int; + offset: fn(nil: self T): big; + } +{ + t: T; + + parseitem: fn(rd: self ref Rd[T]): ref Sexp raises (Syntax); + ws: fn(rd: self ref Rd[T]): int; + simplestring: fn(rd: self ref Rd[T], c: int, hint: string): ref Sexp raises (Syntax); + toclosing: fn(rd: self ref Rd[T], c: int): string raises (Syntax); + unquote: fn(rd: self ref Rd[T]): string raises (Syntax); +}; + +init() +{ + sys = load Sys Sys->PATH; + base64 = load Encoding Encoding->BASE64PATH; + base16 = load Encoding Encoding->BASE16PATH; + bufio = load Bufio Bufio->PATH; + bufio->sopen(""); +} + +Sexp.read[T](t: T): (ref Sexp, string) + for { + T => + getb: fn(nil: self T): int; + ungetb: fn(nil: self T): int; + offset: fn(nil: self T): big; + } +{ + { + rd := ref Rd[T](t); + e := rd.parseitem(); + return (e, nil); + }exception e { + Syntax => + (diag, pos) := e; + if(pos < big 0) + pos += t.offset(); + return (nil, sys->sprint("%s at offset %bd", diag, pos)); + } +} + +Sexp.parse(s: string): (ref Sexp, string, string) +{ + f := bufio->sopen(s); + (e, diag) := Sexp.read(f); + pos := int f.offset(); + return (e, s[pos:], diag); +} + +Sexp.unpack(a: array of byte): (ref Sexp, array of byte, string) +{ + f := bufio->aopen(a); + (e, diag) := Sexp.read(f); + pos := int f.offset(); + return (e, a[pos:], diag); +} + +Rd[T].parseitem(rd: self ref Rd[T]): ref Sexp raises (Syntax) +{ + p0 := rd.t.offset(); + { + c := rd.ws(); + if(c < 0) + return nil; + case c { + '{' => + a := rd.toclosing('}'); + f := bufio->aopen(base64->dec(a)); + ht: type Rd[ref Iobuf]; + nr := ref ht(f); + return nr.parseitem(); + '(' => + lists: list of ref Sexp; + while((c = rd.ws()) != ')'){ + if(c < 0) + raise Syntax("unclosed '('", p0); + rd.t.ungetb(); + e := rd.parseitem(); # we'll catch missing ) at top of loop + lists = e :: lists; + } + rl := lists; + lists = nil; + for(; rl != nil; rl = tl rl) + lists = hd rl :: lists; + return ref Sexp.List(lists); + '[' => + # display hint + e := rd.simplestring(rd.t.getb(), nil); + c = rd.ws(); + if(c != ']'){ + if(c >= 0) + rd.t.ungetb(); + raise Syntax("missing ] in display hint", p0); + } + pick r := e { + String => + return rd.simplestring(rd.ws(), r.s); + * => + raise Syntax("illegal display hint", Here); + } + * => + return rd.simplestring(c, nil); + } + }exception{ + Syntax => raise; + } +} + +# skip white space +Rd[T].ws(rd: self ref Rd[T]): int +{ + while(isspace(c := rd.t.getb())) + {} + return c; +} + +isspace(c: int): int +{ + return c == ' ' || c == '\r' || c == '\t' || c == '\n'; +} + +Rd[T].simplestring(rd: self ref Rd[T], c: int, hint: string): ref Sexp raises (Syntax) +{ + dec := -1; + decs: string; + if(c >= '0' && c <= '9'){ + for(dec = 0; c >= '0' && c <= '9'; c = rd.t.getb()){ + dec = dec*10 + c-'0'; + decs[len decs] = c; + } + if(dec < 0 || dec > Maxtoken) + raise Syntax("implausible token length", Here); + } + { + case c { + '"' => + text := rd.unquote(); + return ref Sexp.String(text, hint); + '|' => + return sform(base64->dec(rd.toclosing(c)), hint); + '#' => + return sform(base16->dec(rd.toclosing(c)), hint); + * => + if(c == ':' && dec >= 0){ # raw bytes + a := array[dec] of byte; + for(i := 0; i < dec; i++){ + c = rd.t.getb(); + if(c < 0) + raise Syntax("missing bytes in raw token", Here); + a[i] = byte c; + } + return sform(a, hint); + } + #s := decs; + if(decs != nil) + raise Syntax("token can't start with a digit", Here); + s: string; # <token> by definition is always printable; never utf-8 + while(istokenc(c)){ + s[len s] = c; + c = rd.t.getb(); + } + if(s == nil) + raise Syntax("missing token", Here); # consume c to ensure progress on error + if(c >= 0) + rd.t.ungetb(); + return ref Sexp.String(s, hint); + } + }exception{ + Syntax => raise; + } +} + +sform(a: array of byte, hint: string): ref Sexp +{ + if(istextual(a)) + return ref Sexp.String(string a, hint); + return ref Sexp.Binary(a, hint); +} + +Rd[T].toclosing(rd: self ref Rd[T], end: int): string raises (Syntax) +{ + s: string; + p0 := rd.t.offset(); + while((c := rd.t.getb()) != end){ + if(c < 0) + raise Syntax(sys->sprint("missing closing '%c'", end), p0); + s[len s] = c; + } + return s; +} + +hex(c: int): int +{ + if(c >= '0' && c <= '9') + return c-'0'; + if(c >= 'a' && c <= 'f') + return 10+(c-'a'); + if(c >= 'A' && c <= 'F') + return 10+(c-'A'); + return -1; +} + +Rd[T].unquote(rd: self ref Rd[T]): string raises (Syntax) +{ + os: string; + + p0 := rd.t.offset(); + while((c := rd.t.getb()) != '"'){ + if(c < 0) + raise Syntax("unclosed quoted string", p0); + if(c == '\\'){ + e0 := rd.t.offset(); + c = rd.t.getb(); + if(c < 0) + break; + case c { + '\r' => + c = rd.t.getb(); + if(c != '\n') + rd.t.ungetb(); + continue; + '\n' => + c = rd.t.getb(); + if(c != '\r') + rd.t.ungetb(); + continue; + 'b' => + c = '\b'; + 'f' => + c = '\f'; + 'n' => + c = '\n'; + 'r' => + c = '\r'; + 't' => + c = '\t'; + 'v' => + c = '\v'; + '0' to '7' => + oct := 0; + for(i := 0;;){ + if(!(c >= '0' && c <= '7')) + raise Syntax("illegal octal escape", e0); + oct = (oct<<3) | (c-'0'); + if(++i == 3) + break; + c = rd.t.getb(); + } + c = oct & 16rFF; + 'x' => + c0 := hex(rd.t.getb()); + c1 := hex(rd.t.getb()); + if(c0 < 0 || c1 < 0) + raise Syntax("illegal hex escape", e0); + c = (c0<<4) | c1; + * => + ; # as-is + } + } + os[len os] = c; + } + return os; +} + +hintlen(s: string): int +{ + if(s == nil) + return 0; + n := len array of byte s; + return len sys->aprint("[%d:]", n) + n; +} + +Sexp.packedsize(e: self ref Sexp): int +{ + if(e == nil) + return 0; + pick r := e{ + String => + n := len array of byte r.s; + return hintlen(r.hint) + len sys->aprint("%d:", n) + n; + Binary => + n := len r.data; + return hintlen(r.hint) + len sys->aprint("%d:", n) + n; + List => + n := 1; # '(' + for(l := r.l; l != nil; l = tl l) + n += (hd l).packedsize(); + return n+1; # + ')' + } +} + +packbytes(a: array of byte, b: array of byte): array of byte +{ + n := len b; + c := sys->aprint("%d:", n); + a[0:] = c; + a[len c:] = b; + return a[len c+n:]; +} + +packhint(a: array of byte, s: string): array of byte +{ + if(s == nil) + return a; + a[0] = byte '['; + a = packbytes(a[1:], array of byte s); + a[0] = byte ']'; + return a[1:]; +} + +pack(e: ref Sexp, a: array of byte): array of byte +{ + if(e == nil) + return array[0] of byte; + pick r := e{ + String => + if(r.hint != nil) + a = packhint(a, r.hint); + return packbytes(a, array of byte r.s); + Binary => + if(r.hint != nil) + a = packhint(a, r.hint); + return packbytes(a, r.data); + List => + a[0] = byte '('; + a = a[1:]; + for(l := r.l; l != nil; l = tl l) + a = pack(hd l, a); + a[0] = byte ')'; + return a[1:]; + } +} + +Sexp.pack(e: self ref Sexp): array of byte +{ + a := array[e.packedsize()] of byte; + pack(e, a); + return a; +} + +Sexp.b64text(e: self ref Sexp): string +{ + return "{" + base64->enc(e.pack()) + "}"; +} + +Sexp.text(e: self ref Sexp): string +{ + if(e == nil) + return ""; + pick r := e{ + String => + s := quote(r.s); + if(r.hint == nil) + return s; + return "["+quote(r.hint)+"]"+s; + Binary => + h := r.hint; + if(h != nil) + h = "["+quote(h)+"]"; + if(len r.data <= 4) + return sys->sprint("%s#%s#", h, base16->enc(r.data)); + return sys->sprint("%s|%s|", h, base64->enc(r.data)); + List => + s := "("; + for(l := r.l; l != nil; l = tl l){ + s += (hd l).text(); + if(tl l != nil) + s += " "; + } + return s+")"; + } +} + +#An octet string that meets the following conditions may be given +#directly as a "token". +# +# -- it does not begin with a digit +# +# -- it contains only characters that are +# -- alphabetic (upper or lower case), +# -- numeric, or +# -- one of the eight "pseudo-alphabetic" punctuation marks: +# - . / _ : * + = +# (Note: upper and lower case are not equivalent.) +# (Note: A token may begin with punctuation, including ":"). + +istokenc(c: int): int +{ + return c >= '0' && c <= '9' || + c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || + c == '-' || c == '.' || c == '/' || c == '_' || c == ':' || c == '*' || c == '+' || c == '='; +} + +istoken(s: string): int +{ + if(s == nil) + return 0; + for(i := 0; i < len s; i++) + case s[i] { + '0' to '9' => + if(i == 0) + return 0; + 'a' to 'z' or 'A' to 'Z' or + '-' or '.' or '/' or '_' or ':' or '*' or '+' or '=' => + break; + * => + return 0; + } + return 1; +} + +# should the data qualify as binary or text? +# the if(0) version accepts valid Unicode sequences +# could use [display] to control character set? +istextual(a: array of byte): int +{ + for(i := 0; i < len a;){ + if(0){ + (c, n, ok) := sys->byte2char(a, i); + if(!ok || c < ' ' && !isspace(c) || c >= 16r7F) + return 0; + i += n; + }else{ + c := int a[i++]; + if(c < ' ' && !isspace(c) || c >= 16r7F) + return 0; + } + } + return 1; +} + +esc(c: int): string +{ + case c { + '"' => return "\\\""; + '\\' => return "\\\\"; + '\b' => return "\\b"; + '\f' => return "\\f"; + '\n' => return "\\n"; + '\t' => return "\\t"; + '\r' => return "\\r"; + '\v' => return "\\v"; + * => + if(c < ' ' || c >= 16r7F) + return sys->sprint("\\x%.2ux", c & 16rFF); + } + return nil; +} + +quote(s: string): string +{ + if(istoken(s)) + return s; + for(i := 0; i < len s; i++) + if((v := esc(s[i])) != nil){ + os := "\"" + s[0:i] + v; + while(++i < len s){ + if((v = esc(s[i])) != nil) + os += v; + else + os[len os] = s[i]; + } + os[len os] = '"'; + return os; + } + return "\""+s+"\""; +} + +# +# other S expression operations +# +Sexp.islist(e: self ref Sexp): int +{ + return e != nil && tagof e == tagof Sexp.List; +} + +Sexp.els(e: self ref Sexp): list of ref Sexp +{ + if(e == nil) + return nil; + pick s := e { + List => + return s.l; + * => + return nil; + } +} + +Sexp.op(e: self ref Sexp): string +{ + if(e == nil) + return nil; + pick s := e { + String => + return s.s; + Binary => + return nil; + List => + if(s.l == nil) + return nil; + pick t := hd s.l { + String => + return t.s; + * => + return nil; + } + } + return nil; +} + +Sexp.args(e: self ref Sexp): list of ref Sexp +{ + if((l := e.els()) != nil) + return tl l; + return nil; +} + +Sexp.asdata(e: self ref Sexp): array of byte +{ + if(e == nil) + return nil; + pick s := e { + List => + return nil; + String => + return array of byte s.s; + Binary => + return s.data; + } +} + +Sexp.astext(e: self ref Sexp): string +{ + if(e == nil) + return nil; + pick s := e { + List => + return nil; + String => + return s.s; + Binary => + return string s.data; # questionable; should possibly treat it as latin-1 + } +} + +Sexp.eq(e1: self ref Sexp, e2: ref Sexp): int +{ + if(e1 == e2) + return 1; + if(e1 == nil || e2 == nil || tagof e1 != tagof e2) + return 0; + pick s1 := e1 { + List => + pick s2 := e2 { + List => + l1 := s1.l; + l2 := s2.l; + for(; l1 != nil; l1 = tl l1){ + if(l2 == nil || !(hd l1).eq(hd l2)) + return 0; + l2 = tl l2; + } + return l2 == nil; + } + String => + pick s2 := e2 { + String => + return s1.s == s2.s && s1.hint == s2.hint; + } + Binary => + pick s2 := e2 { + Binary => + if(len s1.data != len s2.data || s1.hint != s2.hint) + return 0; + for(i := 0; i < len s1.data; i++) + if(s1.data[i] != s2.data[i]) + return 0; + return 1; + } + } + return 0; +} + +Sexp.copy(e: self ref Sexp): ref Sexp +{ + if(e == nil) + return nil; + pick r := e { + List => + rl: list of ref Sexp; + for(l := r.l; l != nil; l = tl l) + rl = (hd l).copy() :: rl; + for(l = nil; rl != nil; rl = tl rl) + l = hd rl :: l; + return ref Sexp.List(l); + String => + return ref *r; # safe because .s and .hint are strings, immutable + Binary => + b: array of byte; + if((a := r.data) != nil){ + b = array[len a] of byte; + b[0:] = a; + } + return ref Sexp.Binary(b, r.hint); + } +} |
