diff options
Diffstat (limited to 'appl/lib/ubfa.b')
| -rw-r--r-- | appl/lib/ubfa.b | 623 |
1 files changed, 623 insertions, 0 deletions
diff --git a/appl/lib/ubfa.b b/appl/lib/ubfa.b new file mode 100644 index 00000000..bfbf21ca --- /dev/null +++ b/appl/lib/ubfa.b @@ -0,0 +1,623 @@ +implement UBFa; + +# +# UBF(A) data encoding interpreter +# + +include "sys.m"; + sys: Sys; + +include "bufio.m"; + bufio: Bufio; + Iobuf: import bufio; + +include "ubfa.m"; + +Syntax: exception(string); +Badwrite: exception; + +dict: array of list of string; +dictlock: chan of int; + +init(m: Bufio) +{ + sys = load Sys Sys->PATH; + bufio = m; + + dict = array[74] of list of string; + dictlock = chan[1] of int; +} + +uvatom(s: string): ref UValue.Atom +{ + return ref UValue.Atom(uniq(s)); +} + +uvint(i: int): ref UValue.Int +{ + return ref UValue.Int(i); +} + +uvbig(i: big): ref UValue.Int +{ + return ref UValue.Int(int i); +} + +uvbinary(a: array of byte): ref UValue.Binary +{ + return ref UValue.Binary(a); +} + +uvstring(s: string): ref UValue.String +{ + return ref UValue.String(s); +} + +uvtuple(a: array of ref UValue): ref UValue.Tuple +{ + return ref UValue.Tuple(a); +} + +uvlist(l: list of ref UValue): ref UValue.List +{ + return ref UValue.List(l); +} + +uvtag(s: string, o: ref UValue): ref UValue.Tag +{ + return ref UValue.Tag(uniq(s), o); +} + +# needed only to avoid O(n) len s.s +Stack: adt { + s: list of ref UValue; + n: int; + + new: fn(): ref Stack; + pop: fn(s: self ref Stack): ref UValue raises(Syntax); + push: fn(s: self ref Stack, o: ref UValue); +}; + +Stack.new(): ref Stack +{ + return ref Stack(nil, 0); +} + +Stack.pop(s: self ref Stack): ref UValue raises(Syntax) +{ + if(--s.n < 0 || s.s == nil) + raise Syntax("parse stack underflow"); + v := hd s.s; + s.s = tl s.s; + return v; +} + +Stack.push(s: self ref Stack, o: ref UValue) +{ + s.s = o :: s.s; + s.n++; +} + +Parse: adt { + input: ref Iobuf; + stack: ref Stack; + reg: array of ref UValue; + + getb: fn(nil: self ref Parse): int raises(Syntax); + unget: fn(nil: self ref Parse); +}; + +Parse.getb(p: self ref Parse): int raises(Syntax) +{ + c := p.input.getb(); + if(c < 0){ + if(c == Bufio->EOF) + raise Syntax("unexpected end-of-file"); + raise Syntax(sys->sprint("read error: %r")); + } + return c; +} + +Parse.unget(p: self ref Parse) +{ + p.input.ungetb(); +} + +uniq(s: string): string +{ + if(s == nil) + return ""; + dictlock <-= 1; + h := 0; + for(i:=0; i<len s; i++){ + h = (h<<4) + s[i]; + if((g := h & int 16rF0000000) != 0) + h ^= ((g>>24) & 16rFF) | g; + } + h = (h & Sys->Maxint)%len dict; + for(l := dict[h]; l != nil; l = tl l) + if(hd l == s){ + s = hd l; # share space + break; + } + if(l == nil) + dict[h] = s :: dict[h]; + <-dictlock; + return s; +} + +writeubf(out: ref Iobuf, obj: ref UValue): int +{ + { + # write it out, put final '$' + if(out != nil) + writeobj(out, obj); + putc(out, '$'); + return 0; + }exception{ + Badwrite => + return -1; + } +} + +readubf(input: ref Iobuf): (ref UValue, string) +{ + { + return (getobj(ref Parse(input, Stack.new(), array[256] of ref UValue)), nil); + }exception e{ + Syntax => + return (nil, sys->sprint("ubf error: offset %bd: %s", input.offset(), e)); + } +} + +UValue.isatom(o: self ref UValue): int +{ + return tagof o == tagof UValue.Atom; +} + +UValue.isstring(o: self ref UValue): int +{ + return tagof o == tagof UValue.String; +} + +UValue.isint(o: self ref UValue): int +{ + return tagof o == tagof UValue.Int; +} + +UValue.islist(o: self ref UValue): int +{ + return tagof o == tagof UValue.List; +} + +UValue.istuple(o: self ref UValue): int +{ + return tagof o == tagof UValue.Tuple; +} + +UValue.isbinary(o: self ref UValue): int +{ + return tagof o == tagof UValue.Binary; +} + +UValue.istag(o: self ref UValue): int +{ + return tagof o == tagof UValue.Tag; +} + +UValue.isop(o: self ref UValue, op: string, arity: int): int +{ + pick r := o { + Tuple => + if(len r.a > 0 && (arity <= 0 || len r.a == arity)) + pick s := r.a[0] { + Atom => + return s.name == op; + String => + return s.s == op; + } + } + return 0; +} + +UValue.op(o: self ref UValue, arity: int): string +{ + pick r := o { + Tuple => + if(len r.a > 0 && (arity <= 0 || len r.a == arity)) + pick s := r.a[0] { + Atom => + return s.name; + String => + return s.s; + } + } + return nil; +} + +UValue.args(o: self ref UValue, arity: int): array of ref UValue +{ + pick r := o { + Tuple => + if(len r.a > 0 && (arity <= 0 || len r.a == arity)) + return r.a[1:]; + } + return nil; +} + +UValue.els(o: self ref UValue): list of ref UValue +{ + pick r := o { + List => + return r.l; + } + return nil; +} + +UValue.val(o: self ref UValue): int +{ + pick r := o { + Int => + return r.value; + } + return 0; +} + +UValue.objtag(o: self ref UValue): string +{ + pick r := o { + Tag => + return r.name; + } + return nil; +} + +UValue.obj(o: self ref UValue): ref UValue +{ + pick r := o { + Tag => + return r.o; + } + return o; +} + +UValue.binary(o: self ref UValue): array of byte +{ + pick r := o { + Atom => + return array of byte r.name; + String => + return array of byte r.s; + Binary => + return r.a; + } + return nil; +} + +UValue.text(o: self ref UValue): string +{ + pick r := o { + Atom => + return r.name; + String => + return r.s; + Int => + return string r.value; + Tuple => + s := "{"; + for(i := 0; i < len r.a; i++) + s += " "+r.a[i].text(); + return s+"}"; + List => + s := "["; + for(l := r.l; l != nil; l = tl l) + s += " "+(hd l).text(); + return s+"]"; + Binary => + s := "<<"; + for(i := 0; i < len r.a; i++) + s += sys->sprint(" %.2ux", int r.a[i]); + return s+">>"; + Tag => + return "{'$TYPE', "+r.name+", "+r.o.text()+"}"; + * => + return "unknown"; + } +} + +UValue.eq(o: self ref UValue, v: ref UValue): int +{ + if(v == nil) + return 0; + if(o == v) + return 1; + pick r := o { + Atom => + pick s := v { + Atom => + return r.name == s.name; + } + return 0; + String => + pick s := v { + String => + return r.s == s.s; + } + return 0; + Int => + pick s := v { + Int => + return r.value == s.value; + } + Tuple => + pick s := v { + Tuple => + if(len r.a != len s.a) + return 0; + for(i := 0; i < len r.a; i++) + if(!r.a[i].eq(s.a[i])) + return 0; + return 1; + } + return 0; + List => + pick s := v { + List => + l1 := r.l; + l2 := s.l; + while(l1 != nil && l2 != nil){ + if(!(hd l1).eq(hd l2)) + return 0; + l1 = tl l1; + l2 = tl l2; + } + return l1 == l2; + } + return 0; + Binary => + pick s := v { + Binary => + if(len r.a != len s.a) + return 0; + for(i := 0; i < len r.a; i++) + if(r.a[i] != s.a[i]) + return 0; + return 1; + } + return 0; + Tag => + pick s := v { + Tag => + return r.name == s.name && r.o.eq(s.o); + } + return 0; + * => + raise "ubf: bad object"; # can't happen + } +} + +S: con byte 1; + +special := array[256] of { + '\n' or '\r' or '\t' or ' ' or ',' => S, + '}' => S, '$' => S, '>' => S, '#' => S, '&' => S, + '"' => S, '\'' => S, '{' => S, '~' => S, '-' => S, + '0' to '9' => S, '%' => S, '`' => S, * => byte 0 +}; + +getobj(p: ref Parse): ref UValue raises(Syntax) +{ + { + for(;;){ + case p.getb() { + '\n' or '\r' or '\t' or ' ' or ',' => + ; # white space + '%' => + while((c := p.getb()) != '%'){ + if(c == '\\'){ # do comments really use \? + c = p.getb(); + if(c != '\\' && c != '%') + raise Syntax("invalid escape in comment"); + } + } + '}' => + a := array[p.stack.n] of ref UValue; + for(i := len a; --i >= 0;) + a[i] = p.stack.pop(); + return ref UValue.Tuple(a); + '$' => + if(p.stack.n != 1) + raise Syntax("unbalanced stack: size "+string p.stack.n); + return p.stack.pop(); + '>' => + r := p.getb(); + if(special[r] == S) + raise Syntax("invalid register name"); + p.reg[r] = p.stack.pop(); + '`' => + t := uniq(readdelimitedstring(p, '`')); + p.stack.push(ref UValue.Tag(t, p.stack.pop())); + * => + p.unget(); + p.stack.push(readobj(p)); + } + } + }exception{ + Syntax => + raise; + } +} + +readobj(p: ref Parse): ref UValue raises(Syntax) +{ + { + case c := p.getb() { + '#' => + return ref UValue.List(nil); + '&' => + a := p.stack.pop(); + b := p.stack.pop(); + pick r := b { + List => + return ref UValue.List(a :: r.l); # not changed in place: might be shared register value + * => + raise Syntax("can't make cons with cdr "+b.text()); + } + '"' => + return ref UValue.String(readdelimitedstring(p, c)); + '\'' => + return ref UValue.Atom(uniq(readdelimitedstring(p, c))); + '{' => + obj := getobj(ref Parse(p.input, Stack.new(), p.reg)); + if(!obj.istuple()) + raise Syntax("expected tuple: obj"); + return obj; + '~' => + o := p.stack.pop(); + if(!o.isint()) + raise Syntax("expected Int before ~"); + n := o.val(); + if(n < 0) + raise Syntax("negative length for binary"); + a := array[n] of byte; + n = p.input.read(a, len a); + if(n != len a){ + if(n != Bufio->ERROR) + sys->werrstr("short read"); + raise Syntax(sys->sprint("cannot read binary data: %r")); + } + if(p.getb() != '~') + raise Syntax("missing closing ~"); + return ref UValue.Binary(a); + '-' or '0' to '9' => + p.unget(); + return ref UValue.Int(int readinteger(p)); + * => + if(p.reg[c] != nil) + return p.reg[c]; + p.unget(); # point to error + raise Syntax(sys->sprint("invalid start character/undefined register #%.2ux",c)); + } + }exception{ + Syntax => + raise; + } +} + +readdelimitedstring(p: ref Parse, delim: int): string raises(Syntax) +{ + { + s := ""; + while((c := p.input.getc()) != delim){ # note: we'll use UTF-8 + if(c < 0){ + if(c == Bufio->ERROR) + raise Syntax(sys->sprint("read error: %r")); + raise Syntax("unexpected end of file"); + } + if(c == '\\'){ + c = p.getb(); + if(c != '\\' && c != delim) + raise Syntax("invalid escape"); + } + s[len s] = c; + } + return s; + }exception{ + Syntax => + raise; + } +} + +readinteger(p: ref Parse): big raises(Syntax) +{ + sign := 1; + c := p.getb(); + if(c == '-'){ + sign = -1; + c = p.getb(); + if(!(c >= '0' && c <= '9')) + raise Syntax("expected integer literal"); + } + for(n := big 0; c >= '0' && c <= '9'; c = p.getb()){ + n = n*big 10 + big((c-'0')*sign); + if(n > big Sys->Maxint || n < big(-Sys->Maxint-1)) + raise Syntax("integer overflow"); + } + p.unget(); + return n; +} + +writeobj(out: ref Iobuf, o: ref UValue) raises(Badwrite) +{ + { + pick r := o { + Atom => + writedelimitedstring(out, r.name, '\''); + String => + writedelimitedstring(out, r.s, '"'); + Int => + puts(out, string r.value); + Tuple => # { el * } + putc(out, '{'); + for(i := 0; i < len r.a; i++){ + if(i != 0) + putc(out, ' '); + writeobj(out, r.a[i]); + } + putc(out, '}'); + List => # # eN & eN-1 & ... & e0 & + putc(out, '#'); + # put them out in reverse order, each followed by '&' + rl: list of ref UValue; + for(l := r.l; l != nil; l = tl l) + rl = hd l :: rl; + for(; rl != nil; rl = tl rl){ + writeobj(out, hd rl); + putc(out, '&'); + } + Binary => # Int ~data~ + puts(out, string len r.a); + putc(out, '~'); + if(out.write(r.a, len r.a) != len r.a) + raise Badwrite; + putc(out, '~'); + Tag => # obj `tag` + writeobj(out, r.o); + writedelimitedstring(out, r.name, '`'); + * => + raise "ubf: unknown object"; # can't happen + } + }exception{ + Badwrite => + raise; + } +} + +writedelimitedstring(out: ref Iobuf, s: string, d: int) raises(Badwrite) +{ + { + putc(out, d); + for(i := 0; i < len s; i++){ + c := s[i]; + if(c == d || c == '\\') + putc(out, '\\'); + putc(out, c); + } + putc(out, d); + }exception{ + Badwrite => + raise; + } +} + +puts(out: ref Iobuf, s: string) raises(Badwrite) +{ + if(out.puts(s) == Bufio->ERROR) + raise Badwrite; +} + +putc(out: ref Iobuf, c: int) raises(Badwrite) +{ + if(out.putc(c) == Bufio->ERROR) + raise Badwrite; +} |
