diff options
Diffstat (limited to 'appl/lib')
| -rw-r--r-- | appl/lib/csv.b | 86 | ||||
| -rw-r--r-- | appl/lib/rfc822.b | 561 | ||||
| -rw-r--r-- | appl/lib/w3c/uris.b | 320 |
3 files changed, 967 insertions, 0 deletions
# ---- appl/lib/csv.b (new file, mode 100644, index 00000000..336e2cde) ----
implement CSV;

include "sys.m";

include "bufio.m";
	bufio: Bufio;
	Iobuf: import bufio;

include "csv.m";

#
# remember the Bufio module instance supplied by the caller
#
init(b: Bufio)
{
	bufio = b;
}

#
# read one record from fd and return its fields in input order.
# returns nil only when the very first field is empty and was
# ended by end of input (ie, nothing was left to read).
#
getline(fd: ref Iobuf): list of string
{
	rl: list of string;
	for(;;){
		(w, end) := getfield(fd);
		if(rl == nil && w == nil && end < 0)
			return nil;
		rl = w :: rl;
		if(end != ',')
			break;
	}
	# fields were collected most-recent-first; restore input order
	l: list of string;
	for(; rl != nil; rl = tl rl)
		l = hd rl :: l;
	return l;
}

#
# read one field from fd.
# returns (text, terminator) where terminator is ',' or '\n',
# or a negative value when input ended.
# a field may start with a quoted section, in which "" stands for
# a literal "; any unquoted text after the closing quote is appended.
#
getfield(fd: ref Iobuf): (string, int)
{
	w := "";
	if((c := getcr(fd)) == '"'){	# quoted field
		while((c = getcr(fd)) >= 0){
			if(c == '"'){
				c = getcr(fd);
				if(c != '"')
					break;	# closing quote; c is the character after it
			}
			w[len w] = c;
		}
	}
	# unquoted text, possibly following quoted text above
	for(; c >= 0 && c != ',' && c != '\n'; c = getcr(fd))
		w[len w] = c;
	return (w, c);
}

#
# read one character, mapping both "\r\n" and a lone '\r' to '\n'
#
getcr(fd: ref Iobuf): int
{
	c := fd.getc();
	if(c == '\r'){
		nc := fd.getc();
		if(nc >= 0 && nc != '\n')
			fd.ungetc();	# lone \r: keep the following character
		c = '\n';
	}
	return c;
}

#
# return s in a form that getfield reads back as a single field:
# unchanged if it has no special characters, otherwise enclosed in
# double quotes with each embedded " doubled.
# '\r' is treated as a line break here because getcr treats it as
# one on input; left unquoted it would end the record early.
#
quote(s: string): string
{
	sep := 0;
	for(i := 0; i < len s; i++)
		if((c := s[i]) == '"')
			return innerquote(s);
		else if(c == ',' || c == '\n' || c == '\r')
			sep = 1;
	if(sep)
		return "\""+s+"\"";
	return s;
}

#
# enclose s in double quotes, doubling each embedded "
#
innerquote(s: string): string
{
	w := "\"";
	for(i := j := 0; i < len s; i++)
		if(s[i] == '"'){
			w += s[j: i+1];	# including "
			j = i;	# including " again
		}
	return w+s[j:i]+"\"";
}

# ---- appl/lib/rfc822.b (new file, mode 100644, index 00000000..0f15a585) ----
implement RFC822;

include "sys.m";
	sys: Sys;

include "bufio.m";
	bufio: Bufio;
	Iobuf: import bufio;

include "rfc822.m";

include "string.m";
	str: String;

include "daytime.m";
	daytime: Daytime;
	Tm: import daytime;

Minrequest: con 512;	# more than enough for most requests

# one entry of the suffix table read from SuffixFile
Suffix: adt {
	suffix: string;
	generic: string;
	specific: string;
	encoding: string;
};

SuffixFile: con "/lib/mimetype";
mtime := 0;
qid: Sys->Qid;

suffixes: list of ref Suffix;

#
# raise an exception reporting that module s could not be loaded
#
nomod(s: string)
{
	raise sys->sprint("internal: can't load %s: %r", s);
}

#
# load the modules this one depends on (raising an exception on failure)
# and read the suffix table; b is the caller's Bufio instance
#
init(b: Bufio)
{
	sys = load Sys Sys->PATH;
	bufio = b;
	str = load String String->PATH;
	if(str == nil)
		nomod(String->PATH);
	daytime = load Daytime Daytime->PATH;
	if(daytime == nil)
		nomod(Daytime->PATH);
	readsuffixfile();
}

#
# read header lines from fd up to and including the first empty line
# (or end of input) and return them as (name, value) pairs in input order.
# "\r\n" is stored as "\n"; a line break followed by space or tab is a
# continuation and the break is replaced by a space.
# name is the lower-cased text before the first ':' ("" if there is none);
# value is the remaining bytes of the line, including its final newline.
# returns nil if nothing was read, or if the buffer needs to grow when it
# is already at least limit bytes long.
#
readheaders(fd: ref Iobuf, limit: int): array of (string, array of byte)
{
	n := 0;
	s := 0;
	b := array[Minrequest] of byte;
	nline := 0;
	lines: list of array of byte;
	while((c := fd.getb()) >= 0){
		if(c == '\r'){
			c = fd.getb();
			if(c < 0)
				break;
			if(c != '\n'){
				fd.ungetb();
				c = '\r';
			}
		}
		if(n >= len b){
			if(len b >= limit)
				return nil;
			ab := array[n+512] of byte;
			ab[0:] = b;
			b = ab;
		}
		b[n++] = byte c;
		if(c == '\n'){
			if(n == 1 || b[n-2] == byte '\n')
				break;	# empty line
			c = fd.getb();
			if(c < 0)
				break;
			if(c != ' ' && c != '\t'){	# not continued
				fd.ungetb();
				lines = b[s: n] :: lines;
				nline++;
				s = n;
			}else
				b[n-1] = byte ' ';
		}
	}
	if(n == 0)
		return nil;
	b = b[0: n];
	if(n != s){
		# whatever follows the last completed line
		lines = b[s:n] :: lines;
		nline++;
	}
	a := array[nline] of (string, array of byte);
	# lines is most-recent-first, so fill the array from the end
	for(; lines != nil; lines = tl lines){
		b = hd lines;
		name := "";
		for(i := 0; i < len b; i++)
			if(b[i] == byte ':'){
				name = str->tolower(string b[0:i]);
				b = b[i+1:];
				break;
			}
		a[--nline] = (name, b);
	}
	return a;
}

#
#	*(";" parameter)	used in transfer-extension, media-type and media-range
#	parameter = attribute "=" value
#	attribute = token
#	value = token | quoted-string
#
# returns the (attribute, value) pairs in input order; the token that
# stopped the scan is pushed back with unlex
#
parseparams(ps: ref Rfclex): list of (string, string)
{
	l: list of (string, string);
	do{
		if(ps.lex() != Word)
			break;
		attr := ps.wordval;
		if(ps.lex() != '=' || ps.lex() != Word && ps.tok != QString)
			break;
		l = (attr, ps.wordval) :: l;
	}while(ps.lex() == ';');
	ps.unlex();
	return rev(l);
}

#
#	1#transfer-coding
#
# returns each word with its parameter list, in input order
#
mimefields(ps: ref Rfclex): list of (string, list of (string, string))
{
	rf: list of (string, list of (string, string));
	do{
		if(ps.lex() == Word){
			w := ps.wordval;
			if(ps.lex() == ';'){
				rf = (w, parseparams(ps)) :: rf;
				ps.lex();
			}else
				rf = (w, nil) :: rf;
		}
	}while(ps.tok == ',');
	ps.unlex();
	# rf is in reverse order of input
	f: list of (string, list of (string, string));
	for(; rf != nil; rf = tl rf)
		f = hd rf :: f;
	return f;
}

#	#(media-type | (media-range [accept-params]))	; Content-Type and Accept
#
#	media-type = type "/" subtype *( ";" parameter )
#	type = token
#	subtype = token
#	LWS must not be used between type and subtype, nor between attribute and value (in parameter)
#
#	media-range = ("*/*" | type "/*" | type "/" subtype ) *(";" parameter)
#	accept-params = ";" "q" "=" qvalue *( accept-extension )
#	accept-extension = ";" token [ "=" ( token | quoted-string ) ]
#
#	1#( ( charset | "*" )[ ";" "q" "=" qvalue ] )	; Accept-Charset
#	1#( codings [ ";" "q" "=" qvalue ] )	; Accept-Encoding
#	1#( language-range [ ";" "q" "=" qvalue ] )	; Accept-Language
#
#	codings = ( content-coding | "*" )
#
# each item parsed is pushed onto the front of head, which is returned
#
parsecontent(ps: ref Rfclex, multipart: int, head: list of ref Content): list of ref Content
{
	do{
		if(ps.lex() == Word){
			generic := ps.wordval;
			specific := "*";
			if(ps.lex() == '/'){
				if(ps.lex() != Word)
					break;
				specific = ps.wordval;
				if(!multipart && specific != "*")
					break;
			}else if(multipart)
				break;	# syntax error
			else
				ps.unlex();
			params: list of (string, string) = nil;
			if(ps.lex() == ';'){
				params = parseparams(ps);
				ps.lex();
			}
			head = Content.mk(generic, specific, params) :: head;	# order reversed, but doesn't matter
		}
	}while(ps.tok == ',');
	ps.unlex();
	return head;
}

#
# return a reversed copy of l
#
rev(l: list of (string, string)): list of (string, string)
{
	rl: list of (string, string);
	for(; l != nil; l = tl l)
		rl = hd l :: rl;
	return rl;
}

#
# make a lexical analyser that reads from the bytes of a
#
Rfclex.mk(a: array of byte): ref Rfclex
{
	ps := ref Rfclex;
	ps.fd = bufio->aopen(a);
	ps.tok = '\n';
	ps.eof = 0;
	return ps;
}

#
# read the next byte, noting end of input in ps.eof
#
Rfclex.getc(ps: self ref Rfclex): int
{
	c := ps.fd.getb();
	if(c < 0)
		ps.eof = 1;
	return c;
}

#
# push back the last byte read, unless end of input was reached
#
Rfclex.ungetc(ps: self ref Rfclex)
{
	if(!ps.eof)
		ps.fd.ungetb();
}

#
# return the next token, taking one pushed back by unlex in
# preference to reading more input; sets ps.tok and ps.wordval
#
Rfclex.lex(ps: self ref Rfclex): int
{
	if(ps.seen != nil){
		(ps.tok, ps.wordval) = hd ps.seen;
		ps.seen = tl ps.seen;
	}else
		ps.tok = lex1(ps, 0);
	return ps.tok;
}

#
# push the current token back so the next lex returns it again
#
Rfclex.unlex(ps: self ref Rfclex)
{
	ps.seen = (ps.tok, ps.wordval) :: ps.seen;
}

#
# skip white space and comments; the character that stopped the
# scan is returned and left unread ('\n' is returned but consumed)
#
Rfclex.skipws(ps: self ref Rfclex): int
{
	return lex1(ps, 1);
}

#
# rfc 2822/rfc 1521 lexical analyzer
#
# skips white space and (possibly nested) parenthesised comments.
# if skipwhite is set, the next significant character is returned and
# pushed back; otherwise the result is a special character, QString,
# Word (text in ps.wordval), or '\n' at end of line or end of input.
#
lex1(ps: ref Rfclex, skipwhite: int): int
{
	ps.wordval = nil;
	while((c := ps.getc()) >= 0){
		case c {
		'(' =>
			level := 1;
			while((c = ps.getc()) != Bufio->EOF && c != '\n'){
				if(c == '\\'){
					c = ps.getc();
					if(c == Bufio->EOF)
						return '\n';
					continue;
				}
				if(c == '(')
					level++;
				else if(c == ')' && --level == 0)
					break;
			}
		' ' or '\t' or '\r' or 0 =>
			;
		'\n' =>
			return '\n';
		')' or '<' or '>' or '[' or ']' or '@' or '/' or ',' or
		';' or ':' or '?' or '=' =>
			if(skipwhite){
				ps.ungetc();
				return c;
			}
			return c;

		'"' =>
			if(skipwhite){
				ps.ungetc();
				return c;
			}
			word(ps,"\"");
			ps.getc();		# skip the closing quote
			return QString;

		* =>
			ps.ungetc();
			if(skipwhite)
				return c;
			word(ps,"\"()<>@,;:/[]?={}\r\n \t");
			return Word;
		}
	}
	return '\n';
}

# return the rest of an rfc 822 line, not including \r or \n
# do not map to lower case

Rfclex.line(ps: self ref Rfclex): string
{
	s := "";
	while((c := ps.getc()) != Bufio->EOF && c != '\n' && c != '\r'){
		if(c == '\\'){
			c = ps.getc();	# take the escaped character literally
			if(c == Bufio->EOF)
				break;
		}
		s[len s] = c;
	}
	ps.tok = '\n';
	ps.wordval = s;
	return s;
}

#
# collect characters into ps.wordval until one in stop is seen
# (it is pushed back) or input ends.
# '\r' is treated as a space; a backslash makes the next character
# literal; upper-case ASCII letters are mapped to lower case.
#
word(ps: ref Rfclex, stop: string)
{
	w := "";
	while((c := ps.getc()) != Bufio->EOF){
		if(c == '\r')
			c = ' ';
		if(c == '\\'){
			c = ps.getc();
			if(c == Bufio->EOF)
				break;
		}else if(str->in(c,stop)){
			ps.ungetc();
			break;
		}
		if(c >= 'A' && c <= 'Z')
			c += 'a' - 'A';
		w[len w] = c;
	}
	ps.wordval = w;
}

#
# read SuffixFile, adding an entry to suffixes for each usable line.
# returns nil on success or a diagnostic if the file cannot be opened;
# errors on individual lines are ignored.
#
readsuffixfile(): string
{
	iob := bufio->open(SuffixFile, Bufio->OREAD);
	if(iob == nil)
		return sys->sprint("cannot open %s: %r", SuffixFile);
	for(n := 1; (line := iob.gets('\n')) != nil; n++){
		(s, nil) := parsesuffix(line);
		if(s != nil)
			suffixes = s :: suffixes;
	}
	return nil;
}

#
# parse one line of the suffix file:
#	suffix generic specific encoding
# where "-" stands for an empty field and '#' starts a comment.
# returns (entry, nil) for a usable line, (nil, nil) for a line that is
# empty or carries no useful information, and (nil, diagnostic) for a
# line with fewer than four fields.
#
parsesuffix(line: string): (ref Suffix, string)
{
	(line, nil) = str->splitstrl(line, "#");
	if(line == nil)
		return (nil, nil);
	(n, slist) := sys->tokenize(line,"\n\t ");
	if(n == 0)
		return (nil, nil);
	if(n < 4)
		return (nil, "too few fields");
	s := ref Suffix;
	s.suffix = hd slist;
	slist = tl slist;
	s.generic = hd slist;
	if (s.generic == "-")
		s.generic = "";
	slist = tl slist;
	s.specific = hd slist;
	if (s.specific == "-")
		s.specific = "";
	slist = tl slist;
	s.encoding = hd slist;
	if (s.encoding == "-")
		s.encoding = "";
	if((s.generic == nil || s.specific == nil) && s.encoding == nil)
		return (nil, nil);
	return (s, nil);
}

#
# classify by file suffix
#
# strips one suffix at a time from the last path element of name and
# looks each up in the suffix table; the first type and the first
# encoding found are returned (either can be nil)
#
suffixclass(name: string): (ref Content, ref Content)
{
	typ, enc: ref Content;

	p := str->splitstrr(name, "/").t1;
	if(p != nil)
		name = p;

	for(;;){
		(name, p) = suffix(name);	# TO DO: match below is case sensitive
		if(p == nil)
			break;
		for(l := suffixes; l != nil; l = tl l){
			s := hd l;
			if(p == s.suffix){
				if(s.generic != nil && typ == nil)
					typ = Content.mk(s.generic, s.specific, nil);
				if(s.encoding != nil && enc == nil)
					enc = Content.mk(s.encoding, "", nil);
				if(typ != nil && enc != nil)
					break;
			}
		}
	}
	return (typ, enc);
}

#
# split s at its last '.': returns (text before it, suffix including
# the '.'), or (s, nil) if s contains no '.'
#
suffix(s: string): (string, string)
{
	for(n := len s; --n >= 0;)
		if(s[n] == '.')
			return (s[0: n], s[n:]);
	return (s, nil);
}

#
# classify by initial contents of file
#
# returns (nil, nil) if a contains a control character other than
# \n \r \t \v \f, or a sequence byte2char reports as Sys->UTFerror;
# otherwise text/plain, with charset=utf-8 if any byte is 16r80 or above
#
dataclass(a: array of byte): (ref Content, ref Content)
{
	utf8 := 0;
	for(i := 0; i < len a;){
		c := int a[i];
		if(c < 16r80){
			if(c < 32 && c != '\n' && c != '\r' && c != '\t' && c != '\v' && c != '\f')
				return (nil, nil);
			i++;
		}else{
			utf8 = 1;
			(r, l, nil) := sys->byte2char(a, i);
			if(r == Sys->UTFerror)
				return (nil, nil);
			i += l;
		}
	}
	if(utf8)
		params := ("charset", "utf-8") :: nil;
	return (Content.mk("text", "plain", params), nil);
}

#
# make a Content value from its three components
#
Content.mk(generic, specific: string, params: list of (string, string)): ref Content
{
	c := ref Content;
	c.generic = generic;
	c.specific = specific;
	c.params = params;
	return c;
}

#
# return true if me matches any entry of oks, where "*" in an entry
# matches anything; an empty oks list accepts everything.
# me is lower-cased before comparison; entries of oks are used as given.
#
Content.check(me: self ref Content, oks: list of ref Content): int
{
	if(oks == nil)
		return 1;
	g := str->tolower(me.generic);
	s := str->tolower(me.specific);
	for(; oks != nil; oks = tl oks){
		ok := hd oks;
		if((ok.generic == g || ok.generic=="*") &&
		   (s == nil || ok.specific == s || ok.specific=="*"))
			return 1;
	}
	return 0;
}

#
# return the textual form generic[/specific]{;name=value},
# quoting parameter values where needed
#
Content.text(c: self ref Content): string
{
	if((s := c.specific) != nil)
		s = c.generic+"/"+s;
	else
		s = c.generic;
	for(l := c.params; l != nil; l = tl l){
		(n, v) := hd l;
		s += sys->sprint(";%s=%s", n, quote(v));
	}
	return s;
}

#
# should probably be in a Mime or HTTP module
#

Quotable: con "()<>@,;:\\\"/[]?={} \t";

#
# return true if s contains a character from Quotable
#
quotable(s: string): int
{
	for(i := 0; i < len s; i++)
		if(str->in(s[i], Quotable))
			return 1;
	return 0;
}

#
# return s unchanged if it has no Quotable character; otherwise
# enclose it in double quotes, with a backslash before each
# Quotable character
#
quote(s: string): string
{
	if(!quotable(s))
		return s;
	q := "\"";
	for(i := 0; i < len s; i++){
		if(str->in(s[i], Quotable))
			q[len q] = '\\';
		q[len q] = s[i];
	}
	q[len q] = '"';
	return q;
}

weekdays := array[] of {
	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};

months := array[] of {
	"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};

# print dates in the format
# Wkd, DD Mon YYYY HH:MM:SS GMT

sec2date(t: int): string
{
	tm := daytime->gmt(t);
	return sys->sprint("%s, %.2d %s %.4d %.2d:%.2d:%.2d GMT",
		weekdays[tm.wday], tm.mday, months[tm.mon], tm.year+1900,
		tm.hour, tm.min, tm.sec);
}

# parse dates of formats
# Wkd, DD Mon YYYY HH:MM:SS GMT
# Weekday, DD-Mon-YY HH:MM:SS GMT
# Wkd Mon ( D|DD) HH:MM:SS YYYY
# plus anything similar
#
# returns 0 if the date cannot be parsed, its year field is below 70,
# or its zone is not "GMT"

date2sec(date: string): int
{
	tm := daytime->string2tm(date);
	if(tm == nil || tm.year < 70 || tm.zone != "GMT")
		t := 0;
	else
		t = daytime->tm2epoch(tm);
	return t;
}

#
# current time as returned by daytime->now
#
now(): int
{
	return daytime->now();
}

#
# current time in the format produced by sec2date
#
time(): string
{
	return sec2date(daytime->now());
}

# ---- appl/lib/w3c/uris.b (new file, mode 100644, index 00000000..b49c17b8) ----
implement URIs;

#
# RFC3986, URI Generic Syntax
#

include "sys.m";
	sys: Sys;

include "string.m";
	S: String;

include "uris.m";

Alpha: con "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
Digit: con "0123456789";

GenDelims: con ":/?#[]@";
SubDelims: con "!$&'()*+,;=";
Reserved: con GenDelims + SubDelims;
HexDigit: con Digit+"abcdefABCDEF";

Escape: con GenDelims+"%";	# "%" must be encoded as %25

Unreserved: con Alpha+Digit+"-._~";

# character class flags held in ctype
F_Esc, F_Scheme: con byte(1<<iota);

ctype: array of byte;

#
# set flag f in ctype for every character of s
#
classify(s: string, f: byte)
{
	for(i := 0; i < len s; i++)
		ctype[s[i]] |= f;
}

#
# load the modules needed and build the character class table:
# F_Esc marks the characters of Escape, everything up to and including
# space, and 16r80 to 16rFF; F_Scheme marks the characters accepted
# in a scheme name
#
init()
{
	sys = load Sys Sys->PATH;
	S = load String String->PATH;
	if(S == nil)
		raise sys->sprint("can't load %s: %r", String->PATH);

	ctype = array [256] of { * => byte 0 };
	classify(Escape, F_Esc);
	for(i := 0; i <= ' '; i++)
		ctype[i] |= F_Esc;
	for(i = 16r80; i <= 16rFF; i++)
		ctype[i] |= F_Esc;
	classify(Alpha+Digit+"+-.", F_Scheme);
}

#	scheme://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
#
#	^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
#
#	delimiters:  :/?#  /?#  ?#  #
#
# split url into its components.  the scheme is lower-cased; userinfo,
# host, path and fragment are %-decoded; query keeps its leading '?'
# and fragment its leading '#'.  when an authority is present the path
# always starts with '/'.
#
URI.parse(url: string): ref URI
{
	scheme, userinfo, host, port, path, query, frag: string;
	for(i := 0; i < len url; i++){
		c := url[i];
		if(c == ':' && i > 0){	# a scheme has at least one character
			scheme = S->tolower(url[0:i]);
			url = url[i+1:];
			break;
		}
		if(c < 0 || c >= len ctype || (ctype[c] & F_Scheme) == byte 0)
			break;
	}

	if(S->prefix("//", url)){
		authority: string;
		# the authority ends at the first of / ? # (RFC3986 3.2), not only at /
		(authority, path) = S->splitl(url[2:], "/?#");
		(up, hp) := splitl(authority, "@");
		if(hp == "")
			hp = authority;
		else
			userinfo = up;
		if(hp != nil && hp[0] == '['){	# another rfc hack, for IPv6 addresses, which contain :
			(host, hp) = S->splitstrr(hp, "]");
			if(hp != nil && hp[0] == ':')
				port = hp[1:];
			else
				host += hp;	# put it back
		}else
			(host, port) = splitl(hp, ":");
		# an empty path after an authority is represented as "/",
		# including when a query or fragment follows directly
		if(path == nil || path[0] != '/')
			path = "/" + path;
	}else
		path = url;
	(path, frag) = S->splitstrl(path, "#");	# includes # in frag
	(path, query) = S->splitstrl(path, "?");	# includes ? in query
	return ref URI(scheme, dec(userinfo), dec(host), port, dec(path), query, dec(frag));
}

#
# split userinfo at its first ':' into (user, password)
#
URI.userpw(u: self ref URI): (string, string)
{
	return splitl(u.userinfo, ":");
}

#
# return the URI as a string, %-encoding path and fragment as required
#
URI.text(u: self ref URI): string
{
	s := "";
	if(u.scheme != nil)
		s += u.scheme + ":";
	if(u.hasauthority())
		s += "//" + u.authority();
	return s + enc(u.path, "/@:") + u.query + enc1(u.fragment, "@:/?");
}

#
# return a new copy of u
#
URI.copy(u: self ref URI): ref URI
{
	return ref *u;
}

#
# return a copy of u without userinfo, query and fragment
#
URI.pathonly(u: self ref URI): ref URI
{
	v := ref *u;
	v.userinfo = nil;
	v.query = nil;
	v.fragment = nil;
	return v;
}

#
# return a copy of u resolved against base b (which can be nil),
# with dot segments removed from the resulting path
#
URI.addbase(u: self ref URI, b: ref URI): ref URI
{
	# RFC3986 5.2.2, rearranged
	r := ref *u;
	if(r.scheme == nil && b != nil){
		r.scheme = b.scheme;
		if(!r.hasauthority()){
			r.userinfo = b.userinfo;
			r.host = b.host;
			r.port = b.port;
			if(r.path == nil){
				r.path = b.path;
				if(r.query == nil)
					r.query = b.query;
			}else if(r.path[0] != '/'){
				# 5.2.3: merge paths
				if(b.path == "" && b.hasauthority())
					p1 := "/";
				else
					(p1, nil) = S->splitstrr(b.path, "/");
				r.path = p1 + r.path;
			}
		}
	}
	r.path = removedots(r.path);
	return r;
}

#
# return a copy of u with dot segments removed from its path
#
URI.nodots(u: self ref URI): ref URI
{
	return u.addbase(nil);
}

#
# true if any of host, userinfo or port is set
#
URI.hasauthority(u: self ref URI): int
{
	return u.host != nil || u.userinfo != nil || u.port != nil;
}

#
# true if u has a scheme
#
URI.isabsolute(u: self ref URI): int
{
	return u.scheme != nil;
}

#
# return the authority component as text: [userinfo@][host[:port]];
# the port is included only if there is a host
#
URI.authority(u: self ref URI): string
{
	s := enc(u.userinfo, ":");
	if(s != nil)
		s += "@";
	if(u.host != nil){
		s += enc(u.host, "[]:");	# assumes : appears inside []; could enforce it
		if(u.port != nil)
			s += ":" + enc(u.port,nil);
	}
	return s;
}

#
# simplified version of procedure in RFC3986 5.2.4:
# it extracts a complete segment from the input first, then analyses it
#
removedots(s: string): string
{
	if(s == nil)
		return "";
	out := "";
	for(p := 0; p < len s;){
		# extract the first segment and any preceding /
		q := p;
		if(++p < len s){
			while(++p < len s && s[p] != '/')
				{}
		}
		seg := s[q: p];
		if((e := p) < len s)
			e++;
		case s[q: e] {	# includes any following /
		"../" or "./" =>	;
		"/./" or "/." =>
			if(p >= len s)
				s += "/";
		"/../" or "/.." =>
			if(p >= len s)
				s += "/";
			if(out != nil){
				for(q = len out; --q > 0 && out[q] != '/';)
					{}	# skip
				out = out[0: q];
			}
		"." or ".." =>	;	# null effect
		* =>	# including "/"
			out += seg;
		}
	}
	return out;
}

#
# similar to splitstrl but trims the matched character from the result
#
splitl(s, c: string): (string, string)
{
	(a, b) := S->splitstrl(s, c);
	if(b != "")
		b = b[1:];
	return (a, b);
}

#
# return the value of the two hex digits at the start of s,
# or -1 if s is too short or has a non-hex character there
#
hex2(s: string): int
{
	n := 0;
	for(i := 0; i < 2; i++){
		if(i >= len s)
			return -1;
		n <<= 4;
		case c := s[i] {
		'0' to '9' =>
			n += c-'0';
		'a' to 'f' =>
			n += 10+(c-'a');
		'A' to 'F' =>
			n += 10+(c-'A');
		* =>
			return -1;
		}
	}
	return n;
}

#
# replace each %XX in s by the character with that value.
# a '%' not followed by two hex digits, or followed by 00,
# is left as it is; a zero character in s becomes a space.
#
dec(s: string): string
{
	# find the first character needing attention, if any
	for(i := 0;; i++){
		if(i >= len s)
			return s;
		if(s[i] == '%' || s[i] == 0)
			break;
	}
	o := s[0:i];
	while(i < len s){
		case c := s[i++] {
		'%' =>
			if((v := hex2(s[i:])) > 0){
				c = v;
				i += 2;
			}
		0 =>
			c = ' ';	# shouldn't happen
		}
		o[len o] = c;
	}
	return o;
}

#
# like enc, but the first character of s is copied without encoding
#
enc1(s: string, safe: string): string
{
	if(len s > 1)
		return s[0:1] + enc(s[1:], safe);
	return s;
}

# encoding depends on context (eg, &=/: not escaped in `query' string)
#
# returns s with each byte of its UTF-8 form that is marked F_Esc
# and is not in safe replaced by %XX; s itself is returned if nothing
# needs encoding
enc(s: string, safe: string): string
{
	for(i := 0;; i++){
		if(i >= len s)
			return s;	# use as-is
		c := s[i];
		if(c >= 16r80 || (ctype[c] & F_Esc) != byte 0 && !S->in(c, safe))
			break;
	}
	t := s[0: i];
	# work on bytes from here on, so each byte is encoded separately
	b := array of byte s[i:];
	for(i = 0; i < len b; i++){
		c := int b[i];
		if((ctype[c] & F_Esc) != byte 0 && !S->in(c, safe))
			t += sys->sprint("%%%.2X", c);
		else
			t[len t] = c;
	}
	return t;
}

#
# true if u and v agree in every component except the fragment
#
URI.eq(u: self ref URI, v: ref URI): int
{
	if(v == nil)
		return 0;
	return u.scheme == v.scheme && u.userinfo == v.userinfo &&
		u.host == v.host && u.port == v.port && u.path == v.path &&	# path might need canon
		u.query == v.query;	# not fragment
}

#
# true if u and v agree in every component including the fragment
#
URI.eqf(u: self ref URI, v: ref URI): int
{
	return u.eq(v) && u.fragment == v.fragment;
}
