summaryrefslogtreecommitdiff
path: root/appl/lib
diff options
context:
space:
mode:
Diffstat (limited to 'appl/lib')
-rw-r--r--appl/lib/csv.b86
-rw-r--r--appl/lib/rfc822.b561
-rw-r--r--appl/lib/w3c/uris.b320
3 files changed, 967 insertions, 0 deletions
diff --git a/appl/lib/csv.b b/appl/lib/csv.b
new file mode 100644
index 00000000..336e2cde
--- /dev/null
+++ b/appl/lib/csv.b
@@ -0,0 +1,86 @@
+implement CSV;
+
+include "sys.m";
+
+include "bufio.m";
+ bufio: Bufio;
+ Iobuf: import bufio;
+
+include "csv.m";
+
+init(b: Bufio)
+{
+ bufio = b;
+}
+
+getline(fd: ref Iobuf): list of string
+{
+ rl: list of string;
+ for(;;){
+ (w, end) := getfield(fd);
+ if(rl == nil && w == nil && end < 0)
+ return nil;
+ rl = w :: rl;
+ if(end != ',')
+ break;
+ }
+ l: list of string;
+ for(; rl != nil; rl = tl rl)
+ l = hd rl :: l;
+ return l;
+}
+
+getfield(fd: ref Iobuf): (string, int)
+{
+ w := "";
+ if((c := getcr(fd)) == '"'){ # quoted field
+ while((c = getcr(fd)) >= 0){
+ if(c == '"'){
+ c = getcr(fd);
+ if(c != '"')
+ break;
+ }
+ w[len w] = c;
+ }
+ }
+ # unquoted text, possibly following quoted text above
+ for(; c >= 0 && c != ',' && c != '\n'; c = getcr(fd))
+ w[len w] = c;
+ return (w, c);
+}
+
+getcr(fd: ref Iobuf): int
+{
+ c := fd.getc();
+ if(c == '\r'){
+ nc := fd.getc();
+ if(nc >= 0 && nc != '\n')
+ fd.ungetc();
+ c = '\n';
+ }
+ return c;
+}
+
+quote(s: string): string
+{
+ sep := 0;
+ for(i := 0; i < len s; i++)
+ if((c := s[i]) == '"')
+ return innerquote(s);
+ else if(c == ',' || c == '\n')
+ sep = 1;
+ if(sep)
+ return "\""+s+"\"";
+ return s;
+}
+
+innerquote(s: string): string
+{
+ w := "\"";
+ for(i := j := 0; i < len s; i++)
+ if(s[i] == '"'){
+ w += s[j: i+1]; # including "
+ j = i; # including " again
+ }
+ return w+s[j:i]+"\"";
+}
diff --git a/appl/lib/rfc822.b b/appl/lib/rfc822.b
new file mode 100644
index 00000000..0f15a585
--- /dev/null
+++ b/appl/lib/rfc822.b
@@ -0,0 +1,561 @@
+implement RFC822;
+
+include "sys.m";
+ sys: Sys;
+
+include "bufio.m";
+ bufio: Bufio;
+ Iobuf: import bufio;
+
+include "rfc822.m";
+
+include "string.m";
+ str: String;
+
+include "daytime.m";
+ daytime: Daytime;
+ Tm: import daytime;
+
+Minrequest: con 512; # more than enough for most requests
+
+Suffix: adt {
+ suffix: string;
+ generic: string;
+ specific: string;
+ encoding: string;
+};
+
+SuffixFile: con "/lib/mimetype";
+mtime := 0;
+qid: Sys->Qid;
+
+suffixes: list of ref Suffix;
+
+nomod(s: string)
+{
+ raise sys->sprint("internal: can't load %s: %r", s);
+}
+
+init(b: Bufio)
+{
+ sys = load Sys Sys->PATH;
+ bufio = b;
+ str = load String String->PATH;
+ if(str == nil)
+ nomod(String->PATH);
+ daytime = load Daytime Daytime->PATH;
+ if(daytime == nil)
+ nomod(Daytime->PATH);
+ readsuffixfile();
+}
+
+readheaders(fd: ref Iobuf, limit: int): array of (string, array of byte)
+{
+ n := 0;
+ s := 0;
+ b := array[Minrequest] of byte;
+ nline := 0;
+ lines: list of array of byte;
+ while((c := fd.getb()) >= 0){
+ if(c == '\r'){
+ c = fd.getb();
+ if(c < 0)
+ break;
+ if(c != '\n'){
+ fd.ungetb();
+ c = '\r';
+ }
+ }
+ if(n >= len b){
+ if(len b >= limit)
+ return nil;
+ ab := array[n+512] of byte;
+ ab[0:] = b;
+ b = ab;
+ }
+ b[n++] = byte c;
+ if(c == '\n'){
+ if(n == 1 || b[n-2] == byte '\n')
+ break; # empty line
+ c = fd.getb();
+ if(c < 0)
+ break;
+ if(c != ' ' && c != '\t'){ # not continued
+ fd.ungetb();
+ lines = b[s: n] :: lines;
+ nline++;
+ s = n;
+ }else
+ b[n-1] = byte ' ';
+ }
+ }
+ if(n == 0)
+ return nil;
+ b = b[0: n];
+ if(n != s){
+ lines = b[s:n] :: lines;
+ nline++;
+ }
+ a := array[nline] of (string, array of byte);
+ for(; lines != nil; lines = tl lines){
+ b = hd lines;
+ name := "";
+ for(i := 0; i < len b; i++)
+ if(b[i] == byte ':'){
+ name = str->tolower(string b[0:i]);
+ b = b[i+1:];
+ break;
+ }
+ a[--nline] = (name, b);
+ }
+ return a;
+}
+
+#
+# *(";" parameter) used in transfer-extension, media-type and media-range
+# parameter = attribute "=" value
+# attribute = token
+# value = token | quoted-string
+#
+parseparams(ps: ref Rfclex): list of (string, string)
+{
+ l: list of (string, string);
+ do{
+ if(ps.lex() != Word)
+ break;
+ attr := ps.wordval;
+ if(ps.lex() != '=' || ps.lex() != Word && ps.tok != QString)
+ break;
+ l = (attr, ps.wordval) :: l;
+ }while(ps.lex() == ';');
+ ps.unlex();
+ return rev(l);
+}
+
+#
+# 1#transfer-coding
+#
+mimefields(ps: ref Rfclex): list of (string, list of (string, string))
+{
+ rf: list of (string, list of (string, string));
+ do{
+ if(ps.lex() == Word){
+ w := ps.wordval;
+ if(ps.lex() == ';'){
+ rf = (w, parseparams(ps)) :: rf;
+ ps.lex();
+ }else
+ rf = (w, nil) :: rf;
+ }
+ }while(ps.tok == ',');
+ ps.unlex();
+ f: list of (string, list of (string, string));
+ for(; rf != nil; rf = tl rf)
+ f = hd rf :: f;
+ return f;
+}
+
+# #(media-type | (media-range [accept-params])) ; Content-Type and Accept
+#
+# media-type = type "/" subtype *( ";" parameter )
+# type = token
+# subtype = token
+# LWS must not be used between type and subtype, nor between attribute and value (in parameter)
+#
+# media-range = ("*/*" | type "/*" | type "/" subtype ) *(";' parameter)
+# accept-params = ";" "q" "=" qvalue *( accept-extension )
+# accept-extension = ";" token [ "=" ( token | quoted-string ) ]
+#
+# 1#( ( charset | "*" )[ ";" "q" "=" qvalue ] ) ; Accept-Charset
+# 1#( codings [ ";" "q" "=" qvalue ] ) ; Accept-Encoding
+# 1#( language-range [ ";" "q" "=" qvalue ] ) ; Accept-Language
+#
+# codings = ( content-coding | "*" )
+#
+parsecontent(ps: ref Rfclex, multipart: int, head: list of ref Content): list of ref Content
+{
+ do{
+ if(ps.lex() == Word){
+ generic := ps.wordval;
+ specific := "*";
+ if(ps.lex() == '/'){
+ if(ps.lex() != Word)
+ break;
+ specific = ps.wordval;
+ if(!multipart && specific != "*")
+ break;
+ }else if(multipart)
+ break; # syntax error
+ else
+ ps.unlex();
+ params: list of (string, string) = nil;
+ if(ps.lex() == ';'){
+ params = parseparams(ps);
+ ps.lex();
+ }
+ head = Content.mk(generic, specific, params) :: head; # order reversed, but doesn't matter
+ }
+ }while(ps.tok == ',');
+ ps.unlex();
+ return head;
+}
+
+rev(l: list of (string, string)): list of (string, string)
+{
+ rl: list of (string, string);
+ for(; l != nil; l = tl l)
+ rl = hd l :: rl;
+ return rl;
+}
+
+Rfclex.mk(a: array of byte): ref Rfclex
+{
+ ps := ref Rfclex;
+ ps.fd = bufio->aopen(a);
+ ps.tok = '\n';
+ ps.eof = 0;
+ return ps;
+}
+
+Rfclex.getc(ps: self ref Rfclex): int
+{
+ c := ps.fd.getb();
+ if(c < 0)
+ ps.eof = 1;
+ return c;
+}
+
+Rfclex.ungetc(ps: self ref Rfclex)
+{
+ if(!ps.eof)
+ ps.fd.ungetb();
+}
+
+Rfclex.lex(ps: self ref Rfclex): int
+{
+ if(ps.seen != nil){
+ (ps.tok, ps.wordval) = hd ps.seen;
+ ps.seen = tl ps.seen;
+ }else
+ ps.tok = lex1(ps, 0);
+ return ps.tok;
+}
+
+Rfclex.unlex(ps: self ref Rfclex)
+{
+ ps.seen = (ps.tok, ps.wordval) :: ps.seen;
+}
+
+Rfclex.skipws(ps: self ref Rfclex): int
+{
+ return lex1(ps, 1);
+}
+
+#
+# rfc 2822/rfc 1521 lexical analyzer
+#
+lex1(ps: ref Rfclex, skipwhite: int): int
+{
+ ps.wordval = nil;
+ while((c := ps.getc()) >= 0){
+ case c {
+ '(' =>
+ level := 1;
+ while((c = ps.getc()) != Bufio->EOF && c != '\n'){
+ if(c == '\\'){
+ c = ps.getc();
+ if(c == Bufio->EOF)
+ return '\n';
+ continue;
+ }
+ if(c == '(')
+ level++;
+ else if(c == ')' && --level == 0)
+ break;
+ }
+ ' ' or '\t' or '\r' or 0 =>
+ ;
+ '\n' =>
+ return '\n';
+ ')' or '<' or '>' or '[' or ']' or '@' or '/' or ',' or
+ ';' or ':' or '?' or '=' =>
+ if(skipwhite){
+ ps.ungetc();
+ return c;
+ }
+ return c;
+
+ '"' =>
+ if(skipwhite){
+ ps.ungetc();
+ return c;
+ }
+ word(ps,"\"");
+ ps.getc(); # skip the closing quote
+ return QString;
+
+ * =>
+ ps.ungetc();
+ if(skipwhite)
+ return c;
+ word(ps,"\"()<>@,;:/[]?={}\r\n \t");
+ return Word;
+ }
+ }
+ return '\n';
+}
+
+# return the rest of an rfc 822 line, not including \r or \n
+# do not map to lower case
+
+Rfclex.line(ps: self ref Rfclex): string
+{
+ s := "";
+ while((c := ps.getc()) != Bufio->EOF && c != '\n' && c != '\r'){
+ if(c == '\\'){
+ c = ps.getc();
+ if(c == Bufio->EOF)
+ break;
+ }
+ s[len s] = c;
+ }
+ ps.tok = '\n';
+ ps.wordval = s;
+ return s;
+}
+
+word(ps: ref Rfclex, stop: string)
+{
+ w := "";
+ while((c := ps.getc()) != Bufio->EOF){
+ if(c == '\r')
+ c = ' ';
+ if(c == '\\'){
+ c = ps.getc();
+ if(c == Bufio->EOF)
+ break;
+ }else if(str->in(c,stop)){
+ ps.ungetc();
+ break;
+ }
+ if(c >= 'A' && c <= 'Z')
+ c += 'a' - 'A';
+ w[len w] = c;
+ }
+ ps.wordval = w;
+}
+
+readsuffixfile(): string
+{
+ iob := bufio->open(SuffixFile, Bufio->OREAD);
+ if(iob == nil)
+ return sys->sprint("cannot open %s: %r", SuffixFile);
+ for(n := 1; (line := iob.gets('\n')) != nil; n++){
+ (s, nil) := parsesuffix(line);
+ if(s != nil)
+ suffixes = s :: suffixes;
+ }
+ return nil;
+}
+
+parsesuffix(line: string): (ref Suffix, string)
+{
+ (line, nil) = str->splitstrl(line, "#");
+ if(line == nil)
+ return (nil, nil);
+ (n, slist) := sys->tokenize(line,"\n\t ");
+ if(n == 0)
+ return (nil, nil);
+ if(n < 4)
+ return (nil, "too few fields");
+ s := ref Suffix;
+ s.suffix = hd slist;
+ slist = tl slist;
+ s.generic = hd slist;
+ if (s.generic == "-")
+ s.generic = "";
+ slist = tl slist;
+ s.specific = hd slist;
+ if (s.specific == "-")
+ s.specific = "";
+ slist = tl slist;
+ s.encoding = hd slist;
+ if (s.encoding == "-")
+ s.encoding = "";
+ if((s.generic == nil || s.specific == nil) && s.encoding == nil)
+ return (nil, nil);
+ return (s, nil);
+}
+
+#
+# classify by file suffix
+#
+suffixclass(name: string): (ref Content, ref Content)
+{
+ typ, enc: ref Content;
+
+ p := str->splitstrr(name, "/").t1;
+ if(p != nil)
+ name = p;
+
+ for(;;){
+ (name, p) = suffix(name); # TO DO: match below is case sensitive
+ if(p == nil)
+ break;
+ for(l := suffixes; l != nil; l = tl l){
+ s := hd l;
+ if(p == s.suffix){
+ if(s.generic != nil && typ == nil)
+ typ = Content.mk(s.generic, s.specific, nil);
+ if(s.encoding != nil && enc == nil)
+ enc = Content.mk(s.encoding, "", nil);
+ if(typ != nil && enc != nil)
+ break;
+ }
+ }
+ }
+ return (typ, enc);
+}
+
+suffix(s: string): (string, string)
+{
+ for(n := len s; --n >= 0;)
+ if(s[n] == '.')
+ return (s[0: n], s[n:]);
+ return (s, nil);
+}
+
+#
+# classify by initial contents of file
+#
+dataclass(a: array of byte): (ref Content, ref Content)
+{
+ utf8 := 0;
+ for(i := 0; i < len a;){
+ c := int a[i];
+ if(c < 16r80){
+ if(c < 32 && c != '\n' && c != '\r' && c != '\t' && c != '\v' && c != '\f')
+ return (nil, nil);
+ i++;
+ }else{
+ utf8 = 1;
+ (r, l, nil) := sys->byte2char(a, i);
+ if(r == Sys->UTFerror)
+ return (nil, nil);
+ i += l;
+ }
+ }
+ if(utf8)
+ params := ("charset", "utf-8") :: nil;
+ return (Content.mk("text", "plain", params), nil);
+}
+
+Content.mk(generic, specific: string, params: list of (string, string)): ref Content
+{
+ c := ref Content;
+ c.generic = generic;
+ c.specific = specific;
+ c.params = params;
+ return c;
+}
+
+Content.check(me: self ref Content, oks: list of ref Content): int
+{
+ if(oks == nil)
+ return 1;
+ g := str->tolower(me.generic);
+ s := str->tolower(me.specific);
+ for(; oks != nil; oks = tl oks){
+ ok := hd oks;
+ if((ok.generic == g || ok.generic=="*") &&
+ (s == nil || ok.specific == s || ok.specific=="*"))
+ return 1;
+ }
+ return 0;
+}
+
+Content.text(c: self ref Content): string
+{
+ if((s := c.specific) != nil)
+ s = c.generic+"/"+s;
+ else
+ s = c.generic;
+ for(l := c.params; l != nil; l = tl l){
+ (n, v) := hd l;
+ s += sys->sprint(";%s=%s", n, quote(v));
+ }
+ return s;
+}
+
+#
+# should probably be in a Mime or HTTP module
+#
+
+Quotable: con "()<>@,;:\\\"/[]?={} \t";
+
+quotable(s: string): int
+{
+ for(i := 0; i < len s; i++)
+ if(str->in(s[i], Quotable))
+ return 1;
+ return 0;
+}
+
+quote(s: string): string
+{
+ if(!quotable(s))
+ return s;
+ q := "\"";
+ for(i := 0; i < len s; i++){
+ if(str->in(s[i], Quotable))
+ q[len q] = '\\';
+ q[len q] = s[i];
+ }
+ q[len q] = '"';
+ return q;
+}
+
+weekdays := array[] of {
+ "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
+};
+
+months := array[] of {
+ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+};
+
+# print dates in the format
+# Wkd, DD Mon YYYY HH:MM:SS GMT
+
+sec2date(t: int): string
+{
+ tm := daytime->gmt(t);
+ return sys->sprint("%s, %.2d %s %.4d %.2d:%.2d:%.2d GMT",
+ weekdays[tm.wday], tm.mday, months[tm.mon], tm.year+1900,
+ tm.hour, tm.min, tm.sec);
+}
+
+# parse dates of formats
+# Wkd, DD Mon YYYY HH:MM:SS GMT
+# Weekday, DD-Mon-YY HH:MM:SS GMT
+# Wkd Mon ( D|DD) HH:MM:SS YYYY
+# plus anything similar
+
+date2sec(date: string): int
+{
+ tm := daytime->string2tm(date);
+ if(tm == nil || tm.year < 70 || tm.zone != "GMT")
+ t := 0;
+ else
+ t = daytime->tm2epoch(tm);
+ return t;
+}
+
+now(): int
+{
+ return daytime->now();
+}
+
+time(): string
+{
+ return sec2date(daytime->now());
+}
diff --git a/appl/lib/w3c/uris.b b/appl/lib/w3c/uris.b
new file mode 100644
index 00000000..b49c17b8
--- /dev/null
+++ b/appl/lib/w3c/uris.b
@@ -0,0 +1,320 @@
+implement URIs;
+
+#
+# RFC3986, URI Generic Syntax
+#
+
+include "sys.m";
+ sys: Sys;
+
+include "string.m";
+ S: String;
+
+include "uris.m";
+
+Alpha: con "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+Digit: con "0123456789";
+
+GenDelims: con ":/?#[]@";
+SubDelims: con "!$&'()*+,;=";
+Reserved: con GenDelims + SubDelims;
+HexDigit: con Digit+"abcdefABCDEF";
+
+Escape: con GenDelims+"%"; # "%" must be encoded as %25
+
+Unreserved: con Alpha+Digit+"-._~";
+
+F_Esc, F_Scheme: con byte(1<<iota);
+
+ctype: array of byte;
+
+classify(s: string, f: byte)
+{
+ for(i := 0; i < len s; i++)
+ ctype[s[i]] |= f;
+}
+
+init()
+{
+ sys = load Sys Sys->PATH;
+ S = load String String->PATH;
+ if(S == nil)
+ raise sys->sprint("can't load %s: %r", String->PATH);
+
+ ctype = array [256] of { * => byte 0 };
+ classify(Escape, F_Esc);
+ for(i := 0; i <= ' '; i++)
+ ctype[i] |= F_Esc;
+ for(i = 16r80; i <= 16rFF; i++)
+ ctype[i] |= F_Esc;
+ classify(Alpha+Digit+"+-.", F_Scheme);
+}
+
+# scheme://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
+#
+# ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+#
+# delimiters: :/?# /?# ?# #
+#
+URI.parse(url: string): ref URI
+{
+ scheme, userinfo, host, port, path, query, frag: string;
+ for(i := 0; i < len url; i++){
+ c := url[i];
+ if(c == ':'){
+ scheme = S->tolower(url[0:i]);
+ url = url[i+1:];
+ break;
+ }
+ if(c < 0 || c >= len ctype || (ctype[c] & F_Scheme) == byte 0)
+ break;
+ }
+
+ if(S->prefix("//", url)){
+ authority: string;
+ (authority, path) = S->splitstrl(url[2:], "/");
+ (up, hp) := splitl(authority, "@");
+ if(hp == "")
+ hp = authority;
+ else
+ userinfo = up;
+ if(hp != nil && hp[0] == '['){ # another rfc hack, for IPv6 addresses, which contain :
+ (host, hp) = S->splitstrr(hp, "]");
+ if(hp != nil && hp[0] == ':')
+ port = hp[1:];
+ else
+ host += hp; # put it back
+ }else
+ (host, port) = splitl(hp, ":");
+ if(path == nil)
+ path = "/";
+ }else
+ path = url;
+ (path, frag) = S->splitstrl(path, "#"); # includes # in frag
+ (path, query) = S->splitstrl(path, "?"); # includes ? in query
+ return ref URI(scheme, dec(userinfo), dec(host), port, dec(path), query, dec(frag));
+}
+
+URI.userpw(u: self ref URI): (string, string)
+{
+ return splitl(u.userinfo, ":");
+}
+
+URI.text(u: self ref URI): string
+{
+ s := "";
+ if(u.scheme != nil)
+ s += u.scheme + ":";
+ if(u.hasauthority())
+ s += "//" + u.authority();
+ return s + enc(u.path, "/@:") + u.query + enc1(u.fragment, "@:/?");
+}
+
+URI.copy(u: self ref URI): ref URI
+{
+ return ref *u;
+}
+
+URI.pathonly(u: self ref URI): ref URI
+{
+ v := ref *u;
+ v.userinfo = nil;
+ v.query = nil;
+ v.fragment = nil;
+ return v;
+}
+
+URI.addbase(u: self ref URI, b: ref URI): ref URI
+{
+ # RFC3986 5.2.2, rearranged
+ r := ref *u;
+ if(r.scheme == nil && b != nil){
+ r.scheme = b.scheme;
+ if(!r.hasauthority()){
+ r.userinfo = b.userinfo;
+ r.host = b.host;
+ r.port = b.port;
+ if(r.path == nil){
+ r.path = b.path;
+ if(r.query == nil)
+ r.query = b.query;
+ }else if(r.path[0] != '/'){
+ # 5.2.3: merge paths
+ if(b.path == "" && b.hasauthority())
+ p1 := "/";
+ else
+ (p1, nil) = S->splitstrr(b.path, "/");
+ r.path = p1 + r.path;
+ }
+ }
+ }
+ r.path = removedots(r.path);
+ return r;
+}
+
+URI.nodots(u: self ref URI): ref URI
+{
+ return u.addbase(nil);
+}
+
+URI.hasauthority(u: self ref URI): int
+{
+ return u.host != nil || u.userinfo != nil || u.port != nil;
+}
+
+URI.isabsolute(u: self ref URI): int
+{
+ return u.scheme != nil;
+}
+
+URI.authority(u: self ref URI): string
+{
+ s := enc(u.userinfo, ":");
+ if(s != nil)
+ s += "@";
+ if(u.host != nil){
+ s += enc(u.host, "[]:"); # assumes : appears inside []; could enforce it
+ if(u.port != nil)
+ s += ":" + enc(u.port,nil);
+ }
+ return s;
+}
+
+#
+# simplified version of procedure in RFC3986 5.2.4:
+# it extracts a complete segment from the input first, then analyses it
+#
+removedots(s: string): string
+{
+ if(s == nil)
+ return "";
+ out := "";
+ for(p := 0; p < len s;){
+ # extract the first segment and any preceding /
+ q := p;
+ if(++p < len s){
+ while(++p < len s && s[p] != '/')
+ {}
+ }
+ seg := s[q: p];
+ if((e := p) < len s)
+ e++;
+ case s[q: e] { # includes any following /
+ "../" or "./" => ;
+ "/./" or "/." =>
+ if(p >= len s)
+ s += "/";
+ "/../" or "/.." =>
+ if(p >= len s)
+ s += "/";
+ if(out != nil){
+ for(q = len out; --q > 0 && out[q] != '/';)
+ {} # skip
+ out = out[0: q];
+ }
+ "." or ".." => ; # null effect
+ * => # including "/"
+ out += seg;
+ }
+ }
+ return out;
+}
+
+#
+# similar to splitstrl but trims the matched character from the result
+#
+splitl(s, c: string): (string, string)
+{
+ (a, b) := S->splitstrl(s, c);
+ if(b != "")
+ b = b[1:];
+ return (a, b);
+}
+
+hex2(s: string): int
+{
+ n := 0;
+ for(i := 0; i < 2; i++){
+ if(i >= len s)
+ return -1;
+ n <<= 4;
+ case c := s[i] {
+ '0' to '9' =>
+ n += c-'0';
+ 'a' to 'f' =>
+ n += 10+(c-'a');
+ 'A' to 'F' =>
+ n += 10+(c-'A');
+ * =>
+ return -1;
+ }
+ }
+ return n;
+}
+
+dec(s: string): string
+{
+ for(i := 0;; i++){
+ if(i >= len s)
+ return s;
+ if(s[i] == '%' || s[i] == 0)
+ break;
+ }
+ o := s[0:i];
+ while(i < len s){
+ case c := s[i++] {
+ '%' =>
+ if((v := hex2(s[i:])) > 0){
+ c = v;
+ i += 2;
+ }
+ 0 =>
+ c = ' '; # shouldn't happen
+ }
+ o[len o] = c;
+ }
+ return o;
+}
+
+enc1(s: string, safe: string): string
+{
+ if(len s > 1)
+ return s[0:1] + enc(s[1:], safe);
+ return s;
+}
+
+# encoding depends on context (eg, &=/: not escaped in `query' string)
+enc(s: string, safe: string): string
+{
+ for(i := 0;; i++){
+ if(i >= len s)
+ return s; # use as-is
+ c := s[i];
+ if(c >= 16r80 || (ctype[c] & F_Esc) != byte 0 && !S->in(c, safe))
+ break;
+ }
+ t := s[0: i];
+ b := array of byte s[i:];
+ for(i = 0; i < len b; i++){
+ c := int b[i];
+ if((ctype[c] & F_Esc) != byte 0 && !S->in(c, safe))
+ t += sys->sprint("%%%.2X", c);
+ else
+ t[len t] = c;
+ }
+ return t;
+}
+
+URI.eq(u: self ref URI, v: ref URI): int
+{
+ if(v == nil)
+ return 0;
+ return u.scheme == v.scheme && u.userinfo == v.userinfo &&
+ u.host == v.host && u.port == v.port && u.path == v.path && # path might need canon
+ u.query == v.query; # not fragment
+}
+
+URI.eqf(u: self ref URI, v: ref URI): int
+{
+ return u.eq(v) && u.fragment == v.fragment;
+}