summaryrefslogtreecommitdiff
path: root/appl/lib/w3c/uris.b
diff options
context:
space:
mode:
Diffstat (limited to 'appl/lib/w3c/uris.b')
-rw-r--r--appl/lib/w3c/uris.b320
1 files changed, 320 insertions, 0 deletions
diff --git a/appl/lib/w3c/uris.b b/appl/lib/w3c/uris.b
new file mode 100644
index 00000000..b49c17b8
--- /dev/null
+++ b/appl/lib/w3c/uris.b
@@ -0,0 +1,320 @@
+implement URIs;
+
+#
+# RFC3986, URI Generic Syntax
+#
+
+include "sys.m";
+ sys: Sys;
+
+include "string.m";
+ S: String;
+
+include "uris.m";
+
+# Character sets used to build the classification table below.
+Alpha: con "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+Digit: con "0123456789";
+
+# RFC3986 2.2 reserved characters, split into general and sub-delimiters.
+GenDelims: con ":/?#[]@";
+SubDelims: con "!$&'()*+,;=";
+Reserved: con GenDelims + SubDelims;
+HexDigit: con Digit+"abcdefABCDEF";
+
+# Characters that enc() percent-encodes unless the caller lists them as safe.
+Escape: con GenDelims+"%"; # "%" must be encoded as %25
+
+Unreserved: con Alpha+Digit+"-._~";
+
+# Bit flags stored in ctype: iota makes F_Esc 1 and F_Scheme 2.
+F_Esc, F_Scheme: con byte(1<<iota);
+
+# Per-character flag table, allocated with 256 entries and filled in by init().
+ctype: array of byte;
+
+# Set flag bit(s) f in ctype for every character that occurs in s.
+classify(s: string, f: byte)
+{
+	n := len s;
+	k := 0;
+	while(k < n){
+		ctype[s[k]] |= f;
+		k++;
+	}
+}
+
+# Load the Sys and String modules and build the ctype classification table.
+# Raises an exception if the String module cannot be loaded.
+init()
+{
+	sys = load Sys Sys->PATH;
+	S = load String String->PATH;
+	if(S == nil)
+		raise sys->sprint("can't load %s: %r", String->PATH);
+
+	ctype = array [256] of { * => byte 0 };
+	classify(Escape, F_Esc);
+	# control characters, space, and everything from 16r80 up must be escaped
+	for(c := 0; c < len ctype; c++)
+		if(c <= ' ' || c >= 16r80)
+			ctype[c] |= F_Esc;
+	classify(Alpha+Digit+"+-.", F_Scheme);
+}
+
+# scheme://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
+#
+# ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+#
+# delimiters: :/?# /?# ?# #
+#
+# Split url into its components and return them as a new URI.
+# The scheme is lower-cased; userinfo, host, path and fragment are
+# percent-decoded; port and query are stored as found.  The query keeps
+# its leading "?" and the fragment its leading "#".
+URI.parse(url: string): ref URI
+{
+	scheme, userinfo, host, port, path, query, frag: string;
+	for(i := 0; i < len url; i++){
+		c := url[i];
+		# a scheme needs at least one character before the ":";
+		# a leading ":" belongs to the path and must not be dropped
+		if(c == ':' && i > 0){
+			scheme = S->tolower(url[0:i]);
+			url = url[i+1:];
+			break;
+		}
+		if(c < 0 || c >= len ctype || (ctype[c] & F_Scheme) == byte 0)
+			break;
+	}
+
+	if(S->prefix("//", url)){
+		authority: string;
+		# the authority ends at the first "/", "?" or "#" (RFC3986 3.2),
+		# not only at "/": "//host?q" has host "host" and query "?q"
+		(authority, path) = S->splitl(url[2:], "/?#");
+		# split at "@" by hand so that an empty host after "@" ("user@")
+		# can be told apart from an authority with no "@" at all
+		(up, hp) := S->splitstrl(authority, "@");
+		if(hp == nil)
+			hp = authority;
+		else{
+			userinfo = up;
+			hp = hp[1:];	# drop the "@"
+		}
+		if(hp != nil && hp[0] == '['){	# another rfc hack, for IPv6 addresses, which contain :
+			(host, hp) = S->splitstrr(hp, "]");
+			if(hp != nil && hp[0] == ':')
+				port = hp[1:];
+			else
+				host += hp;	# put it back
+		}else
+			(host, port) = splitl(hp, ":");
+		# with an authority present an empty path is written as "/",
+		# including when only a query or fragment follows
+		if(path == nil || path[0] != '/')
+			path = "/" + path;
+	}else
+		path = url;
+	(path, frag) = S->splitstrl(path, "#");	# includes # in frag
+	(path, query) = S->splitstrl(path, "?");	# includes ? in query
+	return ref URI(scheme, dec(userinfo), dec(host), port, dec(path), query, dec(frag));
+}
+
+# Split the userinfo component at its first ":" into (user, password).
+URI.userpw(u: self ref URI): (string, string)
+{
+	(user, pw) := splitl(u.userinfo, ":");
+	return (user, pw);
+}
+
+# Reassemble the URI as a string, percent-encoding the path and fragment.
+# The query is emitted exactly as stored.
+URI.text(u: self ref URI): string
+{
+	prefix := "";
+	if(u.scheme != nil)
+		prefix = u.scheme + ":";
+	if(u.hasauthority())
+		prefix += "//" + u.authority();
+	path := enc(u.path, "/@:");
+	# enc1 leaves the first character of the fragment unencoded
+	frag := enc1(u.fragment, "@:/?");
+	return prefix + path + u.query + frag;
+}
+
+# Return a new URI holding the same field values as u.
+URI.copy(u: self ref URI): ref URI
+{
+	dup := ref *u;
+	return dup;
+}
+
+# Return a copy of u with userinfo, query and fragment cleared;
+# scheme, host, port and path are carried over.
+URI.pathonly(u: self ref URI): ref URI
+{
+	# fields in the same order URI.parse constructs them:
+	# scheme, userinfo, host, port, path, query, fragment
+	return ref URI(u.scheme, nil, u.host, u.port, u.path, nil, nil);
+}
+
+# Resolve u against base b (RFC3986 5.2.2) and return the result as a new URI.
+# Components are inherited from b only when u has no scheme and b is not nil;
+# dot segments are removed from the resulting path in every case.
+URI.addbase(u: self ref URI, b: ref URI): ref URI
+{
+	t := ref *u;
+	inherit := t.scheme == nil && b != nil;
+	if(inherit)
+		t.scheme = b.scheme;
+	if(inherit && !t.hasauthority()){
+		t.userinfo = b.userinfo;
+		t.host = b.host;
+		t.port = b.port;
+		if(t.path == nil){
+			# empty reference path: take the base path, and its query too
+			# unless the reference has one
+			t.path = b.path;
+			if(t.query == nil)
+				t.query = b.query;
+		}else if(t.path[0] != '/'){
+			# 5.2.3: merge a relative path with the base path's directory
+			dir: string;
+			if(b.path == "" && b.hasauthority())
+				dir = "/";
+			else
+				(dir, nil) = S->splitstrr(b.path, "/");
+			t.path = dir + t.path;
+		}
+	}
+	t.path = removedots(t.path);
+	return t;
+}
+
+# Return a copy of u with dot segments removed from its path
+# (addbase with no base URI).
+URI.nodots(u: self ref URI): ref URI
+{
+	nobase: ref URI = nil;
+	return u.addbase(nobase);
+}
+
+# Non-zero when any of host, userinfo or port is set.
+URI.hasauthority(u: self ref URI): int
+{
+	return !(u.host == nil && u.userinfo == nil && u.port == nil);
+}
+
+# Non-zero when the URI has a scheme.
+URI.isabsolute(u: self ref URI): int
+{
+	return !(u.scheme == nil);
+}
+
+# Build the authority text: encoded userinfo followed by "@" when present,
+# then the encoded host and, if set, ":" and the port.
+# The port is only emitted when a host is present.
+URI.authority(u: self ref URI): string
+{
+	auth := enc(u.userinfo, ":");
+	if(auth != nil)
+		auth += "@";
+	if(u.host == nil)
+		return auth;
+	auth += enc(u.host, "[]:");	# assumes : appears inside []; could enforce it
+	if(u.port != nil)
+		auth += ":" + enc(u.port, nil);
+	return auth;
+}
+
+#
+# simplified version of procedure in RFC3986 5.2.4:
+# it extracts a complete segment from the input first, then analyses it
+#
+# Return s with "." and ".." segments removed; a nil path gives "".
+# Each pass takes one segment (with its preceding "/", if any) from the
+# input and decides from the segment plus any following "/" what to do.
+removedots(s: string): string
+{
+	if(s == nil)
+		return "";
+	out := "";
+	for(p := 0; p < len s;){
+		# extract the first segment and any preceding /
+		q := p;
+		# step over s[q] (a leading "/" or the first character of the
+		# segment), then stop at the next "/"; s[q+1] must be tested too,
+		# otherwise "a/.." or "//.." is swallowed as a single segment
+		p++;
+		while(p < len s && s[p] != '/')
+			p++;
+		seg := s[q: p];
+		if((e := p) < len s)
+			e++;
+		case s[q: e] {	# includes any following /
+		"../" or "./" =>
+			p = e;	# 5.2.4 A: drop the whole prefix, trailing / included
+		"/./" or "/." =>
+			if(p >= len s)
+				s += "/";	# a final "/." is replaced by "/"
+		"/../" or "/.." =>
+			if(p >= len s)
+				s += "/";	# a final "/.." is replaced by "/"
+			if(out != nil){
+				# remove the last output segment and its preceding /
+				for(q = len out; --q > 0 && out[q] != '/';)
+					{}	# skip
+				out = out[0: q];
+			}
+		"." or ".." => ;	# null effect
+		* =>	# including "/"
+			out += seg;
+		}
+	}
+	return out;
+}
+
+#
+# similar to splitstrl but trims the matched character from the result
+#
+# Split s at the first occurrence of c; the first character of the
+# right-hand part (the match) is removed.  With no match the right part is "".
+splitl(s, c: string): (string, string)
+{
+	(head, tail) := S->splitstrl(s, c);
+	if(tail == "")
+		return (head, tail);
+	return (head, tail[1:]);
+}
+
+# Value of the two hexadecimal digits at the start of s, or -1 if s is
+# shorter than two characters or either character is not a hex digit.
+hex2(s: string): int
+{
+	if(len s < 2)
+		return -1;
+	v := 0;
+	for(k := 0; k < 2; k++){
+		c := s[k];
+		d := -1;
+		if(c >= '0' && c <= '9')
+			d = c - '0';
+		else if(c >= 'a' && c <= 'f')
+			d = c - 'a' + 10;
+		else if(c >= 'A' && c <= 'F')
+			d = c - 'A' + 10;
+		if(d < 0)
+			return -1;
+		v = (v << 4) | d;
+	}
+	return v;
+}
+
+# Percent-decode s.  A "%" not followed by two hex digits is kept as is,
+# and so is "%00", so that no NUL enters the result; a literal NUL in s
+# becomes a space.  Escaped bytes of 16r80 and above are collected in runs
+# and converted from UTF-8 together, because enc() escapes the UTF-8 bytes
+# of a character: decoding them one by one would not restore the text.
+dec(s: string): string
+{
+	for(i := 0;; i++){
+		if(i >= len s)
+			return s;	# nothing to decode
+		if(s[i] == '%' || s[i] == 0)
+			break;
+	}
+	o := s[0:i];
+	# every escaped byte uses three input characters, so len s is ample
+	pend := array[len s] of byte;
+	np := 0;
+	while(i < len s){
+		c := s[i++];
+		if(c == '%'){
+			v := hex2(s[i:]);
+			if(v > 0){
+				i += 2;
+				if(v >= 16r80){
+					pend[np++] = byte v;	# part of a multi-byte sequence
+					continue;
+				}
+				c = v;
+			}
+		}else if(c == 0)
+			c = ' ';	# shouldn't happen
+		if(np > 0){
+			# a plain character ends the run of escaped bytes
+			o += string pend[0:np];
+			np = 0;
+		}
+		o[len o] = c;
+	}
+	if(np > 0)
+		o += string pend[0:np];
+	return o;
+}
+
+# Like enc, but the first character of s is copied through unencoded.
+# Strings of at most one character are returned unchanged.
+enc1(s: string, safe: string): string
+{
+	if(len s <= 1)
+		return s;
+	return s[0:1] + enc(s[1:], safe);
+}
+
+# encoding depends on context (eg, &=/: not escaped in `query' string)
+# Percent-encode s.  Characters flagged F_Esc in ctype are written as %XX
+# unless they appear in safe; characters of 16r80 and above are converted
+# to bytes first and each byte is encoded.  If nothing needs encoding,
+# s itself is returned.
+enc(s: string, safe: string): string
+{
+	n := len s;
+	i := 0;
+	# find the first character that has to be encoded
+	while(i < n){
+		c := s[i];
+		if(c >= 16r80)
+			break;
+		if((ctype[c] & F_Esc) != byte 0 && !S->in(c, safe))
+			break;
+		i++;
+	}
+	if(i >= n)
+		return s;	# use as-is
+	out := s[0: i];
+	raw := array of byte s[i:];
+	for(k := 0; k < len raw; k++){
+		c := int raw[k];
+		if((ctype[c] & F_Esc) == byte 0 || S->in(c, safe))
+			out[len out] = c;
+		else
+			out += sys->sprint("%%%.2X", c);
+	}
+	return out;
+}
+
+# Non-zero when u and v agree on scheme, userinfo, host, port, path and
+# query; the fragment is not compared.  A nil v never matches.
+URI.eq(u: self ref URI, v: ref URI): int
+{
+	if(v == nil)
+		return 0;
+	if(u.scheme != v.scheme || u.userinfo != v.userinfo)
+		return 0;
+	if(u.host != v.host || u.port != v.port)
+		return 0;
+	# path might need canon
+	return u.path == v.path && u.query == v.query;	# not fragment
+}
+
+# Like eq, but the fragments must match as well.
+URI.eqf(u: self ref URI, v: ref URI): int
+{
+	if(!u.eq(v))
+		return 0;
+	return u.fragment == v.fragment;
+}