diff options
Diffstat (limited to 'appl')
| -rw-r--r-- | appl/lib/complete.b | 1 | ||||
| -rw-r--r-- | appl/lib/w3c/uris.b | 28 | ||||
| -rw-r--r-- | appl/svc/httpd/httpd.b | 5 | ||||
| -rw-r--r-- | appl/svc/httpd/httpd.m | 5 | ||||
| -rw-r--r-- | appl/svc/httpd/parser.b | 354 | ||||
| -rw-r--r-- | appl/svc/httpd/parser.m | 1 |
6 files changed, 62 insertions, 332 deletions
diff --git a/appl/lib/complete.b b/appl/lib/complete.b index 1a2e7713..b793e8f5 100644 --- a/appl/lib/complete.b +++ b/appl/lib/complete.b @@ -41,7 +41,6 @@ complete(dir, s: string): (ref Completion, string) if(n == 0) return (nil, nil); - readdir = nil; c := ref Completion(0, 0, nil, 0, nil); diff --git a/appl/lib/w3c/uris.b b/appl/lib/w3c/uris.b index b49c17b8..07042dc4 100644 --- a/appl/lib/w3c/uris.b +++ b/appl/lib/w3c/uris.b @@ -260,20 +260,26 @@ dec(s: string): string if(s[i] == '%' || s[i] == 0) break; } - o := s[0:i]; + t := s[0:i]; + a := array[Sys->UTFmax*len s] of byte; # upper bound + o := 0; while(i < len s){ - case c := s[i++] { - '%' => - if((v := hex2(s[i:])) > 0){ - c = v; - i += 2; + c := s[i++]; + if(c < 16r80){ + case c { + '%' => + if((v := hex2(s[i:])) > 0){ + c = v; + i += 2; + } + 0 => + c = ' '; # shouldn't happen } - 0 => - c = ' '; # shouldn't happen - } - o[len o] = c; + a[o++] = byte c; + }else + o += sys->char2byte(c, a, o); # string contained Unicode } - return o; + return t + string a[0:o]; } enc1(s: string, safe: string): string diff --git a/appl/svc/httpd/httpd.b b/appl/svc/httpd/httpd.b index e8cf84ea..db570a74 100644 --- a/appl/svc/httpd/httpd.b +++ b/appl/svc/httpd/httpd.b @@ -212,7 +212,6 @@ service_req(nc : Sys->Connection) g.dbg_log=dbg_log; g.logfile = logfile; g.modtime=0; - g.entity = parser->initarray(); g.mydomain = my_domain; g.version = "HTTP/1.0"; g.cache = cache; @@ -715,7 +714,7 @@ getendpoint(dir, file: string): (string, string) getendpoints(dir: string): string { - (lsys, lserv) := getendpoint(dir, "local"); - (rsys, rserv) := getendpoint(dir, "remote"); +# (lsys, lserv) := getendpoint(dir, "local"); + (rsys, nil) := getendpoint(dir, "remote"); return rsys; } diff --git a/appl/svc/httpd/httpd.m b/appl/svc/httpd/httpd.m index e96008e9..7d4c0dcf 100644 --- a/appl/svc/httpd/httpd.m +++ b/appl/svc/httpd/httpd.m @@ -1,8 +1,4 @@ Httpd: module { - Entity: adt{ - name : string; - value : int; - }; Internal, TempFail, Unimp, UnkVers, BadCont, BadReq, Syntax, BadSearch, NotFound, NoSearch , OnlySearch, Unauth, OK : con iota; @@ -35,7 +31,6 @@ Httpd: module { wordval : string; tok,parse_eof : int; mydomain,client : string; - entity: array of Entity; oklang : list of ref Contents->Content; }; Request: adt { diff --git a/appl/svc/httpd/parser.b b/appl/svc/httpd/parser.b index a109d022..14b9bf8e 100644 --- a/appl/svc/httpd/parser.b +++ b/appl/svc/httpd/parser.b @@ -14,7 +14,7 @@ include "contents.m"; Content: import contents; include "cache.m"; include "httpd.m"; - Entity, Private_info: import Httpd; + Private_info: import Httpd; Internal, TempFail, Unimp, UnkVers, BadCont, BadReq, Syntax, BadSearch, NotFound, NoSearch , OnlySearch, Unauth, OK : import Httpd; include "parser.m"; @@ -61,236 +61,6 @@ errormsg := array[] of { OK => Error("200 OK", "everything is fine","Groovy man"), }; -latin1 := array[] of { - '¡', - '¢', - '£', - '¤', - '¥', - '¦', - '§', - '¨', - '©', - 'ª', - '«', - '¬', - '', - '®', - '¯', - '°', - '±', - '²', - '³', - '´', - 'µ', - '¶', - '·', - '¸', - '¹', - 'º', - '»', - '¼', - '½', - '¾', - '¿', - 'À', - 'Á', - 'Â', - 'Ã', - 'Ä', - 'Å', - 'Æ', - 'Ç', - 'È', - 'É', - 'Ê', - 'Ë', - 'Ì', - 'Í', - 'Î', - 'Ï', - 'Ð', - 'Ñ', - 'Ò', - 'Ó', - 'Ô', - 'Õ', - 'Ö', - '×', - 'Ø', - 'Ù', - 'Ú', - 'Û', - 'Ü', - 'Ý', - 'Þ', - 'ß', - 'à', - 'á', - 'â', - 'ã', - 'ä', - 'å', - 'æ', - 'ç', - 'è', - 'é', - 'ê', - 'ë', - 'ì', - 'í', - 'î', - 'ï', - 'ð', - 'ñ', - 'ò', - 'ó', - 'ô', - 'õ', - 'ö', - '÷', - 'ø', - 'ù', - 'ú', - 'û', - 'ü', - 'ý', - 'þ', - 'ÿ', - 0, -}; - -entities :=array[] of { - Entity( "¡", '¡' ), - Entity( "¢", '¢' ), - Entity( "£", '£' ), - Entity( "¤", '¤' ), - Entity( "¥", '¥' ), - Entity( "¦", '¦' ), - Entity( "§", '§' ), - Entity( "¨", '¨' ), - Entity( "©", '©' ), - Entity( "ª", 'ª' ), - Entity( "«", '«' ), - Entity( "¬", '¬' ), - Entity( "­", '' ), - Entity( "®", '®' ), - Entity( "¯", '¯' ), - Entity( "°", '°' ), - Entity( "±", '±' ), - Entity( "²", '²' ), - Entity( "³", '³' ), - Entity( "´", '´' ), - Entity( "µ", 'µ' ), - Entity( "¶", '¶' ), - Entity( "·", '·' ), - Entity( "¸", '¸' ), - Entity( "¹", '¹' ), - Entity( "º", 'º' ), - Entity( "»", '»' ), - Entity( "¼", '¼' ), - Entity( "½", '½' ), - Entity( "¾", '¾' ), - Entity( "¿", '¿' ), - Entity( "À", 'À' ), - Entity( "Á", 'Á' ), - Entity( "Â", 'Â' ), - Entity( "Ã", 'Ã' ), - Entity( "Ä", 'Ä' ), - Entity( "Å", 'Å' ), - Entity( "Æ", 'Æ' ), - Entity( "Ç", 'Ç' ), - Entity( "È", 'È' ), - Entity( "É", 'É' ), - Entity( "Ê", 'Ê' ), - Entity( "Ë", 'Ë' ), - Entity( "Ì", 'Ì' ), - Entity( "Í", 'Í' ), - Entity( "Î", 'Î' ), - Entity( "Ï", 'Ï' ), - Entity( "Ð", 'Ð' ), - Entity( "Ñ", 'Ñ' ), - Entity( "Ò", 'Ò' ), - Entity( "Ó", 'Ó' ), - Entity( "Ô", 'Ô' ), - Entity( "Õ", 'Õ' ), - Entity( "Ö", 'Ö' ), - Entity( "&215;", '×' ), - Entity( "Ø", 'Ø' ), - Entity( "Ù", 'Ù' ), - Entity( "Ú", 'Ú' ), - Entity( "Û", 'Û' ), - Entity( "Ü", 'Ü' ), - Entity( "Ý", 'Ý' ), - Entity( "Þ", 'Þ' ), - Entity( "ß", 'ß' ), - Entity( "à", 'à' ), - Entity( "á", 'á' ), - Entity( "â", 'â' ), - Entity( "ã", 'ã' ), - Entity( "ä", 'ä' ), - Entity( "å", 'å' ), - Entity( "æ", 'æ' ), - Entity( "ç", 'ç' ), - Entity( "è", 'è' ), - Entity( "é", 'é' ), - Entity( "ê", 'ê' ), - Entity( "ë", 'ë' ), - Entity( "ì", 'ì' ), - Entity( "í", 'í' ), - Entity( "î", 'î' ), - Entity( "ï", 'ï' ), - Entity( "ð", 'ð' ), - Entity( "ñ", 'ñ' ), - Entity( "ò", 'ò' ), - Entity( "ó", 'ó' ), - Entity( "ô", 'ô' ), - Entity( "õ", 'õ' ), - Entity( "ö", 'ö' ), - Entity( "&247;", '÷' ), - Entity( "ø", 'ø' ), - Entity( "ù", 'ù' ), - Entity( "ú", 'ú' ), - Entity( "û", 'û' ), - Entity( "ü", 'ü' ), - Entity( "ý", 'ý' ), - Entity( "þ", 'þ' ), - Entity( "ÿ", 'ÿ' ), - - Entity( "&#SPACE;", ' ' ), - Entity( "&#RS;", '\n' ), - Entity( "&#RE;", '\r' ), - Entity( """, '"' ), - Entity( "&", '&' ), - Entity( "<", '<' ), - Entity( ">", '>' ), - - Entity( "CAP-DELTA", 'Δ' ), - Entity( "ALPHA", 'α' ), - Entity( "BETA", 'β' ), - Entity( "DELTA", 'δ' ), - Entity( "EPSILON", 'ε' ), - Entity( "THETA", 'θ' ), - Entity( "MU", 'μ' ), - Entity( "PI", 'π' ), - Entity( "TAU", 'τ' ), - Entity( "CHI", 'χ' ), - - Entity( "<-", '←' ), - Entity( "^", '↑' ), - Entity( "->", '→' ), - Entity( "v", '↓' ), - Entity( "!=", '≠' ), - Entity( "<=", '≤' ), - Entity( nil, 0 ), - }; - - -initarray() : array of Entity -{ - return entities; -} - badmodule(p: string) { sys->fprint(sys->fildes(2), "parse: cannot load %s: %r", p); @@ -690,93 +460,51 @@ getc(g: ref Private_info): int return c & 16r7f; } -ungetc(g: ref Private_info) { +ungetc(g: ref Private_info) +{ # this is a dirty hack, I am tacitly assuming that characters read # from stdin will be ASCII..... g.bufio->g.bin.ungetc(); } -# go from url with latin1 and escapes to utf +# go from url with ascii and %xx escapes to unicode, allowing for existing unencoded utf-8 urlunesc(s : string): string { - c, n : int; - t : string; - for(i := 0;i<len s ; i++){ - c = int s[i]; - if(c == '%'){ - n = int s[i+1]; - if(n >= '0' && n <= '9') - n = n - '0'; - else if(n >= 'A' && n <= 'F') - n = n - 'A' + 10; - else if(n >= 'a' && n <= 'f') - n = n - 'a' + 10; - else - break; - c = n; - n = int s[i+2]; - if(n >= '0' && n <= '9') - n = n - '0'; - else if(n >= 'A' && n <= 'F') - n = n - 'A' + 10; - else if(n >= 'a' && n <= 'f') - n = n - 'a' + 10; - else - break; - i += 2; - c = c * 16 + n; - } - else if( c == '+' ) - c = ' '; - t[len t] = c; + a := array[Sys->UTFmax*len s] of byte; + o := 0; + for(i := 0; i < len s; i++){ + c := int s[i]; + if(c < Runeself){ + if(c == '%' && i+2 < len s){ + d0 := hex(int s[i+1]); + if(d0 >= 0){ + d1 := hex(int s[i+2]); + if(d1 >= 0){ + i += 2; + c = d0*16 + d1; + } + } + } else if(c == '+' || c == 0) + c = ' '; + a[o++] = byte c; + }else + o += sys->char2byte(c, a, o); } - return t; + return string a[0: o]; } - -# go from http with latin1 escapes to utf, -# we assume that anything >= Runeself is already in utf - -httpunesc(g: ref Private_info,s : array of byte): string +hex(c: int): int { - t,v: string; - c,i : int; - # convert bytes to a string. - v = string s; - for(i=0; i < len v;i++){ - c = v[i]; - if(c == '&'){ - if(v[1] == '#' && v[2] && v[3] && v[4] && v[5] == ';'){ - c = 100*(v[2])+10*(v[3])+(v[4]); - if(c < Runeself){ - t[len t] = c; - i += 6; - continue; - } - if(c < 256 && c >= 161){ - t[len t] = g.entity[c-161].value; - i += 6; - continue; - } - } else { - for(j:= 0;g.entity[j].name != nil; j++) - if(g.entity[j].name == v[i+1:]) - # problem here cvert array of byte to string? - break; - if(g.entity[j].name != nil){ - i += len g.entity[j].name; - t[len t] = g.entity[j].value; - continue; - } - } - } - t[len t] = c; - } - return t; + if(c >= '0' && c <= '9') + return c-'0'; + if(c >= 'a' && c <= 'f') + return c-'a' + 10; + if(c >= 'A' && c <= 'F') + return c-'A' + 10; + return -1; } - # write a failure message to the net and exit fail(g: ref Private_info,reason : int, message : string) { @@ -845,14 +573,18 @@ logit(g: ref Private_info,message : string ) urlconv(p : string): string { - c : int; - t : string; - for(i:=0;i<len p ;i++){ - c = p[i]; + a := array[Sys->UTFmax] of byte; + t := ""; + for(i := 0; i < len p; i++){ + c := p[i]; if(c == 0) - break; - if(c <= ' ' || c == '%' || c >= Runeself){ - t += sys->sprint("%%%2.2x", c); + continue; # ignore nul bytes + if(c >= Runeself){ # convert to UTF-8 + n := sys->char2byte(c, a, 0); + for(j := 0; j < n; j++) + t += sys->sprint("%%%.2X", int a[j]); + }else if(c <= ' ' || c == '%'){ + t += sys->sprint("%%%2.2X", c); } else { t[len t] = c; } diff --git a/appl/svc/httpd/parser.m b/appl/svc/httpd/parser.m index 0c1a5829..3d50237a 100644 --- a/appl/svc/httpd/parser.m +++ b/appl/svc/httpd/parser.m @@ -5,7 +5,6 @@ Parser: module { PATH: con "/dis/svc/httpd/parser.dis"; init: fn(); - initarray: fn(): array of Httpd->Entity; urlunesc: fn(s: string): string; fail: fn(g: ref Httpd->Private_info,reason: int, message: string); logit: fn(g: ref Httpd->Private_info, message: string ); |
