summary refs log tree commit diff
diff options
context:
space:
mode:
author forsyth <forsyth@vitanuova.com> 2010-09-25 14:04:43 +0100
committer forsyth <forsyth@vitanuova.com> 2010-09-25 14:04:43 +0100
commit 35f503c642e9dd127a2b989e4e12a10691cba3d4 (patch)
tree acb0a1b5b1d3f242345b5e99d807bfd53ae99fbe
parent 9d79870ef4c3926878bf998c10a08c308dc3bd74 (diff)
20100925-1403
-rw-r--r-- CHANGES 3
-rw-r--r-- appl/lib/complete.b 1
-rw-r--r-- appl/lib/w3c/uris.b 28
-rw-r--r-- appl/svc/httpd/httpd.b 5
-rw-r--r-- appl/svc/httpd/httpd.m 5
-rw-r--r-- appl/svc/httpd/parser.b 354
-rw-r--r-- appl/svc/httpd/parser.m 1
-rw-r--r-- dis/lib/complete.dis bin 1142 -> 1138 bytes
-rw-r--r-- dis/lib/w3c/uris.dis bin 3822 -> 3950 bytes
-rw-r--r-- dis/svc/httpd/cgiparse.dis bin 2789 -> 2789 bytes
-rw-r--r-- dis/svc/httpd/echo.dis bin 1845 -> 1845 bytes
-rw-r--r-- dis/svc/httpd/httpd.dis bin 14440 -> 14385 bytes
-rw-r--r-- dis/svc/httpd/imagemap.dis bin 5321 -> 5321 bytes
-rw-r--r-- dis/svc/httpd/parser.dis bin 10666 -> 8537 bytes
-rw-r--r-- dis/svc/httpd/stats.dis bin 1656 -> 1656 bytes
-rw-r--r-- include/version.h 2
-rw-r--r-- man/2/w3c-uris 5
-rw-r--r-- module/NOTICE 2
18 files changed, 70 insertions, 336 deletions
diff --git a/CHANGES b/CHANGES
index e1b2c4a7..167603f0 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,6 @@
+20100925
+ appl/lib/w3c/uris.b - handle existing Unicode characters if they happen not to have been encoded into string
+ appl/svc/httpd/parser.b - similar [response to issue 233]
20100914
emu/Nt/os.c - replace trap handling
20100822
diff --git a/appl/lib/complete.b b/appl/lib/complete.b
index 1a2e7713..b793e8f5 100644
--- a/appl/lib/complete.b
+++ b/appl/lib/complete.b
@@ -41,7 +41,6 @@ complete(dir, s: string): (ref Completion, string)
if(n == 0)
return (nil, nil);
- readdir = nil;
c := ref Completion(0, 0, nil, 0, nil);
diff --git a/appl/lib/w3c/uris.b b/appl/lib/w3c/uris.b
index b49c17b8..07042dc4 100644
--- a/appl/lib/w3c/uris.b
+++ b/appl/lib/w3c/uris.b
@@ -260,20 +260,26 @@ dec(s: string): string
if(s[i] == '%' || s[i] == 0)
break;
}
- o := s[0:i];
+ t := s[0:i];
+ a := array[Sys->UTFmax*len s] of byte; # upper bound
+ o := 0;
while(i < len s){
- case c := s[i++] {
- '%' =>
- if((v := hex2(s[i:])) > 0){
- c = v;
- i += 2;
+ c := s[i++];
+ if(c < 16r80){
+ case c {
+ '%' =>
+ if((v := hex2(s[i:])) > 0){
+ c = v;
+ i += 2;
+ }
+ 0 =>
+ c = ' '; # shouldn't happen
}
- 0 =>
- c = ' '; # shouldn't happen
- }
- o[len o] = c;
+ a[o++] = byte c;
+ }else
+ o += sys->char2byte(c, a, o); # string contained Unicode
}
- return o;
+ return t + string a[0:o];
}
enc1(s: string, safe: string): string
diff --git a/appl/svc/httpd/httpd.b b/appl/svc/httpd/httpd.b
index e8cf84ea..db570a74 100644
--- a/appl/svc/httpd/httpd.b
+++ b/appl/svc/httpd/httpd.b
@@ -212,7 +212,6 @@ service_req(nc : Sys->Connection)
g.dbg_log=dbg_log;
g.logfile = logfile;
g.modtime=0;
- g.entity = parser->initarray();
g.mydomain = my_domain;
g.version = "HTTP/1.0";
g.cache = cache;
@@ -715,7 +714,7 @@ getendpoint(dir, file: string): (string, string)
getendpoints(dir: string): string
{
- (lsys, lserv) := getendpoint(dir, "local");
- (rsys, rserv) := getendpoint(dir, "remote");
+# (lsys, lserv) := getendpoint(dir, "local");
+ (rsys, nil) := getendpoint(dir, "remote");
return rsys;
}
diff --git a/appl/svc/httpd/httpd.m b/appl/svc/httpd/httpd.m
index e96008e9..7d4c0dcf 100644
--- a/appl/svc/httpd/httpd.m
+++ b/appl/svc/httpd/httpd.m
@@ -1,8 +1,4 @@
Httpd: module {
- Entity: adt{
- name : string;
- value : int;
- };
Internal, TempFail, Unimp, UnkVers, BadCont, BadReq, Syntax,
BadSearch, NotFound, NoSearch , OnlySearch, Unauth, OK : con iota;
@@ -35,7 +31,6 @@ Httpd: module {
wordval : string;
tok,parse_eof : int;
mydomain,client : string;
- entity: array of Entity;
oklang : list of ref Contents->Content;
};
Request: adt {
diff --git a/appl/svc/httpd/parser.b b/appl/svc/httpd/parser.b
index a109d022..14b9bf8e 100644
--- a/appl/svc/httpd/parser.b
+++ b/appl/svc/httpd/parser.b
@@ -14,7 +14,7 @@ include "contents.m";
Content: import contents;
include "cache.m";
include "httpd.m";
- Entity, Private_info: import Httpd;
+ Private_info: import Httpd;
Internal, TempFail, Unimp, UnkVers, BadCont, BadReq, Syntax,
BadSearch, NotFound, NoSearch , OnlySearch, Unauth, OK : import Httpd;
include "parser.m";
@@ -61,236 +61,6 @@ errormsg := array[] of {
OK => Error("200 OK", "everything is fine","Groovy man"),
};
-latin1 := array[] of {
- '¡',
- '¢',
- '£',
- '¤',
- '¥',
- '¦',
- '§',
- '¨',
- '©',
- 'ª',
- '«',
- '¬',
- '­',
- '®',
- '¯',
- '°',
- '±',
- '²',
- '³',
- '´',
- 'µ',
- '¶',
- '·',
- '¸',
- '¹',
- 'º',
- '»',
- '¼',
- '½',
- '¾',
- '¿',
- 'À',
- 'Á',
- 'Â',
- 'Ã',
- 'Ä',
- 'Å',
- 'Æ',
- 'Ç',
- 'È',
- 'É',
- 'Ê',
- 'Ë',
- 'Ì',
- 'Í',
- 'Î',
- 'Ï',
- 'Ð',
- 'Ñ',
- 'Ò',
- 'Ó',
- 'Ô',
- 'Õ',
- 'Ö',
- '×',
- 'Ø',
- 'Ù',
- 'Ú',
- 'Û',
- 'Ü',
- 'Ý',
- 'Þ',
- 'ß',
- 'à',
- 'á',
- 'â',
- 'ã',
- 'ä',
- 'å',
- 'æ',
- 'ç',
- 'è',
- 'é',
- 'ê',
- 'ë',
- 'ì',
- 'í',
- 'î',
- 'ï',
- 'ð',
- 'ñ',
- 'ò',
- 'ó',
- 'ô',
- 'õ',
- 'ö',
- '÷',
- 'ø',
- 'ù',
- 'ú',
- 'û',
- 'ü',
- 'ý',
- 'þ',
- 'ÿ',
- 0,
-};
-
-entities :=array[] of {
- Entity( "&#161;", '¡' ),
- Entity( "&#162;", '¢' ),
- Entity( "&#163;", '£' ),
- Entity( "&#164;", '¤' ),
- Entity( "&#165;", '¥' ),
- Entity( "&#166;", '¦' ),
- Entity( "&#167;", '§' ),
- Entity( "&#168;", '¨' ),
- Entity( "&#169;", '©' ),
- Entity( "&#170;", 'ª' ),
- Entity( "&#171;", '«' ),
- Entity( "&#172;", '¬' ),
- Entity( "&#173;", '­' ),
- Entity( "&#174;", '®' ),
- Entity( "&#175;", '¯' ),
- Entity( "&#176;", '°' ),
- Entity( "&#177;", '±' ),
- Entity( "&#178;", '²' ),
- Entity( "&#179;", '³' ),
- Entity( "&#180;", '´' ),
- Entity( "&#181;", 'µ' ),
- Entity( "&#182;", '¶' ),
- Entity( "&#183;", '·' ),
- Entity( "&#184;", '¸' ),
- Entity( "&#185;", '¹' ),
- Entity( "&#186;", 'º' ),
- Entity( "&#187;", '»' ),
- Entity( "&#188;", '¼' ),
- Entity( "&#189;", '½' ),
- Entity( "&#190;", '¾' ),
- Entity( "&#191;", '¿' ),
- Entity( "&Agrave;", 'À' ),
- Entity( "&Aacute;", 'Á' ),
- Entity( "&Acirc;", 'Â' ),
- Entity( "&Atilde;", 'Ã' ),
- Entity( "&Auml;", 'Ä' ),
- Entity( "&Aring;", 'Å' ),
- Entity( "&AElig;", 'Æ' ),
- Entity( "&Ccedil;", 'Ç' ),
- Entity( "&Egrave;", 'È' ),
- Entity( "&Eacute;", 'É' ),
- Entity( "&Ecirc;", 'Ê' ),
- Entity( "&Euml;", 'Ë' ),
- Entity( "&Igrave;", 'Ì' ),
- Entity( "&Iacute;", 'Í' ),
- Entity( "&Icirc;", 'Î' ),
- Entity( "&Iuml;", 'Ï' ),
- Entity( "&ETH;", 'Ð' ),
- Entity( "&Ntilde;", 'Ñ' ),
- Entity( "&Ograve;", 'Ò' ),
- Entity( "&Oacute;", 'Ó' ),
- Entity( "&Ocirc;", 'Ô' ),
- Entity( "&Otilde;", 'Õ' ),
- Entity( "&Ouml;", 'Ö' ),
- Entity( "&215;", '×' ),
- Entity( "&Oslash;", 'Ø' ),
- Entity( "&Ugrave;", 'Ù' ),
- Entity( "&Uacute;", 'Ú' ),
- Entity( "&Ucirc;", 'Û' ),
- Entity( "&Uuml;", 'Ü' ),
- Entity( "&Yacute;", 'Ý' ),
- Entity( "&THORN;", 'Þ' ),
- Entity( "&szlig;", 'ß' ),
- Entity( "&agrave;", 'à' ),
- Entity( "&aacute;", 'á' ),
- Entity( "&acirc;", 'â' ),
- Entity( "&atilde;", 'ã' ),
- Entity( "&auml;", 'ä' ),
- Entity( "&aring;", 'å' ),
- Entity( "&aelig;", 'æ' ),
- Entity( "&ccedil;", 'ç' ),
- Entity( "&egrave;", 'è' ),
- Entity( "&eacute;", 'é' ),
- Entity( "&ecirc;", 'ê' ),
- Entity( "&euml;", 'ë' ),
- Entity( "&igrave;", 'ì' ),
- Entity( "&iacute;", 'í' ),
- Entity( "&icirc;", 'î' ),
- Entity( "&iuml;", 'ï' ),
- Entity( "&eth;", 'ð' ),
- Entity( "&ntilde;", 'ñ' ),
- Entity( "&ograve;", 'ò' ),
- Entity( "&oacute;", 'ó' ),
- Entity( "&ocirc;", 'ô' ),
- Entity( "&otilde;", 'õ' ),
- Entity( "&ouml;", 'ö' ),
- Entity( "&247;", '÷' ),
- Entity( "&oslash;", 'ø' ),
- Entity( "&ugrave;", 'ù' ),
- Entity( "&uacute;", 'ú' ),
- Entity( "&ucirc;", 'û' ),
- Entity( "&uuml;", 'ü' ),
- Entity( "&yacute;", 'ý' ),
- Entity( "&thorn;", 'þ' ),
- Entity( "&yuml;", 'ÿ' ),
-
- Entity( "&#SPACE;", ' ' ),
- Entity( "&#RS;", '\n' ),
- Entity( "&#RE;", '\r' ),
- Entity( "&quot;", '"' ),
- Entity( "&amp;", '&' ),
- Entity( "&lt;", '<' ),
- Entity( "&gt;", '>' ),
-
- Entity( "CAP-DELTA", 'Δ' ),
- Entity( "ALPHA", 'α' ),
- Entity( "BETA", 'β' ),
- Entity( "DELTA", 'δ' ),
- Entity( "EPSILON", 'ε' ),
- Entity( "THETA", 'θ' ),
- Entity( "MU", 'μ' ),
- Entity( "PI", 'π' ),
- Entity( "TAU", 'τ' ),
- Entity( "CHI", 'χ' ),
-
- Entity( "<-", '←' ),
- Entity( "^", '↑' ),
- Entity( "->", '→' ),
- Entity( "v", '↓' ),
- Entity( "!=", '≠' ),
- Entity( "<=", '≤' ),
- Entity( nil, 0 ),
- };
-
-
-initarray() : array of Entity
-{
- return entities;
-}
-
badmodule(p: string)
{
sys->fprint(sys->fildes(2), "parse: cannot load %s: %r", p);
@@ -690,93 +460,51 @@ getc(g: ref Private_info): int
return c & 16r7f;
}
-ungetc(g: ref Private_info) {
+ungetc(g: ref Private_info)
+{
# this is a dirty hack, I am tacitly assuming that characters read
# from stdin will be ASCII.....
g.bufio->g.bin.ungetc();
}
-# go from url with latin1 and escapes to utf
+# go from url with ascii and %xx escapes to unicode, allowing for existing unencoded utf-8
urlunesc(s : string): string
{
- c, n : int;
- t : string;
- for(i := 0;i<len s ; i++){
- c = int s[i];
- if(c == '%'){
- n = int s[i+1];
- if(n >= '0' && n <= '9')
- n = n - '0';
- else if(n >= 'A' && n <= 'F')
- n = n - 'A' + 10;
- else if(n >= 'a' && n <= 'f')
- n = n - 'a' + 10;
- else
- break;
- c = n;
- n = int s[i+2];
- if(n >= '0' && n <= '9')
- n = n - '0';
- else if(n >= 'A' && n <= 'F')
- n = n - 'A' + 10;
- else if(n >= 'a' && n <= 'f')
- n = n - 'a' + 10;
- else
- break;
- i += 2;
- c = c * 16 + n;
- }
- else if( c == '+' )
- c = ' ';
- t[len t] = c;
+ a := array[Sys->UTFmax*len s] of byte;
+ o := 0;
+ for(i := 0; i < len s; i++){
+ c := int s[i];
+ if(c < Runeself){
+ if(c == '%' && i+2 < len s){
+ d0 := hex(int s[i+1]);
+ if(d0 >= 0){
+ d1 := hex(int s[i+2]);
+ if(d1 >= 0){
+ i += 2;
+ c = d0*16 + d1;
+ }
+ }
+ } else if(c == '+' || c == 0)
+ c = ' ';
+ a[o++] = byte c;
+ }else
+ o += sys->char2byte(c, a, o);
}
- return t;
+ return string a[0: o];
}
-
-# go from http with latin1 escapes to utf,
-# we assume that anything >= Runeself is already in utf
-
-httpunesc(g: ref Private_info,s : array of byte): string
+hex(c: int): int
{
- t,v: string;
- c,i : int;
- # convert bytes to a string.
- v = string s;
- for(i=0; i < len v;i++){
- c = v[i];
- if(c == '&'){
- if(v[1] == '#' && v[2] && v[3] && v[4] && v[5] == ';'){
- c = 100*(v[2])+10*(v[3])+(v[4]);
- if(c < Runeself){
- t[len t] = c;
- i += 6;
- continue;
- }
- if(c < 256 && c >= 161){
- t[len t] = g.entity[c-161].value;
- i += 6;
- continue;
- }
- } else {
- for(j:= 0;g.entity[j].name != nil; j++)
- if(g.entity[j].name == v[i+1:])
- # problem here cvert array of byte to string?
- break;
- if(g.entity[j].name != nil){
- i += len g.entity[j].name;
- t[len t] = g.entity[j].value;
- continue;
- }
- }
- }
- t[len t] = c;
- }
- return t;
+ if(c >= '0' && c <= '9')
+ return c-'0';
+ if(c >= 'a' && c <= 'f')
+ return c-'a' + 10;
+ if(c >= 'A' && c <= 'F')
+ return c-'A' + 10;
+ return -1;
}
-
# write a failure message to the net and exit
fail(g: ref Private_info,reason : int, message : string)
{
@@ -845,14 +573,18 @@ logit(g: ref Private_info,message : string )
urlconv(p : string): string
{
- c : int;
- t : string;
- for(i:=0;i<len p ;i++){
- c = p[i];
+ a := array[Sys->UTFmax] of byte;
+ t := "";
+ for(i := 0; i < len p; i++){
+ c := p[i];
if(c == 0)
- break;
- if(c <= ' ' || c == '%' || c >= Runeself){
- t += sys->sprint("%%%2.2x", c);
+ continue; # ignore nul bytes
+ if(c >= Runeself){ # convert to UTF-8
+ n := sys->char2byte(c, a, 0);
+ for(j := 0; j < n; j++)
+ t += sys->sprint("%%%.2X", int a[j]);
+ }else if(c <= ' ' || c == '%'){
+ t += sys->sprint("%%%2.2X", c);
} else {
t[len t] = c;
}
diff --git a/appl/svc/httpd/parser.m b/appl/svc/httpd/parser.m
index 0c1a5829..3d50237a 100644
--- a/appl/svc/httpd/parser.m
+++ b/appl/svc/httpd/parser.m
@@ -5,7 +5,6 @@ Parser: module {
PATH: con "/dis/svc/httpd/parser.dis";
init: fn();
- initarray: fn(): array of Httpd->Entity;
urlunesc: fn(s: string): string;
fail: fn(g: ref Httpd->Private_info,reason: int, message: string);
logit: fn(g: ref Httpd->Private_info, message: string );
diff --git a/dis/lib/complete.dis b/dis/lib/complete.dis
index 5feb01bc..cd1a2e80 100644
--- a/dis/lib/complete.dis
+++ b/dis/lib/complete.dis
Binary files differ
diff --git a/dis/lib/w3c/uris.dis b/dis/lib/w3c/uris.dis
index 8c4f250d..583c3d77 100644
--- a/dis/lib/w3c/uris.dis
+++ b/dis/lib/w3c/uris.dis
Binary files differ
diff --git a/dis/svc/httpd/cgiparse.dis b/dis/svc/httpd/cgiparse.dis
index d0373b5e..b9fe2608 100644
--- a/dis/svc/httpd/cgiparse.dis
+++ b/dis/svc/httpd/cgiparse.dis
Binary files differ
diff --git a/dis/svc/httpd/echo.dis b/dis/svc/httpd/echo.dis
index 58e885bc..76677a07 100644
--- a/dis/svc/httpd/echo.dis
+++ b/dis/svc/httpd/echo.dis
Binary files differ
diff --git a/dis/svc/httpd/httpd.dis b/dis/svc/httpd/httpd.dis
index 4f48da5f..0ee11082 100644
--- a/dis/svc/httpd/httpd.dis
+++ b/dis/svc/httpd/httpd.dis
Binary files differ
diff --git a/dis/svc/httpd/imagemap.dis b/dis/svc/httpd/imagemap.dis
index b92ed373..7be19c2f 100644
--- a/dis/svc/httpd/imagemap.dis
+++ b/dis/svc/httpd/imagemap.dis
Binary files differ
diff --git a/dis/svc/httpd/parser.dis b/dis/svc/httpd/parser.dis
index 78fa66a6..fb08f76b 100644
--- a/dis/svc/httpd/parser.dis
+++ b/dis/svc/httpd/parser.dis
Binary files differ
diff --git a/dis/svc/httpd/stats.dis b/dis/svc/httpd/stats.dis
index 419bf537..eec2fded 100644
--- a/dis/svc/httpd/stats.dis
+++ b/dis/svc/httpd/stats.dis
Binary files differ
diff --git a/include/version.h b/include/version.h
index d8087e16..a181733c 100644
--- a/include/version.h
+++ b/include/version.h
@@ -1 +1 @@
-#define VERSION "Fourth Edition (20100914)"
+#define VERSION "Fourth Edition (20100925)"
diff --git a/man/2/w3c-uris b/man/2/w3c-uris
index d4dbb73d..b068130f 100644
--- a/man/2/w3c-uris
+++ b/man/2/w3c-uris
@@ -165,7 +165,7 @@ Note that
.I u
and
.I v
-are assumed to be in a canonical form for the scheme and application.
+are assumed to be in a canonical form for the scheme and application.
.TP
.IB u .eqf( v )
Returns true if
@@ -214,7 +214,8 @@ encoded using a sequence of one or more strings of the form
.BI % xx
where
.I xx
-is the hexadecimal value of one byte of the character.
+is the hexadecimal value of one byte of the character's encoding in
+.IR utf (6).
A string
.I s
containing such encodings can be decoded by the function
diff --git a/module/NOTICE b/module/NOTICE
index a9a7616f..6169ac2f 100644
--- a/module/NOTICE
+++ b/module/NOTICE
@@ -8,7 +8,7 @@ file such as NOTICE, LICENCE or COPYING.
Copyright © 1995-1999 Lucent Technologies Inc.
Portions Copyright © 1997-2000 Vita Nuova Limited
-Portions Copyright © 2000-2007 Vita Nuova Holdings Limited
+Portions Copyright © 2000-2010 Vita Nuova Holdings Limited
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License (`LGPL') as published by