summary refs log tree commit diff
path: root/appl/svc
diff options
context:
space:
mode:
author forsyth <forsyth@vitanuova.com> 2010-09-25 14:04:43 +0100
committer forsyth <forsyth@vitanuova.com> 2010-09-25 14:04:43 +0100
commit 35f503c642e9dd127a2b989e4e12a10691cba3d4 (patch)
tree acb0a1b5b1d3f242345b5e99d807bfd53ae99fbe /appl/svc
parent 9d79870ef4c3926878bf998c10a08c308dc3bd74 (diff)
20100925-1403
Diffstat (limited to 'appl/svc')
-rw-r--r-- appl/svc/httpd/httpd.b 5
-rw-r--r-- appl/svc/httpd/httpd.m 5
-rw-r--r-- appl/svc/httpd/parser.b 354
-rw-r--r-- appl/svc/httpd/parser.m 1
4 files changed, 45 insertions, 320 deletions
diff --git a/appl/svc/httpd/httpd.b b/appl/svc/httpd/httpd.b
index e8cf84ea..db570a74 100644
--- a/appl/svc/httpd/httpd.b
+++ b/appl/svc/httpd/httpd.b
@@ -212,7 +212,6 @@ service_req(nc : Sys->Connection)
g.dbg_log=dbg_log;
g.logfile = logfile;
g.modtime=0;
- g.entity = parser->initarray();
g.mydomain = my_domain;
g.version = "HTTP/1.0";
g.cache = cache;
@@ -715,7 +714,7 @@ getendpoint(dir, file: string): (string, string)
getendpoints(dir: string): string
{
- (lsys, lserv) := getendpoint(dir, "local");
- (rsys, rserv) := getendpoint(dir, "remote");
+# (lsys, lserv) := getendpoint(dir, "local");
+ (rsys, nil) := getendpoint(dir, "remote");
return rsys;
}
diff --git a/appl/svc/httpd/httpd.m b/appl/svc/httpd/httpd.m
index e96008e9..7d4c0dcf 100644
--- a/appl/svc/httpd/httpd.m
+++ b/appl/svc/httpd/httpd.m
@@ -1,8 +1,4 @@
Httpd: module {
- Entity: adt{
- name : string;
- value : int;
- };
Internal, TempFail, Unimp, UnkVers, BadCont, BadReq, Syntax,
BadSearch, NotFound, NoSearch , OnlySearch, Unauth, OK : con iota;
@@ -35,7 +31,6 @@ Httpd: module {
wordval : string;
tok,parse_eof : int;
mydomain,client : string;
- entity: array of Entity;
oklang : list of ref Contents->Content;
};
Request: adt {
diff --git a/appl/svc/httpd/parser.b b/appl/svc/httpd/parser.b
index a109d022..14b9bf8e 100644
--- a/appl/svc/httpd/parser.b
+++ b/appl/svc/httpd/parser.b
@@ -14,7 +14,7 @@ include "contents.m";
Content: import contents;
include "cache.m";
include "httpd.m";
- Entity, Private_info: import Httpd;
+ Private_info: import Httpd;
Internal, TempFail, Unimp, UnkVers, BadCont, BadReq, Syntax,
BadSearch, NotFound, NoSearch , OnlySearch, Unauth, OK : import Httpd;
include "parser.m";
@@ -61,236 +61,6 @@ errormsg := array[] of {
OK => Error("200 OK", "everything is fine","Groovy man"),
};
-latin1 := array[] of {
- '¡',
- '¢',
- '£',
- '¤',
- '¥',
- '¦',
- '§',
- '¨',
- '©',
- 'ª',
- '«',
- '¬',
- '­',
- '®',
- '¯',
- '°',
- '±',
- '²',
- '³',
- '´',
- 'µ',
- '¶',
- '·',
- '¸',
- '¹',
- 'º',
- '»',
- '¼',
- '½',
- '¾',
- '¿',
- 'À',
- 'Á',
- 'Â',
- 'Ã',
- 'Ä',
- 'Å',
- 'Æ',
- 'Ç',
- 'È',
- 'É',
- 'Ê',
- 'Ë',
- 'Ì',
- 'Í',
- 'Î',
- 'Ï',
- 'Ð',
- 'Ñ',
- 'Ò',
- 'Ó',
- 'Ô',
- 'Õ',
- 'Ö',
- '×',
- 'Ø',
- 'Ù',
- 'Ú',
- 'Û',
- 'Ü',
- 'Ý',
- 'Þ',
- 'ß',
- 'à',
- 'á',
- 'â',
- 'ã',
- 'ä',
- 'å',
- 'æ',
- 'ç',
- 'è',
- 'é',
- 'ê',
- 'ë',
- 'ì',
- 'í',
- 'î',
- 'ï',
- 'ð',
- 'ñ',
- 'ò',
- 'ó',
- 'ô',
- 'õ',
- 'ö',
- '÷',
- 'ø',
- 'ù',
- 'ú',
- 'û',
- 'ü',
- 'ý',
- 'þ',
- 'ÿ',
- 0,
-};
-
-entities :=array[] of {
- Entity( "&#161;", '¡' ),
- Entity( "&#162;", '¢' ),
- Entity( "&#163;", '£' ),
- Entity( "&#164;", '¤' ),
- Entity( "&#165;", '¥' ),
- Entity( "&#166;", '¦' ),
- Entity( "&#167;", '§' ),
- Entity( "&#168;", '¨' ),
- Entity( "&#169;", '©' ),
- Entity( "&#170;", 'ª' ),
- Entity( "&#171;", '«' ),
- Entity( "&#172;", '¬' ),
- Entity( "&#173;", '­' ),
- Entity( "&#174;", '®' ),
- Entity( "&#175;", '¯' ),
- Entity( "&#176;", '°' ),
- Entity( "&#177;", '±' ),
- Entity( "&#178;", '²' ),
- Entity( "&#179;", '³' ),
- Entity( "&#180;", '´' ),
- Entity( "&#181;", 'µ' ),
- Entity( "&#182;", '¶' ),
- Entity( "&#183;", '·' ),
- Entity( "&#184;", '¸' ),
- Entity( "&#185;", '¹' ),
- Entity( "&#186;", 'º' ),
- Entity( "&#187;", '»' ),
- Entity( "&#188;", '¼' ),
- Entity( "&#189;", '½' ),
- Entity( "&#190;", '¾' ),
- Entity( "&#191;", '¿' ),
- Entity( "&Agrave;", 'À' ),
- Entity( "&Aacute;", 'Á' ),
- Entity( "&Acirc;", 'Â' ),
- Entity( "&Atilde;", 'Ã' ),
- Entity( "&Auml;", 'Ä' ),
- Entity( "&Aring;", 'Å' ),
- Entity( "&AElig;", 'Æ' ),
- Entity( "&Ccedil;", 'Ç' ),
- Entity( "&Egrave;", 'È' ),
- Entity( "&Eacute;", 'É' ),
- Entity( "&Ecirc;", 'Ê' ),
- Entity( "&Euml;", 'Ë' ),
- Entity( "&Igrave;", 'Ì' ),
- Entity( "&Iacute;", 'Í' ),
- Entity( "&Icirc;", 'Î' ),
- Entity( "&Iuml;", 'Ï' ),
- Entity( "&ETH;", 'Ð' ),
- Entity( "&Ntilde;", 'Ñ' ),
- Entity( "&Ograve;", 'Ò' ),
- Entity( "&Oacute;", 'Ó' ),
- Entity( "&Ocirc;", 'Ô' ),
- Entity( "&Otilde;", 'Õ' ),
- Entity( "&Ouml;", 'Ö' ),
- Entity( "&215;", '×' ),
- Entity( "&Oslash;", 'Ø' ),
- Entity( "&Ugrave;", 'Ù' ),
- Entity( "&Uacute;", 'Ú' ),
- Entity( "&Ucirc;", 'Û' ),
- Entity( "&Uuml;", 'Ü' ),
- Entity( "&Yacute;", 'Ý' ),
- Entity( "&THORN;", 'Þ' ),
- Entity( "&szlig;", 'ß' ),
- Entity( "&agrave;", 'à' ),
- Entity( "&aacute;", 'á' ),
- Entity( "&acirc;", 'â' ),
- Entity( "&atilde;", 'ã' ),
- Entity( "&auml;", 'ä' ),
- Entity( "&aring;", 'å' ),
- Entity( "&aelig;", 'æ' ),
- Entity( "&ccedil;", 'ç' ),
- Entity( "&egrave;", 'è' ),
- Entity( "&eacute;", 'é' ),
- Entity( "&ecirc;", 'ê' ),
- Entity( "&euml;", 'ë' ),
- Entity( "&igrave;", 'ì' ),
- Entity( "&iacute;", 'í' ),
- Entity( "&icirc;", 'î' ),
- Entity( "&iuml;", 'ï' ),
- Entity( "&eth;", 'ð' ),
- Entity( "&ntilde;", 'ñ' ),
- Entity( "&ograve;", 'ò' ),
- Entity( "&oacute;", 'ó' ),
- Entity( "&ocirc;", 'ô' ),
- Entity( "&otilde;", 'õ' ),
- Entity( "&ouml;", 'ö' ),
- Entity( "&247;", '÷' ),
- Entity( "&oslash;", 'ø' ),
- Entity( "&ugrave;", 'ù' ),
- Entity( "&uacute;", 'ú' ),
- Entity( "&ucirc;", 'û' ),
- Entity( "&uuml;", 'ü' ),
- Entity( "&yacute;", 'ý' ),
- Entity( "&thorn;", 'þ' ),
- Entity( "&yuml;", 'ÿ' ),
-
- Entity( "&#SPACE;", ' ' ),
- Entity( "&#RS;", '\n' ),
- Entity( "&#RE;", '\r' ),
- Entity( "&quot;", '"' ),
- Entity( "&amp;", '&' ),
- Entity( "&lt;", '<' ),
- Entity( "&gt;", '>' ),
-
- Entity( "CAP-DELTA", 'Δ' ),
- Entity( "ALPHA", 'α' ),
- Entity( "BETA", 'β' ),
- Entity( "DELTA", 'δ' ),
- Entity( "EPSILON", 'ε' ),
- Entity( "THETA", 'θ' ),
- Entity( "MU", 'μ' ),
- Entity( "PI", 'π' ),
- Entity( "TAU", 'τ' ),
- Entity( "CHI", 'χ' ),
-
- Entity( "<-", '←' ),
- Entity( "^", '↑' ),
- Entity( "->", '→' ),
- Entity( "v", '↓' ),
- Entity( "!=", '≠' ),
- Entity( "<=", '≤' ),
- Entity( nil, 0 ),
- };
-
-
-initarray() : array of Entity
-{
- return entities;
-}
-
badmodule(p: string)
{
sys->fprint(sys->fildes(2), "parse: cannot load %s: %r", p);
@@ -690,93 +460,51 @@ getc(g: ref Private_info): int
return c & 16r7f;
}
-ungetc(g: ref Private_info) {
+ungetc(g: ref Private_info)
+{
# this is a dirty hack, I am tacitly assuming that characters read
# from stdin will be ASCII.....
g.bufio->g.bin.ungetc();
}
-# go from url with latin1 and escapes to utf
+# go from url with ascii and %xx escapes to unicode, allowing for existing unencoded utf-8
urlunesc(s : string): string
{
- c, n : int;
- t : string;
- for(i := 0;i<len s ; i++){
- c = int s[i];
- if(c == '%'){
- n = int s[i+1];
- if(n >= '0' && n <= '9')
- n = n - '0';
- else if(n >= 'A' && n <= 'F')
- n = n - 'A' + 10;
- else if(n >= 'a' && n <= 'f')
- n = n - 'a' + 10;
- else
- break;
- c = n;
- n = int s[i+2];
- if(n >= '0' && n <= '9')
- n = n - '0';
- else if(n >= 'A' && n <= 'F')
- n = n - 'A' + 10;
- else if(n >= 'a' && n <= 'f')
- n = n - 'a' + 10;
- else
- break;
- i += 2;
- c = c * 16 + n;
- }
- else if( c == '+' )
- c = ' ';
- t[len t] = c;
+ a := array[Sys->UTFmax*len s] of byte;
+ o := 0;
+ for(i := 0; i < len s; i++){
+ c := int s[i];
+ if(c < Runeself){
+ if(c == '%' && i+2 < len s){
+ d0 := hex(int s[i+1]);
+ if(d0 >= 0){
+ d1 := hex(int s[i+2]);
+ if(d1 >= 0){
+ i += 2;
+ c = d0*16 + d1;
+ }
+ }
+ } else if(c == '+' || c == 0)
+ c = ' ';
+ a[o++] = byte c;
+ }else
+ o += sys->char2byte(c, a, o);
}
- return t;
+ return string a[0: o];
}
-
-# go from http with latin1 escapes to utf,
-# we assume that anything >= Runeself is already in utf
-
-httpunesc(g: ref Private_info,s : array of byte): string
+hex(c: int): int
{
- t,v: string;
- c,i : int;
- # convert bytes to a string.
- v = string s;
- for(i=0; i < len v;i++){
- c = v[i];
- if(c == '&'){
- if(v[1] == '#' && v[2] && v[3] && v[4] && v[5] == ';'){
- c = 100*(v[2])+10*(v[3])+(v[4]);
- if(c < Runeself){
- t[len t] = c;
- i += 6;
- continue;
- }
- if(c < 256 && c >= 161){
- t[len t] = g.entity[c-161].value;
- i += 6;
- continue;
- }
- } else {
- for(j:= 0;g.entity[j].name != nil; j++)
- if(g.entity[j].name == v[i+1:])
- # problem here cvert array of byte to string?
- break;
- if(g.entity[j].name != nil){
- i += len g.entity[j].name;
- t[len t] = g.entity[j].value;
- continue;
- }
- }
- }
- t[len t] = c;
- }
- return t;
+ if(c >= '0' && c <= '9')
+ return c-'0';
+ if(c >= 'a' && c <= 'f')
+ return c-'a' + 10;
+ if(c >= 'A' && c <= 'F')
+ return c-'A' + 10;
+ return -1;
}
-
# write a failure message to the net and exit
fail(g: ref Private_info,reason : int, message : string)
{
@@ -845,14 +573,18 @@ logit(g: ref Private_info,message : string )
urlconv(p : string): string
{
- c : int;
- t : string;
- for(i:=0;i<len p ;i++){
- c = p[i];
+ a := array[Sys->UTFmax] of byte;
+ t := "";
+ for(i := 0; i < len p; i++){
+ c := p[i];
if(c == 0)
- break;
- if(c <= ' ' || c == '%' || c >= Runeself){
- t += sys->sprint("%%%2.2x", c);
+ continue; # ignore nul bytes
+ if(c >= Runeself){ # convert to UTF-8
+ n := sys->char2byte(c, a, 0);
+ for(j := 0; j < n; j++)
+ t += sys->sprint("%%%.2X", int a[j]);
+ }else if(c <= ' ' || c == '%'){
+ t += sys->sprint("%%%2.2X", c);
} else {
t[len t] = c;
}
diff --git a/appl/svc/httpd/parser.m b/appl/svc/httpd/parser.m
index 0c1a5829..3d50237a 100644
--- a/appl/svc/httpd/parser.m
+++ b/appl/svc/httpd/parser.m
@@ -5,7 +5,6 @@ Parser: module {
PATH: con "/dis/svc/httpd/parser.dis";
init: fn();
- initarray: fn(): array of Httpd->Entity;
urlunesc: fn(s: string): string;
fail: fn(g: ref Httpd->Private_info,reason: int, message: string);
logit: fn(g: ref Httpd->Private_info, message: string );