implement Build;

include "common.m";

# local copies from CU
# (these module handles are assigned in init(), not at declaration)
sys: Sys;
CU: CharonUtils;
	ByteSource, CImage, ImageCache, color, Nameval: import CU;

D: Draw;
	Point, Rect, Image: import D;
S: String;
T: StringIntTab;
C: Ctype;
LX: Lex;
	RBRA, Token, TokenSource: import LX;
U: Url;
	Parsedurl: import U;
J: Script;

# character-class table; init() sets it from C->ctype
ctype: array of byte;

# Character classes given as strings.
# NOTE(review): the leading '^' in notwhitespace presumably denotes the
# complement class as understood by the String module - confirm.
whitespace : con " \t\n\r";
notwhitespace : con "^ \t\n\r";

# These tables must be sorted
# (each entry pairs a keyword string with an integer value; entries are
# in ascending order of the keyword)

# keyword -> A* alignment constant
align_tab := array[] of { T->StringInt
	("baseline", int Abaseline),
	("bottom", int Abottom),
	("center", int Acenter),
	("char", int Achar),
	("justify", int Ajustify),
	("left", int Aleft),
	("middle", int Amiddle),
	("right", int Aright),
	("top", int Atop),
};

# keyword -> F* form-field constant
input_tab := array[] of { T->StringInt
	("button", Fbutton),
	("checkbox", Fcheckbox),
	("file", Ffile),
	("hidden", Fhidden),
	("image", Fimage),
	("password", Fpassword),
	("radio", Fradio),
	("reset", Freset),
	("submit", Fsubmit),
	("text", Ftext),
};

# keyword -> IFcleft/IFcright flag combination
clear_tab := array[] of { T->StringInt
	("all", IFcleft|IFcright),
	("left", IFcleft),
	("right", IFcright),
};

# keyword -> FR* scroll flag combination
fscroll_tab := array[] of { T->StringInt
	("auto", FRhscrollauto|FRvscrollauto),
	("no", FRnoscroll),
	("yes", FRhscroll|FRvscroll),
};

# blockbrk[tag] is break info for a block level element, or one
# of a few others that get the same treatment re ending open paragraphs
# and requiring a line break / vertical space before them.
# If we want a line of space before the given element, SPBefore is OR'd in.
# If we want a line of space after the given element, SPAfter is OR'd in.
# Flag bits stored in blockbrk[].  Any non-zero entry makes getitems
# call addbrk() for that tag; SPBefore is tested for start tags and
# SPAfter for end tags to decide whether a line of space is requested.
SPBefore: con byte 2;
SPAfter: con byte 4;
BL: con byte 1;
BLBA: con BL|SPBefore|SPAfter;

# Indexed by start-tag number; tags not listed get 0 (no break).
blockbrk := array[LX->Numtags] of {
	LX->Taddress => BLBA,
	LX->Tblockquote => BLBA,
	LX->Tcenter => BL,
	LX->Tdir => BLBA,
	LX->Tdiv => BL,
	LX->Tdd => BL,
	LX->Tdl => BLBA,
	LX->Tdt => BL,
	LX->Tform => BLBA,
	# headings and tables get breaks added manually
	LX->Th1 => BL,
	LX->Th2 => BL,
	LX->Th3 => BL,
	LX->Th4 => BL,
	LX->Th5 => BL,
	LX->Th6 => BL,
	LX->Thr => BL,
	LX->Tisindex => BLBA,
	LX->Tli => BL,
	LX->Tmenu => BLBA,
	LX->Tol => BLBA,
	LX->Tp => BLBA,
	LX->Tpre => BLBA,
	LX->Tul => BLBA,
	LX->Txmp => BLBA,
	* => byte 0
};

# attrinfo is information about attributes.
# The AGEN value means that the attribute is generic (applies to almost all elements)
AGEN: con byte 1;

# Indexed by attribute number; attributes not listed get 0.
attrinfo := array[LX->Numattrs] of {
	LX->Aid => AGEN,
	LX->Aclass => AGEN,
	LX->Astyle => AGEN,
	LX->Atitle => AGEN,
	LX->Aonabort => AGEN,
	LX->Aonblur => AGEN,
	LX->Aonchange => AGEN,
	LX->Aonclick => AGEN,
	LX->Aondblclick => AGEN,
	LX->Aonerror => AGEN,
	LX->Aonfocus => AGEN,
	LX->Aonkeydown => AGEN,
	LX->Aonkeypress => AGEN,
	LX->Aonkeyup => AGEN,
	LX->Aonload => AGEN,
	LX->Aonmousedown => AGEN,
	LX->Aonmousemove => AGEN,
	LX->Aonmouseout => AGEN,
	LX->Aonmouseover => AGEN,
	LX->Aonmouseup => AGEN,
	LX->Aonreset => AGEN,
	LX->Aonresize => AGEN,
	LX->Aonselect => AGEN,
	LX->Aonsubmit => AGEN,
	LX->Aonunload => AGEN,
	* => byte 0
};

# Some constants
FRKIDMARGIN: con 6;	# default margin around kid frames
IMGHSPACE: con 0;	# default hspace for images (0 matches IE, Netscape)
IMGVSPACE: con 0;	# default vspace for images
FLTIMGHSPACE: con 2;	# default hspace for float images
TABSP: con 2;	# default cellspacing for tables
TABPAD: con 2;	# default cell padding for tables
LISTTAB: con 1;	# number of tabs to indent lists
BQTAB: con 1;	# number of tabs to indent blockquotes
HRSZ: con 2;	# thickness of horizontal rules
SUBOFF: con 4;	# vertical offset for subscripts
SUPOFF: con 6;	# vertical offset for superscripts
# Written as an explicit escape: a raw U+00A0 in the source is
# indistinguishable from an ordinary space and is easily lost when the
# file passes through tools that normalise whitespace.
NBSP: con '\u00A0';	# non-breaking space character
# Debug/behaviour switches; all are assigned from CU->config in init().
dbg := 0;
warn := 0;
doscripts := 0;

# Charset converters fetched in init(); latin1 is also the fallback
# used by ItemSource.new when a document's charset has no converter.
utf8 : Btos;
latin1 : Btos;

# init binds the module handles and configuration used by this module.
# It loads Sys, Draw, String, StringIntTab and Url (calling U->init()
# when Url loaded), takes C, J and LX from cu, and fetches the "utf8"
# and "latin1" converters.  If either converter is nil it prints a
# message and raises "EXinternal:build init".
# dbg comes from config.dbg['h']; warn from config.dbg['w'] or dbg;
# doscripts is set only when config.doscripts is true and J is non-nil.
init(cu: CharonUtils)
{
	CU = cu;
	sys = load Sys Sys->PATH;
	D = load Draw Draw->PATH;
	S = load String String->PATH;;
	T = load StringIntTab StringIntTab->PATH;
	U = load Url Url->PATH;
	if (U != nil)
		U->init();
	C = cu->C;
	J = cu->J;
	LX = cu->LX;
	ctype = C->ctype;
	utf8 = CU->getconv("utf8");
	latin1 = CU->getconv("latin1");
	if (utf8 == nil || latin1 == nil) {
		sys->print("cannot load utf8 or latin1 charset converter\n");
		raise "EXinternal:build init";
	}
	dbg = int (CU->config).dbg['h'];
	warn = (int (CU->config).dbg['w']) || dbg;
	doscripts = (CU->config).doscripts && J != nil;
}

# Assume f has been reset, and then had any values from HTTP headers
# filled in (e.g., base, chset).
# Creates a TokenSource over bs using the converter for the document's
# chset (latin1 if none is found) and a parse-state stack holding one
# fresh Pstate.  When mtype is not CU->TextHtml that state is switched
# to literal handling: IFwrap is cleared, literal is set, and the FntT
# font style is pushed.
ItemSource.new(bs: ref ByteSource, f: ref Layout->Frame, mtype: int) : ref ItemSource
{
	di := f.doc;
	# sys->print("chset = %s\n", di.chset);
	chset := CU->getconv(di.chset);
	if (chset == nil)
		chset = latin1;
	ts := TokenSource.new(bs, chset, mtype);
	psstk := list of { Pstate.new() };
	if(mtype != CU->TextHtml) {
		ps := hd psstk;
		ps.curstate &= ~IFwrap;
		ps.literal = 1;
		pushfontstyle(ps, FntT);
	}
	return ref ItemSource(ts, mtype, di, f, psstk, 0, 0, 0, 0, nil, nil, nil, nil, nil, nil, nil);
}

ItemSource.getitems(is: self ref ItemSource) : ref Item
{
	psstk := is.psstk;
	ps := hd psstk;		# ps is always same as hd psstk
	curtab: ref Table = nil;	# curtab is always same as hd is.tabstk
	if(is.tabstk != nil)
		curtab = hd is.tabstk;
	# start with any tokens saved in is.toks, and clear the saved list
	toks := is.toks;
	is.toks = nil;
	tokslen := len toks;
	toki := 0;
	di := is.doc;
TokLoop:
	for(;; toki++) {
		if(toki >= tokslen) {
			# Current batch is used up: stop if the outermost
			# Pstate already has an item queued, otherwise fetch
			# another batch (an empty batch also ends the loop).
			outerps := lastps(psstk);
			if(outerps.items.next != nil)
				break;
			toks = is.ts.gettoks();
			tokslen = len toks;
			if(dbg)
				sys->print("build: got %d tokens from token source\n", tokslen);
			if(tokslen == 0)
				break;
			toki = 0;
		}
		tok := toks[toki];
		if(dbg > 1)
			sys->print("build: curstate %ux, token %s\n", ps.curstate, tok.tostring());
		tag := tok.tag;
		# Look up break info: start tags index blockbrk directly and
		# honour SPBefore; end tags (tag-RBRA) honour SPAfter.
		brk := byte 0;
		brksp := 0;
		if(tag < LX->Numtags) {
			brk = blockbrk[tag];
			if((brk&SPBefore) != byte 0)
				brksp = 1;
		}
		else if(tag < LX->Numtags+RBRA) {
			brk = blockbrk[tag-RBRA];
			if((brk&SPAfter) != byte 0)
				brksp = 1;
		}
		if(brk != byte 0) {
			addbrk(ps, brksp, 0);
			if(ps.inpar) {
				popjust(ps);
				ps.inpar = 0;
			}
		}
		# check common case first (Data), then case statement on tag
		if(tag == LX->Data) {
			# Lexing didn't pay attention to SGML record boundary rules:
			# \n after start tag or before end tag to be discarded.
			# (Lex has already discarded all \r's).
			# Some pages assume this doesn't happen in
# literal (e.g. <PRE>) text,
# so we won't do it if literal is true.
# BUG: won't discard \n before a start tag that begins
# the next bufferful of tokens.
s := tok.text;
if(!ps.literal) {
	# Apply the record-boundary rule by narrowing s to s[i:j]:
	# i skips a leading \n that follows a start tag,
	# j drops a trailing \n that precedes an end tag.
	i := 0;
	j := len s;
	if(toki > 0) {
		pt := toks[toki-1].tag;
		# IE and Netscape both ignore this rule (contrary to spec)
		# if previous tag was img
		if(pt < LX->Numtags && pt != LX->Timg && j>0 && s[0]=='\n')
			i++;
	}
	if(toki < tokslen-1) {
		nt := toks[toki+1].tag;
		if(nt >= RBRA && nt < LX->Numtags+RBRA && j>i && s[j-1]=='\n')
			j--;
	}
	# only build a new string when something was actually trimmed
	if(i>0 || j<len s)
		s = s[i:j];
}
if(ps.skipwhite) {
	# discard leading whitespace; once real text is seen, stop skipping
	s = S->drop(s, whitespace);
	if(s != "")
		ps.skipwhite = 0;
}
if(s != "")
	addtext(ps, s);
}
else case tag {
# Some abbrevs used in following DTD comments
# %text = #PCDATA
# | TT | I | B | U | STRIKE | BIG | SMALL | SUB | SUP
# | EM | STRONG | DFN | CODE | SAMP | KBD | VAR | CITE
# | A | IMG | APPLET | FONT | BASEFONT | BR | SCRIPT | MAP
# | INPUT | SELECT | TEXTAREA
# %block = P | UL | OL | DIR | MENU | DL | PRE | DL | DIV | CENTER
# | BLOCKQUOTE | FORM | ISINDEX | HR | TABLE
# %flow = (%text | %block)*
# %body.content = (%heading | %text | %block | ADDRESS)*
#
# Anchors are not supposed to be nested, but you sometimes see
# href anchors inside destination anchors.
LX->Ta =>
	# An anchor is still open: close it before starting this one.
	if(ps.curanchor != 0) {
		if(warn)
			sys->print("warning: nested <A> or missing </A>\n");
		endanchor(ps, di.text);
	}
	name := aval(tok, LX->Aname);
	href := aurlval(tok, LX->Ahref, nil, di.base);
	target := astrval(tok, LX->Atarget, di.target);
	ga := getgenattr(tok);
	evl : list of Lex->Attr = nil;
	if(ga != nil) {
		evl = ga.events;
		# event handlers on the anchor mark the document as scripted
		if(evl != nil && doscripts)
			di.hasscripts = 1;
	}
	# ignore rel, rev, and title attrs
	if(href != nil) {
		# link anchor: record it and switch to link colour
		di.anchors = ref Anchor(++is.nanchors, name, href, target, evl, 0) :: di.anchors;
		ps.curanchor = is.nanchors;
		ps.curfg = di.link;
		ps.fgstk = ps.curfg :: ps.fgstk;
		# underline, too
		ps.ulstk = ULunder :: ps.ulstk;
		ps.curul = ULunder;
	}
	if(name != nil) {
		# add a null item to be destination
		# (the IFbrk bits are saved and OR'd back in after additem)
		brkstate := ps.curstate & IFbrk;
		additem(ps, Item.newspacer(ISPnull, 0), tok);
		ps.curstate |= brkstate;	# not quite right
		di.dests = ref DestAnchor(++is.nanchors, name, ps.lastit) :: di.dests;
	}
LX->Ta+RBRA =>
endanchor(ps, di.text);
#
# We can't do applets, so ignore PARAMS, and let
# the %text contents appear for the alternative rep
LX->Tapplet or LX->Tapplet+RBRA =>
if(warn && tag == LX->Tapplet)
sys->print("warning: