diff options
Diffstat (limited to 'appl/charon/lex.m')
| -rw-r--r-- | appl/charon/lex.m | 105 |
1 files changed, 105 insertions, 0 deletions
# Provenance: diff --git a/appl/charon/lex.m b/appl/charon/lex.m
# (new file, mode 100644, index 00000000..e6dfcb57)
#
# Interface of the Charon HTML lexer module: tag and attribute
# identifiers, the token types, and the token source.
Lex: module
{
	# Path of the Dis file for this module.
	PATH: con "/dis/charon/lex.dis";

	# Tag identifiers: the HTML 4.0 tags (blink, nobr).
	# The names from Ta onwards are kept in lexical order, and the
	# values are used as array indices, so never reorder this list.
	# Values count up from zero (iota); Numtags is therefore the number
	# of identifiers that precede it.
	Notfound, Comment,
	Ta, Tabbr, Tacronym, Taddress, Tapplet, Tarea,
	Tb, Tbase, Tbasefont, Tbdo, Tbig, Tblink, Tblockquote, Tbody,
	Tbq, Tbr, Tbutton,
	Tcaption, Tcenter, Tcite, Tcode, Tcol, Tcolgroup,
	Tdd, Tdel, Tdfn, Tdir, Tdiv, Tdl, Tdt,
	Tem,
	Tfieldset, Tfont, Tform, Tframe, Tframeset,
	Th1, Th2, Th3, Th4, Th5, Th6, Thead, Thr, Thtml,
	Ti, Tiframe, Timage, Timg, Tinput, Tins, Tisindex,
	Tkbd,
	Tlabel, Tlegend, Tli, Tlink,
	Tmap, Tmenu, Tmeta,
	Tnobr, Tnoframes, Tnoscript,
	Tobject, Tol, Toptgroup, Toption,
	Tp, Tparam, Tpre,
	Tq,
	Ts, Tsamp, Tscript, Tselect, Tsmall, Tspan, Tstrike, Tstrong,
	Tstyle, Tsub, Tsup,
	Ttable, Ttbody, Ttd, Ttextarea, Ttfoot, Tth, Tthead, Ttitle, Ttr, Ttt,
	Tu, Tul,
	Tvar,
	Txmp,
	Numtags: con iota;

	# Values at and beyond the tag range.
	# NOTE(review): RBRA is presumably the offset that turns a tag into
	# its closing-tag form, and Data the tag value of a text token --
	# confirm against the implementation.
	RBRA: con Numtags;
	Data: con Numtags+RBRA;

	# NOTE(review): presumably the tag names, indexed by the tag
	# identifiers above -- confirm against the implementation.
	tagnames: array of string;

	# Attribute identifiers: the HTML 4.0 tag attributes.
	# Keep sorted in lexical order.
	# Values count up from zero (iota); Numattrs is therefore the number
	# of identifiers that precede it.
	Aabbr, Aaccept, Aaccept_charset, Aaccesskey, Aaction,
	Aalign, Aalink, Aalt, Aarchive, Aaxis,
	Abackground, Abgcolor, Aborder,
	Acellpadding, Acellspacing, Achar, Acharoff, Acharset, Achecked,
	Acite, Aclass, Aclassid, Aclear, Acode, Acodebase, Acodetype,
	Acolor, Acols, Acolspan, Acompact, Acontent, Acoords,
	Adata, Adatafld, Adataformatas, Adatapagesize, Adatasrc,
	Adatetime, Adeclare, Adefer, Adir, Adisabled,
	Aenctype, Aevent,
	Aface, Afor, Aframe, Aframeborder,
	Aheaders, Aheight, Ahref, Ahreflang, Ahspace, Ahttp_equiv,
	Aid, Aismap,
	Alabel, Alang, Alanguage, Alink, Alongdesc, Alowsrc,
	Amarginheight, Amarginwidth, Amaxlength, Amedia, Amethod, Amultiple,
	Aname, Anohref, Anoresize, Anoshade, Anowrap,
	Aobject,
	Aonabort, Aonblur, Aonchange, Aonclick, Aondblclick,
	Aonerror, Aonfocus,
	Aonkeydown, Aonkeypress, Aonkeyup, Aonload,
	Aonmousedown, Aonmousemove, Aonmouseout, Aonmouseover, Aonmouseup,
	Aonreset, Aonresize, Aonselect, Aonsubmit, Aonunload,
	Aprofile, Aprompt,
	Areadonly, Arel, Arev, Arows, Arowspan, Arules,
	Ascheme, Ascope, Ascrolling, Aselected, Ashape, Asize,
	Aspan, Asrc, Astandby, Astart, Astyle, Asummary,
	Atabindex, Atarget, Atext, Atitle, Atype,
	Ausemap,
	Avalign, Avalue, Avaluetype, Aversion, Avlink, Avspace,
	Awidth,
	Numattrs: con iota;

	# NOTE(review): presumably the attribute names, indexed by the
	# attribute identifiers above -- confirm against the implementation.
	attrnames: array of string;

	# One lexical token.
	Token: adt
	{
		tag:	int;
		text:	string;		# text in Data, attribute text in tag
		attr:	list of Attr;

		# NOTE(review): presumably looks up attribute attid on the
		# token; the meaning of the (int, string) result is not
		# visible here -- confirm against the implementation.
		aval:		fn(t: self ref Token, attid: int): (int, string);
		tostring:	fn(t: self ref Token): string;
	};

	# One attribute of a token: its identifier and its value text.
	Attr: adt
	{
		attid:	int;
		value:	string;
	};

	# A source of HTML tokens.
	# Usage: call new with a ByteSource that is already past the
	# 'gethdr' stage, then call gettoks repeatedly until it returns nil.
	# Errors are signalled by exceptions.
	# Possible exceptions raised:
	#	EXInternal (start, gettoks)
	#	exGeterror (gettoks)
	#	exAbort (gettoks)
	# NOTE(review): no 'start' function is declared in this adt; the
	# 'start' named above presumably refers to new -- confirm.
	TokenSource: adt
	{
		b:	ref CharonUtils->ByteSource;
		chset:	Btos;		# charset converter
		state:	ref TSstate;
		mtype:	int;		# CU->TextHtml or CU->TextPlain
		inxmp:	int;

		new:		fn(b: ref CharonUtils->ByteSource, chset: Btos, mtype: int): ref TokenSource;
		gettoks:	fn(ts: self ref TokenSource): array of ref Token;
		setchset:	fn(ts: self ref TokenSource, conv: Btos);
	};

	# State record referenced by TokenSource.state.
	# NOTE(review): the field names suggest current and previous
	# positions plus charset-converter states -- confirm against the
	# implementation.
	TSstate: adt
	{
		bi:		int;
		prevbi:		int;
		s:		string;
		si:		int;
		csstate:	Convcs->State;
		prevcsstate:	Convcs->State;
	};

	init: fn(cu: CharonUtils);
};
