summaryrefslogtreecommitdiff
path: root/appl/charon/lex.m
diff options
context:
space:
mode:
Diffstat (limited to 'appl/charon/lex.m')
-rw-r--r--appl/charon/lex.m105
1 files changed, 105 insertions, 0 deletions
diff --git a/appl/charon/lex.m b/appl/charon/lex.m
new file mode 100644
index 00000000..e6dfcb57
--- /dev/null
+++ b/appl/charon/lex.m
@@ -0,0 +1,105 @@
+Lex: module # interface to the HTML lexer used by Charon
+{
+ PATH: con "/dis/charon/lex.dis"; # location of this module's Dis file
+
+ # HTML 4.0 tags (plus extras such as blink, nobr)
+ # sorted in lexical order; used as array indices
+ Notfound, Comment,
+ Ta, Tabbr, Tacronym, Taddress, Tapplet, Tarea, Tb,
+ Tbase, Tbasefont, Tbdo, Tbig, Tblink, Tblockquote, Tbody,
+ Tbq, Tbr, Tbutton, Tcaption, Tcenter, Tcite, Tcode, Tcol, Tcolgroup,
+ Tdd, Tdel, Tdfn, Tdir, Tdiv, Tdl, Tdt, Tem,
+ Tfieldset, Tfont, Tform, Tframe, Tframeset,
+ Th1, Th2, Th3, Th4, Th5, Th6, Thead, Thr, Thtml, Ti, Tiframe, Timage,
+ Timg, Tinput, Tins, Tisindex, Tkbd, Tlabel, Tlegend, Tli, Tlink, Tmap,
+ Tmenu, Tmeta, Tnobr, Tnoframes, Tnoscript,
+ Tobject, Tol, Toptgroup, Toption, Tp, Tparam, Tpre,
+ Tq, Ts, Tsamp, Tscript, Tselect, Tsmall, Tspan, Tstrike, Tstrong,
+ Tstyle, Tsub, Tsup, Ttable, Ttbody, Ttd, Ttextarea, Ttfoot, Tth,
+ Tthead, Ttitle, Ttr, Ttt, Tu, Tul, Tvar, Txmp,
+ Numtags
+ : con iota; # numbered consecutively from 0 (Notfound = 0); Numtags is the count
+ RBRA : con Numtags; # NOTE(review): presumably an offset added to a tag value to mark its end tag - confirm in lex.b
+ Data: con Numtags+RBRA; # i.e. 2*Numtags; the Token.text comment suggests this tags plain-text tokens
+
+ tagnames: array of string; # NOTE(review): presumably indexed by the tag constants above - confirm in lex.b
+
+ # HTML 4.0 tag attributes
+ # Keep sorted in lexical order
+ Aabbr, Aaccept, Aaccept_charset, Aaccesskey, Aaction,
+ Aalign, Aalink, Aalt, Aarchive, Aaxis,
+ Abackground, Abgcolor, Aborder,
+ Acellpadding, Acellspacing, Achar, Acharoff,
+ Acharset, Achecked, Acite, Aclass, Aclassid, Aclear,
+ Acode, Acodebase, Acodetype,
+ Acolor, Acols, Acolspan, Acompact, Acontent, Acoords,
+ Adata, Adatafld, Adataformatas, Adatapagesize, Adatasrc,
+ Adatetime, Adeclare, Adefer, Adir, Adisabled,
+ Aenctype, Aevent,
+ Aface, Afor, Aframe, Aframeborder,
+ Aheaders, Aheight, Ahref, Ahreflang, Ahspace, Ahttp_equiv,
+ Aid, Aismap, Alabel, Alang, Alanguage, Alink, Alongdesc, Alowsrc,
+ Amarginheight, Amarginwidth, Amaxlength, Amedia, Amethod, Amultiple,
+ Aname, Anohref, Anoresize, Anoshade, Anowrap, Aobject,
+ Aonabort, Aonblur, Aonchange, Aonclick, Aondblclick,
+ Aonerror, Aonfocus, Aonkeydown, Aonkeypress, Aonkeyup, Aonload,
+ Aonmousedown, Aonmousemove, Aonmouseout, Aonmouseover,
+ Aonmouseup, Aonreset, Aonresize, Aonselect, Aonsubmit, Aonunload,
+ Aprofile, Aprompt, Areadonly, Arel, Arev, Arows, Arowspan, Arules,
+ Ascheme, Ascope, Ascrolling, Aselected, Ashape, Asize,
+ Aspan, Asrc, Astandby, Astart, Astyle, Asummary,
+ Atabindex, Atarget, Atext, Atitle, Atype, Ausemap,
+ Avalign, Avalue, Avaluetype, Aversion, Avlink, Avspace, Awidth,
+ Numattrs
+ : con iota; # numbered consecutively from 0 (Aabbr = 0); Numattrs is the count
+
+ attrnames: array of string; # NOTE(review): presumably indexed by the attribute constants above - confirm in lex.b
+
+ Token: adt
+ {
+ tag: int;
+ text: string; # text in Data, attribute text in tag
+ attr: list of Attr;
+
+ aval: fn(t: self ref Token, attid: int) : (int, string); # NOTE(review): presumably (found flag, value) for attribute attid - confirm
+ tostring: fn(t: self ref Token) : string;
+ };
+
+ Attr: adt
+ {
+ attid: int; # NOTE(review): presumably one of the A* attribute constants - confirm
+ value: string;
+ };
+
+ # A source of HTML tokens.
+ # After calling new with a ByteSource (which is past 'gethdr' stage),
+ # call gettoks repeatedly until it returns nil. Errors are signalled by exceptions.
+ # Possible exceptions raised:
+ # EXInternal (start, gettoks) - NOTE(review): no 'start' is declared here; possibly refers to new
+ # exGeterror (gettoks)
+ # exAbort (gettoks)
+ TokenSource: adt
+ {
+ b: ref CharonUtils->ByteSource;
+ chset: Btos; # charset converter
+ state : ref TSstate;
+ mtype: int; # CU->TextHtml or CU->TextPlain
+ inxmp: int; # NOTE(review): presumably nonzero while inside an xmp element - confirm
+
+ new: fn(b: ref CharonUtils->ByteSource, chset : Btos, mtype: int) : ref TokenSource;
+ gettoks: fn(ts: self ref TokenSource) : array of ref Token;
+ setchset: fn(ts: self ref TokenSource, conv : Btos);
+ };
+
+ TSstate : adt { # referenced by TokenSource.state; NOTE(review): field meanings are not documented here - see lex.b
+ bi : int;
+ prevbi : int;
+ s : string;
+ si : int;
+ csstate : Convcs->State;
+ prevcsstate : Convcs->State;
+ };
+
+
+ init: fn(cu: CharonUtils);
+};