diff options
| author | Charles.Forsyth <devnull@localhost> | 2006-12-22 17:07:39 +0000 |
|---|---|---|
| committer | Charles.Forsyth <devnull@localhost> | 2006-12-22 17:07:39 +0000 |
| commit | 37da2899f40661e3e9631e497da8dc59b971cbd0 (patch) | |
| tree | cbc6d4680e347d906f5fa7fca73214418741df72 /appl/lib/parseman.b | |
| parent | 54bc8ff236ac10b3eaa928fd6bcfc0cdb2ba46ae (diff) | |
20060303a
Diffstat (limited to 'appl/lib/parseman.b')
| -rw-r--r-- | appl/lib/parseman.b | 805 |
1 files changed, 805 insertions, 0 deletions
# appl/lib/parseman.b  (from diff header: new file mode 100644, index 00000000..3b682587)
#
# Parseman: converts man-page (troff -man style) source into lists of
# positioned Text items, one list per output line, delivered on a channel.

implement Parseman;

include "sys.m";
include "bufio.m";
include "man.m";

sys: Sys;
bufio: Bufio;
Iobuf: import bufio;

# Pseudo-font used internally for literal text; addtext() converts it to
# FONT_BOLD (optionally quoting the text) before it reaches the output.
FONT_LITERAL: con -1;

# Load the modules this implementation depends on.
# Returns nil on success, otherwise an error string.
init(): string
{
	sys = load Sys Sys->PATH;
	bufio = load Bufio Bufio->PATH;
	if (bufio == nil)
		return sys->sprint("cannot load module: %r");
	return nil;
}

# Formatting state for one parseman() run.  T is the viewer type; it must
# supply textwidth() so that lines can be measured while they are filled.
#
# NOTE: parseman() builds this adt positionally, so the order of the data
# fields must not be changed.
ParseState: adt[T]
	for{
	T =>
		textwidth:	fn(t: self T, text: Text): int;
	}{
	metrics: Metrics;
	ql: int;	# quote Literal text
	margin: int;	# left margin, added to indent when a line is started
	mstack: list of int;	# margins saved by .RS, restored by .RE
	istack: list of int;	# prevailing indents saved by .RS, restored by .RE
	indent: int;	# indent for the line currently being filled
	ntlsetindent: int;	#copy prevailindent to indent on n.t.l
	prevailindent: int;	# indent restored by every brk()
	curfont: int;
	curattr: int;
	verbatim: int;	# non-zero between .nf/.EX and .fi/.EE: no filling
	pspace: int;	# number of blank lines emitted by paragraph()
	curline: list of (int, Text);	# most recent first
	curwidth: int;	# width used so far on curline
	newpara: int;	# non-zero once a paragraph gap has been emitted and nothing added since
	heading: int;	# 1 while setting .SH text, 2 for .SS, otherwise 0
	igto: string;	# while non-nil, input is skipped until a line equal to this
	link: string;	# link target attached to Text items created while set
	viewer: T;
	setline: chan of list of (int, Text);	# completed lines are sent here

	# addstring() is simply an addtext() of the current font
	addstring:	fn(state: self ref ParseState, s: string);
	addtext:	fn(s: self ref ParseState, t: list of Text);
	brk:	fn(s: self ref ParseState);
	paragraph:	fn(s: self ref ParseState);
};

# Read man-page source from fd and send the formatted result, line by line,
# on setline.  Each line is a list of (indent, Text) pairs.  A nil list is
# always sent last to mark the end of the page, including when fd cannot be
# opened for buffered reading.
#
#	fd	source to read
#	metrics	page geometry (pagew, indent, em, en, V, ...)
#	ql	non-zero to wrap literal text in `quotes'
#	viewer	supplies textwidth() used to measure text
#	setline	receives the formatted lines
parseman[T](fd: ref Sys->FD, metrics: Metrics, ql: int, viewer: T, setline: chan of list of (int, Text))
	for{
	T =>
		textwidth:	fn(t: self T, text: Text): int;
	}
{
	iob := bufio->fopen(fd, Sys->OREAD);
	if (iob == nil) {
		# nothing can be read: still terminate the stream so the
		# receiver is not left waiting (previously a nil dereference)
		setline <- = nil;
		return;
	}
	state := ref ParseState[T](metrics, ql, 0, nil, nil, 0, 0, metrics.indent, FONT_ROMAN, 0, 0, 1, nil, 0, 1, 0, "", nil, viewer, setline);
	while ((l := iob.gets('\n')) != nil) {
		if (l[len l -1] == '\n')
			l = l[0: len l - 1];
		# inside a .ig block: skip everything up to the terminator line
		if (state.igto != nil && state.igto != l)
			continue;
		state.igto = nil;
		parseline(state, l);
	}
	# finish the page with a paragraph gap and a centred footer
	# (a dead store of 2 to pspace, immediately overwritten, was removed)
	state.pspace = 1;
	state.paragraph();
	footer := Text(FONT_ROMAN, 0, "Inferno Manual", 0, nil);
	textw := state.viewer.textwidth(footer);
#should do 'center' in addtext (state.justify = CENTER)
	state.indent = (state.metrics.pagew - textw) / 2;
	state.addtext(footer::nil);
	state.brk();
	setline <- = nil;
}

# Process one input line (without its trailing newline): an empty line is a
# paragraph break (or a blank output line in verbatim mode), a line starting
# with '.' or '\'' is a macro request, anything else is text to be filled.
parseline[T](state: ref ParseState[T], t: string)
	for{
	T =>
		textwidth:	fn(t: self T, text: Text): int;
	}
{
	if (t == nil) {
		if (state.verbatim) {
			blank := Text(state.curfont, state.curattr, "", 0, "");
			state.setline <- = (0, blank)::nil;
		} else
			state.paragraph();
		return;
	}
	# remember whether the previous request (.TP) asked for the indent to
	# be switched after this line; the flag only ever covers one line
	ntlsetindent := state.ntlsetindent;
	state.ntlsetindent = 0;
	if (t[0] == '.' || t[0] == '\'')
		parsemacro(state, t[1:]);
	else {
		state.addtext(parsetext(state, t));
		if (state.verbatim)
			state.brk();
	}
	if (ntlsetindent) {
		state.indent = state.prevailindent;
		# the tag is too wide to share a line with the body text
		if (state.curwidth + state.metrics.en > state.indent + state.margin)
			state.brk();
	}
}

# Common body of the .B and .I requests: make font the current font and, if
# there are arguments, set each of them in that font and then return to
# roman.  With no arguments the font change stays in effect.
fontmacro[T](state: ref ParseState[T], font: int, params: list of string)
	for{
	T =>
		textwidth:	fn(t: self T, text: Text): int;
	}
{
	state.curfont = font;
	if (params == nil)
		return;
	for (; params != nil; params = tl params) {
		textl := parsetext(state, hd params);
		# items are added one at a time so that each is preceded by a space
		for (; textl != nil; textl = tl textl)
			state.addtext(hd textl::nil);
	}
	state.curfont = FONT_ROMAN;
}

# Interpret one macro request.  t is the request line with its leading
# control character removed.  The request name is at most two characters;
# the remainder is split into blank-separated arguments, where double
# quotes group words and a backslash protects the following character.
# Unknown requests are ignored.
parsemacro[T](state: ref ParseState[T], t: string)
	for{
	T =>
		textwidth:	fn(t: self T, text: Text): int;
	}
{
	for (n := 0; n < len t && n < 2; n++)
		if (t[n] == ' ' || t[n] == '\t')
			break;
	macro := t[0:n];
	params: list of string;
	quote := 0;
	param := 0;
	esc := 0;
	p := "";
	for (; n < len t; n++) {
		if (esc)
			esc = 0;
		else {
			case t[n] {
			' ' or '\t' =>
				if (!quote) {
					if (param) {
						params = p :: params;
						p = "";
						param = 0;
					}
					continue;
				}
			'"' =>
				param = 1;
				quote = !quote;
				continue;
			'\\' =>
				# keep the backslash; parsetext() interprets it later
				esc = 1;
			}
		}
		param = 1;
		p[len p] = t[n];
	}
	if (param)
		params = p :: params;
	# arguments were collected most recent first: restore input order
	plist: list of string;
	for (; params != nil; params = tl params)
		plist = hd params :: plist;
	params = plist;

	case macro {
	"ig" =>
		igto := "..";
		if (params != nil)
			igto = "." + hd params;
		state.brk();
		state.igto = igto;
	"sp" =>
		sp := "1";
		if(params != nil)
			sp = hd params;
		# convert the distance to whole lines; a zero line height
		# would otherwise cause a division by zero
		gap := 1;
		if (state.metrics.V != 0)
			gap = tval(state.metrics, sp, 'v') / state.metrics.V;
		if (gap < 1)
			gap = 1;
		while (gap--)
			state.paragraph();
	"br" =>
		state.brk();
	"nf" =>
		state.verbatim = 1;
	"fi" =>
		state.verbatim = 0;
	"ti" =>
		state.brk();
		#i := 0;
		#if(params != nil)
		#	i = tval(state.metrics, hd params, 'n');
		#state.ntlsetindent = 1;
		#state.prevailindent = i;
	"in" =>
		state.brk();
		#i := 0;
		#if(params != nil)
		#	i = tval(state.metrics, hd params, 'n');
		#state.indent = i;
		#state.prevailindent = state.indent;
	"1C" =>
		state.brk();
		# not implemented
	"2C" =>
		state.brk();
		# not implemented
	"BI" =>
		altattr(state, FONT_BOLD, FONT_ITALIC, params);
	"BR" =>
		altattr(state, FONT_BOLD, FONT_ROMAN, params);
	"IB" =>
		altattr(state, FONT_ITALIC, FONT_BOLD, params);
	"IR" =>
		# need to determine link if params of valid form
		state.link = convlink(params);
		altattr(state, FONT_ITALIC, FONT_ROMAN, params);
		state.link = nil;
	"RB" =>
		altattr(state, FONT_ROMAN, FONT_BOLD, params);
	"RI" =>
		altattr(state, FONT_ROMAN, FONT_ITALIC, params);
	"B" =>
		fontmacro(state, FONT_BOLD, params);
	"I" =>
		fontmacro(state, FONT_ITALIC, params);
	"SM"=>
		state.curattr |= ATTR_SMALL;
		if (params != nil) {
			for (; params != nil; params = tl params)
				state.addstring(hd params);
			state.curattr &= ~ATTR_SMALL;
		}
	"L" =>
		state.curfont = FONT_LITERAL;
		if (params != nil) {
			# join the arguments with single spaces inside `...'
			# NOTE(review): addtext() adds another pair of quotes
			# to FONT_LITERAL text when state.ql is set - confirm
			# that the doubled quoting is intended
			str := "`";
			for (pl := params; pl != nil; pl = tl pl) {
				str += hd pl;
				if (tl pl != nil)
					str += " ";
			}
			str += "'";
			state.addstring(str);
			state.curfont = FONT_ROMAN;
		}
	"LR" =>
		# first argument literal, optional second argument roman
		if (params != nil) {
			l := Text(FONT_LITERAL, state.curattr, hd params, 0, nil);
			pair: list of Text;
			params = tl params;
			if (params == nil)
				pair = l :: nil;
			else {
				r := Text(FONT_ROMAN, state.curattr, hd params, 0, nil);
				pair = l :: r :: nil;
			}
			state.addtext(pair);
		}
	"RL" =>
		# first argument roman, optional second argument literal
		if (params != nil) {
			r := Text(FONT_ROMAN, state.curattr, hd params, 0, nil);
			pair: list of Text;
			params = tl params;
			if (params == nil)
				pair = r :: nil;
			else {
				l := Text(FONT_LITERAL, state.curattr, hd params, 0, nil);
				pair = r :: l :: nil;
			}
			state.addtext(pair);
		}
	"DT" =>
		# not yet supported
		;
	"EE" =>
		state.brk();
		state.verbatim = 0;
		state.curfont = FONT_ROMAN;
	"EX" =>
		state.brk();
		state.verbatim = 1;
		state.curfont = FONT_BOLD;
	"HP" =>
		state.paragraph();
		i := state.metrics.indent;
		if (params != nil)
			i = tval(state.metrics, hd params, 'n');
		state.prevailindent = state.indent + i;
	"IP" =>
		state.paragraph();
		i := state.metrics.indent;
		if (params != nil) {
			tag := hd params;
			params = tl params;
			state.addtext(parsetext(state, tag));
			if (params != nil)
				i = tval(state.metrics, hd params, 'n');
		}
		state.indent = state.metrics.indent + i;
		state.prevailindent = state.indent;
	"PD" =>
		state.pspace = 1;
		# skip the conversion when the line height is zero rather
		# than dividing by it
		if (params != nil && state.metrics.V != 0)
			state.pspace = tval(state.metrics, hd params, 'v') / state.metrics.V;
	"LP" or "PP" =>
		state.paragraph();
		state.prevailindent = state.indent;
	"RE" =>
		state.brk();
		# unbalanced .RE: nothing to restore
		if (state.mstack == nil || state.istack == nil)
			break;

		state.margin = hd state.mstack;
		state.mstack = tl state.mstack;
		state.prevailindent = hd state.istack;
		state.indent = state.prevailindent;
		state.istack = tl state.istack;
	"RS" =>
		state.brk();
		i := state.prevailindent - state.metrics.indent;
		if (params != nil)
			i = tval(state.metrics, hd params, 'n');
		state.mstack = state.margin :: state.mstack;
		state.istack = state.prevailindent :: state.istack;
		state.margin += i;
		state.indent = 2 * state.metrics.indent;
		state.prevailindent = state.indent;
	"SH" =>
		state.paragraph();
		state.prevailindent = state.indent;
		state.curfont = FONT_ROMAN;
		state.curattr = 0;
		state.indent = 0;
		state.heading = 1;
		state.verbatim = 0;

		for (pl := params; pl != nil; pl = tl pl)
			state.addstring(hd pl);

		state.heading = 0;
		state.brk();
		state.newpara = 1;
		state.pspace = 1;
	"SS" =>
		state.paragraph();
		state.prevailindent = state.indent;
		state.curfont = FONT_ROMAN;
		state.curattr = 0;
		state.indent = state.metrics.ssindent;
		state.heading = 2;

		for (pl := params; pl != nil; pl = tl pl)
			state.addstring(hd pl);

		state.heading = 0;
		state.brk();
		state.newpara = 1;
		state.pspace = 1;

	"TF" =>
		state.brk();
		state.pspace = 0;
		i := state.metrics.indent;
		if (params != nil) {
			# make the body indent wide enough for the sample tag
			str := hd params;
			text := Text(FONT_BOLD, 0, str, 0, nil);
			w := state.viewer.textwidth(text) + 2*state.metrics.em;
			if (w > i)
				i = w;
		}
		state.indent = state.metrics.indent;
		state.prevailindent = state.indent + i;
	"TH" =>
		state.brk();
		if (len params < 2)
			break;
		# title line: "name(section)" at both the left and right edges
		str := hd params + "(" + hd tl params + ")";
		txt := Text(FONT_ROMAN, 0, str, 0, nil);
		txtw := state.viewer.textwidth(txt);
		state.indent = 0;
		state.addtext(txt::nil);
		state.indent = state.metrics.pagew - txtw;
		state.addtext(txt::nil);
		state.indent = 0;
		state.brk();
	"TP" =>
		state.paragraph();
		if (state.prevailindent == state.metrics.indent)
			state.prevailindent += state.metrics.indent;
		state.indent = state.metrics.indent;
		# the tag is on the next text line; switch indent after it
		state.ntlsetindent = 1;
		if (params != nil) {
			i := tval(state.metrics, hd params, 'n');
			if (i == 0)
				i = state.metrics.indent;
			state.prevailindent = state.indent + i;
		}
	* =>
		;
	}
	# in verbatim mode every request ends the current output line
	if (state.verbatim)
		state.brk();
}

# Convert one line (or macro argument) of source text into a list of Text
# items, interpreting the backslash escapes that are understood:
#	\- \. \\ \e \<space>	literal characters
#	\| \&			ignored
#	\(xx			special characters (unknown names are dropped)
#	\fX \f(XX		font changes (two-character names are ignored)
#	\*R \*x \*(xx		strings (only R is defined; others are dropped)
# Any other escape is copied through unchanged.  A new Text item is started
# whenever the font changes.  On return state.curfont is the font left in
# effect by the text.
parsetext[T](state: ref ParseState[T], t: string): list of Text
	for{
	T =>
		textwidth:	fn(t: self T, text: Text): int;
	}
{
	# need to do better here - spot inline font changes etc
	# we also currently cannot support troff tab stops
	textl: list of Text;
	line := "";
	curfont := state.curfont;
	prevfont := state.curfont;	# should perhaps be in State
	step := 1;
	for (i := 0; i < len t; i += step) {
		step = 1;
		ch := t[i];
		if (ch == '\\') {
			i++;
			# number of characters left, starting at the escape name
			width := len t - i;
			if (width <= 0)
				break;
			case t[i] {
			'-' or '.' or '\\' =>
				ch = t[i];
			' ' =>
				ch = ' ';
			'e' =>
				ch = '\\';
			'|' or '&' =>
				continue;
			'(' =>
				if (width > 3)
					width = 3;
				step = width;
				# truncated escape at end of text
				if (step != 3)
					continue;
				case t[i+1:i+3] {
				"bu" =>
					ch = '•';
				"em" =>
					ch = '—';
				"mi" =>
					ch = '-';
				"mu" =>
					ch = '×';
				"*m" =>
					ch = 'µ';
				"*G" =>
					ch = 'Γ';
				"*p" =>
					ch = 'π';
				"*b" =>
					ch = 'β';
				"<=" =>
					ch = '≤';
				"->" =>
					ch = '→';
				* =>
					continue;
				}

			'f' =>
				if (width == 1)
					continue;
				if (t[i+1] == '(') {
					# two-character font name: skip it
					if (width > 4)
						width = 4;
					step = width;
					continue;
				}
				i++;
				case t[i] {
				'0' or 'R' =>
					curfont = FONT_ROMAN;
				'1' or 'I' =>
					curfont = FONT_ITALIC;
				'2' =>
					# should be bold but our 'bold' font is constant width
					curfont = FONT_ROMAN;
				'5' or 'L' =>
					curfont = FONT_BOLD;
				'P' =>
					curfont = prevfont;
				}
				continue;
			'*' =>
				if (width == 1)
					continue;
				case t[i+1] {
				'R' =>
					step = 2;
					ch = '®';
				'(' =>
					if (width > 4)
						width = 4;
					step = width;
					continue;
				* =>
					# unknown one-character string: drop it, as is
					# done for unknown \*(xx strings (previously a
					# stray backslash was output and the '*' lost)
					step = 2;
					continue;
				}
			* =>
				# unknown escape: back up so the backslash and the
				# following character are both copied through
				i--;
			}
		}
		if (curfont != state.curfont) {
			# font changed since the last character: close off the
			# text collected so far in the old font
			if (line != "") {
				txt := Text(state.curfont, state.curattr, line, state.heading, state.link);
				line = "";
				textl = txt :: textl;
			}
			prevfont = state.curfont;
			state.curfont = curfont;
		}
		line[len line] = ch;
	}
	if (line != "") {
		txt := Text(state.curfont, state.curattr, line, state.heading, state.link);
		textl = txt :: textl;
	}
	state.curfont = curfont;

	# textl was built most recent first: return it in reading order
	r: list of Text;
	for (; textl != nil; textl = tl textl)
		r = hd textl :: r;
	return r;
}

# Add s to the current line using the current font, attributes, heading
# level and link.
ParseState[T].addstring(state: self ref ParseState[T], s: string)
{
	t := Text(state.curfont, state.curattr, s, state.heading, state.link);
	state.addtext(t::nil);
}

# Append the items of t to the current output line, word-wrapping at spaces
# when an item does not fit within metrics.pagew (never in verbatim mode).
# The first item is separated from what is already on the line by a space;
# later items in the same call follow on directly.
ParseState[T].addtext(state: self ref ParseState[T], t: list of Text)
{
#dumptextlist(t);
	# on setting a line copy state.prevailindent to state.indent
	#
	# always make sure that current indent is achieved
	#
	# if FONT_LITERAL and state.ql then convert to FONT_BOLD and
	# quote the text before any other processing

	state.newpara = 0;
	addspace := 1;
	while (t != nil) {
		# this scheme is inadequate...
		# results in mixed formatting at end of line getting split up
		# e.g.
		#	.IR man (1)
		# can get split at the '('

		indent := 0;
		spacew := 0;
		text := hd t;
		t = tl t;
		if (state.indent + state.margin > state.curwidth || state.curline == nil) {
			# starting a line, or the line has not yet reached the
			# required indent: position this item at the indent
			indent = state.indent + state.margin;
			state.curwidth = indent;
			addspace = 0;
			if (!state.verbatim) {
				# no leading blanks or empty items at the indent
				text.text = trim(text.text);
				while (text.text == "" && t != nil) {
					text = hd t;
					t = tl t;
					text.text = trim(text.text);
				}
			}
		}

		if (text.font == FONT_LITERAL) {
			if (state.ql)
				text.text = "`" + text.text + "'";
			text.font = FONT_BOLD;
		}
		if (addspace) {
			(nil, previtem) := hd state.curline;
			# guard the index: an empty previous item has no last character
			if (len previtem.text > 0 && previtem.text[len previtem.text -1] == ' ')
				addspace = 0;
			else {
				space := Text(previtem.font, previtem.attr, " ", 0, nil);
				spacew = state.viewer.textwidth(space);
			}
		}
		# it doesn't fit - try to word wrap...
		# end:       length of the prefix currently being tried
		# prevend:   length of the previous (longer) prefix tried
		# nextstart: index just after the blank run at which we last cut
		t2 := text;
		end := len text.text;
		prevend := end;
		nextstart := 0;
		while (end > 0) {
			t2.text = text.text[0:end];
			tlen := state.viewer.textwidth(t2);
			if (state.verbatim || state.curwidth + spacew + tlen <= state.metrics.pagew) {
				# easy - just add it!
				state.curwidth += spacew+tlen;
				if (addspace) {
					t2.text = " " + t2.text;
					addspace = 0;
				}
				state.curline = (indent, t2) :: state.curline;
				indent = 0;
				break;
			}
			prevend = end;
			# shorten the prefix to end before the last run of blanks
			for (; end > 0; end--) {
				if (t2.text[end-1] == ' ') {
					nextstart = end;
					for (; end >0 && t2.text[end-1] == ' '; end--)
						;
					break;
				}
			}
		}
		if (end != len text.text) {
			# couldn't fit whole item onto line
			if (state.curline == nil) {
				# couldn't fit (sub)item on empty line - add it anyway
				# as there is nowhere else to put it
				end = prevend;
				t2.text = text.text[0:end];
				state.curline = (indent, t2) :: state.curline;
				if (nextstart != 0) {
					text.text = text.text[nextstart:];
					t = text :: t;
				}
			} else {
				# already stuff on line and we have consumed upto nexstart of
				# the current item
				if (end != 0)
					text.text = text.text[nextstart:];
				t = text :: t;
			}
			state.brk();
		}
		addspace = 0;
	}
}

# Return s with leading spaces removed.
trim(s: string): string
{
	for (spi :=0; spi < len s && s[spi] == ' '; spi++)
		;
	return s[spi:];
}

# End the current output line: send it (in reading order) on setline if it
# is not empty, then reset the indent to the prevailing indent.
ParseState[T].brk(state: self ref ParseState)
{
	if (state.curline != nil) {
		line: list of (int, Text);
		for (l := state.curline; l != nil; l = tl l)
			line = hd l :: line;
		state.setline <- = line;
		state.curline = nil;
		state.curwidth = 0;
	}
	state.indent = state.prevailindent;
}

# Start a new paragraph: break the line, emit pspace blank lines unless a
# gap has already been emitted since the last text, and reset font,
# attributes and indent to their defaults.
ParseState[T].paragraph(state: self ref ParseState)
{
	state.brk();
	if (state.newpara == 0) {
		blank := Text(state.curfont, state.curattr, "", 0, "");
		for (i := 0; i < state.pspace; i++)
			state.setline <- = (0, blank)::nil;
		state.newpara = 1;
	}
	state.curattr = 0;
	state.curfont = FONT_ROMAN;
	state.indent = state.metrics.indent;
#	state.prevailindent = state.indent;
	state.ntlsetindent = 0;
}

# convert troff 'values' into output 'dots'
#
# v is a number optionally followed by a unit letter (i c P m n p u v);
# when no unit is given defunits is used.  Returns 0 for an empty value or
# an unrecognised default unit.
tval(m: Metrics, v: string, defunits: int): int
{
	if (v == nil)
		return 0;
	units := v[len v -1];
	val: real;

	case units {
	'i' or
	'c' or
	'P' or
	'm' or
	'n' or
	'p' or
	'u' or
	'v' =>
		val = real v[0:len v - 1];
	* =>
		val = real v;
		units = defunits;
	}
	r := 0;
	case units {
	'i' =>
		r = int (real m.dpi * val);
	'c' =>
		r = int ((real m.dpi * val)/2.54);
	'P' =>
		r = int ((real m.dpi * val)/ 6.0);
	'm' =>
		r = int (real m.em * val);
	'n' =>
		r = int (real m.en * val);
	'p' =>
		r = int ((real m.dpi * val)/72.0);
	'u' =>
		r = int val;
	'v' =>
		r = int (real m.V * val);
	}
	return r;
}

# Set the strings of strs in alternating fonts (f1 for the 1st, 3rd, ...;
# f2 for the 2nd, 4th, ...) and add them to the current line as one run.
# The font in effect on entry is restored afterwards.
altattr[T](state: ref ParseState[T], f1, f2: int, strs: list of string)
	for{
	T =>
		textwidth:	fn(t: self T, text: Text): int;
	}
{
	index := 0;
	textl: list of Text;

	prevfont := state.curfont;
	for (; strs != nil; strs = tl strs) {
		str := hd strs;
		f := f1;
		if (index++ & 1)
			f = f2;
		state.curfont = f;
		newtext := parsetext(state, str);
		for (; newtext != nil; newtext = tl newtext)
			textl = hd newtext :: textl;
	}
	# textl is in reverse order: put it back into reading order
	orderedtext: list of Text;
	for (; textl != nil; textl = tl textl)
		orderedtext = hd textl :: orderedtext;
	state.addtext(orderedtext);
	state.curfont = prevfont;
}

# Debugging aid: print the text of every item in t.
dumptextlist(t: list of Text)
{
	sys->print("textlist[");
	for (; t != nil; t = tl t) {
		s := hd t;
		sys->print("(%s)", s.text);
	}
	sys->print("]\n");
}

# Work out the link target for the arguments of a .IR request.  The
# arguments are joined and must have the form name(section...), with a
# non-empty name and a numeric section; the result is "section name".
# Returns nil when the arguments do not have that form.  A missing closing
# parenthesis is tolerated: the section then runs to the end of the text.
convlink(params: list of string): string
{
	# merge the texts
	s := "";
	for (; params != nil; params = tl params)
		s = s + (hd params);

	for (i := 0; i < len s; i ++)
		if (s[i] == '(')
			break;
	# reject an empty name (i == 0), a missing '(' and a '(' that ends the text
	if (i == 0 || i+1 >= len s)
		return nil;
	cmd := s[0:i];
	i++;
	s = s[i:];
	for (i = 0; i < len s; i++)
		if (s[i] == ')')
			break;
	section := s[0:i];
	if (section == nil || !isint(section))
		return nil;

	return section + " " + cmd;
}

# Return 1 if s looks like a section number: only digits and '.', with at
# least one digit; otherwise 0 (so "" and "." are rejected).
isint(s: string): int
{
	ndigits := 0;
	for (i := 0; i < len s; i++) {
		if (s[i] >= '0' && s[i] <= '9')
			ndigits++;
		else if (s[i] != '.')
			return 0;
	}
	return ndigits > 0;
}
