implement Man2html;
include "sys.m";
stderr: ref Sys->FD;
sys: Sys;
print, fprint, sprint: import sys;
include "bufio.m";
include "draw.m";
include "daytime.m";
dt: Daytime;
include "string.m";
str: String;
# Public interface of the man2html command.
Man2html: module
{
# Command entry point.  The implementation below ignores the draw context
# and takes the page file and an optional section from args.
init: fn(ctxt: ref Draw->Context, args: list of string);
};
# Input characters with a value >= Runeself are looked up in Entities by getnext.
Runeself: con 16r80;
# Integer booleans: false is 0, true is 1.
false, true: con iota;
# One troff special character: the two-character name read after "\(" and
# the text that getnext returns in its place.
Troffspec: adt {
	name, value:	string;
};
# Table of troff "\(xx" special-character names and their replacement text.
# getnext scans it linearly and returns the value of the first matching name;
# a name that is not listed here is rendered as "¿".
tspec := array [] of { Troffspec
("ff", "ff"),
("fi", "fi"),
("fl", "fl"),
("Fi", "ffi"),
("ru", "_"),
("em", "—"),
("14", "¼"),
("12", "½"),
("co", "©"),
("de", "°"),
("dg", "¡"),
("fm", "´"),
("rg", "®"),
# ("bu", "*"),
("bu", "•"),
("sq", "¤"),
("hy", "-"),
("pl", "+"),
("mi", "-"),
("mu", "×"),
("di", "÷"),
("eq", "="),
("==", "=="),
(">=", ">="),
("<=", "<="),
("!=", "!="),
("+-", "±"),
("no", "¬"),
("sl", "/"),
("ap", "&"),
("~=", "~="),
("pt", "oc"),
("gr", "GRAD"),
("->", "->"),
("<-", "<-"),
("ua", "^"),
("da", "v"),
("is", "Integral"),
("pd", "DIV"),
("if", "oo"),
("sr", "-/"),
("sb", "(~"),
("sp", "~)"),
("cu", "U"),
("ca", "(^)"),
("ib", "(="),
("ip", "=)"),
("mo", "C"),
("es", "Ø"),
("aa", "´"),
("ga", "`"),
("ci", "O"),
("L1", "Lucent"),
("sc", "§"),
("dd", "++"),
("lh", "<="),
("rh", "=>"),
("lt", "("),
("rt", ")"),
("lc", "|"),
("rc", "|"),
("lb", "("),
("rb", ")"),
("lf", "|"),
("rf", "|"),
("lk", "|"),
("rk", "|"),
("bv", "|"),
("ts", "s"),
("br", "|"),
("or", "|"),
("ul", "_"),
("rn", " "),
("*p", "PI"),
("**", "*"),
};
# Pairing of a character with the text used for it in the HTML output.
#	name:	text written to the output (an HTML character reference, or a
#		plain-ASCII stand-in for characters outside Latin-1)
#	value:	the Unicode code point
Entity: adt {
	name:	string;
	value:	int;
};

# Character table used in both directions:
#   - getnext maps an input rune >= Runeself to .name and writes it to the
#     HTML output, so every name must itself be valid HTML text;
#   - httpunesc maps a numeric reference 161..255 to Entities[n-161].value.
# The first 95 entries MUST therefore stay in code-point order 161..255 with
# no gaps.  Names of those entries are numeric character references
# ("&#NNN;"); characters that are markup in HTML are written as the
# predefined references &quot; &amp; &lt; &gt;.
Entities: array of Entity;
Entities = array[] of {
	Entity( "&#161;",	'¡' ),
	Entity( "&#162;",	'¢' ),
	Entity( "&#163;",	'£' ),
	Entity( "&#164;",	'¤' ),
	Entity( "&#165;",	'¥' ),
	Entity( "&#166;",	'¦' ),
	Entity( "&#167;",	'§' ),
	Entity( "&#168;",	'¨' ),
	Entity( "&#169;",	'©' ),
	Entity( "&#170;",	'ª' ),
	Entity( "&#171;",	'«' ),
	Entity( "&#172;",	'¬' ),
	Entity( "&#173;",	16rAD ),	# soft hyphen; written numerically because the glyph is invisible
	Entity( "&#174;",	'®' ),
	Entity( "&#175;",	'¯' ),
	Entity( "&#176;",	'°' ),
	Entity( "&#177;",	'±' ),
	Entity( "&#178;",	'²' ),
	Entity( "&#179;",	'³' ),
	Entity( "&#180;",	'´' ),
	Entity( "&#181;",	'µ' ),
	Entity( "&#182;",	'¶' ),
	Entity( "&#183;",	'·' ),
	Entity( "&#184;",	'¸' ),
	Entity( "&#185;",	'¹' ),
	Entity( "&#186;",	'º' ),
	Entity( "&#187;",	'»' ),
	Entity( "&#188;",	'¼' ),
	Entity( "&#189;",	'½' ),
	Entity( "&#190;",	'¾' ),
	Entity( "&#191;",	'¿' ),
	Entity( "&#192;",	'À' ),
	Entity( "&#193;",	'Á' ),
	Entity( "&#194;",	'Â' ),
	Entity( "&#195;",	'Ã' ),
	Entity( "&#196;",	'Ä' ),
	Entity( "&#197;",	'Å' ),
	Entity( "&#198;",	'Æ' ),
	Entity( "&#199;",	'Ç' ),
	Entity( "&#200;",	'È' ),
	Entity( "&#201;",	'É' ),
	Entity( "&#202;",	'Ê' ),
	Entity( "&#203;",	'Ë' ),
	Entity( "&#204;",	'Ì' ),
	Entity( "&#205;",	'Í' ),
	Entity( "&#206;",	'Î' ),
	Entity( "&#207;",	'Ï' ),
	Entity( "&#208;",	'Ð' ),
	Entity( "&#209;",	'Ñ' ),
	Entity( "&#210;",	'Ò' ),
	Entity( "&#211;",	'Ó' ),
	Entity( "&#212;",	'Ô' ),
	Entity( "&#213;",	'Õ' ),
	Entity( "&#214;",	'Ö' ),
	Entity( "&#215;",	'×' ),	# was "&215;" (missing '#')
	Entity( "&#216;",	'Ø' ),
	Entity( "&#217;",	'Ù' ),
	Entity( "&#218;",	'Ú' ),
	Entity( "&#219;",	'Û' ),
	Entity( "&#220;",	'Ü' ),
	Entity( "&#221;",	'Ý' ),
	Entity( "&#222;",	'Þ' ),
	Entity( "&#223;",	'ß' ),
	Entity( "&#224;",	'à' ),
	Entity( "&#225;",	'á' ),
	Entity( "&#226;",	'â' ),
	Entity( "&#227;",	'ã' ),
	Entity( "&#228;",	'ä' ),
	Entity( "&#229;",	'å' ),
	Entity( "&#230;",	'æ' ),
	Entity( "&#231;",	'ç' ),
	Entity( "&#232;",	'è' ),
	Entity( "&#233;",	'é' ),
	Entity( "&#234;",	'ê' ),
	Entity( "&#235;",	'ë' ),
	Entity( "&#236;",	'ì' ),
	Entity( "&#237;",	'í' ),
	Entity( "&#238;",	'î' ),
	Entity( "&#239;",	'ï' ),
	Entity( "&#240;",	'ð' ),
	Entity( "&#241;",	'ñ' ),
	Entity( "&#242;",	'ò' ),
	Entity( "&#243;",	'ó' ),
	Entity( "&#244;",	'ô' ),
	Entity( "&#245;",	'õ' ),
	Entity( "&#246;",	'ö' ),
	Entity( "&#247;",	'÷' ),	# was "&247;" (missing '#')
	Entity( "&#248;",	'ø' ),
	Entity( "&#249;",	'ù' ),
	Entity( "&#250;",	'ú' ),
	Entity( "&#251;",	'û' ),
	Entity( "&#252;",	'ü' ),
	Entity( "&#253;",	'ý' ),
	Entity( "&#254;",	'þ' ),
	Entity( "&#255;",	'ÿ' ),	# &yuml;

	# NOTE(review): the three names below are kept exactly as found; they
	# look like they lost a leading prefix -- confirm against a pristine copy.
	Entity( "SPACE;",	' ' ),
	Entity( "RS;",	'\n' ),
	Entity( "RE;",	'\r' ),
	Entity( "&quot;",	'"' ),
	Entity( "&amp;",	'&' ),
	Entity( "&lt;",	'<' ),
	Entity( "&gt;",	'>' ),

	# ASCII stand-ins for characters outside Latin-1
	Entity( "CAP-DELTA",	'Δ' ),
	Entity( "ALPHA",	'α' ),
	Entity( "BETA",	'β' ),
	Entity( "DELTA",	'δ' ),
	Entity( "EPSILON",	'ε' ),
	Entity( "THETA",	'θ' ),
	Entity( "MU",	'μ' ),
	Entity( "PI",	'π' ),
	Entity( "TAU",	'τ' ),
	Entity( "CHI",	'χ' ),
	Entity( "&lt;-",	'←' ),
	Entity( "^",	'↑' ),
	Entity( "-&gt;",	'→' ),
	Entity( "v",	'↓' ),
	Entity( "!=",	'≠' ),
	Entity( "&lt;=",	'≤' ),

	# end marker; value 0 is never matched by a rune >= Runeself
	Entity( nil,	0 ),
};
# Identifies one manual page.  init builds the value it passes to domanpage
# as Hit("", "man", section, page); domanpage opens .page as a file name.
Hit: adt {
	glob, chap, mtype, page:	string;
};
# Values stored in Global.list_type; Global_init starts with Lnone.
Lnone, Lordered, Lunordered, Ldef, Lother: con iota; # list types
# Element type of Global.chaps (the top-level partitions of the manual).
Chaps: adt {
name: string;
primary: int;
};
# Element type of Global.types (the second-level partitions of the manual).
Types: adt {
	name, desc:	string;
};
# having two separate flags here allows for inclusion of old-style formatted pages
# under a new-style three-level tree
# Compatibility switches for old-style manual pages; both are integer
# booleans and Global_init sets both to false.
Oldstyle: adt {
names: int; # two-level directory tree?
fmt: int; # old internal formats: e.g., "B" font means "L"; name in .TH in all caps
};
# Link text for the components of the current page.  The chap, mtype and
# man fields are filled in by Global.mk_href_chap, Global.mk_href_mtype and
# Global.mk_href_man; title is set by Global_init.
Href: adt {
	title, chap, mtype, man:	string;
};
# per-thread global data
# All conversion state for one run.  An instance is created and given its
# defaults by Global_init and is then passed to every conversion routine.
Global: adt {
# Bufio module handle plus the input and output streams opened by domanpage
bufio: Bufio;
bin: ref Bufio->Iobuf;
bout: ref Bufio->Iobuf;
topname: string; # name of the top level categories in the manual
chaps: array of Chaps; # names of top-level partitions of this manual
types: array of Types; # names of second-level partitions
oldstyle: Oldstyle;
mantitle: string;
mandir: string;
thisone: Hit; # man page we're displaying
mtime: int; # last modification time of thisone
href: Href; # hrefs of components of this man page
hits: array of Hit;
nhits: int;
# one of Lnone, Lordered, Lunordered, Ldef, Lother
list_type: int;
pm: string; # proprietary marking
def_goobie: string; # deferred goobie
sop: int; # output at start of paragraph?
sol: int; # input at start of line?
broken: int; # output at a break?
fill: int; # in fill mode?
pre: int; # in PRE block?
example: int; # an example active?
ipd: int; # emit inter-paragraph distance?
indents: int;
hangingdt: int;
curfont: string; # current font
prevfont: string; # previous font
lastc: int; # previous char from input scanner
def_sm: int; # amount of deferred "make smaller" request
# methods; those defined in this part of the file are the three mk_href_* functions
mk_href_chap: fn(g: self ref Global, chap: string);
mk_href_man: fn(g: self ref Global, man: string, oldstyle: int);
mk_href_mtype: fn(g: self ref Global, chap, mtype: string);
dobreak: fn(g: self ref Global);
print: fn(g: self ref Global, s: string);
softbr: fn(g: self ref Global): string;
softp: fn(g: self ref Global): string;
};
# Report the command syntax on standard error and terminate the command
# by raising "fail:usage".  Never returns.
usage()
{
	# fprint is imported from sys at the top of the file
	fprint(stderr, "Usage: man2html file [section]\n");
	raise "fail:usage";
}
# Command entry point: man2html file [section]
#
# Loads the modules the converter needs, converts the named file with
# domanpage and flushes the buffered output.  The section defaults to "1".
# The draw context is not used.
#
# Raises "fail:load" if a required module cannot be loaded (previously a
# failed load went unnoticed until the nil module handle was first used),
# and "fail:usage" (via usage) if no file argument is given.
init(nil: ref Draw->Context, args: list of string)
{
	sys = load Sys Sys->PATH;
	stderr = sys->fildes(2);

	str = load String String->PATH;
	if (str == nil)
		noload(String->PATH);
	dt = load Daytime Daytime->PATH;
	if (dt == nil)
		noload(Daytime->PATH);

	# Global_init loads Bufio into g.bufio
	g := Global_init();
	if (g.bufio == nil)
		noload(Bufio->PATH);

	# drop the command name
	if (args != nil)
		args = tl args;
	if (args == nil)
		usage();
	page := hd args;
	args = tl args;

	section := "1";
	if (args != nil)
		section = hd args;

	hit := Hit ("", "man", section, page);
	domanpage(g, hit);

	# domanpage creates g.bout; guard in case it could not
	if (g.bout != nil)
		g.bufio->g.bout.flush();
}

# Report a module that failed to load and terminate the command.
noload(path: string)
{
	fprint(stderr, "man2html: cannot load %s: %r\n", path);
	raise "fail:load";
}
# remove markup from a string
# doesn't handle nested/quoted delimiters
# Return s with every '<' ... '>' span removed (the angle brackets included).
# Text after an unmatched '<' is dropped; a stray '>' is dropped and simply
# ends any span in progress.
demark(s: string): string
{
	out: string;
	intag := false;
	n := len s;
	i := 0;
	while (i < n) {
		ch := s[i];
		if (ch == '<')
			intag = true;
		else if (ch == '>')
			intag = false;
		else if (!intag)
			out[len out] = ch;	# keep only characters outside markup
		i++;
	}
	return out;
}
#
# Convert an individual man page to HTML and output.
#
# Reads the file named by man.page through g.bin and writes the converted
# page to standard output through g.bout.  If the file cannot be opened an
# error is printed on stderr and the function returns; g.bout has already
# been created at that point.
domanpage(g: ref Global, man: Hit)
{
file := man.page;
g.bin = g.bufio->open(file, Bufio->OREAD);
g.bout = g.bufio->fopen(sys->fildes(1), Bufio->OWRITE);
if (g.bin == nil) {
fprint(stderr, "Cannot open %s: %r\n", file);
return;
}
# remember the file's modification time when fstat succeeds (err == 0)
(err, info) := sys->fstat(g.bin.fd);
if (! err) {
g.mtime = info.mtime;
}
g.thisone = man;
# main loop: one token from getnext per iteration, until it returns nil
while ((p := getnext(g)) != nil) {
c := p[0];
# a '.' at the start of an input line is handed to dogoobie
# (presumably a troff request line -- confirm against dogoobie)
if (c == '.' && g.sol) {
if (g.pre) {
g.print("");
g.pre = false;
}
dogoobie(g, false);
dohangingdt(g);
# deferred work pending: push the character back and let dogoobie run first
} else if (g.def_goobie != nil || g.def_sm != 0) {
g.bufio->g.bin.ungetc();
dogoobie(g, true);
} else if (c == '\n') {
g.print(p);
dohangingdt(g);
} else
g.print(p);
}
# NOTE(review): from here to the closing brace the text looks damaged by
# extraction: a string literal is split across two lines and the statements
# that follow (sop/fill/broken/example) appear to be the tail of a different
# function.  Restore from a pristine copy before changing this code.
if (g.pm != nil) {
g.print("
");
if (! g.broken)
g.print("\n");
g.sop = true;
g.fill = false;
g.broken = true;
g.example = true;
}
# Switch to fill mode.  Does nothing when fill mode is already on; otherwise
# sets g.fill, prints a newline and marks the output as being at a break and
# at the start of a paragraph.
g_fi(g: ref Global)
{
	if (!g.fill) {
		g.fill = true;
		g.print("\n");
		g.sop = true;
		g.broken = true;
	}
}
# Change the current font.
#
# argl: the arguments of the font request; only the first is used and a
#       missing argument means "P" (previous font).
#
# Font arguments map to HTML elements as follows:
#	"2" or "I" -> I,  "3" or "B" -> B,  "5" or "L" -> TT,
#	"P" -> whatever g.prevfont holds, anything else -> no element.
# The element for the outgoing font is closed, g.prevfont/g.curfont are
# updated, and the element for the new font is opened.  Outside fill mode
# the opening tag carries style="white-space: pre".
g_ft(g: ref Global, argl: list of string)
{
	arg := "P";
	if (argl != nil)
		arg = hd argl;

	# Close the element opened for the outgoing font.  The format used to be
	# "%s>", which wrote e.g. "B>" into the page instead of "</B>".
	if (g.curfont != nil)
		g.print(sprint("</%s>", g.curfont));

	font: string;
	case arg {
	"2" or "I" =>
		font = "I";
	"3" or "B" =>
		font = "B";
	"5" or "L" =>
		font = "TT";
	"P" =>
		font = g.prevfont;
	* =>
		font = nil;
	}

	g.prevfont = g.curfont;
	g.curfont = font;

	if (g.curfont != nil) {
		if (g.fill)
			g.print(sprint("<%s>", g.curfont));
		else
			g.print(sprint("<%s style=\"white-space: pre\">", g.curfont));
	}
}
# level == 1 is a .HP; level == 3 is a .TP
# NOTE(review): this definition looks damaged by extraction: the case
# statement is never closed, a string literal is split across two lines, the
# level parameter is not used in what remains, and the trailing assignments
# (broken/sop/pre) may belong to a different function.  Restore from a
# pristine copy before changing this code.
g_HP_TP(g: ref Global, level: int)
{
case g.list_type {
Ldef =>
if (g.hangingdt != 0)
g.print("
\n");
g.broken = true;
g.sop = true;
g.pre = true;
}
# Set the inter-paragraph distance flag: exactly one argument equal to "0"
# turns it off, any other argument list (including none) turns it on.
g_PD(g: ref Global, argl: list of string)
{
	if (argl != nil && tl argl == nil && hd argl == "0") {
		g.ipd = false;
		return;
	}
	g.ipd = true;
}
# Select the proprietary-marking text stored in g.pm from the first argument
# (default "P").  domanpage tests g.pm once the input is exhausted.
# NOTE(review): the string literals below contain raw line breaks, which
# looks like extraction damage -- confirm against a pristine copy.
g_PM(g: ref Global, argl: list of string)
{
code := "P";
if (argl != nil)
code = hd argl;
case code {
* => # includes "1" and "P"
g.pm = "Lucent Technologies - Proprietary\n" +
"
Use pursuant to Company Instructions.\n";
"2" or "RS" =>
g.pm = "Lucent Technologies - Proprietary (Restricted)\n" +
"
Solely for authorized persons having a need to know\n" +
"
pursuant to Company Instructions.\n";
"3" or "RG" =>
g.pm = "Lucent Technologies - Proprietary (Registered)\n" +
"
Solely for authorized persons having a need to know\n" +
"
and subject to cover sheet instructions.\n";
"4" or "CP" =>
g.pm = "SEE PROPRIETARY NOTICE ON COVER PAGE\n";
"5" or "CR" =>
g.pm = "Copyright xxxx Lucent Technologies\n" + # should fill in the year from the date register
"
All Rights Reserved.\n";
"6" or "UW" =>
g.pm = "THIS DOCUMENT CONTAINS PROPRIETARY INFORMATION OF\n" +
"
LUCENT TECHNOLOGIES INC. AND IS NOT TO BE DISCLOSED OR USED EXCEPT IN\n" +
"
ACCORDANCE WITH APPLICABLE AGREEMENTS.\n" +
"
Unpublished & Not for Publication\n";
}
}
# Start a new paragraph: close any open list (closel), reset the font
# (reset_font), print the paragraph break supplied by g.softp() if it is not
# nil, then mark the output as being at a break and at a paragraph start.
g_PP(g: ref Global)
{
	closel(g);
	reset_font(g);
	if ((para := g.softp()) != nil)
		g.print(para);
	g.broken = true;
	g.sop = true;
}
# NOTE(review): everything from here to the end of getnext looks damaged by
# extraction and is reproduced unchanged:
#   - the body of g_RE has string literals split across lines and unbalanced
#     parentheses, and it runs straight into fragments of other definitions;
#   - getline and the first half of getnext are collapsed onto a single line,
#     where each '#' turns the remainder of the line into a comment;
#   - several sprint calls pass arguments to formats that contain no verbs
#     (for example sprint("", sign, size)), and some arms return "", which
#     the loop in domanpage cannot tell apart from the nil end-of-input
#     result.
# Restore this region from a pristine copy before relying on it.
g_RE(g: ref Global)
{
g.print(""); g.print("
"); g.print("
| ")); g.print(sprint("%s(%s)", g.thisone.page, g.thisone.mtype)); g.print(" | \n"); g.print(sprint("Rev: %s |
"; else return "
"; } # # get (remainder of) a line # getline(g: ref Global): string { line := ""; while ((token := getnext(g)) != "\n") { if (token == nil) return line; line += token; } return line+"\n"; } # # Get next logical character. Expand it with escapes. # getnext(g: ref Global): string { iob := g.bufio; Iobuf: import iob; font: string; token: string; bin := g.bin; g.sol = (g.lastc == '\n'); c := bin.getc(); if (c < 0) return nil; g.lastc = c; if (c >= Runeself) { for (i := 0; i < len Entities; i++) if (Entities[i].value == c) return Entities[i].name; return sprint("%d;", c); } case c { '<' => return "<"; '>' => return ">"; '\\' => c = bin.getc(); if (c < 0) return nil; g.lastc = c; case c { ' ' => return " "; # chars to ignore '|' or '&' or '^' => return getnext(g); # ignore arg 'k' => nil = bin.getc(); return getnext(g); # defined strings '*' => case bin.getc() { 'R' => return "®"; } return getnext(g); # special chars '(' => token[0] = bin.getc(); token[1] = bin.getc(); for (i := 0; i < len tspec; i++) if (token == tspec[i].name) return tspec[i].value; return "¿"; 'c' => c = bin.getc(); if (c < 0) return nil; else if (c == '\n') { g.lastc = c; g.sol = true; token[0] = bin.getc(); return token; } # DEBUG: should there be a "return xxx" here? 'e' => return "\\"; 'f' => g.lastc = c = bin.getc(); if (c < 0) return nil; case c { '2' or 'I' => font = "I"; '3' or 'B' => font = "B"; '5' or 'L' => font = "TT"; 'P' => font = g.prevfont; * => # includes '1' and 'R' font = nil; } # There are serious problems with this. We don't know the fonts properly at this stage. # g.prevfont = g.curfont; # g.curfont = font; # if (g.prevfont != nil) # token = sprint("%s>", g.prevfont); # if (g.curfont != nil) # token += sprint("<%s>", g.curfont); if (token == nil) return ""; # looks odd but it avoids inserting a space in
text
return token;
# \s escape: reads an optional sign followed by decimal digits
's' =>
sign := '+';
size := 0;
relative := false;
getsize:
for (;;) {
c = bin.getc();
if (c < 0)
return nil;
case c {
'+' =>
relative = true;
'-' =>
sign = '-';
relative = true;
'0' to '9' =>
size = size * 10 + (c - '0');
* =>
# first character that is not part of the size: push it back and stop
bin.ungetc();
break getsize;
}
g.lastc = c;
}
if (size == 0)
token = "";
else if (relative)
token = sprint("", sign, size);
else
token = sprint("", size);
return token;
}
}
# every other character is returned as a one-character string
token[0] = c;
return token;
}
#
# Return the strings before and after the left-most instance of separator.
# Returns (s, nil) if there is no match; the second string is also nil when
# the separator is the last char in s.
#
# Cut s at the first occurrence of the character sep.
# Returns (text before sep, text after sep); the separator itself is dropped.
# Without a match the result is (s, nil).
split(s: string, sep: int): (string, string)
{
	n := len s;
	i := 0;
	while (i < n && s[i] != sep)
		i++;
	if (i == n)
		return (s, nil);
	# s[n:] is a valid slice whose value is nil, so a trailing separator is fine
	return (s[:i], s[i+1:]);
}
# Allocate the conversion state and give every field its starting value.
# Loads the Bufio module into g.bufio (the result of the load is not checked
# here).  The scanner state starts as if a newline had just been read.
Global_init(): ref Global
{
	g := ref Global;
	g.bufio = load Bufio Bufio->PATH;

	# manual layout
	g.topname = "System";
	g.chaps = array[20] of Chaps;
	g.types = array[20] of Types;
	g.oldstyle.names = false;
	g.oldstyle.fmt = false;

	# page identity
	g.mantitle = "";
	g.href.title = g.mantitle;	# ??
	g.mtime = 0;
	g.nhits = 0;

	# formatter state
	g.list_type = Lnone;
	g.indents = 0;
	g.hangingdt = 0;
	g.def_sm = 0;
	g.fill = true;
	g.ipd = true;
	g.sop = true;
	g.broken = true;
	g.pre = false;
	g.example = false;

	# input scanner: behave as if at the start of a line
	g.lastc = '\n';
	return g;
}
# Set g.href.chap for chapter chap; a nil chap leaves it untouched.
# NOTE(review): the format "%s" uses only g.mandir although three arguments
# are passed -- the link markup appears to have been lost from the format
# string; confirm against a pristine copy.
Global.mk_href_chap(g: self ref Global, chap: string)
{
if (chap != nil)
g.href.chap = sprint("%s", g.mandir, chap, chap);
}
# Set g.href.man for page man.  With oldstyle set, a lower-cased copy of the
# name is passed as the second argument and the original as the third.
# NOTE(review): the format "%s" uses only g.mandir although three arguments
# are passed -- the link markup appears to have been lost from the format
# string; confirm against a pristine copy.
Global.mk_href_man(g: self ref Global, man: string, oldstyle: int)
{
rman := man;
if (oldstyle)
rman = str->tolower(man); # compensate for tradition of putting titles in all CAPS
g.href.man = sprint("%s", g.mandir, rman, man);
}
# Set g.href.mtype for section mtype of chapter chap.
# NOTE(review): the format "%s" uses only g.mandir although four arguments
# are passed -- the link markup appears to have been lost from the format
# string; confirm against a pristine copy.
Global.mk_href_mtype(g: self ref Global, chap, mtype: string)
{
g.href.mtype = sprint("%s", g.mandir, chap, mtype, mtype);
}
# We assume that anything >= Runeself is already in UTF.
#
# Replace HTML character references in s by the characters they denote.
#
# Recognised forms, each requiring the terminating ';':
#	&#NNN;	decimal reference; 161..255 are taken from Entities, any other
#		positive value is used as the code point directly
#	&name;	name found in Entities, stored either bare ("PI") or in full
#		reference form ("&lt;")
# Anything else -- an unknown name, a reference without ';', a lone '&',
# "&;" or a non-decimal number -- is copied through unchanged.
#
# Fixes relative to the previous version:
#   - the terminating ';' is now consumed (it used to be copied to the
#     output, so "&#169;" came out as the character followed by ';');
#   - a '&' with no later ';' no longer discards the rest of the string;
#   - "&;" no longer loses its '&'.
httpunesc(s: string): string
{
	t := "";
	n := len s;
	for (i := 0; i < n; i++) {
		c := s[i];
		if (c == '&' && i + 1 < n) {
			# ent is the text between '&' and the first ';'; rem is nil when
			# there is no ';' at all
			(ent, rem) := str->splitl(s[i+1:], ";");
			if (rem != nil && ent != nil) {
				semi := i + 1 + len ent;	# index of the terminating ';'
				if (ent[0] == '#') {
					v := 0;
					if (len ent > 1)
						v = int ent[1:];
					if (v > 0) {
						if (v >= 161 && v < 256)
							t[len t] = Entities[v-161].value;
						else
							t[len t] = v;
						i = semi;	# the loop increment steps past ';'
						continue;
					}
				} else {
					quoted := "&" + ent + ";";
					for (j := 0; j < len Entities; j++) {
						nm := Entities[j].name;
						if (nm == ent || nm == quoted)
							break;
					}
					if (j < len Entities) {
						t[len t] = Entities[j].value;
						i = semi;
						continue;
					}
				}
			}
		}
		# ordinary character, or a '&' that does not start a known reference
		t[len t] = c;
	}
	return t;
}
# Print the page heading lines for title t; markup inside t is removed with
# demark first.  The search flag is accepted but not used yet.
title(g: ref Global, t: string, search: int)
{
	if(search)
		;	# not yet used
	heading := sprint("Inferno's %s \n", demark(t));
	g.print("\n");
	g.print(heading);
	g.print("\n");
	g.print("\n");
}