diff options
Diffstat (limited to 'appl/ebook/oebpackage.b')
| -rw-r--r-- | appl/ebook/oebpackage.b | 276 |
1 files changed, 276 insertions, 0 deletions
implement OEBpackage;

#
# Reader for Open eBook (OEB 1.0.x) package files.
#
# open() parses a package (or a bare OEB document, which is wrapped in a
# one-item package) into a Package value; Package.getmeta() and
# Package.locate() query the result.
#

include "sys.m";
	sys: Sys;

include "bufio.m";

include "url.m";
	url: Url;
	ParsedUrl: import url;

include "xml.m";
	xml: Xml;
	Attributes, Locator, Parser: import xml;

include "oebpackage.m";

# System identifiers of the two DTDs recognised exactly.
OEBpkgtype: con "http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd";
OEBdoctype: con "http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd";

# Results of doctype(); -1 means "neither".
OEBpkg, OEBdoc: con iota;
# When non-zero, doctype() also accepts any identifier that merely
# contains "oebpkg1" or "oebdoc1".
Laxchecking: con 1;

# Load the modules this one depends on and remember the caller's Xml module.
# NOTE(review): if the Url module fails to load, url stays nil and
# Package.locate() will fault when it calls url->makeurl -- confirm that
# callers can rely on the load succeeding.
init(xmlm: Xml)
{
	sys = load Sys Sys->PATH;
	url = load Url Url->PATH;
	if(url != nil)
		url->init();
	xml = xmlm;
}

# Parse the OEB package (or single OEB document) in file f.
# Parser warnings are delivered by the Xml module on `warnings'.
# Returns (package, nil) on success or (nil, diagnostic) on failure.
open(f: string, warnings: chan of (Xml->Locator, string)): (ref Package, string)
{
	(x, e) := xml->open(f, warnings, nil);
	if(x == nil)
		return (nil, e);

	# the first item must be the <?xml ...?> processing instruction
	xi := x.next();
	if(xi == nil)
		return (nil, "not valid XML");
	pick d := xi {
	Process =>
		if(d.target != "xml")
			return (nil, "not an XML file");
	* =>
		return (nil, "unexpected file structure");
	}

	# Look at up to three items for the DOCTYPE, skipping text between
	# the XML declaration and the DOCTYPE.  The loop is left as soon as a
	# package DOCTYPE has been recognised: an unlabelled break would only
	# leave the case statement, so the loop used to go on and read the
	# <package> tag itself whenever no text separated it from the
	# DOCTYPE, rejecting a perfectly valid file.  Any text following the
	# DOCTYPE is skipped by next() below.
prologue:
	for(i := 0; i < 3; i++){
		xi = x.next();
		if(xi == nil)
			return (nil, "not OEB package");
		pick d := xi {
		Text =>
			;	# usual XML extraneous punctuation cruft
		Doctype =>
			if(!d.public || len d.params < 2)
				return (nil, "not an OEB document or package");
			case doctype(hd tl d.params, Laxchecking) {
			OEBpkg =>
				break prologue;
			OEBdoc =>
				# it's a document; make it into a simple package
				p := ref Package;
				p.file = f;
				p.uniqueid = d.name;
				p.manifest = p.spine = ref Item("doc", f, "text/x-oeb1-document", nil, f, nil) :: nil;
				return (p, nil);
			* =>
				return (nil, "unexpected DOCTYPE for OEB package: " + hd tl d.params );
			}
		* =>
			return (nil, "not OEB package (no DOCTYPE)");
		}
	}
	p := ref Package;
	p.file = f;

	# package[@unique-identifier[IDREF], Metadata, Manifest, Spine, Tours?, Guide?]
	if((tag := next(x, "package")) == nil)
		return (nil, "can't find OEB package");
	p.uniqueid = tag.attrs.get("unique-identifier");
	spine: list of string;			# idrefs from <spine>, in reverse file order
	fallbacks: list of (ref Item, string);	# (item, id of its fallback) still to resolve
	x.down();
	while((tag = next(x, nil)) != nil){
		x.down();
		case tag.name {
		"metadata" =>
			while((tag = next(x, nil)) != nil)
				if(tag.name == "dc-metadata"){
					x.down();
					# collection stops at the first element for which text() yields nil
					while((tag = next(x, nil)) != nil && (s := text(x)) != nil)
						p.meta = (tag.name, tag.attrs, s) :: p.meta;
					x.up();
				}
		"manifest" =>
			while((tag = next(x, "item")) != nil){
				a := tag.attrs;
				p.manifest = ref Item(a.get("id"), a.get("href"), a.get("media-type"), nil, nil, nil) :: p.manifest;
				fallback := a.get("fallback");
				if (fallback != nil)
					fallbacks = (hd p.manifest, fallback) :: fallbacks;
			}
		"spine" =>
			while((tag = next(x, "itemref")) != nil)
				if((id := tag.attrs.get("idref")) != nil)
					spine = id :: spine;
		"guide" =>
			while((tag = next(x, "reference")) != nil){
				a := tag.attrs;
				p.guide = ref Reference(a.get("type"), a.get("title"), a.get("href")) :: p.guide;
			}
		"tours" =>
			;	# ignore for now
		}
		x.up();
	}
	x.up();

	# deal with fallbacks, and make sure they're not circular.
	for (; fallbacks != nil; fallbacks = tl fallbacks) {
		(item, fallbackid) := hd fallbacks;
		fallback := lookitem(p.manifest, fallbackid);
		# walk the chain already hanging off the candidate; meeting
		# `item' on the way means that linking it would close a cycle
		for (fi := fallback; fi != nil; fi = fi.fallback)
			if (fi == item)
				break;
		if (fi == nil)
			item.fallback = fallback;
		else
			sys->print("warning: circular fallback reference\n");
	}

	# we'll assume it doesn't require a hash table
	# (prepending while walking the reversed idref list restores file order)
	for(; spine != nil; spine = tl spine)
		if((item := lookitem(p.manifest, hd spine)) != nil)
			p.spine = item :: p.spine;
		else
			p.spine = ref Item(hd spine, nil, nil, nil, nil, "item in OEB spine but not listed in manifest") :: p.spine;

	# p.guide was built by prepending; reverse it back into file order
	guide := p.guide;
	for(p.guide = nil; guide != nil; guide = tl guide)
		p.guide = hd guide :: p.guide;
	return (p, nil);
}

# Classify DOCTYPE identifier s: OEBpkg, OEBdoc, or -1 if it is neither.
# With lax non-zero, an identifier that contains "oebpkg1" or "oebdoc1"
# is accepted as well as the two exact 1.0.1 identifiers.
doctype(s: string, lax: int): int
{
	case s {
	OEBpkgtype =>
		return OEBpkg;
	OEBdoctype =>
		return OEBdoc;
	* =>
		if (!lax)
			return -1;
		if (contains(s, "oebpkg1"))
			return OEBpkg;
		# a stray ';' after this condition used to make the return
		# unconditional, so every unknown DTD passed as an OEB document
		if (contains(s, "oebdoc1"))
			return OEBdoc;
		return -1;
	}
}

# does s1 contain s2
# (returns 1 if so, 0 otherwise; an empty s2 is contained in any s1)
contains(s1, s2: string): int
{
	if (len s2 > len s1)
		return 0;
	n := len s1 - len s2 + 1;	# number of candidate start positions
search:
	for (i := 0; i < n ; i++) {
		for (j := 0; j < len s2; j++)
			if (s1[i + j] != s2[j])
				continue search;
		return 1;
	}
	return 0;
}

# Return the first item in `items' whose id equals `id', or nil if there is none.
lookitem(items: list of ref Item, id: string): ref Item
{
	for(; items != nil; items = tl items){
		item := hd items;
		if(item.id == id)
			return item;
	}
	return nil;
}

# Read items from x until a Tag is found whose name is s (any Tag when
# s is nil) and return it; nil once x.next() returns nil.
# Error items are printed as they are met; all other items are skipped.
next(x: ref Parser, s: string): ref Xml->Item.Tag
{
	while ((t0 := x.next()) != nil) {
		pick t1 := t0 {
		Error =>
			sys->print("oebpackage: error: %s:%d: %s\n", t1.loc.systemid, t1.loc.line, t1.msg);
		Tag =>
			if (s == nil || s == t1.name)
				return t1;
		}
	}
	return nil;
}

# Return the contents of the first Text item found after x.down(), or nil
# if there is none.  Error items met on the way are printed.  x.up() is
# called before returning in either case.
text(x: ref Parser): string
{
	s: string;
	x.down();
loop:
	while ((t0 := x.next()) != nil) {
		pick t1 := t0 {
		Error =>
			sys->print("oebpackage: error: %s:%d: %s\n", t1.loc.systemid, t1.loc.line, t1.msg);
		Text =>
			s = t1.ch;
			break loop;
		}
	}
	x.up();
	return s;
}

# Return the (attributes, value) pair of every metadata entry named n.
Package.getmeta(p: self ref Package, n: string): list of (Xml->Attributes, string)
{
	r: list of (Xml->Attributes, string);
	for(meta := p.meta; meta != nil; meta = tl meta){
		(name, a, value) := hd meta;
		if(name == n)
			r = (a, value) :: r;
	}
	# r is in file order because p.meta is reversed
	return r;
}

# Resolve each manifest item's href to a local file name.
# For every item, item.missing is set to a diagnostic (nil when the file
# was found by sys->stat) and item.file to the path that was found.
# Returns the number of items that could not be located.
Package.locate(p: self ref Package): int
{
	# relative hrefs are taken relative to the package file's directory
	dir := "./";
	for(n := len p.file; --n >= 0;)
		if(p.file[n] == '/'){
			dir = p.file[0:n+1];
			break;
		}
	nmissing := 0;
	for(items := p.manifest; items != nil; items = tl items){
		item := hd items;
		err := "";
		if(item.href != nil){
			u := url->makeurl(item.href);
			if(u.scheme != Url->FILE && u.scheme != Url->NOSCHEME)
				err = sys->sprint("URL scheme %s not yet supported", url->schemes[u.scheme]);
			else if(u.host != "localhost" && u.host != nil)
				err = "non-local URLs not supported";
			else{
				path := u.path;
				if(u.pstart != "/")
					path = dir+path;	# TO DO: security
				# only existence matters; the Dir result is not needed
				(ok, nil) := sys->stat(path);
				if(ok >= 0)
					item.file = path;
				else
					err = sys->sprint("%r");
			}
		}else
			err = "no location specified (missing HREF)";
		if(err != nil)
			nmissing++;
		item.missing = err;
	}
	return nmissing;
}
