diff options
| author | Charles.Forsyth <devnull@localhost> | 2006-12-22 17:07:39 +0000 |
|---|---|---|
| committer | Charles.Forsyth <devnull@localhost> | 2006-12-22 17:07:39 +0000 |
| commit | 37da2899f40661e3e9631e497da8dc59b971cbd0 (patch) | |
| tree | cbc6d4680e347d906f5fa7fca73214418741df72 /appl/lib/strokes | |
| parent | 54bc8ff236ac10b3eaa928fd6bcfc0cdb2ba46ae (diff) | |
20060303a
Diffstat (limited to 'appl/lib/strokes')
| -rw-r--r-- | appl/lib/strokes/buildstrokes.b | 260 | ||||
| -rw-r--r-- | appl/lib/strokes/mkfile | 18 | ||||
| -rw-r--r-- | appl/lib/strokes/readstrokes.b | 205 | ||||
| -rw-r--r-- | appl/lib/strokes/strokes.b | 793 | ||||
| -rw-r--r-- | appl/lib/strokes/writestrokes.b | 68 |
5 files changed, 1344 insertions, 0 deletions
diff --git a/appl/lib/strokes/buildstrokes.b b/appl/lib/strokes/buildstrokes.b new file mode 100644 index 00000000..2d8a18c4 --- /dev/null +++ b/appl/lib/strokes/buildstrokes.b @@ -0,0 +1,260 @@ +implement Buildstrokes; + +# +# this Limbo code is derived from C code that had the following +# copyright notice, which i reproduce as requested +# +# li_strokesnizer.c +# +# Copyright 2000 Compaq Computer Corporation. +# Copying or modifying this code for any purpose is permitted, +# provided that this copyright notice is preserved in its entirety +# in all copies or modifications. +# COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR +# IMPLIED, AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR +# +# +# Adapted from cmu_strokesnizer.c by Jay Kistler. +# +# Where is the CMU copyright???? Gotta track it down - Jim Gettys +# +# Credit to Dean Rubine, Jim Kempf, and Ari Rapkin. +# + +include "sys.m"; + sys: Sys; + +include "strokes.m"; + strokes: Strokes; + Classifier, Penpoint, Stroke, Region: import strokes; + Rconvex, Rconcave, Rplain, Rpseudo: import Strokes; + +lidebug: con 0; +stderr: ref Sys->FD; + +init(r: Strokes) +{ + sys = load Sys Sys->PATH; + if(lidebug) + stderr = sys->fildes(2); + strokes = r; +} + +# +# Implementation of the Li/Yeung recognition algorithm +# + +# Pre-processing and canonicalization parameters +CANONICAL_X: con 108; +CANONICAL_Y: con 128; +NCANONICAL: con 50; + + +# +# calculate canonical forms +# + +canonical_example(nclasses: int, cnames: array of string, examples: array of list of ref Stroke): (string, array of ref Stroke, array of ref Stroke) +{ + canonex := array[nclasses] of ref Stroke; + dompts := array[nclasses] of ref Stroke; + + # make canonical examples for each class. + for(i := 0; i < nclasses; i++){ + if(lidebug) + sys->fprint(stderr, "canonical_example: class %s\n", cnames[i]); + + # Make a copy of the examples. + pts: list of ref Stroke = nil; + nex := 0; + for(exl := examples[i]; exl != nil; exl = tl exl){ + t := hd exl; + pts = t.copy() :: pts; + nex++; + } + + # Canonicalize each example, and derive the max x and y ranges. + maxxrange := 0; + maxyrange := 0; + for(exl = pts; exl != nil; exl = tl exl){ + e := hd exl; + ce := canonical_stroke(e); + if(ce == nil){ + if(lidebug) + sys->fprint(stderr, "example discarded: can't make canonical form\n"); + continue; # try the next one + } + *e = *ce; + if(e.xrange > maxxrange) + maxxrange = e.xrange; + if(e.yrange > maxyrange) + maxyrange = e.yrange; + } + + # Normalise max ranges. + (maxxrange, maxyrange) = normalise(maxxrange, maxyrange, CANONICAL_X, CANONICAL_Y); + + # Re-scale each example to max ranges. + for(exl = pts; exl != nil; exl = tl exl){ + t := hd exl; + scalex, scaley: int; + if(t.xrange == 0) + scalex = 100; + else + scalex = (100*maxxrange + t.xrange/2) / t.xrange; + if(t.yrange == 0) + scaley = 100; + else + scaley = (100*maxyrange + t.yrange/2) / t.yrange; + t.translate(0, 0, scalex, scaley); + } + + # Average the examples; leave average in first example. + avg := hd pts; # careful, aliasing + for(k := 0; k < NCANONICAL; k++){ + xsum := 0; + ysum := 0; + for(exl = pts; exl != nil; exl = tl exl){ + t := hd exl; + xsum += t.pts[k].x; + ysum += t.pts[k].y; + } + avg.pts[k].x = (xsum + nex/2) / nex; + avg.pts[k].y = (ysum + nex/2) / nex; + } + + # rescale averaged stroke + avg.scaleup(); + + # Re-compute the x and y ranges and center the stroke. + avg.center(); + + canonex[i] = avg; # now it's the canonical representation + + if(lidebug){ + sys->fprint(stderr, "%s, avgpts = %d\n", cnames[i], avg.npts); + for(j := 0; j < avg.npts; j++){ + p := avg.pts[j]; + sys->fprint(stderr, " (%d %d)\n", p.x, p.y); + } + } + + dompts[i] = avg.interpolate().dominant(); # dominant points of canonical representation + } + + return (nil, canonex, dompts); +} + +normalise(x, y: int, xrange, yrange: int): (int, int) +{ + if((100*x + xrange/2)/xrange > (100*y + yrange/2)/yrange){ + y = (y*xrange + x/2)/x; + x = xrange; + }else{ + x = (x*yrange + y/2)/y; + y = yrange; + } + return (x, y); +} + +canonical_stroke(points: ref Stroke): ref Stroke +{ + points = points.filter(); + if(points.npts < 2) + return nil; + + # Scale up to avoid conversion errors. + points.scaleup(); + + # Compute an equivalent stroke with equi-distant points + points = compute_equipoints(points); + if(points == nil) + return nil; + + # Re-translate the points to the origin. + (minx, miny, maxx, maxy) := points.bbox(); + points.translate(minx, miny, 100, 100); + + # Store the x and y ranges in the point list. + points.xrange = maxx - minx; + points.yrange = maxy - miny; + + if(lidebug){ + sys->fprint(stderr, "Canonical stroke: %d, %d, %d, %d\n", minx, miny, maxx, maxy); + for(i := 0; i < points.npts; i++){ + p := points.pts[i]; + sys->fprint(stderr, " (%d %d)\n", p.x, p.y); + } + } + + return points; +} + +compute_equipoints(points: ref Stroke): ref Stroke +{ + pathlen := points.length(); + equidist := (pathlen + (NCANONICAL-1)/2) / (NCANONICAL-1); + equipoints := array[NCANONICAL] of Penpoint; + if(lidebug) + sys->fprint(stderr, "compute_equipoints: npts = %d, pathlen = %d, equidist = %d\n", + points.npts, pathlen, equidist); + + # First original point is an equipoint. + equipoints[0] = points.pts[0]; + nequipoints := 1; + dist_since_last_eqpt := 0; + + for(i := 1; i < points.npts; i++){ + dx1 := points.pts[i].x - points.pts[i-1].x; + dy1 := points.pts[i].y - points.pts[i-1].y; + endx := points.pts[i-1].x*100; + endy := points.pts[i-1].y*100; + remaining_seglen := strokes->sqrt(100*100 * (dx1*dx1 + dy1*dy1)); + dist_to_next_eqpt := equidist - dist_since_last_eqpt; + while(remaining_seglen >= dist_to_next_eqpt){ + if(dx1 == 0){ + # x-coordinate stays the same + if(dy1 >= 0) + endy += dist_to_next_eqpt; + else + endy -= dist_to_next_eqpt; + }else{ + slope := (100*dy1 + dx1/2) / dx1; + tmp := strokes->sqrt(100*100 + slope*slope); + dx := (100*dist_to_next_eqpt + tmp/2) / tmp; + dy := (slope*dx + 50)/100; + if(dy < 0) + dy = -dy; + if(dx1 >= 0) + endx += dx; + else + endx -= dx; + if(dy1 >= 0) + endy += dy; + else + endy -= dy; + } + equipoints[nequipoints].x = (endx + 50) / 100; + equipoints[nequipoints].y = (endy + 50) / 100; + nequipoints++; + #assert(nequipoints <= NCANONICAL); + dist_since_last_eqpt = 0; + remaining_seglen -= dist_to_next_eqpt; + dist_to_next_eqpt = equidist; + } + dist_since_last_eqpt += remaining_seglen; + } + + # Take care of last equipoint. + if(nequipoints == NCANONICAL-1){ + # Make last original point the last equipoint. + equipoints[nequipoints++] = points.pts[points.npts - 1]; + } + if(nequipoints != NCANONICAL){ # fell short + if(lidebug) + sys->fprint(stderr,"compute_equipoints: nequipoints = %d\n", nequipoints); + # assert(false); + return nil; + } + return ref Stroke(NCANONICAL, equipoints, 0, 0); +} diff --git a/appl/lib/strokes/mkfile b/appl/lib/strokes/mkfile new file mode 100644 index 00000000..e06e6327 --- /dev/null +++ b/appl/lib/strokes/mkfile @@ -0,0 +1,18 @@ +<../../../mkconfig + +TARG=\ + buildstrokes.dis\ + strokes.dis\ + readstrokes.dis\ + writestrokes.dis\ + +MODULES= + +SYSMODULES= \ + bufio.m\ + strokes.m\ + sys.m\ + +DISBIN=$ROOT/dis/lib/strokes + +<$ROOT/mkfiles/mkdis diff --git a/appl/lib/strokes/readstrokes.b b/appl/lib/strokes/readstrokes.b new file mode 100644 index 00000000..c42bd48c --- /dev/null +++ b/appl/lib/strokes/readstrokes.b @@ -0,0 +1,205 @@ +implement Readstrokes; + +# +# read structures from stroke classifier files +# + +include "sys.m"; + sys: Sys; + +include "bufio.m"; + bufio: Bufio; + Iobuf: import bufio; + +include "strokes.m"; + strokes: Strokes; + Classifier, Penpoint, Stroke, Region: import strokes; + buildstrokes: Buildstrokes; + +init(s: Strokes) +{ + sys = load Sys Sys->PATH; + bufio = load Bufio Bufio->PATH; + strokes = s; +} + +getint(fp: ref Iobuf): (int, int) +{ + while((c := fp.getc()) == ' ' || c == '\t' || c == '\n') + ; + if(c < 0) + return (c, 0); + sign := 1; + if(c == '-') + sign = -1; + else if(c == '+') + ; + else + fp.ungetc(); + rc := 0; + n := 0; + while((c = fp.getc()) >= '0' && c <= '9'){ + n = n*10 + (c-'0'); + rc = 1; + } + return (rc, n*sign); +} + +getstr(fp: ref Iobuf): (int, string) +{ + while((c := fp.getc()) == ' ' || c == '\t' || c == '\n') + ; + if(c < 0) + return (c, nil); + fp.ungetc(); + s := ""; + while((c = fp.getc()) != ' ' && c != '\t' && c != '\n') + s[len s] = c; + return (0, s); +} + +getpoint(fp: ref Iobuf): (int, Penpoint) +{ + (okx, x) := getint(fp); + (oky, y) := getint(fp); + if(okx <= 0 || oky <= 0) + return (-1, (0,0,0)); + return (0, (x,y,0)); +} + +getpoints(fp: ref Iobuf): ref Stroke +{ + (ok, npts) := getint(fp); + if(ok <= 0 || npts < 0 || npts > 4000) + return nil; + pts := array[npts] of Penpoint; + for(i := 0; i < npts; i++){ + (ok, pts[i]) = getpoint(fp); + if(ok < 0) + return nil; + } + return ref Stroke(npts, pts, 0, 0); +} + +read_classifier_points(fp: ref Iobuf, nclass: int): (int, array of string, array of list of ref Stroke) +{ + names := array[nclass] of string; + examples := array[nclass] of list of ref Stroke; + for(k := 0; k < nclass; k++){ + # read class name and number of examples + (ok, nex) := getint(fp); + if(ok <= 0) + return (-1, nil, nil); + (ok, names[k]) = getstr(fp); + if(ok < 0) + return (ok, nil, nil); + + # read examples + for(i := 0; i < nex; i++){ + pts := getpoints(fp); + if(pts == nil) + return (-1, nil, nil); + examples[k] = pts :: examples[k]; + } + } + return (0, names, examples); +} + +# +# read a classifier, using its digest if that exists +# +read_classifier(file: string, build: int, needex: int): (string, ref Classifier) +{ + rc := ref Classifier; + l := len file; + digestfile: string; + if(l >= 4 && file[l-4:]==".clx") + digestfile = file; + else if(!needex && l >= 3 && file[l-3:]==".cl") + digestfile = file[0:l-3]+".clx"; # try the digest file first + err: string; + if(digestfile != nil){ + fd := sys->open(digestfile, Sys->OREAD); + if(fd != nil){ + (err, rc.cnames, rc.dompts) = read_digest(fd); + rc.nclasses = len rc.cnames; + if(rc.cnames == nil) + err = "empty digest file"; + if(err == nil) + return (nil, rc); + }else + err = sys->sprint("%r"); + if(!build) + return (sys->sprint("digest file: %s", err), nil); + } + + if(buildstrokes == nil){ + buildstrokes = load Buildstrokes Buildstrokes->PATH; + if(buildstrokes == nil) + return (sys->sprint("module %s: %r", Buildstrokes->PATH), nil); + buildstrokes->init(strokes); + } + + fd := sys->open(file, Sys->OREAD); + if(fd == nil) + return (sys->sprint("%r"), nil); + (emsg, cnames, examples) := read_examples(fd); + if(emsg != nil) + return (emsg, nil); + rc.nclasses = len cnames; + (err, rc.canonex, rc.dompts) = buildstrokes->canonical_example(rc.nclasses, cnames, examples); + if(err != nil) + return ("failed to calculate canonical examples", nil); + rc.cnames = cnames; + if(needex) + rc.examples = examples; + + return (nil, rc); +} + +read_examples(fd: ref Sys->FD): (string, array of string, array of list of ref Strokes->Stroke) +{ + fp := bufio->fopen(fd, Bufio->OREAD); + (ok, nclasses) := getint(fp); + if(ok <= 0) + return ("missing number of classes", nil, nil); + (okc, cnames, examples) := read_classifier_points(fp, nclasses); + if(okc < 0) + return ("couldn't read examples", nil, nil); + return (nil, cnames, examples); +} + +# +# attempt to read the digest of a classifier, +# and return its contents if successful; +# return a diagnostic if not +# +read_digest(fd: ref Sys->FD): (string, array of string, array of ref Stroke) +{ + # Read-in the name and dominant points for each class. + fp := bufio->fopen(fd, Bufio->OREAD); + cnames := array[32] of string; + dompts := array[32] of ref Stroke; + for(nclasses := 0;; nclasses++){ + if(nclasses >= len cnames){ + a := array[nclasses+32] of string; + a[0:] = cnames; + cnames = a; + b := array[nclasses+32] of ref Stroke; + b[0:] = dompts; + dompts = b; + } + (okn, class) := getstr(fp); + if(okn == Bufio->EOF) + break; + if(class == nil) + return ("expected class name", nil, nil); + cnames[nclasses] = class; + dpts := getpoints(fp); + if(dpts == nil) + return ("bad points list", nil, nil); + strokes->compute_chain_code(dpts); + dompts[nclasses] = dpts; + } + return (nil, cnames[0:nclasses], dompts[0:nclasses]); +} diff --git a/appl/lib/strokes/strokes.b b/appl/lib/strokes/strokes.b new file mode 100644 index 00000000..3797b3d1 --- /dev/null +++ b/appl/lib/strokes/strokes.b @@ -0,0 +1,793 @@ +implement Strokes; + +# +# this Limbo code is derived from C code that had the following +# copyright notice, which i reproduce as requested +# +# li_recognizer.c +# +# Copyright 2000 Compaq Computer Corporation. +# Copying or modifying this code for any purpose is permitted, +# provided that this copyright notice is preserved in its entirety +# in all copies or modifications. +# COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR +# IMPLIED, AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR +# +# +# Adapted from cmu_recognizer.c by Jay Kistler. +# +# Where is the CMU copyright???? Gotta track it down - Jim Gettys +# +# Credit to Dean Rubine, Jim Kempf, and Ari Rapkin. +# +# +# the copyright notice really did end in the middle of the sentence +# + +# +# Limbo version for Inferno by forsyth@vitanuova.com, Vita Nuova, September 2001 +# + +# +# the code is reasonably close to the algorithms described in +# ``On-line Handwritten Alphanumeric Character Recognition Using Dominant Stroke in Strokes'', +# Xiaolin Li and Dit-Yan Yueng, Department of Computer Science, +# Hong Kong University of Science and Technology, Hong Kong (23 August 1996) +# + +include "sys.m"; + sys: Sys; + +include "strokes.m"; + +MAXINT: con 16r7FFFFFFF; + +# Dynamic programming parameters +DP_BAND: con 3; +SIM_THLD: con 60; # x100 +#DIST_THLD: con 3200; # x100 +DIST_THLD: con 3300; # x100 + +# Low-pass filter parameters -- empirically derived +LP_FILTER_WIDTH: con 6; +LP_FILTER_ITERS: con 8; +LP_FILTER_THLD: con 250; # x100 +LP_FILTER_MIN: con 5; + +# Pseudo-extrema parameters -- empirically derived +PE_AL_THLD: con 1500; # x100 +PE_ATCR_THLD: con 135; # x100 + +# Pre-processing and canonicalization parameters +CANONICAL_X: con 108; +CANONICAL_Y: con 128; +DIST_SQ_THRESHOLD: con 3*3; + +# direction-code table; indexed by dx, dy +dctbl := array[] of {array[] of {1, 0, 7}, array[] of {2, MAXINT, 6}, array[] of {3, 4, 5}}; + +# low-pass filter weights +lpfwts := array[2 * LP_FILTER_WIDTH + 1] of int; +lpfconst := -1; + +lidebug: con 0; +stderr: ref Sys->FD; + +# x := 0.04 * (i * i); +# wtvals[i] = floor(100.0 * exp(x)); +wtvals := array[] of {100, 104, 117, 143, 189, 271, 422}; + +init() +{ + sys = load Sys Sys->PATH; + if(lidebug) + stderr = sys->fildes(2); + for(i := LP_FILTER_WIDTH; i >= 0; i--){ + wt := wtvals[i]; + lpfwts[LP_FILTER_WIDTH - i] = wt; + lpfwts[LP_FILTER_WIDTH + i] = wt; + } + lpfconst = 0; + for(i = 0; i < (2 * LP_FILTER_WIDTH + 1); i++) + lpfconst += lpfwts[i]; +} + +Stroke.new(n: int): ref Stroke +{ + return ref Stroke(n, array[n] of Penpoint, 0, 0); +} + +Stroke.trim(ps: self ref Stroke, n: int) +{ + ps.npts = n; + ps.pts = ps.pts[0:n]; +} + +Stroke.copy(ps: self ref Stroke): ref Stroke +{ + n := ps.npts; + a := array[n] of Penpoint; + a[0:] = ps.pts[0:n]; + return ref Stroke(n, a, ps.xrange, ps.yrange); +} + +# +# return the bounding box of a set of points +# (note: unlike Draw->Rectangle, the region is closed) +# +Stroke.bbox(ps: self ref Stroke): (int, int, int, int) +{ + minx := maxx := ps.pts[0].x; + miny := maxy := ps.pts[0].y; + for(i := 1; i < ps.npts; i++){ + pt := ps.pts[i]; + if(pt.x < minx) + minx = pt.x; + if(pt.x > maxx) + maxx = pt.x; + if(pt.y < miny) + miny = pt.y; + if(pt.y > maxy) + maxy = pt.y; + } + return (minx, miny, maxx, maxy); # warning: closed interval +} + +Stroke.center(ps: self ref Stroke) +{ + (minx, miny, maxx, maxy) := ps.bbox(); + ps.xrange = maxx-minx; + ps.yrange = maxy-miny; + avgxoff := -((CANONICAL_X - ps.xrange + 1) / 2); + avgyoff := -((CANONICAL_Y - ps.yrange + 1) / 2); + ps.translate(avgxoff, avgyoff, 100, 100); +} + +Stroke.scaleup(ps: self ref Stroke): int +{ + (minx, miny, maxx, maxy) := ps.bbox(); + xrange := maxx - minx; + yrange := maxy - miny; + scale: int; + if(((100 * xrange + CANONICAL_X / 2) / CANONICAL_X) > + ((100 * yrange + CANONICAL_Y / 2) / CANONICAL_Y)) + scale = (100 * CANONICAL_X + xrange / 2) / xrange; + else + scale = (100 * CANONICAL_Y + yrange / 2) / yrange; + ps.translate(minx, miny, scale, scale); + return scale; +} + +# scalex and scaley are x 100. +# Note that this does NOT update points.xrange and points.yrange! +Stroke.translate(ps: self ref Stroke, minx: int, miny: int, scalex: int, scaley: int) +{ + for(i := 0; i < ps.npts; i++){ + ps.pts[i].x = ((ps.pts[i].x - minx) * scalex + 50) / 100; + ps.pts[i].y = ((ps.pts[i].y - miny) * scaley + 50) / 100; + } +} + +TAP_PATHLEN: con 10*100; # x100 + +Classifier.match(rec: self ref Classifier, stroke: ref Stroke): (int, string) +{ + if(stroke.npts < 1) + return (-1, nil); + + # Check for tap. + + # First thing is to filter out ``close points.'' + stroke = stroke.filter(); + + # Unfortunately, we don't have the actual time that each point + # was recorded (i.e., dt is invalid). Hence, we have to use a + # heuristic based on total distance and the number of points. + if(stroke.npts == 1 || stroke.length() < TAP_PATHLEN) + return (-1, "tap"); + + preprocess_stroke(stroke); + + # Compute its dominant points. + dompts := stroke.interpolate().dominant(); + best_dist := MAXDIST; + best_i := -1; + best_name: string; + + # Score input stroke against every class in classifier. + for(i := 0; i < len rec.cnames; i++){ + name := rec.cnames[i]; + (sim, dist) := score_stroke(dompts, rec.dompts[i]); + if(dist < MAXDIST) + sys->fprint(stderr, " (%s, %d, %d)", name, sim, dist); + if(dist < DIST_THLD){ + if(lidebug) + sys->fprint(stderr, " (%s, %d, %d)", name, sim, dist); + # Is it the best so far? + if(dist < best_dist){ + best_dist = dist; + best_i = i; + best_name = name; + } + } + } + + if(lidebug) + sys->fprint(stderr, "\n"); + return (best_i, best_name); +} + +preprocess_stroke(s: ref Stroke) +{ + # Filter out points that are too close. + # We did this earlier, when we checked for a tap. + +# s = s.filter(); + + +# assert(s.npts > 0); + + # Scale up to avoid conversion errors. + s.scaleup(); + + # Center the stroke. + s.center(); + + if(lidebug){ + (minx, miny, maxx, maxy) := s.bbox(); + sys->fprint(stderr, "After pre-processing: [ %d %d %d %d]\n", + minx, miny, maxx, maxy); + printpoints(stderr, s, "\n"); + } +} + +# +# return the dominant points of Stroke s, assuming s has been through interpolation +# +Stroke.dominant(s: self ref Stroke): ref Stroke +{ + regions := s.regions(); + + # Dominant points are: start, end, extrema of non plain regions, and midpoints of the preceding. + nonplain := 0; + for(r := regions; r != nil; r = r.next) + if(r.rtype != Rplain) + nonplain++; + dom := Stroke.new(1 + 2*nonplain + 2); + + # Pick out dominant points. + + # start point + dp := 0; + previx := 0; + dom.pts[dp++] = s.pts[previx]; + currix: int; + + cas := s.contourangles(regions); + for(r = regions; r != nil; r = r.next) + if(r.rtype != Rplain){ + max_v := 0; + min_v := MAXINT; + max_ix := -1; + min_ix := -1; + + for(i := r.start; i <= r.end; i++){ + v := cas[i]; + if(v > max_v){ + max_v = v; max_ix = i; + } + if(v < min_v){ + min_v = v; min_ix = i; + } + if(lidebug > 1) + sys->fprint(stderr, " %d\n", v); + } + if(r.rtype == Rconvex) + currix = max_ix; + else + currix = min_ix; + + dom.pts[dp++] = s.pts[(previx+currix)/2]; # midpoint + dom.pts[dp++] = s.pts[currix]; # extreme + + previx = currix; + } + + # last midpoint, and end point + lastp := s.npts - 1; + dom.pts[dp++] = s.pts[(previx+lastp)/2]; + dom.pts[dp++] = s.pts[lastp]; + dom.trim(dp); + + # Compute chain-code. + compute_chain_code(dom); + + return dom; +} + +Stroke.contourangles(s: self ref Stroke, regions: ref Region): array of int +{ + V := array[s.npts] of int; + V[0] = 18000; + for(r := regions; r != nil; r = r.next){ + for(i := r.start; i <= r.end; i++){ + if(r.rtype == Rplain){ + V[i] = 18000; + }else{ + # For now, simply choose the mid-point. + ismidpt := i == (r.start + r.end)/2; + if(ismidpt ^ (r.rtype!=Rconvex)) + V[i] = 18000; + else + V[i] = 0; + } + } + } + V[s.npts - 1] = 18000; + return V; +} + +Stroke.interpolate(s: self ref Stroke): ref Stroke +{ + # Compute an upper-bound on the number of interpolated points + maxpts := s.npts; + for(i := 0; i < s.npts - 1; i++){ + a := s.pts[i]; + b := s.pts[i+1]; + maxpts += abs(a.x - b.x) + abs(a.y - b.y); + } + + # Allocate an array of the maximum size + newpts := Stroke.new(maxpts); + + # Interpolate each of the segments. + j := 0; + for(i = 0; i < s.npts - 1; i++){ + j = bresline(s.pts[i], s.pts[i+1], newpts, j); + j--; # end point gets recorded as start point of next segment! + } + + # Add-in last point and trim + newpts.pts[j++] = s.pts[s.npts - 1]; + newpts.trim(j); + + if(lidebug){ + sys->fprint(stderr, "After interpolation:\n"); + printpoints(stderr, newpts, "\n"); + } + + # Compute the chain code for P (the list of points). + compute_unit_chain_code(newpts); + + return newpts; +} + +# This implementation is due to an anonymous page on the net +bresline(startpt: Penpoint, endpt: Penpoint, newpts: ref Stroke, j: int): int +{ + x0 := startpt.x; + x1 := endpt.x; + y0 := startpt.y; + y1 := endpt.y; + + stepx := 1; + dx := x1-x0; + if(dx < 0){ + dx = -dx; + stepx = -1; + } + dx <<= 1; + stepy := 1; + dy := y1-y0; + if(dy < 0){ + dy = -dy; + stepy = -1; + } + dy <<= 1; + newpts.pts[j++] = (x0, y0, 0); + if(dx >= dy){ + e := dy - (dx>>1); + while(x0 != x1){ + if(e >= 0){ + y0 += stepy; + e -= dx; + } + x0 += stepx; + e += dy; + newpts.pts[j++] = (x0, y0, 0); + } + }else{ + e := dx - (dy>>1); + while(y0 != y1){ + if(e >= 0){ + x0 += stepx; + e -= dy; + } + y0 += stepy; + e += dx; + newpts.pts[j++] = (x0, y0, 0); + } + } + return j; +} + +compute_chain_code(pts: ref Stroke) +{ + for(i := 0; i < pts.npts - 1; i++){ + dx := pts.pts[i+1].x - pts.pts[i].x; + dy := pts.pts[i+1].y - pts.pts[i].y; + pts.pts[i].chaincode = (12 - quadr(likeatan(dy, dx))) % 8; + } +} + +compute_unit_chain_code(pts: ref Stroke) +{ + for(i := 0; i < pts.npts - 1; i++){ + dx := pts.pts[i+1].x - pts.pts[i].x; + dy := pts.pts[i+1].y - pts.pts[i].y; + pts.pts[i].chaincode = dctbl[dx+1][dy+1]; + } +} + +Stroke.regions(pts: self ref Stroke): ref Region +{ + # Allocate a 2 x pts.npts array for use in computing the (filtered) Angle set, A_n. + R := array[] of {0 to LP_FILTER_ITERS+1 => array[pts.npts] of int}; + curr := R[0]; + + # Compute the Angle set, A, in the first element of array R. + # Values in R are in degrees, x 100. + curr[0] = 18000; # a_0 + for(i := 1; i < pts.npts - 1; i++){ + d_i := pts.pts[i].chaincode; + d_iminusone := pts.pts[i-1].chaincode; + if(d_iminusone < d_i) + d_iminusone += 8; + a_i := (d_iminusone - d_i) % 8; + # convert to degrees, x 100 + curr[i] = ((12 - a_i) % 8) * 45 * 100; + } + curr[pts.npts-1] = 18000; # a_L-1 + + # Perform a number of filtering iterations. + next := R[1]; + for(j := 0; j < LP_FILTER_ITERS; ){ + for(i = 0; i < pts.npts; i++){ + next[i] = 0; + for(k := i - LP_FILTER_WIDTH; k <= i + LP_FILTER_WIDTH; k++){ + oldval: int; + if(k < 0 || k >= pts.npts) + oldval = 18000; + else + oldval = curr[k]; + next[i] += oldval * lpfwts[k - (i - LP_FILTER_WIDTH)]; # overflow? + } + next[i] /= lpfconst; + } + j++; + curr = R[j]; + next = R[j+1]; + } + + # Do final thresholding around PI. + # curr and next are set-up correctly at end of previous loop! + for(i = 0; i < pts.npts; i++) + if(abs(curr[i] - 18000) < LP_FILTER_THLD) + next[i] = 18000; + else + next[i] = curr[i]; + curr = next; + + # Debugging. + if(lidebug > 1){ + for(i = 0; i < pts.npts; i++){ + p := pts.pts[i]; + sys->fprint(stderr, "%3d: (%d %d) %ud ", + i, p.x, p.y, p.chaincode); + for(j = 0; j < 2 + LP_FILTER_ITERS; j++) + sys->fprint(stderr, "%d ", R[j][i]); + sys->fprint(stderr, "\n"); + } + } + + # Do the region segmentation. + r := regions := ref Region(regiontype(curr[0]), 0, 0, nil); + for(i = 1; i < pts.npts; i++){ + t := regiontype(curr[i]); + if(t != r.rtype){ + r.end = i-1; + if(lidebug > 1) + sys->fprint(stderr, " (%d, %d) %d\n", r.start, r.end, r.rtype); + r.next = ref Region(t, i, 0, nil); + r = r.next; + } + } + r.end = i-1; + if(lidebug > 1) + sys->fprint(stderr, " (%d, %d), %d\n", r.start, r.end, r.rtype); + + # Filter out convex/concave regions that are too short. + for(r = regions; r != nil; r = r.next) + if(r.rtype == Rplain){ + while((nr := r.next) != nil && (nr.end - nr.start) < LP_FILTER_MIN){ + # nr must not be plain, and it must be followed by a plain + # assert(nr.rtype != Rplain); + # assert(nr.next != nil && (nr.next).rtype == Rplain); + if(nr.next == nil){ + sys->fprint(stderr, "recog: nr.next==nil\n"); # can't happen + break; + } + r.next = nr.next.next; + r.end = nr.next.end; + } + } + + # Add-in pseudo-extremes. + for(r = regions; r != nil; r = r.next) + if(r.rtype == Rplain){ + arclen := pts.pathlen(r.start, r.end); + dx := pts.pts[r.end].x - pts.pts[r.start].x; + dy := pts.pts[r.end].y - pts.pts[r.start].y; + chordlen := sqrt(100*100 * (dx*dx + dy*dy)); + atcr := 0; + if(chordlen) + atcr = (100*arclen + chordlen/2) / chordlen; + + if(lidebug) + sys->fprint(stderr, "%d, %d, %d\n", arclen, chordlen, atcr); + + # Split region if necessary. + if(arclen >= PE_AL_THLD && atcr >= PE_ATCR_THLD){ + mid := (r.start + r.end)/2; + end := r.end; + r.end = mid - 1; + r = r.next = ref Region(Rpseudo, mid, mid, + ref Region(Rplain, mid+1, end, r.next)); + } + } + + return regions; +} + +regiontype(val: int): int +{ + if(val == 18000) + return Rplain; + if(val < 18000) + return Rconcave; + return Rconvex; +} + +# +# return the similarity of two strokes and, +# if similar, the distance between them; +# if dissimilar, the distance is MAXDIST) +# +score_stroke(a: ref Stroke, b: ref Stroke): (int, int) +{ + sim := compute_similarity(a, b); + if(sim < SIM_THLD) + return (sim, MAXDIST); + return (sim, compute_distance(a, b)); +} + +compute_similarity(A: ref Stroke, B: ref Stroke): int +{ + # A is the longer sequence, length N. + # B is the shorter sequence, length M. + if(A.npts < B.npts){ + t := A; A = B; B = t; + } + N := A.npts; + M := B.npts; + + # Allocate and initialize the Gain matrix, G. + # The size of G is M x (N + 1). + # Note that row 0 is unused. + # Similarities are x 10. + G := array[M] of array of int; + for(i := 1; i < M; i++){ + G[i] = array[N+1] of int; + bcode := B.pts[i-1].chaincode; + + G[i][0] = 0; # source column + + for(j := 1; j < N; j++){ + diff := abs(bcode - A.pts[j-1].chaincode); + if(diff > 4) + diff = 8 - diff; # symmetry + v := 0; + if(diff == 0) + v = 10; + else if(diff == 1) + v = 6; + G[i][j] = v; + } + + G[i][N] = 0; # sink column + } + + # Do the DP algorithm. + # Proceed in column order, from highest column to the lowest. + # Within each column, proceed from the highest row to the lowest. + # Skip the highest column. + for(j := N - 1; j >= 0; j--) + for(i = M - 1; i > 0; i--){ + max := G[i][j + 1]; + if(i < M-1){ + t := G[i + 1][j + 1]; + if(t > max) + max = t; + } + G[i][j] += max; + } + + return (10*G[1][0] + (N-1)/2) / (N-1); +} + +compute_distance(A: ref Stroke, B: ref Stroke): int +{ + # A is the longer sequence, length N. + # B is the shorter sequence, length M. + if(A.npts < B.npts){ + t := A; A = B; B = t; + } + N := A.npts; + M := B.npts; + + # Construct the helper vectors, BE and TE, which say for each column + # what are the ``bottom'' and ``top'' rows of interest. + BE := array[N+1] of int; + TE := array[N+1] of int; + + for(j := 1; j <= N; j++){ + bot := j + (M - DP_BAND); + if(bot > M) bot = M; + BE[j] = bot; + + top := j - (N - DP_BAND); + if(top < 1) top = 1; + TE[j] = top; + } + + # Allocate and initialize the Cost matrix, C. + # The size of C is (M + 1) x (N + 1). + # Note that row and column 0 are unused. + # Costs are x 100. + C := array[M+1] of array of int; + for(i := 1; i <= M; i++){ + C[i] = array[N+1] of int; + bx := B.pts[i-1].x; + by := B.pts[i-1].y; + + for(j = 1; j <= N; j++){ + ax := A.pts[j-1].x; + ay := A.pts[j-1].y; + dx := bx - ax; + dy := by - ay; + dist := sqrt(10000 * (dx * dx + dy * dy)); + + C[i][j] = dist; + } + } + + # Do the DP algorithm. + # Proceed in column order, from highest column to the lowest. + # Within each column, proceed from the highest row to the lowest. + for(j = N; j > 0; j--) + for(i = M; i > 0; i--){ + min := MAXDIST; + if(i > BE[j] || i < TE[j] || (j == N && i == M)) + continue; + if(j < N){ + if(i >= TE[j+1]){ + tmp := C[i][j+1]; + if(tmp < min) + min = tmp; + } + if(i < M){ + tmp := C[i+1][j+1]; + if(tmp < min) + min = tmp; + } + } + if(i < BE[j]){ + tmp := C[i+1][j]; + if(tmp < min) + min = tmp; + } + C[i][j] += min; + } + return (C[1][1] + N / 2) / N; # dist +} + +# Result is x 100. +Stroke.length(s: self ref Stroke): int +{ + return s.pathlen(0, s.npts-1); +} + +# Result is x 100. +Stroke.pathlen(s: self ref Stroke, first: int, last: int): int +{ + l := 0; + for(i := first + 1; i <= last; i++){ + dx := s.pts[i].x - s.pts[i-1].x; + dy := s.pts[i].y - s.pts[i-1].y; + l += sqrt(100*100 * (dx*dx + dy*dy)); + } + return l; +} + +# Note that this does NOT update points.xrange and points.yrange! +Stroke.filter(s: self ref Stroke): ref Stroke +{ + pts := array[s.npts] of Penpoint; + pts[0] = s.pts[0]; + npts := 1; + for(i := 1; i < s.npts; i++){ + j := npts - 1; + dx := s.pts[i].x - pts[j].x; + dy := s.pts[i].y - pts[j].y; + magsq := dx * dx + dy * dy; + if(magsq >= DIST_SQ_THRESHOLD){ + pts[npts] = s.pts[i]; + npts++; + } + } + return ref Stroke(npts, pts[0:npts], 0, 0); +} + +abs(a: int): int +{ + if(a < 0) + return -a; + return a; +} + +# Code from Joseph Hall (jnhall@sat.mot.com). +sqrt(n: int): int +{ + nn := n; + k0 := 2; + for(i := n; i > 0; i >>= 2) + k0 <<= 1; + nn <<= 2; + k1: int; + for(;;){ + k1 = (nn / k0 + k0) >> 1; + if(((k0 ^ k1) & ~1) == 0) + break; + k0 = k1; + } + return (k1 + 1) >> 1; +} + +# Helper routines from Mark Hayter. +likeatan(tantop: int, tanbot: int): int +{ + # Use tan(theta)=top/bot -. order for t + # t in range 0..16r40000 + + if(tantop == 0 && tantop == 0) + return 0; + t := (tantop << 16) / (abs(tantop) + abs(tanbot)); + if(tanbot < 0) + t = 16r20000 - t; + else if(tantop < 0) + t = 16r40000 + t; + return t; +} + +quadr(t: int): int +{ + return (8 - (((t + 16r4000) >> 15) & 7)) & 7; +} + +printpoints(fd: ref Sys->FD, pts: ref Stroke, sep: string) +{ + for(j := 0; j < pts.npts; j++){ + p := pts.pts[j]; + sys->fprint(fd, " (%d %d) %ud%s", p.x, p.y, pts.pts[j].chaincode, sep); + } +} diff --git a/appl/lib/strokes/writestrokes.b b/appl/lib/strokes/writestrokes.b new file mode 100644 index 00000000..f5232466 --- /dev/null +++ b/appl/lib/strokes/writestrokes.b @@ -0,0 +1,68 @@ +implement Writestrokes; + +# +# write structures to classifier files +# + +include "sys.m"; + sys: Sys; + +include "bufio.m"; + bufio: Bufio; + Iobuf: import bufio; + +include "strokes.m"; + strokes: Strokes; + Penpoint, Stroke: import strokes; + +init(s: Strokes) +{ + sys = load Sys Sys->PATH; + bufio = load Bufio Bufio->PATH; + strokes = s; +} + +write_examples(fd: ref Sys->FD, names: array of string, examples: array of list of ref Stroke): string +{ + fp := bufio->fopen(fd, Bufio->OWRITE); + nclass := len names; + fp.puts(sys->sprint("%d\n", nclass)); + for(i := 0; i < nclass; i++){ + exl := examples[i]; + fp.puts(sys->sprint("%d %s\n", len exl, names[i])); + for(; exl != nil; exl = tl exl){ + putpoints(fp, hd exl); + fp.putc('\n'); + } + } + if(fp.flush() == Bufio->ERROR) + return sys->sprint("write error: %r"); + fp.close(); + return nil; +} + +write_digest(fd: ref Sys->FD, cnames: array of string, dompts: array of ref Stroke): string +{ + fp := bufio->fopen(fd, Bufio->OWRITE); + n := len cnames; + for(i := 0; i < n; i++){ + d := dompts[i]; + npts := d.npts; + fp.puts(cnames[i]); + putpoints(fp, d); + fp.putc('\n'); + } + if(fp.flush() == Bufio->ERROR) + return sys->sprint("write error: %r"); + fp.close(); + return nil; +} + +putpoints(fp: ref Iobuf, d: ref Stroke) +{ + fp.puts(sys->sprint(" %d", d.npts)); + for(j := 0; j < d.npts; j++){ + p := d.pts[j]; + fp.puts(sys->sprint(" %d %d", p.x, p.y)); + } +} |
