diff options
Diffstat (limited to 'utils/sqz/sqz.c')
| -rw-r--r-- | utils/sqz/sqz.c | 528 |
1 files changed, 528 insertions, 0 deletions
diff --git a/utils/sqz/sqz.c b/utils/sqz/sqz.c new file mode 100644 index 00000000..cf399e45 --- /dev/null +++ b/utils/sqz/sqz.c @@ -0,0 +1,528 @@ +#include <lib9.h> +#include <a.out.h> +#include "squeeze.h" + +/* + * forsyth@vitanuova.com + */ + +typedef struct Word Word; +struct Word { + ulong v; + ushort freq; + ushort code; + Word* next; +}; + +typedef struct Squeeze Squeeze; +struct Squeeze { + int n; + /*union {*/ + ulong tab[7*256]; + Word* rep[7*256]; + /*};*/ +}; + +enum { + HMASK = 0xFFFF, + HSIZE = HMASK+1, + + Codebufsize = 3*1024*1024 +}; + +#define GET4(p) (((((((p)[0]<<8)|(p)[1])<<8)|(p)[2])<<8)|(p)[3]) +#define GET4L(p) (((((((p)[3]<<8)|(p)[2])<<8)|(p)[1])<<8)|(p)[0]) +#define PUT4(p,v) (((p)[0]=(v)>>24),((p)[1]=(v)>>16),((p)[2]=(v)>>8),((p)[3]=(v))) + +static uchar prog[Codebufsize]; +static uchar outbuf[Codebufsize]; +static Word* hash1[HSIZE]; +static Word* hash2[HSIZE]; +static Sqhdr sqhdr; +static ulong chksum; + +static int aflag; /* all: both text (squeezed) and data (not) */ +static int dflag; /* squeeze data, not text */ +static int tflag; /* squeeze text, leave data as-is */ +static int qflag = 1; /* enable powerpc option */ +static int wflag; /* write output */ +static int zflag; /* use top==0 for data segment */ +static int islittle; /* object code uses little-endian byte order */ +static int debug; +static char* fname; + +static void analyse(ulong*, int, Squeeze*, Squeeze*, Word**); +static Word** collate(Word**, int); +static void dumpsq(Squeeze*, int); +static void freehash(Word**); +static long Read(int, void*, long); +static void remap(Squeeze*); +static int squeeze(ulong*, int, uchar*, ulong); +static int squeezetab(int, int, Squeeze*, Word**, int); +static void squirt(int, Squeeze*); +static void Write(int, void*, long); + +static void +usage(void) +{ + fprint(2, "Usage: sqz [-w] [-t] [-d] [-q] q.out\n"); + exits("usage"); +} + +void +main(int argc, char **argv) +{ + int fd, n, ns, nst, nsd; + long txtlen, datlen, asis; + ulong topdat, toptxt; + Exec ex; + Squeeze sq3, sq4, sq5, sq6; + Word *top; + + setbinmode(); +/* fmtinstall('f', gfltconv); */ + ARGBEGIN{ + case 'D': + debug++; + break; + case 'd': + dflag++; + break; + case 'q': + qflag = 0; + break; + case 't': + tflag++; + break; + case 'w': + wflag++; + break; + default: + usage(); + }ARGEND + fname = *argv; + if(fname == nil) + usage(); + fd = open(fname, OREAD); + if(fd < 0){ + fprint(2, "sqz: can't open %s: %r\n", fname); + exits("open"); + } + Read(fd, &ex, sizeof(Exec)); + txtlen = GET4((uchar*)&ex.text); + datlen = GET4((uchar*)&ex.data); + switch(GET4((uchar*)&ex.magic)){ + case Q_MAGIC: /* powerpc */ + islittle = 0; + break; + case E_MAGIC: /* arm */ + islittle = 1; + qflag = 0; + break; + case 0xA0E1: /* arm AIF */ + islittle = 1; + qflag = 0; + txtlen = GET4L((uchar*)&ex+(5*4))-sizeof(Exec); + datlen = GET4L((uchar*)&ex+(6*4)); + break; + default: + fprint(2, "sqz: unknown magic for sqz: %8.8ux\n", GET4((uchar*)&ex.magic)); + exits("bad magic"); + } + if(qflag) + fprint(2, "PowerPC rules\n"); + if(islittle) + fprint(2, "Little endian\n"); + if(txtlen > sizeof(prog) || datlen > sizeof(prog) || txtlen+datlen > sizeof(prog)){ + fprint(2, "sqz: executable too big: %lud+%lud; increase Codebufsize in sqz.c\n", txtlen, datlen); + exits("size"); + } + if(dflag){ + seek(fd, txtlen, 1); + Read(fd, prog, datlen); + }else{ + Read(fd, prog, txtlen); + Read(fd, prog+txtlen, datlen); + } + close(fd); + asis = 0; + if(dflag) + n = datlen; + else if(tflag){ + n = txtlen; + asis = datlen; + }else + n = txtlen+datlen; + if(dflag || tflag){ + analyse((ulong*)prog, n/4, &sq3, &sq4, &top); + nst = squeeze((ulong*)prog, n/4, outbuf, top->v); + if(nst < 0) + exits("sqz"); + nsd = 0; + remap(&sq3); + remap(&sq4); + toptxt = topdat = top->v; + }else{ + analyse((ulong*)prog, txtlen/4, &sq3, &sq4, &top); + nst = squeeze((ulong*)prog, txtlen/4, outbuf, top->v); + if(nst < 0) + exits("sqz"); + toptxt = top->v; + remap(&sq3); + remap(&sq4); + if(datlen/4){ + freehash(hash1); + freehash(hash2); + analyse((ulong*)(prog+txtlen), datlen/4, &sq5, &sq6, &top); + nsd = squeeze((ulong*)(prog+txtlen), datlen/4, outbuf+nst, top->v); + if(nsd < 0) + exits("sqz"); + topdat = top->v; + remap(&sq5); + remap(&sq6); + }else{ + nsd = 0; + topdat = 0; + } + } + ns = nst+nsd; + fprint(2, "%d/%d bytes\n", ns, n); + fprint(2, "%8.8lux csum\n", chksum); + if(!wflag) + exits(0); + PUT4(sqhdr.magic, SQMAGIC); + PUT4(sqhdr.toptxt, toptxt); + PUT4(sqhdr.sum, chksum); + PUT4(sqhdr.text, nst); + PUT4(sqhdr.topdat, topdat); + PUT4(sqhdr.data, nsd); + PUT4(sqhdr.asis, asis); + PUT4(sqhdr.flags, 0); + Write(1, &sqhdr, SQHDRLEN); + Write(1, &ex, sizeof(Exec)); + squirt(1, &sq3); + squirt(1, &sq4); + Write(1, outbuf, nst); + if(nsd){ + squirt(1, &sq5); + squirt(1, &sq6); + Write(1, outbuf+nst, nsd); + } + if(asis) + Write(1, prog+txtlen, asis); + exits(0); +} + +static void +analyse(ulong *prog, int nw, Squeeze *sq3, Squeeze *sq4, Word **top) +{ + Word *w, **hp, **sorts, **resorts; + ulong *rp, *ep; + ulong v; + int i, nv1, nv2, nv, nz; + + rp = prog; + ep = prog+nw; + nv = 0; + nz = 0; + while(rp < ep){ + if(islittle){ + v = GET4L((uchar*)rp); + }else{ + v = GET4((uchar*)rp); + } + rp++; + chksum += v; + if(v == 0){ + nz++; + if(0) + continue; + } + if(qflag){ + QREMAP(v); + } + for(hp = &hash1[v&HMASK]; (w = *hp) != nil; hp = &w->next) + if(w->v == v) + break; + if(w == nil){ + w = (Word*)malloc(sizeof(*w)); + w->v = v; + w->freq = 0; + w->code = 0; + w->next = nil; + *hp = w; + nv++; + } + w->freq++; + } + sorts = collate(hash1, nv); + fprint(2, "phase 1: %d/%d words (%d zero), %d top (%8.8lux)\n", nv, nw, nz, sorts[0]->freq, sorts[0]->v); + *top = sorts[0]; + nv1 = squeezetab(1, 0x900, sq3, sorts+1, nv-1)+1; + nv2 = 0; + for(i=nv1; i<nv; i++){ + v = sorts[i]->v >> 8; + for(hp = &hash2[v&HMASK]; (w = *hp) != nil; hp = &w->next) + if(w->v == v) + break; + if(w == nil){ + w = (Word*)malloc(sizeof(*w)); + w->v = v; + w->freq = 0; + w->code = 0; + w->next = nil; + *hp = w; + nv2++; + } + w->freq++; + } + free(sorts); + resorts = collate(hash2, nv2); + fprint(2, "phase 2: %d/%d\n", nv2, nv-nv1); + squeezetab(2, 0x200, sq4, resorts, nv2); + free(resorts); + fprint(2, "phase 3: 1 4-code, %d 12-codes, %d 20-codes, %d uncoded\n", + sq3->n, sq4->n, nv-(sq3->n+sq4->n+1)); +} + +static int +wdcmp(const void *a, const void *b) +{ + return (*(Word**)b)->freq - (*(Word**)a)->freq; +} + +static Word ** +collate(Word **tab, int nv) +{ + Word *w, **hp, **sorts; + int i; + + sorts = (Word**)malloc(nv*sizeof(Word**)); + i = 0; + for(hp = &tab[0]; hp < &tab[HSIZE]; hp++) + for(w = *hp; w != nil; w = w->next) + sorts[i++] = w; + qsort(sorts, nv, sizeof(*sorts), wdcmp); + if(debug > 1) + for(i=0; i<nv; i++) + fprint(2, "%d\t%d\t%8.8lux\n", i, sorts[i]->freq, sorts[i]->v); + return sorts; +} + +static int +tabcmp(const void *a, const void *b) +{ + ulong av, bv; + + av = (*(Word**)a)->v; + bv = (*(Word**)b)->v; + if(av > bv) + return 1; + if(av < bv) + return -1; + return 0; +} + +static int +squeezetab(int tabno, int base, Squeeze *sq, Word **sorts, int nv) +{ + int i; + + if(nv >= 7*256) + nv = 7*256; + memset(sq, 0, sizeof(*sq)); + for(i=0; i<nv; i++) + sq->rep[sq->n++] = sorts[i]; + qsort(sq->rep, sq->n, sizeof(*sq->rep), tabcmp); + for(i=0; i<sq->n; i++) + sq->rep[i]->code = base + i; + if(debug) + dumpsq(sq, tabno); + return sq->n; +} + +static void +dumpsq(Squeeze *sq, int n) +{ + int i; + + fprint(2, "table %d: %d entries\n", n, sq->n); + for(i=0; i<sq->n; i++) + fprint(2, "%.3x\t%8.8lux\t%lux\n", sq->rep[i]->code, sq->rep[i]->v, i? sq->rep[i]->v - sq->rep[i-1]->v: 0); +} + +static void +remap(Squeeze *sq) +{ + int i; + ulong v; + + if(sq->n){ + v = 0; + for(i=0; i<sq->n; i++){ + sq->tab[i] = sq->rep[i]->v - v; + v += sq->tab[i]; + } + } +} + +static Word * +squash(Word **tab, ulong v) +{ + Word *w, **hp; + + for(hp = &tab[v&0xFFFF]; (w = *hp) != nil; hp = &w->next) + if(w->v == v) + return w; + return nil; +} + +static void +freehash(Word **tab) +{ + Word *w, **hp; + + for(hp = &tab[0]; hp < &tab[HSIZE]; hp++) + while((w = *hp) != nil){ + *hp = w->next; + free(w); + } +} + +static int +squeeze(ulong *prog, int nw, uchar *out, ulong top) +{ + ulong *rp, *ep; + ulong v, bits; + ulong e1, e2, e3, e4; + Word *w; + uchar bytes[8], *bp, *wp; + int ctl, n; + + rp = prog; + ep = prog+nw; + bits = 0; + e1 = e2 = e3 = e4 = 0; + wp = out; + n = 0; + ctl = 0; + bp = bytes; + for(;;){ + if(n == 2){ + *wp++ = ctl; + if(0) + fprint(2, "%x\n", ctl); + memmove(wp, bytes, bp-bytes); + wp += bp-bytes; + bp = bytes; + ctl = 0; + n = 0; + } + ctl <<= 4; + n++; + if(rp >= ep){ + if(n == 1) + break; + continue; + } + if(islittle){ + v = GET4L((uchar*)rp); + }else{ + v = GET4((uchar*)rp); + } + rp++; + if(qflag){ + QREMAP(v); + } + if(v == top){ + e1++; + bits += 4; + ctl |= 0; + continue; + } + w = squash(hash1, v); + if(w && w->code){ + e2++; + bits += 4+8; + ctl |= w->code>>8; + *bp++ = w->code; + continue; + } + w = squash(hash2, v>>8); + if(w && w->code){ + e3++; + bits += 4+8+8; + ctl |= w->code>>8; + *bp++ = w->code; + *bp++ = v & 0xFF; + if(debug > 2) + fprint(2, "%x %8.8lux %8.8lux\n", w->code, w->v, v); + continue; + } + e4++; + bits += 4+32; + ctl |= 0x1; + bp[0] = v; + bp[1] = v>>8; + bp[2] = v>>16; + bp[3] = v>>24; + bp += 4; + } + fprint(2, "enc: %lud 4-bits, %lud 12-bits %lud 20-bits %lud 36-bits -- %ld bytes\n", + e1, e2, e3, e4, wp-out); + return wp-out; +} + +static void +squirt(int fd, Squeeze *sq) +{ + uchar b[7*256*5 + 2], rep[5], *p, *q; + ulong v; + int i; + + p = b+2; + for(i=0; i<sq->n; i++){ + v = sq->tab[i]; + q = rep; + do { + *q++ = v & 0x7F; + }while((v >>= 7) != 0); + do { + *p++ = *--q | 0x80; + }while(q != rep); + p[-1] &= ~0x80; + } + if(p > b+sizeof(b)) + abort(); + i = p-b; + b[0] = i>>8; + b[1] = i; + Write(fd, b, i); + fprint(2, "table: %d/%d\n", i, (sq->n+1)*4); +} + +static long +Read(int fd, void *buf, long nb) +{ + long n; + + n = read(fd, buf, nb); + if(n < 0){ + fprint(2, "sqz: %s: read error: %r\n", fname); + exits("read"); + } + if(n < nb){ + fprint(2, "sqz: %s: unexpected end-of-file\n", fname); + exits("read"); + } + return n; +} + +static void +Write(int fd, void *buf, long nb) +{ + if(write(fd, buf, nb) != nb){ + fprint(2, "sqz: write error: %r\n"); + exits("write err"); + } +} |
