diff options
Diffstat (limited to 'os/ip')
| -rw-r--r-- | os/ip/arp.c | 681 | ||||
| -rw-r--r-- | os/ip/bootp.c | 231 | ||||
| -rw-r--r-- | os/ip/compress.c | 520 | ||||
| -rw-r--r-- | os/ip/devip.c | 1419 | ||||
| -rw-r--r-- | os/ip/dhcp.c | 447 | ||||
| -rw-r--r-- | os/ip/eipconvtest.c | 152 | ||||
| -rw-r--r-- | os/ip/esp.c | 866 | ||||
| -rw-r--r-- | os/ip/ethermedium.c | 792 | ||||
| -rw-r--r-- | os/ip/gre.c | 282 | ||||
| -rw-r--r-- | os/ip/icmp.c | 490 | ||||
| -rw-r--r-- | os/ip/icmp6.c | 917 | ||||
| -rw-r--r-- | os/ip/igmp.c | 291 | ||||
| -rw-r--r-- | os/ip/ihbootp.c | 323 | ||||
| -rw-r--r-- | os/ip/il.c | 1408 | ||||
| -rw-r--r-- | os/ip/ip.c | 805 | ||||
| -rw-r--r-- | os/ip/ip.h | 673 | ||||
| -rw-r--r-- | os/ip/ipaux.c | 730 | ||||
| -rw-r--r-- | os/ip/ipifc.c | 1721 | ||||
| -rw-r--r-- | os/ip/ipmux.c | 857 | ||||
| -rw-r--r-- | os/ip/iproute.c | 852 | ||||
| -rw-r--r-- | os/ip/iprouter.c | 56 | ||||
| -rw-r--r-- | os/ip/ipv6.c | 747 | ||||
| -rw-r--r-- | os/ip/ipv6.h | 185 | ||||
| -rw-r--r-- | os/ip/kernel.h | 10 | ||||
| -rw-r--r-- | os/ip/loopbackmedium.c | 121 | ||||
| -rw-r--r-- | os/ip/netdevmedium.c | 153 | ||||
| -rw-r--r-- | os/ip/netlog.c | 263 | ||||
| -rw-r--r-- | os/ip/nullmedium.c | 39 | ||||
| -rw-r--r-- | os/ip/pktmedium.c | 79 | ||||
| -rw-r--r-- | os/ip/plan9.c | 36 | ||||
| -rw-r--r-- | os/ip/ppp.c | 1656 | ||||
| -rw-r--r-- | os/ip/ppp.h | 258 | ||||
| -rw-r--r-- | os/ip/pppmedium.c | 192 | ||||
| -rw-r--r-- | os/ip/ptclbsum.c | 72 | ||||
| -rw-r--r-- | os/ip/rudp.c | 1085 | ||||
| -rw-r--r-- | os/ip/tcp.c | 3177 | ||||
| -rw-r--r-- | os/ip/udp.c | 649 |
37 files changed, 23235 insertions, 0 deletions
diff --git a/os/ip/arp.c b/os/ip/arp.c new file mode 100644 index 00000000..11f4fb1e --- /dev/null +++ b/os/ip/arp.c @@ -0,0 +1,681 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ipv6.h" + +/* + * address resolution tables + */ + +enum +{ + NHASH = (1<<6), + NCACHE = 256, + + AOK = 1, + AWAIT = 2, +}; + +char *arpstate[] = +{ + "UNUSED", + "OK", + "WAIT", +}; + +/* + * one per Fs + */ +struct Arp +{ + QLock; + Fs *f; + Arpent *hash[NHASH]; + Arpent cache[NCACHE]; + Arpent *rxmt; + Proc *rxmitp; /* neib sol re-transmit proc */ + Rendez rxmtq; + Block *dropf, *dropl; +}; + +char *Ebadarp = "bad arp"; + +#define haship(s) ((s)[IPaddrlen-1]%NHASH) + +extern int ReTransTimer = RETRANS_TIMER; +static void rxmitproc(void *v); + +void +arpinit(Fs *f) +{ + f->arp = smalloc(sizeof(Arp)); + f->arp->f = f; + f->arp->rxmt = nil; + f->arp->dropf = f->arp->dropl = nil; + kproc("rxmitproc", rxmitproc, f->arp, 0); +} + +/* + * create a new arp entry for an ip address. 
+ */ +static Arpent* +newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt) +{ + uint t; + Block *next, *xp; + Arpent *a, *e, *f, **l; + Medium *m = ifc->m; + int empty; + + /* find oldest entry */ + e = &arp->cache[NCACHE]; + a = arp->cache; + t = a->utime; + for(f = a; f < e; f++){ + if(f->utime < t){ + t = f->utime; + a = f; + } + } + + /* dump waiting packets */ + xp = a->hold; + a->hold = nil; + + if(isv4(a->ip)){ + while(xp){ + next = xp->list; + freeblist(xp); + xp = next; + } + } + else { // queue icmp unreachable for rxmitproc later on, w/o arp lock + if(xp){ + if(arp->dropl == nil) + arp->dropf = xp; + else + arp->dropl->list = xp; + + for(next = xp->list; next; next = next->list) + xp = next; + arp->dropl = xp; + wakeup(&arp->rxmtq); + } + } + + /* take out of current chain */ + l = &arp->hash[haship(a->ip)]; + for(f = *l; f; f = f->hash){ + if(f == a){ + *l = a->hash; + break; + } + l = &f->hash; + } + + /* insert into new chain */ + l = &arp->hash[haship(ip)]; + a->hash = *l; + *l = a; + + memmove(a->ip, ip, sizeof(a->ip)); + a->utime = NOW; + a->ctime = 0; + a->type = m; + + a->rtime = NOW + ReTransTimer; + a->rxtsrem = MAX_MULTICAST_SOLICIT; + a->ifc = ifc; + a->ifcid = ifc->ifcid; + + /* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */ + if(!ipismulticast(a->ip) && addrxt){ + l = &arp->rxmt; + empty = (*l==nil); + + for(f = *l; f; f = f->nextrxt){ + if(f == a){ + *l = a->nextrxt; + break; + } + l = &f->nextrxt; + } + for(f = *l; f; f = f->nextrxt){ + l = &f->nextrxt; + } + *l = a; + if(empty) + wakeup(&arp->rxmtq); + } + + a->nextrxt = nil; + + return a; +} + +/* called with arp qlocked */ + +void +cleanarpent(Arp *arp, Arpent *a) +{ + Arpent *f, **l; + + a->utime = 0; + a->ctime = 0; + a->type = 0; + a->state = 0; + + /* take out of current chain */ + l = &arp->hash[haship(a->ip)]; + for(f = *l; f; f = f->hash){ + if(f == a){ + *l = a->hash; + break; + } + l = &f->hash; + } + + /* take out of re-transmit chain */ + l = 
&arp->rxmt; + for(f = *l; f; f = f->nextrxt){ + if(f == a){ + *l = a->nextrxt; + break; + } + l = &f->nextrxt; + } + a->nextrxt = nil; + a->hash = nil; + a->hold = nil; + a->last = nil; + a->ifc = nil; +} + +/* + * fill in the media address if we have it. Otherwise return an + * Arpent that represents the state of the address resolution FSM + * for ip. Add the packet to be sent onto the list of packets + * waiting for ip->mac to be resolved. + */ +Arpent* +arpget(Arp *arp, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *mac) +{ + int hash; + Arpent *a; + Medium *type = ifc->m; + uchar v6ip[IPaddrlen]; + + if(version == V4){ + v4tov6(v6ip, ip); + ip = v6ip; + } + + qlock(arp); + hash = haship(ip); + for(a = arp->hash[hash]; a; a = a->hash){ + if(memcmp(ip, a->ip, sizeof(a->ip)) == 0) + if(type == a->type) + break; + } + + if(a == nil){ + a = newarp6(arp, ip, ifc, (version != V4)); + a->state = AWAIT; + } + a->utime = NOW; + if(a->state == AWAIT){ + if(bp != nil){ + if(a->hold) + a->last->list = bp; + else + a->hold = bp; + a->last = bp; + bp->list = nil; + } + return a; /* return with arp qlocked */ + } + + memmove(mac, a->mac, a->type->maclen); + + /* remove old entries */ + if(NOW - a->ctime > 15*60*1000) + cleanarpent(arp, a); + + qunlock(arp); + return nil; +} + +/* + * called with arp locked + */ +void +arprelease(Arp *arp, Arpent*) +{ + qunlock(arp); +} + +/* + * Copy out the mac address from the Arpent. Return the + * block waiting to get sent to this mac address. 
+ * + * called with arp locked + */ +Block* +arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac) +{ + Block *bp; + Arpent *f, **l; + + if(!isv4(a->ip)){ + l = &arp->rxmt; + for(f = *l; f; f = f->nextrxt){ + if(f == a){ + *l = a->nextrxt; + break; + } + l = &f->nextrxt; + } + } + + memmove(a->mac, mac, type->maclen); + a->type = type; + a->state = AOK; + a->utime = NOW; + bp = a->hold; + a->hold = nil; + qunlock(arp); + + return bp; +} + +void +arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh) +{ + Arp *arp; + Route *r; + Arpent *a, *f, **l; + Ipifc *ifc; + Medium *type; + Block *bp, *next; + uchar v6ip[IPaddrlen]; + + arp = fs->arp; + + if(n != 6){ +// print("arp: len = %d\n", n); + return; + } + + switch(version){ + case V4: + r = v4lookup(fs, ip, nil); + v4tov6(v6ip, ip); + ip = v6ip; + break; + case V6: + r = v6lookup(fs, ip, nil); + break; + default: + panic("arpenter: version %d", version); + return; /* to supress warnings */ + } + + if(r == nil){ +// print("arp: no route for entry\n"); + return; + } + + ifc = r->ifc; + type = ifc->m; + + qlock(arp); + for(a = arp->hash[haship(ip)]; a; a = a->hash){ + if(a->type != type || (a->state != AWAIT && a->state != AOK)) + continue; + + if(ipcmp(a->ip, ip) == 0){ + a->state = AOK; + memmove(a->mac, mac, type->maclen); + + if(version == V6){ + /* take out of re-transmit chain */ + l = &arp->rxmt; + for(f = *l; f; f = f->nextrxt){ + if(f == a){ + *l = a->nextrxt; + break; + } + l = &f->nextrxt; + } + } + + a->ifc = ifc; + a->ifcid = ifc->ifcid; + bp = a->hold; + a->hold = nil; + if(version == V4) + ip += IPv4off; + a->utime = NOW; + a->ctime = a->utime; + qunlock(arp); + + while(bp){ + next = bp->list; + if(ifc != nil){ + if(waserror()){ + runlock(ifc); + nexterror(); + } + rlock(ifc); + if(ifc->m != nil) + ifc->m->bwrite(ifc, bp, version, ip); + else + freeb(bp); + runlock(ifc); + poperror(); + } else + freeb(bp); + bp = next; + } + return; + } + } + + if(refresh == 0){ + a = newarp6(arp, 
ip, ifc, 0); + a->state = AOK; + a->type = type; + a->ctime = NOW; + memmove(a->mac, mac, type->maclen); + } + + qunlock(arp); +} + +int +arpwrite(Fs *fs, char *s, int len) +{ + int n; + Route *r; + Arp *arp; + Block *bp; + Arpent *a, *fl, **l; + Medium *m; + char *f[4], buf[256]; + uchar ip[IPaddrlen], mac[MAClen]; + + arp = fs->arp; + + if(len == 0) + error(Ebadarp); + if(len >= sizeof(buf)) + len = sizeof(buf)-1; + strncpy(buf, s, len); + buf[len] = 0; + if(len > 0 && buf[len-1] == '\n') + buf[len-1] = 0; + + n = getfields(buf, f, 4, 1, " "); + if(strcmp(f[0], "flush") == 0){ + qlock(arp); + for(a = arp->cache; a < &arp->cache[NCACHE]; a++){ + memset(a->ip, 0, sizeof(a->ip)); + memset(a->mac, 0, sizeof(a->mac)); + a->hash = nil; + a->state = 0; + a->utime = 0; + while(a->hold != nil){ + bp = a->hold->list; + freeblist(a->hold); + a->hold = bp; + } + } + memset(arp->hash, 0, sizeof(arp->hash)); +// clear all pkts on these lists (rxmt, dropf/l) + arp->rxmt = nil; + arp->dropf = nil; + arp->dropl = nil; + qunlock(arp); + } else if(strcmp(f[0], "add") == 0){ + switch(n){ + default: + error(Ebadarg); + case 3: + parseip(ip, f[1]); + if(isv4(ip)) + r = v4lookup(fs, ip+IPv4off, nil); + else + r = v6lookup(fs, ip, nil); + if(r == nil) + error("Destination unreachable"); + m = r->ifc->m; + n = parsemac(mac, f[2], m->maclen); + break; + case 4: + m = ipfindmedium(f[1]); + if(m == nil) + error(Ebadarp); + parseip(ip, f[2]); + n = parsemac(mac, f[3], m->maclen); + break; + } + + if(m->ares == nil) + error(Ebadarp); + + m->ares(fs, V6, ip, mac, n, 0); + } else if(strcmp(f[0], "del") == 0){ + if(n != 2) + error(Ebadarg); + + parseip(ip, f[1]); + qlock(arp); + + l = &arp->hash[haship(ip)]; + for(a = *l; a; a = a->hash){ + if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){ + *l = a->hash; + break; + } + l = &a->hash; + } + + if(a){ + /* take out of re-transmit chain */ + l = &arp->rxmt; + for(fl = *l; fl; fl = fl->nextrxt){ + if(fl == a){ + *l = a->nextrxt; + break; + } + l = 
&fl->nextrxt; + } + + a->nextrxt = nil; + a->hash = nil; + a->hold = nil; + a->last = nil; + a->ifc = nil; + memset(a->ip, 0, sizeof(a->ip)); + memset(a->mac, 0, sizeof(a->mac)); + } + qunlock(arp); + } else + error(Ebadarp); + + return len; +} + +enum +{ + Alinelen= 90, +}; + +char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n"; + +static void +convmac(char *p, uchar *mac, int n) +{ + while(n-- > 0) + p += sprint(p, "%2.2ux", *mac++); +} + +int +arpread(Arp *arp, char *p, ulong offset, int len) +{ + Arpent *a; + int n; + char mac[2*MAClen+1]; + + if(offset % Alinelen) + return 0; + + offset = offset/Alinelen; + len = len/Alinelen; + + n = 0; + for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){ + if(a->state == 0) + continue; + if(offset > 0){ + offset--; + continue; + } + len--; + qlock(arp); + convmac(mac, a->mac, a->type->maclen); + n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac); + qunlock(arp); + } + + return n; +} + +extern int +rxmitsols(Arp *arp) +{ + uint sflag; + Block *next, *xp; + Arpent *a, *b, **l; + Fs *f; + uchar ipsrc[IPaddrlen]; + Ipifc *ifc = nil; + long nrxt; + + qlock(arp); + f = arp->f; + + a = arp->rxmt; + if(a==nil){ + nrxt = 0; + goto dodrops; //return nrxt; + } + nrxt = a->rtime - NOW; + if(nrxt > 3*ReTransTimer/4) + goto dodrops; //return nrxt; + + for(; a; a = a->nextrxt){ + ifc = a->ifc; + assert(ifc != nil); + if((a->rxtsrem <= 0) || !(canrlock(ifc)) || (a->ifcid != ifc->ifcid)){ + xp = a->hold; + a->hold = nil; + + if(xp){ + if(arp->dropl == nil) + arp->dropf = xp; + else + arp->dropl->list = xp; + } + + cleanarpent(arp, a); + } + else + break; + } + if(a == nil) + goto dodrops; + + + qunlock(arp); /* for icmpns */ + if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC) + icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac); + + runlock(ifc); + qlock(arp); + + /* put to the end of re-transmit chain */ + l = &arp->rxmt; + for(b = *l; b; b = b->nextrxt){ + if(b == a){ + *l = a->nextrxt; + break; + } + 
l = &b->nextrxt; + } + for(b = *l; b; b = b->nextrxt){ + l = &b->nextrxt; + } + *l = a; + a->rxtsrem--; + a->nextrxt = nil; + a->rtime = NOW + ReTransTimer; + + a = arp->rxmt; + if(a==nil) + nrxt = 0; + else + nrxt = a->rtime - NOW; + +dodrops: + xp = arp->dropf; + arp->dropf = nil; + arp->dropl = nil; + qunlock(arp); + + for(; xp; xp = next){ + next = xp->list; + icmphostunr(f, ifc, xp, icmp6_adr_unreach, 1); + } + + return nrxt; + +} + +static int +rxready(void *v) +{ + Arp *arp = (Arp *) v; + int x; + + x = ((arp->rxmt != nil) || (arp->dropf != nil)); + + return x; +} + +static void +rxmitproc(void *v) +{ + Arp *arp = v; + long wakeupat; + + arp->rxmitp = up; + //print("arp rxmitproc started\n"); + if(waserror()){ + arp->rxmitp = 0; + pexit("hangup", 1); + } + for(;;){ + wakeupat = rxmitsols(arp); + if(wakeupat == 0) + sleep(&arp->rxmtq, rxready, v); + else if(wakeupat > ReTransTimer/4) + tsleep(&arp->rxmtq, return0, 0, wakeupat); + } +} + diff --git a/os/ip/bootp.c b/os/ip/bootp.c new file mode 100644 index 00000000..b7d3fcda --- /dev/null +++ b/os/ip/bootp.c @@ -0,0 +1,231 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "kernel.h" +#include "ip.h" + +static ulong fsip; +static ulong auip; +static ulong gwip; +static ulong ipmask; +static ulong ipaddr; + +enum +{ + Bootrequest = 1, + Bootreply = 2, +}; + +typedef struct Bootp +{ + /* udp.c oldheader */ + uchar raddr[IPaddrlen]; + uchar laddr[IPaddrlen]; + uchar rport[2]; + uchar lport[2]; + /* bootp itself */ + uchar op; /* opcode */ + uchar htype; /* hardware type */ + uchar hlen; /* hardware address len */ + uchar hops; /* hops */ + uchar xid[4]; /* a random number */ + uchar secs[2]; /* elapsed snce client started booting */ + uchar pad[2]; + uchar ciaddr[4]; /* client IP address (client tells server) */ + uchar yiaddr[4]; /* client IP address (server tells client) */ + uchar siaddr[4]; /* server IP address */ + uchar 
giaddr[4]; /* gateway IP address */ + uchar chaddr[16]; /* client hardware address */ + uchar sname[64]; /* server host name (optional) */ + uchar file[128]; /* boot file name */ + uchar vend[128]; /* vendor-specific goo */ +} Bootp; + +/* + * bootp returns: + * + * "fsip d.d.d.d + * auip d.d.d.d + * gwip d.d.d.d + * ipmask d.d.d.d + * ipaddr d.d.d.d" + * + * where d.d.d.d is the IP address in dotted decimal notation, and each + * address is followed by a newline. + */ + +static Bootp req; +static Proc* rcvprocp; +static int recv; +static int done; +static Rendez bootpr; +static char rcvbuf[512+2*IPaddrlen+2*2]; + +static void +rcvbootp(void *a) +{ + int n, fd; + Bootp *rp; + char *field[4]; + uchar ip[IPaddrlen]; + + if(waserror()) + pexit("", 0); + rcvprocp = up; /* store for postnote below */ + fd = (int)a; + while(done == 0) { + n = kread(fd, rcvbuf, sizeof(rcvbuf)); + if(n <= 0) + break; + rp = (Bootp*)rcvbuf; + /* currently ignore udp's header */ + if(memcmp(req.chaddr, rp->chaddr, 6) == 0 + && rp->htype == 1 && rp->hlen == 6 + && getfields((char*)rp->vend+4, field, 4, 1, " ") == 4 + && strncmp((char*)rp->vend, "p9 ", 4) == 0){ + if(ipaddr == 0) + ipaddr = nhgetl(rp->yiaddr); + if(ipmask == 0) + ipmask = parseip(ip, field[0]); + if(fsip == 0) + fsip = parseip(ip, field[1]); + if(auip == 0) + auip = parseip(ip, field[2]); + if(gwip == 0) + gwip = parseip(ip, field[3]); + break; + } + } + poperror(); + rcvprocp = nil; + + recv = 1; + wakeup(&bootpr); + pexit("", 0); +} + +static char* +rbootp(Ipifc *ifc) +{ + int cfd, dfd, tries, n; + char ia[5+3*24], im[16], *av[3]; + uchar nipaddr[4], ngwip[4], nipmask[4]; + char dir[Maxpath]; + + av[1] = "0.0.0.0"; + av[2] = "0.0.0.0"; + ipifcadd(ifc, av, 3, 0, nil); + + cfd = kannounce("udp!*!68", dir); + if(cfd < 0) + return "bootp announce failed"; + strcat(dir, "/data"); + if(kwrite(cfd, "headers", 7) < 0){ + kclose(cfd); + return "bootp ctl headers failed"; + } + kwrite(cfd, "oldheaders", 10); + dfd = kopen(dir, ORDWR); 
+ if(dfd < 0){ + kclose(cfd); + return "bootp open data failed"; + } + kclose(cfd); + + + /* create request */ + memset(&req, 0, sizeof(req)); + ipmove(req.raddr, IPv4bcast); + hnputs(req.rport, 67); + req.op = Bootrequest; + req.htype = 1; /* ethernet (all we know) */ + req.hlen = 6; /* ethernet (all we know) */ + + /* Hardware MAC address */ + memmove(req.chaddr, ifc->mac, 6); + /* Fill in the local IP address if we know it */ + ipv4local(ifc, req.ciaddr); + memset(req.file, 0, sizeof(req.file)); + strcpy((char*)req.vend, "p9 "); + + done = 0; + recv = 0; + + kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG); + + /* + * broadcast bootp's till we get a reply, + * or fixed number of tries + */ + tries = 0; + while(recv == 0) { + if(kwrite(dfd, &req, sizeof(req)) < 0) + print("bootp: write: %s\n", commonerror()); + + tsleep(&bootpr, return0, 0, 1000); + if(++tries > 10) { + print("bootp: timed out\n"); + break; + } + } + kclose(dfd); + done = 1; + if(rcvprocp != nil){ + postnote(rcvprocp, 1, "timeout", 0); + rcvprocp = nil; + } + + av[1] = "0.0.0.0"; + av[2] = "0.0.0.0"; + ipifcrem(ifc, av, 3); + + hnputl(nipaddr, ipaddr); + sprint(ia, "%V", nipaddr); + hnputl(nipmask, ipmask); + sprint(im, "%V", nipmask); + av[1] = ia; + av[2] = im; + ipifcadd(ifc, av, 3, 0, nil); + + if(gwip != 0) { + hnputl(ngwip, gwip); + n = snprint(ia, sizeof(ia), "add 0.0.0.0 0.0.0.0 %V", ngwip); + routewrite(ifc->conv->p->f, nil, ia, n); + } + return nil; +} + +static int +rbootpread(char *bp, ulong offset, int len) +{ + int n; + char *buf; + uchar a[4]; + + buf = smalloc(READSTR); + if(waserror()){ + free(buf); + nexterror(); + } + hnputl(a, fsip); + n = snprint(buf, READSTR, "fsip %15V\n", a); + hnputl(a, auip); + n += snprint(buf + n, READSTR-n, "auip %15V\n", a); + hnputl(a, gwip); + n += snprint(buf + n, READSTR-n, "gwip %15V\n", a); + hnputl(a, ipmask); + n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a); + hnputl(a, ipaddr); + snprint(buf + n, READSTR-n, "ipaddr %15V\n", a); + + 
len = readstr(offset, bp, len, buf); + poperror(); + free(buf); + return len; +} + +char* (*bootp)(Ipifc*) = rbootp; +int (*bootpread)(char*, ulong, int) = rbootpread; diff --git a/os/ip/compress.c b/os/ip/compress.c new file mode 100644 index 00000000..0a7bd7a3 --- /dev/null +++ b/os/ip/compress.c @@ -0,0 +1,520 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ppp.h" + +typedef struct Iphdr Iphdr; +typedef struct Tcphdr Tcphdr; +typedef struct Ilhdr Ilhdr; +typedef struct Hdr Hdr; +typedef struct Tcpc Tcpc; + +struct Iphdr +{ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + uchar ttl; /* Time to live */ + uchar proto; /* Protocol */ + uchar cksum[2]; /* Header checksum */ + ulong src; /* Ip source (byte ordering unimportant) */ + ulong dst; /* Ip destination (byte ordering unimportant) */ +}; + +struct Tcphdr +{ + ulong ports; /* defined as a ulong to make comparisons easier */ + uchar seq[4]; + uchar ack[4]; + uchar flag[2]; + uchar win[2]; + uchar cksum[2]; + uchar urg[2]; +}; + +struct Ilhdr +{ + uchar sum[2]; /* Checksum including header */ + uchar len[2]; /* Packet length */ + uchar type; /* Packet type */ + uchar spec; /* Special */ + uchar src[2]; /* Src port */ + uchar dst[2]; /* Dst port */ + uchar id[4]; /* Sequence id */ + uchar ack[4]; /* Acked sequence */ +}; + +enum +{ + URG = 0x20, /* Data marked urgent */ + ACK = 0x10, /* Aknowledge is valid */ + PSH = 0x08, /* Whole data pipe is pushed */ + RST = 0x04, /* Reset connection */ + SYN = 0x02, /* Pkt. 
is synchronise */ + FIN = 0x01, /* Start close down */ + + IP_DF = 0x4000, /* Don't fragment */ + + IP_TCPPROTO = 6, + IP_ILPROTO = 40, + IL_IPHDR = 20, +}; + +struct Hdr +{ + uchar buf[128]; + Iphdr *ip; + Tcphdr *tcp; + int len; +}; + +struct Tcpc +{ + uchar lastrecv; + uchar lastxmit; + uchar basexmit; + uchar err; + uchar compressid; + Hdr t[MAX_STATES]; + Hdr r[MAX_STATES]; +}; + +enum +{ /* flag bits for what changed in a packet */ + NEW_U=(1<<0), /* tcp only */ + NEW_W=(1<<1), /* tcp only */ + NEW_A=(1<<2), /* il tcp */ + NEW_S=(1<<3), /* tcp only */ + NEW_P=(1<<4), /* tcp only */ + NEW_I=(1<<5), /* il tcp */ + NEW_C=(1<<6), /* il tcp */ + NEW_T=(1<<7), /* il only */ + TCP_PUSH_BIT = 0x10, +}; + +/* reserved, special-case values of above for tcp */ +#define SPECIAL_I (NEW_S|NEW_W|NEW_U) /* echoed interactive traffic */ +#define SPECIAL_D (NEW_S|NEW_A|NEW_W|NEW_U) /* unidirectional data */ +#define SPECIALS_MASK (NEW_S|NEW_A|NEW_W|NEW_U) + +int +encode(void *p, ulong n) +{ + uchar *cp; + + cp = p; + if(n >= 256 || n == 0) { + *cp++ = 0; + cp[0] = n >> 8; + cp[1] = n; + return 3; + } else + *cp = n; + return 1; +} + +#define DECODEL(f) { \ + if (*cp == 0) {\ + hnputl(f, nhgetl(f) + ((cp[1] << 8) | cp[2])); \ + cp += 3; \ + } else { \ + hnputl(f, nhgetl(f) + (ulong)*cp++); \ + } \ +} +#define DECODES(f) { \ + if (*cp == 0) {\ + hnputs(f, nhgets(f) + ((cp[1] << 8) | cp[2])); \ + cp += 3; \ + } else { \ + hnputs(f, nhgets(f) + (ulong)*cp++); \ + } \ +} + +ushort +tcpcompress(Tcpc *comp, Block *b, Fs *) +{ + Iphdr *ip; /* current packet */ + Tcphdr *tcp; /* current pkt */ + ulong iplen, tcplen, hlen; /* header length in bytes */ + ulong deltaS, deltaA; /* general purpose temporaries */ + ulong changes; /* change mask */ + uchar new_seq[16]; /* changes from last to current */ + uchar *cp; + Hdr *h; /* last packet */ + int i, j; + + /* + * Bail if this is not a compressible TCP/IP packet + */ + ip = (Iphdr*)b->rp; + iplen = (ip->vihl & 0xf) << 2; + tcp = 
(Tcphdr*)(b->rp + iplen); + tcplen = (tcp->flag[0] & 0xf0) >> 2; + hlen = iplen + tcplen; + if((tcp->flag[1] & (SYN|FIN|RST|ACK)) != ACK) + return Pip; /* connection control */ + + /* + * Packet is compressible, look for a connection + */ + changes = 0; + cp = new_seq; + j = comp->lastxmit; + h = &comp->t[j]; + if(ip->src != h->ip->src || ip->dst != h->ip->dst + || tcp->ports != h->tcp->ports) { + for(i = 0; i < MAX_STATES; ++i) { + j = (comp->basexmit + i) % MAX_STATES; + h = &comp->t[j]; + if(ip->src == h->ip->src && ip->dst == h->ip->dst + && tcp->ports == h->tcp->ports) + goto found; + } + + /* no connection, reuse the oldest */ + if(i == MAX_STATES) { + j = comp->basexmit; + j = (j + MAX_STATES - 1) % MAX_STATES; + comp->basexmit = j; + h = &comp->t[j]; + goto raise; + } + } +found: + + /* + * Make sure that only what we expect to change changed. + */ + if(ip->vihl != h->ip->vihl || ip->tos != h->ip->tos || + ip->ttl != h->ip->ttl || ip->proto != h->ip->proto) + goto raise; /* headers changed */ + if(iplen != sizeof(Iphdr) && memcmp(ip+1, h->ip+1, iplen - sizeof(Iphdr))) + goto raise; /* ip options changed */ + if(tcplen != sizeof(Tcphdr) && memcmp(tcp+1, h->tcp+1, tcplen - sizeof(Tcphdr))) + goto raise; /* tcp options changed */ + + if(tcp->flag[1] & URG) { + cp += encode(cp, nhgets(tcp->urg)); + changes |= NEW_U; + } else if(memcmp(tcp->urg, h->tcp->urg, sizeof(tcp->urg)) != 0) + goto raise; + if(deltaS = nhgets(tcp->win) - nhgets(h->tcp->win)) { + cp += encode(cp, deltaS); + changes |= NEW_W; + } + if(deltaA = nhgetl(tcp->ack) - nhgetl(h->tcp->ack)) { + if(deltaA > 0xffff) + goto raise; + cp += encode(cp, deltaA); + changes |= NEW_A; + } + if(deltaS = nhgetl(tcp->seq) - nhgetl(h->tcp->seq)) { + if (deltaS > 0xffff) + goto raise; + cp += encode(cp, deltaS); + changes |= NEW_S; + } + + /* + * Look for the special-case encodings. + */ + switch(changes) { + case 0: + /* + * Nothing changed. 
If this packet contains data and the last + * one didn't, this is probably a data packet following an + * ack (normal on an interactive connection) and we send it + * compressed. Otherwise it's probably a retransmit, + * retransmitted ack or window probe. Send it uncompressed + * in case the other side missed the compressed version. + */ + if(nhgets(ip->length) == nhgets(h->ip->length) || + nhgets(h->ip->length) != hlen) + goto raise; + break; + case SPECIAL_I: + case SPECIAL_D: + /* + * Actual changes match one of our special case encodings -- + * send packet uncompressed. + */ + goto raise; + case NEW_S | NEW_A: + if (deltaS == deltaA && + deltaS == nhgets(h->ip->length) - hlen) { + /* special case for echoed terminal traffic */ + changes = SPECIAL_I; + cp = new_seq; + } + break; + case NEW_S: + if (deltaS == nhgets(h->ip->length) - hlen) { + /* special case for data xfer */ + changes = SPECIAL_D; + cp = new_seq; + } + break; + } + deltaS = nhgets(ip->id) - nhgets(h->ip->id); + if(deltaS != 1) { + cp += encode(cp, deltaS); + changes |= NEW_I; + } + if (tcp->flag[1] & PSH) + changes |= TCP_PUSH_BIT; + /* + * Grab the cksum before we overwrite it below. Then update our + * state with this packet's header. + */ + deltaA = nhgets(tcp->cksum); + memmove(h->buf, b->rp, hlen); + h->len = hlen; + h->tcp = (Tcphdr*)(h->buf + iplen); + + /* + * We want to use the original packet as our compressed packet. (cp - + * new_seq) is the number of bytes we need for compressed sequence + * numbers. In addition we need one byte for the change mask, one + * for the connection id and two for the tcp checksum. So, (cp - + * new_seq) + 4 bytes of header are needed. hlen is how many bytes + * of the original packet to toss so subtract the two to get the new + * packet size. The temporaries are gross -egs. 
+ */ + deltaS = cp - new_seq; + cp = b->rp; + if(comp->lastxmit != j || comp->compressid == 0) { + comp->lastxmit = j; + hlen -= deltaS + 4; + cp += hlen; + *cp++ = (changes | NEW_C); + *cp++ = j; + } else { + hlen -= deltaS + 3; + cp += hlen; + *cp++ = changes; + } + b->rp += hlen; + hnputs(cp, deltaA); + cp += 2; + memmove(cp, new_seq, deltaS); + return Pvjctcp; + +raise: + /* + * Update connection state & send uncompressed packet + */ + memmove(h->buf, b->rp, hlen); + h->tcp = (Tcphdr*)(h->buf + iplen); + h->len = hlen; + h->ip->proto = j; + comp->lastxmit = j; + return Pvjutcp; +} + +Block* +tcpuncompress(Tcpc *comp, Block *b, ushort type, Fs *f) +{ + uchar *cp, changes; + int i; + int iplen, len; + Iphdr *ip; + Tcphdr *tcp; + Hdr *h; + + if(type == Pvjutcp) { + /* + * Locate the saved state for this connection. If the state + * index is legal, clear the 'discard' flag. + */ + ip = (Iphdr*)b->rp; + if(ip->proto >= MAX_STATES) + goto raise; + iplen = (ip->vihl & 0xf) << 2; + tcp = (Tcphdr*)(b->rp + iplen); + comp->lastrecv = ip->proto; + len = iplen + ((tcp->flag[0] & 0xf0) >> 2); + comp->err = 0; +netlog(f, Logcompress, "uncompressed %d\n", comp->lastrecv); + /* + * Restore the IP protocol field then save a copy of this + * packet header. The checksum is zeroed in the copy so we + * don't have to zero it each time we process a compressed + * packet. + */ + ip->proto = IP_TCPPROTO; + h = &comp->r[comp->lastrecv]; + memmove(h->buf, b->rp, len); + h->tcp = (Tcphdr*)(h->buf + iplen); + h->len = len; + h->ip->cksum[0] = h->ip->cksum[1] = 0; + return b; + } + + cp = b->rp; + changes = *cp++; + if(changes & NEW_C) { + /* + * Make sure the state index is in range, then grab the + * state. If we have a good state index, clear the 'discard' + * flag. + */ + if(*cp >= MAX_STATES) + goto raise; + comp->err = 0; + comp->lastrecv = *cp++; +netlog(f, Logcompress, "newc %d\n", comp->lastrecv); + } else { + /* + * This packet has no state index. 
If we've had a + * line error since the last time we got an explicit state + * index, we have to toss the packet. + */ + if(comp->err != 0){ + freeblist(b); + return nil; + } +netlog(f, Logcompress, "oldc %d\n", comp->lastrecv); + } + + /* + * Find the state then fill in the TCP checksum and PUSH bit. + */ + h = &comp->r[comp->lastrecv]; + ip = h->ip; + tcp = h->tcp; + len = h->len; + memmove(tcp->cksum, cp, sizeof tcp->cksum); + cp += 2; + if(changes & TCP_PUSH_BIT) + tcp->flag[1] |= PSH; + else + tcp->flag[1] &= ~PSH; + /* + * Fix up the state's ack, seq, urg and win fields based on the + * changemask. + */ + switch (changes & SPECIALS_MASK) { + case SPECIAL_I: + i = nhgets(ip->length) - len; + hnputl(tcp->ack, nhgetl(tcp->ack) + i); + hnputl(tcp->seq, nhgetl(tcp->seq) + i); + break; + + case SPECIAL_D: + hnputl(tcp->seq, nhgetl(tcp->seq) + nhgets(ip->length) - len); + break; + + default: + if(changes & NEW_U) { + tcp->flag[1] |= URG; + if(*cp == 0){ + hnputs(tcp->urg, nhgets(cp+1)); + cp += 3; + }else + hnputs(tcp->urg, *cp++); + } else + tcp->flag[1] &= ~URG; + if(changes & NEW_W) + DECODES(tcp->win) + if(changes & NEW_A) + DECODEL(tcp->ack) + if(changes & NEW_S) + DECODEL(tcp->seq) + break; + } + + /* Update the IP ID */ + if(changes & NEW_I) + DECODES(ip->id) + else + hnputs(ip->id, nhgets(ip->id) + 1); + + /* + * At this point, cp points to the first byte of data in the packet. + * Back up cp by the TCP/IP header length to make room for the + * reconstructed header. + * We assume the packet we were handed has enough space to prepend + * up to 128 bytes of header. 
+ */ + b->rp = cp; + if(b->rp - b->base < len){ + b = padblock(b, len); + b = pullupblock(b, blocklen(b)); + } else + b->rp -= len; + hnputs(ip->length, BLEN(b)); + memmove(b->rp, ip, len); + + /* recompute the ip header checksum */ + ip = (Iphdr*)b->rp; + hnputs(ip->cksum, ipcsum(b->rp)); + return b; + +raise: + netlog(f, Logcompress, "Bad Packet!\n"); + comp->err = 1; + freeblist(b); + return nil; +} + +Tcpc* +compress_init(Tcpc *c) +{ + int i; + Hdr *h; + + if(c == nil){ + c = malloc(sizeof(Tcpc)); + if(c == nil) + return nil; + } + memset(c, 0, sizeof(*c)); + for(i = 0; i < MAX_STATES; i++){ + h = &c->t[i]; + h->ip = (Iphdr*)h->buf; + h->tcp = (Tcphdr*)(h->buf + 10); + h->len = 20; + h = &c->r[i]; + h->ip = (Iphdr*)h->buf; + h->tcp = (Tcphdr*)(h->buf + 10); + h->len = 20; + } + + return c; +} + +ushort +compress(Tcpc *tcp, Block *b, Fs *f) +{ + Iphdr *ip; + + /* + * Bail if this is not a compressible IP packet + */ + ip = (Iphdr*)b->rp; + if((nhgets(ip->frag) & 0x3fff) != 0) + return Pip; + + switch(ip->proto) { + case IP_TCPPROTO: + return tcpcompress(tcp, b, f); + default: + return Pip; + } +} + +int +compress_negotiate(Tcpc *tcp, uchar *data) +{ + if(data[0] != MAX_STATES - 1) + return -1; + tcp->compressid = data[1]; + return 0; +} diff --git a/os/ip/devip.c b/os/ip/devip.c new file mode 100644 index 00000000..8564e987 --- /dev/null +++ b/os/ip/devip.c @@ -0,0 +1,1419 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "../ip/ip.h" + +enum +{ + Qtopdir= 1, /* top level directory */ + Qtopbase, + Qarp= Qtopbase, + Qbootp, + Qndb, + Qiproute, + Qiprouter, + Qipselftab, + Qlog, + + Qprotodir, /* directory for a protocol */ + Qprotobase, + Qclone= Qprotobase, + Qstats, + + Qconvdir, /* directory for a conversation */ + Qconvbase, + Qctl= Qconvbase, + Qdata, + Qerr, + Qlisten, + Qlocal, + Qremote, + Qstatus, + Qsnoop, + + Logtype= 5, + Masktype= (1<<Logtype)-1, + Logconv= 12, + 
Maskconv= (1<<Logconv)-1, + Shiftconv= Logtype, + Logproto= 8, + Maskproto= (1<<Logproto)-1, + Shiftproto= Logtype + Logconv, + + Nfs= 32, +}; +#define TYPE(x) ( ((ulong)(x).path) & Masktype ) +#define CONV(x) ( (((ulong)(x).path) >> Shiftconv) & Maskconv ) +#define PROTO(x) ( (((ulong)(x).path) >> Shiftproto) & Maskproto ) +#define QID(p, c, y) ( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y) ) + +static char network[] = "network"; + +QLock fslock; +Fs *ipfs[Nfs]; /* attached fs's */ +Queue *qlog; + +extern void nullmediumlink(void); +extern void pktmediumlink(void); +static long ndbwrite(Fs*, char*, ulong, int); +static void closeconv(Conv*); + +static int +ip3gen(Chan *c, int i, Dir *dp) +{ + Qid q; + Conv *cv; + char *p; + + cv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)]; + if(cv->owner == nil) + kstrdup(&cv->owner, eve); + mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE); + + switch(i) { + default: + return -1; + case Qctl: + devdir(c, q, "ctl", 0, cv->owner, cv->perm, dp); + return 1; + case Qdata: + devdir(c, q, "data", qlen(cv->rq), cv->owner, cv->perm, dp); + return 1; + case Qerr: + devdir(c, q, "err", qlen(cv->eq), cv->owner, cv->perm, dp); + return 1; + case Qlisten: + devdir(c, q, "listen", 0, cv->owner, cv->perm, dp); + return 1; + case Qlocal: + p = "local"; + break; + case Qremote: + p = "remote"; + break; + case Qsnoop: + if(strcmp(cv->p->name, "ipifc") != 0) + return -1; + devdir(c, q, "snoop", qlen(cv->sq), cv->owner, 0400, dp); + return 1; + case Qstatus: + p = "status"; + break; + } + devdir(c, q, p, 0, cv->owner, 0444, dp); + return 1; +} + +static int +ip2gen(Chan *c, int i, Dir *dp) +{ + Qid q; + + switch(i) { + case Qclone: + mkqid(&q, QID(PROTO(c->qid), 0, Qclone), 0, QTFILE); + devdir(c, q, "clone", 0, network, 0666, dp); + return 1; + case Qstats: + mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE); + devdir(c, q, "stats", 0, network, 0444, dp); + return 1; + } + return -1; +} + +static int +ip1gen(Chan *c, int i, Dir 
*dp) +{ + Qid q; + char *p; + int prot; + int len = 0; + Fs *f; + extern ulong kerndate; + + f = ipfs[c->dev]; + + prot = 0666; + mkqid(&q, QID(0, 0, i), 0, QTFILE); + switch(i) { + default: + return -1; + case Qarp: + p = "arp"; + break; + case Qbootp: + p = "bootp"; + if(bootp == nil) + return 0; + break; + case Qndb: + p = "ndb"; + len = strlen(f->ndb); + q.vers = f->ndbvers; + break; + case Qiproute: + p = "iproute"; + break; + case Qipselftab: + p = "ipselftab"; + prot = 0444; + break; + case Qiprouter: + p = "iprouter"; + break; + case Qlog: + p = "log"; + break; + } + devdir(c, q, p, len, network, prot, dp); + if(i == Qndb && f->ndbmtime > kerndate) + dp->mtime = f->ndbmtime; + return 1; +} + +static int +ipgen(Chan *c, char*, Dirtab*, int, int s, Dir *dp) +{ + Qid q; + Conv *cv; + Fs *f; + + f = ipfs[c->dev]; + + switch(TYPE(c->qid)) { + case Qtopdir: + if(s == DEVDOTDOT){ + mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR); + sprint(up->genbuf, "#I%lud", c->dev); + devdir(c, q, up->genbuf, 0, network, 0555, dp); + return 1; + } + if(s < f->np) { + if(f->p[s]->connect == nil) + return 0; /* protocol with no user interface */ + mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR); + devdir(c, q, f->p[s]->name, 0, network, 0555, dp); + return 1; + } + s -= f->np; + return ip1gen(c, s+Qtopbase, dp); + case Qarp: + case Qbootp: + case Qndb: + case Qlog: + case Qiproute: + case Qiprouter: + case Qipselftab: + return ip1gen(c, TYPE(c->qid), dp); + case Qprotodir: + if(s == DEVDOTDOT){ + mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR); + sprint(up->genbuf, "#I%lud", c->dev); + devdir(c, q, up->genbuf, 0, network, 0555, dp); + return 1; + } + if(s < f->p[PROTO(c->qid)]->ac) { + cv = f->p[PROTO(c->qid)]->conv[s]; + sprint(up->genbuf, "%d", s); + mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR); + devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp); + return 1; + } + s -= f->p[PROTO(c->qid)]->ac; + return ip2gen(c, s+Qprotobase, dp); + case Qclone: + case Qstats: + return ip2gen(c, 
TYPE(c->qid), dp); + case Qconvdir: + if(s == DEVDOTDOT){ + s = PROTO(c->qid); + mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR); + devdir(c, q, f->p[s]->name, 0, network, 0555, dp); + return 1; + } + return ip3gen(c, s+Qconvbase, dp); + case Qctl: + case Qdata: + case Qerr: + case Qlisten: + case Qlocal: + case Qremote: + case Qstatus: + case Qsnoop: + return ip3gen(c, TYPE(c->qid), dp); + } + return -1; +} + +static void +ipreset(void) +{ + nullmediumlink(); + pktmediumlink(); + + fmtinstall('i', eipfmt); + fmtinstall('I', eipfmt); + fmtinstall('E', eipfmt); + fmtinstall('V', eipfmt); + fmtinstall('M', eipfmt); +} + +static Fs* +ipgetfs(int dev) +{ + extern void (*ipprotoinit[])(Fs*); + Fs *f; + int i; + + if(dev >= Nfs) + return nil; + + qlock(&fslock); + if(ipfs[dev] == nil){ + f = smalloc(sizeof(Fs)); + ip_init(f); + arpinit(f); + netloginit(f); + for(i = 0; ipprotoinit[i]; i++) + ipprotoinit[i](f); + f->dev = dev; + ipfs[dev] = f; + } + qunlock(&fslock); + + return ipfs[dev]; +} + +IPaux* +newipaux(char *owner, char *tag) +{ + IPaux *a; + int n; + + a = smalloc(sizeof(*a)); + kstrdup(&a->owner, owner); + memset(a->tag, ' ', sizeof(a->tag)); + n = strlen(tag); + if(n > sizeof(a->tag)) + n = sizeof(a->tag); + memmove(a->tag, tag, n); + return a; +} + +#define ATTACHER(c) (((IPaux*)((c)->aux))->owner) + +static Chan* +ipattach(char* spec) +{ + Chan *c; + int dev; + + dev = atoi(spec); + if(dev >= Nfs) + error("bad specification"); + + ipgetfs(dev); + c = devattach('I', spec); + mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR); + c->dev = dev; + + c->aux = newipaux(commonuser(), "none"); + + return c; +} + +static Walkqid* +ipwalk(Chan* c, Chan *nc, char **name, int nname) +{ + IPaux *a = c->aux; + Walkqid* w; + + w = devwalk(c, nc, name, nname, nil, 0, ipgen); + if(w != nil && w->clone != nil) + w->clone->aux = newipaux(a->owner, a->tag); + return w; +} + +static int +ipstat(Chan* c, uchar* db, int n) +{ + return devstat(c, db, n, nil, 0, ipgen); +} + +static int 
+incoming(void* arg) +{ + Conv *conv; + + conv = arg; + return conv->incall != nil; +} + +static int m2p[] = { + [OREAD] 4, + [OWRITE] 2, + [ORDWR] 6 +}; + +static Chan* +ipopen(Chan* c, int omode) +{ + Conv *cv, *nc; + Proto *p; + int perm; + Fs *f; + + perm = m2p[omode&3]; + + f = ipfs[c->dev]; + + switch(TYPE(c->qid)) { + default: + break; + case Qndb: + if(omode & (OWRITE|OTRUNC) && !iseve()) + error(Eperm); + if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC)) + f->ndb[0] = 0; + break; + case Qlog: + netlogopen(f); + break; + case Qiprouter: + iprouteropen(f); + break; + case Qiproute: + break; + case Qtopdir: + case Qprotodir: + case Qconvdir: + case Qstatus: + case Qremote: + case Qlocal: + case Qstats: + case Qbootp: + case Qipselftab: + if(omode != OREAD) + error(Eperm); + break; + case Qsnoop: + if(omode != OREAD) + error(Eperm); + p = f->p[PROTO(c->qid)]; + cv = p->conv[CONV(c->qid)]; + if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve()) + error(Eperm); + incref(&cv->snoopers); + break; + case Qclone: + p = f->p[PROTO(c->qid)]; + qlock(p); + if(waserror()){ + qunlock(p); + nexterror(); + } + cv = Fsprotoclone(p, ATTACHER(c)); + qunlock(p); + poperror(); + if(cv == nil) { + error(Enodev); + break; + } + mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE); + break; + case Qdata: + case Qctl: + case Qerr: + p = f->p[PROTO(c->qid)]; + qlock(p); + cv = p->conv[CONV(c->qid)]; + qlock(cv); + if(waserror()) { + qunlock(cv); + qunlock(p); + nexterror(); + } + if((perm & (cv->perm>>6)) != perm) { + if(strcmp(ATTACHER(c), cv->owner) != 0) + error(Eperm); + if((perm & cv->perm) != perm) + error(Eperm); + + } + cv->inuse++; + if(cv->inuse == 1){ + kstrdup(&cv->owner, ATTACHER(c)); + cv->perm = 0660; + } + qunlock(cv); + qunlock(p); + poperror(); + break; + case Qlisten: + cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)]; + if((perm & (cv->perm>>6)) != perm) { + if(strcmp(ATTACHER(c), cv->owner) != 0) + error(Eperm); + if((perm & cv->perm) != perm) + error(Eperm); + + } + + 
if(cv->state != Announced) + error("not announced"); + + if(waserror()){ + closeconv(cv); + nexterror(); + } + qlock(cv); + cv->inuse++; + qunlock(cv); + + nc = nil; + while(nc == nil) { + /* give up if we got a hangup */ + if(qisclosed(cv->rq)) + error("listen hungup"); + + qlock(&cv->listenq); + if(waserror()) { + qunlock(&cv->listenq); + nexterror(); + } + + /* wait for a connect */ + sleep(&cv->listenr, incoming, cv); + + qlock(cv); + nc = cv->incall; + if(nc != nil){ + cv->incall = nc->next; + mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE); + kstrdup(&cv->owner, ATTACHER(c)); + } + qunlock(cv); + + qunlock(&cv->listenq); + poperror(); + } + closeconv(cv); + poperror(); + break; + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; +} + +static int +ipwstat(Chan *c, uchar *dp, int n) +{ + Dir *d; + Conv *cv; + Fs *f; + Proto *p; + + f = ipfs[c->dev]; + switch(TYPE(c->qid)) { + default: + error(Eperm); + break; + case Qctl: + case Qdata: + break; + } + + d = smalloc(sizeof(*d)+n); + if(waserror()){ + free(d); + nexterror(); + } + n = convM2D(dp, n, d, (char*)&d[1]); + if(n == 0) + error(Eshortstat); + p = f->p[PROTO(c->qid)]; + cv = p->conv[CONV(c->qid)]; + if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0) + error(Eperm); + if(!emptystr(d->uid)) + kstrdup(&cv->owner, d->uid); + if(d->mode != ~0UL) + cv->perm = d->mode & 0777; + poperror(); + free(d); + return n; +} + +static void +closeconv(Conv *cv) +{ + Conv *nc; + Ipmulti *mp; + + qlock(cv); + + if(--cv->inuse > 0) { + qunlock(cv); + return; + } + + /* close all incoming calls since no listen will ever happen */ + for(nc = cv->incall; nc; nc = cv->incall){ + cv->incall = nc->next; + closeconv(nc); + } + cv->incall = nil; + + kstrdup(&cv->owner, network); + cv->perm = 0660; + + while((mp = cv->multi) != nil) + ipifcremmulti(cv, mp->ma, mp->ia); + + cv->r = nil; + cv->rgen = 0; + cv->p->close(cv); + cv->state = Idle; + qunlock(cv); +} + +static void +ipclose(Chan* c) +{ 
+ Fs *f; + + f = ipfs[c->dev]; + switch(TYPE(c->qid)) { + default: + break; + case Qlog: + if(c->flag & COPEN) + netlogclose(f); + break; + case Qiprouter: + if(c->flag & COPEN) + iprouterclose(f); + break; + case Qdata: + case Qctl: + case Qerr: + if(c->flag & COPEN) + closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]); + break; + case Qsnoop: + if(c->flag & COPEN) + decref(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers); + break; + } + free(((IPaux*)c->aux)->owner); + free(c->aux); +} + +enum +{ + Statelen= 32*1024, +}; + +static long +ipread(Chan *ch, void *a, long n, vlong off) +{ + Conv *c; + Proto *x; + char *buf, *p; + long rv; + Fs *f; + ulong offset = off; + + f = ipfs[ch->dev]; + + p = a; + switch(TYPE(ch->qid)) { + default: + error(Eperm); + case Qtopdir: + case Qprotodir: + case Qconvdir: + return devdirread(ch, a, n, 0, 0, ipgen); + case Qarp: + return arpread(f->arp, a, offset, n); + case Qbootp: + return bootpread(a, offset, n); + case Qndb: + return readstr(offset, a, n, f->ndb); + case Qiproute: + return routeread(f, a, offset, n); + case Qiprouter: + return iprouterread(f, a, n); + case Qipselftab: + return ipselftabread(f, a, offset, n); + case Qlog: + return netlogread(f, a, offset, n); + case Qctl: + sprint(up->genbuf, "%lud", CONV(ch->qid)); + return readstr(offset, p, n, up->genbuf); + case Qremote: + buf = smalloc(Statelen); + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + if(x->remote == nil) { + sprint(buf, "%I!%d\n", c->raddr, c->rport); + } else { + (*x->remote)(c, buf, Statelen-2); + } + rv = readstr(offset, p, n, buf); + free(buf); + return rv; + case Qlocal: + buf = smalloc(Statelen); + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + if(x->local == nil) { + sprint(buf, "%I!%d\n", c->laddr, c->lport); + } else { + (*x->local)(c, buf, Statelen-2); + } + rv = readstr(offset, p, n, buf); + free(buf); + return rv; + case Qstatus: + buf = smalloc(Statelen); + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + 
(*x->state)(c, buf, Statelen-2); + rv = readstr(offset, p, n, buf); + free(buf); + return rv; + case Qdata: + c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)]; + return qread(c->rq, a, n); + case Qerr: + c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)]; + return qread(c->eq, a, n); + case Qsnoop: + c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)]; + return qread(c->sq, a, n); + case Qstats: + x = f->p[PROTO(ch->qid)]; + if(x->stats == nil) + error("stats not implemented"); + buf = smalloc(Statelen); + (*x->stats)(x, buf, Statelen); + rv = readstr(offset, p, n, buf); + free(buf); + return rv; + } +} + +static Block* +ipbread(Chan* ch, long n, ulong offset) +{ + Conv *c; + Proto *x; + Fs *f; + + switch(TYPE(ch->qid)){ + case Qdata: + f = ipfs[ch->dev]; + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + return qbread(c->rq, n); + default: + return devbread(ch, n, offset); + } +} + +/* + * set local address to be that of the ifc closest to remote address + */ +static void +setladdr(Conv* c) +{ + findlocalip(c->p->f, c->laddr, c->raddr); +} + +/* + * set a local port making sure the quad of raddr,rport,laddr,lport is unique + */ +static char* +setluniqueport(Conv* c, int lport) +{ + Proto *p; + Conv *xp; + int x; + + p = c->p; + + qlock(p); + for(x = 0; x < p->nc; x++){ + xp = p->conv[x]; + if(xp == nil) + break; + if(xp == c) + continue; + if((xp->state == Connected || xp->state == Announced) + && xp->lport == lport + && xp->rport == c->rport + && ipcmp(xp->raddr, c->raddr) == 0 + && ipcmp(xp->laddr, c->laddr) == 0){ + qunlock(p); + return "address in use"; + } + } + c->lport = lport; + qunlock(p); + return nil; +} + +/* + * pick a local port and set it + */ +static void +setlport(Conv* c) +{ + Proto *p; + ushort *pp; + int x, found; + + p = c->p; + if(c->restricted) + pp = &p->nextrport; + else + pp = &p->nextport; + qlock(p); + for(;;(*pp)++){ + /* + * Fsproto initialises p->nextport to 0 and the restricted + * ports (p->nextrport) to 600. 
+ * Restricted ports must lie between 600 and 1024. + * For the initial condition or if the unrestricted port number + * has wrapped round, select a random port between 5000 and 1<<15 + * to start at. + */ + if(c->restricted){ + if(*pp >= 1024) + *pp = 600; + } + else while(*pp < 5000) + *pp = nrand(1<<15); + + found = 0; + for(x = 0; x < p->nc; x++){ + if(p->conv[x] == nil) + break; + if(p->conv[x]->lport == *pp){ + found = 1; + break; + } + } + if(!found) + break; + } + c->lport = (*pp)++; + qunlock(p); +} + +/* + * set a local address and port from a string of the form + * [address!]port[!r] + */ +static char* +setladdrport(Conv* c, char* str, int announcing) +{ + char *p; + char *rv; + ushort lport; + uchar addr[IPaddrlen]; + + rv = nil; + + /* + * ignore restricted part if it exists. it's + * meaningless on local ports. + */ + p = strchr(str, '!'); + if(p != nil){ + *p++ = 0; + if(strcmp(p, "r") == 0) + p = nil; + } + + c->lport = 0; + if(p == nil){ + if(announcing) + ipmove(c->laddr, IPnoaddr); + else + setladdr(c); + p = str; + } else { + if(strcmp(str, "*") == 0) + ipmove(c->laddr, IPnoaddr); + else { + parseip(addr, str); + if(ipforme(c->p->f, addr)) + ipmove(c->laddr, addr); + else + return "not a local IP address"; + } + } + + /* one process can get all connections */ + if(announcing && strcmp(p, "*") == 0){ + if(!iseve()) + error(Eperm); + return setluniqueport(c, 0); + } + + lport = atoi(p); + if(lport <= 0) + setlport(c); + else + rv = setluniqueport(c, lport); + return rv; +} + +static char* +setraddrport(Conv* c, char* str) +{ + char *p; + + p = strchr(str, '!'); + if(p == nil) + return "malformed address"; + *p++ = 0; + parseip(c->raddr, str); + c->rport = atoi(p); + p = strchr(p, '!'); + if(p){ + if(strstr(p, "!r") != nil) + c->restricted = 1; + } + return nil; +} + +/* + * called by protocol connect routine to set addresses + */ +char* +Fsstdconnect(Conv *c, char *argv[], int argc) +{ + char *p; + + switch(argc) { + default: + return "bad args to 
connect"; + case 2: + p = setraddrport(c, argv[1]); + if(p != nil) + return p; + setladdr(c); + setlport(c); + break; + case 3: + p = setraddrport(c, argv[1]); + if(p != nil) + return p; + p = setladdrport(c, argv[2], 0); + if(p != nil) + return p; + } + + if((memcmp(c->raddr, v4prefix, IPv4off) == 0 && + memcmp(c->laddr, v4prefix, IPv4off) == 0) + || ipcmp(c->raddr, IPnoaddr) == 0) + c->ipversion = V4; + else + c->ipversion = V6; + + return nil; +} +/* + * initiate connection and sleep till its set up + */ +static int +connected(void* a) +{ + return ((Conv*)a)->state == Connected; +} +static void +connectctlmsg(Proto *x, Conv *c, Cmdbuf *cb) +{ + char *p; + + if(c->state != 0) + error(Econinuse); + c->state = Connecting; + c->cerr[0] = '\0'; + if(x->connect == nil) + error("connect not supported"); + p = x->connect(c, cb->f, cb->nf); + if(p != nil) + error(p); + + qunlock(c); + if(waserror()){ + qlock(c); + nexterror(); + } + sleep(&c->cr, connected, c); + qlock(c); + poperror(); + + if(c->cerr[0] != '\0') + error(c->cerr); +} + +/* + * called by protocol announce routine to set addresses + */ +char* +Fsstdannounce(Conv* c, char* argv[], int argc) +{ + memset(c->raddr, 0, sizeof(c->raddr)); + c->rport = 0; + switch(argc){ + default: + return "bad args to announce"; + case 2: + return setladdrport(c, argv[1], 1); + } + return nil; +} + +/* + * initiate announcement and sleep till its set up + */ +static int +announced(void* a) +{ + return ((Conv*)a)->state == Announced; +} +static void +announcectlmsg(Proto *x, Conv *c, Cmdbuf *cb) +{ + char *p; + + if(c->state != 0) + error(Econinuse); + c->state = Announcing; + c->cerr[0] = '\0'; + if(x->announce == nil) + error("announce not supported"); + p = x->announce(c, cb->f, cb->nf); + if(p != nil) + error(p); + + qunlock(c); + if(waserror()){ + qlock(c); + nexterror(); + } + sleep(&c->cr, announced, c); + qlock(c); + poperror(); + + if(c->cerr[0] != '\0') + error(c->cerr); +} + +/* + * called by protocol bind routine to 
set addresses + */ +char* +Fsstdbind(Conv* c, char* argv[], int argc) +{ + switch(argc){ + default: + return "bad args to bind"; + case 2: + return setladdrport(c, argv[1], 0); + } + return nil; +} + +static void +bindctlmsg(Proto *x, Conv *c, Cmdbuf *cb) +{ + char *p; + + if(x->bind == nil) + p = Fsstdbind(c, cb->f, cb->nf); + else + p = x->bind(c, cb->f, cb->nf); + if(p != nil) + error(p); +} + +static void +tosctlmsg(Conv *c, Cmdbuf *cb) +{ + if(cb->nf < 2) + c->tos = 0; + else + c->tos = atoi(cb->f[1]); +} + +static void +ttlctlmsg(Conv *c, Cmdbuf *cb) +{ + if(cb->nf < 2) + c->ttl = MAXTTL; + else + c->ttl = atoi(cb->f[1]); +} + +static long +ipwrite(Chan* ch, void *v, long n, vlong off) +{ + Conv *c; + Proto *x; + char *p; + Cmdbuf *cb; + uchar ia[IPaddrlen], ma[IPaddrlen]; + Fs *f; + char *a; + + a = v; + f = ipfs[ch->dev]; + + switch(TYPE(ch->qid)){ + default: + error(Eperm); + case Qdata: + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + + if(c->wq == nil) + error(Eperm); + + qwrite(c->wq, a, n); + break; + case Qarp: + return arpwrite(f, a, n); + case Qiproute: + return routewrite(f, ch, a, n); + case Qlog: + netlogctl(f, a, n); + return n; + case Qndb: + return ndbwrite(f, a, off, n); + case Qctl: + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + cb = parsecmd(a, n); + + qlock(c); + if(waserror()) { + qunlock(c); + free(cb); + nexterror(); + } + if(cb->nf < 1) + error("short control request"); + if(strcmp(cb->f[0], "connect") == 0) + connectctlmsg(x, c, cb); + else if(strcmp(cb->f[0], "announce") == 0) + announcectlmsg(x, c, cb); + else if(strcmp(cb->f[0], "bind") == 0) + bindctlmsg(x, c, cb); + else if(strcmp(cb->f[0], "ttl") == 0) + ttlctlmsg(c, cb); + else if(strcmp(cb->f[0], "tos") == 0) + tosctlmsg(c, cb); + else if(strcmp(cb->f[0], "ignoreadvice") == 0) + c->ignoreadvice = 1; + else if(strcmp(cb->f[0], "addmulti") == 0){ + if(cb->nf < 2) + error("addmulti needs interface address"); + if(cb->nf == 2){ + 
if(!ipismulticast(c->raddr)) + error("addmulti for a non multicast address"); + parseip(ia, cb->f[1]); + ipifcaddmulti(c, c->raddr, ia); + } else { + parseip(ma, cb->f[2]); + if(!ipismulticast(ma)) + error("addmulti for a non multicast address"); + parseip(ia, cb->f[1]); + ipifcaddmulti(c, ma, ia); + } + } else if(strcmp(cb->f[0], "remmulti") == 0){ + if(cb->nf < 2) + error("remmulti needs interface address"); + if(!ipismulticast(c->raddr)) + error("remmulti for a non multicast address"); + parseip(ia, cb->f[1]); + ipifcremmulti(c, c->raddr, ia); + } else if(x->ctl != nil) { + p = x->ctl(c, cb->f, cb->nf); + if(p != nil) + error(p); + } else + error("unknown control request"); + qunlock(c); + free(cb); + poperror(); + } + return n; +} + +static long +ipbwrite(Chan* ch, Block* bp, ulong offset) +{ + Conv *c; + Proto *x; + Fs *f; + int n; + + switch(TYPE(ch->qid)){ + case Qdata: + f = ipfs[ch->dev]; + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + + if(c->wq == nil) + error(Eperm); + + if(bp->next) + bp = concatblock(bp); + n = BLEN(bp); + qbwrite(c->wq, bp); + return n; + default: + return devbwrite(ch, bp, offset); + } +} + +Dev ipdevtab = { + 'I', + "ip", + + ipreset, + devinit, + devshutdown, + ipattach, + ipwalk, + ipstat, + ipopen, + devcreate, + ipclose, + ipread, + ipbread, + ipwrite, + ipbwrite, + devremove, + ipwstat, +}; + +int +Fsproto(Fs *f, Proto *p) +{ + if(f->np >= Maxproto) + return -1; + + p->f = f; + + if(p->ipproto > 0){ + if(f->t2p[p->ipproto] != nil) + return -1; + f->t2p[p->ipproto] = p; + } + + p->qid.type = QTDIR; + p->qid.path = QID(f->np, 0, Qprotodir); + p->conv = malloc(sizeof(Conv*)*(p->nc+1)); + if(p->conv == nil) + panic("Fsproto"); + + p->x = f->np; + p->nextport = 0; + p->nextrport = 600; + f->p[f->np++] = p; + + return 0; +} + +/* + * return true if this protocol is + * built in + */ +int +Fsbuiltinproto(Fs* f, uchar proto) +{ + return f->t2p[proto] != nil; +} + +/* + * called with protocol locked + */ +Conv* 
+Fsprotoclone(Proto *p, char *user) +{ + Conv *c, **pp, **ep; + +retry: + c = nil; + ep = &p->conv[p->nc]; + for(pp = p->conv; pp < ep; pp++) { + c = *pp; + if(c == nil){ + c = malloc(sizeof(Conv)); + if(c == nil) + error(Enomem); + qlock(c); + c->p = p; + c->x = pp - p->conv; + if(p->ptclsize != 0){ + c->ptcl = malloc(p->ptclsize); + if(c->ptcl == nil) { + free(c); + error(Enomem); + } + } + *pp = c; + p->ac++; + c->eq = qopen(1024, Qmsg, 0, 0); + (*p->create)(c); + break; + } + if(canqlock(c)){ + /* + * make sure both processes and protocol + * are done with this Conv + */ + if(c->inuse == 0 && (p->inuse == nil || (*p->inuse)(c) == 0)) + break; + + qunlock(c); + } + } + if(pp >= ep) { + if(p->gc != nil && (*p->gc)(p)) + goto retry; + return nil; + } + + c->inuse = 1; + kstrdup(&c->owner, user); + c->perm = 0660; + c->state = Idle; + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + c->r = nil; + c->rgen = 0; + c->lport = 0; + c->rport = 0; + c->restricted = 0; + c->ttl = MAXTTL; + c->tos = DFLTTOS; + qreopen(c->rq); + qreopen(c->wq); + qreopen(c->eq); + + qunlock(c); + return c; +} + +int +Fsconnected(Conv* c, char* msg) +{ + if(msg != nil && *msg != '\0') + kstrcpy(c->cerr, msg, sizeof(c->cerr)); + + switch(c->state){ + + case Announcing: + c->state = Announced; + break; + + case Connecting: + c->state = Connected; + break; + } + + wakeup(&c->cr); + return 0; +} + +Proto* +Fsrcvpcol(Fs* f, uchar proto) +{ + if(f->ipmux) + return f->ipmux; + else + return f->t2p[proto]; +} + +Proto* +Fsrcvpcolx(Fs *f, uchar proto) +{ + return f->t2p[proto]; +} + +/* + * called with protocol locked + */ +Conv* +Fsnewcall(Conv *c, uchar *raddr, ushort rport, uchar *laddr, ushort lport, uchar version) +{ + Conv *nc; + Conv **l; + int i; + + qlock(c); + i = 0; + for(l = &c->incall; *l; l = &(*l)->next) + i++; + if(i >= Maxincall) { + qunlock(c); + return nil; + } + + /* find a free conversation */ + nc = Fsprotoclone(c->p, network); + if(nc == nil) { + qunlock(c); + return 
nil; + } + ipmove(nc->raddr, raddr); + nc->rport = rport; + ipmove(nc->laddr, laddr); + nc->lport = lport; + nc->next = nil; + *l = nc; + nc->state = Connected; + nc->ipversion = version; + + qunlock(c); + + wakeup(&c->listenr); + + return nc; +} + +static long +ndbwrite(Fs *f, char *a, ulong off, int n) +{ + if(off > strlen(f->ndb)) + error(Eio); + if(off+n >= sizeof(f->ndb)-1) + error(Eio); + memmove(f->ndb+off, a, n); + f->ndb[off+n] = 0; + f->ndbvers++; + f->ndbmtime = seconds(); + return n; +} + +ulong +scalednconv(void) +{ + if(conf.npage*BY2PG >= 128*MB) + return Nchans*4; + return Nchans; +} diff --git a/os/ip/dhcp.c b/os/ip/dhcp.c new file mode 100644 index 00000000..639e51bb --- /dev/null +++ b/os/ip/dhcp.c @@ -0,0 +1,447 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "kernel.h" +#include "ip.h" +#include "ppp.h" + +Ipaddr pppdns[2]; + +static ulong fsip; +static ulong auip; +static ulong gwip; +static ulong ipmask; +static ulong ipaddr; +static ulong dns1ip; +static ulong dns2ip; + +int dhcpmsgtype; +int debug=0; +enum +{ + Bootrequest = 1, + Bootreply = 2, +}; + +typedef struct Bootp +{ + /* udp.c oldheader */ + uchar raddr[IPaddrlen]; + uchar laddr[IPaddrlen]; + uchar rport[2]; + uchar lport[2]; + /* bootp itself */ + uchar op; /* opcode */ + uchar htype; /* hardware type */ + uchar hlen; /* hardware address len */ + uchar hops; /* hops */ + uchar xid[4]; /* a random number */ + uchar secs[2]; /* elapsed snce client started booting */ + uchar flags[2]; /* flags */ + uchar ciaddr[4]; /* client IP address (client tells server) */ + uchar yiaddr[4]; /* client IP address (server tells client) */ + uchar siaddr[4]; /* server IP address */ + uchar giaddr[4]; /* gateway IP address */ + uchar chaddr[16]; /* client hardware address */ + uchar sname[64]; /* server host name (optional) */ + uchar file[128]; /* boot file name */ + uchar vend[128]; /* vendor-specific goo 340 
*/ +} Bootp; + +static Bootp req; +static Proc* rcvprocp; +static int recv; +static int done; +static Rendez bootpr; +static char rcvbuf[512+2*IPaddrlen+2*2]; /* 576 */ +static uchar sid[4]; +static ulong iplease; + +/* + * bootp returns: + * + * "fsip d.d.d.d + * auip d.d.d.d + * gwip d.d.d.d + * ipmask d.d.d.d + * ipaddr d.d.d.d + * dns1ip d.d.d.d + * dns2ip d.d.d.d + * + * where d.d.d.d is the IP address in dotted decimal notation, and each + * address is followed by a newline. + Last change: SUN 13 Sep 2001 4:36 pm + */ + +/* + * Parse the vendor specific fields according to RFC 1084. + * We are overloading the "cookie server" to be the Inferno + * authentication server and the "resource location server" + * to be the Inferno file server. + * + * If the vendor specific field is formatted properly, it + * will being with the four bytes 99.130.83.99 and end with + * an 0xFF byte. + */ +static int +parsevend(uchar* pvend) +{ + uchar *vend=pvend; + int dhcpmsg=0; + /* The field must start with 99.130.83.99 to be compliant */ + if ((vend[0] != 99) || (vend[1] != 130) || (vend[2] != 83) || (vend[3] != 99)){ + print("bad bootp vendor field: %.2x%.2x%.2x%.2x", vend[0], vend[1], vend[2], vend[3]); + return -1; + } + + /* Skip over the magic cookie */ + vend += 4; + + while ((vend[0] != 0) && (vend[0] != 0xFF)) { + int i; +// + if(debug){ + print(">>>Opt[%d] [%d]", vend[0], vend[1]); + for(i=0; i<vend[1]; i++) + print(" %2.2x", vend[i+2]); + print("\n"); + } +// + switch (vend[0]) { + case 1: /* Subnet mask field */ + /* There must be only one subnet mask */ + if (vend[1] == 4) + ipmask = (vend[2]<<24)|(vend[3]<<16)| (vend[4]<<8)| vend[5]; + else{ + return -1; + } + break; + + case 3: /* Gateway/router field */ + /* We are only concerned with first address */ + if (vend[1] >0 && vend[1]%4==0) + gwip = (vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5]; + else + return -1; + break; + case 6: /* domain name server */ + if(vend[1]>0 && vend[1] %4==0){ + 
dns1ip=(vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5]; + if(vend[1]>4) + dns2ip=(vend[6]<<24)|(vend[7]<<16)|(vend[8]<<8)|vend[9]; + }else + return -1; + break; + + case 8: /* "Cookie server" (auth server) field */ + /* We are only concerned with first address */ + if (vend[1] > 0 && vend[1]%4==0) + auip = (vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5]; + else + return -1; + break; + + case 11: /* "Resource loc server" (file server) field */ + /* We are only concerned with first address */ + if (vend[1] > 0 && vend[1]%4==0) + fsip = (vend[2]<<24)| (vend[3]<<16)| (vend[4]<<8)| vend[5]; + else + return -1; + break; + case 51: /* ip lease time */ + if(vend[1]==4){ + iplease=(vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5]; + }else + return -1; + break; + case 53: /* DHCP message type */ + if(vend[1]==1) + dhcpmsg=vend[2]; + else + return -1; + break; + case 54: /* server identifier */ + if(vend[1]==4){ + memmove(sid, vend+2, 4); + }else + return -1; + break; + + default: /* Everything else stops us */ + break; + } + + /* Skip over the field */ + vend += vend[1] + 2; + } + if(debug) + print(">>>Opt[%d] [%d]\n", vend[0], vend[1]); + return dhcpmsg; +} + +static void +dispvend(uchar* pvend) +{ + uchar *vend=pvend; + + //print("<<<Magic : %2.2x%2.2x%2.2x%2.2x\n", vend[0], vend[1], vend[2], vend[3]); + + vend += 4; /* Skip over the magic cookie */ + while ((vend[0] != 0) && (vend[0] != 0xFF)) { + // int i; + // print("<<<Opt[%d] [%d]", vend[0], vend[1]); + //for(i=0; i<vend[1]; i++) + // print(" %2.2x", vend[i+2]); + //print("\n"); + + vend += vend[1] + 2; + } + //print("<<<Opt[ %2.2x] [%2.2x]\n", vend[0], vend[1]); +} + +static void +rcvbootp(void *a) +{ + int n, fd, dhcp; + Bootp *rp; + + if(waserror()) + pexit("", 0); + rcvprocp = up; /* store for postnote below */ + fd = (int)a; + while(done == 0) { + if(debug) + print("rcvbootp:looping\n"); + + n = kread(fd, rcvbuf, sizeof(rcvbuf)); + if(n <= 0) + break; + rp = (Bootp*)rcvbuf; + if (memcmp(req.chaddr, rp->chaddr, 
6) == 0 && rp->htype == 1 && rp->hlen == 6) { + ipaddr = (rp->yiaddr[0]<<24)| (rp->yiaddr[1]<<16)| (rp->yiaddr[2]<<8)| rp->yiaddr[3]; + if(debug) + print("ipaddr = %2.2x %2.2x %2.2x %2.2x \n", rp->yiaddr[0], rp->yiaddr[1], rp->yiaddr[2], rp->yiaddr[3]); + //memmove(req.siaddr, rp->siaddr, 4); /* siaddr */ + dhcp = parsevend(rp->vend); + + if(dhcpmsgtype < dhcp){ + dhcpmsgtype=dhcp; + recv = 1; + wakeup(&bootpr); + if(dhcp==0 || dhcp ==5 || dhcp == 6 ) + break; + } + } + } + poperror(); + rcvprocp = nil; + + if(debug) + print("rcvbootp exit\n"); + pexit("", 0); +} + +static char* +rbootp(Ipifc *ifc) +{ + int cfd, dfd, tries, n; + char ia[5+3*16], im[16], *av[3]; + uchar nipaddr[4], ngwip[4], nipmask[4]; + char dir[Maxpath]; + static uchar vend_rfc1048[] = { 99, 130, 83, 99 }; + uchar *vend; + + /* + * broadcast bootp's till we get a reply, + * or fixed number of tries + */ + if(debug) + print("dhcp: bootp() called\n"); + tries = 0; + av[1] = "0.0.0.0"; + av[2] = "0.0.0.0"; + ipifcadd(ifc, av, 3, 0, nil); + + cfd = kannounce("udp!*!68", dir); + if(cfd < 0) + return "dhcp announce failed"; + strcat(dir, "/data"); + if(kwrite(cfd, "headers", 7) < 0){ + kclose(cfd); + return "dhcp ctl headers failed"; + } + kwrite(cfd, "oldheaders", 10); + dfd = kopen(dir, ORDWR); + if(dfd < 0){ + kclose(cfd); + return "dhcp open data failed"; + } + kclose(cfd); + + while(tries<1){ + tries++; + memset(sid, 0, 4); + iplease=0; + dhcpmsgtype=-2; +/* DHCPDISCOVER*/ + done = 0; + recv = 0; + kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG); + /* Prepare DHCPDISCOVER */ + memset(&req, 0, sizeof(req)); + ipmove(req.raddr, IPv4bcast); + hnputs(req.rport, 67); + req.op = Bootrequest; + req.htype = 1; /* ethernet (all we know) */ + req.hlen = 6; /* ethernet (all we know) */ + + memmove(req.chaddr, ifc->mac, 6); /* Hardware MAC address */ + //ipv4local(ifc, req.ciaddr); /* Fill in the local IP address if we know it */ + memset(req.file, 0, sizeof(req.file)); + vend=req.vend; + memmove(vend, 
vend_rfc1048, 4); vend+=4; + *vend++=53; *vend++=1;*vend++=1; /* dhcp msg type==1, dhcpdiscover */ + + *vend++=61;*vend++=7;*vend++=1; /* client identifier */ + memmove(vend, ifc->mac, 6);vend+=6; + *vend=0xff; + + if(debug) + dispvend(req.vend); + for(n=0;n<4;n++){ + if(kwrite(dfd, &req, sizeof(req))<0) /* SEND DHCPDISCOVER */ + print("DHCPDISCOVER: %r"); + + tsleep(&bootpr, return0, 0, 1000); /* wait DHCPOFFER */ + if(debug) + print("[DHCP] DISCOVER: msgtype = %d\n", dhcpmsgtype); + + if(dhcpmsgtype==2) /* DHCPOFFER */ + break; + else if(dhcpmsgtype==0) /* bootp; NOTE(review): this return skips the kclose(dfd) and done = 1 below the loop - confirm intended */ + return nil; + else if(dhcpmsgtype== -2) /* time out */ + continue; + else + break; + + } + if(dhcpmsgtype!=2) + continue; + +/* DHCPREQUEST */ + memset(req.vend, 0, sizeof(req.vend)); + vend=req.vend; + memmove(vend, vend_rfc1048, 4);vend+=4; + + *vend++=53; *vend++=1;*vend++=3; /* dhcp msg type==3, dhcprequest */ + + *vend++=50; *vend++=4; /* requested ip address */ + *vend++=(ipaddr >> 24)&0xff; + *vend++=(ipaddr >> 16)&0xff; + *vend++=(ipaddr >> 8) & 0xff; + *vend++=ipaddr & 0xff; + + *vend++=51;*vend++=4; /* lease time */ + *vend++=(iplease>>24)&0xff; *vend++=(iplease>>16)&0xff; *vend++=(iplease>>8)&0xff; *vend++=iplease&0xff; + + *vend++=54; *vend++=4; /* server identifier */ + memmove(vend, sid, 4); vend+=4; + + *vend++=61;*vend++=07;*vend++=01; /* client identifier */ + memmove(vend, ifc->mac, 6);vend+=6; + *vend=0xff; + if(debug) + dispvend(req.vend); + if(kwrite(dfd, &req, sizeof(req))<0){ + print("DHCPREQUEST: %r"); + continue; + } + tsleep(&bootpr, return0, 0, 2000); + if(dhcpmsgtype==5) /* wait for DHCPACK */ + break; + else + continue; + /* CHECK ARP */ + /* DHCPDECLINE */ + } + kclose(dfd); + done = 1; + if(rcvprocp != nil){ + postnote(rcvprocp, 1, "timeout", 0); + rcvprocp = nil; + } + + av[1] = "0.0.0.0"; + av[2] = "0.0.0.0"; + ipifcrem(ifc, av, 3); + + hnputl(nipaddr, ipaddr); + sprint(ia, "%V", nipaddr); + hnputl(nipmask, ipmask); + sprint(im, "%V", nipmask); + av[1] = ia; + av[2] = im; + ipifcadd(ifc, av, 
3, 0, nil); + + if(gwip != 0) { + hnputl(ngwip, gwip); + n = sprint(ia, "add 0.0.0.0 0.0.0.0 %V", ngwip); + routewrite(ifc->conv->p->f, nil, ia, n); + } + return nil; +} + +static int +rbootpread(char *bp, ulong offset, int len) +{ + int n, i; + char *buf; + uchar a[4]; + + if(debug) + print("dhcp: bootpread() \n"); + buf = smalloc(READSTR); + if(waserror()){ + free(buf); + nexterror(); + } + + hnputl(a, fsip); + n = snprint(buf, READSTR, "fsip %15V\n", a); + hnputl(a, auip); + n += snprint(buf + n, READSTR-n, "auip %15V\n", a); + hnputl(a, gwip); + n += snprint(buf + n, READSTR-n, "gwip %15V\n", a); + hnputl(a, ipmask); + n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a); + hnputl(a, ipaddr); + n += snprint(buf + n, READSTR-n, "ipaddr %15V\n", a); + n += snprint(buf+n, READSTR-n, "expired %lud\n", iplease); + + n += snprint(buf + n, READSTR-n, "dns"); + if(dns2ip){ + hnputl(a, dns2ip); + n+=snprint(buf + n, READSTR-n, " %15V", a); + } + if(dns1ip){ + hnputl(a, dns1ip); + n += snprint(buf + n, READSTR-n, " %15V", a); + } + + for(i=0; i<2; i++) + if(ipcmp(pppdns[i], IPnoaddr) != 0 && ipcmp(pppdns[i], v4prefix) != 0) + n += snprint(buf + n, READSTR-n, " %15I", pppdns[i]); + + snprint(buf + n, READSTR-n, "\n"); + len = readstr(offset, bp, len, buf); + poperror(); + free(buf); + return len; +} + +char* (*bootp)(Ipifc*) = rbootp; +int (*bootpread)(char*, ulong, int) = rbootpread; diff --git a/os/ip/eipconvtest.c b/os/ip/eipconvtest.c new file mode 100644 index 00000000..06b0f9b5 --- /dev/null +++ b/os/ip/eipconvtest.c @@ -0,0 +1,152 @@ +#include <u.h> +#include <libc.h> + +enum +{ + Isprefix= 16, +}; + +uchar prefixvals[256] = +{ +[0x00] 0 | Isprefix, +[0x80] 1 | Isprefix, +[0xC0] 2 | Isprefix, +[0xE0] 3 | Isprefix, +[0xF0] 4 | Isprefix, +[0xF8] 5 | Isprefix, +[0xFC] 6 | Isprefix, +[0xFE] 7 | Isprefix, +[0xFF] 8 | Isprefix, +}; + +uchar v4prefix[16] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0xff, 0xff, + 0, 0, 0, 0 +}; + +void +hnputl(void *p, ulong v) +{ + uchar *a; + 
+ a = p; + a[0] = v>>24; + a[1] = v>>16; + a[2] = v>>8; + a[3] = v; +} + +int +eipconv(va_list *arg, Fconv *f) +{ + char buf[8*5]; + static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux"; + static char *ifmt = "%d.%d.%d.%d"; + uchar *p, ip[16]; + ulong *lp; + ushort s; + int i, j, n, eln, eli; + + switch(f->chr) { + case 'E': /* Ethernet address */ + p = va_arg(*arg, uchar*); + sprint(buf, efmt, p[0], p[1], p[2], p[3], p[4], p[5]); + break; + case 'I': /* Ip address */ + p = va_arg(*arg, uchar*); +common: + if(memcmp(p, v4prefix, 12) == 0) + sprint(buf, ifmt, p[12], p[13], p[14], p[15]); + else { + /* find longest elision */ + eln = eli = -1; + for(i = 0; i < 16; i += 2){ + for(j = i; j < 16; j += 2) + if(p[j] != 0 || p[j+1] != 0) + break; + if(j > i && j - i > eln){ + eli = i; + eln = j - i; + } + } + + /* print with possible elision */ + n = 0; + for(i = 0; i < 16; i += 2){ + if(i == eli){ + n += sprint(buf+n, "::"); + i += eln; + if(i >= 16) + break; + } else if(i != 0) + n += sprint(buf+n, ":"); + s = (p[i]<<8) + p[i+1]; + n += sprint(buf+n, "%ux", s); + } + } + break; + case 'i': /* v6 address as 4 longs */ + lp = va_arg(*arg, ulong*); + for(i = 0; i < 4; i++) + hnputl(ip+4*i, *lp++); + p = ip; + goto common; + case 'V': /* v4 ip address */ + p = va_arg(*arg, uchar*); + sprint(buf, ifmt, p[0], p[1], p[2], p[3]); + break; + case 'M': /* ip mask */ + p = va_arg(*arg, uchar*); + + /* look for a prefix mask */ + for(i = 0; i < 16; i++) + if(p[i] != 0xff) + break; + if(i < 16){ + if((prefixvals[p[i]] & Isprefix) == 0) + goto common; + for(j = i+1; j < 16; j++) + if(p[j] != 0) + goto common; + n = 8*i + (prefixvals[p[i]] & ~Isprefix); + } else + n = 8*16; + + /* got one, use /xx format */ + sprint(buf, "/%d", n); + break; + default: + strcpy(buf, "(eipconv)"); + } + strconv(buf, f); + return sizeof(uchar*); +} + +uchar testvec[11][16] = +{ + { 0,0,0,0, 0,0,0,0, 0,0,0xff,0xff, 1,3,4,5, }, + { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 
0xff,0xff,0xff,0xff, }, + { 0xff,0xff,0x80,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, }, + { 0xff,0xff,0xff,0xc0, 0,0,0,0, 0,0,0,0, 0,0,0,0, }, + { 0xff,0xff,0xff,0xff, 0xe0,0,0,0, 0,0,0,0, 0,0,0,0, }, + { 0xff,0xff,0xff,0xff, 0xff,0xf0,0,0, 0,0,0,0, 0,0,0,0, }, + { 0xff,0xff,0xff,0xff, 0xff,0xff,0xf8,0, 0,0,0,0, 0,0,0,0, }, + { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, }, + { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, }, + { 0,0,0,0, 0,0x11,0,0, 0,0,0,0, 0,0,0,0, }, + { 0,0,0,0x11, 0,0,0,0, 0,0,0,0, 0,0,0,0x12, }, +}; + +void +main(void) +{ + int i; + + fmtinstall('I', eipconv); + fmtinstall('M', eipconv); + for(i = 0; i < 11; i++) + print("%I\n%M\n", testvec[i], testvec[i]); + exits(0); +} diff --git a/os/ip/esp.c b/os/ip/esp.c new file mode 100644 index 00000000..9c9f33f8 --- /dev/null +++ b/os/ip/esp.c @@ -0,0 +1,866 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +#include "libsec.h" + +typedef struct Esphdr Esphdr; +typedef struct Esptail Esptail; +typedef struct Userhdr Userhdr; +typedef struct Esppriv Esppriv; +typedef struct Espcb Espcb; +typedef struct Algorithm Algorithm; +typedef struct Esprc4 Esprc4; + +#define DPRINT if(0)print + +enum +{ + IP_ESPPROTO = 50, + EsphdrSize = 28, // includes IP header + IphdrSize = 20, // options have been striped + EsptailSize = 2, // does not include pad or auth data + UserhdrSize = 4, // user visable header size - if enabled +}; + +struct Esphdr +{ + /* ip header */ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + uchar Unused; + uchar espproto; /* Protocol */ + uchar espplen[2]; /* Header plus data length */ + uchar espsrc[4]; /* Ip source */ + uchar espdst[4]; /* Ip destination */ + + /* esp header */ + uchar espspi[4]; /* Security parameter 
index */ + uchar espseq[4]; /* Sequence number */ +}; + +struct Esptail +{ + uchar pad; + uchar nexthdr; +}; + +/* header as seen by the user */ +struct Userhdr +{ + uchar nexthdr; // next protocol + uchar unused[3]; +}; + +struct Esppriv +{ + ulong in; + ulong inerrors; +}; + +/* + * protocol specific part of Conv + */ +struct Espcb +{ + int incoming; + int header; // user user level header + ulong spi; + ulong seq; // last seq sent + ulong window; // for replay attacks + char *espalg; + void *espstate; // other state for esp + int espivlen; // in bytes + int espblklen; + int (*cipher)(Espcb*, uchar *buf, int len); + char *ahalg; + void *ahstate; // other state for esp + int ahlen; // auth data length in bytes + int ahblklen; + int (*auth)(Espcb*, uchar *buf, int len, uchar *hash); +}; + +struct Algorithm +{ + char *name; + int keylen; // in bits + void (*init)(Espcb*, char* name, uchar *key, int keylen); +}; + + +enum { + RC4forward = 10*1024*1024, // maximum skip forward + RC4back = 100*1024, // maximum look back +}; + +struct Esprc4 +{ + ulong cseq; // current byte sequence number + RC4state current; + + int ovalid; // old is valid + ulong lgseq; // last good sequence + ulong oseq; // old byte sequence number + RC4state old; +}; + +static Conv* convlookup(Proto *esp, ulong spi); +static char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg); +static void nullespinit(Espcb*, char*, uchar *key, int keylen); +static void nullahinit(Espcb*, char*, uchar *key, int keylen); +static void shaahinit(Espcb*, char*, uchar *key, int keylen); +static void md5ahinit(Espcb*, char*, uchar *key, int keylen); +static void desespinit(Espcb *ecb, char *name, uchar *k, int n); +static void rc4espinit(Espcb *ecb, char *name, uchar *k, int n); +static void espkick(void *x); + +static Algorithm espalg[] = +{ + "null", 0, nullespinit, + "des_56_cbc", 64, desespinit, + "rc4_128", 128, rc4espinit, + nil, 0, nil, +}; + +static Algorithm ahalg[] = +{ + "null", 0, nullahinit, + 
"hmac_sha1_96", 128, shaahinit, + "hmac_md5_96", 128, md5ahinit, + nil, 0, nil, +}; + +static char* +espconnect(Conv *c, char **argv, int argc) +{ + char *p, *pp; + char *e = nil; + ulong spi; + Espcb *ecb = (Espcb*)c->ptcl; + + switch(argc) { + default: + e = "bad args to connect"; + break; + case 2: + p = strchr(argv[1], '!'); + if(p == nil){ + e = "malformed address"; + break; + } + *p++ = 0; + parseip(c->raddr, argv[1]); + findlocalip(c->p->f, c->laddr, c->raddr); + ecb->incoming = 0; + ecb->seq = 0; + if(strcmp(p, "*") == 0) { + qlock(c->p); + for(;;) { + spi = nrand(1<<16) + 256; + if(convlookup(c->p, spi) == nil) + break; + } + qunlock(c->p); + ecb->spi = spi; + ecb->incoming = 1; + qhangup(c->wq, nil); + } else { + spi = strtoul(p, &pp, 10); + if(pp == p) { + e = "malformed address"; + break; + } + ecb->spi = spi; + qhangup(c->rq, nil); + } + nullespinit(ecb, "null", nil, 0); + nullahinit(ecb, "null", nil, 0); + } + Fsconnected(c, e); + + return e; +} + + +static int +espstate(Conv *c, char *state, int n) +{ + return snprint(state, n, "%s", c->inuse?"Open\n":"Closed\n"); +} + +static void +espcreate(Conv *c) +{ + c->rq = qopen(64*1024, Qmsg, 0, 0); + c->wq = qopen(64*1024, Qkick, espkick, c); +} + +static void +espclose(Conv *c) +{ + Espcb *ecb; + + qclose(c->rq); + qclose(c->wq); + qclose(c->eq); + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + + ecb = (Espcb*)c->ptcl; + free(ecb->espstate); + free(ecb->ahstate); + memset(ecb, 0, sizeof(Espcb)); +} + +static void +espkick(void *x) +{ + Conv *c = x; + Esphdr *eh; + Esptail *et; + Userhdr *uh; + Espcb *ecb; + Block *bp; + int nexthdr; + int payload; + int pad; + int align; + uchar *auth; + + bp = qget(c->wq); + if(bp == nil) + return; + + qlock(c); + ecb = c->ptcl; + + if(ecb->header) { + /* make sure the message has a User header */ + bp = pullupblock(bp, UserhdrSize); + if(bp == nil) { + qunlock(c); + return; + } + uh = (Userhdr*)bp->rp; + nexthdr = uh->nexthdr; + bp->rp += UserhdrSize; + } 
else { + nexthdr = 0; // what should this be? + } + + payload = BLEN(bp) + ecb->espivlen; + + /* Make space to fit ip header */ + bp = padblock(bp, EsphdrSize + ecb->espivlen); + + align = 4; + if(ecb->espblklen > align) + align = ecb->espblklen; + if(align % ecb->ahblklen != 0) + panic("espkick: ahblklen is important after all"); + pad = (align-1) - (payload + EsptailSize-1)%align; + + /* + * Make space for tail + * this is done by calling padblock with a negative size + * Padblock does not change bp->wp! + */ + bp = padblock(bp, -(pad+EsptailSize+ecb->ahlen)); + bp->wp += pad+EsptailSize+ecb->ahlen; + + eh = (Esphdr *)(bp->rp); + et = (Esptail*)(bp->rp + EsphdrSize + payload + pad); + + // fill in tail + et->pad = pad; + et->nexthdr = nexthdr; + + ecb->cipher(ecb, bp->rp+EsphdrSize, payload+pad+EsptailSize); + auth = bp->rp + EsphdrSize + payload + pad + EsptailSize; + + // fill in head + eh->vihl = IP_VER4; + hnputl(eh->espspi, ecb->spi); + hnputl(eh->espseq, ++ecb->seq); + v6tov4(eh->espsrc, c->laddr); + v6tov4(eh->espdst, c->raddr); + eh->espproto = IP_ESPPROTO; + eh->frag[0] = 0; + eh->frag[1] = 0; + + ecb->auth(ecb, bp->rp+IphdrSize, (EsphdrSize-IphdrSize)+payload+pad+EsptailSize, auth); + + qunlock(c); + //print("esp: pass down: %uld\n", BLEN(bp)); + ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c); +} + +void +espiput(Proto *esp, Ipifc*, Block *bp) +{ + Esphdr *eh; + Esptail *et; + Userhdr *uh; + Conv *c; + Espcb *ecb; + uchar raddr[IPaddrlen], laddr[IPaddrlen]; + Fs *f; + uchar *auth; + ulong spi; + int payload, nexthdr; + + f = esp->f; + + bp = pullupblock(bp, EsphdrSize+EsptailSize); + if(bp == nil) { + netlog(f, Logesp, "esp: short packet\n"); + return; + } + + eh = (Esphdr*)(bp->rp); + spi = nhgetl(eh->espspi); + v4tov6(raddr, eh->espsrc); + v4tov6(laddr, eh->espdst); + + qlock(esp); + /* Look for a conversation structure for this port */ + c = convlookup(esp, spi); + if(c == nil) { + qunlock(esp); + netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr, + 
laddr, spi); + icmpnoconv(f, bp); + freeblist(bp); + return; + } + + qlock(c); + qunlock(esp); + + ecb = c->ptcl; + // too hard to do decryption/authentication on block lists + if(bp->next) + bp = concatblock(bp); + + if(BLEN(bp) < EsphdrSize + ecb->espivlen + EsptailSize + ecb->ahlen) { + qunlock(c); + netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr, + laddr, spi); + freeb(bp); + return; + } + + eh = (Esphdr*)(bp->rp); + auth = bp->wp - ecb->ahlen; + if(!ecb->auth(ecb, eh->espspi, auth-eh->espspi, auth)) { + qunlock(c); +print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi); + netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr, + laddr, spi); + freeb(bp); + return; + } + + payload = BLEN(bp)-EsphdrSize-ecb->ahlen; + if(payload<=0 || payload%4 != 0 || payload%ecb->espblklen!=0) { + qunlock(c); + netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n", raddr, + laddr, spi, payload, BLEN(bp)); + freeb(bp); + return; + } + if(!ecb->cipher(ecb, bp->rp+EsphdrSize, payload)) { + qunlock(c); +print("esp: cipher failed %I -> %I!%ld: %r\n", raddr, laddr, spi); + netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %r\n", raddr, + laddr, spi); + freeb(bp); + return; + } + + payload -= EsptailSize; + et = (Esptail*)(bp->rp + EsphdrSize + payload); + payload -= et->pad + ecb->espivlen; + nexthdr = et->nexthdr; + if(payload <= 0) { + qunlock(c); + netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n", raddr, + laddr, spi); + freeb(bp); + return; + } + + // trim packet + bp->rp += EsphdrSize + ecb->espivlen; + bp->wp = bp->rp + payload; + if(ecb->header) { + // assume UserhdrSize < EsphdrSize + bp->rp -= UserhdrSize; + uh = (Userhdr*)bp->rp; + memset(uh, 0, UserhdrSize); + uh->nexthdr = nexthdr; + } + + if(qfull(c->rq)){ + netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr, + laddr, spi); + freeblist(bp); + }else { +//print("esp: pass up: %uld\n", BLEN(bp)); + qpass(c->rq, bp); + } + + qunlock(c); +} + +char* +espctl(Conv *c, char **f, 
int n) +{ + Espcb *ecb = c->ptcl; + char *e = nil; + + if(strcmp(f[0], "esp") == 0) + e = setalg(ecb, f, n, espalg); + else if(strcmp(f[0], "ah") == 0) + e = setalg(ecb, f, n, ahalg); + else if(strcmp(f[0], "header") == 0) + ecb->header = 1; + else if(strcmp(f[0], "noheader") == 0) + ecb->header = 0; + else + e = "unknown control request"; + return e; +} + +void +espadvise(Proto *esp, Block *bp, char *msg) +{ + Esphdr *h; + Conv *c; + ulong spi; + + h = (Esphdr*)(bp->rp); + + spi = nhgetl(h->espspi); /* espspi is 4 bytes; read all of it, as espiput does */ + qlock(esp); + c = convlookup(esp, spi); + if(c != nil) { + qhangup(c->rq, msg); + qhangup(c->wq, msg); + } + qunlock(esp); + freeblist(bp); +} + +int +espstats(Proto *esp, char *buf, int len) +{ + Esppriv *upriv; + + upriv = esp->priv; + return snprint(buf, len, "%lud %lud\n", + upriv->in, + upriv->inerrors); +} + +static int +esplocal(Conv *c, char *buf, int len) +{ + Espcb *ecb = c->ptcl; + int n; + + qlock(c); + if(ecb->incoming) + n = snprint(buf, len, "%I!%uld\n", c->laddr, ecb->spi); + else + n = snprint(buf, len, "%I\n", c->laddr); + qunlock(c); + return n; +} + +static int +espremote(Conv *c, char *buf, int len) +{ + Espcb *ecb = c->ptcl; + int n; + + qlock(c); + if(ecb->incoming) + n = snprint(buf, len, "%I\n", c->raddr); + else + n = snprint(buf, len, "%I!%uld\n", c->raddr, ecb->spi); + qunlock(c); + return n; +} + +static Conv* +convlookup(Proto *esp, ulong spi) +{ + Conv *c, **p; + Espcb *ecb; + + for(p=esp->conv; *p; p++){ + c = *p; + ecb = c->ptcl; + if(ecb->incoming && ecb->spi == spi) + return c; + } + return nil; +} + +static char * +setalg(Espcb *ecb, char **f, int n, Algorithm *alg) +{ + uchar *key; + int i, nbyte, nchar; + int c; + + if(n < 2) + return "bad format"; + for(; alg->name; alg++) + if(strcmp(f[1], alg->name) == 0) + break; + if(alg->name == nil) + return "unknown algorithm"; + + if(n != 3) + return "bad format"; + nbyte = (alg->keylen + 7) >> 3; + nchar = strlen(f[2]); + for(i=0; i<nchar; i++) { + c = f[2][i]; + if(c >= '0' && c <= 
'9') + f[2][i] -= '0'; + else if(c >= 'a' && c <= 'f') + f[2][i] -= 'a'-10; + else if(c >= 'A' && c <= 'F') + f[2][i] -= 'A'-10; + else + return "bad character in key"; + } + key = smalloc(nbyte); + for(i=0; i<nchar && (i>>1)<nbyte; i++) { /* two hex digits per key byte; bound matches key[i>>1] */ + c = f[2][nchar-i-1]; + if(i&1) + c <<= 4; + key[i>>1] |= c; + } + + alg->init(ecb, alg->name, key, alg->keylen); + free(key); + return nil; +} + +static int +nullcipher(Espcb*, uchar*, int) +{ + return 1; +} + +static void +nullespinit(Espcb *ecb, char *name, uchar*, int) +{ + ecb->espalg = name; + ecb->espblklen = 1; + ecb->espivlen = 0; + ecb->cipher = nullcipher; +} + +static int +nullauth(Espcb*, uchar*, int, uchar*) +{ + return 1; +} + +static void +nullahinit(Espcb *ecb, char *name, uchar*, int) +{ + ecb->ahalg = name; + ecb->ahblklen = 1; + ecb->ahlen = 0; + ecb->auth = nullauth; +} + +void +seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen) +{ + uchar ipad[65], opad[65]; + int i; + DigestState *digest; + uchar innerhash[SHA1dlen]; + + for(i=0; i<64; i++){ + ipad[i] = 0x36; + opad[i] = 0x5c; + } + ipad[64] = opad[64] = 0; + for(i=0; i<klen; i++){ + ipad[i] ^= key[i]; + opad[i] ^= key[i]; + } + digest = sha1(ipad, 64, nil, nil); + sha1(t, tlen, innerhash, digest); + digest = sha1(opad, 64, nil, nil); + sha1(innerhash, SHA1dlen, hash, digest); +} + +static int +shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth) +{ + uchar hash[SHA1dlen]; + int r; + + memset(hash, 0, SHA1dlen); + seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16); + r = memcmp(auth, hash, ecb->ahlen) == 0; + memmove(auth, hash, ecb->ahlen); + return r; +} + +static void +shaahinit(Espcb *ecb, char *name, uchar *key, int klen) +{ + if(klen != 128) + panic("shaahinit: bad keylen"); + klen >>= 3; // convert bits to bytes (16); shaauth reads 16 bytes of ahstate + + ecb->ahalg = name; + ecb->ahblklen = 1; + ecb->ahlen = 12; + ecb->auth = shaauth; + ecb->ahstate = smalloc(klen); + memmove(ecb->ahstate, key, klen); +} + +void +seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, 
long tlen, uchar *key, long klen) +{ + uchar ipad[65], opad[65]; + int i; + DigestState *digest; + uchar innerhash[MD5dlen]; + + for(i=0; i<64; i++){ + ipad[i] = 0x36; + opad[i] = 0x5c; + } + ipad[64] = opad[64] = 0; + for(i=0; i<klen; i++){ + ipad[i] ^= key[i]; + opad[i] ^= key[i]; + } + digest = md5(ipad, 64, nil, nil); + md5(t, tlen, innerhash, digest); + digest = md5(opad, 64, nil, nil); + md5(innerhash, MD5dlen, hash, digest); +} + +static int +md5auth(Espcb *ecb, uchar *t, int tlen, uchar *auth) +{ + uchar hash[MD5dlen]; + int r; + + memset(hash, 0, MD5dlen); + seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16); + r = memcmp(auth, hash, ecb->ahlen) == 0; + memmove(auth, hash, ecb->ahlen); + return r; +} + +static void +md5ahinit(Espcb *ecb, char *name, uchar *key, int klen) +{ + if(klen != 128) + panic("md5ahinit: bad keylen"); + klen >>= 3; // convert to bytes + + + ecb->ahalg = name; + ecb->ahblklen = 1; + ecb->ahlen = 12; + ecb->auth = md5auth; + ecb->ahstate = smalloc(klen); + memmove(ecb->ahstate, key, klen); +} + +static int +descipher(Espcb *ecb, uchar *p, int n) +{ + uchar tmp[8]; + uchar *pp, *tp, *ip, *eip, *ep; + DESstate *ds = ecb->espstate; + + ep = p + n; + if(ecb->incoming) { + memmove(ds->ivec, p, 8); + p += 8; + while(p < ep){ + memmove(tmp, p, 8); + block_cipher(ds->expanded, p, 1); + tp = tmp; + ip = ds->ivec; + for(eip = ip+8; ip < eip; ){ + *p++ ^= *ip; + *ip++ = *tp++; + } + } + } else { + memmove(p, ds->ivec, 8); + for(p += 8; p < ep; p += 8){ + pp = p; + ip = ds->ivec; + for(eip = ip+8; ip < eip; ) + *pp++ ^= *ip++; + block_cipher(ds->expanded, p, 0); + memmove(ds->ivec, p, 8); + } + } + return 1; +} + +static void +desespinit(Espcb *ecb, char *name, uchar *k, int n) +{ + uchar key[8]; + uchar ivec[8]; + int i; + + // bits to bytes + n = (n+7)>>3; + if(n > 8) + n = 8; + memset(key, 0, sizeof(key)); + memmove(key, k, n); + for(i=0; i<8; i++) + ivec[i] = nrand(256); + ecb->espalg = name; + ecb->espblklen = 8; + ecb->espivlen = 8; + 
ecb->cipher = descipher; + ecb->espstate = smalloc(sizeof(DESstate)); + setupDESstate(ecb->espstate, key, ivec); +} + +static int +rc4cipher(Espcb *ecb, uchar *p, int n) +{ + Esprc4 *esprc4; + RC4state tmpstate; + ulong seq; + long d, dd; + + if(n < 4) + return 0; + + esprc4 = ecb->espstate; + if(ecb->incoming) { + seq = nhgetl(p); + p += 4; + n -= 4; + d = seq-esprc4->cseq; + if(d == 0) { + rc4(&esprc4->current, p, n); + esprc4->cseq += n; + if(esprc4->ovalid) { + dd = esprc4->cseq - esprc4->lgseq; + if(dd > RC4back) + esprc4->ovalid = 0; + } + } else if(d > 0) { +print("missing packet: %uld %ld\n", seq, d); + // this link is hosed + if(d > RC4forward) { + strcpy(up->errstr, "rc4cipher: skipped too much"); + return 0; + } + esprc4->lgseq = seq; + if(!esprc4->ovalid) { + esprc4->ovalid = 1; + esprc4->oseq = esprc4->cseq; + memmove(&esprc4->old, &esprc4->current, sizeof(RC4state)); + } + rc4skip(&esprc4->current, d); + rc4(&esprc4->current, p, n); + esprc4->cseq = seq+n; + } else { +print("reordered packet: %uld %ld\n", seq, d); + dd = seq - esprc4->oseq; + if(!esprc4->ovalid || -d > RC4back || dd < 0) { + strcpy(up->errstr, "rc4cipher: too far back"); + return 0; + } + memmove(&tmpstate, &esprc4->old, sizeof(RC4state)); + rc4skip(&tmpstate, dd); + rc4(&tmpstate, p, n); + return 1; + } + + // move old state up + if(esprc4->ovalid) { + dd = esprc4->cseq - RC4back - esprc4->oseq; + if(dd > 0) { + rc4skip(&esprc4->old, dd); + esprc4->oseq += dd; + } + } + } else { + hnputl(p, esprc4->cseq); + p += 4; + n -= 4; + rc4(&esprc4->current, p, n); + esprc4->cseq += n; + } + return 1; +} + +static void +rc4espinit(Espcb *ecb, char *name, uchar *k, int n) +{ + Esprc4 *esprc4; + + // bits to bytes + n = (n+7)>>3; + esprc4 = smalloc(sizeof(Esprc4)); + memset(esprc4, 0, sizeof(Esprc4)); + setupRC4state(&esprc4->current, k, n); + ecb->espalg = name; + ecb->espblklen = 4; + ecb->espivlen = 4; + ecb->cipher = rc4cipher; + ecb->espstate = esprc4; +} + +void +espinit(Fs *fs) +{ + Proto 
*esp; + + esp = smalloc(sizeof(Proto)); + esp->priv = smalloc(sizeof(Esppriv)); + esp->name = "esp"; + esp->connect = espconnect; + esp->announce = nil; + esp->ctl = espctl; + esp->state = espstate; + esp->create = espcreate; + esp->close = espclose; + esp->rcv = espiput; + esp->advise = espadvise; + esp->stats = espstats; + esp->local = esplocal; + esp->remote = espremote; + esp->ipproto = IP_ESPPROTO; + esp->nc = Nchans; + esp->ptclsize = sizeof(Espcb); + + Fsproto(fs, esp); +} diff --git a/os/ip/ethermedium.c b/os/ip/ethermedium.c new file mode 100644 index 00000000..18778176 --- /dev/null +++ b/os/ip/ethermedium.c @@ -0,0 +1,792 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ipv6.h" +#include "kernel.h" + +typedef struct Etherhdr Etherhdr; +struct Etherhdr +{ + uchar d[6]; + uchar s[6]; + uchar t[2]; +}; + +static uchar ipbroadcast[IPaddrlen] = { + 0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff, +}; + +static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + +static void etherread4(void *a); +static void etherread6(void *a); +static void etherbind(Ipifc *ifc, int argc, char **argv); +static void etherunbind(Ipifc *ifc); +static void etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip); +static void etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia); +static void etherremmulti(Ipifc *ifc, uchar *a, uchar *ia); +static Block* multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac); +static void sendarp(Ipifc *ifc, Arpent *a); +static void sendgarp(Ipifc *ifc, uchar*); +static int multicastea(uchar *ea, uchar *ip); +static void recvarpproc(void*); +static void resolveaddr6(Ipifc *ifc, Arpent *a); +static void etherpref2addr(uchar *pref, uchar *ea); + +Medium ethermedium = +{ +.name= "ether", +.hsize= 14, +.mintu= 60, +.maxtu= 1514, +.maclen= 6, +.bind= etherbind, +.unbind= etherunbind, +.bwrite= 
etherbwrite, +.addmulti= etheraddmulti, +.remmulti= etherremmulti, +.ares= arpenter, +.areg= sendgarp, +.pref2addr= etherpref2addr, +}; + +Medium gbemedium = +{ +.name= "gbe", +.hsize= 14, +.mintu= 60, +.maxtu= 9014, +.maclen= 6, +.bind= etherbind, +.unbind= etherunbind, +.bwrite= etherbwrite, +.addmulti= etheraddmulti, +.remmulti= etherremmulti, +.ares= arpenter, +.areg= sendgarp, +.pref2addr= etherpref2addr, +}; + +typedef struct Etherrock Etherrock; +struct Etherrock +{ + Fs *f; /* file system we belong to */ + Proc *arpp; /* arp process */ + Proc *read4p; /* reading process (v4)*/ + Proc *read6p; /* reading process (v6)*/ + Chan *mchan4; /* Data channel for v4 */ + Chan *achan; /* Arp channel */ + Chan *cchan4; /* Control channel for v4 */ + Chan *mchan6; /* Data channel for v6 */ + Chan *cchan6; /* Control channel for v6 */ +}; + +/* + * ethernet arp request + */ +enum +{ + ETARP = 0x0806, + ETIP4 = 0x0800, + ETIP6 = 0x86DD, + ARPREQUEST = 1, + ARPREPLY = 2, +}; + +typedef struct Etherarp Etherarp; +struct Etherarp +{ + uchar d[6]; + uchar s[6]; + uchar type[2]; + uchar hrd[2]; + uchar pro[2]; + uchar hln; + uchar pln; + uchar op[2]; + uchar sha[6]; + uchar spa[4]; + uchar tha[6]; + uchar tpa[4]; +}; + +static char *nbmsg = "nonblocking"; + +/* + * called to bind an IP ifc to an ethernet device + * called with ifc wlock'd + */ +static void +etherbind(Ipifc *ifc, int argc, char **argv) +{ + Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6; + char addr[Maxpath]; //char addr[2*KNAMELEN]; + char dir[Maxpath]; //char dir[2*KNAMELEN]; + char *buf; + int fd, cfd, n; + char *ptr; + Etherrock *er; + + if(argc < 2) + error(Ebadarg); + + mchan4 = cchan4 = achan = mchan6 = cchan6 = nil; + buf = nil; + if(waserror()){ + if(mchan4 != nil) + cclose(mchan4); + if(cchan4 != nil) + cclose(cchan4); + if(achan != nil) + cclose(achan); + if(mchan6 != nil) + cclose(mchan6); + if(cchan6 != nil) + cclose(cchan6); + if(buf != nil) + free(buf); + nexterror(); + } + + /* + * open ip 
converstation + * + * the dial will fail if the type is already open on + * this device. + */ + snprint(addr, sizeof(addr), "%s!0x800", argv[2]); + fd = kdial(addr, nil, dir, &cfd); + if(fd < 0) + errorf("dial 0x800 failed: %s", up->env->errstr); + mchan4 = commonfdtochan(fd, ORDWR, 0, 1); + cchan4 = commonfdtochan(cfd, ORDWR, 0, 1); + kclose(fd); + kclose(cfd); + + /* + * make it non-blocking + */ + devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0); + + /* + * get mac address and speed + */ + snprint(addr, sizeof(addr), "%s/stats", dir); + fd = kopen(addr, OREAD); + if(fd < 0) + errorf("can't open ether stats: %s", up->env->errstr); + + buf = smalloc(512); + n = kread(fd, buf, 511); + kclose(fd); + if(n <= 0) + error(Eio); + buf[n] = 0; + + ptr = strstr(buf, "addr: "); + if(!ptr) + error(Eio); + ptr += 6; + parsemac(ifc->mac, ptr, 6); + + ptr = strstr(buf, "mbps: "); + if(ptr){ + ptr += 6; + ifc->mbps = atoi(ptr); + } else + ifc->mbps = 100; + + /* + * open arp conversation + */ + snprint(addr, sizeof(addr), "%s!0x806", argv[2]); + fd = kdial(addr, nil, nil, nil); + if(fd < 0) + errorf("dial 0x806 failed: %s", up->env->errstr); + achan = commonfdtochan(fd, ORDWR, 0, 1); + kclose(fd); + + /* + * open ip conversation + * + * the dial will fail if the type is already open on + * this device. 
+ */ + snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]); + fd = kdial(addr, nil, dir, &cfd); + if(fd < 0) + errorf("dial 0x86DD failed: %s", up->env->errstr); + mchan6 = commonfdtochan(fd, ORDWR, 0, 1); + cchan6 = commonfdtochan(cfd, ORDWR, 0, 1); + kclose(fd); + kclose(cfd); + + /* + * make it non-blocking + */ + devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0); + + er = smalloc(sizeof(*er)); + er->mchan4 = mchan4; + er->cchan4 = cchan4; + er->achan = achan; + er->mchan6 = mchan6; + er->cchan6 = cchan6; + er->f = ifc->conv->p->f; + ifc->arg = er; + + free(buf); + poperror(); + + kproc("etherread4", etherread4, ifc, 0); + kproc("recvarpproc", recvarpproc, ifc, 0); + kproc("etherread6", etherread6, ifc, 0); +} + +/* + * called with ifc wlock'd + */ +static void +etherunbind(Ipifc *ifc) +{ + Etherrock *er = ifc->arg; + + if(er->read4p) + postnote(er->read4p, 1, "unbind", 0); + if(er->read6p) + postnote(er->read6p, 1, "unbind", 0); + if(er->arpp) + postnote(er->arpp, 1, "unbind", 0); + + /* wait for readers to die */ + while(er->arpp != 0 || er->read4p != 0 || er->read6p != 0) + tsleep(&up->sleep, return0, 0, 300); + + if(er->mchan4 != nil) + cclose(er->mchan4); + if(er->achan != nil) + cclose(er->achan); + if(er->cchan4 != nil) + cclose(er->cchan4); + if(er->mchan6 != nil) + cclose(er->mchan6); + if(er->cchan6 != nil) + cclose(er->cchan6); + + free(er); +} + +/* + * called by ipoput with a single block to write with ifc rlock'd + */ +static void +etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip) +{ + Etherhdr *eh; + Arpent *a; + uchar mac[6]; + Etherrock *er = ifc->arg; + + /* get mac address of destination */ + a = arpget(er->f->arp, bp, version, ifc, ip, mac); + if(a){ + /* check for broadcast or multicast */ + bp = multicastarp(er->f, a, ifc->m, mac); + if(bp==nil){ + switch(version){ + case V4: + sendarp(ifc, a); + break; + case V6: + resolveaddr6(ifc, a); + break; + default: + panic("etherbwrite: version %d", version); + } + return; + } + } 
+ + /* make it a single block with space for the ether header */ + bp = padblock(bp, ifc->m->hsize); + if(bp->next) + bp = concatblock(bp); + if(BLEN(bp) < ifc->mintu) + bp = adjustblock(bp, ifc->mintu); + eh = (Etherhdr*)bp->rp; + + /* copy in mac addresses and ether type */ + memmove(eh->s, ifc->mac, sizeof(eh->s)); + memmove(eh->d, mac, sizeof(eh->d)); + + switch(version){ + case V4: + eh->t[0] = 0x08; + eh->t[1] = 0x00; + devtab[er->mchan4->type]->bwrite(er->mchan4, bp, 0); + break; + case V6: + eh->t[0] = 0x86; + eh->t[1] = 0xDD; + devtab[er->mchan6->type]->bwrite(er->mchan6, bp, 0); + break; + default: + panic("etherbwrite2: version %d", version); + } + ifc->out++; +} + + +/* + * process to read from the ethernet + */ +static void +etherread4(void *a) +{ + Ipifc *ifc; + Block *bp; + Etherrock *er; + + ifc = a; + er = ifc->arg; + er->read4p = up; /* hide identity under a rock for unbind */ + if(waserror()){ + er->read4p = 0; + pexit("hangup", 1); + } + for(;;){ + bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0); + if(!canrlock(ifc)){ + freeb(bp); + continue; + } + if(waserror()){ + runlock(ifc); + nexterror(); + } + ifc->in++; + bp->rp += ifc->m->hsize; + if(ifc->lifc == nil) + freeb(bp); + else + ipiput4(er->f, ifc, bp); + runlock(ifc); + poperror(); + } +} + + +/* + * process to read from the ethernet, IPv6 + */ +static void +etherread6(void *a) +{ + Ipifc *ifc; + Block *bp; + Etherrock *er; + + ifc = a; + er = ifc->arg; + er->read6p = up; /* hide identity under a rock for unbind */ + if(waserror()){ + er->read6p = 0; + pexit("hangup", 1); + } + for(;;){ + bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0); + if(!canrlock(ifc)){ + freeb(bp); + continue; + } + if(waserror()){ + runlock(ifc); + nexterror(); + } + ifc->in++; + bp->rp += ifc->m->hsize; + if(ifc->lifc == nil) + freeb(bp); + else + ipiput6(er->f, ifc, bp); + runlock(ifc); + poperror(); + } +} + +static void +etheraddmulti(Ipifc *ifc, uchar *a, uchar *) +{ + uchar 
mac[6]; + char buf[64]; + Etherrock *er = ifc->arg; + int version; + + version = multicastea(mac, a); + sprint(buf, "addmulti %E", mac); + switch(version){ + case V4: + devtab[er->cchan4->type]->write(er->cchan4, buf, strlen(buf), 0); + break; + case V6: + devtab[er->cchan6->type]->write(er->cchan6, buf, strlen(buf), 0); + break; + default: + panic("etheraddmulti: version %d", version); + } +} + +static void +etherremmulti(Ipifc *ifc, uchar *a, uchar *) +{ + uchar mac[6]; + char buf[64]; + Etherrock *er = ifc->arg; + int version; + + version = multicastea(mac, a); + sprint(buf, "remmulti %E", mac); + switch(version){ + case V4: + devtab[er->cchan4->type]->write(er->cchan4, buf, strlen(buf), 0); + break; + case V6: + devtab[er->cchan6->type]->write(er->cchan6, buf, strlen(buf), 0); + break; + default: + panic("etherremmulti: version %d", version); + } +} + +/* + * send an ethernet arp + * (only v4, v6 uses the neighbor discovery, rfc1970) + */ +static void +sendarp(Ipifc *ifc, Arpent *a) +{ + int n; + Block *bp; + Etherarp *e; + Etherrock *er = ifc->arg; + + /* don't do anything if it's been less than a second since the last */ + if(NOW - a->ctime < 1000){ + arprelease(er->f->arp, a); + return; + } + + /* remove all but the last message */ + while((bp = a->hold) != nil){ + if(bp == a->last) + break; + a->hold = bp->list; + freeblist(bp); + } + + /* try to keep it around for a second more */ + a->ctime = NOW; + arprelease(er->f->arp, a); + + n = sizeof(Etherarp); + if(n < a->type->mintu) + n = a->type->mintu; + bp = allocb(n); + memset(bp->rp, 0, n); + e = (Etherarp*)bp->rp; + memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa)); + ipv4local(ifc, e->spa); + memmove(e->sha, ifc->mac, sizeof(e->sha)); + memset(e->d, 0xff, sizeof(e->d)); /* ethernet broadcast */ + memmove(e->s, ifc->mac, sizeof(e->s)); + + hnputs(e->type, ETARP); + hnputs(e->hrd, 1); + hnputs(e->pro, ETIP4); + e->hln = sizeof(e->sha); + e->pln = sizeof(e->spa); + hnputs(e->op, ARPREQUEST); + bp->wp += n; + + 
n = devtab[er->achan->type]->bwrite(er->achan, bp, 0); + if(n < 0) + print("arp: send: %r\n"); +} + +static void +resolveaddr6(Ipifc *ifc, Arpent *a) +{ + int sflag; + Block *bp; + Etherrock *er = ifc->arg; + uchar ipsrc[IPaddrlen]; + + /* don't do anything if it's been less than a second since the last */ + if(NOW - a->ctime < ReTransTimer){ + arprelease(er->f->arp, a); + return; + } + + /* remove all but the last message */ + while((bp = a->hold) != nil){ + if(bp == a->last) + break; + a->hold = bp->list; + freeblist(bp); + } + + /* try to keep it around for a second more */ + a->ctime = NOW; + a->rtime = NOW + ReTransTimer; + if(a->rxtsrem <= 0) { + arprelease(er->f->arp, a); + return; + } + + a->rxtsrem--; + arprelease(er->f->arp, a); + + if(sflag = ipv6anylocal(ifc, ipsrc)) + icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac); +} + +/* + * send a gratuitous arp to refresh arp caches + */ +static void +sendgarp(Ipifc *ifc, uchar *ip) +{ + int n; + Block *bp; + Etherarp *e; + Etherrock *er = ifc->arg; + + /* don't arp for our initial non address */ + if(ipcmp(ip, IPnoaddr) == 0) + return; + + n = sizeof(Etherarp); + if(n < ifc->m->mintu) + n = ifc->m->mintu; + bp = allocb(n); + memset(bp->rp, 0, n); + e = (Etherarp*)bp->rp; + memmove(e->tpa, ip+IPv4off, sizeof(e->tpa)); + memmove(e->spa, ip+IPv4off, sizeof(e->spa)); + memmove(e->sha, ifc->mac, sizeof(e->sha)); + memset(e->d, 0xff, sizeof(e->d)); /* ethernet broadcast */ + memmove(e->s, ifc->mac, sizeof(e->s)); + + hnputs(e->type, ETARP); + hnputs(e->hrd, 1); + hnputs(e->pro, ETIP4); + e->hln = sizeof(e->sha); + e->pln = sizeof(e->spa); + hnputs(e->op, ARPREQUEST); + bp->wp += n; + + n = devtab[er->achan->type]->bwrite(er->achan, bp, 0); + if(n < 0) + print("garp: send: %r\n"); +} + +static void +recvarp(Ipifc *ifc) +{ + int n; + Block *ebp, *rbp; + Etherarp *e, *r; + uchar ip[IPaddrlen]; + static uchar eprinted[4]; + Etherrock *er = ifc->arg; + + ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 
0); + if(ebp == nil) { + print("arp: rcv: %r\n"); + return; + } + + e = (Etherarp*)ebp->rp; + switch(nhgets(e->op)) { + default: + break; + + case ARPREPLY: + /* check for machine using my ip address */ + v4tov6(ip, e->spa); + if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){ + if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){ + print("arprep: 0x%E/0x%E also has ip addr %V\n", + e->s, e->sha, e->spa); + break; + } + } + + /* make sure we're not entering broadcast addresses */ + if(ipcmp(ip, ipbroadcast) == 0 || + !memcmp(e->sha, etherbroadcast, sizeof(e->sha))){ + print("arprep: 0x%E/0x%E cannot register broadcast address %I\n", + e->s, e->sha, e->spa); + break; + } + + arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0); + break; + + case ARPREQUEST: + /* don't answer arps till we know who we are */ + if(ifc->lifc == 0) + break; + + /* check for machine using my ip or ether address */ + v4tov6(ip, e->spa); + if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){ + if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){ + if (memcmp(eprinted, e->spa, sizeof(e->spa))){ + /* print only once */ + print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa); + memmove(eprinted, e->spa, sizeof(e->spa)); + } + } + } else { + if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){ + print("arpreq: %V also has ether addr %E\n", e->spa, e->sha); + break; + } + } + + /* refresh what we know about sender */ + arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1); + + /* answer only requests for our address or systems we're proxying for */ + v4tov6(ip, e->tpa); + if(!iplocalonifc(ifc, ip)) + if(!ipproxyifc(er->f, ifc, ip)) + break; + + n = sizeof(Etherarp); + if(n < ifc->mintu) + n = ifc->mintu; + rbp = allocb(n); + r = (Etherarp*)rbp->rp; + memset(r, 0, sizeof(Etherarp)); + hnputs(r->type, ETARP); + hnputs(r->hrd, 1); + hnputs(r->pro, ETIP4); + r->hln = sizeof(r->sha); + r->pln = sizeof(r->spa); + hnputs(r->op, ARPREPLY); + memmove(r->tha, e->sha, sizeof(r->tha)); + 
memmove(r->tpa, e->spa, sizeof(r->tpa)); + memmove(r->sha, ifc->mac, sizeof(r->sha)); + memmove(r->spa, e->tpa, sizeof(r->spa)); + memmove(r->d, e->sha, sizeof(r->d)); + memmove(r->s, ifc->mac, sizeof(r->s)); + rbp->wp += n; + + n = devtab[er->achan->type]->bwrite(er->achan, rbp, 0); + if(n < 0) + print("arp: write: %r\n"); + } + freeb(ebp); +} + +static void +recvarpproc(void *v) +{ + Ipifc *ifc = v; + Etherrock *er = ifc->arg; + + er->arpp = up; + if(waserror()){ + er->arpp = 0; + pexit("hangup", 1); + } + for(;;) + recvarp(ifc); +} + +static int +multicastea(uchar *ea, uchar *ip) +{ + int x; + + switch(x = ipismulticast(ip)){ + case V4: + ea[0] = 0x01; + ea[1] = 0x00; + ea[2] = 0x5e; + ea[3] = ip[13] & 0x7f; + ea[4] = ip[14]; + ea[5] = ip[15]; + break; + case V6: + ea[0] = 0x33; + ea[1] = 0x33; + ea[2] = ip[12]; + ea[3] = ip[13]; + ea[4] = ip[14]; + ea[5] = ip[15]; + break; + } + return x; +} + +/* + * fill in an arp entry for broadcast or multicast + * addresses. Return the first queued packet for the + * IP address. + */ +static Block* +multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac) +{ + /* is it broadcast? 
*/ + switch(ipforme(f, a->ip)){ + case Runi: + return nil; + case Rbcast: + memset(mac, 0xff, 6); + return arpresolve(f->arp, a, medium, mac); + default: + break; + } + + /* if multicast, fill in mac */ + switch(multicastea(mac, a->ip)){ + case V4: + case V6: + return arpresolve(f->arp, a, medium, mac); + } + + /* let arp take care of it */ + return nil; +} + +void +ethermediumlink(void) +{ + addipmedium(ðermedium); + addipmedium(&gbemedium); +} + + +static void +etherpref2addr(uchar *pref, uchar *ea) +{ + pref[8] = ea[0] | 0x2; + pref[9] = ea[1]; + pref[10] = ea[2]; + pref[11] = 0xFF; + pref[12] = 0xFE; + pref[13] = ea[3]; + pref[14] = ea[4]; + pref[15] = ea[5]; +} diff --git a/os/ip/gre.c b/os/ip/gre.c new file mode 100644 index 00000000..96106331 --- /dev/null +++ b/os/ip/gre.c @@ -0,0 +1,282 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +#define DPRINT if(0)print + +enum +{ + GRE_IPONLY = 12, /* size of ip header */ + GRE_IPPLUSGRE = 12, /* minimum size of GRE header */ + IP_GREPROTO = 47, + + GRErxms = 200, + GREtickms = 100, + GREmaxxmit = 10, +}; + +typedef struct GREhdr +{ + /* ip header */ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar len[2]; /* packet length (including headers) */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + uchar Unused; + uchar proto; /* Protocol */ + uchar cksum[2]; /* checksum */ + uchar src[4]; /* Ip source */ + uchar dst[4]; /* Ip destination */ + + /* gre header */ + uchar flags[2]; + uchar eproto[2]; /* encapsulation protocol */ +} GREhdr; + +typedef struct GREpriv GREpriv; +struct GREpriv +{ + int raw; /* Raw GRE mode */ + + /* non-MIB stats */ + ulong csumerr; /* checksum errors */ + ulong lenerr; /* short packet */ +}; + +static void grekick(void *x, Block *bp); + +static char* +greconnect(Conv *c, char **argv, int argc) +{ + Proto *p; + char *err; 
+ Conv *tc, **cp, **ecp; + + err = Fsstdconnect(c, argv, argc); + if(err != nil) + return err; + + /* make sure noone's already connected to this other sys */ + p = c->p; + qlock(p); + ecp = &p->conv[p->nc]; + for(cp = p->conv; cp < ecp; cp++){ + tc = *cp; + if(tc == nil) + break; + if(tc == c) + continue; + if(tc->rport == c->rport && ipcmp(tc->raddr, c->raddr) == 0){ + err = "already connected to that addr/proto"; + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + break; + } + } + qunlock(p); + + if(err != nil) + return err; + Fsconnected(c, nil); + + return nil; +} + +static void +grecreate(Conv *c) +{ + c->rq = qopen(64*1024, Qmsg, 0, c); + c->wq = qbypass(grekick, c); +} + +static int +grestate(Conv *c, char *state, int n) +{ + USED(c); + return snprint(state, n, "%s", "Datagram"); +} + +static char* +greannounce(Conv*, char**, int) +{ + return "pktifc does not support announce"; +} + +static void +greclose(Conv *c) +{ + qclose(c->rq); + qclose(c->wq); + qclose(c->eq); + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + c->lport = 0; + c->rport = 0; +} + +int drop; + +static void +grekick(void *x, Block *bp) +{ + Conv *c = x; + GREhdr *ghp; + uchar laddr[IPaddrlen], raddr[IPaddrlen]; + + if(bp == nil) + return; + + /* Make space to fit ip header (gre header already there) */ + bp = padblock(bp, GRE_IPONLY); + if(bp == nil) + return; + + /* make sure the message has a GRE header */ + bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE); + if(bp == nil) + return; + + ghp = (GREhdr *)(bp->rp); + ghp->vihl = IP_VER4; + + if(!((GREpriv*)c->p->priv)->raw){ + v4tov6(raddr, ghp->dst); + if(ipcmp(raddr, v4prefix) == 0) + memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen); + v4tov6(laddr, ghp->src); + if(ipcmp(laddr, v4prefix) == 0){ + if(ipcmp(c->laddr, IPnoaddr) == 0) + findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */ + memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen); + } + hnputs(ghp->eproto, c->rport); + } + + ghp->proto 
= IP_GREPROTO; + ghp->frag[0] = 0; + ghp->frag[1] = 0; + + ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil); +} + +static void +greiput(Proto *gre, Ipifc*, Block *bp) +{ + int len; + GREhdr *ghp; + Conv *c, **p; + ushort eproto; + uchar raddr[IPaddrlen]; + GREpriv *gpriv; + + gpriv = gre->priv; + ghp = (GREhdr*)(bp->rp); + + v4tov6(raddr, ghp->src); + eproto = nhgets(ghp->eproto); + qlock(gre); + + /* Look for a conversation structure for this port and address */ + c = nil; + for(p = gre->conv; *p; p++) { + c = *p; + if(c->inuse == 0) + continue; + if(c->rport == eproto && + (gpriv->raw || ipcmp(c->raddr, raddr) == 0)) + break; + } + + if(*p == nil) { + qunlock(gre); + freeblist(bp); + return; + } + + qunlock(gre); + + /* + * Trim the packet down to data size + */ + len = nhgets(ghp->len) - GRE_IPONLY; + if(len < GRE_IPPLUSGRE){ + freeblist(bp); + return; + } + bp = trimblock(bp, GRE_IPONLY, len); + if(bp == nil){ + gpriv->lenerr++; + return; + } + + /* + * Can't delimit packet so pull it all into one block. 
+ */ + if(qlen(c->rq) > 64*1024) + freeblist(bp); + else{ + bp = concatblock(bp); + if(bp == 0) + panic("greiput"); + qpass(c->rq, bp); + } +} + +int +grestats(Proto *gre, char *buf, int len) +{ + GREpriv *gpriv; + + gpriv = gre->priv; + + return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr); +} + +char* +grectl(Conv *c, char **f, int n) +{ + GREpriv *gpriv; + + gpriv = c->p->priv; + if(n == 1){ + if(strcmp(f[0], "raw") == 0){ + gpriv->raw = 1; + return nil; + } + else if(strcmp(f[0], "cooked") == 0){ + gpriv->raw = 0; + return nil; + } + } + return "unknown control request"; +} + +void +greinit(Fs *fs) +{ + Proto *gre; + + gre = smalloc(sizeof(Proto)); + gre->priv = smalloc(sizeof(GREpriv)); + gre->name = "gre"; + gre->connect = greconnect; + gre->announce = greannounce; + gre->state = grestate; + gre->create = grecreate; + gre->close = greclose; + gre->rcv = greiput; + gre->ctl = grectl; + gre->advise = nil; + gre->stats = grestats; + gre->ipproto = IP_GREPROTO; + gre->nc = 64; + gre->ptclsize = 0; + + Fsproto(fs, gre); +} diff --git a/os/ip/icmp.c b/os/ip/icmp.c new file mode 100644 index 00000000..53eaf372 --- /dev/null +++ b/os/ip/icmp.c @@ -0,0 +1,490 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +typedef struct Icmp { + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + uchar ttl; /* Time to live */ + uchar proto; /* Protocol */ + uchar ipcksum[2]; /* Header checksum */ + uchar src[4]; /* Ip source */ + uchar dst[4]; /* Ip destination */ + uchar type; + uchar code; + uchar cksum[2]; + uchar icmpid[2]; + uchar seq[2]; + uchar data[1]; +} Icmp; + +enum { /* Packet Types */ + EchoReply = 0, + Unreachable = 3, + SrcQuench = 4, + Redirect = 5, + EchoRequest = 8, + TimeExceed = 11, + InParmProblem = 12, + 
Timestamp = 13, + TimestampReply = 14, + InfoRequest = 15, + InfoReply = 16, + AddrMaskRequest = 17, + AddrMaskReply = 18, + + Maxtype = 18, +}; + +enum +{ + MinAdvise = 24, /* minimum needed for us to advise another protocol */ +}; + +char *icmpnames[Maxtype+1] = +{ +[EchoReply] "EchoReply", +[Unreachable] "Unreachable", +[SrcQuench] "SrcQuench", +[Redirect] "Redirect", +[EchoRequest] "EchoRequest", +[TimeExceed] "TimeExceed", +[InParmProblem] "InParmProblem", +[Timestamp] "Timestamp", +[TimestampReply] "TimestampReply", +[InfoRequest] "InfoRequest", +[InfoReply] "InfoReply", +[AddrMaskRequest] "AddrMaskRequest", +[AddrMaskReply ] "AddrMaskReply ", +}; + +enum { + IP_ICMPPROTO = 1, + ICMP_IPSIZE = 20, + ICMP_HDRSIZE = 8, +}; + +enum +{ + InMsgs, + InErrors, + OutMsgs, + CsumErrs, + LenErrs, + HlenErrs, + + Nstats, +}; + +static char *statnames[Nstats] = +{ +[InMsgs] "InMsgs", +[InErrors] "InErrors", +[OutMsgs] "OutMsgs", +[CsumErrs] "CsumErrs", +[LenErrs] "LenErrs", +[HlenErrs] "HlenErrs", +}; + +typedef struct Icmppriv Icmppriv; +struct Icmppriv +{ + ulong stats[Nstats]; + + /* message counts */ + ulong in[Maxtype+1]; + ulong out[Maxtype+1]; +}; + +static void icmpkick(void *x, Block*); + +static void +icmpcreate(Conv *c) +{ + c->rq = qopen(64*1024, Qmsg, 0, c); + c->wq = qbypass(icmpkick, c); +} + +extern char* +icmpconnect(Conv *c, char **argv, int argc) +{ + char *e; + + e = Fsstdconnect(c, argv, argc); + if(e != nil) + return e; + Fsconnected(c, e); + + return nil; +} + +extern int +icmpstate(Conv *c, char *state, int n) +{ + USED(c); + return snprint(state, n, "%s qin %d qout %d", + "Datagram", + c->rq ? qlen(c->rq) : 0, + c->wq ? 
qlen(c->wq) : 0 + ); +} + +extern char* +icmpannounce(Conv *c, char **argv, int argc) +{ + char *e; + + e = Fsstdannounce(c, argv, argc); + if(e != nil) + return e; + Fsconnected(c, nil); + + return nil; +} + +extern void +icmpclose(Conv *c) +{ + qclose(c->rq); + qclose(c->wq); + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + c->lport = 0; +} + +static void +icmpkick(void *x, Block *bp) +{ + Conv *c = x; + Icmp *p; + Icmppriv *ipriv; + + if(bp == nil) + return; + + if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){ + freeblist(bp); + return; + } + p = (Icmp *)(bp->rp); + p->vihl = IP_VER4; + ipriv = c->p->priv; + if(p->type <= Maxtype) + ipriv->out[p->type]++; + + v6tov4(p->dst, c->raddr); + v6tov4(p->src, c->laddr); + p->proto = IP_ICMPPROTO; + hnputs(p->icmpid, c->lport); + memset(p->cksum, 0, sizeof(p->cksum)); + hnputs(p->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE)); + ipriv->stats[OutMsgs]++; + ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil); +} + +extern void +icmpttlexceeded(Fs *f, uchar *ia, Block *bp) +{ + Block *nbp; + Icmp *p, *np; + + p = (Icmp *)bp->rp; + + netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src); + nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8); + nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8; + np = (Icmp *)nbp->rp; + np->vihl = IP_VER4; + memmove(np->dst, p->src, sizeof(np->dst)); + v6tov4(np->src, ia); + memmove(np->data, bp->rp, ICMP_IPSIZE + 8); + np->type = TimeExceed; + np->code = 0; + np->proto = IP_ICMPPROTO; + hnputs(np->icmpid, 0); + hnputs(np->seq, 0); + memset(np->cksum, 0, sizeof(np->cksum)); + hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE)); + ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil); + +} + +static void +icmpunreachable(Fs *f, Block *bp, int code, int seq) +{ + Block *nbp; + Icmp *p, *np; + int i; + uchar addr[IPaddrlen]; + + p = (Icmp *)bp->rp; + + /* only do this for unicast sources and destinations */ + v4tov6(addr, p->dst); + i = 
ipforme(f, addr); + if((i&Runi) == 0) + return; + v4tov6(addr, p->src); + i = ipforme(f, addr); + if(i != 0 && (i&Runi) == 0) + return; + + netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src); + nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8); + nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8; + np = (Icmp *)nbp->rp; + np->vihl = IP_VER4; + memmove(np->dst, p->src, sizeof(np->dst)); + memmove(np->src, p->dst, sizeof(np->src)); + memmove(np->data, bp->rp, ICMP_IPSIZE + 8); + np->type = Unreachable; + np->code = code; + np->proto = IP_ICMPPROTO; + hnputs(np->icmpid, 0); + hnputs(np->seq, seq); + memset(np->cksum, 0, sizeof(np->cksum)); + hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE)); + ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil); +} + +extern void +icmpnoconv(Fs *f, Block *bp) +{ + icmpunreachable(f, bp, 3, 0); +} + +extern void +icmpcantfrag(Fs *f, Block *bp, int mtu) +{ + icmpunreachable(f, bp, 4, mtu); +} + +static void +goticmpkt(Proto *icmp, Block *bp) +{ + Conv **c, *s; + Icmp *p; + uchar dst[IPaddrlen]; + ushort recid; + + p = (Icmp *) bp->rp; + v4tov6(dst, p->src); + recid = nhgets(p->icmpid); + + for(c = icmp->conv; *c; c++) { + s = *c; + if(s->lport == recid) + if(ipcmp(s->raddr, dst) == 0){ + bp = concatblock(bp); + if(bp != nil) + qpass(s->rq, bp); + return; + } + } + freeblist(bp); +} + +static Block * +mkechoreply(Block *bp) +{ + Icmp *q; + uchar ip[4]; + + q = (Icmp *)bp->rp; + q->vihl = IP_VER4; + memmove(ip, q->src, sizeof(q->dst)); + memmove(q->src, q->dst, sizeof(q->src)); + memmove(q->dst, ip, sizeof(q->dst)); + q->type = EchoReply; + memset(q->cksum, 0, sizeof(q->cksum)); + hnputs(q->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE)); + + return bp; +} + +static char *unreachcode[] = +{ +[0] "net unreachable", +[1] "host unreachable", +[2] "protocol unreachable", +[3] "port unreachable", +[4] "fragmentation needed and DF set", +[5] "source route failed", +}; + +static void 
+icmpiput(Proto *icmp, Ipifc*, Block *bp) +{ + int n, iplen; + Icmp *p; + Block *r; + Proto *pr; + char *msg; + char m2[128]; + Icmppriv *ipriv; + + ipriv = icmp->priv; + + ipriv->stats[InMsgs]++; + + p = (Icmp *)bp->rp; + netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code); + n = blocklen(bp); + if(n < ICMP_IPSIZE+ICMP_HDRSIZE){ + ipriv->stats[InErrors]++; + ipriv->stats[HlenErrs]++; + netlog(icmp->f, Logicmp, "icmp hlen %d\n", n); + goto raise; + } + iplen = nhgets(p->length); + if(iplen > n || (iplen % 1)){ + ipriv->stats[LenErrs]++; + ipriv->stats[InErrors]++; + netlog(icmp->f, Logicmp, "icmp length %d\n", iplen); + goto raise; + } + if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){ + ipriv->stats[InErrors]++; + ipriv->stats[CsumErrs]++; + netlog(icmp->f, Logicmp, "icmp checksum error\n"); + goto raise; + } + if(p->type <= Maxtype) + ipriv->in[p->type]++; + + switch(p->type) { + case EchoRequest: + if (iplen < n) + bp = trimblock(bp, 0, iplen); + r = mkechoreply(bp); + ipriv->out[EchoReply]++; + ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil); + break; + case Unreachable: + if(p->code > 5) + msg = unreachcode[1]; + else + msg = unreachcode[p->code]; + + bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE; + if(blocklen(bp) < MinAdvise){ + ipriv->stats[LenErrs]++; + goto raise; + } + p = (Icmp *)bp->rp; + pr = Fsrcvpcolx(icmp->f, p->proto); + if(pr != nil && pr->advise != nil) { + (*pr->advise)(pr, bp, msg); + return; + } + + bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE; + goticmpkt(icmp, bp); + break; + case TimeExceed: + if(p->code == 0){ + sprint(m2, "ttl exceeded at %V", p->src); + + bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE; + if(blocklen(bp) < MinAdvise){ + ipriv->stats[LenErrs]++; + goto raise; + } + p = (Icmp *)bp->rp; + pr = Fsrcvpcolx(icmp->f, p->proto); + if(pr != nil && pr->advise != nil) { + (*pr->advise)(pr, bp, m2); + return; + } + bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE; + } + + goticmpkt(icmp, bp); + break; + default: + goticmpkt(icmp, bp); + break; + } + return; + +raise: 
+ freeblist(bp); +} + +void +icmpadvise(Proto *icmp, Block *bp, char *msg) +{ + Conv **c, *s; + Icmp *p; + uchar dst[IPaddrlen]; + ushort recid; + + p = (Icmp *) bp->rp; + v4tov6(dst, p->dst); + recid = nhgets(p->icmpid); + + for(c = icmp->conv; *c; c++) { + s = *c; + if(s->lport == recid) + if(ipcmp(s->raddr, dst) == 0){ + qhangup(s->rq, msg); + qhangup(s->wq, msg); + break; + } + } + freeblist(bp); +} + +int +icmpstats(Proto *icmp, char *buf, int len) +{ + Icmppriv *priv; + char *p, *e; + int i; + + priv = icmp->priv; + p = buf; + e = p+len; + for(i = 0; i < Nstats; i++) + p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]); + for(i = 0; i <= Maxtype; i++){ + if(icmpnames[i]) + p = seprint(p, e, "%s: %lud %lud\n", icmpnames[i], priv->in[i], priv->out[i]); + else + p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]); + } + return p - buf; +} + +void +icmpinit(Fs *fs) +{ + Proto *icmp; + + icmp = smalloc(sizeof(Proto)); + icmp->priv = smalloc(sizeof(Icmppriv)); + icmp->name = "icmp"; + icmp->connect = icmpconnect; + icmp->announce = icmpannounce; + icmp->state = icmpstate; + icmp->create = icmpcreate; + icmp->close = icmpclose; + icmp->rcv = icmpiput; + icmp->stats = icmpstats; + icmp->ctl = nil; + icmp->advise = icmpadvise; + icmp->gc = nil; + icmp->ipproto = IP_ICMPPROTO; + icmp->nc = 128; + icmp->ptclsize = 0; + + Fsproto(fs, icmp); +} diff --git a/os/ip/icmp6.c b/os/ip/icmp6.c new file mode 100644 index 00000000..bca78a34 --- /dev/null +++ b/os/ip/icmp6.c @@ -0,0 +1,917 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "ip.h" +#include "ipv6.h" + +typedef struct ICMPpkt ICMPpkt; +typedef struct IPICMP IPICMP; +typedef struct Ndpkt Ndpkt; +typedef struct NdiscC NdiscC; + +struct ICMPpkt { + uchar type; + uchar code; + uchar cksum[2]; + uchar icmpid[2]; + uchar seq[2]; +}; + +struct IPICMP { + Ip6hdr; + ICMPpkt; +}; + +struct NdiscC +{ + IPICMP; + uchar 
target[IPaddrlen]; +}; + +struct Ndpkt +{ + NdiscC; + uchar otype; + uchar olen; // length in units of 8 octets(incl type, code), + // 1 for IEEE 802 addresses + uchar lnaddr[6]; // link-layer address +}; + +enum { + // ICMPv6 types + EchoReply = 0, + UnreachableV6 = 1, + PacketTooBigV6 = 2, + TimeExceedV6 = 3, + SrcQuench = 4, + ParamProblemV6 = 4, + Redirect = 5, + EchoRequest = 8, + TimeExceed = 11, + InParmProblem = 12, + Timestamp = 13, + TimestampReply = 14, + InfoRequest = 15, + InfoReply = 16, + AddrMaskRequest = 17, + AddrMaskReply = 18, + EchoRequestV6 = 128, + EchoReplyV6 = 129, + RouterSolicit = 133, + RouterAdvert = 134, + NbrSolicit = 135, + NbrAdvert = 136, + RedirectV6 = 137, + + Maxtype6 = 137, +}; + +char *icmpnames6[Maxtype6+1] = +{ +[EchoReply] "EchoReply", +[UnreachableV6] "UnreachableV6", +[PacketTooBigV6] "PacketTooBigV6", +[TimeExceedV6] "TimeExceedV6", +[SrcQuench] "SrcQuench", +[Redirect] "Redirect", +[EchoRequest] "EchoRequest", +[TimeExceed] "TimeExceed", +[InParmProblem] "InParmProblem", +[Timestamp] "Timestamp", +[TimestampReply] "TimestampReply", +[InfoRequest] "InfoRequest", +[InfoReply] "InfoReply", +[AddrMaskRequest] "AddrMaskRequest", +[AddrMaskReply] "AddrMaskReply", +[EchoRequestV6] "EchoRequestV6", +[EchoReplyV6] "EchoReplyV6", +[RouterSolicit] "RouterSolicit", +[RouterAdvert] "RouterAdvert", +[NbrSolicit] "NbrSolicit", +[NbrAdvert] "NbrAdvert", +[RedirectV6] "RedirectV6", +}; + +enum +{ + InMsgs6, + InErrors6, + OutMsgs6, + CsumErrs6, + LenErrs6, + HlenErrs6, + HoplimErrs6, + IcmpCodeErrs6, + TargetErrs6, + OptlenErrs6, + AddrmxpErrs6, + RouterAddrErrs6, + + Nstats6, +}; + +static char *statnames6[Nstats6] = +{ +[InMsgs6] "InMsgs", +[InErrors6] "InErrors", +[OutMsgs6] "OutMsgs", +[CsumErrs6] "CsumErrs", +[LenErrs6] "LenErrs", +[HlenErrs6] "HlenErrs", +[HoplimErrs6] "HoplimErrs", +[IcmpCodeErrs6] "IcmpCodeErrs", +[TargetErrs6] "TargetErrs", +[OptlenErrs6] "OptlenErrs", +[AddrmxpErrs6] "AddrmxpErrs", +[RouterAddrErrs6] 
"RouterAddrErrs", +}; + +typedef struct Icmppriv6 +{ + ulong stats[Nstats6]; + + /* message counts */ + ulong in[Maxtype6+1]; + ulong out[Maxtype6+1]; +} Icmppriv6; + +typedef struct Icmpcb6 +{ + QLock; + uchar headers; +} Icmpcb6; + +static char *unreachcode[] = +{ +[icmp6_no_route] "no route to destination", +[icmp6_ad_prohib] "comm with destination administratively prohibited", +[icmp6_unassigned] "icmp unreachable: unassigned error code (2)", +[icmp6_adr_unreach] "address unreachable", +[icmp6_port_unreach] "port unreachable", +[icmp6_unkn_code] "icmp unreachable: unknown code", +}; + +enum { + ICMP_USEAD6 = 40, +}; + +enum { + Oflag = 1<<5, + Sflag = 1<<6, + Rflag = 1<<7, +}; + +enum { + slladd = 1, + tlladd = 2, + prfinfo = 3, + redhdr = 4, + mtuopt = 5, +}; + +static void icmpkick6(void *x, Block *bp); + +static void +icmpcreate6(Conv *c) +{ + c->rq = qopen(64*1024, Qmsg, 0, c); + c->wq = qbypass(icmpkick6, c); +} + +static void +set_cksum(Block *bp) +{ + IPICMP *p = (IPICMP *)(bp->rp); + + hnputl(p->vcf, 0); // borrow IP header as pseudoheader + hnputs(p->ploadlen, blocklen(bp)-IPV6HDR_LEN); + p->proto = 0; + p->ttl = ICMPv6; // ttl gets set later + hnputs(p->cksum, 0); + hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp))); + p->proto = ICMPv6; +} + +static Block * +newIPICMP(int packetlen) +{ + Block *nbp; + nbp = allocb(packetlen); + nbp->wp += packetlen; + memset(nbp->rp, 0, packetlen); + return nbp; +} + +void +icmpadvise6(Proto *icmp, Block *bp, char *msg) +{ + Conv **c, *s; + IPICMP *p; + ushort recid; + + p = (IPICMP *) bp->rp; + recid = nhgets(p->icmpid); + + for(c = icmp->conv; *c; c++) { + s = *c; + if(s->lport == recid) + if(ipcmp(s->raddr, p->dst) == 0){ + qhangup(s->rq, msg); + qhangup(s->wq, msg); + break; + } + } + freeblist(bp); +} + +static void +icmpkick6(void *x, Block *bp) +{ + Conv *c = x; + IPICMP *p; + uchar laddr[IPaddrlen], raddr[IPaddrlen]; + Icmppriv6 *ipriv = c->p->priv; + Icmpcb6 *icb = (Icmpcb6*)c->ptcl; + + if(bp == nil) + return; 
+ + if(icb->headers==6) { + /* get user specified addresses */ + bp = pullupblock(bp, ICMP_USEAD6); + if(bp == nil) + return; + bp->rp += 8; + ipmove(laddr, bp->rp); + bp->rp += IPaddrlen; + ipmove(raddr, bp->rp); + bp->rp += IPaddrlen; + bp = padblock(bp, sizeof(Ip6hdr)); + } + + if(blocklen(bp) < sizeof(IPICMP)){ + freeblist(bp); + return; + } + p = (IPICMP *)(bp->rp); + if(icb->headers == 6) { + ipmove(p->dst, raddr); + ipmove(p->src, laddr); + } else { + ipmove(p->dst, c->raddr); + ipmove(p->src, c->laddr); + hnputs(p->icmpid, c->lport); + } + + set_cksum(bp); + p->vcf[0] = 0x06 << 4; + if(p->type <= Maxtype6) + ipriv->out[p->type]++; + ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil); +} + +char* +icmpctl6(Conv *c, char **argv, int argc) +{ + Icmpcb6 *icb; + + icb = (Icmpcb6*) c->ptcl; + + if(argc==1) { + if(strcmp(argv[0], "headers")==0) { + icb->headers = 6; + return nil; + } + } + return "unknown control request"; +} + +static void +goticmpkt6(Proto *icmp, Block *bp, int muxkey) +{ + Conv **c, *s; + IPICMP *p = (IPICMP *)bp->rp; + ushort recid; + uchar *addr; + + if(muxkey == 0) { + recid = nhgets(p->icmpid); + addr = p->src; + } + else { + recid = muxkey; + addr = p->dst; + } + + for(c = icmp->conv; *c; c++){ + s = *c; + if(s->lport == recid && ipcmp(s->raddr, addr) == 0){ + bp = concatblock(bp); + if(bp != nil) + qpass(s->rq, bp); + return; + } + } + + freeblist(bp); +} + +static Block * +mkechoreply6(Block *bp) +{ + IPICMP *p = (IPICMP *)(bp->rp); + uchar addr[IPaddrlen]; + + ipmove(addr, p->src); + ipmove(p->src, p->dst); + ipmove(p->dst, addr); + p->type = EchoReplyV6; + set_cksum(bp); + return bp; +} + +/* + * sends out an ICMPv6 neighbor solicitation + * suni == SRC_UNSPEC or SRC_UNI, + * tuni == TARG_MULTI => multicast for address resolution, + * and tuni == TARG_UNI => neighbor reachability. 
+ */ + +extern void +icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac) +{ + Block *nbp; + Ndpkt *np; + Proto *icmp = f->t2p[ICMPv6]; + Icmppriv6 *ipriv = icmp->priv; + + + nbp = newIPICMP(sizeof(Ndpkt)); + np = (Ndpkt*) nbp->rp; + + + if(suni == SRC_UNSPEC) + memmove(np->src, v6Unspecified, IPaddrlen); + else + memmove(np->src, src, IPaddrlen); + + if(tuni == TARG_UNI) + memmove(np->dst, targ, IPaddrlen); + else + ipv62smcast(np->dst, targ); + + np->type = NbrSolicit; + np->code = 0; + memmove(np->target, targ, IPaddrlen); + if(suni != SRC_UNSPEC) { + np->otype = SRC_LLADDRESS; + np->olen = 1; /* 1+1+6 = 8 = 1 8-octet */ + memmove(np->lnaddr, mac, sizeof(np->lnaddr)); + } + else { + int r = sizeof(Ndpkt)-sizeof(NdiscC); + nbp->wp -= r; + } + + set_cksum(nbp); + np = (Ndpkt*) nbp->rp; + np->ttl = HOP_LIMIT; + np->vcf[0] = 0x06 << 4; + ipriv->out[NbrSolicit]++; + netlog(f, Logicmp, "sending neighbor solicitation %I\n", targ); + ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil); +} + +/* + * sends out an ICMPv6 neighbor advertisement. pktflags == RSO flags. 
+ */ +extern void +icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags) +{ + Block *nbp; + Ndpkt *np; + Proto *icmp = f->t2p[ICMPv6]; + Icmppriv6 *ipriv = icmp->priv; + + nbp = newIPICMP(sizeof(Ndpkt)); + np = (Ndpkt*) nbp->rp; + + memmove(np->src, src, IPaddrlen); + memmove(np->dst, dst, IPaddrlen); + + np->type = NbrAdvert; + np->code = 0; + np->icmpid[0] = flags; + memmove(np->target, targ, IPaddrlen); + + np->otype = TARGET_LLADDRESS; + np->olen = 1; + memmove(np->lnaddr, mac, sizeof(np->lnaddr)); + + set_cksum(nbp); + np = (Ndpkt*) nbp->rp; + np->ttl = HOP_LIMIT; + np->vcf[0] = 0x06 << 4; + ipriv->out[NbrAdvert]++; + netlog(f, Logicmp, "sending neighbor advertisement %I\n", src); + ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil); +} + +extern void +icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free) +{ + Block *nbp; + IPICMP *np; + Ip6hdr *p; + int osz = BLEN(bp); + int sz = MIN(sizeof(IPICMP) + osz, v6MINTU); + Proto *icmp = f->t2p[ICMPv6]; + Icmppriv6 *ipriv = icmp->priv; + + p = (Ip6hdr *) bp->rp; + + if(isv6mcast(p->src)) + goto clean; + + nbp = newIPICMP(sz); + np = (IPICMP *) nbp->rp; + + rlock(ifc); + if(ipv6anylocal(ifc, np->src)) { + netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n", p->src, p->dst); + } + else { + netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n", p->src, p->dst); + freeblist(nbp); + if(free) + goto clean; + else + return; + } + + memmove(np->dst, p->src, IPaddrlen); + np->type = UnreachableV6; + np->code = code; + memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP)); + set_cksum(nbp); + np->ttl = HOP_LIMIT; + np->vcf[0] = 0x06 << 4; + ipriv->out[UnreachableV6]++; + + if(free) + ipiput6(f, ifc, nbp); + else { + ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil); + return; + } + +clean: + runlock(ifc); + freeblist(bp); +} + +extern void +icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp) +{ + Block *nbp; + IPICMP *np; + Ip6hdr *p; + int osz = BLEN(bp); + int sz = MIN(sizeof(IPICMP) + osz, v6MINTU); + Proto 
*icmp = f->t2p[ICMPv6]; + Icmppriv6 *ipriv = icmp->priv; + + p = (Ip6hdr *) bp->rp; + + if(isv6mcast(p->src)) + return; + + nbp = newIPICMP(sz); + np = (IPICMP *) nbp->rp; + + if(ipv6anylocal(ifc, np->src)) { + netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n", p->src, p->dst); + } + else { + netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n", p->src, p->dst); + return; + } + + memmove(np->dst, p->src, IPaddrlen); + np->type = TimeExceedV6; + np->code = 0; + memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP)); + set_cksum(nbp); + np->ttl = HOP_LIMIT; + np->vcf[0] = 0x06 << 4; + ipriv->out[TimeExceedV6]++; + ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil); +} + +extern void +icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp) +{ + Block *nbp; + IPICMP *np; + Ip6hdr *p; + int osz = BLEN(bp); + int sz = MIN(sizeof(IPICMP) + osz, v6MINTU); + Proto *icmp = f->t2p[ICMPv6]; + Icmppriv6 *ipriv = icmp->priv; + + p = (Ip6hdr *) bp->rp; + + if(isv6mcast(p->src)) + return; + + nbp = newIPICMP(sz); + np = (IPICMP *) nbp->rp; + + if(ipv6anylocal(ifc, np->src)) { + netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n", p->src, p->dst); + } + else { + netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n", p->src, p->dst); + return; + } + + memmove(np->dst, p->src, IPaddrlen); + np->type = PacketTooBigV6; + np->code = 0; + hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize); + memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP)); + set_cksum(nbp); + np->ttl = HOP_LIMIT; + np->vcf[0] = 0x06 << 4; + ipriv->out[PacketTooBigV6]++; + ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil); +} + +/* + * RFC 2461, pages 39-40, pages 57-58. 
+ */ +static int +valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv) { + int sz, osz, unsp, n, ttl, iplen; + int pktsz = BLEN(bp); + uchar *packet = bp->rp; + IPICMP *p = (IPICMP *) packet; + Ndpkt *np; + + USED(ifc); + n = blocklen(bp); + if(n < sizeof(IPICMP)) { + ipriv->stats[HlenErrs6]++; + netlog(icmp->f, Logicmp, "icmp hlen %d\n", n); + goto err; + } + + iplen = nhgets(p->ploadlen); + if(iplen > n-IPV6HDR_LEN || (iplen % 1)) { + ipriv->stats[LenErrs6]++; + netlog(icmp->f, Logicmp, "icmp length %d\n", iplen); + goto err; + } + + // Rather than construct explicit pseudoheader, overwrite IPv6 header + if(p->proto != ICMPv6) { + // This code assumes no extension headers!!! + netlog(icmp->f, Logicmp, "icmp error: extension header\n"); + goto err; + } + memset(packet, 0, 4); + ttl = p->ttl; + p->ttl = p->proto; + p->proto = 0; + if(ptclcsum(bp, 0, iplen + IPV6HDR_LEN)) { + ipriv->stats[CsumErrs6]++; + netlog(icmp->f, Logicmp, "icmp checksum error\n"); + goto err; + } + p->proto = p->ttl; + p->ttl = ttl; + + /* additional tests for some pkt types */ + if( (p->type == NbrSolicit) || + (p->type == NbrAdvert) || + (p->type == RouterAdvert) || + (p->type == RouterSolicit) || + (p->type == RedirectV6) ) { + + if(p->ttl != HOP_LIMIT) { + ipriv->stats[HoplimErrs6]++; + goto err; + } + if(p->code != 0) { + ipriv->stats[IcmpCodeErrs6]++; + goto err; + } + + switch (p->type) { + case NbrSolicit: + case NbrAdvert: + np = (Ndpkt*) p; + if(isv6mcast(np->target)) { + ipriv->stats[TargetErrs6]++; + goto err; + } + if(optexsts(np) && (np->olen == 0)) { + ipriv->stats[OptlenErrs6]++; + goto err; + } + + if(p->type == NbrSolicit) { + if(ipcmp(np->src, v6Unspecified) == 0) { + if(!issmcast(np->dst) || optexsts(np)) { + ipriv->stats[AddrmxpErrs6]++; + goto err; + } + } + } + + if(p->type == NbrAdvert) { + if((isv6mcast(np->dst))&&(nhgets(np->icmpid) & Sflag)){ + ipriv->stats[AddrmxpErrs6]++; + goto err; + } + } + break; + + case RouterAdvert: + if(pktsz - sizeof(Ip6hdr) < 16) 
{ + ipriv->stats[HlenErrs6]++; + goto err; + } + if(!islinklocal(p->src)) { + ipriv->stats[RouterAddrErrs6]++; + goto err; + } + sz = sizeof(IPICMP) + 8; + while ((sz+1) < pktsz) { + osz = *(packet+sz+1); + if(osz <= 0) { + ipriv->stats[OptlenErrs6]++; + goto err; + } + sz += 8*osz; + } + break; + + case RouterSolicit: + if(pktsz - sizeof(Ip6hdr) < 8) { + ipriv->stats[HlenErrs6]++; + goto err; + } + unsp = (ipcmp(p->src, v6Unspecified) == 0); + sz = sizeof(IPICMP) + 8; + while ((sz+1) < pktsz) { + osz = *(packet+sz+1); + if((osz <= 0) || + (unsp && (*(packet+sz) == slladd)) ) { + ipriv->stats[OptlenErrs6]++; + goto err; + } + sz += 8*osz; + } + break; + + case RedirectV6: + //to be filled in + break; + + default: + goto err; + } + } + + return 1; + +err: + ipriv->stats[InErrors6]++; + return 0; +} + +static int +targettype(Fs *f, Ipifc *ifc, uchar *target) +{ + Iplifc *lifc; + int t; + + rlock(ifc); + if(ipproxyifc(f, ifc, target)) { + runlock(ifc); + return t_uniproxy; + } + + for(lifc = ifc->lifc; lifc; lifc = lifc->next) { + if(ipcmp(lifc->local, target) == 0) { + t = (lifc->tentative) ? 
t_unitent : t_unirany; + runlock(ifc); + return t; + } + } + + runlock(ifc); + return 0; +} + +static void +icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp) +{ + uchar *packet = bp->rp; + IPICMP *p = (IPICMP *)packet; + Icmppriv6 *ipriv = icmp->priv; + Block *r; + Proto *pr; + char *msg, m2[128]; + Ndpkt* np; + uchar pktflags; + uchar lsrc[IPaddrlen]; + int refresh = 1; + Iplifc *lifc; + + if(!valid(icmp, ipifc, bp, ipriv)) + goto raise; + + if(p->type <= Maxtype6) + ipriv->in[p->type]++; + else + goto raise; + + switch(p->type) { + case EchoRequestV6: + r = mkechoreply6(bp); + ipriv->out[EchoReply]++; + ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil); + break; + + case UnreachableV6: + if(p->code > 4) + msg = unreachcode[icmp6_unkn_code]; + else + msg = unreachcode[p->code]; + + bp->rp += sizeof(IPICMP); + if(blocklen(bp) < 8){ + ipriv->stats[LenErrs6]++; + goto raise; + } + p = (IPICMP *)bp->rp; + pr = Fsrcvpcolx(icmp->f, p->proto); + if(pr != nil && pr->advise != nil) { + (*pr->advise)(pr, bp, msg); + return; + } + + bp->rp -= sizeof(IPICMP); + goticmpkt6(icmp, bp, 0); + break; + + case TimeExceedV6: + if(p->code == 0){ + sprint(m2, "ttl exceeded at %I", p->src); + + bp->rp += sizeof(IPICMP); + if(blocklen(bp) < 8){ + ipriv->stats[LenErrs6]++; + goto raise; + } + p = (IPICMP *)bp->rp; + pr = Fsrcvpcolx(icmp->f, p->proto); + if(pr != nil && pr->advise != nil) { + (*pr->advise)(pr, bp, m2); + return; + } + bp->rp -= sizeof(IPICMP); + } + + goticmpkt6(icmp, bp, 0); + break; + + case RouterAdvert: + case RouterSolicit: + /* using lsrc as a temp, munge hdr for goticmp6 + memmove(lsrc, p->src, IPaddrlen); + memmove(p->src, p->dst, IPaddrlen); + memmove(p->dst, lsrc, IPaddrlen); */ + + goticmpkt6(icmp, bp, p->type); + break; + + case NbrSolicit: + np = (Ndpkt*) p; + pktflags = 0; + switch (targettype(icmp->f, ipifc, np->target)) { + case t_unirany: + pktflags |= Oflag; + /* fall through */ + + case t_uniproxy: + if(ipcmp(np->src, v6Unspecified) != 0) { + arpenter(icmp->f, 
V6, np->src, np->lnaddr, 8*np->olen-2, 0); + pktflags |= Sflag; + } + if(ipv6local(ipifc, lsrc)) { + icmpna(icmp->f, lsrc, + (ipcmp(np->src, v6Unspecified)==0)?v6allnodesL:np->src, + np->target, ipifc->mac, pktflags); + } + else + freeblist(bp); + break; + + case t_unitent: + /* not clear what needs to be done. send up + * an icmp mesg saying don't use this address? */ + + default: + freeblist(bp); + } + + break; + + case NbrAdvert: + np = (Ndpkt*) p; + + /* if the target address matches one of the local interface + * address and the local interface address has tentative bit set, + * then insert into ARP table. this is so the duplication address + * detection part of ipconfig can discover duplication through + * the arp table + */ + lifc = iplocalonifc(ipifc, np->target); + if(lifc && lifc->tentative) + refresh = 0; + arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, refresh); + freeblist(bp); + break; + + case PacketTooBigV6: + + default: + goticmpkt6(icmp, bp, 0); + break; + } + return; + +raise: + freeblist(bp); + +} + +int +icmpstats6(Proto *icmp6, char *buf, int len) +{ + Icmppriv6 *priv; + char *p, *e; + int i; + + priv = icmp6->priv; + p = buf; + e = p+len; + for(i = 0; i < Nstats6; i++) + p = seprint(p, e, "%s: %lud\n", statnames6[i], priv->stats[i]); + for(i = 0; i <= Maxtype6; i++){ + if(icmpnames6[i]) + p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i], priv->in[i], priv->out[i]); +/* else + p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]); +*/ + } + return p - buf; +} + + +// need to import from icmp.c +extern int icmpstate(Conv *c, char *state, int n); +extern char* icmpannounce(Conv *c, char **argv, int argc); +extern char* icmpconnect(Conv *c, char **argv, int argc); +extern void icmpclose(Conv *c); + +void +icmp6init(Fs *fs) +{ + Proto *icmp6 = smalloc(sizeof(Proto)); + + icmp6->priv = smalloc(sizeof(Icmppriv6)); + icmp6->name = "icmpv6"; + icmp6->connect = icmpconnect; + icmp6->announce = icmpannounce; + icmp6->state = 
icmpstate; + icmp6->create = icmpcreate6; + icmp6->close = icmpclose; + icmp6->rcv = icmpiput6; + icmp6->stats = icmpstats6; + icmp6->ctl = icmpctl6; + icmp6->advise = icmpadvise6; + icmp6->gc = nil; + icmp6->ipproto = ICMPv6; + icmp6->nc = 16; + icmp6->ptclsize = sizeof(Icmpcb6); + + Fsproto(fs, icmp6); +} + diff --git a/os/ip/igmp.c b/os/ip/igmp.c new file mode 100644 index 00000000..109df303 --- /dev/null +++ b/os/ip/igmp.c @@ -0,0 +1,291 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +enum +{ + IGMP_IPHDRSIZE = 20, /* size of ip header */ + IGMP_HDRSIZE = 8, /* size of IGMP header */ + IP_IGMPPROTO = 2, + + IGMPquery = 1, + IGMPreport = 2, + + MSPTICK = 100, + MAXTIMEOUT = 10000/MSPTICK, /* at most 10 secs for a response */ +}; + +typedef struct IGMPpkt IGMPpkt; +struct IGMPpkt +{ + /* ip header */ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar len[2]; /* packet length (including headers) */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + uchar Unused; + uchar proto; /* Protocol */ + uchar cksum[2]; /* checksum of ip portion */ + uchar src[IPaddrlen]; /* Ip source */ + uchar dst[IPaddrlen]; /* Ip destination */ + + /* igmp header */ + uchar vertype; /* version and type */ + uchar unused; + uchar igmpcksum[2]; /* checksum of igmp portion */ + uchar group[IPaddrlen]; /* multicast group */ +}; + +/* + * lists for group reports + */ +typedef struct IGMPrep IGMPrep; +struct IGMPrep +{ + IGMPrep *next; + Media *m; + int ticks; + Multicast *multi; +}; + +typedef struct IGMP IGMP; +struct IGMP +{ + Lock; + Rendez r; + IGMPrep *reports; +}; + +IGMP igmpalloc; + + Proto igmp; +extern Fs fs; + +static struct Stats +{ + ulong inqueries; + ulong outqueries; + ulong inreports; + ulong outreports; +} stats; + +void +igmpsendreport(Media *m, uchar *addr) +{ + IGMPpkt *p; + Block *bp; + + bp = 
allocb(sizeof(IGMPpkt)); + if(bp == nil) + return; + p = (IGMPpkt*)bp->wp; + p->vihl = IP_VER4; + bp->wp += sizeof(IGMPpkt); + memset(bp->rp, 0, sizeof(IGMPpkt)); + hnputl(p->src, Mediagetaddr(m)); + hnputl(p->dst, Ipallsys); + p->vertype = (1<<4) | IGMPreport; + p->proto = IP_IGMPPROTO; + memmove(p->group, addr, IPaddrlen); + hnputs(p->igmpcksum, ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE)); + netlog(Logigmp, "igmpreport %I\n", p->group); + stats.outreports++; + ipoput4(bp, 0, 1, DFLTTOS, nil); /* TTL of 1 */ +} + +static int +isreport(void *a) +{ + USED(a); + return igmpalloc.reports != 0; +} + + +void +igmpproc(void *a) +{ + IGMPrep *rp, **lrp; + Multicast *mp, **lmp; + uchar ip[IPaddrlen]; + + USED(a); + + for(;;){ + sleep(&igmpalloc.r, isreport, 0); + for(;;){ + lock(&igmpalloc); + + if(igmpalloc.reports == nil) + break; + + /* look for a single report */ + lrp = &igmpalloc.reports; + mp = nil; + for(rp = *lrp; rp; rp = *lrp){ + rp->ticks++; + lmp = &rp->multi; + for(mp = *lmp; mp; mp = *lmp){ + if(rp->ticks >= mp->timeout){ + *lmp = mp->next; + break; + } + lmp = &mp->next; + } + if(mp != nil) + break; + + if(rp->multi != nil){ + lrp = &rp->next; + continue; + } else { + *lrp = rp->next; + free(rp); + } + } + unlock(&igmpalloc); + + if(mp){ + /* do a single report and try again */ + hnputl(ip, mp->addr); + igmpsendreport(rp->m, ip); + free(mp); + continue; + } + + tsleep(&up->sleep, return0, 0, MSPTICK); + } + unlock(&igmpalloc); + } + +} + +void +igmpiput(Media *m, Ipifc *, Block *bp) +{ + int n; + IGMPpkt *ghp; + Ipaddr group; + IGMPrep *rp, **lrp; + Multicast *mp, **lmp; + + ghp = (IGMPpkt*)(bp->rp); + netlog(Logigmp, "igmpiput: %d %I\n", ghp->vertype, ghp->group); + + n = blocklen(bp); + if(n < IGMP_IPHDRSIZE+IGMP_HDRSIZE){ + netlog(Logigmp, "igmpiput: bad len\n"); + goto error; + } + if((ghp->vertype>>4) != 1){ + netlog(Logigmp, "igmpiput: bad igmp type\n"); + goto error; + } + if(ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE)){ + netlog(Logigmp, "igmpiput: 
checksum error %I\n", ghp->src); + goto error; + } + + group = nhgetl(ghp->group); + + lock(&igmpalloc); + switch(ghp->vertype & 0xf){ + case IGMPquery: + /* + * start reporting groups that we're a member of. + */ + stats.inqueries++; + for(rp = igmpalloc.reports; rp; rp = rp->next) + if(rp->m == m) + break; + if(rp != nil) + break; /* already reporting */ + + mp = Mediacopymulti(m); + if(mp == nil) + break; + + rp = malloc(sizeof(*rp)); + if(rp == nil) + break; + + rp->m = m; + rp->multi = mp; + rp->ticks = 0; + for(; mp; mp = mp->next) + mp->timeout = nrand(MAXTIMEOUT); + rp->next = igmpalloc.reports; + igmpalloc.reports = rp; + + wakeup(&igmpalloc.r); + + break; + case IGMPreport: + /* + * find report list for this medium + */ + stats.inreports++; + lrp = &igmpalloc.reports; + for(rp = *lrp; rp; rp = *lrp){ + if(rp->m == m) + break; + lrp = &rp->next; + } + if(rp == nil) + break; + + /* + * if someone else has reported a group, + * we don't have to. + */ + lmp = &rp->multi; + for(mp = *lmp; mp; mp = *lmp){ + if(mp->addr == group){ + *lmp = mp->next; + free(mp); + break; + } + lmp = &mp->next; + } + + break; + } + unlock(&igmpalloc); + +error: + freeb(bp); +} + +int +igmpstats(char *buf, int len) +{ + return snprint(buf, len, "\trcvd %d %d\n\tsent %d %d\n", + stats.inqueries, stats.inreports, + stats.outqueries, stats.outreports); +} + +void +igmpinit(Fs *fs) +{ + igmp.name = "igmp"; + igmp.connect = nil; + igmp.announce = nil; + igmp.ctl = nil; + igmp.state = nil; + igmp.close = nil; + igmp.rcv = igmpiput; + igmp.stats = igmpstats; + igmp.ipproto = IP_IGMPPROTO; + igmp.nc = 0; + igmp.ptclsize = 0; + + igmpreportfn = igmpsendreport; + kproc("igmpproc", igmpproc, 0, 0); + + Fsproto(fs, &igmp); +} diff --git a/os/ip/ihbootp.c b/os/ip/ihbootp.c new file mode 100644 index 00000000..68b14b1d --- /dev/null +++ b/os/ip/ihbootp.c @@ -0,0 +1,323 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" 
+#include "kernel.h" +#include "ip.h" + +static ulong fsip; +static ulong auip; +static ulong gwip; +static ulong ipmask; +static ulong ipaddr; +static ulong dnsip; + +enum +{ + Bootrequest = 1, + Bootreply = 2, +}; + +typedef struct Bootp +{ + /* udp.c oldheader */ + uchar raddr[IPaddrlen]; + uchar laddr[IPaddrlen]; + uchar rport[2]; + uchar lport[2]; + /* bootp itself */ + uchar op; /* opcode */ + uchar htype; /* hardware type */ + uchar hlen; /* hardware address len */ + uchar hops; /* hops */ + uchar xid[4]; /* a random number */ + uchar secs[2]; /* elapsed snce client started booting */ + uchar pad[2]; + uchar ciaddr[4]; /* client IP address (client tells server) */ + uchar yiaddr[4]; /* client IP address (server tells client) */ + uchar siaddr[4]; /* server IP address */ + uchar giaddr[4]; /* gateway IP address */ + uchar chaddr[16]; /* client hardware address */ + uchar sname[64]; /* server host name (optional) */ + uchar file[128]; /* boot file name */ + uchar vend[128]; /* vendor-specific goo */ +} Bootp; + +/* + * bootp returns: + * + * "fsip d.d.d.d + * auip d.d.d.d + * gwip d.d.d.d + * ipmask d.d.d.d + * ipaddr d.d.d.d + * dnsip d.d.d.d" + * + * where d.d.d.d is the IP address in dotted decimal notation, and each + * address is followed by a newline. + */ + +static Bootp req; +static Proc* rcvprocp; +static int recv; +static int done; +static Rendez bootpr; +static char rcvbuf[512]; +static int bootpdebug; + +/* + * Parse the vendor specific fields according to RFC 1084. + * We are overloading the "cookie server" to be the Inferno + * authentication server and the "resource location server" + * to be the Inferno file server. + * + * If the vendor specific field is formatted properly, it + * will begin with the four bytes 99.130.83.99 and end with + * an 0xFF byte. 
+ */ +static void +parsevend(uchar* vend) +{ + /* The field must start with 99.130.83.99 to be compliant */ + if ((vend[0] != 99) || (vend[1] != 130) || + (vend[2] != 83) || (vend[3] != 99)){ + if(bootpdebug) + print("bad bootp vendor field: %.2x%.2x%.2x%.2x", vend[0], vend[1], vend[2], vend[3]); + return; + } + + /* Skip over the magic cookie */ + vend += 4; + + while ((vend[0] != 0) && (vend[0] != 0xFF)) { + if(bootpdebug){ + int i; + print("vend %d [%d]", vend[0], vend[1]); + for(i=0; i<vend[1]; i++) + print(" %2.2x", vend[i]); + print("\n"); + } + switch (vend[0]) { + case 1: /* Subnet mask field */ + /* There must be only one subnet mask */ + if (vend[1] != 4) + return; + + ipmask = (vend[2]<<24)| + (vend[3]<<16)| + (vend[4]<<8)| + vend[5]; + break; + + case 3: /* Gateway/router field */ + /* We are only concerned with first address */ + if (vend[1] < 4) + break; + + gwip = (vend[2]<<24)| + (vend[3]<<16)| + (vend[4]<<8)| + vend[5]; + break; + + case 6: /* DNS server */ + /* We are only concerned with first address */ + if (vend[1] < 4) + break; + + dnsip = (vend[2]<<24)| + (vend[3]<<16)| + (vend[4]<<8)| + vend[5]; + break; + + case 8: /* "Cookie server" (auth server) field */ + /* We are only concerned with first address */ + if (vend[1] < 4) + break; + + auip = (vend[2]<<24)| + (vend[3]<<16)| + (vend[4]<<8)| + vend[5]; + break; + + case 11: /* "Resource loc server" (file server) field */ + /* We are only concerned with first address */ + if (vend[1] < 4) + break; + + fsip = (vend[2]<<24)| + (vend[3]<<16)| + (vend[4]<<8)| + vend[5]; + break; + + default: /* Ignore everything else */ + break; + } + + /* Skip over the field */ + vend += vend[1] + 2; + } +} + +static void +rcvbootp(void *a) +{ + int n, fd; + Bootp *rp; + + if(waserror()) + pexit("", 0); + rcvprocp = up; /* store for postnote below */ + fd = (int)a; + while(done == 0) { + n = kread(fd, rcvbuf, sizeof(rcvbuf)); + if(n <= 0) + break; + rp = (Bootp*)rcvbuf; + if (memcmp(req.chaddr, rp->chaddr, 6) == 
0 && + rp->htype == 1 && rp->hlen == 6) { + ipaddr = (rp->yiaddr[0]<<24)| + (rp->yiaddr[1]<<16)| + (rp->yiaddr[2]<<8)| + rp->yiaddr[3]; + parsevend(rp->vend); + break; + } + } + poperror(); + rcvprocp = nil; + + recv = 1; + wakeup(&bootpr); + pexit("", 0); +} + +static char* +rbootp(Ipifc *ifc) +{ + int cfd, dfd, tries, n; + char ia[5+3*16], im[16], *av[3]; + uchar nipaddr[4], ngwip[4], nipmask[4]; + char dir[Maxpath]; + static uchar vend_rfc1048[] = { 99, 130, 83, 99 }; + + av[1] = "0.0.0.0"; + av[2] = "0.0.0.0"; + ipifcadd(ifc, av, 3, 0, nil); + + cfd = kannounce("udp!*!68", dir); + if(cfd < 0) + return "bootp announce failed"; + strcat(dir, "/data"); + if(kwrite(cfd, "headers", 7) < 0){ + kclose(cfd); + return "bootp ctl headers failed"; + } + kwrite(cfd, "oldheaders", 10); + dfd = kopen(dir, ORDWR); + if(dfd < 0){ + kclose(cfd); + return "bootp open data failed"; + } + kclose(cfd); + + /* create request */ + memset(&req, 0, sizeof(req)); + ipmove(req.raddr, IPv4bcast); + hnputs(req.rport, 67); + req.op = Bootrequest; + req.htype = 1; /* ethernet (all we know) */ + req.hlen = 6; /* ethernet (all we know) */ + + /* Hardware MAC address */ + memmove(req.chaddr, ifc->mac, 6); + /* Fill in the local IP address if we know it */ + ipv4local(ifc, req.ciaddr); + memset(req.file, 0, sizeof(req.file)); + memmove(req.vend, vend_rfc1048, 4); + + done = 0; + recv = 0; + + kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG); + + /* + * broadcast bootp's till we get a reply, + * or fixed number of tries + */ + tries = 0; + while(recv == 0) { + if(kwrite(dfd, &req, sizeof(req)) < 0) + print("bootp: write: %r"); + + tsleep(&bootpr, return0, 0, 1000); + if(++tries > 10) { + print("bootp: timed out\n"); + break; + } + } + kclose(dfd); + done = 1; + if(rcvprocp != nil){ + postnote(rcvprocp, 1, "timeout", 0); + rcvprocp = nil; + } + + av[1] = "0.0.0.0"; + av[2] = "0.0.0.0"; + ipifcrem(ifc, av, 3); + + hnputl(nipaddr, ipaddr); + sprint(ia, "%V", nipaddr); + hnputl(nipmask, ipmask); + 
sprint(im, "%V", nipmask); + av[1] = ia; + av[2] = im; + ipifcadd(ifc, av, 3, 0, nil); + + if(gwip != 0) { + hnputl(ngwip, gwip); + n = sprint(ia, "add 0.0.0.0 0.0.0.0 %V", ngwip); + routewrite(ifc->conv->p->f, nil, ia, n); + } + return nil; +} + +static int +rbootpread(char *bp, ulong offset, int len) +{ + int n; + char *buf; + uchar a[4]; + + buf = smalloc(READSTR); + if(waserror()){ + free(buf); + nexterror(); + } + hnputl(a, fsip); + n = snprint(buf, READSTR, "fsip %15V\n", a); + hnputl(a, auip); + n += snprint(buf + n, READSTR-n, "auip %15V\n", a); + hnputl(a, gwip); + n += snprint(buf + n, READSTR-n, "gwip %15V\n", a); + hnputl(a, ipmask); + n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a); + hnputl(a, ipaddr); + n += snprint(buf + n, READSTR-n, "ipaddr %15V\n", a); + hnputl(a, dnsip); + snprint(buf + n, READSTR-n, "dnsip %15V\n", a); + + len = readstr(offset, bp, len, buf); + poperror(); + free(buf); + return len; +} + +char* (*bootp)(Ipifc*) = rbootp; +int (*bootpread)(char*, ulong, int) = rbootpread; diff --git a/os/ip/il.c b/os/ip/il.c new file mode 100644 index 00000000..5423194c --- /dev/null +++ b/os/ip/il.c @@ -0,0 +1,1408 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +enum /* Connection state */ +{ + Ilclosed, + Ilsyncer, + Ilsyncee, + Ilestablished, + Illistening, + Ilclosing, + Ilopening, /* only for file server */ +}; + +char *ilstates[] = +{ + "Closed", + "Syncer", + "Syncee", + "Established", + "Listen", + "Closing", + "Opening", /* only for file server */ +}; + +enum /* Packet types */ +{ + Ilsync, + Ildata, + Ildataquery, + Ilack, + Ilquery, + Ilstate, + Ilclose, +}; + +char *iltype[] = +{ + "sync", + "data", + "dataquery", + "ack", + "query", + "state", + "close" +}; + +enum +{ + Seconds = 1000, + Iltickms = 50, /* time base */ + AckDelay = 2*Iltickms, /* max time twixt message rcvd & ack sent */ + MaxTimeout = 30*Seconds, /* max time 
between rexmit */ + QueryTime = 10*Seconds, /* time between subsequent queries */ + DeathTime = 30*QueryTime, + + MaxRexmit = 16, /* max retransmissions before hangup */ + Defaultwin = 20, + + LogAGain = 3, + AGain = 1<<LogAGain, + LogDGain = 2, + DGain = 1<<LogDGain, + + DefByteRate = 100, /* assume a megabit link */ + DefRtt = 50, /* cross country on a great day */ + + Maxrq = 64*1024, +}; + +enum +{ + Nqt= 8, +}; + +typedef struct Ilcb Ilcb; +struct Ilcb /* Control block */ +{ + int state; /* Connection state */ + Conv *conv; + QLock ackq; /* Unacknowledged queue */ + Block *unacked; + Block *unackedtail; + ulong unackedbytes; + QLock outo; /* Out of order packet queue */ + Block *outoforder; + ulong next; /* Id of next to send */ + ulong recvd; /* Last packet received */ + ulong acksent; /* Last packet acked */ + ulong start; /* Local start id */ + ulong rstart; /* Remote start id */ + int window; /* Maximum receive window */ + int rxquery; /* number of queries on this connection */ + int rxtot; /* number of retransmits on this connection */ + int rexmit; /* number of retransmits of *unacked */ + ulong qt[Nqt+1]; /* state table for query messages */ + int qtx; /* ... 
index into qt */ + + /* if set, fasttimeout causes a connection request to terminate after 4*Iltickms */ + int fasttimeout; + + /* timers */ + ulong lastxmit; /* time of last xmit */ + ulong lastrecv; /* time of last recv */ + ulong timeout; /* retransmission time for *unacked */ + ulong acktime; /* time to send next ack */ + ulong querytime; /* time to send next query */ + + /* adaptive measurements */ + int delay; /* Average of the fixed rtt delay */ + int rate; /* Average uchar rate */ + int mdev; /* Mean deviation of rtt */ + int maxrtt; /* largest rtt seen */ + ulong rttack; /* The ack we are waiting for */ + int rttlen; /* Length of rttack packet */ + uvlong rttstart; /* Time we issued rttack packet */ +}; + +enum +{ + IL_IPSIZE = 20, + IL_HDRSIZE = 18, + IL_LISTEN = 0, + IL_CONNECT = 1, + IP_ILPROTO = 40, +}; + +typedef struct Ilhdr Ilhdr; +struct Ilhdr +{ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + uchar ttl; /* Time to live */ + uchar proto; /* Protocol */ + uchar cksum[2]; /* Header checksum */ + uchar src[4]; /* Ip source */ + uchar dst[4]; /* Ip destination */ + uchar ilsum[2]; /* Checksum including header */ + uchar illen[2]; /* Packet length */ + uchar iltype; /* Packet type */ + uchar ilspec; /* Special */ + uchar ilsrc[2]; /* Src port */ + uchar ildst[2]; /* Dst port */ + uchar ilid[4]; /* Sequence id */ + uchar ilack[4]; /* Acked sequence */ +}; + +enum +{ + InMsgs, + OutMsgs, + CsumErrs, /* checksum errors */ + HlenErrs, /* header length error */ + LenErrs, /* short packet */ + OutOfOrder, /* out of order */ + Retrans, /* retransmissions */ + DupMsg, + DupBytes, + DroppedMsgs, + + Nstats, +}; + +static char *statnames[] = +{ +[InMsgs] "InMsgs", +[OutMsgs] "OutMsgs", +[CsumErrs] "CsumErrs", +[HlenErrs] "HlenErr", +[LenErrs] "LenErrs", +[OutOfOrder] "OutOfOrder", +[Retrans] "Retrans", +[DupMsg] 
"DupMsg", +[DupBytes] "DupBytes", +[DroppedMsgs] "DroppedMsgs", +}; + +typedef struct Ilpriv Ilpriv; +struct Ilpriv +{ + Ipht ht; + + ulong stats[Nstats]; + + ulong csumerr; /* checksum errors */ + ulong hlenerr; /* header length error */ + ulong lenerr; /* short packet */ + ulong order; /* out of order */ + ulong rexmit; /* retransmissions */ + ulong dup; + ulong dupb; + + /* keeping track of the ack kproc */ + int ackprocstarted; + QLock apl; +}; + +/* state for query/dataquery messages */ + + +void ilrcvmsg(Conv*, Block*); +void ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int); +void ilackq(Ilcb*, Block*); +void ilprocess(Conv*, Ilhdr*, Block*); +void ilpullup(Conv*); +void ilhangup(Conv*, char*); +void ilfreeq(Ilcb*); +void ilrexmit(Ilcb*); +void ilbackoff(Ilcb*); +void ilsettimeout(Ilcb*); +char* ilstart(Conv*, int, int); +void ilackproc(void*); +void iloutoforder(Conv*, Ilhdr*, Block*); +void iliput(Proto*, Ipifc*, Block*); +void iladvise(Proto*, Block*, char*); +int ilnextqt(Ilcb*); +void ilcbinit(Ilcb*); +int later(ulong, ulong, char*); +void ilreject(Fs*, Ilhdr*); +void illocalclose(Conv *c); + int ilcksum = 1; +static int initseq = 25001; +static ulong scalediv, scalemul; +static char *etime = "connection timed out"; + +static char* +ilconnect(Conv *c, char **argv, int argc) +{ + char *e, *p; + int fast; + + /* huge hack to quickly try an il connection */ + fast = 0; + if(argc > 1){ + p = strstr(argv[1], "!fasttimeout"); + if(p != nil){ + *p = 0; + fast = 1; + } + } + + e = Fsstdconnect(c, argv, argc); + if(e != nil) + return e; + return ilstart(c, IL_CONNECT, fast); +} + +static int +ilstate(Conv *c, char *state, int n) +{ + Ilcb *ic; + + ic = (Ilcb*)(c->ptcl); + return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d", + ilstates[ic->state], + c->rq ? qlen(c->rq) : 0, + c->wq ? 
qlen(c->wq) : 0, + ic->delay>>LogAGain, ic->rate>>LogAGain, ic->mdev>>LogDGain, + ic->unackedbytes, ic->rxtot, ic->rxquery, ic->maxrtt); +} + +static int +ilinuse(Conv *c) +{ + Ilcb *ic; + + ic = (Ilcb*)(c->ptcl); + return ic->state != Ilclosed; + +} + +/* called with c locked */ +static char* +ilannounce(Conv *c, char **argv, int argc) +{ + char *e; + + e = Fsstdannounce(c, argv, argc); + if(e != nil) + return e; + e = ilstart(c, IL_LISTEN, 0); + if(e != nil) + return e; + Fsconnected(c, nil); + + return nil; +} + +void +illocalclose(Conv *c) +{ + Ilcb *ic; + Ilpriv *ipriv; + + ipriv = c->p->priv; + ic = (Ilcb*)c->ptcl; + ic->state = Ilclosed; + iphtrem(&ipriv->ht, c); + ipmove(c->laddr, IPnoaddr); + c->lport = 0; +} + +static void +ilclose(Conv *c) +{ + Ilcb *ic; + + ic = (Ilcb*)c->ptcl; + + qclose(c->rq); + qclose(c->wq); + qclose(c->eq); + + switch(ic->state) { + case Ilclosing: + case Ilclosed: + break; + case Ilsyncer: + case Ilsyncee: + case Ilestablished: + ic->state = Ilclosing; + ilsettimeout(ic); + ilsendctl(c, nil, Ilclose, ic->next, ic->recvd, 0); + break; + case Illistening: + illocalclose(c); + break; + } + ilfreeq(ic); +} + +void +ilkick(void *x, Block *bp) +{ + Conv *c = x; + Ilhdr *ih; + Ilcb *ic; + int dlen; + ulong id, ack; + Fs *f; + Ilpriv *priv; + + f = c->p->f; + priv = c->p->priv; + ic = (Ilcb*)c->ptcl; + + if(bp == nil) + return; + + switch(ic->state) { + case Ilclosed: + case Illistening: + case Ilclosing: + freeblist(bp); + qhangup(c->rq, nil); + return; + } + + dlen = blocklen(bp); + + /* Make space to fit il & ip */ + bp = padblock(bp, IL_IPSIZE+IL_HDRSIZE); + ih = (Ilhdr *)(bp->rp); + ih->vihl = IP_VER4; + + /* Ip fields */ + ih->frag[0] = 0; + ih->frag[1] = 0; + v6tov4(ih->dst, c->raddr); + v6tov4(ih->src, c->laddr); + ih->proto = IP_ILPROTO; + + /* Il fields */ + hnputs(ih->illen, dlen+IL_HDRSIZE); + hnputs(ih->ilsrc, c->lport); + hnputs(ih->ildst, c->rport); + + qlock(&ic->ackq); + id = ic->next++; + hnputl(ih->ilid, id); + ack = 
ic->recvd; + hnputl(ih->ilack, ack); + ic->acksent = ack; + ic->acktime = NOW + AckDelay; + ih->iltype = Ildata; + ih->ilspec = 0; + ih->ilsum[0] = 0; + ih->ilsum[1] = 0; + + /* Checksum of ilheader plus data (not ip & no pseudo header) */ + if(ilcksum) + hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, dlen+IL_HDRSIZE)); + + ilackq(ic, bp); + qunlock(&ic->ackq); + + /* Start the round trip timer for this packet if the timer is free */ + if(ic->rttack == 0) { + ic->rttack = id; + ic->rttstart = fastticks(nil); + ic->rttlen = dlen + IL_IPSIZE + IL_HDRSIZE; + } + + if(later(NOW, ic->timeout, nil)) + ilsettimeout(ic); + ipoput4(f, bp, 0, c->ttl, c->tos, c); + priv->stats[OutMsgs]++; +} + +static void +ilcreate(Conv *c) +{ + c->rq = qopen(Maxrq, 0, 0, c); + c->wq = qbypass(ilkick, c); +} + +int +ilxstats(Proto *il, char *buf, int len) +{ + Ilpriv *priv; + char *p, *e; + int i; + + priv = il->priv; + p = buf; + e = p+len; + for(i = 0; i < Nstats; i++) + p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]); + return p - buf; +} + +void +ilackq(Ilcb *ic, Block *bp) +{ + Block *np; + int n; + + n = blocklen(bp); + + /* Enqueue a copy on the unacked queue in case this one gets lost */ + np = copyblock(bp, n); + if(ic->unacked) + ic->unackedtail->list = np; + else + ic->unacked = np; + ic->unackedtail = np; + np->list = nil; + ic->unackedbytes += n; +} + +static +void +ilrttcalc(Ilcb *ic, Block *bp) +{ + int rtt, tt, pt, delay, rate; + + rtt = fastticks(nil) - ic->rttstart; + rtt = (rtt*scalemul)/scalediv; + delay = ic->delay; + rate = ic->rate; + + /* Guard against zero wrap */ + if(rtt > 120000 || rtt < 0) + return; + + /* this block had to be transmitted after the one acked so count its size */ + ic->rttlen += blocklen(bp) + IL_IPSIZE + IL_HDRSIZE; + + if(ic->rttlen < 256){ + /* guess fixed delay as rtt of small packets */ + delay += rtt - (delay>>LogAGain); + if(delay < AGain) + delay = AGain; + ic->delay = delay; + } else { + /* if packet took longer than avg rtt 
delay, recalc rate */ + tt = rtt - (delay>>LogAGain); + if(tt > 0){ + rate += ic->rttlen/tt - (rate>>LogAGain); + if(rate < AGain) + rate = AGain; + ic->rate = rate; + } + } + + /* mdev */ + pt = ic->rttlen/(rate>>LogAGain) + (delay>>LogAGain); + ic->mdev += abs(rtt-pt) - (ic->mdev>>LogDGain); + + if(rtt > ic->maxrtt) + ic->maxrtt = rtt; +} + +void +ilackto(Ilcb *ic, ulong ackto, Block *bp) +{ + Ilhdr *h; + ulong id; + + if(ic->rttack == ackto) + ilrttcalc(ic, bp); + + /* Cancel if we've passed the packet we were interested in */ + if(ic->rttack <= ackto) + ic->rttack = 0; + + qlock(&ic->ackq); + while(ic->unacked) { + h = (Ilhdr *)ic->unacked->rp; + id = nhgetl(h->ilid); + if(ackto < id) + break; + + bp = ic->unacked; + ic->unacked = bp->list; + bp->list = nil; + ic->unackedbytes -= blocklen(bp); + freeblist(bp); + ic->rexmit = 0; + ilsettimeout(ic); + } + qunlock(&ic->ackq); +} + +void +iliput(Proto *il, Ipifc*, Block *bp) +{ + char *st; + Ilcb *ic; + Ilhdr *ih; + uchar raddr[IPaddrlen]; + uchar laddr[IPaddrlen]; + ushort sp, dp, csum; + int plen, illen; + Conv *new, *s; + Ilpriv *ipriv; + + ipriv = il->priv; + + ih = (Ilhdr *)bp->rp; + plen = blocklen(bp); + if(plen < IL_IPSIZE+IL_HDRSIZE){ + netlog(il->f, Logil, "il: hlenerr\n"); + ipriv->stats[HlenErrs]++; + goto raise; + } + + illen = nhgets(ih->illen); + if(illen+IL_IPSIZE > plen){ + netlog(il->f, Logil, "il: lenerr\n"); + ipriv->stats[LenErrs]++; + goto raise; + } + + sp = nhgets(ih->ildst); + dp = nhgets(ih->ilsrc); + v4tov6(raddr, ih->src); + v4tov6(laddr, ih->dst); + + if((csum = ptclcsum(bp, IL_IPSIZE, illen)) != 0) { + if(ih->iltype > Ilclose) + st = "?"; + else + st = iltype[ih->iltype]; + ipriv->stats[CsumErrs]++; + netlog(il->f, Logil, "il: cksum %ux %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n", + csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp); + goto raise; + } + + qlock(il); + s = iphtlook(&ipriv->ht, raddr, dp, laddr, sp); + if(s == nil){ + if(ih->iltype == Ilsync) + ilreject(il->f, 
ih); /* no listener */ + qunlock(il); + goto raise; + } + + ic = (Ilcb*)s->ptcl; + if(ic->state == Illistening){ + if(ih->iltype != Ilsync){ + qunlock(il); + if(ih->iltype > Ilclose) + st = "?"; + else + st = iltype[ih->iltype]; + ilreject(il->f, ih); /* no channel and not sync */ + netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n", + st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp); + goto raise; + } + + new = Fsnewcall(s, raddr, dp, laddr, sp, V4); + if(new == nil){ + qunlock(il); + netlog(il->f, Logil, "il: bad newcall %I/%ud->%ud\n", raddr, sp, dp); + ilsendctl(s, ih, Ilclose, 0, nhgetl(ih->ilid), 0); + goto raise; + } + s = new; + + ic = (Ilcb*)s->ptcl; + + ic->conv = s; + ic->state = Ilsyncee; + ilcbinit(ic); + ic->rstart = nhgetl(ih->ilid); + iphtadd(&ipriv->ht, s); + } + + qlock(s); + qunlock(il); + if(waserror()){ + qunlock(s); + nexterror(); + } + ilprocess(s, ih, bp); + qunlock(s); + poperror(); + return; +raise: + freeblist(bp); +} + +void +_ilprocess(Conv *s, Ilhdr *h, Block *bp) +{ + Ilcb *ic; + ulong id, ack; + Ilpriv *priv; + + id = nhgetl(h->ilid); + ack = nhgetl(h->ilack); + + ic = (Ilcb*)s->ptcl; + + ic->lastrecv = NOW; + ic->querytime = NOW + QueryTime; + priv = s->p->priv; + priv->stats[InMsgs]++; + + switch(ic->state) { + default: + netlog(s->p->f, Logil, "il: unknown state %d\n", ic->state); + case Ilclosed: + freeblist(bp); + break; + case Ilsyncer: + switch(h->iltype) { + default: + break; + case Ilsync: + if(ack != ic->start) + ilhangup(s, "connection rejected"); + else { + ic->recvd = id; + ic->rstart = id; + ilsendctl(s, nil, Ilack, ic->next, ic->recvd, 0); + ic->state = Ilestablished; + ic->fasttimeout = 0; + ic->rexmit = 0; + Fsconnected(s, nil); + ilpullup(s); + } + break; + case Ilclose: + if(ack == ic->start) + ilhangup(s, "connection rejected"); + break; + } + freeblist(bp); + break; + case Ilsyncee: + switch(h->iltype) { + default: + break; + case Ilsync: + if(id != ic->rstart || ack != 0){ + 
illocalclose(s); + } else { + ic->recvd = id; + ilsendctl(s, nil, Ilsync, ic->start, ic->recvd, 0); + } + break; + case Ilack: + if(ack == ic->start) { + ic->state = Ilestablished; + ic->fasttimeout = 0; + ic->rexmit = 0; + ilpullup(s); + } + break; + case Ildata: + if(ack == ic->start) { + ic->state = Ilestablished; + ic->fasttimeout = 0; + ic->rexmit = 0; + goto established; + } + break; + case Ilclose: + if(ack == ic->start) + ilhangup(s, "remote close"); + break; + } + freeblist(bp); + break; + case Ilestablished: + established: + switch(h->iltype) { + case Ilsync: + if(id != ic->rstart) + ilhangup(s, "remote close"); + else + ilsendctl(s, nil, Ilack, ic->next, ic->rstart, 0); + freeblist(bp); + break; + case Ildata: + /* + * avoid consuming all the mount rpc buffers in the + * system. if the input queue is too long, drop this + * packet. + */ + if (s->rq && qlen(s->rq) >= Maxrq) { + priv->stats[DroppedMsgs]++; + freeblist(bp); + break; + } + + ilackto(ic, ack, bp); + iloutoforder(s, h, bp); + ilpullup(s); + break; + case Ildataquery: + ilackto(ic, ack, bp); + iloutoforder(s, h, bp); + ilpullup(s); + ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec); + break; + case Ilack: + ilackto(ic, ack, bp); + freeblist(bp); + break; + case Ilquery: + ilackto(ic, ack, bp); + ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec); + freeblist(bp); + break; + case Ilstate: + if(ack >= ic->rttack) + ic->rttack = 0; + ilackto(ic, ack, bp); + if(h->ilspec > Nqt) + h->ilspec = 0; + if(ic->qt[h->ilspec] > ack){ + ilrexmit(ic); + ilsettimeout(ic); + } + freeblist(bp); + break; + case Ilclose: + freeblist(bp); + if(ack < ic->start || ack > ic->next) + break; + ic->recvd = id; + ilsendctl(s, nil, Ilclose, ic->next, ic->recvd, 0); + ic->state = Ilclosing; + ilsettimeout(ic); + ilfreeq(ic); + break; + } + break; + case Illistening: + freeblist(bp); + break; + case Ilclosing: + switch(h->iltype) { + case Ilclose: + ic->recvd = id; + ilsendctl(s, nil, Ilclose, ic->next, 
ic->recvd, 0); + if(ack == ic->next) + ilhangup(s, nil); + break; + default: + break; + } + freeblist(bp); + break; + } +} + +void +ilrexmit(Ilcb *ic) +{ + Ilhdr *h; + Block *nb; + Conv *c; + ulong id; + Ilpriv *priv; + + nb = nil; + qlock(&ic->ackq); + if(ic->unacked) + nb = copyblock(ic->unacked, blocklen(ic->unacked)); + qunlock(&ic->ackq); + + if(nb == nil) + return; + + h = (Ilhdr*)nb->rp; + h->vihl = IP_VER4; + + h->iltype = Ildataquery; + hnputl(h->ilack, ic->recvd); + h->ilspec = ilnextqt(ic); + h->ilsum[0] = 0; + h->ilsum[1] = 0; + hnputs(h->ilsum, ptclcsum(nb, IL_IPSIZE, nhgets(h->illen))); + + c = ic->conv; + id = nhgetl(h->ilid); + netlog(c->p->f, Logil, "il: rexmit %d %ud: %d %d: %i %d/%d\n", id, ic->recvd, + ic->rexmit, ic->timeout, + c->raddr, c->lport, c->rport); + + ilbackoff(ic); + + ipoput4(c->p->f, nb, 0, c->ttl, c->tos, c); + + /* statistics */ + ic->rxtot++; + priv = c->p->priv; + priv->rexmit++; +} + +/* DEBUG */ +void +ilprocess(Conv *s, Ilhdr *h, Block *bp) +{ + Ilcb *ic; + + ic = (Ilcb*)s->ptcl; + + USED(ic); + netlog(s->p->f, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ", + ilstates[ic->state], ic->rstart, ic->recvd, ic->start, + ic->next, iltype[h->iltype], nhgetl(h->ilid), + nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst)); + + _ilprocess(s, h, bp); + + netlog(s->p->f, Logilmsg, "%11s rcv %d snt %d\n", ilstates[ic->state], ic->recvd, ic->next); +} + +void +ilhangup(Conv *s, char *msg) +{ + Ilcb *ic; + int callout; + + netlog(s->p->f, Logil, "il: hangup! 
%I %d/%d: %s\n", s->raddr, + s->lport, s->rport, msg?msg:"no reason"); + + ic = (Ilcb*)s->ptcl; + callout = ic->state == Ilsyncer; + illocalclose(s); + + qhangup(s->rq, msg); + qhangup(s->wq, msg); + + if(callout) + Fsconnected(s, msg); +} + +void +ilpullup(Conv *s) +{ + Ilcb *ic; + Ilhdr *oh; + Block *bp; + ulong oid, dlen; + Ilpriv *ipriv; + + ic = (Ilcb*)s->ptcl; + if(ic->state != Ilestablished) + return; + + qlock(&ic->outo); + while(ic->outoforder) { + bp = ic->outoforder; + oh = (Ilhdr*)bp->rp; + oid = nhgetl(oh->ilid); + if(oid <= ic->recvd) { + ic->outoforder = bp->list; + freeblist(bp); + continue; + } + if(oid != ic->recvd+1){ + ipriv = s->p->priv; + ipriv->stats[OutOfOrder]++; + break; + } + + ic->recvd = oid; + ic->outoforder = bp->list; + + bp->list = nil; + dlen = nhgets(oh->illen)-IL_HDRSIZE; + bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen); + /* + * Upper levels don't know about multiple-block + * messages so copy all into one (yick). + */ + bp = concatblock(bp); + if(bp == 0) + panic("ilpullup"); + bp = packblock(bp); + if(bp == 0) + panic("ilpullup2"); + qpass(s->rq, bp); + } + qunlock(&ic->outo); +} + +void +iloutoforder(Conv *s, Ilhdr *h, Block *bp) +{ + Ilcb *ic; + uchar *lid; + Block *f, **l; + ulong id, newid; + Ilpriv *ipriv; + + ipriv = s->p->priv; + ic = (Ilcb*)s->ptcl; + bp->list = nil; + + id = nhgetl(h->ilid); + /* Window checks */ + if(id <= ic->recvd || id > ic->recvd+ic->window) { + netlog(s->p->f, Logil, "il: message outside window %ud <%ud-%ud>: %i %d/%d\n", + id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport); + freeblist(bp); + return; + } + + /* Packet is acceptable so sort onto receive queue for pullup */ + qlock(&ic->outo); + if(ic->outoforder == nil) + ic->outoforder = bp; + else { + l = &ic->outoforder; + for(f = *l; f; f = f->list) { + lid = ((Ilhdr*)(f->rp))->ilid; + newid = nhgetl(lid); + if(id <= newid) { + if(id == newid) { + ipriv->stats[DupMsg]++; + ipriv->stats[DupBytes] += blocklen(bp); + 
qunlock(&ic->outo); + freeblist(bp); + return; + } + bp->list = f; + *l = bp; + qunlock(&ic->outo); + return; + } + l = &f->list; + } + *l = bp; + } + qunlock(&ic->outo); +} + +void +ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec) +{ + Ilhdr *ih; + Ilcb *ic; + Block *bp; + int ttl, tos; + + bp = allocb(IL_IPSIZE+IL_HDRSIZE); + bp->wp += IL_IPSIZE+IL_HDRSIZE; + + ih = (Ilhdr *)(bp->rp); + ih->vihl = IP_VER4; + + /* Ip fields */ + ih->proto = IP_ILPROTO; + hnputs(ih->illen, IL_HDRSIZE); + ih->frag[0] = 0; + ih->frag[1] = 0; + if(inih) { + hnputl(ih->dst, nhgetl(inih->src)); + hnputl(ih->src, nhgetl(inih->dst)); + hnputs(ih->ilsrc, nhgets(inih->ildst)); + hnputs(ih->ildst, nhgets(inih->ilsrc)); + hnputl(ih->ilid, nhgetl(inih->ilack)); + hnputl(ih->ilack, nhgetl(inih->ilid)); + ttl = MAXTTL; + tos = DFLTTOS; + } + else { + v6tov4(ih->dst, ipc->raddr); + v6tov4(ih->src, ipc->laddr); + hnputs(ih->ilsrc, ipc->lport); + hnputs(ih->ildst, ipc->rport); + hnputl(ih->ilid, id); + hnputl(ih->ilack, ack); + ic = (Ilcb*)ipc->ptcl; + ic->acksent = ack; + ic->acktime = NOW; + ttl = ipc->ttl; + tos = ipc->tos; + } + ih->iltype = type; + ih->ilspec = ilspec; + ih->ilsum[0] = 0; + ih->ilsum[1] = 0; + + if(ilcksum) + hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE)); + +if(ipc==nil) + panic("ipc is nil caller is %.8lux", getcallerpc(&ipc)); +if(ipc->p==nil) + panic("ipc->p is nil"); + + netlog(ipc->p->f, Logilmsg, "ctl(%s id %d ack %d %d->%d)\n", + iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack), + nhgets(ih->ilsrc), nhgets(ih->ildst)); + + ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc); +} + +void +ilreject(Fs *f, Ilhdr *inih) +{ + Ilhdr *ih; + Block *bp; + + bp = allocb(IL_IPSIZE+IL_HDRSIZE); + bp->wp += IL_IPSIZE+IL_HDRSIZE; + + ih = (Ilhdr *)(bp->rp); + ih->vihl = IP_VER4; + + /* Ip fields */ + ih->proto = IP_ILPROTO; + hnputs(ih->illen, IL_HDRSIZE); + ih->frag[0] = 0; + ih->frag[1] = 0; + hnputl(ih->dst, nhgetl(inih->src)); + hnputl(ih->src, 
nhgetl(inih->dst)); + hnputs(ih->ilsrc, nhgets(inih->ildst)); + hnputs(ih->ildst, nhgets(inih->ilsrc)); + hnputl(ih->ilid, nhgetl(inih->ilack)); + hnputl(ih->ilack, nhgetl(inih->ilid)); + ih->iltype = Ilclose; + ih->ilspec = 0; + ih->ilsum[0] = 0; + ih->ilsum[1] = 0; + + if(ilcksum) + hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE)); + + ipoput4(f, bp, 0, MAXTTL, DFLTTOS, nil); +} + +void +ilsettimeout(Ilcb *ic) +{ + ulong pt; + + pt = (ic->delay>>LogAGain) + + ic->unackedbytes/(ic->rate>>LogAGain) + + (ic->mdev>>(LogDGain-1)) + + AckDelay; + if(pt > MaxTimeout) + pt = MaxTimeout; + ic->timeout = NOW + pt; +} + +void +ilbackoff(Ilcb *ic) +{ + ulong pt; + int i; + + pt = (ic->delay>>LogAGain) + + ic->unackedbytes/(ic->rate>>LogAGain) + + (ic->mdev>>(LogDGain-1)) + + AckDelay; + for(i = 0; i < ic->rexmit; i++) + pt = pt + (pt>>1); + if(pt > MaxTimeout) + pt = MaxTimeout; + ic->timeout = NOW + pt; + + if(ic->fasttimeout) + ic->timeout = NOW+Iltickms; + + ic->rexmit++; +} + +// complain if two numbers not within an hour of each other +#define Tfuture (1000*60*60) +int +later(ulong t1, ulong t2, char *x) +{ + int dt; + + dt = t1 - t2; + if(dt > 0) { + if(x != nil && dt > Tfuture) + print("%s: way future %d\n", x, dt); + return 1; + } + if(dt < -Tfuture) { + if(x != nil) + print("%s: way past %d\n", x, -dt); + return 1; + } + return 0; +} + +void +ilackproc(void *x) +{ + Ilcb *ic; + Conv **s, *p; + Proto *il; + + il = x; + +loop: + tsleep(&up->sleep, return0, 0, Iltickms); + for(s = il->conv; s && *s; s++) { + p = *s; + ic = (Ilcb*)p->ptcl; + + switch(ic->state) { + case Ilclosed: + case Illistening: + break; + case Ilclosing: + if(later(NOW, ic->timeout, "timeout0")) { + if(ic->rexmit > MaxRexmit){ + ilhangup(p, nil); + break; + } + ilsendctl(p, nil, Ilclose, ic->next, ic->recvd, 0); + ilbackoff(ic); + } + break; + + case Ilsyncee: + case Ilsyncer: + if(later(NOW, ic->timeout, "timeout1")) { + if(ic->rexmit > MaxRexmit){ + ilhangup(p, etime); + break; + } + 
ilsendctl(p, nil, Ilsync, ic->start, ic->recvd, 0); + ilbackoff(ic); + } + break; + + case Ilestablished: + if(ic->recvd != ic->acksent) + if(later(NOW, ic->acktime, "acktime")) + ilsendctl(p, nil, Ilack, ic->next, ic->recvd, 0); + + if(later(NOW, ic->querytime, "querytime")){ + if(later(NOW, ic->lastrecv+DeathTime, "deathtime")){ + netlog(il->f, Logil, "il: hangup: deathtime\n"); + ilhangup(p, etime); + break; + } + ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic)); + ic->querytime = NOW + QueryTime; + } + + if(ic->unacked != nil) + if(later(NOW, ic->timeout, "timeout2")) { + if(ic->rexmit > MaxRexmit){ + netlog(il->f, Logil, "il: hangup: too many rexmits\n"); + ilhangup(p, etime); + break; + } + ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic)); + ic->rxquery++; + ilbackoff(ic); + } + break; + } + } + goto loop; +} + +void +ilcbinit(Ilcb *ic) +{ + ic->start = nrand(0x1000000); + ic->next = ic->start+1; + ic->recvd = 0; + ic->window = Defaultwin; + ic->unackedbytes = 0; + ic->unacked = nil; + ic->outoforder = nil; + ic->rexmit = 0; + ic->rxtot = 0; + ic->rxquery = 0; + ic->qtx = 1; + ic->fasttimeout = 0; + + /* timers */ + ic->delay = DefRtt<<LogAGain; + ic->mdev = DefRtt<<LogDGain; + ic->rate = DefByteRate<<LogAGain; + ic->querytime = NOW + QueryTime; + ic->lastrecv = NOW; /* or we'll timeout right away */ + ilsettimeout(ic); +} + +char* +ilstart(Conv *c, int type, int fasttimeout) +{ + Ilcb *ic; + Ilpriv *ipriv; + char kpname[KNAMELEN]; + + ipriv = c->p->priv; + + if(ipriv->ackprocstarted == 0){ + qlock(&ipriv->apl); + if(ipriv->ackprocstarted == 0){ + sprint(kpname, "#I%dilack", c->p->f->dev); + kproc(kpname, ilackproc, c->p, 0); + ipriv->ackprocstarted = 1; + } + qunlock(&ipriv->apl); + } + + ic = (Ilcb*)c->ptcl; + ic->conv = c; + + if(ic->state != Ilclosed) + return nil; + + ilcbinit(ic); + + if(fasttimeout){ + /* timeout if we can't connect quickly */ + ic->fasttimeout = 1; + ic->timeout = NOW+Iltickms; + ic->rexmit = MaxRexmit - 4; + 
}; + + switch(type) { + default: + netlog(c->p->f, Logil, "il: start: type %d\n", type); + break; + case IL_LISTEN: + ic->state = Illistening; + iphtadd(&ipriv->ht, c); + break; + case IL_CONNECT: + ic->state = Ilsyncer; + iphtadd(&ipriv->ht, c); + ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0); + break; + } + + return nil; +} + +void +ilfreeq(Ilcb *ic) +{ + Block *bp, *next; + + qlock(&ic->ackq); + for(bp = ic->unacked; bp; bp = next) { + next = bp->list; + freeblist(bp); + } + ic->unacked = nil; + qunlock(&ic->ackq); + + qlock(&ic->outo); + for(bp = ic->outoforder; bp; bp = next) { + next = bp->list; + freeblist(bp); + } + ic->outoforder = nil; + qunlock(&ic->outo); +} + +void +iladvise(Proto *il, Block *bp, char *msg) +{ + Ilhdr *h; + Ilcb *ic; + uchar source[IPaddrlen], dest[IPaddrlen]; + ushort psource; + Conv *s, **p; + + h = (Ilhdr*)(bp->rp); + + v4tov6(dest, h->dst); + v4tov6(source, h->src); + psource = nhgets(h->ilsrc); + + + /* Look for a connection, unfortunately the destination port is missing */ + qlock(il); + for(p = il->conv; *p; p++) { + s = *p; + if(s->lport == psource) + if(ipcmp(s->laddr, source) == 0) + if(ipcmp(s->raddr, dest) == 0){ + qunlock(il); + ic = (Ilcb*)s->ptcl; + switch(ic->state){ + case Ilsyncer: + ilhangup(s, msg); + break; + } + freeblist(bp); + return; + } + } + qunlock(il); + freeblist(bp); +} + +int +ilnextqt(Ilcb *ic) +{ + int x; + + qlock(&ic->ackq); + x = ic->qtx; + if(++x > Nqt) + x = 1; + ic->qtx = x; + ic->qt[x] = ic->next-1; /* highest xmitted packet */ + ic->qt[0] = ic->qt[x]; /* compatibility with old implementations */ + qunlock(&ic->ackq); + + return x; +} + +/* calculate scale constants that converts fast ticks to ms (more or less) */ +static void +inittimescale(void) +{ + uvlong hz; + + fastticks(&hz); + if(hz > 1000){ + scalediv = hz/1000; + scalemul = 1; + } else { + scalediv = 1; + scalemul = 1000/hz; + } +} + +void +ilinit(Fs *f) +{ + Proto *il; + + inittimescale(); + + il = smalloc(sizeof(Proto)); + 
il->priv = smalloc(sizeof(Ilpriv)); + il->name = "il"; + il->connect = ilconnect; + il->announce = ilannounce; + il->state = ilstate; + il->create = ilcreate; + il->close = ilclose; + il->rcv = iliput; + il->ctl = nil; + il->advise = iladvise; + il->stats = ilxstats; + il->inuse = ilinuse; + il->gc = nil; + il->ipproto = IP_ILPROTO; + il->nc = scalednconv(); + il->ptclsize = sizeof(Ilcb); + Fsproto(f, il); +} diff --git a/os/ip/ip.c b/os/ip/ip.c new file mode 100644 index 00000000..b0d3f5a6 --- /dev/null +++ b/os/ip/ip.c @@ -0,0 +1,805 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +typedef struct Ip4hdr Ip4hdr; +typedef struct IP IP; +typedef struct Fragment4 Fragment4; +typedef struct Fragment6 Fragment6; +typedef struct Ipfrag Ipfrag; + +enum +{ + IP4HDR = 20, /* sizeof(Ip4hdr) */ + IP6HDR = 40, /* sizeof(Ip6hdr) */ + IP_HLEN4 = 0x05, /* Header length in words */ + IP_DF = 0x4000, /* Don't fragment */ + IP_MF = 0x2000, /* More fragments */ + IP6FHDR = 8, /* sizeof(Fraghdr6) */ + IP_MAX = 64*1024, /* Maximum Internet packet size */ +}; + +#define BLKIPVER(xp) (((Ip4hdr*)((xp)->rp))->vihl&0xF0) + +struct Ip4hdr +{ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* ip->identification */ + uchar frag[2]; /* Fragment information */ + uchar ttl; /* Time to live */ + uchar proto; /* Protocol */ + uchar cksum[2]; /* Header checksum */ + uchar src[4]; /* IP source */ + uchar dst[4]; /* IP destination */ +}; + +/* MIB II counters */ +enum +{ + Forwarding, + DefaultTTL, + InReceives, + InHdrErrors, + InAddrErrors, + ForwDatagrams, + InUnknownProtos, + InDiscards, + InDelivers, + OutRequests, + OutDiscards, + OutNoRoutes, + ReasmTimeout, + ReasmReqds, + ReasmOKs, + ReasmFails, + FragOKs, + FragFails, + FragCreates, + + Nstats, +}; + +struct Fragment4 +{ + Block* blist; + Fragment4* next; + 
ulong src; + ulong dst; + ushort id; + ulong age; +}; + +struct Fragment6 +{ + Block* blist; + Fragment6* next; + uchar src[IPaddrlen]; + uchar dst[IPaddrlen]; + uint id; + ulong age; +}; + +struct Ipfrag +{ + ushort foff; + ushort flen; +}; + +/* an instance of IP */ +struct IP +{ + ulong stats[Nstats]; + + QLock fraglock4; + Fragment4* flisthead4; + Fragment4* fragfree4; + Ref id4; + + QLock fraglock6; + Fragment6* flisthead6; + Fragment6* fragfree6; + Ref id6; + + int iprouting; /* true if we route like a gateway */ +}; + +static char *statnames[] = +{ +[Forwarding] "Forwarding", +[DefaultTTL] "DefaultTTL", +[InReceives] "InReceives", +[InHdrErrors] "InHdrErrors", +[InAddrErrors] "InAddrErrors", +[ForwDatagrams] "ForwDatagrams", +[InUnknownProtos] "InUnknownProtos", +[InDiscards] "InDiscards", +[InDelivers] "InDelivers", +[OutRequests] "OutRequests", +[OutDiscards] "OutDiscards", +[OutNoRoutes] "OutNoRoutes", +[ReasmTimeout] "ReasmTimeout", +[ReasmReqds] "ReasmReqds", +[ReasmOKs] "ReasmOKs", +[ReasmFails] "ReasmFails", +[FragOKs] "FragOKs", +[FragFails] "FragFails", +[FragCreates] "FragCreates", +}; + +#define BLKIP(xp) ((Ip4hdr*)((xp)->rp)) +/* + * This sleazy macro relies on the media header size being + * larger than sizeof(Ipfrag). 
ipreassemble checks this is true + */ +#define BKFG(xp) ((Ipfrag*)((xp)->base)) + +ushort ipcsum(uchar*); +Block* ip4reassemble(IP*, int, Block*, Ip4hdr*); +void ipfragfree4(IP*, Fragment4*); +Fragment4* ipfragallo4(IP*); + + +void +ip_init_6(Fs *f) +{ + V6params *v6p; + + v6p = smalloc(sizeof(V6params)); + + v6p->rp.mflag = 0; // default not managed + v6p->rp.oflag = 0; + v6p->rp.maxraint = 600000; // millisecs + v6p->rp.minraint = 200000; + v6p->rp.linkmtu = 0; // no mtu sent + v6p->rp.reachtime = 0; + v6p->rp.rxmitra = 0; + v6p->rp.ttl = MAXTTL; + v6p->rp.routerlt = 3*(v6p->rp.maxraint); + + v6p->hp.rxmithost = 1000; // v6 RETRANS_TIMER + + v6p->cdrouter = -1; + + f->v6p = v6p; + +} + +void +initfrag(IP *ip, int size) +{ + Fragment4 *fq4, *eq4; + Fragment6 *fq6, *eq6; + + ip->fragfree4 = (Fragment4*)malloc(sizeof(Fragment4) * size); + if(ip->fragfree4 == nil) + panic("initfrag"); + + eq4 = &ip->fragfree4[size]; + for(fq4 = ip->fragfree4; fq4 < eq4; fq4++) + fq4->next = fq4+1; + + ip->fragfree4[size-1].next = nil; + + ip->fragfree6 = (Fragment6*)malloc(sizeof(Fragment6) * size); + if(ip->fragfree6 == nil) + panic("initfrag"); + + eq6 = &ip->fragfree6[size]; + for(fq6 = ip->fragfree6; fq6 < eq6; fq6++) + fq6->next = fq6+1; + + ip->fragfree6[size-1].next = nil; +} + +void +ip_init(Fs *f) +{ + IP *ip; + + ip = smalloc(sizeof(IP)); + initfrag(ip, 100); + f->ip = ip; + + ip_init_6(f); +} + +void +iprouting(Fs *f, int on) +{ + f->ip->iprouting = on; + if(f->ip->iprouting==0) + f->ip->stats[Forwarding] = 2; + else + f->ip->stats[Forwarding] = 1; +} + +int +ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c) +{ + Ipifc *ifc; + uchar *gate; + ulong fragoff; + Block *xp, *nb; + Ip4hdr *eh, *feh; + int lid, len, seglen, chunk, dlen, blklen, offset, medialen; + Route *r, *sr; + IP *ip; + int rv = 0; + + ip = f->ip; + + /* Fill out the ip header */ + eh = (Ip4hdr*)(bp->rp); + + ip->stats[OutRequests]++; + + /* Number of uchars in data and ip header to write */ + 
len = blocklen(bp); + + if(gating){ + chunk = nhgets(eh->length); + if(chunk > len){ + ip->stats[OutDiscards]++; + netlog(f, Logip, "short gated packet\n"); + goto free; + } + if(chunk < len) + len = chunk; + } + if(len >= IP_MAX){ + ip->stats[OutDiscards]++; + netlog(f, Logip, "exceeded ip max size %V\n", eh->dst); + goto free; + } + + r = v4lookup(f, eh->dst, c); + if(r == nil){ + ip->stats[OutNoRoutes]++; + netlog(f, Logip, "no interface %V\n", eh->dst); + rv = -1; + goto free; + } + + ifc = r->ifc; + if(r->type & (Rifc|Runi)) + gate = eh->dst; + else + if(r->type & (Rbcast|Rmulti)) { + gate = eh->dst; + sr = v4lookup(f, eh->src, nil); + if(sr != nil && (sr->type & Runi)) + ifc = sr->ifc; + } + else + gate = r->v4.gate; + + if(!gating) + eh->vihl = IP_VER4|IP_HLEN4; + eh->ttl = ttl; + if(!gating) + eh->tos = tos; + + if(!canrlock(ifc)) + goto free; + if(waserror()){ + runlock(ifc); + nexterror(); + } + if(ifc->m == nil) + goto raise; + + /* If we dont need to fragment just send it */ + medialen = ifc->maxtu - ifc->m->hsize; + if(len <= medialen) { + if(!gating) + hnputs(eh->id, incref(&ip->id4)); + hnputs(eh->length, len); + if(!gating){ + eh->frag[0] = 0; + eh->frag[1] = 0; + } + eh->cksum[0] = 0; + eh->cksum[1] = 0; + hnputs(eh->cksum, ipcsum(&eh->vihl)); + ifc->m->bwrite(ifc, bp, V4, gate); + runlock(ifc); + poperror(); + return 0; + } + +if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst); + + if(eh->frag[0] & (IP_DF>>8)){ + ip->stats[FragFails]++; + ip->stats[OutDiscards]++; + icmpcantfrag(f, bp, medialen); + netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst); + goto raise; + } + + seglen = (medialen - IP4HDR) & ~7; + if(seglen < 8){ + ip->stats[FragFails]++; + ip->stats[OutDiscards]++; + netlog(f, Logip, "%V seglen < 8\n", eh->dst); + goto raise; + } + + dlen = len - IP4HDR; + xp = bp; + if(gating) + lid = nhgets(eh->id); + else + lid = incref(&ip->id4); + + offset = IP4HDR; + while(xp != nil && offset && offset >= BLEN(xp)) { 
+ offset -= BLEN(xp); + xp = xp->next; + } + xp->rp += offset; + + if(gating) + fragoff = nhgets(eh->frag)<<3; + else + fragoff = 0; + dlen += fragoff; + for(; fragoff < dlen; fragoff += seglen) { + nb = allocb(IP4HDR+seglen); + feh = (Ip4hdr*)(nb->rp); + + memmove(nb->wp, eh, IP4HDR); + nb->wp += IP4HDR; + + if((fragoff + seglen) >= dlen) { + seglen = dlen - fragoff; + hnputs(feh->frag, fragoff>>3); + } + else + hnputs(feh->frag, (fragoff>>3)|IP_MF); + + hnputs(feh->length, seglen + IP4HDR); + hnputs(feh->id, lid); + + /* Copy up the data area */ + chunk = seglen; + while(chunk) { + if(!xp) { + ip->stats[OutDiscards]++; + ip->stats[FragFails]++; + freeblist(nb); + netlog(f, Logip, "!xp: chunk %d\n", chunk); + goto raise; + } + blklen = chunk; + if(BLEN(xp) < chunk) + blklen = BLEN(xp); + memmove(nb->wp, xp->rp, blklen); + nb->wp += blklen; + xp->rp += blklen; + chunk -= blklen; + if(xp->rp == xp->wp) + xp = xp->next; + } + + feh->cksum[0] = 0; + feh->cksum[1] = 0; + hnputs(feh->cksum, ipcsum(&feh->vihl)); + ifc->m->bwrite(ifc, nb, V4, gate); + ip->stats[FragCreates]++; + } + ip->stats[FragOKs]++; +raise: + runlock(ifc); + poperror(); +free: + freeblist(bp); + return rv; +} + +void +ipiput4(Fs *f, Ipifc *ifc, Block *bp) +{ + int hl; + int hop, tos, proto, olen; + Ip4hdr *h; + Proto *p; + ushort frag; + int notforme; + uchar *dp, v6dst[IPaddrlen]; + IP *ip; + Route *r; + + if(BLKIPVER(bp) != IP_VER4) { + ipiput6(f, ifc, bp); + return; + } + + ip = f->ip; + ip->stats[InReceives]++; + + /* + * Ensure we have all the header info in the first + * block. Make life easier for other protocols by + * collecting up to the first 64 bytes in the first block. 
+ */ + if(BLEN(bp) < 64) { + hl = blocklen(bp); + if(hl < IP4HDR) + hl = IP4HDR; + if(hl > 64) + hl = 64; + bp = pullupblock(bp, hl); + if(bp == nil) + return; + } + + h = (Ip4hdr*)(bp->rp); + + /* dump anything that whose header doesn't checksum */ + if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) { + ip->stats[InHdrErrors]++; + netlog(f, Logip, "ip: checksum error %V\n", h->src); + freeblist(bp); + return; + } + v4tov6(v6dst, h->dst); + notforme = ipforme(f, v6dst) == 0; + + /* Check header length and version */ + if((h->vihl&0x0F) != IP_HLEN4) { + hl = (h->vihl&0xF)<<2; + if(hl < (IP_HLEN4<<2)) { + ip->stats[InHdrErrors]++; + netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl); + freeblist(bp); + return; + } + /* If this is not routed strip off the options */ + if(notforme == 0) { + olen = nhgets(h->length); + dp = bp->rp + (hl - (IP_HLEN4<<2)); + memmove(dp, h, IP_HLEN4<<2); + bp->rp = dp; + h = (Ip4hdr*)(bp->rp); + h->vihl = (IP_VER4|IP_HLEN4); + hnputs(h->length, olen-hl+(IP_HLEN4<<2)); + } + } + + /* route */ + if(notforme) { + Conv conv; + + if(!ip->iprouting){ + freeb(bp); + return; + } + + /* don't forward to source's network */ + conv.r = nil; + r = v4lookup(f, h->dst, &conv); + if(r == nil || r->ifc == ifc){ + ip->stats[OutDiscards]++; + freeblist(bp); + return; + } + + /* don't forward if packet has timed out */ + hop = h->ttl; + if(hop < 1) { + ip->stats[InHdrErrors]++; + icmpttlexceeded(f, ifc->lifc->local, bp); + freeblist(bp); + return; + } + + /* reassemble if the interface expects it */ +if(r->ifc == nil) panic("nil route rfc"); + if(r->ifc->reassemble){ + frag = nhgets(h->frag); + if(frag) { + h->tos = 0; + if(frag & IP_MF) + h->tos = 1; + bp = ip4reassemble(ip, frag, bp, h); + if(bp == nil) + return; + h = (Ip4hdr*)(bp->rp); + } + } + + ip->stats[ForwDatagrams]++; + tos = h->tos; + hop = h->ttl; + ipoput4(f, bp, 1, hop - 1, tos, &conv); + return; + } + + frag = nhgets(h->frag); + if(frag) { + h->tos = 0; + if(frag & IP_MF) + h->tos = 1; 
+ bp = ip4reassemble(ip, frag, bp, h); + if(bp == nil) + return; + h = (Ip4hdr*)(bp->rp); + } + + /* don't let any frag info go up the stack */ + h->frag[0] = 0; + h->frag[1] = 0; + + proto = h->proto; + p = Fsrcvpcol(f, proto); + if(p != nil && p->rcv != nil) { + ip->stats[InDelivers]++; + (*p->rcv)(p, ifc, bp); + return; + } + ip->stats[InDiscards]++; + ip->stats[InUnknownProtos]++; + freeblist(bp); +} + +int +ipstats(Fs *f, char *buf, int len) +{ + IP *ip; + char *p, *e; + int i; + + ip = f->ip; + ip->stats[DefaultTTL] = MAXTTL; + + p = buf; + e = p+len; + for(i = 0; i < Nstats; i++) + p = seprint(p, e, "%s: %lud\n", statnames[i], ip->stats[i]); + return p - buf; +} + +Block* +ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih) +{ + int fend; + ushort id; + Fragment4 *f, *fnext; + ulong src, dst; + Block *bl, **l, *last, *prev; + int ovlap, len, fragsize, pktposn; + + src = nhgetl(ih->src); + dst = nhgetl(ih->dst); + id = nhgets(ih->id); + + /* + * block lists are too hard, pullupblock into a single block + */ + if(bp->next){ + bp = pullupblock(bp, blocklen(bp)); + ih = (Ip4hdr*)(bp->rp); + } + + qlock(&ip->fraglock4); + + /* + * find a reassembly queue for this fragment + */ + for(f = ip->flisthead4; f; f = fnext){ + fnext = f->next; /* because ipfragfree4 changes the list */ + if(f->src == src && f->dst == dst && f->id == id) + break; + if(f->age < NOW){ + ip->stats[ReasmTimeout]++; + ipfragfree4(ip, f); + } + } + + /* + * if this isn't a fragmented packet, accept it + * and get rid of any fragments that might go + * with it. 
+ */ + if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) { + if(f != nil) { + ipfragfree4(ip, f); + ip->stats[ReasmFails]++; + } + qunlock(&ip->fraglock4); + return bp; + } + + if(bp->base+sizeof(Ipfrag) >= bp->rp){ + bp = padblock(bp, sizeof(Ipfrag)); + bp->rp += sizeof(Ipfrag); + } + + BKFG(bp)->foff = offset<<3; + BKFG(bp)->flen = nhgets(BLKIP(bp)->length)-IP4HDR; /* re-derive the header from bp: ih is not updated after padblock() reassigns bp */ + + /* First fragment allocates a reassembly queue */ + if(f == nil) { + f = ipfragallo4(ip); + f->id = id; + f->src = src; + f->dst = dst; + + f->blist = bp; + + qunlock(&ip->fraglock4); + ip->stats[ReasmReqds]++; + return nil; + } + + /* + * find the new fragment's position in the queue + */ + prev = nil; + l = &f->blist; + bl = f->blist; + while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) { + prev = bl; + l = &bl->next; + bl = bl->next; + } + + /* Check overlap of a previous fragment - trim away as necessary */ + if(prev) { + ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff; + if(ovlap > 0) { + if(ovlap >= BKFG(bp)->flen) { + freeblist(bp); + qunlock(&ip->fraglock4); + return nil; + } + BKFG(prev)->flen -= ovlap; + } + } + + /* Link onto assembly queue */ + bp->next = *l; + *l = bp; + + /* Check to see if succeeding segments overlap */ + if(bp->next) { + l = &bp->next; + fend = BKFG(bp)->foff + BKFG(bp)->flen; + /* Take completely covered segments out */ + while(*l) { + ovlap = fend - BKFG(*l)->foff; + if(ovlap <= 0) + break; + if(ovlap < BKFG(*l)->flen) { + BKFG(*l)->flen -= ovlap; + BKFG(*l)->foff += ovlap; + /* move up ih hdrs */ + memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR); + (*l)->rp += ovlap; + break; + } + last = (*l)->next; + (*l)->next = nil; + freeblist(*l); + *l = last; + } + } + + /* + * look for a complete packet. if we get to a fragment + * without IP_MF set, we're done. 
+ */ + pktposn = 0; + for(bl = f->blist; bl; bl = bl->next) { + if(BKFG(bl)->foff != pktposn) + break; + if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) { + bl = f->blist; + len = nhgets(BLKIP(bl)->length); + bl->wp = bl->rp + len; + + /* Pullup all the fragment headers and + * return a complete packet + */ + for(bl = bl->next; bl; bl = bl->next) { + fragsize = BKFG(bl)->flen; + len += fragsize; + bl->rp += IP4HDR; + bl->wp = bl->rp + fragsize; + } + + bl = f->blist; + f->blist = nil; + ipfragfree4(ip, f); + ih = BLKIP(bl); + hnputs(ih->length, len); + qunlock(&ip->fraglock4); + ip->stats[ReasmOKs]++; + return bl; + } + pktposn += BKFG(bl)->flen; + } + qunlock(&ip->fraglock4); + return nil; +} + +/* + * ipfragfree4 - Free a list of fragments - assume hold fraglock4 + */ +void +ipfragfree4(IP *ip, Fragment4 *frag) +{ + Fragment4 *fl, **l; + + if(frag->blist) + freeblist(frag->blist); + + frag->src = 0; + frag->id = 0; + frag->blist = nil; + + l = &ip->flisthead4; + for(fl = *l; fl; fl = fl->next) { + if(fl == frag) { + *l = frag->next; + break; + } + l = &fl->next; + } + + frag->next = ip->fragfree4; + ip->fragfree4 = frag; + +} + +/* + * ipfragallo4 - allocate a reassembly queue - assume hold fraglock4 + */ +Fragment4 * +ipfragallo4(IP *ip) +{ + Fragment4 *f; + + while(ip->fragfree4 == nil) { + /* free last entry on fraglist */ + for(f = ip->flisthead4; f->next; f = f->next) + ; + ipfragfree4(ip, f); + } + f = ip->fragfree4; + ip->fragfree4 = f->next; + f->next = ip->flisthead4; + ip->flisthead4 = f; + f->age = NOW + 30000; + + return f; +} + +ushort +ipcsum(uchar *addr) +{ + int len; + ulong sum; + + sum = 0; + len = (addr[0]&0xf)<<2; + + while(len > 0) { + sum += addr[0]<<8 | addr[1] ; + len -= 2; + addr += 2; + } + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + + return (sum^0xffff); +} diff --git a/os/ip/ip.h b/os/ip/ip.h new file mode 100644 index 00000000..ca49430a --- /dev/null +++ b/os/ip/ip.h @@ -0,0 +1,673 @@ +typedef struct Conv 
Conv; +typedef struct Fs Fs; +typedef union Hwaddr Hwaddr; +typedef struct IP IP; +typedef struct IPaux IPaux; +typedef struct Ipself Ipself; +typedef struct Ipselftab Ipselftab; +typedef struct Iplink Iplink; +typedef struct Iplifc Iplifc; +typedef struct Ipmulti Ipmulti; +typedef struct IProuter IProuter; +typedef struct Ipifc Ipifc; +typedef struct Iphash Iphash; +typedef struct Ipht Ipht; +typedef struct Netlog Netlog; +typedef struct Ifclog Ifclog; +typedef struct Medium Medium; +typedef struct Proto Proto; +typedef struct Arpent Arpent; +typedef struct Arp Arp; +typedef struct Route Route; + +typedef struct Routerparams Routerparams; +typedef struct Hostparams Hostparams; +typedef struct V6router V6router; +typedef struct V6params V6params; + +#pragma incomplete Arp +#pragma incomplete Ifclog +#pragma incomplete Ipself +#pragma incomplete Ipselftab +#pragma incomplete IP +#pragma incomplete Netlog + +enum +{ + Addrlen= 64, + Maxproto= 20, + Nhash= 64, + Maxincall= 5, + Nchans= 256, + MAClen= 16, /* longest mac address */ + + MAXTTL= 255, + DFLTTOS= 0, + + IPaddrlen= 16, + IPv4addrlen= 4, + IPv4off= 12, + IPllen= 4, + + /* ip versions */ + V4= 4, + V6= 6, + IP_VER4= 0x40, + IP_VER6= 0x60, + + /* 2^Lroot trees in the root table */ + Lroot= 10, + + Maxpath = 64, +}; + +enum +{ + Idle= 0, + Announcing= 1, + Announced= 2, + Connecting= 3, + Connected= 4, +}; + +/* + * one per conversation directory + */ +struct Conv +{ + QLock; + + int x; /* conversation index */ + Proto* p; + + int restricted; /* remote port is restricted */ + uint ttl; /* max time to live */ + uint tos; /* type of service */ + int ignoreadvice; /* don't terminate connection on icmp errors */ + + uchar ipversion; + uchar laddr[IPaddrlen]; /* local IP address */ + uchar raddr[IPaddrlen]; /* remote IP address */ + ushort lport; /* local port number */ + ushort rport; /* remote port number */ + + char *owner; /* protections */ + int perm; + int inuse; /* opens of listen/data/ctl */ + int length; + 
int state; + + /* udp specific */ + int headers; /* data src/dst headers in udp */ + int reliable; /* true if reliable udp */ + + Conv* incall; /* calls waiting to be listened for */ + Conv* next; + + Queue* rq; /* queued data waiting to be read */ + Queue* wq; /* queued data waiting to be written */ + Queue* eq; /* returned error packets */ + Queue* sq; /* snooping queue */ + Ref snoopers; /* number of processes with snoop open */ + + Rendez cr; + char cerr[ERRMAX]; + + QLock listenq; + Rendez listenr; + + Ipmulti *multi; /* multicast bindings for this interface */ + + void* ptcl; /* protocol specific stuff */ + + Route *r; /* last route used */ + ulong rgen; /* routetable generation for *r */ +}; + +struct Medium +{ + char *name; + int hsize; /* medium header size */ + int mintu; /* default min mtu */ + int maxtu; /* default max mtu */ + int maclen; /* mac address length */ + void (*bind)(Ipifc*, int, char**); + void (*unbind)(Ipifc*); + void (*bwrite)(Ipifc *ifc, Block *b, int version, uchar *ip); + + /* for arming interfaces to receive multicast */ + void (*addmulti)(Ipifc *ifc, uchar *a, uchar *ia); + void (*remmulti)(Ipifc *ifc, uchar *a, uchar *ia); + + /* process packets written to 'data' */ + void (*pktin)(Fs *f, Ipifc *ifc, Block *bp); + + /* routes for router boards */ + void (*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int); + void (*remroute)(Ipifc *ifc, int, uchar*, uchar*); + void (*flushroutes)(Ipifc *ifc); + + /* for routing multicast groups */ + void (*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia); + void (*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia); + + /* address resolution */ + void (*ares)(Fs*, int, uchar*, uchar*, int, int); /* resolve */ + void (*areg)(Ipifc*, uchar*); /* register */ + + /* v6 address generation */ + void (*pref2addr)(uchar *pref, uchar *ea); + + int unbindonclose; /* if non-zero, unbind on last close */ +}; + +/* logical interface associated with a physical one */ +struct Iplifc +{ + uchar local[IPaddrlen]; + uchar 
mask[IPaddrlen]; + uchar remote[IPaddrlen]; + uchar net[IPaddrlen]; + uchar tentative; /* =1 => v6 dup disc on, =0 => confirmed unique */ + uchar onlink; /* =1 => onlink, =0 offlink. */ + uchar autoflag; /* v6 autonomous flag */ + long validlt; /* v6 valid lifetime */ + long preflt; /* v6 preferred lifetime */ + long origint; /* time when addr was added */ + Iplink *link; /* addresses linked to this lifc */ + Iplifc *next; +}; + +/* binding twixt Ipself and Iplifc */ +struct Iplink +{ + Ipself *self; + Iplifc *lifc; + Iplink *selflink; /* next link for this local address */ + Iplink *lifclink; /* next link for this ifc */ + ulong expire; + Iplink *next; /* free list */ + int ref; +}; + +/* rfc 2461, pp.40--43. */ + +/* default values, one per stack */ +struct Routerparams { + int mflag; + int oflag; + int maxraint; + int minraint; + int linkmtu; + int reachtime; + int rxmitra; + int ttl; + int routerlt; +}; + +struct Hostparams { + int rxmithost; +}; + +struct Ipifc +{ + RWlock; + + Conv *conv; /* link to its conversation structure */ + char dev[64]; /* device we're attached to */ + Medium *m; /* Media pointer */ + int maxtu; /* Maximum transfer unit */ + int mintu; /* Minumum tranfer unit */ + int mbps; /* megabits per second */ + void *arg; /* medium specific */ + int reassemble; /* reassemble IP packets before forwarding */ + + /* these are used so that we can unbind on the fly */ + Lock idlock; + uchar ifcid; /* incremented each 'bind/unbind/add/remove' */ + int ref; /* number of proc's using this ipifc */ + Rendez wait; /* where unbinder waits for ref == 0 */ + int unbinding; + + uchar mac[MAClen]; /* MAC address */ + + Iplifc *lifc; /* logical interfaces on this physical one */ + + ulong in, out; /* message statistics */ + ulong inerr, outerr; /* ... */ + + uchar sendra6; /* == 1 => send router advs on this ifc */ + uchar recvra6; /* == 1 => recv router advs on this ifc */ + Routerparams rp; /* router parameters as in RFC 2461, pp.40--43. 
+ used only if node is router */ +}; + +/* + * one per multicast-lifc pair used by a Conv + */ +struct Ipmulti +{ + uchar ma[IPaddrlen]; + uchar ia[IPaddrlen]; + Ipmulti *next; +}; + +/* + * hash table for 2 ip addresses + 2 ports + */ +enum +{ + Nipht= 521, /* convenient prime */ + + IPmatchexact= 0, /* match on 4 tuple */ + IPmatchany, /* *!* */ + IPmatchport, /* *!port */ + IPmatchaddr, /* addr!* */ + IPmatchpa, /* addr!port */ +}; +struct Iphash +{ + Iphash *next; + Conv *c; + int match; +}; +struct Ipht +{ + Lock; + Iphash *tab[Nipht]; +}; +void iphtadd(Ipht*, Conv*); +void iphtrem(Ipht*, Conv*); +Conv* iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp); + +/* + * one per multiplexed protocol + */ +struct Proto +{ + QLock; + char* name; /* protocol name */ + int x; /* protocol index */ + int ipproto; /* ip protocol type */ + + char* (*connect)(Conv*, char**, int); + char* (*announce)(Conv*, char**, int); + char* (*bind)(Conv*, char**, int); + int (*state)(Conv*, char*, int); + void (*create)(Conv*); + void (*close)(Conv*); + void (*rcv)(Proto*, Ipifc*, Block*); + char* (*ctl)(Conv*, char**, int); + void (*advise)(Proto*, Block*, char*); + int (*stats)(Proto*, char*, int); + int (*local)(Conv*, char*, int); + int (*remote)(Conv*, char*, int); + int (*inuse)(Conv*); + int (*gc)(Proto*); /* returns true if any conversations are freed */ + + Fs *f; /* file system this proto is part of */ + Conv **conv; /* array of conversations */ + int ptclsize; /* size of per protocol ctl block */ + int nc; /* number of conversations */ + int ac; + Qid qid; /* qid for protocol directory */ + ushort nextport; + ushort nextrport; + + void *priv; +}; + +/* + * Stream for sending packets to user level + */ +struct IProuter { + QLock; + int opens; + Queue *q; +}; + +/* + * one per IP protocol stack + */ +struct Fs +{ + RWlock; + int dev; + + int np; + Proto* p[Maxproto+1]; /* list of supported protocols */ + Proto* t2p[256]; /* vector of all protocols */ + Proto* ipifc; 
/* kludge for ipifcremroute & ipifcaddroute */ + Proto* ipmux; /* kludge for finding an ip multiplexor */ + + IP *ip; + Ipselftab *self; + Arp *arp; + V6params *v6p; + IProuter iprouter; + + Route *v4root[1<<Lroot]; /* v4 routing forest */ + Route *v6root[1<<Lroot]; /* v6 routing forest */ + Route *queue; /* used as temp when reinjecting routes */ + + Netlog *alog; + Ifclog *ilog; + + char ndb[1024]; /* an ndb entry for this interface */ + int ndbvers; + long ndbmtime; +}; + +/* one per default router known to host */ +struct V6router { + uchar inuse; + Ipifc *ifc; + int ifcid; + uchar routeraddr[IPaddrlen]; + long ltorigin; + Routerparams rp; +}; + +struct V6params +{ + Routerparams rp; /* v6 params, one copy per node now */ + Hostparams hp; + V6router v6rlist[3]; /* max 3 default routers, currently */ + int cdrouter; /* uses only v6rlist[cdrouter] if */ + /* cdrouter >= 0. */ +}; + + +int Fsconnected(Conv*, char*); +Conv* Fsnewcall(Conv*, uchar*, ushort, uchar*, ushort, uchar); +int Fspcolstats(char*, int); +int Fsproto(Fs*, Proto*); +int Fsbuiltinproto(Fs*, uchar); +Conv* Fsprotoclone(Proto*, char*); +Proto* Fsrcvpcol(Fs*, uchar); +Proto* Fsrcvpcolx(Fs*, uchar); +char* Fsstdconnect(Conv*, char**, int); +char* Fsstdannounce(Conv*, char**, int); +char* Fsstdbind(Conv*, char**, int); +ulong scalednconv(void); + +/* + * logging + */ +enum +{ + Logip= 1<<1, + Logtcp= 1<<2, + Logfs= 1<<3, + Logil= 1<<4, + Logicmp= 1<<5, + Logudp= 1<<6, + Logcompress= 1<<7, + Logilmsg= 1<<8, + Loggre= 1<<9, + Logppp= 1<<10, + Logtcprxmt= 1<<11, + Logigmp= 1<<12, + Logudpmsg= 1<<13, + Logipmsg= 1<<14, + Logrudp= 1<<15, + Logrudpmsg= 1<<16, + Logesp= 1<<17, + Logtcpwin= 1<<18, +}; + +void netloginit(Fs*); +void netlogopen(Fs*); +void netlogclose(Fs*); +void netlogctl(Fs*, char*, int); +long netlogread(Fs*, void*, ulong, long); +void netlog(Fs*, int, char*, ...); +void ifcloginit(Fs*); +long ifclogread(Fs*, Chan *,void*, ulong, long); +void ifclog(Fs*, uchar *, int); +void ifclogopen(Fs*, 
Chan*); +void ifclogclose(Fs*, Chan*); + +/* + * iproute.c + */ +typedef struct RouteTree RouteTree; +typedef struct Routewalk Routewalk; +typedef struct V4route V4route; +typedef struct V6route V6route; + +enum +{ + + /* type bits */ + Rv4= (1<<0), /* this is a version 4 route */ + Rifc= (1<<1), /* this route is a directly connected interface */ + Rptpt= (1<<2), /* this route is a pt to pt interface */ + Runi= (1<<3), /* a unicast self address */ + Rbcast= (1<<4), /* a broadcast self address */ + Rmulti= (1<<5), /* a multicast self address */ + Rproxy= (1<<6), /* this route should be proxied */ +}; + +struct Routewalk +{ + int o; + int h; + char* p; + char* e; + void* state; + void (*walk)(Route*, Routewalk*); +}; + +struct RouteTree +{ + Route* right; + Route* left; + Route* mid; + uchar depth; + uchar type; + uchar ifcid; /* must match ifc->id */ + Ipifc *ifc; + char tag[4]; + int ref; +}; + +struct V4route +{ + ulong address; + ulong endaddress; + uchar gate[IPv4addrlen]; +}; + +struct V6route +{ + ulong address[IPllen]; + ulong endaddress[IPllen]; + uchar gate[IPaddrlen]; +}; + +struct Route +{ + RouteTree; + + union { + V6route v6; + V4route v4; + }; +}; +extern void v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type); +extern void v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type); +extern void v4delroute(Fs *f, uchar *a, uchar *mask, int dolock); +extern void v6delroute(Fs *f, uchar *a, uchar *mask, int dolock); +extern Route* v4lookup(Fs *f, uchar *a, Conv *c); +extern Route* v6lookup(Fs *f, uchar *a, Conv *c); +extern long routeread(Fs *f, char*, ulong, int); +extern long routewrite(Fs *f, Chan*, char*, int); +extern void routetype(int, char*); +extern void ipwalkroutes(Fs*, Routewalk*); +extern void convroute(Route*, uchar*, uchar*, uchar*, char*, int*); + +/* + * devip.c + */ + +/* + * Hanging off every ip channel's ->aux is the following structure. + * It maintains the state used by devip and iproute. 
+ */ +struct IPaux +{ + char *owner; /* the user that did the attach */ + char tag[4]; +}; + +extern IPaux* newipaux(char*, char*); + +/* + * arp.c + */ +struct Arpent +{ + uchar ip[IPaddrlen]; + uchar mac[MAClen]; + Medium *type; /* media type */ + Arpent* hash; + Block* hold; + Block* last; + uint ctime; /* time entry was created or refreshed */ + uint utime; /* time entry was last used */ + uchar state; + Arpent *nextrxt; /* re-transmit chain */ + uint rtime; /* time for next retransmission */ + uchar rxtsrem; + Ipifc *ifc; + uchar ifcid; /* must match ifc->id */ +}; + +extern void arpinit(Fs*); +extern int arpread(Arp*, char*, ulong, int); +extern int arpwrite(Fs*, char*, int); +extern Arpent* arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h); +extern void arprelease(Arp*, Arpent *a); +extern Block* arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac); +extern void arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh); + +/* + * ipaux.c + */ + +extern int myetheraddr(uchar*, char*); +extern ulong parseip(uchar*, char*); +extern ulong parseipmask(uchar*, char*); +extern char* v4parseip(uchar*, char*); +extern void maskip(uchar *from, uchar *mask, uchar *to); +extern int parsemac(uchar *to, char *from, int len); +extern uchar* defmask(uchar*); +extern int isv4(uchar*); +extern void v4tov6(uchar *v6, uchar *v4); +extern int v6tov4(uchar *v4, uchar *v6); +extern int eipfmt(Fmt*); + +#define ipmove(x, y) memmove(x, y, IPaddrlen) +#define ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) ) + +extern uchar IPv4bcast[IPaddrlen]; +extern uchar IPv4bcastobs[IPaddrlen]; +extern uchar IPv4allsys[IPaddrlen]; +extern uchar IPv4allrouter[IPaddrlen]; +extern uchar IPnoaddr[IPaddrlen]; +extern uchar v4prefix[IPaddrlen]; +extern uchar IPallbits[IPaddrlen]; + +#define NOW TK2MS(MACHP(0)->ticks) + +/* + * media + */ +extern Medium ethermedium; +extern Medium nullmedium; +extern Medium pktmedium; +extern Medium 
tripmedium; + +/* + * ipifc.c + */ +extern Medium* ipfindmedium(char *name); +extern void addipmedium(Medium *med); +extern int ipforme(Fs*, uchar *addr); +extern int iptentative(Fs*, uchar *addr); +extern int ipisbm(uchar *); +extern int ipismulticast(uchar *); +extern Ipifc* findipifc(Fs*, uchar *remote, int type); +extern void findprimaryip(Fs*, uchar*); +extern void findlocalip(Fs*, uchar *local, uchar *remote); +extern int ipv4local(Ipifc *ifc, uchar *addr); +extern int ipv6local(Ipifc *ifc, uchar *addr); +extern int ipv6anylocal(Ipifc *ifc, uchar *addr); +extern Iplifc* iplocalonifc(Ipifc *ifc, uchar *ip); +extern int ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip); +extern int ipismulticast(uchar *ip); +extern int ipisbooting(void); +extern int ipifccheckin(Ipifc *ifc, Medium *med); +extern void ipifccheckout(Ipifc *ifc); +extern int ipifcgrab(Ipifc *ifc); +extern void ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int); +extern void ipifcremroute(Fs*, int, uchar*, uchar*); +extern void ipifcremmulti(Conv *c, uchar *ma, uchar *ia); +extern void ipifcaddmulti(Conv *c, uchar *ma, uchar *ia); +extern char* ipifcrem(Ipifc *ifc, char **argv, int argc); +extern char* ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp); +extern long ipselftabread(Fs*, char *a, ulong offset, int n); +extern char* ipifcaddpref6(Ipifc *ifc, char**argv, int argc); +extern void ipsendra6(Fs *f, int on); + +/* + * ip.c + */ +extern void iprouting(Fs*, int); +extern void icmpnoconv(Fs*, Block*); +extern void icmpcantfrag(Fs*, Block*, int); +extern void icmpttlexceeded(Fs*, uchar*, Block*); +extern ushort ipcsum(uchar*); +extern void ipiput4(Fs*, Ipifc*, Block*); +extern void ipiput6(Fs*, Ipifc*, Block*); +extern int ipoput4(Fs*, Block*, int, int, int, Conv*); +extern int ipoput6(Fs*, Block*, int, int, int, Conv*); +extern int ipstats(Fs*, char*, int); +extern ushort ptclbsum(uchar*, int); +extern ushort ptclcsum(Block*, int, int); +extern void ip_init(Fs*); +extern void 
update_mtucache(uchar*, ulong); +extern ulong restrict_mtu(uchar*, ulong); + +/* + * bootp.c + */ +char* (*bootp)(Ipifc*); +int (*bootpread)(char*, ulong, int); + +/* + * iprouter.c + */ +void useriprouter(Fs*, Ipifc*, Block*); +void iprouteropen(Fs*); +void iprouterclose(Fs*); +long iprouterread(Fs*, void*, int); + +/* + * resolving inferno/plan9 differences + */ +Chan* commonfdtochan(int, int, int, int); +char* commonuser(void); +char* commonerror(void); + +/* + * chandial.c + */ +extern Chan* chandial(char*, char*, char*, Chan**); + +/* + * global to all of the stack + */ +extern int debug; +extern void (*igmpreportfn)(Ipifc*, uchar*); diff --git a/os/ip/ipaux.c b/os/ip/ipaux.c new file mode 100644 index 00000000..2ddae041 --- /dev/null +++ b/os/ip/ipaux.c @@ -0,0 +1,730 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "ip.h" +#include "ipv6.h" + +/* + * well known IP addresses + */ +uchar IPv4bcast[IPaddrlen] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff +}; +uchar IPv4allsys[IPaddrlen] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0xff, 0xff, + 0xe0, 0, 0, 0x01 +}; +uchar IPv4allrouter[IPaddrlen] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0xff, 0xff, + 0xe0, 0, 0, 0x02 +}; +uchar IPallbits[IPaddrlen] = { + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff +}; + +uchar IPnoaddr[IPaddrlen]; + +/* + * prefix of all v4 addresses + */ +uchar v4prefix[IPaddrlen] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0xff, 0xff, + 0, 0, 0, 0 +}; + + +char *v6hdrtypes[Maxhdrtype] = +{ + [HBH] "HopbyHop", + [ICMP] "ICMP", + [IGMP] "IGMP", + [GGP] "GGP", + [IPINIP] "IP", + [ST] "ST", + [TCP] "TCP", + [UDP] "UDP", + [ISO_TP4] "ISO_TP4", + [RH] "Routinghdr", + [FH] "Fraghdr", + [IDRP] "IDRP", + [RSVP] "RSVP", + [AH] "Authhdr", + [ESP] "ESP", + [ICMPv6] "ICMPv6", + [NNH] "Nonexthdr", + [ISO_IP] "ISO_IP", + [IGRP] "IGRP", + [OSPF] 
"OSPF", +}; + +/* + * well known IPv6 addresses + */ +uchar v6Unspecified[IPaddrlen] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +uchar v6loopback[IPaddrlen] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x01 +}; +uchar v6linklocal[IPaddrlen] = { + 0xfe, 0x80, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +uchar v6linklocalmask[IPaddrlen] = { + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +int v6llpreflen = 8; // link-local prefix length +uchar v6sitelocal[IPaddrlen] = { + 0xfe, 0xc0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +uchar v6sitelocalmask[IPaddrlen] = { + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +int v6slpreflen = 6; // site-local prefix length +uchar v6glunicast[IPaddrlen] = { + 0x08, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +uchar v6multicast[IPaddrlen] = { + 0xff, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +uchar v6multicastmask[IPaddrlen] = { + 0xff, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +int v6mcpreflen = 1; // multicast prefix length +uchar v6allnodesN[IPaddrlen] = { + 0xff, 0x01, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x01 +}; +uchar v6allnodesNmask[IPaddrlen] = { + 0xff, 0xff, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +int v6aNpreflen = 2; // all nodes (N) prefix +uchar v6allnodesL[IPaddrlen] = { + 0xff, 0x02, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x01 +}; +uchar v6allnodesLmask[IPaddrlen] = { + 0xff, 0xff, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +int v6aLpreflen = 2; // all nodes (L) prefix +uchar v6allroutersN[IPaddrlen] = { + 0xff, 0x01, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x02 +}; +uchar v6allroutersL[IPaddrlen] = { + 0xff, 0x02, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x02 +}; +uchar v6allroutersS[IPaddrlen] = { + 0xff, 0x05, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x02 +}; +uchar v6solicitednode[IPaddrlen] = { + 0xff, 0x02, 0, 0, + 
0, 0, 0, 0, + 0, 0, 0, 0x01, + 0xff, 0, 0, 0 +}; +uchar v6solicitednodemask[IPaddrlen] = { + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0x0, 0x0, 0x0 +}; +int v6snpreflen = 13; + + + + +/* + * ptclcsum - checksum len bytes of the block chain bp, starting offset bytes in. + * Returns the complemented 16 bit sum, or 0 if offset lies beyond the chain. + * Per-block partial sums come from ptclbsum; a block that begins at an odd + * byte position is accumulated in hisum and folded in byte-swapped at the end. + */ +ushort +ptclcsum(Block *bp, int offset, int len) +{ + uchar *addr; + ulong losum, hisum; + ushort csum; + int odd, blocklen, x; + + /* Correct to front of data area */ + while(bp != nil && offset && offset >= BLEN(bp)) { + offset -= BLEN(bp); + bp = bp->next; + } + if(bp == nil) + return 0; + + addr = bp->rp + offset; + blocklen = BLEN(bp) - offset; + + if(bp->next == nil) { + if(blocklen < len) + len = blocklen; + return ~ptclbsum(addr, len) & 0xffff; + } + + losum = 0; + hisum = 0; + + odd = 0; + while(len) { + x = blocklen; + if(len < x) + x = len; + + csum = ptclbsum(addr, x); + if(odd) + hisum += csum; + else + losum += csum; + odd = (odd+x) & 1; + len -= x; + + bp = bp->next; + if(bp == nil) + break; + blocklen = BLEN(bp); + addr = bp->rp; + } + + losum += hisum>>8; + losum += (hisum&0xff)<<8; + while((csum = losum>>16) != 0) + losum = csum + (losum & 0xffff); + + return ~losum & 0xffff; +} + +enum +{ + Isprefix= 16, +}; + +static uchar prefixvals[256] = +{ +[0x00] 0 | Isprefix, +[0x80] 1 | Isprefix, +[0xC0] 2 | Isprefix, +[0xE0] 3 | Isprefix, +[0xF0] 4 | Isprefix, +[0xF8] 5 | Isprefix, +[0xFC] 6 | Isprefix, +[0xFE] 7 | Isprefix, +[0xFF] 8 | Isprefix, +}; + +/* + * eipfmt - format routine for network addresses, selected by the verb in f->r: + * 'E' six byte Ethernet address, printed as hex + * 'I' 16 byte IP address: dotted quad if it starts with v4prefix, + * otherwise v6 groups with the longest zero run elided as :: + * 'i' v6 address held as 4 ulongs, printed like 'I' + * 'V' 4 byte v4 address + * 'M' IP mask, printed as /bits when it is a contiguous prefix, + * otherwise like 'I' + * Any other verb prints (eipfmt). + */ +int +eipfmt(Fmt *f) +{ + char buf[5*8]; + static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux"; + static char *ifmt = "%d.%d.%d.%d"; + uchar *p, ip[16]; + ulong *lp; + ushort s; + int i, j, n, eln, eli; + + switch(f->r) { + case 'E': /* Ethernet address */ + p = va_arg(f->args, uchar*); + return fmtprint(f, efmt, p[0], p[1], p[2], p[3], p[4], p[5]); + + case 'I': /* Ip address */ + p = va_arg(f->args, uchar*); +common: + if(memcmp(p, v4prefix, 12) == 0) + return fmtprint(f, ifmt, p[12], p[13], p[14], p[15]); + + /* find longest elision */ + eln = 
eli = -1; + for(i = 0; i < 16; i += 2){ + for(j = i; j < 16; j += 2) + if(p[j] != 0 || p[j+1] != 0) + break; + if(j > i && j - i > eln){ + eli = i; + eln = j - i; + } + } + + /* print with possible elision */ + n = 0; + for(i = 0; i < 16; i += 2){ + if(i == eli){ + n += sprint(buf+n, "::"); + i += eln; + if(i >= 16) + break; + } else if(i != 0) + n += sprint(buf+n, ":"); + s = (p[i]<<8) + p[i+1]; + n += sprint(buf+n, "%ux", s); + } + return fmtstrcpy(f, buf); + + case 'i': /* v6 address as 4 longs */ + lp = va_arg(f->args, ulong*); + for(i = 0; i < 4; i++) + hnputl(ip+4*i, *lp++); + p = ip; + goto common; + + case 'V': /* v4 ip address */ + p = va_arg(f->args, uchar*); + return fmtprint(f, ifmt, p[0], p[1], p[2], p[3]); + + case 'M': /* ip mask */ + p = va_arg(f->args, uchar*); + + /* look for a prefix mask */ + for(i = 0; i < 16; i++) + if(p[i] != 0xff) + break; + if(i < 16){ + if((prefixvals[p[i]] & Isprefix) == 0) + goto common; + for(j = i+1; j < 16; j++) + if(p[j] != 0) + goto common; + n = 8*i + (prefixvals[p[i]] & ~Isprefix); + } else + n = 8*16; + + /* got one, use /xx format */ + return fmtprint(f, "/%d", n); + } + return fmtstrcpy(f, "(eipfmt)"); +} + +#define CLASS(p) ((*(uchar*)(p))>>6) + +extern char* +v4parseip(uchar *to, char *from) +{ + int i; + char *p; + + p = from; + for(i = 0; i < 4 && *p; i++){ + to[i] = strtoul(p, &p, 0); + if(*p == '.') + p++; + } + switch(CLASS(to)){ + case 0: /* class A - 1 uchar net */ + case 1: + if(i == 3){ + to[3] = to[2]; + to[2] = to[1]; + to[1] = 0; + } else if(i == 2){ + to[3] = to[1]; + to[1] = 0; + } + break; + case 2: /* class B - 2 uchar net */ + if(i == 3){ + to[3] = to[2]; + to[2] = 0; + } + break; + } + return p; +} + +int +isv4(uchar *ip) +{ + return memcmp(ip, v4prefix, IPv4off) == 0; +} + + +/* + * the following routines are unrolled with no memset's to speed + * up the usual case + */ +void +v4tov6(uchar *v6, uchar *v4) +{ + v6[0] = 0; + v6[1] = 0; + v6[2] = 0; + v6[3] = 0; + v6[4] = 0; + v6[5] = 0; + 
v6[6] = 0; + v6[7] = 0; + v6[8] = 0; + v6[9] = 0; + v6[10] = 0xff; + v6[11] = 0xff; + v6[12] = v4[0]; + v6[13] = v4[1]; + v6[14] = v4[2]; + v6[15] = v4[3]; +} + +int +v6tov4(uchar *v4, uchar *v6) +{ + if(v6[0] == 0 + && v6[1] == 0 + && v6[2] == 0 + && v6[3] == 0 + && v6[4] == 0 + && v6[5] == 0 + && v6[6] == 0 + && v6[7] == 0 + && v6[8] == 0 + && v6[9] == 0 + && v6[10] == 0xff + && v6[11] == 0xff) + { + v4[0] = v6[12]; + v4[1] = v6[13]; + v4[2] = v6[14]; + v4[3] = v6[15]; + return 0; + } else { + memset(v4, 0, 4); + return -1; + } +} + +ulong +parseip(uchar *to, char *from) +{ + int i, elipsis = 0, v4 = 1; + ulong x; + char *p, *op; + + memset(to, 0, IPaddrlen); + p = from; + for(i = 0; i < 16 && *p; i+=2){ + op = p; + x = strtoul(p, &p, 16); + if(*p == '.' || (*p == 0 && i == 0)){ + p = v4parseip(to+i, op); + i += 4; + break; + } else { + to[i] = x>>8; + to[i+1] = x; + } + if(*p == ':'){ + v4 = 0; + if(*++p == ':'){ + elipsis = i+2; + p++; + } + } + } + if(i < 16){ + memmove(&to[elipsis+16-i], &to[elipsis], i-elipsis); + memset(&to[elipsis], 0, 16-i); + } + if(v4){ + to[10] = to[11] = 0xff; + return nhgetl(to+12); + } else + return 6; +} + +/* + * hack to allow ip v4 masks to be entered in the old + * style + */ +ulong +parseipmask(uchar *to, char *from) +{ + ulong x; + int i; + uchar *p; + + if(*from == '/'){ + /* as a number of prefix bits */ + i = atoi(from+1); + if(i < 0) + i = 0; + if(i > 128) + i = 128; + memset(to, 0, IPaddrlen); + for(p = to; i >= 8; i -= 8) + *p++ = 0xff; + if(i > 0) + *p = ~((1<<(8-i))-1); + x = nhgetl(to+IPv4off); + } else { + /* as a straight bit mask */ + x = parseip(to, from); + if(memcmp(to, v4prefix, IPv4off) == 0) + memset(to, 0xff, IPv4off); + } + return x; +} + +void +maskip(uchar *from, uchar *mask, uchar *to) +{ + int i; + + for(i = 0; i < IPaddrlen; i++) + to[i] = from[i] & mask[i]; +} + +uchar classmask[4][16] = { + 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0x00,0x00,0x00, + 0xff,0xff,0xff,0xff, 
0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0x00,0x00,0x00, + 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0x00,0x00, + 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0x00, +}; + +uchar* +defmask(uchar *ip) +{ + if(isv4(ip)) + return classmask[ip[IPv4off]>>6]; + else { + if(ipcmp(ip, v6loopback) == 0) + return IPallbits; + else if(memcmp(ip, v6linklocal, v6llpreflen) == 0) + return v6linklocalmask; + else if(memcmp(ip, v6sitelocal, v6slpreflen) == 0) + return v6sitelocalmask; + else if(memcmp(ip, v6solicitednode, v6snpreflen) == 0) + return v6solicitednodemask; + else if(memcmp(ip, v6multicast, v6mcpreflen) == 0) + return v6multicastmask; + return IPallbits; + } +} + +void +ipv62smcast(uchar *smcast, uchar *a) +{ + assert(IPaddrlen == 16); + memmove(smcast, v6solicitednode, IPaddrlen); + smcast[13] = a[13]; + smcast[14] = a[14]; + smcast[15] = a[15]; +} + + +/* + * parse a hex mac address + */ +int +parsemac(uchar *to, char *from, int len) +{ + char nip[4]; + char *p; + int i; + + p = from; + memset(to, 0, len); + for(i = 0; i < len; i++){ + if(p[0] == '\0' || p[1] == '\0') + break; + + nip[0] = p[0]; + nip[1] = p[1]; + nip[2] = '\0'; + p += 2; + + to[i] = strtoul(nip, 0, 16); + if(*p == ':') + p++; + } + return i; +} + +/* + * hashing tcp, udp, ... 
connections + */ +ulong +iphash(uchar *sa, ushort sp, uchar *da, ushort dp) +{ + return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nhash; +} + +void +iphtadd(Ipht *ht, Conv *c) +{ + ulong hv; + Iphash *h; + + hv = iphash(c->raddr, c->rport, c->laddr, c->lport); + h = smalloc(sizeof(*h)); + if(ipcmp(c->raddr, IPnoaddr) != 0) + h->match = IPmatchexact; + else { + if(ipcmp(c->laddr, IPnoaddr) != 0){ + if(c->lport == 0) + h->match = IPmatchaddr; + else + h->match = IPmatchpa; + } else { + if(c->lport == 0) + h->match = IPmatchany; + else + h->match = IPmatchport; + } + } + h->c = c; + + lock(ht); + h->next = ht->tab[hv]; + ht->tab[hv] = h; + unlock(ht); +} + +void +iphtrem(Ipht *ht, Conv *c) +{ + ulong hv; + Iphash **l, *h; + + hv = iphash(c->raddr, c->rport, c->laddr, c->lport); + lock(ht); + for(l = &ht->tab[hv]; (*l) != nil; l = &(*l)->next) + if((*l)->c == c){ + h = *l; + (*l) = h->next; + free(h); + break; + } + unlock(ht); +} + +/* look for a matching conversation with the following precedence + * connected && raddr,rport,laddr,lport + * announced && laddr,lport + * announced && *,lport + * announced && laddr,* + * announced && *,* + */ +Conv* +iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp) +{ + ulong hv; + Iphash *h; + Conv *c; + + /* exact 4 pair match (connection) */ + hv = iphash(sa, sp, da, dp); + lock(ht); + for(h = ht->tab[hv]; h != nil; h = h->next){ + if(h->match != IPmatchexact) + continue; + c = h->c; + if(sp == c->rport && dp == c->lport + && ipcmp(sa, c->raddr) == 0 && ipcmp(da, c->laddr) == 0){ + unlock(ht); + return c; + } + } + + /* match local address and port */ + hv = iphash(IPnoaddr, 0, da, dp); + for(h = ht->tab[hv]; h != nil; h = h->next){ + if(h->match != IPmatchpa) + continue; + c = h->c; + if(dp == c->lport && ipcmp(da, c->laddr) == 0){ + unlock(ht); + return c; + } + } + + /* match just port */ + hv = iphash(IPnoaddr, 0, IPnoaddr, dp); + for(h = ht->tab[hv]; h != nil; h = h->next){ + if(h->match 
!= IPmatchport) + continue; + c = h->c; + if(dp == c->lport){ + unlock(ht); + return c; + } + } + + /* match local address */ + hv = iphash(IPnoaddr, 0, da, 0); + for(h = ht->tab[hv]; h != nil; h = h->next){ + if(h->match != IPmatchaddr) + continue; + c = h->c; + if(ipcmp(da, c->laddr) == 0){ + unlock(ht); + return c; + } + } + + /* look for something that matches anything */ + hv = iphash(IPnoaddr, 0, IPnoaddr, 0); + for(h = ht->tab[hv]; h != nil; h = h->next){ + if(h->match != IPmatchany) + continue; + c = h->c; + unlock(ht); + return c; + } + unlock(ht); + return nil; +} diff --git a/os/ip/ipifc.c b/os/ip/ipifc.c new file mode 100644 index 00000000..345c7404 --- /dev/null +++ b/os/ip/ipifc.c @@ -0,0 +1,1721 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ipv6.h" + +#define DPRINT if(0)print + +enum { + Maxmedia = 32, + Nself = Maxmedia*5, + NHASH = (1<<6), + NCACHE = 256, + QMAX = 64*1024-1, +}; + +Medium *media[Maxmedia] = +{ + 0 +}; + +/* + * cache of local addresses (addresses we answer to) + */ +struct Ipself +{ + uchar a[IPaddrlen]; + Ipself *hnext; /* next address in the hash table */ + Iplink *link; /* binding twixt Ipself and Ipifc */ + ulong expire; + uchar type; /* type of address */ + int ref; + Ipself *next; /* free list */ +}; + +struct Ipselftab +{ + QLock; + int inited; + int acceptall; /* true if an interface has the null address */ + Ipself *hash[NHASH]; /* hash chains */ +}; + +/* + * Multicast addresses are chained onto a Chan so that + * we can remove them when the Chan is closed. 
+ */ +typedef struct Ipmcast Ipmcast; +struct Ipmcast +{ + Ipmcast *next; + uchar ma[IPaddrlen]; /* multicast address */ + uchar ia[IPaddrlen]; /* interface address */ +}; + +/* quick hash for ip addresses */ +#define hashipa(a) ( ( ((a)[IPaddrlen-2]<<8) | (a)[IPaddrlen-1] )%NHASH ) + +static char tifc[] = "ifc "; + +static void addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type); +static void remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a); +static char* ipifcjoinmulti(Ipifc *ifc, char **argv, int argc); +static char* ipifcleavemulti(Ipifc *ifc, char **argv, int argc); +static void ipifcregisterproxy(Fs*, Ipifc*, uchar*); +static char* ipifcremlifc(Ipifc*, Iplifc*); + +/* + * link in a new medium: store med in the first free (nil) slot of media[]. + * The scan stops one short of the end of the array, so this routine never + * fills the last slot; ipfindmedium's scan has no bound other than a nil entry. + * If no free slot is found, med is not recorded and nothing is reported. + */ +void +addipmedium(Medium *med) +{ + int i; + + for(i = 0; i < nelem(media)-1; i++) + if(media[i] == nil){ + media[i] = med; + break; + } +} + +/* + * find the medium with this name. + * Walks media[] until an entry's name matches or a nil entry is reached; + * returns the matching Medium, or nil when no name matches. + */ +Medium* +ipfindmedium(char *name) +{ + Medium **mp; + + for(mp = media; *mp != nil; mp++) + if(strcmp((*mp)->name, name) == 0) + break; + return *mp; +} + +/* + * attach a device (or pkt driver) to the interface. 
+ * called with c locked + */ +static char* +ipifcbind(Conv *c, char **argv, int argc) +{ + Ipifc *ifc; + Medium *m; + + if(argc < 2) + return Ebadarg; + + ifc = (Ipifc*)c->ptcl; + + /* bind the device to the interface */ + m = ipfindmedium(argv[1]); + if(m == nil) + return "unknown interface type"; + + wlock(ifc); + if(ifc->m != nil){ + wunlock(ifc); + return "interface already bound"; + } + if(waserror()){ + wunlock(ifc); + nexterror(); + } + + /* do medium specific binding */ + (*m->bind)(ifc, argc, argv); + + /* set the bound device name */ + if(argc > 2) + strncpy(ifc->dev, argv[2], sizeof(ifc->dev)); + else + sprint(ifc->dev, "%s%d", m->name, c->x); + ifc->dev[sizeof(ifc->dev)-1] = 0; + + /* set up parameters */ + ifc->m = m; + ifc->mintu = ifc->m->mintu; + ifc->maxtu = ifc->m->maxtu; + if(ifc->m->unbindonclose == 0) + ifc->conv->inuse++; + ifc->rp.mflag = 0; // default not managed + ifc->rp.oflag = 0; + ifc->rp.maxraint = 600000; // millisecs + ifc->rp.minraint = 200000; + ifc->rp.linkmtu = 0; // no mtu sent + ifc->rp.reachtime = 0; + ifc->rp.rxmitra = 0; + ifc->rp.ttl = MAXTTL; + ifc->rp.routerlt = 3*(ifc->rp.maxraint); + + /* any ancillary structures (like routes) no longer pertain */ + ifc->ifcid++; + + /* reopen all the queues closed by a previous unbind */ + qreopen(c->rq); + qreopen(c->eq); + qreopen(c->sq); + + wunlock(ifc); + poperror(); + + return nil; +} + +/* + * detach a device from an interface, close the interface + * called with ifc->conv closed + */ +static char* +ipifcunbind(Ipifc *ifc) +{ + char *err; + + if(waserror()){ + wunlock(ifc); + nexterror(); + } + wlock(ifc); + + /* dissociate routes */ + if(ifc->m != nil && ifc->m->unbindonclose == 0) + ifc->conv->inuse--; + ifc->ifcid++; + + /* disassociate device */ + if(ifc->m != nil && ifc->m->unbind) + (*ifc->m->unbind)(ifc); + memset(ifc->dev, 0, sizeof(ifc->dev)); + ifc->arg = nil; + ifc->reassemble = 0; + + /* close queues to stop queuing of packets */ + qclose(ifc->conv->rq); + 
qclose(ifc->conv->wq); + qclose(ifc->conv->sq); + + /* disassociate logical interfaces */ + while(ifc->lifc){ + err = ipifcremlifc(ifc, ifc->lifc); + if(err) + error(err); + } + + ifc->m = nil; + wunlock(ifc); + poperror(); + return nil; +} + + + +char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d pktin %lud pktout %lud errin %lud errout %lud\n"; + +char slineformat[] = " %-40I %-10M %-40I %-12lud %-12lud\n"; + + +static int +ipifcstate(Conv *c, char *state, int n) +{ + Ipifc *ifc; + Iplifc *lifc; + int m; + + ifc = (Ipifc*)c->ptcl; + + m = snprint(state, n, sfixedformat, + ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6, + ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint, + ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime, + ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt, + ifc->in, ifc->out, ifc->inerr, ifc->outerr); + + rlock(ifc); + for(lifc = ifc->lifc; lifc && n > m; lifc = lifc->next) + m += snprint(state+m, n - m, slineformat, + lifc->local, lifc->mask, lifc->remote, + lifc->validlt, lifc->preflt); + if(ifc->lifc == nil) + m += snprint(state+m, n - m, "\n"); + runlock(ifc); + return m; +} + +static int +ipifclocal(Conv *c, char *state, int n) +{ + Ipifc *ifc; + Iplifc *lifc; + Iplink *link; + int m; + + ifc = (Ipifc*)c->ptcl; + + m = 0; + + rlock(ifc); + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + m += snprint(state+m, n - m, "%-40.40I ->", lifc->local); + for(link = lifc->link; link; link = link->lifclink) + m += snprint(state+m, n - m, " %-40.40I", link->self->a); + m += snprint(state+m, n - m, "\n"); + } + runlock(ifc); + return m; +} + +static int +ipifcinuse(Conv *c) +{ + Ipifc *ifc; + + ifc = (Ipifc*)c->ptcl; + return ifc->m != nil; +} + +/* + * called when a process writes to an interface's 'data' + */ +static void +ipifckick(void *x) +{ + Conv *c = x; + Block *bp; + Ipifc *ifc; + + bp = qget(c->wq); + if(bp == nil) + return; + + ifc = 
(Ipifc*)c->ptcl; + if(!canrlock(ifc)){ + freeb(bp); + return; + } + if(waserror()){ + runlock(ifc); + nexterror(); + } + if(ifc->m == nil || ifc->m->pktin == nil) + freeb(bp); + else + (*ifc->m->pktin)(c->p->f, ifc, bp); + runlock(ifc); + poperror(); +} + +/* + * called when a new ipifc structure is created + */ +static void +ipifccreate(Conv *c) +{ + Ipifc *ifc; + + c->rq = qopen(QMAX, 0, 0, 0); + c->sq = qopen(2*QMAX, 0, 0, 0); + c->wq = qopen(QMAX, Qkick, ipifckick, c); + ifc = (Ipifc*)c->ptcl; + ifc->conv = c; + ifc->unbinding = 0; + ifc->m = nil; + ifc->reassemble = 0; +} + +/* + * called after last close of ipifc data or ctl + * called with c locked, we must unlock + */ +static void +ipifcclose(Conv *c) +{ + Ipifc *ifc; + Medium *m; + + ifc = (Ipifc*)c->ptcl; + m = ifc->m; + if(m != nil && m->unbindonclose) + ipifcunbind(ifc); +} + +/* + * change an interface's mtu + */ +char* +ipifcsetmtu(Ipifc *ifc, char **argv, int argc) +{ + int mtu; + + if(argc < 2) + return Ebadarg; + if(ifc->m == nil) + return Ebadarg; + mtu = strtoul(argv[1], 0, 0); + if(mtu < ifc->m->mintu || mtu > ifc->m->maxtu) + return Ebadarg; + ifc->maxtu = mtu; + return nil; +} + +/* + * add an address to an interface. 
+ */ +char* +ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp) +{ + uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen]; + uchar bcast[IPaddrlen], net[IPaddrlen]; + Iplifc *lifc, **l; + int i, type, mtu; + Fs *f; + int sendnbrdisc = 0; + + if(ifc->m == nil) + return "ipifc not yet bound to device"; + + f = ifc->conv->p->f; + + type = Rifc; + memset(ip, 0, IPaddrlen); + memset(mask, 0, IPaddrlen); + memset(rem, 0, IPaddrlen); + switch(argc){ + case 6: + if(strcmp(argv[5], "proxy") == 0) + type |= Rproxy; + /* fall through */ + case 5: + mtu = strtoul(argv[4], 0, 0); + if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu) + ifc->maxtu = mtu; + /* fall through */ + case 4: + parseip(ip, argv[1]); + parseipmask(mask, argv[2]); + parseip(rem, argv[3]); + maskip(rem, mask, net); + break; + case 3: + parseip(ip, argv[1]); + parseipmask(mask, argv[2]); + maskip(ip, mask, rem); + maskip(rem, mask, net); + break; + case 2: + parseip(ip, argv[1]); + memmove(mask, defmask(ip), IPaddrlen); + maskip(ip, mask, rem); + maskip(rem, mask, net); + break; + default: + return Ebadarg; + break; + } + if(isv4(ip)) + tentative = 0; + wlock(ifc); + + /* ignore if this is already a local address for this ifc */ + for(lifc = ifc->lifc; lifc; lifc = lifc->next) { + if(ipcmp(lifc->local, ip) == 0) { + if(lifc->tentative != tentative) + lifc->tentative = tentative; + if(lifcp != nil) { + lifc->onlink = lifcp->onlink; + lifc->autoflag = lifcp->autoflag; + lifc->validlt = lifcp->validlt; + lifc->preflt = lifcp->preflt; + lifc->origint = lifcp->origint; + } + goto out; + } + } + + /* add the address to the list of logical ifc's for this ifc */ + lifc = smalloc(sizeof(Iplifc)); + ipmove(lifc->local, ip); + ipmove(lifc->mask, mask); + ipmove(lifc->remote, rem); + ipmove(lifc->net, net); + lifc->tentative = tentative; + if(lifcp != nil) { + lifc->onlink = lifcp->onlink; + lifc->autoflag = lifcp->autoflag; + lifc->validlt = lifcp->validlt; + lifc->preflt = lifcp->preflt; + 
lifc->origint = lifcp->origint; + } + else { // default values + lifc->onlink = 1; + lifc->autoflag = 1; + lifc->validlt = 0xffffffff; + lifc->preflt = 0xffffffff; + lifc->origint = NOW / 10^3; + } + lifc->next = nil; + + for(l = &ifc->lifc; *l; l = &(*l)->next) + ; + *l = lifc; + + /* check for point-to-point interface */ + if(ipcmp(ip, v6loopback)) /* skip v6 loopback, it's a special address */ + if(ipcmp(mask, IPallbits) == 0) + type |= Rptpt; + + /* add local routes */ + if(isv4(ip)) + v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type); + else + v6addroute(f, tifc, rem, mask, rem, type); + + addselfcache(f, ifc, lifc, ip, Runi); + + if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){ + ipifcregisterproxy(f, ifc, rem); + goto out; + } + + if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) { + /* add subnet directed broadcast address to the self cache */ + for(i = 0; i < IPaddrlen; i++) + bcast[i] = (ip[i] & mask[i]) | ~mask[i]; + addselfcache(f, ifc, lifc, bcast, Rbcast); + + /* add subnet directed network address to the self cache */ + for(i = 0; i < IPaddrlen; i++) + bcast[i] = (ip[i] & mask[i]) & mask[i]; + addselfcache(f, ifc, lifc, bcast, Rbcast); + + /* add network directed broadcast address to the self cache */ + memmove(mask, defmask(ip), IPaddrlen); + for(i = 0; i < IPaddrlen; i++) + bcast[i] = (ip[i] & mask[i]) | ~mask[i]; + addselfcache(f, ifc, lifc, bcast, Rbcast); + + /* add network directed network address to the self cache */ + memmove(mask, defmask(ip), IPaddrlen); + for(i = 0; i < IPaddrlen; i++) + bcast[i] = (ip[i] & mask[i]) & mask[i]; + addselfcache(f, ifc, lifc, bcast, Rbcast); + + addselfcache(f, ifc, lifc, IPv4bcast, Rbcast); + } + else { + if(ipcmp(ip, v6loopback) == 0) { + /* add node-local mcast address */ + addselfcache(f, ifc, lifc, v6allnodesN, Rmulti); + + /* add route for all node multicast */ + v6addroute(f, tifc, v6allnodesN, v6allnodesNmask, v6allnodesN, Rmulti); + } + + /* add all nodes multicast address */ + addselfcache(f, 
ifc, lifc, v6allnodesL, Rmulti); + + /* add route for all nodes multicast */ + v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL, Rmulti); + + /* add solicited-node multicast address */ + ipv62smcast(bcast, ip); + addselfcache(f, ifc, lifc, bcast, Rmulti); + + sendnbrdisc = 1; + } + + /* register the address on this network for address resolution */ + if(isv4(ip) && ifc->m->areg != nil) + (*ifc->m->areg)(ifc, ip); + +out: + wunlock(ifc); + if(tentative && sendnbrdisc) + icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac); + return nil; +} + +/* + * remove a logical interface from an ifc + * always called with ifc wlock'd + */ +static char* +ipifcremlifc(Ipifc *ifc, Iplifc *lifc) +{ + Iplifc **l; + Fs *f; + + f = ifc->conv->p->f; + + /* + * find address on this interface and remove from chain. + * for pt to pt we actually specify the remote address as the + * addresss to remove. + */ + for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next) + ; + if(*l == nil) + return "address not on this interface"; + *l = lifc->next; + + /* disassociate any addresses */ + while(lifc->link) + remselfcache(f, ifc, lifc, lifc->link->self->a); + + /* remove the route for this logical interface */ + if(isv4(lifc->local)) + v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1); + else { + v6delroute(f, lifc->remote, lifc->mask, 1); + if(ipcmp(lifc->local, v6loopback) == 0) + /* remove route for all node multicast */ + v6delroute(f, v6allnodesN, v6allnodesNmask, 1); + else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0) + /* remove route for all link multicast */ + v6delroute(f, v6allnodesL, v6allnodesLmask, 1); + } + + free(lifc); + return nil; + +} + +/* + * remove an address from an interface. 
+ * called with c locked + */ +char* +ipifcrem(Ipifc *ifc, char **argv, int argc) +{ + uchar ip[IPaddrlen]; + uchar mask[IPaddrlen]; + uchar rem[IPaddrlen]; + Iplifc *lifc; + char *rv; + + if(argc < 3) + return Ebadarg; + + parseip(ip, argv[1]); + parseipmask(mask, argv[2]); + if(argc < 4) + maskip(ip, mask, rem); + else + parseip(rem, argv[3]); + + wlock(ifc); + + /* + * find address on this interface and remove from chain. + * for pt to pt we actually specify the remote address as the + * addresss to remove. + */ + for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next) { + if (memcmp(ip, lifc->local, IPaddrlen) == 0 + && memcmp(mask, lifc->mask, IPaddrlen) == 0 + && memcmp(rem, lifc->remote, IPaddrlen) == 0) + break; + } + + rv = ipifcremlifc(ifc, lifc); + wunlock(ifc); + return rv; +} + +/* + * distribute routes to active interfaces like the + * TRIP linecards + */ +void +ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type) +{ + Medium *m; + Conv **cp, **e; + Ipifc *ifc; + + e = &f->ipifc->conv[f->ipifc->nc]; + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp != nil) { + ifc = (Ipifc*)(*cp)->ptcl; + m = ifc->m; + if(m == nil) + continue; + if(m->addroute != nil) + m->addroute(ifc, vers, addr, mask, gate, type); + } + } +} + +void +ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask) +{ + Medium *m; + Conv **cp, **e; + Ipifc *ifc; + + e = &f->ipifc->conv[f->ipifc->nc]; + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp != nil) { + ifc = (Ipifc*)(*cp)->ptcl; + m = ifc->m; + if(m == nil) + continue; + if(m->remroute != nil) + m->remroute(ifc, vers, addr, mask); + } + } +} + +/* + * associate an address with the interface. This wipes out any previous + * addresses. This is a macro that means, remove all the old interfaces + * and add a new one. 
+ */ +static char* +ipifcconnect(Conv* c, char **argv, int argc) +{ + char *err; + Ipifc *ifc; + + ifc = (Ipifc*)c->ptcl; + + if(ifc->m == nil) + return "ipifc not yet bound to device"; + + if(waserror()){ + wunlock(ifc); + nexterror(); + } + wlock(ifc); + while(ifc->lifc){ + err = ipifcremlifc(ifc, ifc->lifc); + if(err) + error(err); + } + wunlock(ifc); + poperror(); + + err = ipifcadd(ifc, argv, argc, 0, nil); + if(err) + return err; + + Fsconnected(c, nil); + + return nil; +} + +char* +ipifcsetpar6(Ipifc *ifc, char **argv, int argc) +{ + int i, argsleft, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint; + + argsleft = argc - 1; + i = 1; + + if(argsleft % 2 != 0) + return Ebadarg; + + while (argsleft > 1) { + if(strcmp(argv[i],"recvra")==0) + ifc->recvra6 = (atoi(argv[i+1]) != 0); + else if(strcmp(argv[i],"sendra")==0) + ifc->sendra6 = (atoi(argv[i+1]) != 0); + else if(strcmp(argv[i],"mflag")==0) + ifc->rp.mflag = (atoi(argv[i+1]) != 0); + else if(strcmp(argv[i],"oflag")==0) + ifc->rp.oflag = (atoi(argv[i+1]) != 0); + else if(strcmp(argv[i],"maxraint")==0) + ifc->rp.maxraint = atoi(argv[i+1]); + else if(strcmp(argv[i],"minraint")==0) + ifc->rp.minraint = atoi(argv[i+1]); + else if(strcmp(argv[i],"linkmtu")==0) + ifc->rp.linkmtu = atoi(argv[i+1]); + else if(strcmp(argv[i],"reachtime")==0) + ifc->rp.reachtime = atoi(argv[i+1]); + else if(strcmp(argv[i],"rxmitra")==0) + ifc->rp.rxmitra = atoi(argv[i+1]); + else if(strcmp(argv[i],"ttl")==0) + ifc->rp.ttl = atoi(argv[i+1]); + else if(strcmp(argv[i],"routerlt")==0) + ifc->rp.routerlt = atoi(argv[i+1]); + else + return Ebadarg; + + argsleft -= 2; + i += 2; + } + + // consistency check + if(ifc->rp.maxraint < ifc->rp.minraint) { + ifc->rp.maxraint = vmax; + ifc->rp.minraint = vmin; + return Ebadarg; + } + + return nil; +} + +char* +ipifcsendra6(Ipifc *ifc, char **argv, int argc) +{ + int i; + + i = 0; + if(argc > 1) + i = atoi(argv[1]); + ifc->sendra6 = (i!=0); + return nil; +} + +char* +ipifcrecvra6(Ipifc *ifc, char 
**argv, int argc) +{ + int i; + + i = 0; + if(argc > 1) + i = atoi(argv[1]); + ifc->recvra6 = (i!=0); + return nil; +} + +/* + * non-standard control messages. + * called with c locked. + */ +static char* +ipifcctl(Conv* c, char**argv, int argc) +{ + Ipifc *ifc; + int i; + + ifc = (Ipifc*)c->ptcl; + if(strcmp(argv[0], "add") == 0) + return ipifcadd(ifc, argv, argc, 0, nil); + else if(strcmp(argv[0], "bootp") == 0) + return bootp(ifc); + else if(strcmp(argv[0], "try") == 0) + return ipifcadd(ifc, argv, argc, 1, nil); + else if(strcmp(argv[0], "remove") == 0) + return ipifcrem(ifc, argv, argc); + else if(strcmp(argv[0], "unbind") == 0) + return ipifcunbind(ifc); + else if(strcmp(argv[0], "joinmulti") == 0) + return ipifcjoinmulti(ifc, argv, argc); + else if(strcmp(argv[0], "leavemulti") == 0) + return ipifcleavemulti(ifc, argv, argc); + else if(strcmp(argv[0], "mtu") == 0) + return ipifcsetmtu(ifc, argv, argc); + else if(strcmp(argv[0], "reassemble") == 0){ + ifc->reassemble = 1; + return nil; + } + else if(strcmp(argv[0], "iprouting") == 0){ + i = 1; + if(argc > 1) + i = atoi(argv[1]); + iprouting(c->p->f, i); + return nil; + } + else if(strcmp(argv[0], "addpref6") == 0) + return ipifcaddpref6(ifc, argv, argc); + else if(strcmp(argv[0], "setpar6") == 0) + return ipifcsetpar6(ifc, argv, argc); + else if(strcmp(argv[0], "sendra6") == 0) + return ipifcsendra6(ifc, argv, argc); + else if(strcmp(argv[0], "recvra6") == 0) + return ipifcrecvra6(ifc, argv, argc); + return "unsupported ctl"; +} + +ipifcstats(Proto *ipifc, char *buf, int len) +{ + return ipstats(ipifc->f, buf, len); +} + +void +ipifcinit(Fs *f) +{ + Proto *ipifc; + + ipifc = smalloc(sizeof(Proto)); + ipifc->name = "ipifc"; + ipifc->connect = ipifcconnect; + ipifc->announce = nil; + ipifc->bind = ipifcbind; + ipifc->state = ipifcstate; + ipifc->create = ipifccreate; + ipifc->close = ipifcclose; + ipifc->rcv = nil; + ipifc->ctl = ipifcctl; + ipifc->advise = nil; + ipifc->stats = ipifcstats; + ipifc->inuse = 
ipifcinuse; + ipifc->local = ipifclocal; + ipifc->ipproto = -1; + ipifc->nc = Maxmedia; + ipifc->ptclsize = sizeof(Ipifc); + + f->ipifc = ipifc; /* hack for ipifcremroute, findipifc, ... */ + f->self = smalloc(sizeof(Ipselftab)); /* hack for ipforme */ + + Fsproto(f, ipifc); +} + +/* + * add to self routing cache + * called with c locked + */ +static void +addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type) +{ + Ipself *p; + Iplink *lp; + int h; + + qlock(f->self); + + /* see if the address already exists */ + h = hashipa(a); + for(p = f->self->hash[h]; p; p = p->next) + if(memcmp(a, p->a, IPaddrlen) == 0) + break; + + /* allocate a local address and add to hash chain */ + if(p == nil){ + p = smalloc(sizeof(*p)); + ipmove(p->a, a); + p->type = type; + p->next = f->self->hash[h]; + f->self->hash[h] = p; + + /* if the null address, accept all packets */ + if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0) + f->self->acceptall = 1; + } + + /* look for a link for this lifc */ + for(lp = p->link; lp; lp = lp->selflink) + if(lp->lifc == lifc) + break; + + /* allocate a lifc-to-local link and link to both */ + if(lp == nil){ + lp = smalloc(sizeof(*lp)); + lp->ref = 1; + lp->lifc = lifc; + lp->self = p; + lp->selflink = p->link; + p->link = lp; + lp->lifclink = lifc->link; + lifc->link = lp; + + /* add to routing table */ + if(isv4(a)) + v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off, a+IPv4off, type); + else + v6addroute(f, tifc, a, IPallbits, a, type); + + if((type & Rmulti) && ifc->m->addmulti != nil) + (*ifc->m->addmulti)(ifc, a, lifc->local); + } else { + lp->ref++; + } + + qunlock(f->self); +} + +/* + * These structures are unlinked from their chains while + * other threads may be using them. To avoid excessive locking, + * just put them aside for a while before freeing them. 
+ * called with f->self locked + */ +static Iplink *freeiplink; +static Ipself *freeipself; + +static void +iplinkfree(Iplink *p) +{ + Iplink **l, *np; + ulong now = NOW; + + l = &freeiplink; + for(np = *l; np; np = *l){ + if(np->expire > now){ + *l = np->next; + free(np); + continue; + } + l = &np->next; + } + p->expire = now + 5000; /* give other threads 5 secs to get out */ + p->next = nil; + *l = p; +} +static void +ipselffree(Ipself *p) +{ + Ipself **l, *np; + ulong now = NOW; + + l = &freeipself; + for(np = *l; np; np = *l){ + if(np->expire > now){ + *l = np->next; + free(np); + continue; + } + l = &np->next; + } + p->expire = now + 5000; /* give other threads 5 secs to get out */ + p->next = nil; + *l = p; +} + +/* + * Decrement reference for this address on this link. + * Unlink from selftab if this is the last ref. + * called with c locked + */ +static void +remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a) +{ + Ipself *p, **l; + Iplink *link, **l_self, **l_lifc; + + qlock(f->self); + + /* find the unique selftab entry */ + l = &f->self->hash[hashipa(a)]; + for(p = *l; p; p = *l){ + if(ipcmp(p->a, a) == 0) + break; + l = &p->next; + } + + if(p == nil) + goto out; + + /* + * walk down links from an ifc looking for one + * that matches the selftab entry + */ + l_lifc = &lifc->link; + for(link = *l_lifc; link; link = *l_lifc){ + if(link->self == p) + break; + l_lifc = &link->lifclink; + } + + if(link == nil) + goto out; + + /* + * walk down the links from the selftab looking for + * the one we just found + */ + l_self = &p->link; + for(link = *l_self; link; link = *l_self){ + if(link == *(l_lifc)) + break; + l_self = &link->selflink; + } + + if(link == nil) + panic("remselfcache"); + + if(--(link->ref) != 0) + goto out; + + if((p->type & Rmulti) && ifc->m->remmulti != nil) + (*ifc->m->remmulti)(ifc, a, lifc->local); + + /* ref == 0, remove from both chains and free the link */ + *l_lifc = link->lifclink; + *l_self = link->selflink; + iplinkfree(link); + 
+ if(p->link != nil) + goto out; + + /* remove from routing table */ + if(isv4(a)) + v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1); + else + v6delroute(f, a, IPallbits, 1); + + /* no more links, remove from hash and free */ + *l = p->next; + ipselffree(p); + + /* if IPnoaddr, forget */ + if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0) + f->self->acceptall = 0; + +out: + qunlock(f->self); +} + +static char *stformat = "%-44.44I %2.2d %4.4s\n"; +enum +{ + Nstformat= 41, +}; + +long +ipselftabread(Fs *f, char *cp, ulong offset, int n) +{ + int i, m, nifc, off; + Ipself *p; + Iplink *link; + char state[8]; + + m = 0; + off = offset; + qlock(f->self); + for(i = 0; i < NHASH && m < n; i++){ + for(p = f->self->hash[i]; p != nil && m < n; p = p->next){ + nifc = 0; + for(link = p->link; link; link = link->selflink) + nifc++; + routetype(p->type, state); + m += snprint(cp + m, n - m, stformat, p->a, nifc, state); + if(off > 0){ + off -= m; + m = 0; + } + } + } + qunlock(f->self); + return m; +} + +int +iptentative(Fs *f, uchar *addr) +{ + Ipself *p; + + p = f->self->hash[hashipa(addr)]; + for(; p; p = p->next){ + if(ipcmp(addr, p->a) == 0) { + return p->link->lifc->tentative; + } + } + return 0; +} + +/* + * returns + * 0 - no match + * Runi + * Rbcast + * Rmcast + */ +int +ipforme(Fs *f, uchar *addr) +{ + Ipself *p; + + p = f->self->hash[hashipa(addr)]; + for(; p; p = p->next){ + if(ipcmp(addr, p->a) == 0) + return p->type; + } + + /* hack to say accept anything */ + if(f->self->acceptall) + return Runi; + + return 0; +} + +/* + * find the ifc on same net as the remote system. If none, + * return nil. 
+ */ +Ipifc* +findipifc(Fs *f, uchar *remote, int type) +{ + Ipifc *ifc, *x; + Iplifc *lifc; + Conv **cp, **e; + uchar gnet[IPaddrlen]; + uchar xmask[IPaddrlen]; + + x = nil; memset(xmask, 0, IPaddrlen); + + /* find most specific match */ + e = &f->ipifc->conv[f->ipifc->nc]; + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == 0) + continue; + + ifc = (Ipifc*)(*cp)->ptcl; + + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + maskip(remote, lifc->mask, gnet); + if(ipcmp(gnet, lifc->net) == 0){ + if(x == nil || ipcmp(lifc->mask, xmask) > 0){ + x = ifc; + ipmove(xmask, lifc->mask); + } + } + } + } + if(x != nil) + return x; + + /* for now for broadcast and multicast, just use first interface */ + if(type & (Rbcast|Rmulti)){ + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == 0) + continue; + ifc = (Ipifc*)(*cp)->ptcl; + if(ifc->lifc != nil) + return ifc; + } + } + + return nil; +} + +enum { + unknownv6, + multicastv6, + unspecifiedv6, + linklocalv6, + sitelocalv6, + globalv6, +}; + +int +v6addrtype(uchar *addr) +{ + if(isv6global(addr)) + return globalv6; + if(islinklocal(addr)) + return linklocalv6; + if(isv6mcast(addr)) + return multicastv6; + if(issitelocal(addr)) + return sitelocalv6; + return unknownv6; +} + +#define v6addrcurr(lifc) (( (lifc)->origint + (lifc)->preflt >= (NOW/10^3) ) || ( (lifc)->preflt == 0xffffffff )) + +void +findprimaryip6(Fs *f, uchar *local) +{ + Conv **cp, **e; + Ipifc *ifc; + Iplifc *lifc; + int atype, atypel; + + ipmove(local, v6Unspecified); + atype = unspecifiedv6; + + /* find "best" (global > sitelocal > link local > unspecified) + * local address; address must be current */ + + e = &f->ipifc->conv[f->ipifc->nc]; + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == 0) + continue; + ifc = (Ipifc*)(*cp)->ptcl; + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + atypel = v6addrtype(lifc->local); + if(atypel > atype) + if(v6addrcurr(lifc)) { + ipmove(local, lifc->local); + atype = atypel; + if(atype == globalv6) + return; + } + } + } 
+} + +/* + * returns first ip address configured + */ +void +findprimaryip(Fs *f, uchar *local) +{ + Conv **cp, **e; + Ipifc *ifc; + Iplifc *lifc; + + /* find first ifc local address */ + e = &f->ipifc->conv[f->ipifc->nc]; + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == 0) + continue; + ifc = (Ipifc*)(*cp)->ptcl; + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + ipmove(local, lifc->local); + return; + } + } +} + +/* + * find the local address 'closest' to the remote system, copy it to + * local and return the ifc for that address + */ +void +findlocalip(Fs *f, uchar *local, uchar *remote) +{ + Ipifc *ifc; + Iplifc *lifc; + Route *r; + uchar gate[IPaddrlen]; + uchar gnet[IPaddrlen]; + int version; + int atype = unspecifiedv6, atypel = unknownv6; + + USED(atype); + USED(atypel); + qlock(f->ipifc); + r = v6lookup(f, remote, nil); + version = (memcmp(remote, v4prefix, IPv4off) == 0) ? V4 : V6; + + if(r != nil){ + ifc = r->ifc; + if(r->type & Rv4) + v4tov6(gate, r->v4.gate); + else { + ipmove(gate, r->v6.gate); + ipmove(local, v6Unspecified); + } + + /* find ifc address closest to the gateway to use */ + switch(version) { + case V4: + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + maskip(gate, lifc->mask, gnet); + if(ipcmp(gnet, lifc->net) == 0){ + ipmove(local, lifc->local); + goto out; + } + } + break; + case V6: + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + atypel = v6addrtype(lifc->local); + maskip(gate, lifc->mask, gnet); + if(ipcmp(gnet, lifc->net) == 0) + if(atypel > atype) + if(v6addrcurr(lifc)) { + ipmove(local, lifc->local); + atype = atypel; + if(atype == globalv6) + break; + } + } + if(atype > unspecifiedv6) + goto out; + break; + default: + panic("findlocalip: version %d", version); + } + } + + switch(version){ + case V4: + findprimaryip(f, local); + break; + case V6: + findprimaryip6(f, local); + break; + default: + panic("findlocalip2: version %d", version); + } + +out: + qunlock(f->ipifc); +} + +/* + * return first v4 address associated 
with an interface + */ +int +ipv4local(Ipifc *ifc, uchar *addr) +{ + Iplifc *lifc; + + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + if(isv4(lifc->local)){ + memmove(addr, lifc->local+IPv4off, IPv4addrlen); + return 1; + } + } + return 0; +} + +/* + * return first v6 address associated with an interface + */ +int +ipv6local(Ipifc *ifc, uchar *addr) +{ + Iplifc *lifc; + + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + if(!isv4(lifc->local) && !(lifc->tentative)){ + ipmove(addr, lifc->local); + return 1; + } + } + return 0; +} + +int +ipv6anylocal(Ipifc *ifc, uchar *addr) +{ + Iplifc *lifc; + + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + if(!isv4(lifc->local)){ + ipmove(addr, lifc->local); + return SRC_UNI; + } + } + return SRC_UNSPEC; +} + +/* + * see if this address is bound to the interface + */ +Iplifc* +iplocalonifc(Ipifc *ifc, uchar *ip) +{ + Iplifc *lifc; + + for(lifc = ifc->lifc; lifc; lifc = lifc->next) + if(ipcmp(ip, lifc->local) == 0) + return lifc; + return nil; +} + + +/* + * See if we're proxying for this address on this interface + */ +int +ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip) +{ + Route *r; + uchar net[IPaddrlen]; + Iplifc *lifc; + + /* see if this is a direct connected pt to pt address */ + r = v6lookup(f, ip, nil); + if(r == nil) + return 0; + if((r->type & (Rifc|Rproxy)) != (Rifc|Rproxy)) + return 0; + + /* see if this is on the right interface */ + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + maskip(ip, lifc->mask, net); + if(ipcmp(net, lifc->remote) == 0) + return 1; + } + + return 0; +} + +/* + * return multicast version if any + */ +int +ipismulticast(uchar *ip) +{ + if(isv4(ip)){ + if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0) + return V4; + } else { + if(ip[0] == 0xff) + return V6; + } + return 0; +} + +int +ipisbm(uchar *ip) +{ + if(isv4(ip)){ + if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0) + return V4; + if(ipcmp(ip, IPv4bcast) == 0) + return V4; + } else { + if(ip[0] == 0xff) + return V6; + } + return 0; +} + + +/* + 
* add a multicast address to an interface, called with c locked + */ +void +ipifcaddmulti(Conv *c, uchar *ma, uchar *ia) +{ + Ipifc *ifc; + Iplifc *lifc; + Conv **p; + Ipmulti *multi, **l; + Fs *f; + + f = c->p->f; + + for(l = &c->multi; *l; l = &(*l)->next) + if(ipcmp(ma, (*l)->ma) == 0) + if(ipcmp(ia, (*l)->ia) == 0) + return; /* it's already there */ + + multi = *l = smalloc(sizeof(*multi)); + ipmove(multi->ma, ma); + ipmove(multi->ia, ia); + multi->next = nil; + + for(p = f->ipifc->conv; *p; p++){ + if((*p)->inuse == 0) + continue; + ifc = (Ipifc*)(*p)->ptcl; + if(waserror()){ + wunlock(ifc); + nexterror(); + } + wlock(ifc); + for(lifc = ifc->lifc; lifc; lifc = lifc->next) + if(ipcmp(ia, lifc->local) == 0) + addselfcache(f, ifc, lifc, ma, Rmulti); + wunlock(ifc); + poperror(); + } +} + + +/* + * remove a multicast address from an interface, called with c locked + */ +void +ipifcremmulti(Conv *c, uchar *ma, uchar *ia) +{ + Ipmulti *multi, **l; + Iplifc *lifc; + Conv **p; + Ipifc *ifc; + Fs *f; + + f = c->p->f; + + for(l = &c->multi; *l; l = &(*l)->next) + if(ipcmp(ma, (*l)->ma) == 0) + if(ipcmp(ia, (*l)->ia) == 0) + break; + + multi = *l; + if(multi == nil) + return; /* we don't have it open */ + + *l = multi->next; + + for(p = f->ipifc->conv; *p; p++){ + if((*p)->inuse == 0) + continue; + + ifc = (Ipifc*)(*p)->ptcl; + if(waserror()){ + wunlock(ifc); + nexterror(); + } + wlock(ifc); + for(lifc = ifc->lifc; lifc; lifc = lifc->next) + if(ipcmp(ia, lifc->local) == 0) + remselfcache(f, ifc, lifc, ma); + wunlock(ifc); + poperror(); + } + + free(multi); +} + +/* + * make lifc's join and leave multicast groups + */ +static char* +ipifcjoinmulti(Ipifc *ifc, char **argv, int argc) +{ + USED(ifc, argv, argc); + return nil; +} + +static char* +ipifcleavemulti(Ipifc *ifc, char **argv, int argc) +{ + USED(ifc, argv, argc); + return nil; +} + +static void +ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip) +{ + Conv **cp, **e; + Ipifc *nifc; + Iplifc *lifc; + Medium *m; + uchar 
net[IPaddrlen]; + + /* register the address on any network that will proxy for us */ + e = &f->ipifc->conv[f->ipifc->nc]; + + if(!isv4(ip)) { // V6 + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == nil) + continue; + nifc = (Ipifc*)(*cp)->ptcl; + if(nifc == ifc) + continue; + + rlock(nifc); + m = nifc->m; + if(m == nil || m->addmulti == nil) { + runlock(nifc); + continue; + } + for(lifc = nifc->lifc; lifc; lifc = lifc->next){ + maskip(ip, lifc->mask, net); + if(ipcmp(net, lifc->remote) == 0) { /* add solicited-node multicast address */ + ipv62smcast(net, ip); + addselfcache(f, nifc, lifc, net, Rmulti); + arpenter(f, V6, ip, nifc->mac, 6, 0); + //(*m->addmulti)(nifc, net, ip); + break; + } + } + runlock(nifc); + } + return; + } + else { // V4 + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == nil) + continue; + nifc = (Ipifc*)(*cp)->ptcl; + if(nifc == ifc) + continue; + + rlock(nifc); + m = nifc->m; + if(m == nil || m->areg == nil){ + runlock(nifc); + continue; + } + for(lifc = nifc->lifc; lifc; lifc = lifc->next){ + maskip(ip, lifc->mask, net); + if(ipcmp(net, lifc->remote) == 0){ + (*m->areg)(nifc, ip); + break; + } + } + runlock(nifc); + } + } +} + + +// added for new v6 mesg types +static void +adddefroute6(Fs *f, uchar *gate, int force) +{ + Route *r; + + r = v6lookup(f, v6Unspecified, nil); + if(r!=nil) + if(!(force) && (strcmp(r->tag,"ra")!=0)) // route entries generated + return; // by all other means take + // precedence over router annc + + v6delroute(f, v6Unspecified, v6Unspecified, 1); + v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0); +} + +enum +{ + Ngates = 3, +}; + +char* +ipifcaddpref6(Ipifc *ifc, char**argv, int argc) +{ + uchar onlink = 1; + uchar autoflag = 1; + long validlt = 0xffffffff; + long preflt = 0xffffffff; + long origint = NOW / 10^3; + uchar prefix[IPaddrlen]; + int plen = 64; + Iplifc *lifc; + char addr[40], preflen[6]; + char *params[3]; + + switch(argc) { + case 7: + preflt = atoi(argv[6]); + /* fall through */ + 
case 6: + validlt = atoi(argv[5]); + /* fall through */ + case 5: + autoflag = atoi(argv[4]); + /* fall through */ + case 4: + onlink = atoi(argv[3]); + /* fall through */ + case 3: + plen = atoi(argv[2]); + case 2: + break; + default: + return Ebadarg; + } + + if((parseip(prefix, argv[1])!=6) || + (validlt < preflt) || + (plen < 0) || (plen > 64) || + (islinklocal(prefix)) + ) + return Ebadarg; + + lifc = smalloc(sizeof(Iplifc)); + lifc->onlink = (onlink!=0); + lifc->autoflag = (autoflag!=0); + lifc->validlt = validlt; + lifc->preflt = preflt; + lifc->origint = origint; + + if(ifc->m->pref2addr!=nil) + ifc->m->pref2addr(prefix, ifc->mac); + else + return Ebadarg; + + sprint(addr, "%I", prefix); + sprint(preflen, "/%d", plen); + params[0] = "add"; + params[1] = addr; + params[2] = preflen; + + return ipifcadd(ifc, params, 3, 0, lifc); +} + diff --git a/os/ip/ipmux.c b/os/ip/ipmux.c new file mode 100644 index 00000000..ef67b0fa --- /dev/null +++ b/os/ip/ipmux.c @@ -0,0 +1,857 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#define DPRINT if(0)print + +#define offsetof(s, m) (ulong)(&(((s*)0)->m)) + +typedef struct Ipmuxrock Ipmuxrock; +typedef struct Ipmux Ipmux; +typedef struct Ip4hdr Ip4hdr; +typedef struct Ip6hdr Ip6hdr; + +enum +{ + IPHDR = 20, /* sizeof(Ip4hdr) */ +}; + +struct Ip4hdr +{ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* ip->identification */ + uchar frag[2]; /* Fragment information */ + uchar ttl; /* Time to live */ + uchar proto; /* Protocol */ + uchar cksum[2]; /* Header checksum */ + uchar src[4]; /* IP source */ + uchar dst[4]; /* IP destination */ + uchar data[1]; /* start of data */ +}; + +struct Ip6hdr +{ + uchar vcf[4]; /* version, class label, and flow label */ + uchar ploadlen[2]; /* payload length */ + uchar proto; /* next header, i.e. 
/*
 * return a pointer to the first character of p that is
 * neither a blank nor a tab (possibly the terminating NUL).
 */
static char*
skipwhite(char *p)
{
	for(;;){
		switch(*p){
		case ' ':
		case '\t':
			p++;
			continue;
		}
		return p;
	}
}
+= 3; + } + else if(strncmp(p, "proto", 5) == 0){ + type = Tproto; + off = offsetof(Ip4hdr, proto); + len = 1; + p += 5; + } + else if(strncmp(p, "data", 4) == 0 || strncmp(p, "iph", 3) == 0){ + if(strncmp(p, "data", 4) == 0) { + type = Tdata; + p += 4; + } + else { + type = Tiph; + p += 3; + } + p = skipwhite(p); + if(*p != '[') + return nil; + p++; + off = strtoul(p, &p, 0); + if(off < 0 || off > (64-IPHDR)) + return nil; + p = skipwhite(p); + if(*p != ':') + end = off; + else { + p++; + p = skipwhite(p); + end = strtoul(p, &p, 0); + if(end < off) + return nil; + p = skipwhite(p); + } + if(*p != ']') + return nil; + p++; + len = end - off + 1; + } + else + return nil; + + f = smalloc(sizeof(*f)); + f->type = type; + f->len = len; + f->off = off; + f->val = nil; + f->mask = nil; + f->n = 1; + f->ref = 1; + if(type == Tdata) + f->skiphdr = 1; + else + f->skiphdr = 0; + + return f; +} + +static int +htoi(char x) +{ + if(x >= '0' && x <= '9') + x -= '0'; + else if(x >= 'a' && x <= 'f') + x -= 'a' - 10; + else if(x >= 'A' && x <= 'F') + x -= 'A' - 10; + else + x = 0; + return x; +} + +static int +hextoi(char *p) +{ + return (htoi(p[0])<<4) | htoi(p[1]); +} + +static void +parseval(uchar *v, char *p, int len) +{ + while(*p && len-- > 0){ + *v++ = hextoi(p); + p += 2; + } +} + +static Ipmux* +parsemux(char *p) +{ + int n, nomask; + Ipmux *f; + char *val; + char *mask; + char *vals[20]; + uchar *v; + + /* parse operand */ + f = parseop(&p); + if(f == nil) + return nil; + + /* find value */ + val = follows(p, '='); + if(val == nil) + goto parseerror; + + /* parse mask */ + mask = follows(val, '&'); + if(mask != nil){ + switch(f->type){ + case Tsrc: + case Tdst: + case Tifc: + f->mask = smalloc(f->len); + v4parseip(f->mask, mask); + break; + case Tdata: + case Tiph: + f->mask = smalloc(f->len); + parseval(f->mask, mask, f->len); + break; + default: + goto parseerror; + } + nomask = 0; + } else { + nomask = 1; + f->mask = smalloc(f->len); + memset(f->mask, 0xff, f->len); + 
} + + /* parse vals */ + f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|"); + if(f->n == 0) + goto parseerror; + f->val = smalloc(f->n*f->len); + v = f->val; + for(n = 0; n < f->n; n++){ + switch(f->type){ + case Tsrc: + case Tdst: + case Tifc: + v4parseip(v, vals[n]); + break; + case Tproto: + case Tdata: + case Tiph: + parseval(v, vals[n], f->len); + break; + } + v += f->len; + } + + f->eoff = f->off + f->len; + f->e = f->val + f->n*f->len; + f->ctype = Cother; + if(f->n == 1){ + switch(f->len){ + case 1: + f->ctype = nomask ? Cbyte : Cmbyte; + break; + case 2: + f->ctype = nomask ? Cshort : Cmshort; + break; + case 4: + if(f->type == Tifc) + f->ctype = nomask ? Cifc : Cmifc; + else + f->ctype = nomask ? Clong : Cmlong; + break; + } + } + return f; + +parseerror: + if(f->mask) + free(f->mask); + if(f->val) + free(f->val); + free(f); + return nil; +} + +/* + * Compare relative ordering of two ipmuxs. This doesn't compare the + * values, just the fields being looked at. + * + * returns: <0 if a is a more specific match + * 0 if a and b are matching on the same fields + * >0 if b is a more specific match + */ +static int +ipmuxcmp(Ipmux *a, Ipmux *b) +{ + int n; + + /* compare types, lesser ones are more important */ + n = a->type - b->type; + if(n != 0) + return n; + + /* compare offsets, call earlier ones more specific */ + n = (a->off+((int)a->skiphdr)*offsetof(Ip4hdr, data[0])) - + (b->off+((int)b->skiphdr)*offsetof(Ip4hdr, data[0])); + if(n != 0) + return n; + + /* compare match lengths, longer ones are more specific */ + n = b->len - a->len; + if(n != 0) + return n; + + /* + * if we get here we have two entries matching + * the same bytes of the record. Now check + * the mask for equality. Longer masks are + * more specific. 
+ */ + if(a->mask != nil && b->mask == nil) + return -1; + if(a->mask == nil && b->mask != nil) + return 1; + if(a->mask != nil && b->mask != nil){ + n = memcmp(b->mask, a->mask, a->len); + if(n != 0) + return n; + } + return 0; +} + +/* + * Compare the values of two ipmuxs. We're assuming that ipmuxcmp + * returned 0 comparing them. + */ +static int +ipmuxvalcmp(Ipmux *a, Ipmux *b) +{ + int n; + + n = b->len*b->n - a->len*a->n; + if(n != 0) + return n; + return memcmp(a->val, b->val, a->len*a->n); +} + +/* + * add onto an existing ipmux chain in the canonical comparison + * order + */ +static void +ipmuxchain(Ipmux **l, Ipmux *f) +{ + for(; *l; l = &(*l)->yes) + if(ipmuxcmp(f, *l) < 0) + break; + f->yes = *l; + *l = f; +} + +/* + * copy a tree + */ +static Ipmux* +ipmuxcopy(Ipmux *f) +{ + Ipmux *nf; + + if(f == nil) + return nil; + nf = smalloc(sizeof *nf); + *nf = *f; + nf->no = ipmuxcopy(f->no); + nf->yes = ipmuxcopy(f->yes); + nf->val = smalloc(f->n*f->len); + nf->e = nf->val + f->len*f->n; + memmove(nf->val, f->val, f->n*f->len); + return nf; +} + +static void +ipmuxfree(Ipmux *f) +{ + if(f->val != nil) + free(f->val); + free(f); +} + +static void +ipmuxtreefree(Ipmux *f) +{ + if(f == nil) + return; + if(f->no != nil) + ipmuxfree(f->no); + if(f->yes != nil) + ipmuxfree(f->yes); + ipmuxfree(f); +} + +/* + * merge two trees + */ +static Ipmux* +ipmuxmerge(Ipmux *a, Ipmux *b) +{ + int n; + Ipmux *f; + + if(a == nil) + return b; + if(b == nil) + return a; + n = ipmuxcmp(a, b); + if(n < 0){ + f = ipmuxcopy(b); + a->yes = ipmuxmerge(a->yes, b); + a->no = ipmuxmerge(a->no, f); + return a; + } + if(n > 0){ + f = ipmuxcopy(a); + b->yes = ipmuxmerge(b->yes, a); + b->no = ipmuxmerge(b->no, f); + return b; + } + if(ipmuxvalcmp(a, b) == 0){ + a->yes = ipmuxmerge(a->yes, b->yes); + a->no = ipmuxmerge(a->no, b->no); + a->ref++; + ipmuxfree(b); + return a; + } + a->no = ipmuxmerge(a->no, b); + return a; +} + +/* + * remove a chain from a demux tree. 
/*
 *  remove a chain from a demux tree.  This is like merging except that
 *  we remove instead of insert.
 *
 *  l points at the link holding the subtree being searched, f is the
 *  remaining part of the chain (linked through yes) still to be removed.
 *
 *  returns 0 when f is exhausted or a dead node was unlinked, -1 when
 *  the subtree is empty or f compares as an earlier field than *l, and
 *  the sum of the results from both branches when *l matches an
 *  earlier field than f.
 */
static int
ipmuxremove(Ipmux **l, Ipmux *f)
{
	int n, rv;
	Ipmux *ft;

	if(f == nil)
		return 0;		/* we've removed it all */
	if(*l == nil)
		return -1;

	ft = *l;
	n = ipmuxcmp(ft, f);
	if(n < 0){
		/* *l is matching an earlier field, descend both paths */
		rv = ipmuxremove(&ft->yes, f);
		rv += ipmuxremove(&ft->no, f);
		return rv;
	}
	if(n > 0){
		/* f represents an earlier field than *l, this should be impossible */
		return -1;
	}

	/* if we get here f and *l are comparing the same fields */
	if(ipmuxvalcmp(ft, f) != 0){
		/* different values mean mutually exclusive */
		return ipmuxremove(&ft->no, f);
	}

	/* we found a match */
	if(--(ft->ref) == 0){
		/*
		 *  a dead node implies the whole yes side is also dead.
		 *  since our chain is constrained to be on that side,
		 *  we're done.
		 */
		ipmuxtreefree(ft->yes);
		*l = ft->no;	/* the no side takes the dead node's place */
		ipmuxfree(ft);
		return 0;
	}

	/*
	 *  free the rest of the chain.  it is constrained to match the
	 *  yes side.
	 */
	return ipmuxremove(&ft->yes, f->yes);
}
+ */ +static char* +ipmuxconnect(Conv *c, char **argv, int argc) +{ + int i, n; + char *field[10]; + Ipmux *mux, *chain; + Ipmuxrock *r; + Fs *f; + + f = c->p->f; + + if(argc != 2) + return Ebadarg; + + n = getfields(argv[1], field, nelem(field), 1, ";"); + if(n <= 0) + return Ebadarg; + + chain = nil; + mux = nil; + for(i = 0; i < n; i++){ + mux = parsemux(field[i]); + if(mux == nil){ + ipmuxtreefree(chain); + return Ebadarg; + } + ipmuxchain(&chain, mux); + } + if(chain == nil) + return Ebadarg; + mux->conv = c; + + /* save a copy of the chain so we can later remove it */ + mux = ipmuxcopy(chain); + r = (Ipmuxrock*)(c->ptcl); + r->chain = chain; + + /* add the chain to the protocol demultiplexor tree */ + wlock(f); + f->ipmux->priv = ipmuxmerge(f->ipmux->priv, mux); + wunlock(f); + + Fsconnected(c, nil); + return nil; +} + +static int +ipmuxstate(Conv *c, char *state, int n) +{ + Ipmuxrock *r; + + r = (Ipmuxrock*)(c->ptcl); + return ipmuxsprint(r->chain, 0, state, n); +} + +static void +ipmuxcreate(Conv *c) +{ + Ipmuxrock *r; + + c->rq = qopen(64*1024, Qmsg, 0, c); + c->wq = qopen(64*1024, Qkick, ipmuxkick, c); + r = (Ipmuxrock*)(c->ptcl); + r->chain = nil; +} + +static char* +ipmuxannounce(Conv*, char**, int) +{ + return "ipmux does not support announce"; +} + +static void +ipmuxclose(Conv *c) +{ + Ipmuxrock *r; + Fs *f = c->p->f; + + r = (Ipmuxrock*)(c->ptcl); + + qclose(c->rq); + qclose(c->wq); + qclose(c->eq); + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + c->lport = 0; + c->rport = 0; + + wlock(f); + ipmuxremove(&(c->p->priv), r->chain); + wunlock(f); + ipmuxtreefree(r->chain); + r->chain = nil; +} + +/* + * takes a fully formed ip packet and just passes it down + * the stack + */ +static void +ipmuxkick(void *x) +{ + Conv *c = x; + Block *bp; + + bp = qget(c->wq); + if(bp == nil) + return; + else { + Ip4hdr *ih4 = (Ip4hdr*)(bp->rp); + if((ih4->vihl)&0xF0 != 0x60) + ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil); + else { + Ip6hdr *ih6 = 
(Ip6hdr*)(bp->rp); + ipoput6(c->p->f, bp, 0, ih6->ttl, 0, nil); + } + } +} + +static void +ipmuxiput(Proto *p, Ipifc *ifc, Block *bp) +{ + int len, hl; + Fs *f = p->f; + uchar *m, *h, *v, *e, *ve, *hp; + Conv *c; + Ipmux *mux; + Ip4hdr *ip; + Ip6hdr *ip6; + + ip = (Ip4hdr*)bp->rp; + hl = (ip->vihl&0x0F)<<2; + + if(p->priv == nil) + goto nomatch; + + h = bp->rp; + len = BLEN(bp); + + /* run the v4 filter */ + rlock(f); + c = nil; + mux = f->ipmux->priv; + while(mux != nil){ + if(mux->eoff > len){ + mux = mux->no; + continue; + } + hp = h + mux->off + ((int)mux->skiphdr)*hl; + switch(mux->ctype){ + case Cbyte: + if(*mux->val == *hp) + goto yes; + break; + case Cmbyte: + if((*hp & *mux->mask) == *mux->val) + goto yes; + break; + case Cshort: + if(*((ushort*)mux->val) == *(ushort*)hp) + goto yes; + break; + case Cmshort: + if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val)) + goto yes; + break; + case Clong: + if(*((ulong*)mux->val) == *(ulong*)hp) + goto yes; + break; + case Cmlong: + if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val)) + goto yes; + break; + case Cifc: + if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off)) + goto yes; + break; + case Cmifc: + if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val)) + goto yes; + break; + default: + v = mux->val; + for(e = mux->e; v < e; v = ve){ + m = mux->mask; + hp = h + mux->off; + for(ve = v + mux->len; v < ve; v++){ + if((*hp++ & *m++) != *v) + break; + } + if(v == ve) + goto yes; + } + } + mux = mux->no; + continue; +yes: + if(mux->conv != nil) + c = mux->conv; + mux = mux->yes; + } + runlock(f); + + if(c != nil){ + /* tack on interface address */ + bp = padblock(bp, IPaddrlen); + ipmove(bp->rp, ifc->lifc->local); + bp = concatblock(bp); + if(bp != nil) + if(qpass(c->rq, bp) < 0) + print("Q"); + return; + } + +nomatch: + /* doesn't match any filter, hand it to the specific protocol handler */ + ip = (Ip4hdr*)bp->rp; + 
if((ip->vihl&0xF0)==0x40) { + p = f->t2p[ip->proto]; + } else { + ip6 = (Ip6hdr*)bp->rp; + p = f->t2p[ip6->proto]; + } + if(p && p->rcv) + (*p->rcv)(p, ifc, bp); + else + freeblist(bp); + return; +} + +static int +ipmuxsprint(Ipmux *mux, int level, char *buf, int len) +{ + int i, j, n; + uchar *v; + + n = 0; + for(i = 0; i < level; i++) + n += snprint(buf+n, len-n, " "); + if(mux == nil){ + n += snprint(buf+n, len-n, "\n"); + return n; + } + n += snprint(buf+n, len-n, "h[%d:%d]&", + mux->off+((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])), + mux->off+(((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])))+mux->len-1); + for(i = 0; i < mux->len; i++) + n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]); + n += snprint(buf+n, len-n, "="); + v = mux->val; + for(j = 0; j < mux->n; j++){ + for(i = 0; i < mux->len; i++) + n += snprint(buf+n, len - n, "%2.2ux", *v++); + n += snprint(buf+n, len-n, "|"); + } + n += snprint(buf+n, len-n, "\n"); + level++; + n += ipmuxsprint(mux->no, level, buf+n, len-n); + n += ipmuxsprint(mux->yes, level, buf+n, len-n); + return n; +} + +static int +ipmuxstats(Proto *p, char *buf, int len) +{ + int n; + Fs *f = p->f; + + rlock(f); + n = ipmuxsprint(p->priv, 0, buf, len); + runlock(f); + + return n; +} + +void +ipmuxinit(Fs *f) +{ + Proto *ipmux; + + ipmux = smalloc(sizeof(Proto)); + ipmux->priv = nil; + ipmux->name = "ipmux"; + ipmux->connect = ipmuxconnect; + ipmux->announce = ipmuxannounce; + ipmux->state = ipmuxstate; + ipmux->create = ipmuxcreate; + ipmux->close = ipmuxclose; + ipmux->rcv = ipmuxiput; + ipmux->ctl = nil; + ipmux->advise = nil; + ipmux->stats = ipmuxstats; + ipmux->ipproto = -1; + ipmux->nc = 64; + ipmux->ptclsize = sizeof(Ipmuxrock); + + f->ipmux = ipmux; /* hack for Fsrcvpcol */ + + Fsproto(f, ipmux); +} diff --git a/os/ip/iproute.c b/os/ip/iproute.c new file mode 100644 index 00000000..3229435e --- /dev/null +++ b/os/ip/iproute.c @@ -0,0 +1,852 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" 
+#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +static void walkadd(Fs*, Route**, Route*); +static void addnode(Fs*, Route**, Route*); +static void calcd(Route*); + +/* these are used for all instances of IP */ +Route* v4freelist; +Route* v6freelist; +RWlock routelock; +ulong v4routegeneration, v6routegeneration; + +static void +freeroute(Route *r) +{ + Route **l; + + r->left = nil; + r->right = nil; + if(r->type & Rv4) + l = &v4freelist; + else + l = &v6freelist; + r->mid = *l; + *l = r; +} + +static Route* +allocroute(int type) +{ + Route *r; + int n; + Route **l; + + if(type & Rv4){ + n = sizeof(RouteTree) + sizeof(V4route); + l = &v4freelist; + } else { + n = sizeof(RouteTree) + sizeof(V6route); + l = &v6freelist; + } + + r = *l; + if(r != nil){ + *l = r->mid; + } else { + r = malloc(n); + if(r == nil) + panic("out of routing nodes"); + } + memset(r, 0, n); + r->type = type; + r->ifc = nil; + r->ref = 1; + + return r; +} + +static void +addqueue(Route **q, Route *r) +{ + Route *l; + + if(r == nil) + return; + + l = allocroute(r->type); + l->mid = *q; + *q = l; + l->left = r; +} + +/* + * compare 2 v6 addresses + */ +static int +lcmp(ulong *a, ulong *b) +{ + int i; + + for(i = 0; i < IPllen; i++){ + if(a[i] > b[i]) + return 1; + if(a[i] < b[i]) + return -1; + } + return 0; +} + +/* + * compare 2 v4 or v6 ranges + */ +enum +{ + Rpreceeds, + Rfollows, + Requals, + Rcontains, + Rcontained, +}; + +static int +rangecompare(Route *a, Route *b) +{ + if(a->type & Rv4){ + if(a->v4.endaddress < b->v4.address) + return Rpreceeds; + + if(a->v4.address > b->v4.endaddress) + return Rfollows; + + if(a->v4.address <= b->v4.address + && a->v4.endaddress >= b->v4.endaddress){ + if(a->v4.address == b->v4.address + && a->v4.endaddress == b->v4.endaddress) + return Requals; + return Rcontains; + } + return Rcontained; + } + + if(lcmp(a->v6.endaddress, b->v6.address) < 0) + return Rpreceeds; + + if(lcmp(a->v6.address, b->v6.endaddress) > 0) + 
return Rfollows; + + if(lcmp(a->v6.address, b->v6.address) <= 0 + && lcmp(a->v6.endaddress, b->v6.endaddress) >= 0){ + if(lcmp(a->v6.address, b->v6.address) == 0 + && lcmp(a->v6.endaddress, b->v6.endaddress) == 0) + return Requals; + return Rcontains; + } + + return Rcontained; +} + +static void +copygate(Route *old, Route *new) +{ + if(new->type & Rv4) + memmove(old->v4.gate, new->v4.gate, IPv4addrlen); + else + memmove(old->v6.gate, new->v6.gate, IPaddrlen); +} + +/* + * walk down a tree adding nodes back in + */ +static void +walkadd(Fs *f, Route **root, Route *p) +{ + Route *l, *r; + + l = p->left; + r = p->right; + p->left = 0; + p->right = 0; + addnode(f, root, p); + if(l) + walkadd(f, root, l); + if(r) + walkadd(f, root, r); +} + +/* + * calculate depth + */ +static void +calcd(Route *p) +{ + Route *q; + int d; + + if(p) { + d = 0; + q = p->left; + if(q) + d = q->depth; + q = p->right; + if(q && q->depth > d) + d = q->depth; + q = p->mid; + if(q && q->depth > d) + d = q->depth; + p->depth = d+1; + } +} + +/* + * balance the tree at the current node + */ +static void +balancetree(Route **cur) +{ + Route *p, *l, *r; + int dl, dr; + + /* + * if left and right are + * too out of balance, + * rotate tree node + */ + p = *cur; + dl = 0; if(l = p->left) dl = l->depth; + dr = 0; if(r = p->right) dr = r->depth; + + if(dl > dr+1) { + p->left = l->right; + l->right = p; + *cur = l; + calcd(p); + calcd(l); + } else + if(dr > dl+1) { + p->right = r->left; + r->left = p; + *cur = r; + calcd(p); + calcd(r); + } else + calcd(p); +} + +/* + * add a new node to the tree + */ +static void +addnode(Fs *f, Route **cur, Route *new) +{ + Route *p; + + p = *cur; + if(p == 0) { + *cur = new; + new->depth = 1; + return; + } + + switch(rangecompare(new, p)){ + case Rpreceeds: + addnode(f, &p->left, new); + break; + case Rfollows: + addnode(f, &p->right, new); + break; + case Rcontains: + /* + * if new node is superset + * of tree node, + * replace tree node and + * queue tree node to 
be + * merged into root. + */ + *cur = new; + new->depth = 1; + addqueue(&f->queue, p); + break; + case Requals: + /* + * supercede the old entry if the old one isn't + * a local interface. + */ + if((p->type & Rifc) == 0){ + p->type = new->type; + p->ifcid = -1; + copygate(p, new); + } else if(new->type & Rifc) + p->ref++; + freeroute(new); + break; + case Rcontained: + addnode(f, &p->mid, new); + break; + } + + balancetree(cur); +} + +#define V4H(a) ((a&0x07ffffff)>>(32-Lroot-5)) + +void +v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type) +{ + Route *p; + ulong sa; + ulong m; + ulong ea; + int h, eh; + + m = nhgetl(mask); + sa = nhgetl(a) & m; + ea = sa | ~m; + + eh = V4H(ea); + for(h=V4H(sa); h<=eh; h++) { + p = allocroute(Rv4 | type); + p->v4.address = sa; + p->v4.endaddress = ea; + memmove(p->v4.gate, gate, sizeof(p->v4.gate)); + memmove(p->tag, tag, sizeof(p->tag)); + + wlock(&routelock); + addnode(f, &f->v4root[h], p); + while(p = f->queue) { + f->queue = p->mid; + walkadd(f, &f->v4root[h], p->left); + freeroute(p); + } + wunlock(&routelock); + } + v4routegeneration++; + + ipifcaddroute(f, Rv4, a, mask, gate, type); +} + +#define V6H(a) (((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5)) +#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0)) + +void +v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type) +{ + Route *p; + ulong sa[IPllen], ea[IPllen]; + ulong x, y; + int h, eh; + + /* + if(ISDFLT(a, mask, tag)) + f->v6p->cdrouter = -1; + */ + + + for(h = 0; h < IPllen; h++){ + x = nhgetl(a+4*h); + y = nhgetl(mask+4*h); + sa[h] = x & y; + ea[h] = x | ~y; + } + + eh = V6H(ea); + for(h = V6H(sa); h <= eh; h++) { + p = allocroute(type); + memmove(p->v6.address, sa, IPaddrlen); + memmove(p->v6.endaddress, ea, IPaddrlen); + memmove(p->v6.gate, gate, IPaddrlen); + memmove(p->tag, tag, sizeof(p->tag)); + + wlock(&routelock); + addnode(f, &f->v6root[h], p); + 
while(p = f->queue) { + f->queue = p->mid; + walkadd(f, &f->v6root[h], p->left); + freeroute(p); + } + wunlock(&routelock); + } + v6routegeneration++; + + ipifcaddroute(f, 0, a, mask, gate, type); +} + +Route** +looknode(Route **cur, Route *r) +{ + Route *p; + + for(;;){ + p = *cur; + if(p == 0) + return 0; + + switch(rangecompare(r, p)){ + case Rcontains: + return 0; + case Rpreceeds: + cur = &p->left; + break; + case Rfollows: + cur = &p->right; + break; + case Rcontained: + cur = &p->mid; + break; + case Requals: + return cur; + } + } +} + +void +v4delroute(Fs *f, uchar *a, uchar *mask, int dolock) +{ + Route **r, *p; + Route rt; + int h, eh; + ulong m; + + m = nhgetl(mask); + rt.v4.address = nhgetl(a) & m; + rt.v4.endaddress = rt.v4.address | ~m; + rt.type = Rv4; + + eh = V4H(rt.v4.endaddress); + for(h=V4H(rt.v4.address); h<=eh; h++) { + if(dolock) + wlock(&routelock); + r = looknode(&f->v4root[h], &rt); + if(r) { + p = *r; + if(--(p->ref) == 0){ + *r = 0; + addqueue(&f->queue, p->left); + addqueue(&f->queue, p->mid); + addqueue(&f->queue, p->right); + freeroute(p); + while(p = f->queue) { + f->queue = p->mid; + walkadd(f, &f->v4root[h], p->left); + freeroute(p); + } + } + } + if(dolock) + wunlock(&routelock); + } + v4routegeneration++; + + ipifcremroute(f, Rv4, a, mask); +} + +void +v6delroute(Fs *f, uchar *a, uchar *mask, int dolock) +{ + Route **r, *p; + Route rt; + int h, eh; + ulong x, y; + + for(h = 0; h < IPllen; h++){ + x = nhgetl(a+4*h); + y = nhgetl(mask+4*h); + rt.v6.address[h] = x & y; + rt.v6.endaddress[h] = x | ~y; + } + rt.type = 0; + + eh = V6H(rt.v6.endaddress); + for(h=V6H(rt.v6.address); h<=eh; h++) { + if(dolock) + wlock(&routelock); + r = looknode(&f->v6root[h], &rt); + if(r) { + p = *r; + if(--(p->ref) == 0){ + *r = 0; + addqueue(&f->queue, p->left); + addqueue(&f->queue, p->mid); + addqueue(&f->queue, p->right); + freeroute(p); + while(p = f->queue) { + f->queue = p->mid; + walkadd(f, &f->v6root[h], p->left); + freeroute(p); + } + } + } + 
/*
 * find the route for the v4 address a (4 bytes, network order).
 *
 * if c is non-nil its cached route is returned as long as that
 * route has an interface and c->rgen still equals
 * v4routegeneration; otherwise the result of this lookup is
 * cached in c.
 *
 * returns nil when no range contains a, or when the chosen
 * route's interface has to be (re)resolved and findipifc fails.
 *
 * NOTE(review): routelock is not taken in this function --
 * confirm what callers rely on.
 */
Route*
v4lookup(Fs *f, uchar *a, Conv *c)
{
	Route *p, *q;
	ulong la;
	uchar gate[IPaddrlen];
	Ipifc *ifc;

	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
		return c->r;

	la = nhgetl(a);
	q = nil;
	/*
	 * walk the bucket's tree.  each node whose range contains la
	 * is remembered and the search continues through mid, so q
	 * ends up as the last containing node on the path.
	 */
	for(p=f->v4root[V4H(la)]; p;)
		if(la >= p->v4.address) {
			if(la <= p->v4.endaddress) {
				q = p;
				p = p->mid;
			} else
				p = p->right;
		} else
			p = p->left;

	/* (re)bind the route to an interface if it has none or its id changed */
	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
		if(q->type & Rifc) {
			/* interface route: look the interface up by the route's own address */
			hnputl(gate+IPv4off, q->v4.address);
			memmove(gate, v4prefix, IPv4off);
		} else
			v4tov6(gate, q->v4.gate);
		ifc = findipifc(f, gate, q->type);
		if(ifc == nil)
			return nil;
		q->ifc = ifc;
		q->ifcid = ifc->ifcid;
	}

	if(c != nil){
		c->r = q;
		c->rgen = v4routegeneration;
	}

	return q;
}
ifc->ifcid; + } + if(c != nil){ + c->r = q; + c->rgen = v6routegeneration; + } + + return q; +} + +void +routetype(int type, char *p) +{ + memset(p, ' ', 4); + p[4] = 0; + if(type & Rv4) + *p++ = '4'; + else + *p++ = '6'; + if(type & Rifc) + *p++ = 'i'; + if(type & Runi) + *p++ = 'u'; + else if(type & Rbcast) + *p++ = 'b'; + else if(type & Rmulti) + *p++ = 'm'; + if(type & Rptpt) + *p = 'p'; +} + +char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n"; + +void +convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc) +{ + int i; + + if(r->type & Rv4){ + memmove(addr, v4prefix, IPv4off); + hnputl(addr+IPv4off, r->v4.address); + memset(mask, 0xff, IPv4off); + hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address)); + memmove(gate, v4prefix, IPv4off); + memmove(gate+IPv4off, r->v4.gate, IPv4addrlen); + } else { + for(i = 0; i < IPllen; i++){ + hnputl(addr + 4*i, r->v6.address[i]); + hnputl(mask + 4*i, ~(r->v6.endaddress[i] ^ r->v6.address[i])); + } + memmove(gate, r->v6.gate, IPaddrlen); + } + + routetype(r->type, t); + + if(r->ifc) + *nifc = r->ifc->conv->x; + else + *nifc = -1; +} + +/* + * this code is not in rr to reduce stack size + */ +static void +sprintroute(Route *r, Routewalk *rw) +{ + int nifc, n; + char t[5], *iname, ifbuf[5]; + uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen]; + char *p; + + convroute(r, addr, mask, gate, t, &nifc); + iname = "-"; + if(nifc != -1) { + iname = ifbuf; + sprint(ifbuf, "%d", nifc); + } + p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname); + if(rw->o < 0){ + n = p - rw->p; + if(n > -rw->o){ + memmove(rw->p, rw->p-rw->o, n+rw->o); + rw->p = p + rw->o; + } + rw->o += n; + } else + rw->p = p; +} + +/* + * recurse descending tree, applying the function in Routewalk + */ +static int +rr(Route *r, Routewalk *rw) +{ + int h; + + if(rw->e <= rw->p) + return 0; + if(r == nil) + return 1; + + if(rr(r->left, rw) == 0) + return 0; + + if(r->type & Rv4) + h = V4H(r->v4.address); + else + h = 
V6H(r->v6.address); + + if(h == rw->h) + rw->walk(r, rw); + + if(rr(r->mid, rw) == 0) + return 0; + + return rr(r->right, rw); +} + +void +ipwalkroutes(Fs *f, Routewalk *rw) +{ + rlock(&routelock); + if(rw->e > rw->p) { + for(rw->h = 0; rw->h < nelem(f->v4root); rw->h++) + if(rr(f->v4root[rw->h], rw) == 0) + break; + } + if(rw->e > rw->p) { + for(rw->h = 0; rw->h < nelem(f->v6root); rw->h++) + if(rr(f->v6root[rw->h], rw) == 0) + break; + } + runlock(&routelock); +} + +long +routeread(Fs *f, char *p, ulong offset, int n) +{ + Routewalk rw; + + rw.p = p; + rw.e = p+n; + rw.o = -offset; + rw.walk = sprintroute; + + ipwalkroutes(f, &rw); + + return rw.p - p; +} + +/* + * this code is not in routeflush to reduce stack size + */ +void +delroute(Fs *f, Route *r, int dolock) +{ + uchar addr[IPaddrlen]; + uchar mask[IPaddrlen]; + uchar gate[IPaddrlen]; + char t[5]; + int nifc; + + convroute(r, addr, mask, gate, t, &nifc); + if(r->type & Rv4) + v4delroute(f, addr+IPv4off, mask+IPv4off, dolock); + else + v6delroute(f, addr, mask, dolock); +} + +/* + * recurse until one route is deleted + * returns 0 if nothing is deleted, 1 otherwise + */ +int +routeflush(Fs *f, Route *r, char *tag) +{ + if(r == nil) + return 0; + if(routeflush(f, r->mid, tag)) + return 1; + if(routeflush(f, r->left, tag)) + return 1; + if(routeflush(f, r->right, tag)) + return 1; + if((r->type & Rifc) == 0){ + if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0){ + delroute(f, r, 0); + return 1; + } + } + return 0; +} + +long +routewrite(Fs *f, Chan *c, char *p, int n) +{ + int h, changed; + char *tag; + Cmdbuf *cb; + uchar addr[IPaddrlen]; + uchar mask[IPaddrlen]; + uchar gate[IPaddrlen]; + IPaux *a, *na; + + cb = parsecmd(p, n); + if(waserror()){ + free(cb); + nexterror(); + } + + if(strcmp(cb->f[0], "flush") == 0){ + tag = cb->f[1]; + for(h = 0; h < nelem(f->v4root); h++) + for(changed = 1; changed;){ + wlock(&routelock); + changed = routeflush(f, f->v4root[h], tag); + wunlock(&routelock); + } + 
for(h = 0; h < nelem(f->v6root); h++) + for(changed = 1; changed;){ + wlock(&routelock); + changed = routeflush(f, f->v6root[h], tag); + wunlock(&routelock); + } + } else if(strcmp(cb->f[0], "remove") == 0){ + if(cb->nf < 3) + error(Ebadarg); + parseip(addr, cb->f[1]); + parseipmask(mask, cb->f[2]); + if(memcmp(addr, v4prefix, IPv4off) == 0) + v4delroute(f, addr+IPv4off, mask+IPv4off, 1); + else + v6delroute(f, addr, mask, 1); + } else if(strcmp(cb->f[0], "add") == 0){ + if(cb->nf < 4) + error(Ebadarg); + parseip(addr, cb->f[1]); + parseipmask(mask, cb->f[2]); + parseip(gate, cb->f[3]); + tag = "none"; + if(c != nil){ + a = c->aux; + tag = a->tag; + } + if(memcmp(addr, v4prefix, IPv4off) == 0) + v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0); + else + v6addroute(f, tag, addr, mask, gate, 0); + } else if(strcmp(cb->f[0], "tag") == 0) { + if(cb->nf < 2) + error(Ebadarg); + + a = c->aux; + na = newipaux(a->owner, cb->f[1]); + c->aux = na; + free(a); + } + + poperror(); + free(cb); + return n; +} diff --git a/os/ip/iprouter.c b/os/ip/iprouter.c new file mode 100644 index 00000000..631e728b --- /dev/null +++ b/os/ip/iprouter.c @@ -0,0 +1,56 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "../ip/ip.h" + +IProuter iprouter; + +/* + * User level routing. Ip packets we don't know what to do with + * come here. 
+ */ +void +useriprouter(Fs *f, Ipifc *ifc, Block *bp) +{ + qlock(&f->iprouter); + if(f->iprouter.q != nil){ + bp = padblock(bp, IPaddrlen); + if(bp == nil) + return; + ipmove(bp->rp, ifc->lifc->local); + qpass(f->iprouter.q, bp); + }else + freeb(bp); + qunlock(&f->iprouter); +} + +void +iprouteropen(Fs *f) +{ + qlock(&f->iprouter); + f->iprouter.opens++; + if(f->iprouter.q == nil) + f->iprouter.q = qopen(64*1024, 0, 0, 0); + else if(f->iprouter.opens == 1) + qreopen(f->iprouter.q); + qunlock(&f->iprouter); +} + +void +iprouterclose(Fs *f) +{ + qlock(&f->iprouter); + f->iprouter.opens--; + if(f->iprouter.opens == 0) + qclose(f->iprouter.q); + qunlock(&f->iprouter); +} + +long +iprouterread(Fs *f, void *a, int n) +{ + return qread(f->iprouter.q, a, n); +} diff --git a/os/ip/ipv6.c b/os/ip/ipv6.c new file mode 100644 index 00000000..03f5e2df --- /dev/null +++ b/os/ip/ipv6.c @@ -0,0 +1,747 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ipv6.h" + +enum +{ + IP4HDR = 20, /* sizeof(Ip4hdr) */ + IP6HDR = 40, /* sizeof(Ip6hdr) */ + IP_HLEN4 = 0x05, /* Header length in words */ + IP_DF = 0x4000, /* Don't fragment */ + IP_MF = 0x2000, /* More fragments */ + IP6FHDR = 8, /* sizeof(Fraghdr6) */ + IP_MAX = (32*1024), /* Maximum Internet packet size */ +}; + +#define IPV6CLASS(hdr) ((hdr->vcf[0]&0x0F)<<2 | (hdr->vcf[1]&0xF0)>>2) +#define BLKIPVER(xp) (((Ip6hdr*)((xp)->rp))->vcf[0]&0xF0) +/* + * This sleazy macro is stolen shamelessly from ip.c, see comment there. 
+ */ +#define BKFG(xp) ((Ipfrag*)((xp)->base)) + +typedef struct IP IP; +typedef struct Fragment4 Fragment4; +typedef struct Fragment6 Fragment6; +typedef struct Ipfrag Ipfrag; + +Block* ip6reassemble(IP*, int, Block*, Ip6hdr*); +void ipfragfree6(IP*, Fragment6*); +Fragment6* ipfragallo6(IP*); +static Block* procxtns(IP *ip, Block *bp, int doreasm); +int unfraglen(Block *bp, uchar *nexthdr, int setfh); +Block* procopts(Block *bp); + +/* MIB II counters */ +enum +{ + Forwarding, + DefaultTTL, + InReceives, + InHdrErrors, + InAddrErrors, + ForwDatagrams, + InUnknownProtos, + InDiscards, + InDelivers, + OutRequests, + OutDiscards, + OutNoRoutes, + ReasmTimeout, + ReasmReqds, + ReasmOKs, + ReasmFails, + FragOKs, + FragFails, + FragCreates, + + Nstats, +}; + +static char *statnames[] = +{ +[Forwarding] "Forwarding", +[DefaultTTL] "DefaultTTL", +[InReceives] "InReceives", +[InHdrErrors] "InHdrErrors", +[InAddrErrors] "InAddrErrors", +[ForwDatagrams] "ForwDatagrams", +[InUnknownProtos] "InUnknownProtos", +[InDiscards] "InDiscards", +[InDelivers] "InDelivers", +[OutRequests] "OutRequests", +[OutDiscards] "OutDiscards", +[OutNoRoutes] "OutNoRoutes", +[ReasmTimeout] "ReasmTimeout", +[ReasmReqds] "ReasmReqds", +[ReasmOKs] "ReasmOKs", +[ReasmFails] "ReasmFails", +[FragOKs] "FragOKs", +[FragFails] "FragFails", +[FragCreates] "FragCreates", +}; + +struct Fragment4 +{ + Block* blist; + Fragment4* next; + ulong src; + ulong dst; + ushort id; + ulong age; +}; + +struct Fragment6 +{ + Block* blist; + Fragment6* next; + uchar src[IPaddrlen]; + uchar dst[IPaddrlen]; + uint id; + ulong age; +}; + +struct Ipfrag +{ + ushort foff; + ushort flen; +}; + +/* an instance of IP */ +struct IP +{ + ulong stats[Nstats]; + + QLock fraglock4; + Fragment4* flisthead4; + Fragment4* fragfree4; + Ref id4; + + QLock fraglock6; + Fragment6* flisthead6; + Fragment6* fragfree6; + Ref id6; + + int iprouting; /* true if we route like a gateway */ +}; + +int +ipoput6(Fs *f, Block *bp, int gating, int ttl, 
int tos, Conv *c) +{ + int tentative; + Ipifc *ifc; + uchar *gate, nexthdr; + Ip6hdr *eh; + int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff, morefrags, blklen; + Route *r, *sr; + Fraghdr6 fraghdr; + Block *xp, *nb; + IP *ip; + int rv = 0; + + ip = f->ip; + + /* Fill out the ip header */ + eh = (Ip6hdr*)(bp->rp); + + ip->stats[OutRequests]++; + + /* Number of uchars in data and ip header to write */ + len = blocklen(bp); + + tentative = iptentative(f, eh->src); + if(tentative){ + netlog(f, Logip, "reject tx of packet with tentative src address\n"); + goto free; + } + + if(gating){ + chunk = nhgets(eh->ploadlen); + if(chunk > len){ + ip->stats[OutDiscards]++; + netlog(f, Logip, "short gated packet\n"); + goto free; + } + if(chunk + IPV6HDR_LEN < len) + len = chunk + IPV6HDR_LEN; + } + + if(len >= IP_MAX){ +// print("len > IP_MAX, free\n"); + ip->stats[OutDiscards]++; + netlog(f, Logip, "exceeded ip max size %I\n", eh->dst); + goto free; + } + + r = v6lookup(f, eh->dst, c); + if(r == nil){ +// print("no route for %I, src %I free\n", eh->dst, eh->src); + ip->stats[OutNoRoutes]++; + netlog(f, Logip, "no interface %I\n", eh->dst); + rv = -1; + goto free; + } + + ifc = r->ifc; + if(r->type & (Rifc|Runi)) + gate = eh->dst; + else + if(r->type & (Rbcast|Rmulti)) { + gate = eh->dst; + sr = v6lookup(f, eh->src, nil); + if(sr != nil && (sr->type & Runi)) + ifc = sr->ifc; + } + else + gate = r->v6.gate; + + if(!gating) + eh->vcf[0] = IP_VER6; + eh->ttl = ttl; + if(!gating) { + eh->vcf[0] |= (tos >> 4); + eh->vcf[1] = (tos << 4); + } + + if(!canrlock(ifc)) { + goto free; + } + + if(waserror()){ + runlock(ifc); + nexterror(); + } + + if(ifc->m == nil) { + goto raise; + } + + /* If we dont need to fragment just send it */ + medialen = ifc->maxtu - ifc->m->hsize; + if(len <= medialen) { + hnputs(eh->ploadlen, len-IPV6HDR_LEN); + ifc->m->bwrite(ifc, bp, V6, gate); + runlock(ifc); + poperror(); + return 0; + } + + if(gating) + if(ifc->reassemble <= 0) { + + /* v6 
intermediate nodes are not supposed to fragment pkts; + we fragment if ifc->reassemble is turned on; an exception + needed for nat. + */ + + ip->stats[OutDiscards]++; + icmppkttoobig6(f, ifc, bp); + netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst); + goto raise; + } + + /* start v6 fragmentation */ + uflen = unfraglen(bp, &nexthdr, 1); + if(uflen > medialen) { + ip->stats[FragFails]++; + ip->stats[OutDiscards]++; + netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst); + goto raise; + } + + flen = len - uflen; + seglen = (medialen - (uflen + IP6FHDR)) & ~7; + if(seglen < 8) { + ip->stats[FragFails]++; + ip->stats[OutDiscards]++; + netlog(f, Logip, "%I: seglen < 8\n", eh->dst); + goto raise; + } + + lid = incref(&ip->id6); + fraghdr.nexthdr = nexthdr; + fraghdr.res = 0; + hnputl(fraghdr.id, lid); + + xp = bp; + offset = uflen; + while (xp != nil && offset && offset >= BLEN(xp)) { + offset -= BLEN(xp); + xp = xp->next; + } + xp->rp += offset; + + fragoff = 0; + morefrags = 1; + + for(; fragoff < flen; fragoff += seglen) { + nb = allocb(uflen + IP6FHDR + seglen); + + if(fragoff + seglen >= flen) { + seglen = flen - fragoff; + morefrags = 0; + } + + hnputs(eh->ploadlen, seglen+IP6FHDR); + memmove(nb->wp, eh, uflen); + nb->wp += uflen; + + hnputs(fraghdr.offsetRM, fragoff); // last 3 bits must be 0 + fraghdr.offsetRM[1] |= morefrags; + memmove(nb->wp, &fraghdr, IP6FHDR); + nb->wp += IP6FHDR; + + /* Copy data */ + chunk = seglen; + while (chunk) { + if(!xp) { + ip->stats[OutDiscards]++; + ip->stats[FragFails]++; + freeblist(nb); + netlog(f, Logip, "!xp: chunk in v6%d\n", chunk); + goto raise; + } + blklen = chunk; + if(BLEN(xp) < chunk) + blklen = BLEN(xp); + memmove(nb->wp, xp->rp, blklen); + + nb->wp += blklen; + xp->rp += blklen; + chunk -= blklen; + if(xp->rp == xp->wp) + xp = xp->next; + } + + ifc->m->bwrite(ifc, nb, V6, gate); + ip->stats[FragCreates]++; + } + ip->stats[FragOKs]++; + +raise: + runlock(ifc); + poperror(); +free: + freeblist(bp); 
+	return rv;
+}
+
+/*
+ * ipiput6 - input side for one IPv6 packet bp received on ifc.
+ *
+ * Counts the packet, makes sure the fixed header sits in the first
+ * block, drops packets addressed to a tentative address unless they
+ * are ICMPv6, and rejects anything whose version nibble is not
+ * IP_VER6.  Packets not addressed to this host are forwarded through
+ * ipoput6 when ip->iprouting is set and dropped otherwise; packets
+ * for this host go through procxtns and are handed to the protocol's
+ * rcv routine.  bp is consumed on every path.
+ */
+void
+ipiput6(Fs *f, Ipifc *ifc, Block *bp)
+{
+	int hl;
+	int hop, tos;
+	uchar proto;
+	Ip6hdr *h;
+	Proto *p;
+	int notforme;
+	int tentative;
+	uchar v6dst[IPaddrlen];
+	IP *ip;
+	Route *r, *sr;
+
+	ip = f->ip;
+	ip->stats[InReceives]++;
+
+	/*
+	 *  Ensure we have all the header info in the first
+	 *  block.  Make life easier for other protocols by
+	 *  collecting up to the first 64 bytes in the first block.
+	 */
+	if(BLEN(bp) < 64) {
+		hl = blocklen(bp);
+		if(hl < IP6HDR)
+			hl = IP6HDR;
+		if(hl > 64)
+			hl = 64;
+		bp = pullupblock(bp, hl);
+		if(bp == nil)
+			return;
+	}
+
+	h = (Ip6hdr *)(bp->rp);
+
+	memmove(&v6dst[0], &(h->dst)[0], IPaddrlen);
+	notforme = ipforme(f, v6dst) == 0;
+	tentative = iptentative(f, v6dst);
+
+	if(tentative && (h->proto != ICMPv6)) {
+		print("tentative addr, drop\n");
+		freeblist(bp);
+		return;
+	}
+
+	/* Check header version */
+	if(BLKIPVER(bp) != IP_VER6) {
+		ip->stats[InHdrErrors]++;
+		/* the version is the high nibble of vcf[0]: shift by 4 to log it */
+		netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>4);
+		freeblist(bp);
+		return;
+	}
+
+	/* route */
+	if(notforme) {
+		if(!ip->iprouting){
+			/* bp may be a chain; free all of it like the other drop paths */
+			freeblist(bp);
+			return;
+		}
+		/* don't forward to source's network */
+		sr = v6lookup(f, h->src, nil);
+		r = v6lookup(f, h->dst, nil);
+
+		if(r == nil || sr == r){
+			ip->stats[OutDiscards]++;
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward if packet has timed out */
+		hop = h->ttl;
+		if(hop < 1) {
+			ip->stats[InHdrErrors]++;
+			icmpttlexceeded6(f, ifc, bp);
+			freeblist(bp);
+			return;
+		}
+
+		/* process headers & reassemble if the interface expects it */
+		bp = procxtns(ip, bp, r->ifc->reassemble);
+
+		if(bp == nil)
+			return;
+
+		ip->stats[ForwDatagrams]++;
+		h = (Ip6hdr *) (bp->rp);
+		tos = IPV6CLASS(h);
+		hop = h->ttl;
+		ipoput6(f, bp, 1, hop-1, tos, nil);
+		return;
+	}
+
+	/* reassemble & process headers if needed */
+	bp = procxtns(ip, bp, 1);
+
+	if(bp == nil)
+		return;
+
+	h = (Ip6hdr *) (bp->rp);
+	proto = h->proto;
+	p = Fsrcvpcol(f, proto);
+	if(p != nil && p->rcv != nil) {
+
ip->stats[InDelivers]++; + (*p->rcv)(p, ifc, bp); + return; + } + + ip->stats[InDiscards]++; + ip->stats[InUnknownProtos]++; + freeblist(bp); +} + +/* + * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6 + */ +void +ipfragfree6(IP *ip, Fragment6 *frag) +{ + Fragment6 *fl, **l; + + if(frag->blist) + freeblist(frag->blist); + + memset(frag->src, 0, IPaddrlen); + frag->id = 0; + frag->blist = nil; + + l = &ip->flisthead6; + for(fl = *l; fl; fl = fl->next) { + if(fl == frag) { + *l = frag->next; + break; + } + l = &fl->next; + } + + frag->next = ip->fragfree6; + ip->fragfree6 = frag; + +} + +/* + * ipfragallo6 - copied from ipfragalloc4 + */ +Fragment6* +ipfragallo6(IP *ip) +{ + Fragment6 *f; + + while(ip->fragfree6 == nil) { + /* free last entry on fraglist */ + for(f = ip->flisthead6; f->next; f = f->next) + ; + ipfragfree6(ip, f); + } + f = ip->fragfree6; + ip->fragfree6 = f->next; + f->next = ip->flisthead6; + ip->flisthead6 = f; + f->age = NOW + 30000; + + return f; +} + +static Block* +procxtns(IP *ip, Block *bp, int doreasm) { + + int offset; + uchar proto; + Ip6hdr *h; + + h = (Ip6hdr *) (bp->rp); + offset = unfraglen(bp, &proto, 0); + + if((proto == FH) && (doreasm != 0)) { + bp = ip6reassemble(ip, offset, bp, h); + if(bp == nil) + return nil; + offset = unfraglen(bp, &proto, 0); + } + + if(proto == DOH || offset > IP6HDR) + bp = procopts(bp); + + return bp; +} + + +/* returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr, + * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value + * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr + * field of the last header in the "Unfragmentable part" is set to FH. 
+ */
+int
+unfraglen(Block *bp, uchar *nexthdr, int setfh)
+{
+	uchar *p, *q;
+	int ufl, hs;
+
+	p = bp->rp;
+	q = p+6;   /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
+	*nexthdr = *q;
+	ufl = IP6HDR;
+	p += ufl;
+
+	/*
+	 * NOTE(review): the header sizes are read from the packet and p is
+	 * never compared against bp->wp, so this walk trusts the sender.
+	 */
+	for(;;) {
+		if(*nexthdr == HBH || *nexthdr == RH) {
+			*nexthdr = *p;
+			hs = ((int)*(p+1) + 1) * 8;
+			ufl += hs;
+			q = p;
+			p += hs;
+		}
+		else
+			break;
+	}
+
+	/* q is the nexthdr byte of the last unfragmentable header */
+	if(*nexthdr == FH)
+		*q = *p;
+
+	if(setfh)
+		*q = FH;
+
+	return ufl;
+}
+
+/*
+ * procopts - placeholder: returns bp unchanged.
+ */
+Block*
+procopts(Block *bp)
+{
+	return bp;
+}
+
+/*
+ * ip6reassemble - add the fragment in bp to its reassembly queue.
+ * uflen is the length of the unfragmentable part (the fragment header
+ * starts at bp->rp + uflen) and ih points at the IPv6 header.
+ * Returns the packet when bp was not really fragmented or when the
+ * queue became complete, nil when bp was queued or dropped.
+ */
+Block*
+ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
+{
+
+	int fend, offset;
+	uint id;
+	Fragment6 *f, *fnext;
+	Fraghdr6 *fraghdr;
+	uchar src[IPaddrlen], dst[IPaddrlen];
+	Block *bl, **l, *last, *prev;
+	int ovlap, len, fragsize, pktposn;
+
+	/*
+	 *  block lists are too hard, pullupblock into a single block
+	 */
+	if(bp->next){
+		bp = pullupblock(bp, blocklen(bp));
+		if(bp == nil)
+			return nil;
+		ih = (Ip6hdr *)(bp->rp);
+	}
+
+	/*
+	 *  Derive the header pointers only after the pullup: bp (and so
+	 *  bp->rp) may have changed, and fraghdr is dereferenced again
+	 *  further down, so it must point into the block we keep.
+	 */
+	fraghdr = (Fraghdr6 *) (bp->rp + uflen);
+	memmove(src, ih->src, IPaddrlen);
+	memmove(dst, ih->dst, IPaddrlen);
+	id = nhgetl(fraghdr->id);
+	offset = nhgets(fraghdr->offsetRM) & ~7;
+
+
+	qlock(&ip->fraglock6);
+
+	/*
+	 *  find a reassembly queue for this fragment
+	 */
+	for(f = ip->flisthead6; f; f = fnext){
+		fnext = f->next;
+		if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
+			break;
+		if(f->age < NOW){
+			ip->stats[ReasmTimeout]++;
+			ipfragfree6(ip, f);
+		}
+	}
+
+
+	/*
+	 *  if this isn't a fragmented packet, accept it
+	 *  and get rid of any fragments that might go
+	 *  with it.
+ */ + if(nhgets(fraghdr->offsetRM)==0) { // first frag is also the last + if(f != nil) { + ipfragfree6(ip, f); + ip->stats[ReasmFails]++; + } + qunlock(&ip->fraglock6); + return bp; + } + + if(bp->base+sizeof(Ipfrag) >= bp->rp){ + bp = padblock(bp, sizeof(Ipfrag)); + bp->rp += sizeof(Ipfrag); + } + + BKFG(bp)->foff = offset; + BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR; + + /* First fragment allocates a reassembly queue */ + if(f == nil) { + f = ipfragallo6(ip); + f->id = id; + memmove(f->src, src, IPaddrlen); + memmove(f->dst, dst, IPaddrlen); + + f->blist = bp; + + qunlock(&ip->fraglock6); + ip->stats[ReasmReqds]++; + return nil; + } + + /* + * find the new fragment's position in the queue + */ + prev = nil; + l = &f->blist; + bl = f->blist; + while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) { + prev = bl; + l = &bl->next; + bl = bl->next; + } + + /* Check overlap of a previous fragment - trim away as necessary */ + if(prev) { + ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff; + if(ovlap > 0) { + if(ovlap >= BKFG(bp)->flen) { + freeblist(bp); + qunlock(&ip->fraglock6); + return nil; + } + BKFG(prev)->flen -= ovlap; + } + } + + /* Link onto assembly queue */ + bp->next = *l; + *l = bp; + + /* Check to see if succeeding segments overlap */ + if(bp->next) { + l = &bp->next; + fend = BKFG(bp)->foff + BKFG(bp)->flen; + + /* Take completely covered segments out */ + + while(*l) { + ovlap = fend - BKFG(*l)->foff; + + if(ovlap <= 0) + break; + if(ovlap < BKFG(*l)->flen) { + BKFG(*l)->flen -= ovlap; + BKFG(*l)->foff += ovlap; + /* move up ih hdrs */ + memmove((*l)->rp + ovlap, (*l)->rp, uflen); + (*l)->rp += ovlap; + break; + } + last = (*l)->next; + (*l)->next = nil; + freeblist(*l); + *l = last; + } + } + + /* + * look for a complete packet. if we get to a fragment + * with the trailing bit of fraghdr->offsetRM[1] set, we're done. 
+ */ + pktposn = 0; + for(bl = f->blist; bl; bl = bl->next) { + if(BKFG(bl)->foff != pktposn) + break; + + fraghdr = (Fraghdr6 *) (bl->rp + uflen); + if((fraghdr->offsetRM[1] & 1) == 0) { + + bl = f->blist; + + /* get rid of frag header in first fragment */ + + memmove(bl->rp + IP6FHDR, bl->rp, uflen); + bl->rp += IP6FHDR; + len = nhgets(((Ip6hdr*)(bl->rp))->ploadlen) - IP6FHDR; + bl->wp = bl->rp + len + IP6HDR; + + /* Pullup all the fragment headers and + * return a complete packet + */ + for(bl = bl->next; bl; bl = bl->next) { + fragsize = BKFG(bl)->flen; + len += fragsize; + bl->rp += uflen + IP6FHDR; + bl->wp = bl->rp + fragsize; + } + + bl = f->blist; + f->blist = nil; + ipfragfree6(ip, f); + ih = (Ip6hdr*)(bl->rp); + hnputs(ih->ploadlen, len); + qunlock(&ip->fraglock6); + ip->stats[ReasmOKs]++; + return bl; + } + pktposn += BKFG(bl)->flen; + } + qunlock(&ip->fraglock6); + return nil; +} + diff --git a/os/ip/ipv6.h b/os/ip/ipv6.h new file mode 100644 index 00000000..8da63cfd --- /dev/null +++ b/os/ip/ipv6.h @@ -0,0 +1,185 @@ +#define MIN(a, b) ((a) <= (b) ? 
(a) : (b)) + +/* rfc 3513 defines the address prefices */ +#define isv6mcast(addr) ((addr)[0] == 0xff) +#define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80) +#define issitelocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0xc0) +#define isv6global(addr) (((addr)[0] & 0xe0) == 0x20) + +#define optexsts(np) (nhgets((np)->ploadlen) > 24) +#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0) + +/* from RFC 2460 */ + +typedef struct Ip6hdr Ip6hdr; +typedef struct Opthdr Opthdr; +typedef struct Routinghdr Routinghdr; +typedef struct Fraghdr6 Fraghdr6; + +struct Ip6hdr { + uchar vcf[4]; // version:4, traffic class:8, flow label:20 + uchar ploadlen[2]; // payload length: packet length - 40 + uchar proto; // next header type + uchar ttl; // hop limit + uchar src[IPaddrlen]; + uchar dst[IPaddrlen]; +}; + +struct Opthdr { + uchar nexthdr; + uchar len; +}; + +struct Routinghdr { + uchar nexthdr; + uchar len; + uchar rtetype; + uchar segrem; +}; + +struct Fraghdr6 { + uchar nexthdr; + uchar res; + uchar offsetRM[2]; // Offset, Res, M flag + uchar id[4]; +}; + + +enum { /* Header Types */ + HBH = 0, //? 
+ ICMP = 1, + IGMP = 2, + GGP = 3, + IPINIP = 4, + ST = 5, + TCP = 6, + UDP = 17, + ISO_TP4 = 29, + RH = 43, + FH = 44, + IDRP = 45, + RSVP = 46, + AH = 51, + ESP = 52, + ICMPv6 = 58, + NNH = 59, + DOH = 60, + ISO_IP = 80, + IGRP = 88, + OSPF = 89, + + Maxhdrtype = 256, +}; + + +enum { + // multicast flgs and scop + + well_known_flg = 0, + transient_flg = 1, + + node_local_scop = 1, + link_local_scop = 2, + site_local_scop = 5, + org_local_scop = 8, + global_scop = 14, + + // various prefix lengths + + SOLN_PREF_LEN = 13, + + // icmpv6 unreach codes + icmp6_no_route = 0, + icmp6_ad_prohib = 1, + icmp6_unassigned = 2, + icmp6_adr_unreach = 3, + icmp6_port_unreach = 4, + icmp6_unkn_code = 5, + + // various flags & constants + + v6MINTU = 1280, + HOP_LIMIT = 255, + ETHERHDR_LEN = 14, + IPV6HDR_LEN = 40, + IPV4HDR_LEN = 20, + + // option types + + SRC_LLADDRESS = 1, + TARGET_LLADDRESS = 2, + PREFIX_INFO = 3, + REDIR_HEADER = 4, + MTU_OPTION = 5, + + SRC_UNSPEC = 0, + SRC_UNI = 1, + TARG_UNI = 2, + TARG_MULTI = 3, + + t_unitent = 1, + t_uniproxy = 2, + t_unirany = 3, + + // Router constants (all times in milliseconds) + + MAX_INITIAL_RTR_ADVERT_INTERVAL = 16000, + MAX_INITIAL_RTR_ADVERTISEMENTS = 3, + MAX_FINAL_RTR_ADVERTISEMENTS = 3, + MIN_DELAY_BETWEEN_RAS = 3000, + MAX_RA_DELAY_TIME = 500, + + // Host constants + + MAX_RTR_SOLICITATION_DELAY = 1000, + RTR_SOLICITATION_INTERVAL = 4000, + MAX_RTR_SOLICITATIONS = 3, + + // Node constants + + MAX_MULTICAST_SOLICIT = 3, + MAX_UNICAST_SOLICIT = 3, + MAX_ANYCAST_DELAY_TIME = 1000, + MAX_NEIGHBOR_ADVERTISEMENT = 3, + REACHABLE_TIME = 30000, + RETRANS_TIMER = 1000, + DELAY_FIRST_PROBE_TIME = 5000, + +}; + +extern void ipv62smcast(uchar *, uchar *); +extern void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac); +extern void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags); +extern void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp); +extern void icmppkttoobig6(Fs *f, Ipifc *ifc, 
Block *bp); +extern void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free); + +extern uchar v6allnodesN[IPaddrlen]; +extern uchar v6allnodesL[IPaddrlen]; +extern uchar v6allroutersN[IPaddrlen]; +extern uchar v6allroutersL[IPaddrlen]; +extern uchar v6allnodesNmask[IPaddrlen]; +extern uchar v6allnodesLmask[IPaddrlen]; +extern uchar v6allroutersS[IPaddrlen]; +extern uchar v6solicitednode[IPaddrlen]; +extern uchar v6solicitednodemask[IPaddrlen]; +extern uchar v6Unspecified[IPaddrlen]; +extern uchar v6loopback[IPaddrlen]; +extern uchar v6loopbackmask[IPaddrlen]; +extern uchar v6linklocal[IPaddrlen]; +extern uchar v6linklocalmask[IPaddrlen]; +extern uchar v6sitelocal[IPaddrlen]; +extern uchar v6sitelocalmask[IPaddrlen]; +extern uchar v6glunicast[IPaddrlen]; +extern uchar v6multicast[IPaddrlen]; +extern uchar v6multicastmask[IPaddrlen]; + +extern int v6llpreflen; +extern int v6slpreflen; +extern int v6lbpreflen; +extern int v6mcpreflen; +extern int v6snpreflen; +extern int v6aNpreflen; +extern int v6aLpreflen; + +extern int ReTransTimer; diff --git a/os/ip/kernel.h b/os/ip/kernel.h new file mode 100644 index 00000000..55718fa1 --- /dev/null +++ b/os/ip/kernel.h @@ -0,0 +1,10 @@ +extern int kclose(int); +extern int kdial(char*, char*, char*, int*); +extern int kannounce(char*, char*); +extern void kerrstr(char*); +extern void kgerrstr(char*); +extern int kopen(char*, int); +extern long kread(int, void*, long); +extern long kseek(int, vlong, int); +extern long kwrite(int, void*, long); +extern void kwerrstr(char *, ...); diff --git a/os/ip/loopbackmedium.c b/os/ip/loopbackmedium.c new file mode 100644 index 00000000..69d87449 --- /dev/null +++ b/os/ip/loopbackmedium.c @@ -0,0 +1,121 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +enum +{ + Maxtu= 16*1024, +}; + +typedef struct LB LB; +struct LB +{ + Proc *readp; + Queue *q; + Fs *f; +}; + +static void 
loopbackread(void *a); + +static void +loopbackbind(Ipifc *ifc, int, char**) +{ + LB *lb; + + lb = smalloc(sizeof(*lb)); + lb->f = ifc->conv->p->f; + /* TO DO: make queue size a function of kernel memory */ + lb->q = qopen(128*1024, Qmsg, nil, nil); + ifc->arg = lb; + ifc->mbps = 1000; + + kproc("loopbackread", loopbackread, ifc); + +} + +static void +loopbackunbind(Ipifc *ifc) +{ + LB *lb = ifc->arg; + + if(lb->readp) + postnote(lb->readp, 1, "unbind", 0); + + /* wait for reader to die */ + while(lb->readp != 0) + tsleep(&up->sleep, return0, 0, 300); + + /* clean up */ + qfree(lb->q); + free(lb); +} + +static void +loopbackbwrite(Ipifc *ifc, Block *bp, int, uchar*) +{ + LB *lb; + + lb = ifc->arg; + if(qpass(lb->q, bp) < 0) + ifc->outerr++; + ifc->out++; +} + +static void +loopbackread(void *a) +{ + Ipifc *ifc; + Block *bp; + LB *lb; + + ifc = a; + lb = ifc->arg; + lb->readp = up; /* hide identity under a rock for unbind */ + if(waserror()){ + lb->readp = 0; + pexit("hangup", 1); + } + for(;;){ + bp = qbread(lb->q, Maxtu); + if(bp == nil) + continue; + ifc->in++; + if(!canrlock(ifc)){ + freeb(bp); + continue; + } + if(waserror()){ + runlock(ifc); + nexterror(); + } + if(ifc->lifc == nil) + freeb(bp); + else + ipiput4(lb->f, ifc, bp); + runlock(ifc); + poperror(); + } +} + +Medium loopbackmedium = +{ +.hsize= 0, +.mintu= 0, +.maxtu= Maxtu, +.maclen= 0, +.name= "loopback", +.bind= loopbackbind, +.unbind= loopbackunbind, +.bwrite= loopbackbwrite, +}; + +void +loopbackmediumlink(void) +{ + addipmedium(&loopbackmedium); +} diff --git a/os/ip/netdevmedium.c b/os/ip/netdevmedium.c new file mode 100644 index 00000000..9d5f9749 --- /dev/null +++ b/os/ip/netdevmedium.c @@ -0,0 +1,153 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +static void netdevbind(Ipifc *ifc, int argc, char **argv); +static void netdevunbind(Ipifc *ifc); +static void netdevbwrite(Ipifc *ifc, Block *bp, 
int version, uchar *ip); +static void netdevread(void *a); + +typedef struct Netdevrock Netdevrock; +struct Netdevrock +{ + Fs *f; /* file system we belong to */ + Proc *readp; /* reading process */ + Chan *mchan; /* Data channel */ +}; + +Medium netdevmedium = +{ +.name= "netdev", +.hsize= 0, +.mintu= 0, +.maxtu= 64000, +.maclen= 0, +.bind= netdevbind, +.unbind= netdevunbind, +.bwrite= netdevbwrite, +.unbindonclose= 0, +}; + +/* + * called to bind an IP ifc to a generic network device + * called with ifc qlock'd + */ +static void +netdevbind(Ipifc *ifc, int argc, char **argv) +{ + Chan *mchan; + Netdevrock *er; + + if(argc < 2) + error(Ebadarg); + + mchan = namec(argv[2], Aopen, ORDWR, 0); + + er = smalloc(sizeof(*er)); + er->mchan = mchan; + er->f = ifc->conv->p->f; + + ifc->arg = er; + + kproc("netdevread", netdevread, ifc, 0); +} + +/* + * called with ifc wlock'd + */ +static void +netdevunbind(Ipifc *ifc) +{ + Netdevrock *er = ifc->arg; + + if(er->readp != nil) + postnote(er->readp, 1, "unbind", 0); + + /* wait for readers to die */ + while(er->readp != nil) + tsleep(&up->sleep, return0, 0, 300); + + if(er->mchan != nil) + cclose(er->mchan); + + free(er); +} + +/* + * called by ipoput with a single block to write + */ +static void +netdevbwrite(Ipifc *ifc, Block *bp, int, uchar*) +{ + Netdevrock *er = ifc->arg; + + if(bp->next) + bp = concatblock(bp); + if(BLEN(bp) < ifc->mintu) + bp = adjustblock(bp, ifc->mintu); + + devtab[er->mchan->type]->bwrite(er->mchan, bp, 0); + ifc->out++; +} + +/* + * process to read from the device + */ +static void +netdevread(void *a) +{ + Ipifc *ifc; + Block *bp; + Netdevrock *er; + char *argv[1]; + + ifc = a; + er = ifc->arg; + er->readp = up; /* hide identity under a rock for unbind */ + if(waserror()){ + er->readp = nil; + pexit("hangup", 1); + } + for(;;){ + bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0); + if(bp == nil){ + /* + * get here if mchan is a pipe and other side hangs up + * clean up this interface & 
get out +ZZZ is this a good idea? + */ + poperror(); + er->readp = nil; + argv[0] = "unbind"; + if(!waserror()) + ifc->conv->p->ctl(ifc->conv, argv, 1); + pexit("hangup", 1); + } + if(!canrlock(ifc)){ + freeb(bp); + continue; + } + if(waserror()){ + runlock(ifc); + nexterror(); + } + ifc->in++; + if(ifc->lifc == nil) + freeb(bp); + else + ipiput4(er->f, ifc, bp); + runlock(ifc); + poperror(); + } +} + +void +netdevmediumlink(void) +{ + addipmedium(&netdevmedium); +} diff --git a/os/ip/netlog.c b/os/ip/netlog.c new file mode 100644 index 00000000..3ee200fe --- /dev/null +++ b/os/ip/netlog.c @@ -0,0 +1,263 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "../ip/ip.h" + +enum { + Nlog = 4*1024, +}; + +/* + * action log + */ +struct Netlog { + Lock; + int opens; + char* buf; + char *end; + char *rptr; + int len; + + int logmask; /* mask of things to debug */ + uchar iponly[IPaddrlen]; /* ip address to print debugging for */ + int iponlyset; + + QLock; + Rendez; +}; + +typedef struct Netlogflag { + char* name; + int mask; +} Netlogflag; + +static Netlogflag flags[] = +{ + { "ppp", Logppp, }, + { "ip", Logip, }, + { "fs", Logfs, }, + { "tcp", Logtcp, }, + { "il", Logil, }, + { "icmp", Logicmp, }, + { "udp", Logudp, }, + { "compress", Logcompress, }, + { "ilmsg", Logil|Logilmsg, }, + { "gre", Loggre, }, + { "tcpwin", Logtcp|Logtcpwin, }, + { "tcprxmt", Logtcp|Logtcprxmt, }, + { "udpmsg", Logudp|Logudpmsg, }, + { "ipmsg", Logip|Logipmsg, }, + { "esp", Logesp, }, + { nil, 0, }, +}; + +char Ebadnetctl[] = "too few arguments for netlog control message"; + +enum +{ + CMset, + CMclear, + CMonly, +}; + +static +Cmdtab routecmd[] = { + CMset, "set", 0, + CMclear, "clear", 0, + CMonly, "only", 0, +}; + +void +netloginit(Fs *f) +{ + f->alog = smalloc(sizeof(Netlog)); +} + +void +netlogopen(Fs *f) +{ + lock(f->alog); + if(waserror()){ + unlock(f->alog); + nexterror(); + } + if(f->alog->opens == 
0){
+		if(f->alog->buf == nil){
+			f->alog->buf = malloc(Nlog);
+			/* rptr and end are computed from buf; don't set them up from nil */
+			if(f->alog->buf == nil)
+				error(Enomem);
+		}
+		f->alog->rptr = f->alog->buf;
+		f->alog->end = f->alog->buf + Nlog;
+	}
+	f->alog->opens++;
+	unlock(f->alog);
+	poperror();
+}
+
+/*
+ * netlogclose - drop one opener of the log; the buffer is freed
+ * when the last opener goes away.
+ */
+void
+netlogclose(Fs *f)
+{
+	lock(f->alog);
+	if(waserror()){
+		unlock(f->alog);
+		nexterror();
+	}
+	f->alog->opens--;
+	if(f->alog->opens == 0){
+		free(f->alog->buf);
+		f->alog->buf = nil;
+	}
+	unlock(f->alog);
+	poperror();
+}
+
+/*
+ * netlogready - sleep condition for netlogread: non-zero once the
+ * log holds data.
+ */
+static int
+netlogready(void *a)
+{
+	Fs *f = a;
+
+	return f->alog->len;
+}
+
+/*
+ * netlogread - copy up to n bytes of logged text into a, sleeping
+ * until some is available.  Returns the number of bytes copied.
+ * The data may wrap round the end of the buffer, hence the two
+ * memmoves (i bytes up to the end, d bytes from the start).
+ */
+long
+netlogread(Fs *f, void *a, ulong, long n)
+{
+	int i, d;
+	char *p, *rptr;
+
+	qlock(f->alog);
+	if(waserror()){
+		qunlock(f->alog);
+		nexterror();
+	}
+
+	for(;;){
+		lock(f->alog);
+		if(f->alog->len){
+			if(n > f->alog->len)
+				n = f->alog->len;
+			d = 0;
+			rptr = f->alog->rptr;
+			f->alog->rptr += n;
+			if(f->alog->rptr >= f->alog->end){
+				d = f->alog->rptr - f->alog->end;
+				f->alog->rptr = f->alog->buf + d;
+			}
+			f->alog->len -= n;
+			unlock(f->alog);
+
+			i = n-d;
+			p = a;
+			memmove(p, rptr, i);
+			memmove(p+i, f->alog->buf, d);
+			break;
+		}
+		else
+			unlock(f->alog);
+
+		sleep(f->alog, netlogready, f);
+	}
+
+	qunlock(f->alog);
+	poperror();
+
+	return n;
+}
+
+/*
+ * netlogctl - parse a control message written to the log file.
+ *	set flag...	turn the named log flags on
+ *	clear flag...	turn the named log flags off
+ *	only addr	record addr in iponly; IPnoaddr clears iponlyset
+ * Unknown flag names are skipped.  Errors are raised with error().
+ */
+void
+netlogctl(Fs *f, char* s, int n)
+{
+	int i, set;
+	Netlogflag *fp;
+	Cmdbuf *cb;
+	Cmdtab *ct;
+
+	cb = parsecmd(s, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+
+	if(cb->nf < 2)
+		error(Ebadnetctl);
+
+	ct = lookupcmd(cb, routecmd, nelem(routecmd));
+
+	SET(set);
+
+	switch(ct->index){
+	case CMset:
+		set = 1;
+		break;
+
+	case CMclear:
+		set = 0;
+		break;
+
+	case CMonly:
+		parseip(f->alog->iponly, cb->f[1]);
+		if(ipcmp(f->alog->iponly, IPnoaddr) == 0)
+			f->alog->iponlyset = 0;
+		else
+			f->alog->iponlyset = 1;
+		/* pop the waserror label before leaving, as the normal exit does */
+		poperror();
+		free(cb);
+		return;
+
+	default:
+		cmderror(cb, "unknown ip control message");
+	}
+
+	for(i = 1; i < cb->nf; i++){
+		for(fp = flags; fp->name; fp++)
+			if(strcmp(fp->name, cb->f[i]) == 0)
+				break;
+		if(fp->name == nil)
+			continue;
+		if(set)
+			f->alog->logmask |= fp->mask;
+		else
+
f->alog->logmask &= ~fp->mask; + } + + free(cb); + poperror(); +} + +void +netlog(Fs *f, int mask, char *fmt, ...) +{ + char buf[128], *t, *fp; + int i, n; + va_list arg; + + if(!(f->alog->logmask & mask)) + return; + + if(f->alog->opens == 0) + return; + + va_start(arg, fmt); + n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf; + va_end(arg); + + lock(f->alog); + i = f->alog->len + n - Nlog; + if(i > 0){ + f->alog->len -= i; + f->alog->rptr += i; + if(f->alog->rptr >= f->alog->end) + f->alog->rptr = f->alog->buf + (f->alog->rptr - f->alog->end); + } + t = f->alog->rptr + f->alog->len; + fp = buf; + f->alog->len += n; + while(n-- > 0){ + if(t >= f->alog->end) + t = f->alog->buf + (t - f->alog->end); + *t++ = *fp++; + } + unlock(f->alog); + + wakeup(f->alog); +} diff --git a/os/ip/nullmedium.c b/os/ip/nullmedium.c new file mode 100644 index 00000000..bc575c05 --- /dev/null +++ b/os/ip/nullmedium.c @@ -0,0 +1,39 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +static void +nullbind(Ipifc*, int, char**) +{ + error("cannot bind null device"); +} + +static void +nullunbind(Ipifc*) +{ +} + +static void +nullbwrite(Ipifc*, Block*, int, uchar*) +{ + error("nullbwrite"); +} + +Medium nullmedium = +{ +.name= "null", +.bind= nullbind, +.unbind= nullunbind, +.bwrite= nullbwrite, +}; + +void +nullmediumlink(void) +{ + addipmedium(&nullmedium); +} diff --git a/os/ip/pktmedium.c b/os/ip/pktmedium.c new file mode 100644 index 00000000..6bba1a4c --- /dev/null +++ b/os/ip/pktmedium.c @@ -0,0 +1,79 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + + +static void pktbind(Ipifc*, int, char**); +static void pktunbind(Ipifc*); +static void pktbwrite(Ipifc*, Block*, int, uchar*); +static void pktin(Fs*, Ipifc*, Block*); + +Medium pktmedium = +{ +.name= "pkt", +.hsize= 14, +.mintu= 40, +.maxtu= 
4*1024, +.maclen= 6, +.bind= pktbind, +.unbind= pktunbind, +.bwrite= pktbwrite, +.pktin= pktin, +.unbindonclose= 1, +}; + +/* + * called to bind an IP ifc to an ethernet device + * called with ifc wlock'd + */ +static void +pktbind(Ipifc*, int, char**) +{ +} + +/* + * called with ifc wlock'd + */ +static void +pktunbind(Ipifc*) +{ +} + +/* + * called by ipoput with a single packet to write + */ +static void +pktbwrite(Ipifc *ifc, Block *bp, int, uchar*) +{ + /* enqueue onto the conversation's rq */ + bp = concatblock(bp); + if(ifc->conv->snoopers.ref > 0) + qpass(ifc->conv->sq, copyblock(bp, BLEN(bp))); + qpass(ifc->conv->rq, bp); +} + +/* + * called with ifc rlocked when someone write's to 'data' + */ +static void +pktin(Fs *f, Ipifc *ifc, Block *bp) +{ + if(ifc->lifc == nil) + freeb(bp); + else { + if(ifc->conv->snoopers.ref > 0) + qpass(ifc->conv->sq, copyblock(bp, BLEN(bp))); + ipiput4(f, ifc, bp); + } +} + +void +pktmediumlink(void) +{ + addipmedium(&pktmedium); +} diff --git a/os/ip/plan9.c b/os/ip/plan9.c new file mode 100644 index 00000000..ce24a8d3 --- /dev/null +++ b/os/ip/plan9.c @@ -0,0 +1,36 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "ip.h" + +/* + * some hacks for commonality twixt inferno and plan9 + */ + +char* +commonuser(void) +{ + return up->env->user; +} + +Chan* +commonfdtochan(int fd, int mode, int a, int b) +{ + return fdtochan(up->env->fgrp, fd, mode, a, b); +} + +char* +commonerror(void) +{ + return up->env->errstr; +} + +int +postnote(Proc *p, int, char *, int) +{ + swiproc(p, 0); + return 0; +} diff --git a/os/ip/ppp.c b/os/ip/ppp.c new file mode 100644 index 00000000..73885adc --- /dev/null +++ b/os/ip/ppp.c @@ -0,0 +1,1656 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include <libcrypt.h> +#include <kernel.h> +#include "ip.h" +#include "ppp.h" + +int nocompress; 
+Ipaddr pppdns[2]; + +/* + * Calculate FCS - rfc 1331 + */ +ushort fcstab[256] = +{ + 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, + 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7, + 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e, + 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876, + 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd, + 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5, + 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c, + 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974, + 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb, + 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3, + 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a, + 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72, + 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9, + 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1, + 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738, + 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70, + 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7, + 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff, + 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036, + 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e, + 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5, + 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd, + 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134, + 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c, + 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3, + 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb, + 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232, + 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a, + 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1, + 
0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9, + 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330, + 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78 +}; + +static char *snames[] = +{ + "Sclosed", + "Sclosing", + "Sreqsent", + "Sackrcvd", + "Sacksent", + "Sopened", +}; + +static void init(PPP*); +static void setphase(PPP*, int); +static void pinit(PPP*, Pstate*); +static void ppptimer(void*); +static void ptimer(PPP*, Pstate*); +static int getframe(PPP*, Block**); +static Block* putframe(PPP*, int, Block*); +static uchar* escapebyte(PPP*, ulong, uchar*, ushort*); +static void config(PPP*, Pstate*, int); +static int getopts(PPP*, Pstate*, Block*); +static void rejopts(PPP*, Pstate*, Block*, int); +static void newstate(PPP*, Pstate*, int); +static void rcv(PPP*, Pstate*, Block*); +static void getchap(PPP*, Block*); +static void getpap(PPP*, Block*); +static void sendpap(PPP*); +static void getlqm(PPP*, Block*); +static void putlqm(PPP*); +static void hangup(PPP*); +static void remove(PPP*); + +static int validv4(Ipaddr); +static void invalidate(Ipaddr); +static void ipconnect(PPP *); +static void setdefroute(PPP *, Ipaddr); +static void printopts(PPP *, Pstate*, Block*, int); +static void sendtermreq(PPP*, Pstate*); + +static void +errlog(PPP *ppp, char *err) +{ + int n; + char msg[64]; + + n = snprint(msg, sizeof(msg), "%s\n", err); + qproduce(ppp->ifc->conv->eq, msg, n); +} + +static void +init(PPP* ppp) +{ + if(ppp->inbuf == nil){ + ppp->inbuf = allocb(4096); + ppp->outbuf = allocb(4096); + + ppp->lcp = malloc(sizeof(Pstate)); + ppp->ipcp = malloc(sizeof(Pstate)); + if(ppp->lcp == nil || ppp->ipcp == nil) + error("ppp init: malloc"); + + ppp->lcp->proto = Plcp; + ppp->lcp->state = Sclosed; + ppp->ipcp->proto = Pipcp; + ppp->ipcp->state = Sclosed; + + kproc("ppptimer", ppptimer, ppp, KPDUPPG|KPDUPFDG); + } + + pinit(ppp, ppp->lcp); + setphase(ppp, Plink); +} + +static void +setphase(PPP *ppp, int phase) +{ + int 
oldphase; + + oldphase = ppp->phase; + + ppp->phase = phase; + switch(phase){ + default: + panic("ppp: unknown phase %d", phase); + case Pdead: + /* restart or exit? */ + pinit(ppp, ppp->lcp); + setphase(ppp, Plink); + break; + case Plink: + /* link down */ + switch(oldphase) { + case Pnet: + newstate(ppp, ppp->ipcp, Sclosed); + } + break; + case Pauth: + if(ppp->usepap) + sendpap(ppp); + else if(!ppp->usechap) + setphase(ppp, Pnet); + break; + case Pnet: + pinit(ppp, ppp->ipcp); + break; + case Pterm: + /* what? */ + break; + } +} + +static void +pinit(PPP *ppp, Pstate *p) +{ + p->timeout = 0; + + switch(p->proto){ + case Plcp: + ppp->magic = TK2MS(MACHP(0)->ticks); + ppp->xctlmap = 0xffffffff; + ppp->period = 0; + p->optmask = 0xffffffff; + ppp->rctlmap = 0; + ppp->ipcp->state = Sclosed; + ppp->ipcp->optmask = 0xffffffff; + + /* quality goo */ + ppp->timeout = 0; + memset(&ppp->in, 0, sizeof(ppp->in)); + memset(&ppp->out, 0, sizeof(ppp->out)); + memset(&ppp->pin, 0, sizeof(ppp->pin)); + memset(&ppp->pout, 0, sizeof(ppp->pout)); + memset(&ppp->sin, 0, sizeof(ppp->sin)); + break; + case Pipcp: + if(ppp->localfrozen == 0) + invalidate(ppp->local); + if(ppp->remotefrozen == 0) + invalidate(ppp->remote); + p->optmask = 0xffffffff; + ppp->ctcp = compress_init(ppp->ctcp); + ppp->usedns = 3; + invalidate(ppp->dns1); + invalidate(ppp->dns2); + break; + } + p->confid = p->rcvdconfid = -1; + config(ppp, p, 1); + newstate(ppp, p, Sreqsent); +} + +/* + * change protocol to a new state. 
+ */ +static void +newstate(PPP *ppp, Pstate *p, int state) +{ + netlog(ppp->f, Logppp, "%ux %ux %s->%s ctlmap %lux/%lux flags %ux mtu %d mru %d\n", ppp, p->proto, + snames[p->state], snames[state], ppp->rctlmap, ppp->xctlmap, p->flags, + ppp->mtu, ppp->mru); + + if(p->proto == Plcp) { + if(state == Sopened) + setphase(ppp, Pauth); + else if(state == Sclosed) + setphase(ppp, Pdead); + else if(p->state == Sopened) + setphase(ppp, Plink); + } + + if(p->proto == Pipcp && state == Sopened && validv4(ppp->local) && validv4(ppp->remote)){ + netlog(ppp->f, Logppp, "pppnewstate: local %I remote %I\n", ppp->local, ppp->remote); + ipmove(pppdns[0], ppp->dns1); + ipmove(pppdns[1], ppp->dns2); + ipconnect(ppp); + /* if this is the only network, set up a default route */ +// if(ppp->ifc->link==nil) /* how??? */ + setdefroute(ppp, ppp->remote); + errlog(ppp, Enoerror); + } + + p->state = state; +} + +static void +remove(PPP *ppp) +{ + free(ppp->ipcp); + ppp->ipcp = 0; + free(ppp->ctcp); + ppp->ctcp = 0; + free(ppp->lcp); + ppp->lcp = 0; + if (ppp->inbuf) { + freeb(ppp->inbuf); + ppp->inbuf = nil; + } + if (ppp->outbuf) { + freeb(ppp->outbuf); + ppp->outbuf = nil; + } + free(ppp); +} + +void +pppclose(PPP *ppp) +{ + hangup(ppp); + remove(ppp); +} + +static void +dumpblock(Block *b) +{ + char x[256]; + int i; + + for(i = 0; i < (sizeof(x)-1)/3 && b->rp+i < b->wp; i++) + sprint(&x[3*i], "%2.2ux ", b->rp[i]); + print("%s\n", x); +} + +/* returns (protocol, information) */ +static int +getframe(PPP *ppp, Block **info) +{ + uchar *p, *from, *to; + int n, len, proto; + ulong c; + ushort fcs; + Block *buf, *b; + + buf = ppp->inbuf; + for(;;){ + /* read till we hit a frame byte or run out of room */ + for(p = buf->rp; buf->wp < buf->lim;){ + for(; p < buf->wp; p++) + if(*p == HDLC_frame) + goto break2; + + len = buf->lim - buf->wp; + n = 0; + if(ppp->dchan != nil) + n = kchanio(ppp->dchan, buf->wp, len, OREAD); + netlog(ppp->f, Logppp, "ppp kchanio %d bytes\n", n); + if(n <= 0){ + 
buf->wp = buf->rp; +// if(n < 0) +// print("ppp kchanio(%s) returned %d: %r", +// ppp->dchan->path->elem, n); + *info = nil; + return 0; + } + buf->wp += n; + } +break2: + + /* copy into block, undoing escapes, and caculating fcs */ + fcs = PPP_initfcs; + b = allocb(p - buf->rp); + to = b->wp; + for(from = buf->rp; from != p;){ + c = *from++; + if(c == HDLC_esc){ + if(from == p) + break; + c = *from++ ^ 0x20; + } else if((c < 0x20) && (ppp->rctlmap & (1 << c))) + continue; + *to++ = c; + fcs = (fcs >> 8) ^ fcstab[(fcs ^ c) & 0xff]; + } + + /* copy down what's left in buffer */ + p++; + memmove(buf->rp, p, buf->wp - p); + n = p - buf->rp; + buf->wp -= n; + b->wp = to - 2; + + /* return to caller if checksum matches */ + if(fcs == PPP_goodfcs){ + if(b->rp[0] == PPP_addr && b->rp[1] == PPP_ctl) + b->rp += 2; + proto = *b->rp++; + if((proto & 0x1) == 0) + proto = (proto<<8) | *b->rp++; + if(b->rp < b->wp){ + ppp->in.bytes += n; + ppp->in.packets++; + *info = b; + return proto; + } + } else if(BLEN(b) > 0){ + ppp->ifc->inerr++; + ppp->in.discards++; + netlog(ppp->f, Logppp, "len %d/%d cksum %ux (%ux %ux %ux %ux)\n", + BLEN(b), BLEN(buf), fcs, b->rp[0], + b->rp[1], b->rp[2], b->rp[3]); + } + + freeblist(b); + } + *info = nil; + return 0; +} + +/* send a PPP frame */ +static Block * +putframe(PPP *ppp, int proto, Block *b) +{ + Block *buf; + uchar *to, *from; + ushort fcs; + ulong ctlmap; + int c; + Block *bp; + + if(ppp->dchan == nil){ + netlog(ppp->f, Logppp, "putframe: dchan down\n"); + errlog(ppp, Ehungup); + return b; + } + netlog(ppp->f, Logppp, "putframe %ux %d %d (%d bytes)\n", proto, b->rp[0], b->rp[1], BLEN(b)); + + ppp->out.packets++; + + if(proto == Plcp) + ctlmap = 0xffffffff; + else + ctlmap = ppp->xctlmap; + + /* make sure we have head room */ + if(b->rp - b->base < 4){ + b = padblock(b, 4); + b->rp += 4; + } + + /* add in the protocol and address, we'd better have left room */ + from = b->rp; + *--from = proto; + if(!(ppp->lcp->flags&Fpc) || proto > 0x100 
|| proto == Plcp) + *--from = proto>>8; + if(!(ppp->lcp->flags&Fac) || proto == Plcp){ + *--from = PPP_ctl; + *--from = PPP_addr; + } + + qlock(&ppp->outlock); + buf = ppp->outbuf; + + /* escape and checksum the body */ + fcs = PPP_initfcs; + to = buf->rp; + + *to++ = HDLC_frame; + + for(bp = b; bp; bp = bp->next){ + if(bp != b) + from = bp->rp; + for(; from < bp->wp; from++){ + c = *from; + if(c == HDLC_frame || c == HDLC_esc + || (c < 0x20 && ((1<<c) & ctlmap))){ + *to++ = HDLC_esc; + *to++ = c ^ 0x20; + } else + *to++ = c; + fcs = (fcs >> 8) ^ fcstab[(fcs ^ c) & 0xff]; + } + } + + /* add on and escape the checksum */ + fcs = ~fcs; + c = fcs; + if(c == HDLC_frame || c == HDLC_esc + || (c < 0x20 && ((1<<c) & ctlmap))){ + *to++ = HDLC_esc; + *to++ = c ^ 0x20; + } else + *to++ = c; + c = fcs>>8; + if(c == HDLC_frame || c == HDLC_esc + || (c < 0x20 && ((1<<c) & ctlmap))){ + *to++ = HDLC_esc; + *to++ = c ^ 0x20; + } else + *to++ = c; + + /* add frame marker and send */ + *to++ = HDLC_frame; + buf->wp = to; + if(ppp->dchan == nil){ + netlog(ppp->f, Logppp, "putframe: dchan down\n"); + errlog(ppp, Ehungup); + }else{ + kchanio(ppp->dchan, buf->rp, BLEN(buf), OWRITE); + ppp->out.bytes += BLEN(buf); + } + + qunlock(&ppp->outlock); + return b; +} + +#define IPB2LCP(b) ((Lcpmsg*)((b)->wp-4)) + +static Block* +alloclcp(int code, int id, int len) +{ + Block *b; + Lcpmsg *m; + + /* + * leave room for header + */ + b = allocb(len); + + m = (Lcpmsg*)b->wp; + m->code = code; + m->id = id; + b->wp += 4; + + return b; +} + +static void +putao(Block *b, int type, int aproto, int alg) +{ + *b->wp++ = type; + *b->wp++ = 5; + hnputs(b->wp, aproto); + b->wp += 2; + *b->wp++ = alg; +} + +static void +putlo(Block *b, int type, ulong val) +{ + *b->wp++ = type; + *b->wp++ = 6; + hnputl(b->wp, val); + b->wp += 4; +} + +static void +putv4o(Block *b, int type, Ipaddr val) +{ + *b->wp++ = type; + *b->wp++ = 6; + if(v6tov4(b->wp, val) < 0){ + /*panic("putv4o")*/; + } + b->wp += 4; +} + +static 
void +putso(Block *b, int type, ulong val) +{ + *b->wp++ = type; + *b->wp++ = 4; + hnputs(b->wp, val); + b->wp += 2; +} + +static void +puto(Block *b, int type) +{ + *b->wp++ = type; + *b->wp++ = 2; +} + +/* + * send configuration request + */ +static void +config(PPP *ppp, Pstate *p, int newid) +{ + Block *b; + Lcpmsg *m; + int id; + + if(newid){ + id = ++(p->id); + p->confid = id; + p->timeout = Timeout; + } else + id = p->confid; + b = alloclcp(Lconfreq, id, 256); + m = IPB2LCP(b); + USED(m); + + switch(p->proto){ + case Plcp: + if(p->optmask & Fmagic) + putlo(b, Omagic, ppp->magic); + if(p->optmask & Fmtu) + putso(b, Omtu, ppp->mru); + if(p->optmask & Fac) + puto(b, Oac); + if(p->optmask & Fpc) + puto(b, Opc); + if(p->optmask & Fctlmap) + putlo(b, Octlmap, 0); /* we don't want anything escaped */ + break; + case Pipcp: + if((p->optmask & Fipaddr) /*&& validv4(ppp->local)*/) + putv4o(b, Oipaddr, ppp->local); + if(!nocompress && (p->optmask & Fipcompress)){ + *b->wp++ = Oipcompress; + *b->wp++ = 6; + hnputs(b->wp, Pvjctcp); + b->wp += 2; + *b->wp++ = MAX_STATES-1; + *b->wp++ = 1; + } + if(ppp->usedns & 1) + putlo(b, Oipdns, 0); + if(ppp->usedns & 2) + putlo(b, Oipdns2, 0); + break; + } + + hnputs(m->len, BLEN(b)); + b = putframe(ppp, p->proto, b); + freeblist(b); +} + +/* + * parse configuration request, sends an ack or reject packet + * + * returns: -1 if request was syntacticly incorrect + * 0 if packet was accepted + * 1 if packet was rejected + */ +static int +getopts(PPP *ppp, Pstate *p, Block *b) +{ + Lcpmsg *m, *repm; + Lcpopt *o; + uchar *cp; + ulong rejecting, nacking, flags, proto; + ulong mtu, ctlmap, period; + ulong x; + Block *repb; + Ipaddr ipaddr; + + rejecting = 0; + nacking = 0; + flags = 0; + + /* defaults */ + invalidate(ipaddr); + mtu = ppp->mtu; + + ctlmap = 0xffffffff; + period = 0; + + m = (Lcpmsg*)b->rp; + repb = alloclcp(Lconfack, m->id, BLEN(b)); + repm = IPB2LCP(repb); + + /* copy options into ack packet */ + memmove(repm->data, 
m->data, b->wp - m->data); + repb->wp += b->wp - m->data; + + /* look for options we don't recognize or like */ + for(cp = m->data; cp < b->wp; cp += o->len){ + o = (Lcpopt*)cp; + if(cp + o->len > b->wp || o->len == 0){ + freeblist(repb); + netlog(ppp->f, Logppp, "ppp %s: bad option length %ux\n", ppp->ifc->dev, + o->type); + return -1; + } + + switch(p->proto){ + case Plcp: + switch(o->type){ + case Oac: + flags |= Fac; + continue; + case Opc: + flags |= Fpc; + continue; + case Omtu: + mtu = nhgets(o->data); + if(mtu < ppp->ifc->m->mintu){ + netlog(ppp->f, Logppp, "bogus mtu %d\n", mtu); + mtu = ppp->ifc->m->mintu; + } + continue; + case Omagic: + if(ppp->magic == nhgetl(o->data)) + netlog(ppp->f, Logppp, "ppp: possible loop\n"); + continue; + case Octlmap: + ctlmap = nhgetl(o->data); + continue; + case Oquality: + proto = nhgets(o->data); + if(proto != Plqm) + break; + x = nhgetl(o->data+2)*10; + period = (x+Period-1)/Period; + continue; + case Oauth: + proto = nhgets(o->data); + if(proto == Ppap && ppp->chapname[0] && ppp->secret[0]){ + ppp->usepap = 1; + netlog(ppp->f, Logppp, "PPP %s: select PAP\n", ppp->ifc->dev); + continue; + } + if(proto != Pchap || o->data[2] != APmd5){ + if(!nacking){ + nacking = 1; + repb->wp = repm->data; + repm->code = Lconfnak; + } + putao(repb, Oauth, Pchap, APmd5); + } + else + ppp->usechap = 1; + ppp->usepap = 0; + continue; + } + break; + case Pipcp: + switch(o->type){ + case Oipaddr: + v4tov6(ipaddr, o->data); + if(!validv4(ppp->remote)) + continue; + if(!validv4(ipaddr) && !rejecting){ + /* other side requesting an address */ + if(!nacking){ + nacking = 1; + repb->wp = repm->data; + repm->code = Lconfnak; + } + putv4o(repb, Oipaddr, ppp->remote); + } + continue; + case Oipcompress: + proto = nhgets(o->data); + if(nocompress || proto != Pvjctcp || compress_negotiate(ppp->ctcp, o->data+2) < 0) + break; + flags |= Fipcompress; + continue; + } + break; + } + + /* come here if option is not recognized */ + if(!rejecting){ + 
rejecting = 1; + repb->wp = repm->data; + repm->code = Lconfrej; + } + netlog(ppp->f, Logppp, "ppp %s: bad %ux option %d\n", ppp->ifc->dev, p->proto, o->type); + memmove(repb->wp, o, o->len); + repb->wp += o->len; + } + + /* permanent changes only after we know that we liked the packet */ + if(!rejecting && !nacking){ + switch(p->proto){ + case Plcp: + netlog(ppp->f, Logppp, "Plcp: mtu: %d %d x:%lux/r:%lux %lux\n", mtu, ppp->mtu, ppp->xctlmap, ppp->rctlmap, ctlmap); + ppp->period = period; + ppp->xctlmap = ctlmap; + if(mtu > Maxmtu) + mtu = Maxmtu; + if(mtu < Minmtu) + mtu = Minmtu; + ppp->mtu = mtu; + break; + case Pipcp: + if(validv4(ipaddr) && ppp->remotefrozen == 0) + ipmove(ppp->remote, ipaddr); + break; + } + p->flags = flags; + } + + hnputs(repm->len, BLEN(repb)); + repb = putframe(ppp, p->proto, repb); + freeblist(repb); + + return rejecting || nacking; +} + +/* + * parse configuration rejection, just stop sending anything that they + * don't like (except for ipcp address nak). + */ +static void +rejopts(PPP *ppp, Pstate *p, Block *b, int code) +{ + Lcpmsg *m; + Lcpopt *o; + + /* just give up trying what the other side doesn't like */ + m = (Lcpmsg*)b->rp; + for(b->rp = m->data; b->rp < b->wp; b->rp += o->len){ + o = (Lcpopt*)b->rp; + if(b->rp + o->len > b->wp || o->len == 0){ + netlog(ppp->f, Logppp, "ppp %s: bad roption length %ux\n", ppp->ifc->dev, + o->type); + return; + } + + if(code == Lconfrej){ + if(o->type < 8*sizeof(p->optmask)) + p->optmask &= ~(1<<o->type); + if(o->type == Oipdns) + ppp->usedns &= ~1; + else if(o->type == Oipdns2) + ppp->usedns &= ~2; + netlog(ppp->f, Logppp, "ppp %s: %ux rejecting %d\n", ppp->ifc->dev, p->proto, + o->type); + continue; + } + + switch(p->proto){ + case Plcp: + switch(o->type){ + case Octlmap: + ppp->rctlmap = nhgetl(o->data); + break; + default: + if(o->type < 8*sizeof(p->optmask)) + p->optmask &= ~(1<<o->type); + break; + }; + case Pipcp: + switch(o->type){ + case Oipaddr: + if(!validv4(ppp->local)) + 
v4tov6(ppp->local, o->data); +// if(o->type < 8*sizeof(p->optmask)) +// p->optmask &= ~(1<<o->type); + break; + case Oipdns: + if(!validv4(ppp->dns1)) + v4tov6(ppp->dns1, o->data); + ppp->usedns &= ~1; + break; + case Oipdns2: + if(!validv4(ppp->dns2)) + v4tov6(ppp->dns2, o->data); + ppp->usedns &= ~2; + break; + default: + if(o->type < 8*sizeof(p->optmask)) + p->optmask &= ~(1<<o->type); + break; + } + break; + } + } +} + + +/* + * put a messages through the lcp or ipcp state machine. They are + * very similar. + */ +static void +rcv(PPP *ppp, Pstate *p, Block *b) +{ + ulong len; + int err; + Lcpmsg *m; + + if(BLEN(b) < 4){ + netlog(ppp->f, Logppp, "ppp %s: short lcp message\n", ppp->ifc->dev); + freeblist(b); + return; + } + m = (Lcpmsg*)b->rp; + len = nhgets(m->len); + if(BLEN(b) < len){ + netlog(ppp->f, Logppp, "ppp %s: short lcp message\n", ppp->ifc->dev); + freeblist(b); + return; + } + + netlog(ppp->f, Logppp, "ppp: %ux rcv %d len %d id %d/%d/%d\n", + p->proto, m->code, len, m->id, p->confid, p->id); + + if(p->proto != Plcp && ppp->lcp->state != Sopened){ + netlog(ppp->f, Logppp, "ppp: non-lcp with lcp not open\n"); + freeb(b); + return; + } + + qlock(ppp); + switch(m->code){ + case Lconfreq: + /* flush the output queue */ + if(p->state == Sopened && p->proto == Plcp) + kchanio(ppp->cchan, "f", 1, OWRITE); + + printopts(ppp, p, b, 0); + err = getopts(ppp, p, b); + if(err < 0) + break; + + if(m->id == p->rcvdconfid) + break; /* don't change state for duplicates */ + p->rcvdconfid = m->id; + + switch(p->state){ + case Sackrcvd: + if(err) + break; + newstate(ppp, p, Sopened); + break; + case Sclosed: + case Sopened: + config(ppp, p, 1); + if(err == 0) + newstate(ppp, p, Sacksent); + else + newstate(ppp, p, Sreqsent); + break; + break; + case Sreqsent: + case Sacksent: + if(err == 0) + newstate(ppp, p, Sacksent); + else + newstate(ppp, p, Sreqsent); + break; + } + break; + case Lconfack: + if(p->confid != m->id){ + /* ignore if it isn't the message we're sending 
*/ + netlog(ppp->f, Logppp, "ppp: dropping confack\n"); + break; + } + p->confid = -1; /* ignore duplicates */ + p->id++; /* avoid sending duplicates */ + + switch(p->state){ + case Sopened: + case Sackrcvd: + config(ppp, p, 1); + newstate(ppp, p, Sreqsent); + break; + case Sreqsent: + newstate(ppp, p, Sackrcvd); + break; + case Sacksent: + newstate(ppp, p, Sopened); + break; + } + break; + case Lconfrej: + case Lconfnak: + if(p->confid != m->id) { + /* ignore if it isn't the message we're sending */ + netlog(ppp->f, Logppp, "ppp: dropping confrej or confnak\n"); + break; + } + p->confid = -1; /* ignore duplicates */ + p->id++; /* avoid sending duplicates */ + + switch(p->state){ + case Sopened: + case Sackrcvd: + config(ppp, p, 1); + newstate(ppp, p, Sreqsent); + break; + case Sreqsent: + case Sacksent: + printopts(ppp, p, b, 0); + rejopts(ppp, p, b, m->code); + config(ppp, p, 1); + break; + } + break; + case Ltermreq: + m->code = Ltermack; + b = putframe(ppp, p->proto, b); + + switch(p->state){ + case Sackrcvd: + case Sacksent: + newstate(ppp, p, Sreqsent); + break; + case Sopened: + newstate(ppp, p, Sclosing); + break; + } + break; + case Ltermack: + if(p->termid != m->id) /* ignore if it isn't the message we're sending */ + break; + + if(p->proto == Plcp) + ppp->ipcp->state = Sclosed; + switch(p->state){ + case Sclosing: + newstate(ppp, p, Sclosed); + break; + case Sackrcvd: + newstate(ppp, p, Sreqsent); + break; + case Sopened: + config(ppp, p, 0); + newstate(ppp, p, Sreqsent); + break; + } + break; + case Lcoderej: + netlog(ppp->f, Logppp, "ppp %s: code reject %d\n", ppp->ifc->dev, m->data[0]); + break; + case Lprotorej: + netlog(ppp->f, Logppp, "ppp %s: proto reject %lux\n", ppp->ifc->dev, nhgets(m->data)); + break; + case Lechoreq: + m->code = Lechoack; + b = putframe(ppp, p->proto, b); + break; + case Lechoack: + case Ldiscard: + /* nothing to do */ + break; + } + + qunlock(ppp); + freeblist(b); +} + +/* + * timer for protocol state machine + */ +static 
void +ptimer(PPP *ppp, Pstate *p) +{ + if(p->state == Sopened || p->state == Sclosed) + return; + + p->timeout--; + switch(p->state){ + case Sclosing: + sendtermreq(ppp, p); + break; + case Sreqsent: + case Sacksent: + if(p->timeout <= 0){ + if(p->proto && ppp->cchan != nil) + kchanio(ppp->cchan, "f", 1, OWRITE); /* flush output queue */ + newstate(ppp, p, Sclosed); + } else { + config(ppp, p, 0); + } + break; + case Sackrcvd: + if(p->timeout <= 0){ + if(p->proto && ppp->cchan != nil) + kchanio(ppp->cchan, "f", 1, OWRITE); /* flush output queue */ + newstate(ppp, p, Sclosed); + } + else { + config(ppp, p, 0); + newstate(ppp, p, Sreqsent); + } + break; + } +} + +/* + * timer for ppp + */ +static void +ppptimer(void *arg) +{ + PPP *ppp; + + ppp = arg; + ppp->timep = up; + if(waserror()){ + netlog(ppp->f, Logppp, "ppptimer: %I: %s\n", ppp->local, up->env->errstr); + ppp->timep = 0; + pexit("hangup", 1); + } + for(;;){ + tsleep(&up->sleep, return0, nil, Period); + if(ppp->pppup){ + qlock(ppp); + + ptimer(ppp, ppp->lcp); + if(ppp->lcp->state == Sopened) + ptimer(ppp, ppp->ipcp); + + if(ppp->period && --(ppp->timeout) <= 0){ + ppp->timeout = ppp->period; + putlqm(ppp); + } + + qunlock(ppp); + } + } +} + +static void +setdefroute(PPP *ppp, Ipaddr gate) +{ + int fd, n; + char path[128], msg[128]; + + snprint(path, sizeof path, "#I%d/iproute", ppp->f->dev); + fd = kopen(path, ORDWR); + if(fd < 0) + return; + n = snprint(msg, sizeof(msg), "add 0 0 %I", gate); + kwrite(fd, msg, n); + kclose(fd); +} + +static void +ipconnect(PPP *ppp) +{ + int fd, n; + char path[128], msg[128]; + + snprint(path, sizeof path, "#I%d/ipifc/%d/ctl", ppp->f->dev, ppp->ifc->conv->x); + fd = kopen(path, ORDWR); + if(fd < 0) + return; + n = snprint(msg, sizeof(msg), "connect %I 255.255.255.255 %I", ppp->local, ppp->remote); + if (kwrite(fd, msg, n) != n) + print("ppp ipconnect: %s: %r\n", msg); + kclose(fd); +} + +PPP* +pppopen(PPP *ppp, char *dev, + Ipaddr ipaddr, Ipaddr remip, + int mtu, int 
framing, + char *chapname, char *secret) +{ + int fd, cfd; + char ctl[Maxpath]; + + invalidate(ppp->remote); + invalidate(ppp->local); + invalidate(ppp->dns1); + invalidate(ppp->dns2); + ppp->mtu = Defmtu; + ppp->mru = mtu; + ppp->framing = framing; + + if(remip != nil && validv4(remip)){ + ipmove(ppp->remote, remip); + ppp->remotefrozen = 1; + } + if(ipaddr != nil && validv4(ipaddr)){ + ipmove(ppp->local, ipaddr); + ppp->localfrozen = 1; + } + + /* authentication goo */ + ppp->secret[0] = 0; + if(secret != nil) + strncpy(ppp->secret, secret, sizeof(ppp->secret)); + ppp->chapname[0] = 0; + if(chapname != nil) + strncpy(ppp->chapname, chapname, sizeof(ppp->chapname)); + + if(strchr(dev, '!')) + fd = kdial(dev, nil, nil, nil); + else + fd = kopen(dev, ORDWR); + if(fd < 0){ + netlog(ppp->f, Logppp, "ppp: can't open %s\n", dev); + return nil; + } + ppp->dchan = fdtochan(up->env->fgrp, fd, ORDWR, 0, 1); + kclose(fd); + + /* set up serial line */ +/* XXX this stuff belongs in application, not driver */ + sprint(ctl, "%sctl", dev); + cfd = kopen(ctl, ORDWR); + if(cfd >= 0){ + ppp->cchan = fdtochan(up->env->fgrp, cfd, ORDWR, 0, 1); + kclose(cfd); + kchanio(ppp->cchan, "m1", 2, OWRITE); /* cts/rts flow control/fifo's) on */ + kchanio(ppp->cchan, "q64000", 6, OWRITE);/* increas q size to 64k */ + kchanio(ppp->cchan, "n1", 2, OWRITE); /* nonblocking writes on */ + kchanio(ppp->cchan, "r1", 2, OWRITE); /* rts on */ + kchanio(ppp->cchan, "d1", 2, OWRITE); /* dtr on */ + } + + ppp->pppup = 1; + init(ppp); + return ppp; +} + +static void +hangup(PPP *ppp) +{ + qlock(ppp); + if(waserror()){ + qunlock(ppp); + nexterror(); + } + netlog(ppp->f, Logppp, "PPP Hangup\n"); + errlog(ppp, Ehungup); + if(ppp->pppup && ppp->cchan != nil){ + kchanio(ppp->cchan, "f", 1, OWRITE); /* flush */ + kchanio(ppp->cchan, "h", 1, OWRITE); /* hangup */ + } + cclose(ppp->dchan); + cclose(ppp->cchan); + ppp->dchan = nil; + ppp->cchan = nil; + ppp->pppup = 0; + qunlock(ppp); + poperror(); +} + +/* return 
next input IP packet */ +Block* +pppread(PPP *ppp) +{ + Block *b; + int proto; + Lcpmsg *m; + + for(;;){ + proto = getframe(ppp, &b); + if(b == nil) + return nil; + netlog(ppp->f, Logppp, "ppp: read proto %d len %d\n", proto, blocklen(b)); + switch(proto){ + case Plcp: + rcv(ppp, ppp->lcp, b); + break; + case Pipcp: + rcv(ppp, ppp->ipcp, b); + break; + case Pip: + if(ppp->ipcp->state == Sopened) + return b; + freeblist(b); + break; + case Plqm: + getlqm(ppp, b); + break; + case Pchap: + getchap(ppp, b); + break; + case Ppap: + getpap(ppp, b); + break; + case Pvjctcp: + case Pvjutcp: + if(ppp->ipcp->state == Sopened){ + b = tcpuncompress(ppp->ctcp, b, proto, ppp->f); + if(b != nil) + return b; + } + freeblist(b); + break; + default: + netlog(ppp->f, Logppp, "unknown proto %ux\n", proto); + if(ppp->lcp->state == Sopened){ + /* reject the protocol */ + b->rp -= 6; + m = (Lcpmsg*)b->rp; + m->code = Lprotorej; + m->id = ++ppp->lcp->id; + hnputs(m->data, proto); + hnputs(m->len, BLEN(b)); + b = putframe(ppp, Plcp, b); + } + freeblist(b); + break; + } + } + return nil; /* compiler confused */ +} + +/* transmit an IP packet */ +int +pppwrite(PPP *ppp, Block *b) +{ + ushort proto; + int r; + + qlock(ppp); + + /* can't send ip packets till we're established */ + if(ppp->ipcp->state != Sopened) + goto ret; + + /* link hung up */ + if(ppp->dchan == nil) + goto ret; + + b = concatblock(b); /* or else compression will barf */ + + proto = Pip; + if(ppp->ipcp->flags & Fipcompress) + proto = compress(ppp->ctcp, b, ppp->f); + b = putframe(ppp, proto, b); + + +ret: + qunlock(ppp); + + r = blocklen(b); + netlog(ppp->f, Logppp, "ppp wrt len %d\n", r); + + freeblist(b); + return r; +} + +/* + * link quality management + */ +static void +getlqm(PPP *ppp, Block *b) +{ + Qualpkt *p; + + p = (Qualpkt*)b->rp; + if(BLEN(b) == sizeof(Qualpkt)){ + ppp->in.reports++; + ppp->pout.reports = nhgetl(p->peeroutreports); + ppp->pout.packets = nhgetl(p->peeroutpackets); + ppp->pout.bytes = 
nhgetl(p->peeroutbytes); + ppp->pin.reports = nhgetl(p->peerinreports); + ppp->pin.packets = nhgetl(p->peerinpackets); + ppp->pin.discards = nhgetl(p->peerindiscards); + ppp->pin.errors = nhgetl(p->peerinerrors); + ppp->pin.bytes = nhgetl(p->peerinbytes); + + /* save our numbers at time of reception */ + memmove(&ppp->sin, &ppp->in, sizeof(Qualstats)); + + } + freeblist(b); + if(ppp->period == 0) + putlqm(ppp); + +} +static void +putlqm(PPP *ppp) +{ + Qualpkt *p; + Block *b; + + b = allocb(sizeof(Qualpkt)); + b->wp += sizeof(Qualpkt); + p = (Qualpkt*)b->rp; + hnputl(p->magic, 0); + + /* heresay (what he last told us) */ + hnputl(p->lastoutreports, ppp->pout.reports); + hnputl(p->lastoutpackets, ppp->pout.packets); + hnputl(p->lastoutbytes, ppp->pout.bytes); + + /* our numbers at time of last reception */ + hnputl(p->peerinreports, ppp->sin.reports); + hnputl(p->peerinpackets, ppp->sin.packets); + hnputl(p->peerindiscards, ppp->sin.discards); + hnputl(p->peerinerrors, ppp->sin.errors); + hnputl(p->peerinbytes, ppp->sin.bytes); + + /* our numbers now */ + hnputl(p->peeroutreports, ppp->out.reports+1); + hnputl(p->peeroutpackets, ppp->out.packets+1); + hnputl(p->peeroutbytes, ppp->out.bytes+53/*hack*/); + + b = putframe(ppp, Plqm, b); + freeblist(b); + ppp->out.reports++; +} + +/* + * challenge response dialog + */ +static void +getchap(PPP *ppp, Block *b) +{ + Lcpmsg *m; + int len, vlen, n; + char md5buf[512]; + + m = (Lcpmsg*)b->rp; + len = nhgets(m->len); + if(BLEN(b) < len){ + netlog(ppp->f, Logppp, "ppp %s: short chap message\n", ppp->ifc->dev); + freeblist(b); + return; + } + + switch(m->code){ + case Cchallenge: + vlen = m->data[0]; + if(vlen > len - 5){ + netlog(ppp->f, Logppp, "PPP %s: bad challenge len\n", ppp->ifc->dev); + freeblist(b); + break; + } + + netlog(ppp->f, Logppp, "PPP %s: CHAP Challenge\n", ppp->ifc->dev); +netlog(ppp->f, Logppp, "(secret %s chapname %s id %d)\n", ppp->secret, ppp->chapname, m->id); + /* create string to hash */ + md5buf[0] = 
m->id; + strcpy(md5buf+1, ppp->secret); + n = strlen(ppp->secret) + 1; + memmove(md5buf+n, m->data+1, vlen); + n += vlen; + freeblist(b); + + /* send reply */ + len = 4 + 1 + 16 + strlen(ppp->chapname); + b = alloclcp(2, md5buf[0], len); + m = IPB2LCP(b); + m->data[0] = 16; + md5((uchar*)md5buf, n, m->data+1, 0); + memmove((char*)m->data+17, ppp->chapname, strlen(ppp->chapname)); + hnputs(m->len, len); + b->wp += len-4; + b = putframe(ppp, Pchap, b); + break; + case Cresponse: + netlog(ppp->f, Logppp, "PPP %s: chap response?\n", ppp->ifc->dev); + break; + case Csuccess: + netlog(ppp->f, Logppp, "PPP %s: chap succeeded\n", ppp->ifc->dev); + setphase(ppp, Pnet); + break; + case Cfailure: + netlog(ppp->f, Logppp, "PPP %s: chap failed: %.*s\n", ppp->ifc->dev, len-4, m->data); + errlog(ppp, Eperm); + break; + default: + netlog(ppp->f, Logppp, "PPP %s: chap code %d?\n", ppp->ifc->dev, m->code); + break; + } + freeblist(b); +} + +/* + * password authentication protocol dialog + * -- obsolete but all we know how to use with NT just now + */ +static void +sendpap(PPP *ppp) +{ + Lcpmsg *m; + int clen, slen, len; + Block *b; + uchar *p; + + clen = strlen(ppp->chapname); + slen = strlen(ppp->secret); + len = 4 + 1 + clen + 1 + slen; + ppp->papid = ++ppp->lcp->id; + b = alloclcp(Cpapreq, ppp->papid, len); + m = IPB2LCP(b); + p = m->data; + p[0] = clen; + memmove(p+1, ppp->chapname, clen); + p += clen + 1; + p[0] = slen; + memmove(p+1, ppp->secret, slen); + hnputs(m->len, len); + b->wp += len-4; + b = putframe(ppp, Ppap, b); + netlog(ppp->f, Logppp, "PPP %s: sent pap auth req (%d)\n", ppp->ifc->dev, len); + freeblist(b); +} + +static void +getpap(PPP *ppp, Block *b) +{ + Lcpmsg *m; + int len; + + m = (Lcpmsg*)b->rp; + len = nhgets(m->len); + if(BLEN(b) < len){ + netlog(ppp->f, Logppp, "ppp %s: short pap message\n", ppp->ifc->dev); + freeblist(b); + return; + } + + switch(m->code){ + case Cpapreq: + netlog(ppp->f, Logppp, "PPP %s: pap request?\n", ppp->ifc->dev); + break; + case 
Cpapack: + netlog(ppp->f, Logppp, "PPP %s: PAP succeeded\n", ppp->ifc->dev); + setphase(ppp, Pnet); + break; + case Cpapnak: + if(m->data[0]) + netlog(ppp->f, Logppp, "PPP %s: PAP failed: %.*s\n", ppp->ifc->dev, len-5, m->data+1); + else + netlog(ppp->f, Logppp, "PPP %s: PAP failed\n", ppp->ifc->dev); + errlog(ppp, Eperm); + break; + default: + netlog(ppp->f, Logppp, "PPP %s: pap code %d?\n", ppp->ifc->dev, m->code); + break; + } + freeblist(b); +} + +static void +printopts(PPP *ppp, Pstate *p, Block *b, int send) +{ + Lcpmsg *m; + Lcpopt *o; + int proto, x, period; + uchar *cp; + char *code, *dir; + + m = (Lcpmsg*)b->rp; + switch(m->code) { + default: code = "<unknown>"; break; + case Lconfreq: code = "confrequest"; break; + case Lconfack: code = "confack"; break; + case Lconfnak: code = "confnak"; break; + case Lconfrej: code = "confreject"; break; + } + + if(send) + dir = "send"; + else + dir = "recv"; + + netlog(ppp->f, Logppp, "ppp: %s %s: id=%d\n", dir, code, m->id); + + for(cp = m->data; cp < b->wp; cp += o->len){ + o = (Lcpopt*)cp; + if(cp + o->len > b->wp || o->len == 0){ + netlog(ppp->f, Logppp, "\tbad option length %ux\n", o->type); + return; + } + + switch(p->proto){ + case Plcp: + switch(o->type){ + default: + netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len); + break; + case Omtu: + netlog(ppp->f, Logppp, "\tmtu = %d\n", nhgets(o->data)); + break; + case Octlmap: + netlog(ppp->f, Logppp, "\tctlmap = %ux\n", nhgetl(o->data)); + break; + case Oauth: + netlog(ppp->f, Logppp, "\tauth = ", nhgetl(o->data)); + proto = nhgets(o->data); + switch(proto) { + default: + netlog(ppp->f, Logppp, "unknown auth proto %d\n", proto); + break; + case Ppap: + netlog(ppp->f, Logppp, "password\n"); + break; + case Pchap: + netlog(ppp->f, Logppp, "chap %ux\n", o->data[2]); + break; + } + break; + case Oquality: + proto = nhgets(o->data); + switch(proto) { + default: + netlog(ppp->f, Logppp, "\tunknown quality proto %d\n", proto); + break; + case Plqm: + x = 
nhgetl(o->data+2)*10; + period = (x+Period-1)/Period; + netlog(ppp->f, Logppp, "\tlqm period = %d\n", period); + break; + } + case Omagic: + netlog(ppp->f, Logppp, "\tmagic = %ux\n", nhgetl(o->data)); + break; + case Opc: + netlog(ppp->f, Logppp, "\tprotocol compress\n"); + break; + case Oac: + netlog(ppp->f, Logppp, "\taddr compress\n"); + break; + } + break; + case Pccp: + switch(o->type){ + default: + netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len); + break; + case Ocoui: + netlog(ppp->f, Logppp, "\tOUI\n"); + break; + case Ocstac: + netlog(ppp->f, Logppp, "\tstac LZS\n"); + break; + case Ocmppc: + netlog(ppp->f, Logppp, "\tMicrosoft PPC len=%d %ux\n", o->len, nhgetl(o->data)); + break; + } + break; + case Pecp: + switch(o->type){ + default: + netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len); + break; + case Oeoui: + netlog(ppp->f, Logppp, "\tOUI\n"); + break; + case Oedese: + netlog(ppp->f, Logppp, "\tDES\n"); + break; + } + break; + case Pipcp: + switch(o->type){ + default: + netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len); + break; + case Oipaddrs: + netlog(ppp->f, Logppp, "\tip addrs - deprecated\n"); + break; + case Oipcompress: + netlog(ppp->f, Logppp, "\tip compress\n"); + break; + case Oipaddr: + netlog(ppp->f, Logppp, "\tip addr %V\n", o->data); + break; + case Oipdns: + netlog(ppp->f, Logppp, "\tdns addr %V\n", o->data); + break; + case Oipwins: + netlog(ppp->f, Logppp, "\twins addr %V\n", o->data); + break; + case Oipdns2: + netlog(ppp->f, Logppp, "\tdns2 addr %V\n", o->data); + break; + case Oipwins2: + netlog(ppp->f, Logppp, "\twins2 addr %V\n", o->data); + break; + } + break; + } + } +} + +static void +sendtermreq(PPP *ppp, Pstate *p) +{ + Block *b; + Lcpmsg *m; + + p->termid = ++(p->id); + b = alloclcp(Ltermreq, p->termid, 4); + m = IPB2LCP(b); + hnputs(m->len, 4); + putframe(ppp, p->proto, b); + freeb(b); + newstate(ppp, p, Sclosing); +} + +static void +sendechoreq(PPP *ppp, Pstate *p) +{ + Block 
*b; + Lcpmsg *m; + + p->termid = ++(p->id); + b = alloclcp(Lechoreq, p->id, 4); + m = IPB2LCP(b); + hnputs(m->len, 4); + putframe(ppp, p->proto, b); + freeb(b); +} + +/* + * return non-zero if this is a valid v4 address + */ +static int +validv4(Ipaddr addr) +{ + return memcmp(addr, v4prefix, IPv4off) == 0; +} + +static void +invalidate(Ipaddr addr) +{ + ipmove(addr, IPnoaddr); +} diff --git a/os/ip/ppp.h b/os/ip/ppp.h new file mode 100644 index 00000000..4f1e00de --- /dev/null +++ b/os/ip/ppp.h @@ -0,0 +1,258 @@ +typedef struct PPP PPP; +typedef struct Pstate Pstate; +typedef struct Lcpmsg Lcpmsg; +typedef struct Lcpopt Lcpopt; +typedef struct Qualpkt Qualpkt; +typedef struct Qualstats Qualstats; +typedef struct Tcpc Tcpc; + +typedef uchar Ipaddr[IPaddrlen]; + +enum +{ + HDLC_frame= 0x7e, + HDLC_esc= 0x7d, + + /* PPP frame fields */ + PPP_addr= 0xff, + PPP_ctl= 0x3, + PPP_initfcs= 0xffff, + PPP_goodfcs= 0xf0b8, + + /* PPP phases */ + Pdead= 0, + Plink, /* doing LCP */ + Pauth, /* doing chap */ + Pnet, /* doing IPCP, CCP */ + Pterm, /* closing down */ + + /* PPP protocol types */ + Pip= 0x21, /* internet */ + Pvjctcp= 0x2d, /* compressing van jacobson tcp */ + Pvjutcp= 0x2f, /* uncompressing van jacobson tcp */ + Pcdata= 0xfd, /* compressed datagram */ + Pipcp= 0x8021, /* ip control */ + Pecp= 0x8053, /* encryption control */ + Pccp= 0x80fd, /* compressed datagram control */ + Plcp= 0xc021, /* link control */ + Ppap= 0xc023, /* password auth. 
protocol */ + Plqm= 0xc025, /* link quality monitoring */ + Pchap= 0xc223, /* challenge/response */ + + /* LCP codes */ + Lconfreq= 1, + Lconfack= 2, + Lconfnak= 3, + Lconfrej= 4, + Ltermreq= 5, + Ltermack= 6, + Lcoderej= 7, + Lprotorej= 8, + Lechoreq= 9, + Lechoack= 10, + Ldiscard= 11, + + /* Lcp configure options */ + Omtu= 1, + Octlmap= 2, + Oauth= 3, + Oquality= 4, + Omagic= 5, + Opc= 7, + Oac= 8, + Obad= 12, /* for testing */ + + /* authentication protocols */ + APmd5= 5, + + /* lcp flags */ + Fmtu= 1<<Omtu, + Fctlmap= 1<<Octlmap, + Fauth= 1<<Oauth, + Fquality= 1<<Oquality, + Fmagic= 1<<Omagic, + Fpc= 1<<Opc, + Fac= 1<<Oac, + Fbad= 1<<Obad, + + /* Chap codes */ + Cchallenge= 1, + Cresponse= 2, + Csuccess= 3, + Cfailure= 4, + + /* Pap codes */ + Cpapreq= 1, + Cpapack= 2, + Cpapnak= 3, + + /* link states */ + Sclosed= 0, + Sclosing, + Sreqsent, + Sackrcvd, + Sacksent, + Sopened, + + /* ccp configure options */ + Ocoui= 0, /* proprietary compression */ + Ocstac= 17, /* stac electronics LZS */ + Ocmppc= 18, /* microsoft ppc */ + + /* ccp flags */ + Fcoui= 1<<Ocoui, + Fcstac= 1<<Ocstac, + Fcmppc= 1<<Ocmppc, + + /* ecp configure options */ + Oeoui= 0, /* proprietary compression */ + Oedese= 1, /* DES */ + + /* ecp flags */ + Feoui= 1<<Oeoui, + Fedese= 1<<Oedese, + + /* ipcp configure options */ + Oipaddrs= 1, + Oipcompress= 2, + Oipaddr= 3, + Oipdns= 129, + Oipwins= 130, + Oipdns2= 131, + Oipwins2= 132, + + /* ipcp flags */ + Fipaddrs= 1<<Oipaddrs, + Fipcompress= 1<<Oipcompress, + Fipaddr= 1<<Oipaddr, + + Period= 3*1000, /* period of retransmit process (in ms) */ + Timeout= 10, /* xmit timeout (in Periods) */ + + MAX_STATES = 16, /* van jacobson compression states */ + Defmtu= 1450, /* default that we will ask for */ + Minmtu= 128, /* minimum that we will accept */ + Maxmtu= 2000, /* maximum that we will accept */ +}; + + +struct Pstate +{ + int proto; /* protocol type */ + int timeout; /* for current state */ + int rxtimeout; /* for current retransmit */ + ulong 
flags; /* options received */ + uchar id; /* id of current message */ + uchar confid; /* id of current config message */ + uchar termid; /* id of current termination message */ + uchar rcvdconfid; /* id of last conf message received */ + uchar state; /* PPP link state */ + ulong optmask; /* which options to request */ + int echoack; /* received echo ack */ + int echotimeout; /* echo timeout */ +}; + +struct Qualstats +{ + ulong reports; + ulong packets; + ulong bytes; + ulong discards; + ulong errors; +}; + +struct PPP +{ + QLock; + + Chan* dchan; /* serial line */ + Chan* cchan; /* serial line control */ + int framing; /* non-zero to use framing characters */ + Ipaddr local; + int localfrozen; + Ipaddr remote; + int remotefrozen; + + int pppup; + Fs *f; /* file system we belong to */ + Ipifc* ifc; + Proc* readp; /* reading process */ + Proc* timep; /* timer process */ + Block* inbuf; /* input buffer */ + Block* outbuf; /* output buffer */ + QLock outlock; /* and its lock */ + + ulong magic; /* magic number to detect loop backs */ + ulong rctlmap; /* map of chars to ignore in rcvr */ + ulong xctlmap; /* map of chars to escape in xmit */ + int phase; /* PPP phase */ + Pstate* lcp; /* lcp state */ + Pstate* ipcp; /* ipcp state */ + char secret[256]; /* md5 key */ + char chapname[256]; /* chap system name */ + Tcpc* ctcp; + ulong mtu; /* maximum xmit size */ + ulong mru; /* maximum recv size */ + + int baud; + int usepap; /* authentication is PAP in every sense, not CHAP */ + int papid; + int usechap; + + /* rfc */ + int usedns; + Ipaddr dns1; + Ipaddr dns2; + + /* link quality monitoring */ + int period; /* lqm period */ + int timeout; /* time to next lqm packet */ + Qualstats in; /* local */ + Qualstats out; + Qualstats pin; /* peer */ + Qualstats pout; + Qualstats sin; /* saved */ +}; + +PPP* pppopen(PPP*, char*, Ipaddr, Ipaddr, int, int, char*, char*); +Block* pppread(PPP*); +int pppwrite(PPP*, Block*); +void pppclose(PPP*); + +struct Lcpmsg +{ + uchar code; +
uchar id; + uchar len[2]; + uchar data[1]; +}; + +struct Lcpopt +{ + uchar type; + uchar len; + uchar data[1]; +}; + +struct Qualpkt +{ + uchar magic[4]; + + uchar lastoutreports[4]; + uchar lastoutpackets[4]; + uchar lastoutbytes[4]; + uchar peerinreports[4]; + uchar peerinpackets[4]; + uchar peerindiscards[4]; + uchar peerinerrors[4]; + uchar peerinbytes[4]; + uchar peeroutreports[4]; + uchar peeroutpackets[4]; + uchar peeroutbytes[4]; +}; + +ushort compress(Tcpc*, Block*, Fs*); +Tcpc* compress_init(Tcpc*); +int compress_negotiate(Tcpc*, uchar*); +ushort tcpcompress(Tcpc*, Block*, Fs*); +Block* tcpuncompress(Tcpc*, Block*, ushort, Fs*); diff --git a/os/ip/pppmedium.c b/os/ip/pppmedium.c new file mode 100644 index 00000000..2354728a --- /dev/null +++ b/os/ip/pppmedium.c @@ -0,0 +1,192 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "kernel.h" +#include "ppp.h" + +static void pppreader(void *a); +static void pppbind(Ipifc *ifc, int argc, char **argv); +static void pppunbind(Ipifc *ifc); +static void pppbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip); +static void deadremote(Ipifc *ifc); + +Medium pppmedium = +{ +.name= "ppp", +.hsize= 4, +.mintu= Minmtu, +.maxtu= Maxmtu, +.maclen= 0, +.bind= pppbind, +.unbind= pppunbind, +.bwrite= pppbwrite, +.unbindonclose= 0, /* don't unbind on last close */ +}; + +/* + * called to bind an IP ifc to a ppp device + * called with ifc wlock'd + */ +static void +pppbind(Ipifc *ifc, int argc, char **argv) +{ + PPP *ppp; + Ipaddr ipaddr, remip; + int mtu, framing; + char *chapname, *secret; + + if(argc < 3) + error(Ebadarg); + + ipmove(ipaddr, IPnoaddr); + ipmove(remip, IPnoaddr); + mtu = Defmtu; + framing = 1; + chapname = nil; + secret = nil; + + switch(argc){ + default: + case 9: + if(argv[8][0] != '-') + secret = argv[8]; + case 8: + if(argv[7][0] != '-') + chapname = argv[7]; + case 7: + if(argv[6][0] != '-') +
framing = strtoul(argv[6], 0, 0); + case 6: + if(argv[5][0] != '-') + mtu = strtoul(argv[5], 0, 0); + case 5: + if(argv[4][0] != '-') + parseip(remip, argv[4]); + case 4: + if(argv[3][0] != '-') + parseip(ipaddr, argv[3]); + case 3: + break; + } + + ppp = smalloc(sizeof(*ppp)); + ppp->ifc = ifc; + ppp->f = ifc->conv->p->f; + ifc->arg = ppp; + if(waserror()){ + pppunbind(ifc); + nexterror(); + } + if(pppopen(ppp, argv[2], ipaddr, remip, mtu, framing, chapname, secret) == nil) + error("ppp open failed"); + poperror(); + kproc("pppreader", pppreader, ifc, KPDUPPG|KPDUPFDG); +} + +static void +pppreader(void *a) +{ + Ipifc *ifc; + Block *bp; + PPP *ppp; + + ifc = a; + ppp = ifc->arg; + ppp->readp = up; /* hide identity under a rock for unbind */ + setpri(PriHi); + + if(waserror()){ + netlog(ppp->f, Logppp, "pppreader: %I: %s\n", ppp->local, up->env->errstr); + ppp->readp = 0; + deadremote(ifc); + pexit("hangup", 1); + } + + for(;;){ + bp = pppread(ppp); + if(bp == nil) + error("hungup"); + if(!canrlock(ifc)){ + freeb(bp); + continue; + } + if(waserror()){ + runlock(ifc); + nexterror(); + } + ifc->in++; + if(ifc->lifc == nil) + freeb(bp); + else + ipiput(ppp->f, ifc, bp); + runlock(ifc); + poperror(); + } +} + +/* + * called with ifc wlock'd + */ +static void +pppunbind(Ipifc *ifc) +{ + PPP *ppp = ifc->arg; + + if(ppp == nil) + return; + if(ppp->readp) + postnote(ppp->readp, 1, "unbind", 0); + if(ppp->timep) + postnote(ppp->timep, 1, "unbind", 0); + + /* wait for kprocs to die */ + while(ppp->readp != 0 || ppp->timep != 0) + tsleep(&up->sleep, return0, 0, 300); + + pppclose(ppp); + qclose(ifc->conv->eq); + ifc->arg = nil; +} + +/* + * called by ipoput with a single packet to write with ifc rlock'd + */ +static void +pppbwrite(Ipifc *ifc, Block *bp, int, uchar*) +{ + PPP *ppp = ifc->arg; + + pppwrite(ppp, bp); + ifc->out++; +} + +/* + * If the other end hangs up, we have to unbind the interface. An extra + * unbind (in the case where we are hanging up) won't do any harm. 
+ */ +static void +deadremote(Ipifc *ifc) +{ + int fd; + char path[128]; + PPP *ppp; + + ppp = ifc->arg; + snprint(path, sizeof path, "#I%d/ipifc/%d/ctl", ppp->f->dev, ifc->conv->x); + fd = kopen(path, ORDWR); + if(fd < 0) + return; + kwrite(fd, "unbind", sizeof("unbind")-1); + kclose(fd); +} + +void +pppmediumlink(void) +{ + addipmedium(&pppmedium); +} diff --git a/os/ip/ptclbsum.c b/os/ip/ptclbsum.c new file mode 100644 index 00000000..4b895ecf --- /dev/null +++ b/os/ip/ptclbsum.c @@ -0,0 +1,72 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "ip.h" + +static short endian = 1; +static uchar* aendian = (uchar*)&endian; +#define LITTLE *aendian + +ushort +ptclbsum(uchar *addr, int len) +{ + ulong losum, hisum, mdsum, x; + ulong t1, t2; + + losum = 0; + hisum = 0; + mdsum = 0; + + x = 0; + if((ulong)addr & 1) { + if(len) { + hisum += addr[0]; + len--; + addr++; + } + x = 1; + } + while(len >= 16) { + t1 = *(ushort*)(addr+0); + t2 = *(ushort*)(addr+2); mdsum += t1; + t1 = *(ushort*)(addr+4); mdsum += t2; + t2 = *(ushort*)(addr+6); mdsum += t1; + t1 = *(ushort*)(addr+8); mdsum += t2; + t2 = *(ushort*)(addr+10); mdsum += t1; + t1 = *(ushort*)(addr+12); mdsum += t2; + t2 = *(ushort*)(addr+14); mdsum += t1; + mdsum += t2; + len -= 16; + addr += 16; + } + while(len >= 2) { + mdsum += *(ushort*)addr; + len -= 2; + addr += 2; + } + if(x) { + if(len) + losum += addr[0]; + if(LITTLE) + losum += mdsum; + else + hisum += mdsum; + } else { + if(len) + hisum += addr[0]; + if(LITTLE) + hisum += mdsum; + else + losum += mdsum; + } + + losum += hisum >> 8; + losum += (hisum & 0xff) << 8; + while(hisum = losum>>16) + losum = hisum + (losum & 0xffff); + + return losum & 0xffff; +} diff --git a/os/ip/rudp.c b/os/ip/rudp.c new file mode 100644 index 00000000..ce431333 --- /dev/null +++ b/os/ip/rudp.c @@ -0,0 +1,1085 @@ +/* + * This protocol is compatible with UDP's packet format. 
+ * It could be done over UDP if need be. + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +#define DEBUG 0 +#define DPRINT if(DEBUG)print + +#define SEQDIFF(a,b) ( (a)>=(b)?\ + (a)-(b):\ + 0xffffffffUL-((b)-(a)) ) +#define INSEQ(a,start,end) ( (start)<=(end)?\ + ((a)>(start)&&(a)<=(end)):\ + ((a)>(start)||(a)<=(end)) ) +#define UNACKED(r) SEQDIFF(r->sndseq, r->ackrcvd) +#define NEXTSEQ(a) ( (a)+1 == 0 ? 1 : (a)+1 ) + +enum +{ + UDP_HDRSIZE = 20, /* pseudo header + udp header */ + UDP_PHDRSIZE = 12, /* pseudo header */ + UDP_RHDRSIZE = 36, /* pseudo header + udp header + rudp header */ + UDP_IPHDR = 8, /* ip header */ + IP_UDPPROTO = 254, + UDP_USEAD7 = 52, + UDP_USEAD6 = 36, + UDP_USEAD4 = 12, + + Rudprxms = 200, + Rudptickms = 50, + Rudpmaxxmit = 10, + Maxunacked = 100, + +}; + +#define Hangupgen 0xffffffff /* used only in hangup messages */ + +typedef struct Udphdr Udphdr; +struct Udphdr +{ + /* ip header */ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + + /* pseudo header starts here */ + uchar Unused; + uchar udpproto; /* Protocol */ + uchar udpplen[2]; /* Header plus data length */ + uchar udpsrc[4]; /* Ip source */ + uchar udpdst[4]; /* Ip destination */ + + /* udp header */ + uchar udpsport[2]; /* Source port */ + uchar udpdport[2]; /* Destination port */ + uchar udplen[2]; /* data length */ + uchar udpcksum[2]; /* Checksum */ +}; + +typedef struct Rudphdr Rudphdr; +struct Rudphdr +{ + /* ip header */ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + + /* pseudo header starts here */ + uchar Unused; + uchar udpproto; /* Protocol */ + uchar udpplen[2]; /* 
Header plus data length */ + uchar udpsrc[4]; /* Ip source */ + uchar udpdst[4]; /* Ip destination */ + + /* udp header */ + uchar udpsport[2]; /* Source port */ + uchar udpdport[2]; /* Destination port */ + uchar udplen[2]; /* data length (includes rudp header) */ + uchar udpcksum[2]; /* Checksum */ + + /* rudp header */ + uchar relseq[4]; /* id of this packet (or 0) */ + uchar relsgen[4]; /* generation/time stamp */ + uchar relack[4]; /* packet being acked (or 0) */ + uchar relagen[4]; /* generation/time stamp */ +}; + + +/* + * one state structure per destination + */ +typedef struct Reliable Reliable; +struct Reliable +{ + Ref; + + Reliable *next; + + uchar addr[IPaddrlen]; /* always V6 when put here */ + ushort port; + + Block *unacked; /* unacked msg list */ + Block *unackedtail; /* and its tail */ + + int timeout; /* time since first unacked msg sent */ + int xmits; /* number of times first unacked msg sent */ + + ulong sndseq; /* next packet to be sent */ + ulong sndgen; /* and its generation */ + + ulong rcvseq; /* last packet received */ + ulong rcvgen; /* and its generation */ + + ulong acksent; /* last ack sent */ + ulong ackrcvd; /* last msg for which ack was rcvd */ + + /* flow control */ + QLock lock; + Rendez vous; + int blocked; +}; + + + +/* MIB II counters */ +typedef struct Rudpstats Rudpstats; +struct Rudpstats +{ + ulong rudpInDatagrams; + ulong rudpNoPorts; + ulong rudpInErrors; + ulong rudpOutDatagrams; +}; + +typedef struct Rudppriv Rudppriv; +struct Rudppriv +{ + Ipht ht; + + /* MIB counters */ + Rudpstats ustats; + + /* non-MIB stats */ + ulong csumerr; /* checksum errors */ + ulong lenerr; /* short packet */ + ulong rxmits; /* # of retransmissions */ + ulong orders; /* # of out of order pkts */ + + /* keeping track of the ack kproc */ + int ackprocstarted; + QLock apl; +}; + + +static ulong generation = 0; +static Rendez rend; + +/* + * protocol specific part of Conv + */ +typedef struct Rudpcb Rudpcb; +struct Rudpcb +{ + QLock; + uchar 
headers; + uchar randdrop; + Reliable *r; +}; + +/* + * local functions + */ +void relsendack(Conv*, Reliable*, int); +int reliput(Conv*, Block*, uchar*, ushort); +Reliable *relstate(Rudpcb*, uchar*, ushort, char*); +void relput(Reliable*); +void relforget(Conv *, uchar*, int, int); +void relackproc(void *); +void relackq(Reliable *, Block*); +void relhangup(Conv *, Reliable*); +void relrexmit(Conv *, Reliable*); +void relput(Reliable*); +void rudpkick(void *x); + +static void +rudpstartackproc(Proto *rudp) +{ + Rudppriv *rpriv; + char kpname[KNAMELEN]; + + rpriv = rudp->priv; + if(rpriv->ackprocstarted == 0){ + qlock(&rpriv->apl); + if(rpriv->ackprocstarted == 0){ + sprint(kpname, "#I%drudpack", rudp->f->dev); + kproc(kpname, relackproc, rudp); + rpriv->ackprocstarted = 1; + } + qunlock(&rpriv->apl); + } +} + +static char* +rudpconnect(Conv *c, char **argv, int argc) +{ + char *e; + Rudppriv *upriv; + + upriv = c->p->priv; + rudpstartackproc(c->p); + e = Fsstdconnect(c, argv, argc); + Fsconnected(c, e); + iphtadd(&upriv->ht, c); + + return e; +} + + +static int +rudpstate(Conv *c, char *state, int n) +{ + Rudpcb *ucb; + Reliable *r; + int m; + + m = snprint(state, n, "%s", c->inuse?"Open":"Closed"); + ucb = (Rudpcb*)c->ptcl; + qlock(ucb); + for(r = ucb->r; r; r = r->next) + m += snprint(state+m, n-m, " %I/%ld", r->addr, UNACKED(r)); + qunlock(ucb); + return m; +} + +static char* +rudpannounce(Conv *c, char** argv, int argc) +{ + char *e; + Rudppriv *upriv; + + upriv = c->p->priv; + rudpstartackproc(c->p); + e = Fsstdannounce(c, argv, argc); + if(e != nil) + return e; + Fsconnected(c, nil); + iphtadd(&upriv->ht, c); + + return nil; +} + +static void +rudpcreate(Conv *c) +{ + c->rq = qopen(64*1024, Qmsg, 0, 0); + c->wq = qopen(64*1024, Qkick, rudpkick, c); +} + +static void +rudpclose(Conv *c) +{ + Rudpcb *ucb; + Reliable *r, *nr; + Rudppriv *upriv; + + upriv = c->p->priv; + iphtrem(&upriv->ht, c); + + /* force out any delayed acks */ + ucb = (Rudpcb*)c->ptcl; + 
qlock(ucb); + for(r = ucb->r; r; r = r->next){ + if(r->acksent != r->rcvseq) + relsendack(c, r, 0); + } + qunlock(ucb); + + qclose(c->rq); + qclose(c->wq); + qclose(c->eq); + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + c->lport = 0; + c->rport = 0; + + ucb->headers = 0; + ucb->randdrop = 0; + qlock(ucb); + for(r = ucb->r; r; r = nr){ + if(r->acksent != r->rcvseq) + relsendack(c, r, 0); + nr = r->next; + relhangup(c, r); + relput(r); + } + ucb->r = 0; + + qunlock(ucb); +} + +/* + * randomly don't send packets + */ +static void +doipoput(Conv *c, Fs *f, Block *bp, int x, int ttl, int tos) +{ + Rudpcb *ucb; + + ucb = (Rudpcb*)c->ptcl; + if(ucb->randdrop && nrand(100) < ucb->randdrop) + freeblist(bp); + else + ipoput4(f, bp, x, ttl, tos, nil); +} + +int +flow(void *v) +{ + Reliable *r = v; + + return UNACKED(r) <= Maxunacked; +} + +void +rudpkick(void *x) +{ + Conv *c = x; + Udphdr *uh; + ushort rport; + uchar laddr[IPaddrlen], raddr[IPaddrlen]; + Block *bp; + Rudpcb *ucb; + Rudphdr *rh; + Reliable *r; + int dlen, ptcllen; + Rudppriv *upriv; + Fs *f; + + upriv = c->p->priv; + f = c->p->f; + + netlog(c->p->f, Logrudp, "rudp: kick\n"); + bp = qget(c->wq); + if(bp == nil) + return; + + ucb = (Rudpcb*)c->ptcl; + switch(ucb->headers) { + case 7: + /* get user specified addresses */ + bp = pullupblock(bp, UDP_USEAD7); + if(bp == nil) + return; + ipmove(raddr, bp->rp); + bp->rp += IPaddrlen; + ipmove(laddr, bp->rp); + bp->rp += IPaddrlen; + /* pick interface closest to dest */ + if(ipforme(f, laddr) != Runi) + findlocalip(f, laddr, raddr); + bp->rp += IPaddrlen; /* Ignore ifc address */ + rport = nhgets(bp->rp); + bp->rp += 2+2; /* Ignore local port */ + break; + case 6: + /* get user specified addresses */ + bp = pullupblock(bp, UDP_USEAD6); + if(bp == nil) + return; + ipmove(raddr, bp->rp); + bp->rp += IPaddrlen; + ipmove(laddr, bp->rp); + bp->rp += IPaddrlen; + /* pick interface closest to dest */ + if(ipforme(f, laddr) != Runi) + findlocalip(f, laddr, 
raddr); + rport = nhgets(bp->rp); + + bp->rp += 4; /* Igonore local port */ + break; + default: + ipmove(raddr, c->raddr); + ipmove(laddr, c->laddr); + rport = c->rport; + + break; + } + + dlen = blocklen(bp); + + /* Make space to fit rudp & ip header */ + bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE); + if(bp == nil) + return; + + uh = (Udphdr *)(bp->rp); + uh->vihl = IP_VER4; + + rh = (Rudphdr*)uh; + + ptcllen = dlen + (UDP_RHDRSIZE-UDP_PHDRSIZE); + uh->Unused = 0; + uh->udpproto = IP_UDPPROTO; + uh->frag[0] = 0; + uh->frag[1] = 0; + hnputs(uh->udpplen, ptcllen); + switch(ucb->headers){ + case 6: + case 7: + v6tov4(uh->udpdst, raddr); + hnputs(uh->udpdport, rport); + v6tov4(uh->udpsrc, laddr); + break; + default: + v6tov4(uh->udpdst, c->raddr); + hnputs(uh->udpdport, c->rport); + if(ipcmp(c->laddr, IPnoaddr) == 0) + findlocalip(f, c->laddr, c->raddr); + v6tov4(uh->udpsrc, c->laddr); + break; + } + hnputs(uh->udpsport, c->lport); + hnputs(uh->udplen, ptcllen); + uh->udpcksum[0] = 0; + uh->udpcksum[1] = 0; + + qlock(ucb); + r = relstate(ucb, raddr, rport, "kick"); + r->sndseq = NEXTSEQ(r->sndseq); + hnputl(rh->relseq, r->sndseq); + hnputl(rh->relsgen, r->sndgen); + + hnputl(rh->relack, r->rcvseq); /* ACK last rcvd packet */ + hnputl(rh->relagen, r->rcvgen); + + if(r->rcvseq != r->acksent) + r->acksent = r->rcvseq; + + hnputs(uh->udpcksum, ptclcsum(bp, UDP_IPHDR, dlen+UDP_RHDRSIZE)); + + relackq(r, bp); + qunlock(ucb); + + upriv->ustats.rudpOutDatagrams++; + + DPRINT("sent: %lud/%lud, %lud/%lud\n", + r->sndseq, r->sndgen, r->rcvseq, r->rcvgen); + + doipoput(c, f, bp, 0, c->ttl, c->tos); + + if(waserror()) { + relput(r); + qunlock(&r->lock); + nexterror(); + } + + /* flow control of sorts */ + qlock(&r->lock); + if(UNACKED(r) > Maxunacked){ + r->blocked = 1; + sleep(&r->vous, flow, r); + r->blocked = 0; + } + + qunlock(&r->lock); + relput(r); + poperror(); +} + +void +rudpiput(Proto *rudp, Ipifc *ifc, Block *bp) +{ + int len, olen, ottl; + Udphdr *uh; + Conv *c; + Rudpcb 
*ucb; + uchar raddr[IPaddrlen], laddr[IPaddrlen]; + ushort rport, lport; + Rudppriv *upriv; + Fs *f; + uchar *p; + + upriv = rudp->priv; + f = rudp->f; + + upriv->ustats.rudpInDatagrams++; + + uh = (Udphdr*)(bp->rp); + + /* Put back pseudo header for checksum + * (remember old values for icmpnoconv()) + */ + ottl = uh->Unused; + uh->Unused = 0; + len = nhgets(uh->udplen); + olen = nhgets(uh->udpplen); + hnputs(uh->udpplen, len); + + v4tov6(raddr, uh->udpsrc); + v4tov6(laddr, uh->udpdst); + lport = nhgets(uh->udpdport); + rport = nhgets(uh->udpsport); + + if(nhgets(uh->udpcksum)) { + if(ptclcsum(bp, UDP_IPHDR, len+UDP_PHDRSIZE)) { + upriv->ustats.rudpInErrors++; + upriv->csumerr++; + netlog(f, Logrudp, "rudp: checksum error %I\n", raddr); + DPRINT("rudp: checksum error %I\n", raddr); + freeblist(bp); + return; + } + } + + qlock(rudp); + + c = iphtlook(&upriv->ht, raddr, rport, laddr, lport); + if(c == nil){ + /* no converstation found */ + upriv->ustats.rudpNoPorts++; + qunlock(rudp); + netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport, + laddr, lport); + uh->Unused = ottl; + hnputs(uh->udpplen, olen); + icmpnoconv(f, bp); + freeblist(bp); + return; + } + ucb = (Rudpcb*)c->ptcl; + qlock(ucb); + qunlock(rudp); + + if(reliput(c, bp, raddr, rport) < 0){ + qunlock(ucb); + freeb(bp); + return; + } + + /* + * Trim the packet down to data size + */ + + len -= (UDP_RHDRSIZE-UDP_PHDRSIZE); + bp = trimblock(bp, UDP_IPHDR+UDP_RHDRSIZE, len); + if(bp == nil) { + netlog(f, Logrudp, "rudp: len err %I.%d -> %I.%d\n", + raddr, rport, laddr, lport); + DPRINT("rudp: len err %I.%d -> %I.%d\n", + raddr, rport, laddr, lport); + upriv->lenerr++; + return; + } + + netlog(f, Logrudpmsg, "rudp: %I.%d -> %I.%d l %d\n", + raddr, rport, laddr, lport, len); + + switch(ucb->headers){ + case 7: + /* pass the src address */ + bp = padblock(bp, UDP_USEAD7); + p = bp->rp; + ipmove(p, raddr); p += IPaddrlen; + ipmove(p, laddr); p += IPaddrlen; + ipmove(p, ifc->lifc->local); p += 
IPaddrlen; + hnputs(p, rport); p += 2; + hnputs(p, lport); + break; + case 6: + /* pass the src address */ + bp = padblock(bp, UDP_USEAD6); + p = bp->rp; + ipmove(p, raddr); p += IPaddrlen; + ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen; + hnputs(p, rport); p += 2; + hnputs(p, lport); + break; + default: + /* connection oriented rudp */ + if(ipcmp(c->raddr, IPnoaddr) == 0){ + /* save the src address in the conversation */ + ipmove(c->raddr, raddr); + c->rport = rport; + + /* reply with the same ip address (if not broadcast) */ + if(ipforme(f, laddr) == Runi) + ipmove(c->laddr, laddr); + else + v4tov6(c->laddr, ifc->lifc->local); + } + break; + } + if(bp->next) + bp = concatblock(bp); + + if(qfull(c->rq)) { + netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport, + laddr, lport); + freeblist(bp); + } + else + qpass(c->rq, bp); + + qunlock(ucb); +} + +static char *rudpunknown = "unknown rudp ctl request"; + +char* +rudpctl(Conv *c, char **f, int n) +{ + Rudpcb *ucb; + uchar ip[IPaddrlen]; + int x; + + ucb = (Rudpcb*)c->ptcl; + if(n < 1) + return rudpunknown; + + if(strcmp(f[0], "headers++4") == 0){ + ucb->headers = 7; + return nil; + } else if(strcmp(f[0], "headers") == 0){ + ucb->headers = 6; + return nil; + } else if(strcmp(f[0], "hangup") == 0){ + if(n < 3) + return "bad syntax"; + parseip(ip, f[1]); + x = atoi(f[2]); + qlock(ucb); + relforget(c, ip, x, 1); + qunlock(ucb); + return nil; + } else if(strcmp(f[0], "randdrop") == 0){ + x = 10; /* default is 10% */ + if(n > 1) + x = atoi(f[1]); + if(x > 100 || x < 0) + return "illegal rudp drop rate"; + ucb->randdrop = x; + return nil; + } + return rudpunknown; +} + +void +rudpadvise(Proto *rudp, Block *bp, char *msg) +{ + Udphdr *h; + uchar source[IPaddrlen], dest[IPaddrlen]; + ushort psource, pdest; + Conv *s, **p; + + h = (Udphdr*)(bp->rp); + + v4tov6(dest, h->udpdst); + v4tov6(source, h->udpsrc); + psource = nhgets(h->udpsport); + pdest = nhgets(h->udpdport); + + /* Look 
for a connection */ + for(p = rudp->conv; *p; p++) { + s = *p; + if(s->rport == pdest) + if(s->lport == psource) + if(ipcmp(s->raddr, dest) == 0) + if(ipcmp(s->laddr, source) == 0){ + qhangup(s->rq, msg); + qhangup(s->wq, msg); + break; + } + } + freeblist(bp); +} + +int +rudpstats(Proto *rudp, char *buf, int len) +{ + Rudppriv *upriv; + + upriv = rudp->priv; + return snprint(buf, len, "%lud %lud %lud %lud %lud %lud\n", + upriv->ustats.rudpInDatagrams, + upriv->ustats.rudpNoPorts, + upriv->ustats.rudpInErrors, + upriv->ustats.rudpOutDatagrams, + upriv->rxmits, + upriv->orders); +} + +void +rudpinit(Fs *fs) +{ + + Proto *rudp; + + rudp = smalloc(sizeof(Proto)); + rudp->priv = smalloc(sizeof(Rudppriv)); + rudp->name = "rudp"; + rudp->connect = rudpconnect; + rudp->announce = rudpannounce; + rudp->ctl = rudpctl; + rudp->state = rudpstate; + rudp->create = rudpcreate; + rudp->close = rudpclose; + rudp->rcv = rudpiput; + rudp->advise = rudpadvise; + rudp->stats = rudpstats; + rudp->ipproto = IP_UDPPROTO; + rudp->nc = 16; + rudp->ptclsize = sizeof(Rudpcb); + + Fsproto(fs, rudp); +} + +/*********************************************/ +/* Here starts the reliable helper functions */ +/*********************************************/ +/* + * Enqueue a copy of an unacked block for possible retransmissions + */ +void +relackq(Reliable *r, Block *bp) +{ + Block *np; + + np = copyblock(bp, blocklen(bp)); + if(r->unacked) + r->unackedtail->list = np; + else { + /* restart timer */ + r->timeout = 0; + r->xmits = 1; + r->unacked = np; + } + r->unackedtail = np; + np->list = nil; +} + +/* + * retransmit unacked blocks + */ +void +relackproc(void *a) +{ + Rudpcb *ucb; + Proto *rudp; + Reliable *r; + Conv **s, *c; + + rudp = (Proto *)a; + +loop: + tsleep(&up->sleep, return0, 0, Rudptickms); + + for(s = rudp->conv; *s; s++) { + c = *s; + ucb = (Rudpcb*)c->ptcl; + qlock(ucb); + + for(r = ucb->r; r; r = r->next) { + if(r->unacked != nil){ + r->timeout += Rudptickms; + if(r->timeout > 
Rudprxms*r->xmits) + relrexmit(c, r); + } + if(r->acksent != r->rcvseq) + relsendack(c, r, 0); + } + qunlock(ucb); + } + goto loop; +} + +/* + * get the state record for a conversation + */ +Reliable* +relstate(Rudpcb *ucb, uchar *addr, ushort port, char *from) +{ + Reliable *r, **l; + + l = &ucb->r; + for(r = *l; r; r = *l){ + if(memcmp(addr, r->addr, IPaddrlen) == 0 && + port == r->port) + break; + l = &r->next; + } + + /* no state for this addr/port, create some */ + if(r == nil){ + while(generation == 0) + generation = rand(); + + DPRINT("from %s new state %lud for %I!%ud\n", + from, generation, addr, port); + + r = smalloc(sizeof(Reliable)); + memmove(r->addr, addr, IPaddrlen); + r->port = port; + r->unacked = 0; + if(generation == Hangupgen) + generation++; + r->sndgen = generation++; + r->sndseq = 0; + r->ackrcvd = 0; + r->rcvgen = 0; + r->rcvseq = 0; + r->acksent = 0; + r->xmits = 0; + r->timeout = 0; + r->ref = 0; + incref(r); /* one reference for being in the list */ + + *l = r; + } + + incref(r); + return r; +} + +void +relput(Reliable *r) +{ + if(decref(r) == 0) + free(r); +} + +/* + * forget a Reliable state + */ +void +relforget(Conv *c, uchar *ip, int port, int originator) +{ + Rudpcb *ucb; + Reliable *r, **l; + + ucb = (Rudpcb*)c->ptcl; + + l = &ucb->r; + for(r = *l; r; r = *l){ + if(ipcmp(ip, r->addr) == 0 && port == r->port){ + *l = r->next; + if(originator) + relsendack(c, r, 1); + relhangup(c, r); + relput(r); /* remove from the list */ + break; + } + l = &r->next; + } +} + +/* + * process a rcvd reliable packet. return -1 if not to be passed to user process, + * 0 otherwise. + * + * called with ucb locked.
+ */ +int +reliput(Conv *c, Block *bp, uchar *addr, ushort port) +{ + Block *nbp; + Rudpcb *ucb; + Rudppriv *upriv; + Udphdr *uh; + Reliable *r; + Rudphdr *rh; + ulong seq, ack, sgen, agen, ackreal; + int rv = -1; + + /* get fields */ + uh = (Udphdr*)(bp->rp); + rh = (Rudphdr*)uh; + seq = nhgetl(rh->relseq); + sgen = nhgetl(rh->relsgen); + ack = nhgetl(rh->relack); + agen = nhgetl(rh->relagen); + + upriv = c->p->priv; + ucb = (Rudpcb*)c->ptcl; + r = relstate(ucb, addr, port, "input"); + + DPRINT("rcvd %lud/%lud, %lud/%lud, r->sndgen = %lud\n", + seq, sgen, ack, agen, r->sndgen); + + /* if acking an incorrect generation, ignore */ + if(ack && agen != r->sndgen) + goto out; + + /* Look for a hangup */ + if(sgen == Hangupgen) { + if(agen == r->sndgen) + relforget(c, addr, port, 0); + goto out; + } + + /* make sure we're not talking to a new remote side */ + if(r->rcvgen != sgen){ + if(seq != 0 && seq != 1) + goto out; + + /* new connection */ + if(r->rcvgen != 0){ + DPRINT("new con r->rcvgen = %lud, sgen = %lud\n", r->rcvgen, sgen); + relhangup(c, r); + } + r->rcvgen = sgen; + } + + /* dequeue acked packets */ + if(ack && agen == r->sndgen){ + ackreal = 0; + while(r->unacked != nil && INSEQ(ack, r->ackrcvd, r->sndseq)){ + nbp = r->unacked; + r->unacked = nbp->list; + DPRINT("%lud/%lud acked, r->sndgen = %lud\n", + ack, agen, r->sndgen); + freeb(nbp); + r->ackrcvd = NEXTSEQ(r->ackrcvd); + ackreal = 1; + } + + /* flow control */ + if(UNACKED(r) < Maxunacked/8 && r->blocked) + wakeup(&r->vous); + + /* + * retransmit next packet if the acked packet + * was transmitted more than once + */ + if(ackreal && r->unacked != nil){ + r->timeout = 0; + if(r->xmits > 1){ + r->xmits = 1; + relrexmit(c, r); + } + } + + } + + /* no message or input queue full */ + if(seq == 0 || qfull(c->rq)) + goto out; + + /* refuse out of order delivery */ + if(seq != NEXTSEQ(r->rcvseq)){ + relsendack(c, r, 0); /* tell him we got it already */ + upriv->orders++; + DPRINT("out of sequence %lud not 
%lud\n", seq, NEXTSEQ(r->rcvseq)); + goto out; + } + r->rcvseq = seq; + + rv = 0; +out: + relput(r); + return rv; +} + +void +relsendack(Conv *c, Reliable *r, int hangup) +{ + Udphdr *uh; + Block *bp; + Rudphdr *rh; + int ptcllen; + Fs *f; + + bp = allocb(UDP_IPHDR + UDP_RHDRSIZE); + if(bp == nil) + return; + bp->wp += UDP_IPHDR + UDP_RHDRSIZE; + f = c->p->f; + uh = (Udphdr *)(bp->rp); + uh->vihl = IP_VER4; + rh = (Rudphdr*)uh; + + ptcllen = (UDP_RHDRSIZE-UDP_PHDRSIZE); + uh->Unused = 0; + uh->udpproto = IP_UDPPROTO; + uh->frag[0] = 0; + uh->frag[1] = 0; + hnputs(uh->udpplen, ptcllen); + + v6tov4(uh->udpdst, r->addr); + hnputs(uh->udpdport, r->port); + hnputs(uh->udpsport, c->lport); + if(ipcmp(c->laddr, IPnoaddr) == 0) + findlocalip(f, c->laddr, c->raddr); + v6tov4(uh->udpsrc, c->laddr); + hnputs(uh->udplen, ptcllen); + + if(hangup) + hnputl(rh->relsgen, Hangupgen); + else + hnputl(rh->relsgen, r->sndgen); + hnputl(rh->relseq, 0); + hnputl(rh->relagen, r->rcvgen); + hnputl(rh->relack, r->rcvseq); + + if(r->acksent < r->rcvseq) + r->acksent = r->rcvseq; + + uh->udpcksum[0] = 0; + uh->udpcksum[1] = 0; + hnputs(uh->udpcksum, ptclcsum(bp, UDP_IPHDR, UDP_RHDRSIZE)); + + DPRINT("sendack: %lud/%lud, %lud/%lud\n", 0L, r->sndgen, r->rcvseq, r->rcvgen); + doipoput(c, f, bp, 0, c->ttl, c->tos); +} + + +/* + * called with ucb locked (and c locked if user initiated close) + */ +void +relhangup(Conv *c, Reliable *r) +{ + int n; + Block *bp; + char hup[ERRMAX]; + + n = snprint(hup, sizeof(hup), "hangup %I!%d", r->addr, r->port); + qproduce(c->eq, hup, n); + + /* + * dump any unacked outgoing messages + */ + for(bp = r->unacked; bp != nil; bp = r->unacked){ + r->unacked = bp->list; + bp->list = nil; + freeb(bp); + } + + r->rcvgen = 0; + r->rcvseq = 0; + r->acksent = 0; + if(generation == Hangupgen) + generation++; + r->sndgen = generation++; + r->sndseq = 0; + r->ackrcvd = 0; + r->xmits = 0; + r->timeout = 0; + wakeup(&r->vous); +} + +/* + * called with ucb locked + */ +void 
+relrexmit(Conv *c, Reliable *r) +{ + Rudppriv *upriv; + Block *np; + Fs *f; + + upriv = c->p->priv; + f = c->p->f; + r->timeout = 0; + if(r->xmits++ > Rudpmaxxmit){ + relhangup(c, r); + return; + } + + upriv->rxmits++; + np = copyblock(r->unacked, blocklen(r->unacked)); + DPRINT("rxmit r->ackrvcd+1 = %lud\n", r->ackrcvd+1); + doipoput(c, f, np, 0, c->ttl, c->tos); +} diff --git a/os/ip/tcp.c b/os/ip/tcp.c new file mode 100644 index 00000000..c2bf7274 --- /dev/null +++ b/os/ip/tcp.c @@ -0,0 +1,3177 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +enum +{ + QMAX = 64*1024-1, + IP_TCPPROTO = 6, + + TCP4_IPLEN = 8, + TCP4_PHDRSIZE = 12, + TCP4_HDRSIZE = 20, + TCP4_TCBPHDRSZ = 40, + TCP4_PKT = TCP4_IPLEN+TCP4_PHDRSIZE, + + TCP6_IPLEN = 0, + TCP6_PHDRSIZE = 40, + TCP6_HDRSIZE = 20, + TCP6_TCBPHDRSZ = 60, + TCP6_PKT = TCP6_IPLEN+TCP6_PHDRSIZE, + + TcptimerOFF = 0, + TcptimerON = 1, + TcptimerDONE = 2, + MAX_TIME = (1<<20), /* Forever */ + TCP_ACK = 50, /* Timed ack sequence in ms */ + MAXBACKMS = 9*60*1000, /* longest backoff time (ms) before hangup */ + + URG = 0x20, /* Data marked urgent */ + ACK = 0x10, /* Acknowledge is valid */ + PSH = 0x08, /* Whole data pipe is pushed */ + RST = 0x04, /* Reset connection */ + SYN = 0x02, /* Pkt. 
/*
 * One countdown timer.  Timers in state TcptimerON are kept on the
 * doubly linked Tcppriv.timers list (see timerstate).  tcpackproc
 * decrements count once per pass and, when it reaches zero, marks
 * the timer TcptimerDONE and calls func(arg).
 */
typedef struct Tcptimer Tcptimer;
struct Tcptimer
{
	Tcptimer	*next;		/* next timer on the active list */
	Tcptimer	*prev;		/* previous timer on the active list */
	Tcptimer	*readynext;	/* links timers that expired in the same tcpackproc pass */
	int	state;			/* TcptimerOFF, TcptimerON or TcptimerDONE */
	int	start;			/* value count is reloaded with by tcpgo; 0 means tcpgo does nothing */
	int	count;			/* passes left before the timer fires */
	void	(*func)(void*);		/* called on expiry; may be nil */
	void	*arg;			/* argument passed to func */
};
/*
 * Per-conversation TCP control block.
 *
 * the qlock in the Conv locks this structure
 */
typedef struct Tcpctl Tcpctl;
struct Tcpctl
{
	uchar	state;			/* Connection state */
	uchar	type;			/* Listening or active connection */
	uchar	code;			/* Icmp code */
	struct {
		ulong	una;		/* Unacked data pointer */
		ulong	nxt;		/* Next sequence expected */
		ulong	ptr;		/* Data pointer */
		ulong	wnd;		/* Tcp send window */
		ulong	urg;		/* Urgent data pointer */
		ulong	wl2;		/* initialised to iss when the SYN is set up */
		int	scale;		/* how much to right shift window in xmitted packets */
		/* to implement tahoe and reno TCP */
		ulong	dupacks;	/* number of duplicate acks rcvd */
		int	recovery;	/* loss recovery flag */
		ulong	rxt;		/* right window marker for recovery */
	} snd;
	struct {
		ulong	nxt;		/* Receive pointer to next uchar slot */
		ulong	wnd;		/* Receive window incoming */
		ulong	urg;		/* Urgent pointer */
		int	blocked;	/* set by tcprcvwin when the computed window is 0 */
		int	una;		/* unacked data segs */
		int	scale;		/* how much to left shift window in rcved packets */
	} rcv;
	ulong	iss;			/* Initial sequence number */
	int	sawwsopt;		/* true if we saw a wsopt on the incoming SYN */
	ulong	cwind;			/* Congestion window */
	int	scale;			/* desired snd.scale */
	ushort	ssthresh;		/* Slow start threshold */
	int	resent;			/* Bytes just resent */
	int	irs;			/* Initial received sequence; NOTE(review): int here, ulong in Limbo.irs */
	ushort	mss;			/* Maximum segment size */
	int	rerecv;			/* Overlap of data re-received */
	ulong	window;			/* Receive window */
	uchar	backoff;		/* Exponential backoff counter */
	int	backedoff;		/* ms we've backed off for rexmits */
	uchar	flags;			/* State flags (FORCE, CLONE, RETRAN, ACTIVE, SYNACK) */
	Reseq	*reseq;			/* Resequencing queue */
	Tcptimer	timer;		/* Activity timer */
	Tcptimer	acktimer;	/* Acknowledge timer */
	Tcptimer	rtt_timer;	/* Round trip timer */
	Tcptimer	katimer;	/* keep alive timer */
	ulong	rttseq;			/* Round trip sequence */
	int	srtt;			/* Smoothed round trip time, kept shifted left by LOGAGAIN */
	int	mdev;			/* Mean deviation of round trip, kept shifted left by LOGDGAIN */
	int	kacounter;		/* count down for keep alive */
	uint	sndsyntime;		/* time syn sent */
	ulong	time;			/* time Finwait2 or Syn_received was sent */
	int	nochecksum;		/* non-zero means don't send checksums */
	int	flgcnt;			/* number of flags in the sequence (SYN, FIN) */

	union {
		Tcp4hdr	tcp4hdr;
		Tcp6hdr	tcp6hdr;
	} protohdr;			/* prototype header */
};
/*
 * One half-open incoming call: a SYN has been answered with a SYN ACK
 * and we are waiting for the final ACK.  Entries hang off the
 * Tcppriv.lht hash chains, indexed by hashipa(raddr, rport).
 */
typedef struct Limbo Limbo;
struct Limbo
{
	Limbo	*next;			/* next entry on the same hash chain */

	uchar	laddr[IPaddrlen];	/* local address the SYN was sent to */
	uchar	raddr[IPaddrlen];	/* address of the caller */
	ushort	lport;			/* local port */
	ushort	rport;			/* caller's port */
	ulong	irs;			/* initial received sequence */
	ulong	iss;			/* initial sent sequence */
	ushort	mss;			/* mss from the other end */
	ushort	rcvscale;		/* how much to scale rcvd windows */
	ushort	sndscale;		/* how much to scale sent windows */
	ulong	lastsend;		/* last time we sent a synack */
	uchar	version;		/* v4 or v6 */
	uchar	rexmits;		/* number of retransmissions */
};
+ * + * To avoid stateless Conv hogs, we pick a sequence number at random. If + * it that number gets acked by the other end, we shut down the connection. + * Look for tcpporthogedefense in the code. + */ +int tcpporthogdefense = 0; + +int addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort); +void getreseq(Tcpctl*, Tcp*, Block**, ushort*); +void localclose(Conv*, char*); +void procsyn(Conv*, Tcp*); +void tcpiput(Proto*, Ipifc*, Block*); +void tcpoutput(Conv*); +int tcptrim(Tcpctl*, Tcp*, Block**, ushort*); +void tcpstart(Conv*, int); +void tcptimeout(void*); +void tcpsndsyn(Conv*, Tcpctl*); +void tcprcvwin(Conv*); +void tcpacktimer(void*); +void tcpkeepalive(void*); +void tcpsetkacounter(Tcpctl*); +void tcprxmit(Conv*); +void tcpsettimer(Tcpctl*); +void tcpsynackrtt(Conv*); +void tcpsetscale(Conv*, Tcpctl*, ushort, ushort); + +static void limborexmit(Proto*); +static void limbo(Conv*, uchar*, uchar*, Tcp*, int); + +void +tcpsetstate(Conv *s, uchar newstate) +{ + Tcpctl *tcb; + uchar oldstate; + Tcppriv *tpriv; + + tpriv = s->p->priv; + + tcb = (Tcpctl*)s->ptcl; + + oldstate = tcb->state; + if(oldstate == newstate) + return; + + if(oldstate == Established) + tpriv->stats[CurrEstab]--; + if(newstate == Established) + tpriv->stats[CurrEstab]++; + + /** + print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport, + tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab ); + **/ + + switch(newstate) { + case Closed: + qclose(s->rq); + qclose(s->wq); + qclose(s->eq); + break; + + case Close_wait: /* Remote closes */ + qhangup(s->rq, nil); + break; + } + + tcb->state = newstate; + + if(oldstate == Syn_sent && newstate != Closed) + Fsconnected(s, nil); +} + +static char* +tcpconnect(Conv *c, char **argv, int argc) +{ + char *e; + + e = Fsstdconnect(c, argv, argc); + if(e != nil) + return e; + tcpstart(c, TCP_CONNECT); + + return nil; +} + +static int +tcpstate(Conv *c, char *state, int n) +{ + Tcpctl *s; + + s = (Tcpctl*)(c->ptcl); + + return snprint(state, n, + 
"%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n", + tcpstates[s->state], + c->rq ? qlen(c->rq) : 0, + c->wq ? qlen(c->wq) : 0, + s->srtt, s->mdev, + s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale, + s->timer.start, s->timer.count, s->rerecv, + s->katimer.start, s->katimer.count); +} + +static int +tcpinuse(Conv *c) +{ + Tcpctl *s; + + s = (Tcpctl*)(c->ptcl); + return s->state != Closed; +} + +static char* +tcpannounce(Conv *c, char **argv, int argc) +{ + char *e; + + e = Fsstdannounce(c, argv, argc); + if(e != nil) + return e; + tcpstart(c, TCP_LISTEN); + Fsconnected(c, nil); + + return nil; +} + +/* + * tcpclose is always called with the q locked + */ +static void +tcpclose(Conv *c) +{ + Tcpctl *tcb; + + tcb = (Tcpctl*)c->ptcl; + + qhangup(c->rq, nil); + qhangup(c->wq, nil); + qhangup(c->eq, nil); + qflush(c->rq); + + switch(tcb->state) { + case Listen: + /* + * reset any incoming calls to this listener + */ + Fsconnected(c, "Hangup"); + + localclose(c, nil); + break; + case Closed: + case Syn_sent: + localclose(c, nil); + break; + case Syn_received: + case Established: + tcb->flgcnt++; + tcb->snd.nxt++; + tcpsetstate(c, Finwait1); + tcpoutput(c); + break; + case Close_wait: + tcb->flgcnt++; + tcb->snd.nxt++; + tcpsetstate(c, Last_ack); + tcpoutput(c); + break; + } +} + +void +tcpkick(void *x) +{ + Conv *s = x; + Tcpctl *tcb; + + tcb = (Tcpctl*)s->ptcl; + + if(waserror()){ + qunlock(s); + nexterror(); + } + qlock(s); + + switch(tcb->state) { + case Syn_sent: + case Syn_received: + case Established: + case Close_wait: + /* + * Push data + */ + tcprcvwin(s); + tcpoutput(s); + break; + default: + localclose(s, "Hangup"); + break; + } + + qunlock(s); + poperror(); +} + +void +tcprcvwin(Conv *s) /* Call with tcb locked */ +{ + int w; + Tcpctl *tcb; + + tcb = (Tcpctl*)s->ptcl; + w = tcb->window - qlen(s->rq); + if(w < 0) + w = 0; + tcb->rcv.wnd = w; + if(w 
== 0) + tcb->rcv.blocked = 1; +} + +void +tcpacktimer(void *v) +{ + Tcpctl *tcb; + Conv *s; + + s = v; + tcb = (Tcpctl*)s->ptcl; + + if(waserror()){ + qunlock(s); + nexterror(); + } + qlock(s); + if(tcb->state != Closed){ + tcb->flags |= FORCE; + tcprcvwin(s); + tcpoutput(s); + } + qunlock(s); + poperror(); +} + +static void +tcpcreate(Conv *c) +{ + c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c); + c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c); +} + +static void +timerstate(Tcppriv *priv, Tcptimer *t, int newstate) +{ + if(newstate != TcptimerON){ + if(t->state == TcptimerON){ + // unchain + if(priv->timers == t){ + priv->timers = t->next; + if(t->prev != nil) + panic("timerstate1"); + } + if(t->next) + t->next->prev = t->prev; + if(t->prev) + t->prev->next = t->next; + t->next = t->prev = nil; + } + } else { + if(t->state != TcptimerON){ + // chain + if(t->prev != nil || t->next != nil) + panic("timerstate2"); + t->prev = nil; + t->next = priv->timers; + if(t->next) + t->next->prev = t; + priv->timers = t; + } + } + t->state = newstate; +} + +void +tcpackproc(void *a) +{ + Tcptimer *t, *tp, *timeo; + Proto *tcp; + Tcppriv *priv; + int loop; + + tcp = a; + priv = tcp->priv; + + for(;;) { + tsleep(&up->sleep, return0, 0, MSPTICK); + + qlock(&priv->tl); + timeo = nil; + loop = 0; + for(t = priv->timers; t != nil; t = tp) { + if(loop++ > 10000) + panic("tcpackproc1"); + tp = t->next; + if(t->state == TcptimerON) { + t->count--; + if(t->count == 0) { + timerstate(priv, t, TcptimerDONE); + t->readynext = timeo; + timeo = t; + } + } + } + qunlock(&priv->tl); + + loop = 0; + for(t = timeo; t != nil; t = t->readynext) { + if(loop++ > 10000) + panic("tcpackproc2"); + if(t->state == TcptimerDONE && t->func != nil && !waserror()){ + (*t->func)(t->arg); + poperror(); + } + } + + limborexmit(tcp); + } +} + +void +tcpgo(Tcppriv *priv, Tcptimer *t) +{ + if(t == nil || t->start == 0) + return; + + qlock(&priv->tl); + t->count = t->start; + timerstate(priv, t, TcptimerON); + 
/*
 * Exponential backoff multiplier: returns 2^n.
 *
 * The shift count is clamped to 0..30.  Shifting an int by a negative
 * amount, or far enough to reach the sign bit, is undefined, and the
 * counter passed in (Tcpctl.backoff) is a uchar that can hold values
 * well above 30.  Results for 0 <= n <= 30 are unchanged.
 */
int
backoff(int n)
{
	enum { Maxshift = 30 };		/* 1<<30 is the largest power of two a 32-bit int holds */

	if(n < 0)
		n = 0;
	else if(n > Maxshift)
		n = Maxshift;
	return 1 << n;
}
/ MSPTICK; + tcb->acktimer.func = tcpacktimer; + tcb->acktimer.arg = s; + tcb->katimer.start = DEF_KAT / MSPTICK; + tcb->katimer.func = tcpkeepalive; + tcb->katimer.arg = s; + + mss = DEF_MSS; + + /* create a prototype(pseudo) header */ + if(mode != TCP_LISTEN){ + if(ipcmp(s->laddr, IPnoaddr) == 0) + findlocalip(s->p->f, s->laddr, s->raddr); + + switch(s->ipversion){ + case V4: + h4 = &tcb->protohdr.tcp4hdr; + memset(h4, 0, sizeof(*h4)); + h4->proto = IP_TCPPROTO; + hnputs(h4->tcpsport, s->lport); + hnputs(h4->tcpdport, s->rport); + v6tov4(h4->tcpsrc, s->laddr); + v6tov4(h4->tcpdst, s->raddr); + break; + case V6: + h6 = &tcb->protohdr.tcp6hdr; + memset(h6, 0, sizeof(*h6)); + h6->proto = IP_TCPPROTO; + hnputs(h6->tcpsport, s->lport); + hnputs(h6->tcpdport, s->rport); + ipmove(h6->tcpsrc, s->laddr); + ipmove(h6->tcpdst, s->raddr); + mss = DEF_MSS6; + break; + default: + panic("inittcpctl: version %d", s->ipversion); + } + } + + tcb->mss = tcb->cwind = mss; + + /* default is no window scaling */ + tcb->window = QMAX; + tcb->rcv.wnd = QMAX; + tcb->rcv.scale = 0; + tcb->snd.scale = 0; + qsetlimit(s->rq, QMAX); +} + +/* + * called with s qlocked + */ +void +tcpstart(Conv *s, int mode) +{ + Tcpctl *tcb; + Tcppriv *tpriv; + char kpname[KNAMELEN]; + + tpriv = s->p->priv; + + if(tpriv->ackprocstarted == 0){ + qlock(&tpriv->apl); + if(tpriv->ackprocstarted == 0){ + sprint(kpname, "#I%dtcpack", s->p->f->dev); + kproc(kpname, tcpackproc, s->p, 0); + tpriv->ackprocstarted = 1; + } + qunlock(&tpriv->apl); + } + + tcb = (Tcpctl*)s->ptcl; + + inittcpctl(s, mode); + + iphtadd(&tpriv->ht, s); + switch(mode) { + case TCP_LISTEN: + tpriv->stats[PassiveOpens]++; + tcb->flags |= CLONE; + tcpsetstate(s, Listen); + break; + + case TCP_CONNECT: + tpriv->stats[ActiveOpens]++; + tcb->flags |= ACTIVE; + tcpsndsyn(s, tcb); + tcpsetstate(s, Syn_sent); + tcpoutput(s); + break; + } +} + +static char* +tcpflag(ushort flag) +{ + static char buf[128]; + + sprint(buf, "%d", flag>>10); /* Head len */ + 
if(flag & URG) + strcat(buf, " URG"); + if(flag & ACK) + strcat(buf, " ACK"); + if(flag & PSH) + strcat(buf, " PSH"); + if(flag & RST) + strcat(buf, " RST"); + if(flag & SYN) + strcat(buf, " SYN"); + if(flag & FIN) + strcat(buf, " FIN"); + + return buf; +} + +Block * +htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb) +{ + int dlen; + Tcp6hdr *h; + ushort csum; + ushort hdrlen, optpad = 0; + uchar *opt; + + hdrlen = TCP6_HDRSIZE; + if(tcph->flags & SYN){ + if(tcph->mss) + hdrlen += MSS_LENGTH; + if(tcph->ws) + hdrlen += WS_LENGTH; + optpad = hdrlen & 3; + if(optpad) + optpad = 4 - optpad; + hdrlen += optpad; + } + + if(data) { + dlen = blocklen(data); + data = padblock(data, hdrlen + TCP6_PKT); + if(data == nil) + return nil; + } + else { + dlen = 0; + data = allocb(hdrlen + TCP6_PKT + 64); /* the 64 pad is to meet mintu's */ + if(data == nil) + return nil; + data->wp += hdrlen + TCP6_PKT; + } + + /* copy in pseudo ip header plus port numbers */ + h = (Tcp6hdr *)(data->rp); + memmove(h, ph, TCP6_TCBPHDRSZ); + + /* compose pseudo tcp header, do cksum calculation */ + hnputl(h->vcf, hdrlen + dlen); + h->ploadlen[0] = h->ploadlen[1] = h->proto = 0; + h->ttl = ph->proto; + + /* copy in variable bits */ + hnputl(h->tcpseq, tcph->seq); + hnputl(h->tcpack, tcph->ack); + hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags); + hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? 
tcb->snd.scale : 0)); + hnputs(h->tcpurg, tcph->urg); + + if(tcph->flags & SYN){ + opt = h->tcpopt; + if(tcph->mss != 0){ + *opt++ = MSSOPT; + *opt++ = MSS_LENGTH; + hnputs(opt, tcph->mss); + opt += 2; + } + if(tcph->ws != 0){ + *opt++ = WSOPT; + *opt++ = WS_LENGTH; + *opt++ = tcph->ws; + } + while(optpad-- > 0) + *opt++ = NOOPOPT; + } + + if(tcb != nil && tcb->nochecksum){ + h->tcpcksum[0] = h->tcpcksum[1] = 0; + } else { + csum = ptclcsum(data, TCP6_IPLEN, hdrlen+dlen+TCP6_PHDRSIZE); + hnputs(h->tcpcksum, csum); + } + + /* move from pseudo header back to normal ip header */ + memset(h->vcf, 0, 4); + h->vcf[0] = IP_VER6; + hnputs(h->ploadlen, hdrlen+dlen); + h->proto = ph->proto; + + return data; +} + +Block * +htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb) +{ + int dlen; + Tcp4hdr *h; + ushort csum; + ushort hdrlen, optpad = 0; + uchar *opt; + + hdrlen = TCP4_HDRSIZE; + if(tcph->flags & SYN){ + if(tcph->mss) + hdrlen += MSS_LENGTH; + if(tcph->ws) + hdrlen += WS_LENGTH; + optpad = hdrlen & 3; + if(optpad) + optpad = 4 - optpad; + hdrlen += optpad; + } + + if(data) { + dlen = blocklen(data); + data = padblock(data, hdrlen + TCP4_PKT); + if(data == nil) + return nil; + } + else { + dlen = 0; + data = allocb(hdrlen + TCP4_PKT + 64); /* the 64 pad is to meet mintu's */ + if(data == nil) + return nil; + data->wp += hdrlen + TCP4_PKT; + } + + /* copy in pseudo ip header plus port numbers */ + h = (Tcp4hdr *)(data->rp); + memmove(h, ph, TCP4_TCBPHDRSZ); + + /* copy in variable bits */ + hnputs(h->tcplen, hdrlen + dlen); + hnputl(h->tcpseq, tcph->seq); + hnputl(h->tcpack, tcph->ack); + hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags); + hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? 
tcb->snd.scale : 0)); + hnputs(h->tcpurg, tcph->urg); + + if(tcph->flags & SYN){ + opt = h->tcpopt; + if(tcph->mss != 0){ + *opt++ = MSSOPT; + *opt++ = MSS_LENGTH; + hnputs(opt, tcph->mss); + opt += 2; + } + if(tcph->ws != 0){ + *opt++ = WSOPT; + *opt++ = WS_LENGTH; + *opt++ = tcph->ws; + } + while(optpad-- > 0) + *opt++ = NOOPOPT; + } + + if(tcb != nil && tcb->nochecksum){ + h->tcpcksum[0] = h->tcpcksum[1] = 0; + } else { + csum = ptclcsum(data, TCP4_IPLEN, hdrlen+dlen+TCP4_PHDRSIZE); + hnputs(h->tcpcksum, csum); + } + + return data; +} + +int +ntohtcp6(Tcp *tcph, Block **bpp) +{ + Tcp6hdr *h; + uchar *optr; + ushort hdrlen; + ushort optlen; + int n; + + *bpp = pullupblock(*bpp, TCP6_PKT+TCP6_HDRSIZE); + if(*bpp == nil) + return -1; + + h = (Tcp6hdr *)((*bpp)->rp); + tcph->source = nhgets(h->tcpsport); + tcph->dest = nhgets(h->tcpdport); + tcph->seq = nhgetl(h->tcpseq); + tcph->ack = nhgetl(h->tcpack); + hdrlen = (h->tcpflag[0]>>2) & ~3; + if(hdrlen < TCP6_HDRSIZE) { + freeblist(*bpp); + return -1; + } + + tcph->flags = h->tcpflag[1]; + tcph->wnd = nhgets(h->tcpwin); + tcph->urg = nhgets(h->tcpurg); + tcph->mss = 0; + tcph->ws = 0; + tcph->len = nhgets(h->ploadlen) - hdrlen; + + *bpp = pullupblock(*bpp, hdrlen+TCP6_PKT); + if(*bpp == nil) + return -1; + + optr = h->tcpopt; + n = hdrlen - TCP6_HDRSIZE; + while(n > 0 && *optr != EOLOPT) { + if(*optr == NOOPOPT) { + n--; + optr++; + continue; + } + optlen = optr[1]; + if(optlen < 2 || optlen > n) + break; + switch(*optr) { + case MSSOPT: + if(optlen == MSS_LENGTH) + tcph->mss = nhgets(optr+2); + break; + case WSOPT: + if(optlen == WS_LENGTH && *(optr+2) <= 14) + tcph->ws = HaveWS | *(optr+2); + break; + } + n -= optlen; + optr += optlen; + } + return hdrlen; +} + +int +ntohtcp4(Tcp *tcph, Block **bpp) +{ + Tcp4hdr *h; + uchar *optr; + ushort hdrlen; + ushort optlen; + int n; + + *bpp = pullupblock(*bpp, TCP4_PKT+TCP4_HDRSIZE); + if(*bpp == nil) + return -1; + + h = (Tcp4hdr *)((*bpp)->rp); + tcph->source = 
nhgets(h->tcpsport); + tcph->dest = nhgets(h->tcpdport); + tcph->seq = nhgetl(h->tcpseq); + tcph->ack = nhgetl(h->tcpack); + + hdrlen = (h->tcpflag[0]>>2) & ~3; + if(hdrlen < TCP4_HDRSIZE) { + freeblist(*bpp); + return -1; + } + + tcph->flags = h->tcpflag[1]; + tcph->wnd = nhgets(h->tcpwin); + tcph->urg = nhgets(h->tcpurg); + tcph->mss = 0; + tcph->ws = 0; + tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT); + + *bpp = pullupblock(*bpp, hdrlen+TCP4_PKT); + if(*bpp == nil) + return -1; + + optr = h->tcpopt; + n = hdrlen - TCP4_HDRSIZE; + while(n > 0 && *optr != EOLOPT) { + if(*optr == NOOPOPT) { + n--; + optr++; + continue; + } + optlen = optr[1]; + if(optlen < 2 || optlen > n) + break; + switch(*optr) { + case MSSOPT: + if(optlen == MSS_LENGTH) + tcph->mss = nhgets(optr+2); + break; + case WSOPT: + if(optlen == WS_LENGTH && *(optr+2) <= 14) + tcph->ws = HaveWS | *(optr+2); + break; + } + n -= optlen; + optr += optlen; + } + return hdrlen; +} + +/* + * For outgiing calls, generate an initial sequence + * number and put a SYN on the send queue + */ +void +tcpsndsyn(Conv *s, Tcpctl *tcb) +{ + tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16); + tcb->rttseq = tcb->iss; + tcb->snd.wl2 = tcb->iss; + tcb->snd.una = tcb->iss; + tcb->snd.ptr = tcb->rttseq; + tcb->snd.nxt = tcb->rttseq; + tcb->flgcnt++; + tcb->flags |= FORCE; + tcb->sndsyntime = NOW; + + /* set desired mss and scale */ + tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale); +} + +void +sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg, uchar version, char *reason) +{ + Block *hbp; + uchar rflags; + Tcppriv *tpriv; + Tcp4hdr ph4; + Tcp6hdr ph6; + + netlog(tcp->f, Logtcp, "sndrst: %s", reason); + + tpriv = tcp->priv; + + if(seg->flags & RST) + return; + + /* make pseudo header */ + switch(version) { + case V4: + memset(&ph4, 0, sizeof(ph4)); + ph4.vihl = IP_VER4; + v6tov4(ph4.tcpsrc, dest); + v6tov4(ph4.tcpdst, source); + ph4.proto = IP_TCPPROTO; + hnputs(ph4.tcplen, TCP4_HDRSIZE); + 
hnputs(ph4.tcpsport, seg->dest); + hnputs(ph4.tcpdport, seg->source); + break; + case V6: + memset(&ph6, 0, sizeof(ph6)); + ph6.vcf[0] = IP_VER6; + ipmove(ph6.tcpsrc, dest); + ipmove(ph6.tcpdst, source); + ph6.proto = IP_TCPPROTO; + hnputs(ph6.ploadlen, TCP6_HDRSIZE); + hnputs(ph6.tcpsport, seg->dest); + hnputs(ph6.tcpdport, seg->source); + break; + default: + panic("sndrst: version %d", version); + } + + tpriv->stats[OutRsts]++; + rflags = RST; + + /* convince the other end that this reset is in band */ + if(seg->flags & ACK) { + seg->seq = seg->ack; + seg->ack = 0; + } + else { + rflags |= ACK; + seg->ack = seg->seq; + seg->seq = 0; + if(seg->flags & SYN) + seg->ack++; + seg->ack += length; + if(seg->flags & FIN) + seg->ack++; + } + seg->flags = rflags; + seg->wnd = 0; + seg->urg = 0; + seg->mss = 0; + seg->ws = 0; + switch(version) { + case V4: + hbp = htontcp4(seg, nil, &ph4, nil); + if(hbp == nil) + return; + ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil); + break; + case V6: + hbp = htontcp6(seg, nil, &ph6, nil); + if(hbp == nil) + return; + ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil); + break; + default: + panic("sndrst2: version %d", version); + } +} + +/* + * send a reset to the remote side and close the conversation + * called with s qlocked + */ +char* +tcphangup(Conv *s) +{ + Tcp seg; + Tcpctl *tcb; + Block *hbp; + + tcb = (Tcpctl*)s->ptcl; + if(waserror()) + return commonerror(); + if(s->raddr != 0) { + if(!waserror()){ + seg.flags = RST | ACK; + seg.ack = tcb->rcv.nxt; + tcb->rcv.una = 0; + seg.seq = tcb->snd.ptr; + seg.wnd = 0; + seg.urg = 0; + seg.mss = 0; + seg.ws = 0; + switch(s->ipversion) { + case V4: + tcb->protohdr.tcp4hdr.vihl = IP_VER4; + hbp = htontcp4(&seg, nil, &tcb->protohdr.tcp4hdr, tcb); + ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s); + break; + case V6: + tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6; + hbp = htontcp6(&seg, nil, &tcb->protohdr.tcp6hdr, tcb); + ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s); + break; + default: + 
panic("tcphangup: version %d", s->ipversion); + } + poperror(); + } + } + localclose(s, nil); + poperror(); + return nil; +} + +/* + * (re)send a SYN ACK + */ +int +sndsynack(Proto *tcp, Limbo *lp) +{ + Block *hbp; + Tcp4hdr ph4; + Tcp6hdr ph6; + Tcp seg; + int scale; + + /* make pseudo header */ + switch(lp->version) { + case V4: + memset(&ph4, 0, sizeof(ph4)); + ph4.vihl = IP_VER4; + v6tov4(ph4.tcpsrc, lp->laddr); + v6tov4(ph4.tcpdst, lp->raddr); + ph4.proto = IP_TCPPROTO; + hnputs(ph4.tcplen, TCP4_HDRSIZE); + hnputs(ph4.tcpsport, lp->lport); + hnputs(ph4.tcpdport, lp->rport); + break; + case V6: + memset(&ph6, 0, sizeof(ph6)); + ph6.vcf[0] = IP_VER6; + ipmove(ph6.tcpsrc, lp->laddr); + ipmove(ph6.tcpdst, lp->raddr); + ph6.proto = IP_TCPPROTO; + hnputs(ph6.ploadlen, TCP6_HDRSIZE); + hnputs(ph6.tcpsport, lp->lport); + hnputs(ph6.tcpdport, lp->rport); + break; + default: + panic("sndrst: version %d", lp->version); + } + + seg.seq = lp->iss; + seg.ack = lp->irs+1; + seg.flags = SYN|ACK; + seg.urg = 0; + seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale); + seg.wnd = QMAX; + + /* if the other side set scale, we should too */ + if(lp->rcvscale){ + seg.ws = scale; + lp->sndscale = scale; + } else { + seg.ws = 0; + lp->sndscale = 0; + } + + switch(lp->version) { + case V4: + hbp = htontcp4(&seg, nil, &ph4, nil); + if(hbp == nil) + return -1; + ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil); + break; + case V6: + hbp = htontcp6(&seg, nil, &ph6, nil); + if(hbp == nil) + return -1; + ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil); + break; + default: + panic("sndsnack: version %d", lp->version); + } + lp->lastsend = NOW; + return 0; +} + +#define hashipa(a, p) ( ( (a)[IPaddrlen-2] + (a)[IPaddrlen-1] + p )&LHTMASK ) + +/* + * put a call into limbo and respond with a SYN ACK + * + * called with proto locked + */ +static void +limbo(Conv *s, uchar *source, uchar *dest, Tcp *seg, int version) +{ + Limbo *lp, **l; + Tcppriv *tpriv; + int h; + + tpriv = s->p->priv; + h = 
hashipa(source, seg->source); + + for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){ + lp = *l; + if(lp->lport != seg->dest || lp->rport != seg->source || lp->version != version) + continue; + if(ipcmp(lp->raddr, source) != 0) + continue; + if(ipcmp(lp->laddr, dest) != 0) + continue; + + /* each new SYN restarts the retransmits */ + lp->irs = seg->seq; + break; + } + lp = *l; + if(lp == nil){ + if(tpriv->nlimbo >= Maxlimbo && tpriv->lht[h]){ + lp = tpriv->lht[h]; + tpriv->lht[h] = lp->next; + lp->next = nil; + } else { + lp = malloc(sizeof(*lp)); + if(lp == nil) + return; + tpriv->nlimbo++; + } + *l = lp; + lp->version = version; + ipmove(lp->laddr, dest); + ipmove(lp->raddr, source); + lp->lport = seg->dest; + lp->rport = seg->source; + lp->mss = seg->mss; + lp->rcvscale = seg->ws; + lp->irs = seg->seq; + lp->iss = (nrand(1<<16)<<16)|nrand(1<<16); + } + + if(sndsynack(s->p, lp) < 0){ + *l = lp->next; + tpriv->nlimbo--; + free(lp); + } +} + +/* + * resend SYN ACK's once every SYNACK_RXTIMER ms. + */ +static void +limborexmit(Proto *tcp) +{ + Tcppriv *tpriv; + Limbo **l, *lp; + int h; + int seen; + ulong now; + + tpriv = tcp->priv; + + if(!canqlock(tcp)) + return; + seen = 0; + now = NOW; + for(h = 0; h < NLHT && seen < tpriv->nlimbo; h++){ + for(l = &tpriv->lht[h]; *l != nil && seen < tpriv->nlimbo; ){ + lp = *l; + seen++; + if(now - lp->lastsend < (lp->rexmits+1)*SYNACK_RXTIMER) + continue; + + /* time it out after 1 second */ + if(++(lp->rexmits) > 5){ + tpriv->nlimbo--; + *l = lp->next; + free(lp); + continue; + } + + /* if we're being attacked, don't bother resending SYN ACK's */ + if(tpriv->nlimbo > 100) + continue; + + if(sndsynack(tcp, lp) < 0){ + tpriv->nlimbo--; + *l = lp->next; + free(lp); + continue; + } + + l = &lp->next; + } + } + qunlock(tcp); +} + +/* + * lookup call in limbo. if found, throw it out. 
+ * + * called with proto locked + */ +static void +limborst(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version) +{ + Limbo *lp, **l; + int h; + Tcppriv *tpriv; + + tpriv = s->p->priv; + + /* find a call in limbo */ + h = hashipa(src, segp->source); + for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){ + lp = *l; + if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version) + continue; + if(ipcmp(lp->laddr, dst) != 0) + continue; + if(ipcmp(lp->raddr, src) != 0) + continue; + + /* RST can only follow the SYN */ + if(segp->seq == lp->irs+1){ + tpriv->nlimbo--; + *l = lp->next; + free(lp); + } + break; + } +} + +/* + * come here when we finally get an ACK to our SYN-ACK. + * lookup call in limbo. if found, create a new conversation + * + * called with proto locked + */ +static Conv* +tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version) +{ + Conv *new; + Tcpctl *tcb; + Tcppriv *tpriv; + Tcp4hdr *h4; + Tcp6hdr *h6; + Limbo *lp, **l; + int h; + + /* unless it's just an ack, it can't be someone coming out of limbo */ + if((segp->flags & SYN) || (segp->flags & ACK) == 0) + return nil; + + tpriv = s->p->priv; + + /* find a call in limbo */ + h = hashipa(src, segp->source); + for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){ + netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d", + src, segp->source, lp->raddr, lp->rport, + dst, segp->dest, lp->laddr, lp->lport, + version, lp->version + ); + + if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version) + continue; + if(ipcmp(lp->laddr, dst) != 0) + continue; + if(ipcmp(lp->raddr, src) != 0) + continue; + + /* we're assuming no data with the initial SYN */ + if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){ + netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux", + segp->seq, lp->irs+1, segp->ack, lp->iss+1); + lp = nil; + } else { + tpriv->nlimbo--; + *l = lp->next; + } + break; + } + if(lp == nil) + return 
nil; + + new = Fsnewcall(s, src, segp->source, dst, segp->dest, version); + if(new == nil) + return nil; + + memmove(new->ptcl, s->ptcl, sizeof(Tcpctl)); + tcb = (Tcpctl*)new->ptcl; + tcb->flags &= ~CLONE; + tcb->timer.arg = new; + tcb->timer.state = TcptimerOFF; + tcb->acktimer.arg = new; + tcb->acktimer.state = TcptimerOFF; + tcb->katimer.arg = new; + tcb->katimer.state = TcptimerOFF; + tcb->rtt_timer.arg = new; + tcb->rtt_timer.state = TcptimerOFF; + + tcb->irs = lp->irs; + tcb->rcv.nxt = tcb->irs+1; + tcb->rcv.urg = tcb->rcv.nxt; + + tcb->iss = lp->iss; + tcb->rttseq = tcb->iss; + tcb->snd.wl2 = tcb->iss; + tcb->snd.una = tcb->iss+1; + tcb->snd.ptr = tcb->iss+1; + tcb->snd.nxt = tcb->iss+1; + tcb->flgcnt = 0; + tcb->flags |= SYNACK; + + /* our sending max segment size cannot be bigger than what he asked for */ + if(lp->mss != 0 && lp->mss < tcb->mss) + tcb->mss = lp->mss; + + /* window scaling */ + tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale); + + /* the congestion window always starts out as a single segment */ + tcb->snd.wnd = segp->wnd; + tcb->cwind = tcb->mss; + + /* set initial round trip time */ + tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER; + tcpsynackrtt(new); + + free(lp); + + /* set up proto header */ + switch(version){ + case V4: + h4 = &tcb->protohdr.tcp4hdr; + memset(h4, 0, sizeof(*h4)); + h4->proto = IP_TCPPROTO; + hnputs(h4->tcpsport, new->lport); + hnputs(h4->tcpdport, new->rport); + v6tov4(h4->tcpsrc, dst); + v6tov4(h4->tcpdst, src); + break; + case V6: + h6 = &tcb->protohdr.tcp6hdr; + memset(h6, 0, sizeof(*h6)); + h6->proto = IP_TCPPROTO; + hnputs(h6->tcpsport, new->lport); + hnputs(h6->tcpdport, new->rport); + ipmove(h6->tcpsrc, dst); + ipmove(h6->tcpdst, src); + break; + default: + panic("tcpincoming: version %d", new->ipversion); + } + + tcpsetstate(new, Established); + + iphtadd(&tpriv->ht, new); + + return new; +} + +int +seq_within(ulong x, ulong low, ulong high) +{ + if(low <= high){ + if(low <= x && x <= high) + 
return 1; + } + else { + if(x >= low || x <= high) + return 1; + } + return 0; +} + +int +seq_lt(ulong x, ulong y) +{ + return (int)(x-y) < 0; +} + +int +seq_le(ulong x, ulong y) +{ + return (int)(x-y) <= 0; +} + +int +seq_gt(ulong x, ulong y) +{ + return (int)(x-y) > 0; +} + +int +seq_ge(ulong x, ulong y) +{ + return (int)(x-y) >= 0; +} + +/* + * use the time between the first SYN and it's ack as the + * initial round trip time + */ +void +tcpsynackrtt(Conv *s) +{ + Tcpctl *tcb; + int delta; + Tcppriv *tpriv; + + tcb = (Tcpctl*)s->ptcl; + tpriv = s->p->priv; + + delta = NOW - tcb->sndsyntime; + tcb->srtt = delta<<LOGAGAIN; + tcb->mdev = delta<<LOGDGAIN; + + /* halt round trip timer */ + tcphalt(tpriv, &tcb->rtt_timer); +} + +void +update(Conv *s, Tcp *seg) +{ + int rtt, delta; + Tcpctl *tcb; + ulong acked; + ulong expand; + Tcppriv *tpriv; + + tpriv = s->p->priv; + tcb = (Tcpctl*)s->ptcl; + + /* if everything has been acked, force output(?) */ + if(seq_gt(seg->ack, tcb->snd.nxt)) { + tcb->flags |= FORCE; + return; + } + + /* added by Dong Lin for fast retransmission */ + if(seg->ack == tcb->snd.una + && tcb->snd.una != tcb->snd.nxt + && seg->len == 0 + && seg->wnd == tcb->snd.wnd) { + + /* this is a pure ack w/o window update */ + netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n", + tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd); + + if(++tcb->snd.dupacks == TCPREXMTTHRESH) { + /* + * tahoe tcp rxt the packet, half sshthresh, + * and set cwnd to one packet + */ + tcb->snd.recovery = 1; + tcb->snd.rxt = tcb->snd.nxt; + netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt); + tcprxmit(s); + } else { + /* do reno tcp here. 
*/ + } + } + + /* + * update window + */ + if( seq_gt(seg->ack, tcb->snd.wl2) + || (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){ + tcb->snd.wnd = seg->wnd; + tcb->snd.wl2 = seg->ack; + } + + if(!seq_gt(seg->ack, tcb->snd.una)){ + /* + * don't let us hangup if sending into a closed window and + * we're still getting acks + */ + if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){ + tcb->backedoff = MAXBACKMS/4; + } + return; + } + + /* + * any positive ack turns off fast rxt, + * (should we do new-reno on partial acks?) + */ + if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) { + tcb->snd.dupacks = 0; + tcb->snd.recovery = 0; + } else + netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind); + + /* Compute the new send window size */ + acked = seg->ack - tcb->snd.una; + + /* avoid slow start and timers for SYN acks */ + if((tcb->flags & SYNACK) == 0) { + tcb->flags |= SYNACK; + acked--; + tcb->flgcnt--; + goto done; + } + + /* slow start as long as we're not recovering from lost packets */ + if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) { + if(tcb->cwind < tcb->ssthresh) { + expand = tcb->mss; + if(acked < expand) + expand = acked; + } + else + expand = ((int)tcb->mss * tcb->mss) / tcb->cwind; + + if(tcb->cwind + expand < tcb->cwind) + expand = tcb->snd.wnd - tcb->cwind; + if(tcb->cwind + expand > tcb->snd.wnd) + expand = tcb->snd.wnd - tcb->cwind; + tcb->cwind += expand; + } + + /* Adjust the timers according to the round trip time */ + if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) { + tcphalt(tpriv, &tcb->rtt_timer); + if((tcb->flags&RETRAN) == 0) { + tcb->backoff = 0; + tcb->backedoff = 0; + rtt = tcb->rtt_timer.start - tcb->rtt_timer.count; + if(rtt == 0) + rtt = 1; /* otherwise all close systems will rexmit in 0 time */ + rtt *= MSPTICK; + if(tcb->srtt == 0) { + tcb->srtt = rtt << LOGAGAIN; + tcb->mdev = rtt << LOGDGAIN; + } else { + delta = rtt - (tcb->srtt>>LOGAGAIN); + tcb->srtt += delta; + 
if(tcb->srtt <= 0) + tcb->srtt = 1; + + delta = abs(delta) - (tcb->mdev>>LOGDGAIN); + tcb->mdev += delta; + if(tcb->mdev <= 0) + tcb->mdev = 1; + } + tcpsettimer(tcb); + } + } + +done: + if(qdiscard(s->wq, acked) < acked) + tcb->flgcnt--; + + tcb->snd.una = seg->ack; + if(seq_gt(seg->ack, tcb->snd.urg)) + tcb->snd.urg = seg->ack; + + if(tcb->snd.una != tcb->snd.nxt) + tcpgo(tpriv, &tcb->timer); + else + tcphalt(tpriv, &tcb->timer); + + if(seq_lt(tcb->snd.ptr, tcb->snd.una)) + tcb->snd.ptr = tcb->snd.una; + + tcb->flags &= ~RETRAN; + tcb->backoff = 0; + tcb->backedoff = 0; +} + +void +tcpiput(Proto *tcp, Ipifc*, Block *bp) +{ + Tcp seg; + Tcp4hdr *h4; + Tcp6hdr *h6; + int hdrlen; + Tcpctl *tcb; + ushort length; + uchar source[IPaddrlen], dest[IPaddrlen]; + Conv *s; + Fs *f; + Tcppriv *tpriv; + uchar version; + + f = tcp->f; + tpriv = tcp->priv; + + tpriv->stats[InSegs]++; + + h4 = (Tcp4hdr*)(bp->rp); + h6 = (Tcp6hdr*)(bp->rp); + + if((h4->vihl&0xF0)==IP_VER4) { + version = V4; + length = nhgets(h4->length); + v4tov6(dest, h4->tcpdst); + v4tov6(source, h4->tcpsrc); + + h4->Unused = 0; + hnputs(h4->tcplen, length-TCP4_PKT); + if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) && + ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN)) { + tpriv->stats[CsumErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "bad tcp proto cksum\n"); + freeblist(bp); + return; + } + + hdrlen = ntohtcp4(&seg, &bp); + if(hdrlen < 0){ + tpriv->stats[HlenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "bad tcp hdr len\n"); + return; + } + + /* trim the packet to the size claimed by the datagram */ + length -= hdrlen+TCP4_PKT; + bp = trimblock(bp, hdrlen+TCP4_PKT, length); + if(bp == nil){ + tpriv->stats[LenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "tcp len < 0 after trim\n"); + return; + } + } + else { + int ttl = h6->ttl; + int proto = h6->proto; + + version = V6; + length = nhgets(h6->ploadlen); + ipmove(dest, h6->tcpdst); + ipmove(source, h6->tcpsrc); + + 
h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0; + h6->ttl = proto; + hnputl(h6->vcf, length); + if((h6->tcpcksum[0] || h6->tcpcksum[1]) && + ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) { + tpriv->stats[CsumErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "bad tcp proto cksum\n"); + freeblist(bp); + return; + } + h6->ttl = ttl; + h6->proto = proto; + hnputs(h6->ploadlen, length); + + hdrlen = ntohtcp6(&seg, &bp); + if(hdrlen < 0){ + tpriv->stats[HlenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "bad tcp hdr len\n"); + return; + } + + /* trim the packet to the size claimed by the datagram */ + length -= hdrlen; + bp = trimblock(bp, hdrlen+TCP6_PKT, length); + if(bp == nil){ + tpriv->stats[LenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "tcp len < 0 after trim\n"); + return; + } + } + + /* lock protocol while searching for a conversation */ + qlock(tcp); + + /* Look for a matching conversation */ + s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest); + if(s == nil){ + netlog(f, Logtcp, "iphtlook failed"); +reset: + qunlock(tcp); + sndrst(tcp, source, dest, length, &seg, version, "no conversation"); + freeblist(bp); + return; + } + + /* if it's a listener, look for the right flags and get a new conv */ + tcb = (Tcpctl*)s->ptcl; + if(tcb->state == Listen){ + if(seg.flags & RST){ + limborst(s, &seg, source, dest, version); + qunlock(tcp); + freeblist(bp); + return; + } + + /* if this is a new SYN, put the call into limbo */ + if((seg.flags & SYN) && (seg.flags & ACK) == 0){ + limbo(s, source, dest, &seg, version); + qunlock(tcp); + freeblist(bp); + return; + } + + /* + * if there's a matching call in limbo, tcpincoming will + * return it in state Syn_received + */ + s = tcpincoming(s, &seg, source, dest, version); + if(s == nil) + goto reset; + } + + /* The rest of the input state machine is run with the control block + * locked and implements the state machine directly out of the RFC. 
+ * Out-of-band data is ignored - it was always a bad idea. + */ + tcb = (Tcpctl*)s->ptcl; + if(waserror()){ + qunlock(s); + nexterror(); + } + qlock(s); + qunlock(tcp); + + /* fix up window */ + seg.wnd <<= tcb->rcv.scale; + + /* every input packet in puts off the keep alive time out */ + tcpsetkacounter(tcb); + + switch(tcb->state) { + case Closed: + sndrst(tcp, source, dest, length, &seg, version, "sending to Closed"); + goto raise; + case Syn_sent: + if(seg.flags & ACK) { + if(!seq_within(seg.ack, tcb->iss+1, tcb->snd.nxt)) { + sndrst(tcp, source, dest, length, &seg, version, + "bad seq in Syn_sent"); + goto raise; + } + } + if(seg.flags & RST) { + if(seg.flags & ACK) + localclose(s, Econrefused); + goto raise; + } + + if(seg.flags & SYN) { + procsyn(s, &seg); + if(seg.flags & ACK){ + update(s, &seg); + tcpsynackrtt(s); + tcpsetstate(s, Established); + tcpsetscale(s, tcb, seg.ws, tcb->scale); + } + else { + tcb->time = NOW; + tcpsetstate(s, Syn_received); /* DLP - shouldn't this be a reset? */ + } + + if(length != 0 || (seg.flags & FIN)) + break; + + freeblist(bp); + goto output; + } + else + freeblist(bp); + + qunlock(s); + poperror(); + return; + case Syn_received: + /* doesn't matter if it's the correct ack, we're just trying to set timing */ + if(seg.flags & ACK) + tcpsynackrtt(s); + break; + } + + /* + * One DOS attack is to open connections to us and then forget about them, + * thereby tying up a conv at no long term cost to the attacker. + * This is an attempt to defeat these stateless DOS attacks. See + * corresponding code in tcpsendka(). 
+ */ + if(tcb->state != Syn_received && (seg.flags & RST) == 0){ + if(tcpporthogdefense + && seq_within(seg.ack, tcb->snd.una-(1<<31), tcb->snd.una-(1<<29))){ + print("stateless hog %I.%d->%I.%d f %ux %lux - %lux - %lux\n", + source, seg.source, dest, seg.dest, seg.flags, + tcb->snd.una-(1<<31), seg.ack, tcb->snd.una-(1<<29)); + localclose(s, "stateless hog"); + } + } + + /* Cut the data to fit the receive window */ + if(tcptrim(tcb, &seg, &bp, &length) == -1) { + netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length); + update(s, &seg); + if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) { + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcphalt(tpriv, &tcb->katimer); + tcpsetstate(s, Time_wait); + tcb->timer.start = MSL2*(1000 / MSPTICK); + tcpgo(tpriv, &tcb->timer); + } + if(!(seg.flags & RST)) { + tcb->flags |= FORCE; + goto output; + } + qunlock(s); + poperror(); + return; + } + + /* Cannot accept so answer with a rst */ + if(length && tcb->state == Closed) { + sndrst(tcp, source, dest, length, &seg, version, "sending to Closed"); + goto raise; + } + + /* The segment is beyond the current receive pointer so + * queue the data in the resequence queue + */ + if(seg.seq != tcb->rcv.nxt) + if(length != 0 || (seg.flags & (SYN|FIN))) { + update(s, &seg); + if(addreseq(tcb, tpriv, &seg, bp, length) < 0) + print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport); + tcb->flags |= FORCE; + goto output; + } + + /* + * keep looping till we've processed this packet plus any + * adjacent packets in the resequence queue + */ + for(;;) { + if(seg.flags & RST) { + if(tcb->state == Established) { + tpriv->stats[EstabResets]++; + if(tcb->rcv.nxt != seg.seq) + print("out of order RST rcvd: %I.%d -> %I.%d, rcv.nxt %lux seq %lux\n", s->raddr, s->rport, s->laddr, s->lport, tcb->rcv.nxt, seg.seq); + } + localclose(s, Econrefused); + goto raise; + } + + if((seg.flags&ACK) == 0) + goto raise; + + switch(tcb->state) { + case 
Syn_received: + if(!seq_within(seg.ack, tcb->snd.una+1, tcb->snd.nxt)){ + sndrst(tcp, source, dest, length, &seg, version, + "bad seq in Syn_received"); + goto raise; + } + update(s, &seg); + tcpsetstate(s, Established); + case Established: + case Close_wait: + update(s, &seg); + break; + case Finwait1: + update(s, &seg); + if(qlen(s->wq)+tcb->flgcnt == 0){ + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcpsetkacounter(tcb); + tcb->time = NOW; + tcpsetstate(s, Finwait2); + tcb->katimer.start = MSL2 * (1000 / MSPTICK); + tcpgo(tpriv, &tcb->katimer); + } + break; + case Finwait2: + update(s, &seg); + break; + case Closing: + update(s, &seg); + if(qlen(s->wq)+tcb->flgcnt == 0) { + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcphalt(tpriv, &tcb->katimer); + tcpsetstate(s, Time_wait); + tcb->timer.start = MSL2*(1000 / MSPTICK); + tcpgo(tpriv, &tcb->timer); + } + break; + case Last_ack: + update(s, &seg); + if(qlen(s->wq)+tcb->flgcnt == 0) { + localclose(s, nil); + goto raise; + } + case Time_wait: + tcb->flags |= FORCE; + if(tcb->timer.state != TcptimerON) + tcpgo(tpriv, &tcb->timer); + } + + if((seg.flags&URG) && seg.urg) { + if(seq_gt(seg.urg + seg.seq, tcb->rcv.urg)) { + tcb->rcv.urg = seg.urg + seg.seq; + pullblock(&bp, seg.urg); + } + } + else + if(seq_gt(tcb->rcv.nxt, tcb->rcv.urg)) + tcb->rcv.urg = tcb->rcv.nxt; + + if(length == 0) { + if(bp != nil) + freeblist(bp); + } + else { + switch(tcb->state){ + default: + /* Ignore segment text */ + if(bp != nil) + freeblist(bp); + break; + + case Syn_received: + case Established: + case Finwait1: + /* If we still have some data place on + * receive queue + */ + if(bp) { + bp = packblock(bp); + if(bp == nil) + panic("tcp packblock"); + qpassnolim(s->rq, bp); + bp = nil; + + /* + * Force an ack every 2 data messages. This is + * a hack for rob to make his home system run + * faster. 
+ * + * this also keeps the standard TCP congestion + * control working since it needs an ack every + * 2 max segs worth. This is not quite that, + * but under a real stream is equivalent since + * every packet has a max seg in it. + */ + if(++(tcb->rcv.una) >= 2) + tcb->flags |= FORCE; + } + tcb->rcv.nxt += length; + + /* + * update our rcv window + */ + tcprcvwin(s); + + /* + * turn on the acktimer if there's something + * to ack + */ + if(tcb->acktimer.state != TcptimerON) + tcpgo(tpriv, &tcb->acktimer); + + break; + case Finwait2: + /* no process to read the data, send a reset */ + if(bp != nil) + freeblist(bp); + sndrst(tcp, source, dest, length, &seg, version, + "send to Finwait2"); + qunlock(s); + poperror(); + return; + } + } + + if(seg.flags & FIN) { + tcb->flags |= FORCE; + + switch(tcb->state) { + case Syn_received: + case Established: + tcb->rcv.nxt++; + tcpsetstate(s, Close_wait); + break; + case Finwait1: + tcb->rcv.nxt++; + if(qlen(s->wq)+tcb->flgcnt == 0) { + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcphalt(tpriv, &tcb->katimer); + tcpsetstate(s, Time_wait); + tcb->timer.start = MSL2*(1000/MSPTICK); + tcpgo(tpriv, &tcb->timer); + } + else + tcpsetstate(s, Closing); + break; + case Finwait2: + tcb->rcv.nxt++; + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcphalt(tpriv, &tcb->katimer); + tcpsetstate(s, Time_wait); + tcb->timer.start = MSL2 * (1000/MSPTICK); + tcpgo(tpriv, &tcb->timer); + break; + case Close_wait: + case Closing: + case Last_ack: + break; + case Time_wait: + tcpgo(tpriv, &tcb->timer); + break; + } + } + + /* + * get next adjacent segment from the resequence queue. 
+ * dump/trim any overlapping segments + */ + for(;;) { + if(tcb->reseq == nil) + goto output; + + if(seq_ge(tcb->rcv.nxt, tcb->reseq->seg.seq) == 0) + goto output; + + getreseq(tcb, &seg, &bp, &length); + + if(tcptrim(tcb, &seg, &bp, &length) == 0) + break; + } + } +output: + tcpoutput(s); + qunlock(s); + poperror(); + return; +raise: + qunlock(s); + poperror(); + freeblist(bp); + tcpkick(s); +} + +/* + * always enters and exits with the s locked. We drop + * the lock to ipoput the packet so some care has to be + * taken by callers. + */ +void +tcpoutput(Conv *s) +{ + Tcp seg; + int msgs; + Tcpctl *tcb; + Block *hbp, *bp; + int sndcnt, n; + ulong ssize, dsize, usable, sent; + Fs *f; + Tcppriv *tpriv; + uchar version; + + f = s->p->f; + tpriv = s->p->priv; + version = s->ipversion; + + for(msgs = 0; msgs < 100; msgs++) { + tcb = (Tcpctl*)s->ptcl; + + switch(tcb->state) { + case Listen: + case Closed: + case Finwait2: + return; + } + + /* force an ack when a window has opened up */ + if(tcb->rcv.blocked && tcb->rcv.wnd > 0){ + tcb->rcv.blocked = 0; + tcb->flags |= FORCE; + } + + sndcnt = qlen(s->wq)+tcb->flgcnt; + sent = tcb->snd.ptr - tcb->snd.una; + + /* Don't send anything else until our SYN has been acked */ + if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0) + break; + + /* Compute usable segment based on offered window and limit + * window probes to one + */ + if(tcb->snd.wnd == 0){ + if(sent != 0) { + if((tcb->flags&FORCE) == 0) + break; +// tcb->snd.ptr = tcb->snd.una; + } + usable = 1; + } + else { + usable = tcb->cwind; + if(tcb->snd.wnd < usable) + usable = tcb->snd.wnd; + usable -= sent; + } + ssize = sndcnt-sent; + if(ssize && usable < 2) + netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n", + tcb->snd.wnd, tcb->cwind); + if(usable < ssize) + ssize = usable; + if(tcb->mss < ssize) + ssize = tcb->mss; + dsize = ssize; + seg.urg = 0; + + if(ssize == 0) + if((tcb->flags&FORCE) == 0) + break; + + tcb->flags &= ~FORCE; + tcprcvwin(s); + + 
/* By default we will generate an ack */ + tcphalt(tpriv, &tcb->acktimer); + tcb->rcv.una = 0; + seg.source = s->lport; + seg.dest = s->rport; + seg.flags = ACK; + seg.mss = 0; + seg.ws = 0; + switch(tcb->state){ + case Syn_sent: + seg.flags = 0; + if(tcb->snd.ptr == tcb->iss){ + seg.flags |= SYN; + dsize--; + seg.mss = tcb->mss; + seg.ws = tcb->scale; + } + break; + case Syn_received: + /* + * don't send any data with a SYN/ACK packet + * because Linux rejects the packet in its + * attempt to solve the SYN attack problem + */ + if(tcb->snd.ptr == tcb->iss){ + seg.flags |= SYN; + dsize = 0; + ssize = 1; + seg.mss = tcb->mss; + seg.ws = tcb->scale; + } + break; + } + seg.seq = tcb->snd.ptr; + seg.ack = tcb->rcv.nxt; + seg.wnd = tcb->rcv.wnd; + + /* Pull out data to send */ + bp = nil; + if(dsize != 0) { + bp = qcopy(s->wq, dsize, sent); + if(BLEN(bp) != dsize) { + seg.flags |= FIN; + dsize--; + } + } + + if(sent+dsize == sndcnt) + seg.flags |= PSH; + + /* keep track of balance of resent data */ + if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) { + n = tcb->snd.nxt - tcb->snd.ptr; + if(ssize < n) + n = ssize; + tcb->resent += n; + netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n", + s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt); + tpriv->stats[RetransSegs]++; + } + + tcb->snd.ptr += ssize; + + /* Pull up the send pointer so we can accept acks + * for this window + */ + if(seq_gt(tcb->snd.ptr,tcb->snd.nxt)) + tcb->snd.nxt = tcb->snd.ptr; + + /* Build header, link data and compute cksum */ + switch(version){ + case V4: + tcb->protohdr.tcp4hdr.vihl = IP_VER4; + hbp = htontcp4(&seg, bp, &tcb->protohdr.tcp4hdr, tcb); + if(hbp == nil) { + freeblist(bp); + return; + } + break; + case V6: + tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6; + hbp = htontcp6(&seg, bp, &tcb->protohdr.tcp6hdr, tcb); + if(hbp == nil) { + freeblist(bp); + return; + } + break; + default: + hbp = nil; /* to suppress a warning */ + panic("tcpoutput: version %d", version); + } + + /* 
Start the transmission timers if there is new data and we + * expect acknowledges + */ + if(ssize != 0){ + if(tcb->timer.state != TcptimerON) + tcpgo(tpriv, &tcb->timer); + + /* If round trip timer isn't running, start it. + * measure the longest packet only in case the + * transmission time dominates RTT + */ + if(tcb->rtt_timer.state != TcptimerON) + if(ssize == tcb->mss) { + tcpgo(tpriv, &tcb->rtt_timer); + tcb->rttseq = tcb->snd.ptr; + } + } + + tpriv->stats[OutSegs]++; + + /* put off the next keep alive */ + tcpgo(tpriv, &tcb->katimer); + + switch(version){ + case V4: + if(ipoput4(f, hbp, 0, s->ttl, s->tos, s) < 0){ + /* a negative return means no route */ + localclose(s, "no route"); + } + break; + case V6: + if(ipoput6(f, hbp, 0, s->ttl, s->tos, s) < 0){ + /* a negative return means no route */ + localclose(s, "no route"); + } + break; + default: + panic("tcpoutput2: version %d", version); + } + if((msgs%4) == 1){ + qunlock(s); + sched(); + qlock(s); + } + } +} + +/* + * the BSD convention (hack?) for keep alives. resend last uchar acked. 
+ */ +void +tcpsendka(Conv *s) +{ + Tcp seg; + Tcpctl *tcb; + Block *hbp,*dbp; + + tcb = (Tcpctl*)s->ptcl; + + dbp = nil; + seg.urg = 0; + seg.source = s->lport; + seg.dest = s->rport; + seg.flags = ACK|PSH; + seg.mss = 0; + seg.ws = 0; + if(tcpporthogdefense) + seg.seq = tcb->snd.una-(1<<30)-nrand(1<<20); + else + seg.seq = tcb->snd.una-1; + seg.ack = tcb->rcv.nxt; + tcb->rcv.una = 0; + seg.wnd = tcb->rcv.wnd; + if(tcb->state == Finwait2){ + seg.flags |= FIN; + } else { + dbp = allocb(1); + dbp->wp++; + } + + if(isv4(s->raddr)) { + /* Build header, link data and compute cksum */ + tcb->protohdr.tcp4hdr.vihl = IP_VER4; + hbp = htontcp4(&seg, dbp, &tcb->protohdr.tcp4hdr, tcb); + if(hbp == nil) { + freeblist(dbp); + return; + } + ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s); + } + else { + /* Build header, link data and compute cksum */ + tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6; + hbp = htontcp6(&seg, dbp, &tcb->protohdr.tcp6hdr, tcb); + if(hbp == nil) { + freeblist(dbp); + return; + } + ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s); + } +} + +/* + * set connection to time out after 12 minutes + */ +void +tcpsetkacounter(Tcpctl *tcb) +{ + tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK); + if(tcb->kacounter < 3) + tcb->kacounter = 3; +} + +/* + * if we've timed out, close the connection + * otherwise, send a keepalive and restart the timer + */ +void +tcpkeepalive(void *v) +{ + Tcpctl *tcb; + Conv *s; + + s = v; + tcb = (Tcpctl*)s->ptcl; + if(waserror()){ + qunlock(s); + nexterror(); + } + qlock(s); + if(tcb->state != Closed){ + if(--(tcb->kacounter) <= 0) { + localclose(s, Etimedout); + } else { + tcpsendka(s); + tcpgo(s->p->priv, &tcb->katimer); + } + } + qunlock(s); + poperror(); +} + +/* + * start keepalive timer + */ +char* +tcpstartka(Conv *s, char **f, int n) +{ + Tcpctl *tcb; + int x; + + tcb = (Tcpctl*)s->ptcl; + if(tcb->state != Established) + return "connection must be in Establised state"; + if(n > 1){ + x = atoi(f[1]); + if(x >= MSPTICK) + 
tcb->katimer.start = x/MSPTICK; + } + tcpsetkacounter(tcb); + tcpgo(s->p->priv, &tcb->katimer); + + return nil; +} + +/* + * turn checksums on/off + */ +char* +tcpsetchecksum(Conv *s, char **f, int) +{ + Tcpctl *tcb; + + tcb = (Tcpctl*)s->ptcl; + tcb->nochecksum = !atoi(f[1]); + + return nil; +} + +void +tcprxmit(Conv *s) +{ + Tcpctl *tcb; + + tcb = (Tcpctl*)s->ptcl; + + tcb->flags |= RETRAN|FORCE; + tcb->snd.ptr = tcb->snd.una; + + /* + * We should be halving the slow start threshhold (down to one + * mss) but leaving it at mss seems to work well enough + */ + tcb->ssthresh = tcb->mss; + + /* + * pull window down to a single packet + */ + tcb->cwind = tcb->mss; + tcpoutput(s); +} + +void +tcptimeout(void *arg) +{ + Conv *s; + Tcpctl *tcb; + int maxback; + Tcppriv *tpriv; + + s = (Conv*)arg; + tpriv = s->p->priv; + tcb = (Tcpctl*)s->ptcl; + + if(waserror()){ + qunlock(s); + nexterror(); + } + qlock(s); + switch(tcb->state){ + default: + tcb->backoff++; + if(tcb->state == Syn_sent) + maxback = MAXBACKMS/2; + else + maxback = MAXBACKMS; + tcb->backedoff += tcb->timer.start * MSPTICK; + if(tcb->backedoff >= maxback) { + localclose(s, Etimedout); + break; + } + netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW); + tcpsettimer(tcb); + tcprxmit(s); + tpriv->stats[RetransTimeouts]++; + tcb->snd.dupacks = 0; + break; + case Time_wait: + localclose(s, nil); + break; + case Closed: + break; + } + qunlock(s); + poperror(); +} + +int +inwindow(Tcpctl *tcb, int seq) +{ + return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1); +} + +/* + * set up state for a received SYN (or SYN ACK) packet + */ +void +procsyn(Conv *s, Tcp *seg) +{ + Tcpctl *tcb; + + tcb = (Tcpctl*)s->ptcl; + tcb->flags |= FORCE; + + tcb->rcv.nxt = seg->seq + 1; + tcb->rcv.urg = tcb->rcv.nxt; + tcb->irs = seg->seq; + + /* our sending max segment size cannot be bigger than what he asked for */ + if(seg->mss != 0 && seg->mss < tcb->mss) + tcb->mss = 
seg->mss; + + /* the congestion window always starts out as a single segment */ + tcb->snd.wnd = seg->wnd; + tcb->cwind = tcb->mss; +} + +int +addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length) +{ + Reseq *rp, *rp1; + int i; + static int once; + + rp = malloc(sizeof(Reseq)); + if(rp == nil){ + freeblist(bp); /* bp always consumed by add_reseq */ + return 0; + } + + rp->seg = *seg; + rp->bp = bp; + rp->length = length; + + /* Place on reassembly list sorting by starting seq number */ + rp1 = tcb->reseq; + if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) { + rp->next = rp1; + tcb->reseq = rp; + if(rp->next != nil) + tpriv->stats[OutOfOrder]++; + return 0; + } + + length = 0; + for(i = 0;; i++) { + length += rp1->length; + if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) { + rp->next = rp1->next; + rp1->next = rp; + if(rp->next != nil) + tpriv->stats[OutOfOrder]++; + break; + } + rp1 = rp1->next; + } + if(length > QMAX && once++ == 0){ + print("very long tcp resequence queue: %d\n", length); + for(rp1 = tcb->reseq, i = 0; i < 10 && rp1 != nil; rp1 = rp1->next, i++) + print("0x%lux 0x%lux 0x%ux\n", rp1->seg.seq, rp1->seg.ack, + rp1->seg.flags); + return -1; + } + return 0; +} + +void +getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length) +{ + Reseq *rp; + + rp = tcb->reseq; + if(rp == nil) + return; + + tcb->reseq = rp->next; + + *seg = rp->seg; + *bp = rp->bp; + *length = rp->length; + + free(rp); +} + +int +tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length) +{ + ushort len; + uchar accept; + int dupcnt, excess; + + accept = 0; + len = *length; + if(seg->flags & SYN) + len++; + if(seg->flags & FIN) + len++; + + if(tcb->rcv.wnd == 0) { + if(len == 0 && seg->seq == tcb->rcv.nxt) + return 0; + } + else { + /* Some part of the segment should be in the window */ + if(inwindow(tcb,seg->seq)) + accept++; + else + if(len != 0) { + if(inwindow(tcb, seg->seq+len-1) || + seq_within(tcb->rcv.nxt, seg->seq,seg->seq+len-1)) + 
accept++; + } + } + if(!accept) { + freeblist(*bp); + return -1; + } + dupcnt = tcb->rcv.nxt - seg->seq; + if(dupcnt > 0){ + tcb->rerecv += dupcnt; + if(seg->flags & SYN){ + seg->flags &= ~SYN; + seg->seq++; + + if(seg->urg > 1) + seg->urg--; + else + seg->flags &= ~URG; + dupcnt--; + } + if(dupcnt > 0){ + pullblock(bp, (ushort)dupcnt); + seg->seq += dupcnt; + *length -= dupcnt; + + if(seg->urg > dupcnt) + seg->urg -= dupcnt; + else { + seg->flags &= ~URG; + seg->urg = 0; + } + } + } + excess = seg->seq + *length - (tcb->rcv.nxt + tcb->rcv.wnd); + if(excess > 0) { + tcb->rerecv += excess; + *length -= excess; + *bp = trimblock(*bp, 0, *length); + if(*bp == nil) + panic("presotto is a boofhead"); + seg->flags &= ~FIN; + } + return 0; +} + +void +tcpadvise(Proto *tcp, Block *bp, char *msg) +{ + Tcp4hdr *h4; + Tcp6hdr *h6; + Tcpctl *tcb; + uchar source[IPaddrlen]; + uchar dest[IPaddrlen]; + ushort psource, pdest; + Conv *s, **p; + + h4 = (Tcp4hdr*)(bp->rp); + h6 = (Tcp6hdr*)(bp->rp); + + if((h4->vihl&0xF0)==IP_VER4) { + v4tov6(dest, h4->tcpdst); + v4tov6(source, h4->tcpsrc); + psource = nhgets(h4->tcpsport); + pdest = nhgets(h4->tcpdport); + } + else { + ipmove(dest, h6->tcpdst); + ipmove(source, h6->tcpsrc); + psource = nhgets(h6->tcpsport); + pdest = nhgets(h6->tcpdport); + } + + /* Look for a connection */ + qlock(tcp); + for(p = tcp->conv; *p; p++) { + s = *p; + tcb = (Tcpctl*)s->ptcl; + if(s->rport == pdest) + if(s->lport == psource) + if(tcb->state != Closed) + if(ipcmp(s->raddr, dest) == 0) + if(ipcmp(s->laddr, source) == 0){ + qlock(s); + qunlock(tcp); + switch(tcb->state){ + case Syn_sent: + localclose(s, msg); + break; + } + qunlock(s); + freeblist(bp); + return; + } + } + qunlock(tcp); + freeblist(bp); +} + +static char* +tcpporthogdefensectl(char *val) +{ + if(strcmp(val, "on") == 0) + tcpporthogdefense = 1; + else if(strcmp(val, "off") == 0) + tcpporthogdefense = 0; + else + return "unknown value for tcpporthogdefense"; + return nil; +} + +/* called with 
c qlocked */ +char* +tcpctl(Conv* c, char** f, int n) +{ + if(n == 1 && strcmp(f[0], "hangup") == 0) + return tcphangup(c); + if(n >= 1 && strcmp(f[0], "keepalive") == 0) + return tcpstartka(c, f, n); + if(n >= 1 && strcmp(f[0], "checksum") == 0) + return tcpsetchecksum(c, f, n); + if(n >= 1 && strcmp(f[0], "tcpporthogdefense") == 0) + return tcpporthogdefensectl(f[1]); + return "unknown control request"; +} + +int +tcpstats(Proto *tcp, char *buf, int len) +{ + Tcppriv *priv; + char *p, *e; + int i; + + priv = tcp->priv; + p = buf; + e = p+len; + for(i = 0; i < Nstats; i++) + p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]); + return p - buf; +} + +/* + * garbage collect any stale conversations: + * - SYN received but no SYN-ACK after 5 seconds (could be the SYN attack) + * - Finwait2 after 5 minutes + * + * this is called whenever we run out of channels. Both checks are + * of questionable validity so we try to use them only when we're + * up against the wall. + */ +int +tcpgc(Proto *tcp) +{ + Conv *c, **pp, **ep; + int n; + Tcpctl *tcb; + + + n = 0; + ep = &tcp->conv[tcp->nc]; + for(pp = tcp->conv; pp < ep; pp++) { + c = *pp; + if(c == nil) + break; + if(!canqlock(c)) + continue; + tcb = (Tcpctl*)c->ptcl; + switch(tcb->state){ + case Syn_received: + if(NOW - tcb->time > 5000){ + localclose(c, "timed out"); + n++; + } + break; + case Finwait2: + if(NOW - tcb->time > 5*60*1000){ + localclose(c, "timed out"); + n++; + } + break; + } + qunlock(c); + } + return n; +} + +void +tcpsettimer(Tcpctl *tcb) +{ + int x; + + /* round trip dependency */ + x = backoff(tcb->backoff) * + (tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK; + + /* bounded twixt 1/2 and 64 seconds */ + if(x < 500/MSPTICK) + x = 500/MSPTICK; + else if(x > (64000/MSPTICK)) + x = 64000/MSPTICK; + tcb->timer.start = x; +} + +void +tcpinit(Fs *fs) +{ + Proto *tcp; + Tcppriv *tpriv; + + tcp = smalloc(sizeof(Proto)); + tpriv = tcp->priv = smalloc(sizeof(Tcppriv)); + tcp->name = "tcp"; + 
tcp->connect = tcpconnect; + tcp->announce = tcpannounce; + tcp->ctl = tcpctl; + tcp->state = tcpstate; + tcp->create = tcpcreate; + tcp->close = tcpclose; + tcp->rcv = tcpiput; + tcp->advise = tcpadvise; + tcp->stats = tcpstats; + tcp->inuse = tcpinuse; + tcp->gc = tcpgc; + tcp->ipproto = IP_TCPPROTO; + tcp->nc = scalednconv(); + tcp->ptclsize = sizeof(Tcpctl); + tpriv->stats[MaxConn] = tcp->nc; + + Fsproto(fs, tcp); +} + +void +tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale) +{ + if(rcvscale){ + tcb->rcv.scale = rcvscale & 0xff; + tcb->snd.scale = sndscale & 0xff; + tcb->window = QMAX<<tcb->snd.scale; + qsetlimit(s->rq, tcb->window); + } else { + tcb->rcv.scale = 0; + tcb->snd.scale = 0; + tcb->window = QMAX; + qsetlimit(s->rq, tcb->window); + } +} diff --git a/os/ip/udp.c b/os/ip/udp.c new file mode 100644 index 00000000..89cfbffb --- /dev/null +++ b/os/ip/udp.c @@ -0,0 +1,649 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ipv6.h" + + +#define DPRINT if(0)print + +enum +{ + UDP_UDPHDR_SZ = 8, + + UDP4_PHDR_OFF = 8, + UDP4_PHDR_SZ = 12, + UDP4_IPHDR_SZ = 20, + UDP6_IPHDR_SZ = 40, + UDP6_PHDR_SZ = 40, + UDP6_PHDR_OFF = 0, + + IP_UDPPROTO = 17, + UDP_USEAD7 = 52, + UDP_USEAD6 = 36, + + Udprxms = 200, + Udptickms = 100, + Udpmaxxmit = 10, +}; + +typedef struct Udp4hdr Udp4hdr; +struct Udp4hdr +{ + /* ip header */ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + uchar Unused; + uchar udpproto; /* Protocol */ + uchar udpplen[2]; /* Header plus data length */ + uchar udpsrc[IPv4addrlen]; /* Ip source */ + uchar udpdst[IPv4addrlen]; /* Ip destination */ + + /* udp header */ + uchar udpsport[2]; /* Source port */ + uchar udpdport[2]; /* Destination port */ + uchar udplen[2]; /* data length */ 
+ uchar udpcksum[2]; /* Checksum */ +}; + +typedef struct Udp6hdr Udp6hdr; +struct Udp6hdr { + uchar viclfl[4]; + uchar len[2]; + uchar nextheader; + uchar hoplimit; + uchar udpsrc[IPaddrlen]; + uchar udpdst[IPaddrlen]; + + /* udp header */ + uchar udpsport[2]; /* Source port */ + uchar udpdport[2]; /* Destination port */ + uchar udplen[2]; /* data length */ + uchar udpcksum[2]; /* Checksum */ +}; + +/* MIB II counters */ +typedef struct Udpstats Udpstats; +struct Udpstats +{ + ulong udpInDatagrams; + ulong udpNoPorts; + ulong udpInErrors; + ulong udpOutDatagrams; +}; + +typedef struct Udppriv Udppriv; +struct Udppriv +{ + Ipht ht; + + /* MIB counters */ + Udpstats ustats; + + /* non-MIB stats */ + ulong csumerr; /* checksum errors */ + ulong lenerr; /* short packet */ +}; + +void (*etherprofiler)(char *name, int qlen); +void udpkick(void *x, Block *bp); + +/* + * protocol specific part of Conv + */ +typedef struct Udpcb Udpcb; +struct Udpcb +{ + QLock; + uchar headers; +}; + +static char* +udpconnect(Conv *c, char **argv, int argc) +{ + char *e; + Udppriv *upriv; + + upriv = c->p->priv; + e = Fsstdconnect(c, argv, argc); + Fsconnected(c, e); + if(e != nil) + return e; + + iphtadd(&upriv->ht, c); + return nil; +} + + +static int +udpstate(Conv *c, char *state, int n) +{ + return snprint(state, n, "%s qin %d qout %d", + c->inuse ? "Open" : "Closed", + c->rq ? qlen(c->rq) : 0, + c->wq ? 
qlen(c->wq) : 0 + ); +} + +static char* +udpannounce(Conv *c, char** argv, int argc) +{ + char *e; + Udppriv *upriv; + + upriv = c->p->priv; + e = Fsstdannounce(c, argv, argc); + if(e != nil) + return e; + Fsconnected(c, nil); + iphtadd(&upriv->ht, c); + + return nil; +} + +static void +udpcreate(Conv *c) +{ + c->rq = qopen(64*1024, Qmsg, 0, 0); + c->wq = qbypass(udpkick, c); +} + +static void +udpclose(Conv *c) +{ + Udpcb *ucb; + Udppriv *upriv; + + upriv = c->p->priv; + iphtrem(&upriv->ht, c); + + c->state = 0; + qclose(c->rq); + qclose(c->wq); + qclose(c->eq); + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + c->lport = 0; + c->rport = 0; + + ucb = (Udpcb*)c->ptcl; + ucb->headers = 0; + + qunlock(c); +} + +void +udpkick(void *x, Block *bp) +{ + Conv *c = x; + Udp4hdr *uh4; + Udp6hdr *uh6; + ushort rport; + uchar laddr[IPaddrlen], raddr[IPaddrlen]; + Udpcb *ucb; + int dlen, ptcllen; + Udppriv *upriv; + Fs *f; + int version; + Conv *rc; + + upriv = c->p->priv; + f = c->p->f; + + netlog(c->p->f, Logudp, "udp: kick\n"); + if(bp == nil) + return; + + ucb = (Udpcb*)c->ptcl; + switch(ucb->headers) { + case 7: + /* get user specified addresses */ + bp = pullupblock(bp, UDP_USEAD7); + if(bp == nil) + return; + ipmove(raddr, bp->rp); + bp->rp += IPaddrlen; + ipmove(laddr, bp->rp); + bp->rp += IPaddrlen; + /* pick interface closest to dest */ + if(ipforme(f, laddr) != Runi) + findlocalip(f, laddr, raddr); + bp->rp += IPaddrlen; /* Ignore ifc address */ + rport = nhgets(bp->rp); + bp->rp += 2+2; /* Ignore local port */ + break; + case 6: + /* get user specified addresses */ + bp = pullupblock(bp, UDP_USEAD6); + if(bp == nil) + return; + ipmove(raddr, bp->rp); + bp->rp += IPaddrlen; + ipmove(laddr, bp->rp); + bp->rp += IPaddrlen; + /* pick interface closest to dest */ + if(ipforme(f, laddr) != Runi) + findlocalip(f, laddr, raddr); + rport = nhgets(bp->rp); + bp->rp += 2+2; /* Ignore local port */ + break; + default: + rport = 0; + break; + } + + if(ucb->headers) 
{ + if(memcmp(laddr, v4prefix, IPv4off) == 0 || + ipcmp(laddr, IPnoaddr) == 0) + version = V4; + else + version = V6; + } else { + if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 && + memcmp(c->laddr, v4prefix, IPv4off) == 0) + || ipcmp(c->raddr, IPnoaddr) == 0) + version = V4; + else + version = V6; + } + + dlen = blocklen(bp); + + /* fill in pseudo header and compute checksum */ + switch(version){ + case V4: + bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ); + if(bp == nil) + return; + + uh4 = (Udp4hdr *)(bp->rp); + ptcllen = dlen + UDP_UDPHDR_SZ; + uh4->Unused = 0; + uh4->udpproto = IP_UDPPROTO; + uh4->frag[0] = 0; + uh4->frag[1] = 0; + hnputs(uh4->udpplen, ptcllen); + if(ucb->headers) { + v6tov4(uh4->udpdst, raddr); + hnputs(uh4->udpdport, rport); + v6tov4(uh4->udpsrc, laddr); + rc = nil; + } else { + v6tov4(uh4->udpdst, c->raddr); + hnputs(uh4->udpdport, c->rport); + if(ipcmp(c->laddr, IPnoaddr) == 0) + findlocalip(f, c->laddr, c->raddr); + v6tov4(uh4->udpsrc, c->laddr); + rc = c; + } + hnputs(uh4->udpsport, c->lport); + hnputs(uh4->udplen, ptcllen); + uh4->udpcksum[0] = 0; + uh4->udpcksum[1] = 0; + hnputs(uh4->udpcksum, + ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ)); + uh4->vihl = IP_VER4; + ipoput4(f, bp, 0, c->ttl, c->tos, rc); + break; + + case V6: + bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ); + if(bp == nil) + return; + + // using the v6 ip header to create pseudo header + // first then reset it to the normal ip header + uh6 = (Udp6hdr *)(bp->rp); + memset(uh6, 0, 8); + ptcllen = dlen + UDP_UDPHDR_SZ; + hnputl(uh6->viclfl, ptcllen); + uh6->hoplimit = IP_UDPPROTO; + if(ucb->headers) { + ipmove(uh6->udpdst, raddr); + hnputs(uh6->udpdport, rport); + ipmove(uh6->udpsrc, laddr); + rc = nil; + } else { + ipmove(uh6->udpdst, c->raddr); + hnputs(uh6->udpdport, c->rport); + if(ipcmp(c->laddr, IPnoaddr) == 0) + findlocalip(f, c->laddr, c->raddr); + ipmove(uh6->udpsrc, c->laddr); + rc = c; + } + hnputs(uh6->udpsport, c->lport); + 
hnputs(uh6->udplen, ptcllen); + uh6->udpcksum[0] = 0; + uh6->udpcksum[1] = 0; + hnputs(uh6->udpcksum, + ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ)); + memset(uh6, 0, 8); + uh6->viclfl[0] = IP_VER6; + hnputs(uh6->len, ptcllen); + uh6->nextheader = IP_UDPPROTO; + ipoput6(f, bp, 0, c->ttl, c->tos, rc); + break; + + default: + panic("udpkick: version %d", version); + } + upriv->ustats.udpOutDatagrams++; +} + +void +udpiput(Proto *udp, Ipifc *ifc, Block *bp) +{ + int len; + Udp4hdr *uh4; + Udp6hdr *uh6; + Conv *c; + Udpcb *ucb; + uchar raddr[IPaddrlen], laddr[IPaddrlen]; + ushort rport, lport; + Udppriv *upriv; + Fs *f; + int version; + int ottl, oviclfl, olen; + uchar *p; + + upriv = udp->priv; + f = udp->f; + upriv->ustats.udpInDatagrams++; + + uh4 = (Udp4hdr*)(bp->rp); + version = ((uh4->vihl&0xF0)==IP_VER6) ? V6 : V4; + + /* + * Put back pseudo header for checksum + * (remember old values for icmpnoconv()) + */ + switch(version) { + case V4: + ottl = uh4->Unused; + uh4->Unused = 0; + len = nhgets(uh4->udplen); + olen = nhgets(uh4->udpplen); + hnputs(uh4->udpplen, len); + + v4tov6(raddr, uh4->udpsrc); + v4tov6(laddr, uh4->udpdst); + lport = nhgets(uh4->udpdport); + rport = nhgets(uh4->udpsport); + + if(nhgets(uh4->udpcksum)) { + if(ptclcsum(bp, UDP4_PHDR_OFF, len+UDP4_PHDR_SZ)) { + upriv->ustats.udpInErrors++; + netlog(f, Logudp, "udp: checksum error %I\n", raddr); + DPRINT("udp: checksum error %I\n", raddr); + freeblist(bp); + return; + } + } + uh4->Unused = ottl; + hnputs(uh4->udpplen, olen); + break; + case V6: + uh6 = (Udp6hdr*)(bp->rp); + len = nhgets(uh6->udplen); + oviclfl = nhgetl(uh6->viclfl); + olen = nhgets(uh6->len); + ottl = uh6->hoplimit; + ipmove(raddr, uh6->udpsrc); + ipmove(laddr, uh6->udpdst); + lport = nhgets(uh6->udpdport); + rport = nhgets(uh6->udpsport); + memset(uh6, 0, 8); + hnputl(uh6->viclfl, len); + uh6->hoplimit = IP_UDPPROTO; + if(ptclcsum(bp, UDP6_PHDR_OFF, len+UDP6_PHDR_SZ)) { + upriv->ustats.udpInErrors++; + netlog(f, 
Logudp, "udp: checksum error %I\n", raddr); + DPRINT("udp: checksum error %I\n", raddr); + freeblist(bp); + return; + } + hnputl(uh6->viclfl, oviclfl); + hnputs(uh6->len, olen); + uh6->nextheader = IP_UDPPROTO; + uh6->hoplimit = ottl; + break; + default: + panic("udpiput: version %d", version); + return; /* to avoid a warning */ + } + + qlock(udp); + + c = iphtlook(&upriv->ht, raddr, rport, laddr, lport); + if(c == nil){ + /* no converstation found */ + upriv->ustats.udpNoPorts++; + qunlock(udp); + netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport, + laddr, lport); + + switch(version){ + case V4: + icmpnoconv(f, bp); + break; + case V6: + icmphostunr(f, ifc, bp, icmp6_port_unreach, 0); + break; + default: + panic("udpiput2: version %d", version); + } + + freeblist(bp); + return; + } + ucb = (Udpcb*)c->ptcl; + + if(c->state == Announced){ + if(ucb->headers == 0){ + /* create a new conversation */ + if(ipforme(f, laddr) != Runi) { + switch(version){ + case V4: + v4tov6(laddr, ifc->lifc->local); + break; + case V6: + ipmove(laddr, ifc->lifc->local); + break; + default: + panic("udpiput3: version %d", version); + } + } + c = Fsnewcall(c, raddr, rport, laddr, lport, version); + if(c == nil){ + qunlock(udp); + freeblist(bp); + return; + } + iphtadd(&upriv->ht, c); + ucb = (Udpcb*)c->ptcl; + } + } + + qlock(c); + qunlock(udp); + + /* + * Trim the packet down to data size + */ + len -= UDP_UDPHDR_SZ; + switch(version){ + case V4: + bp = trimblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ, len); + break; + case V6: + bp = trimblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ, len); + break; + default: + bp = nil; + panic("udpiput4: version %d", version); + } + if(bp == nil){ + qunlock(c); + netlog(f, Logudp, "udp: len err %I.%d -> %I.%d\n", raddr, rport, + laddr, lport); + upriv->lenerr++; + return; + } + + netlog(f, Logudpmsg, "udp: %I.%d -> %I.%d l %d\n", raddr, rport, + laddr, lport, len); + + switch(ucb->headers){ + case 7: + /* pass the src address */ + bp = padblock(bp, 
UDP_USEAD7); + p = bp->rp; + ipmove(p, raddr); p += IPaddrlen; + ipmove(p, laddr); p += IPaddrlen; + ipmove(p, ifc->lifc->local); p += IPaddrlen; + hnputs(p, rport); p += 2; + hnputs(p, lport); + break; + case 6: + /* pass the src address */ + bp = padblock(bp, UDP_USEAD6); + p = bp->rp; + ipmove(p, raddr); p += IPaddrlen; + ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen; + hnputs(p, rport); p += 2; + hnputs(p, lport); + break; + } + + if(bp->next) + bp = concatblock(bp); + + if(qfull(c->rq)){ + qunlock(c); + netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport, + laddr, lport); + freeblist(bp); + return; + } + + qpass(c->rq, bp); + qunlock(c); + +} + +char* +udpctl(Conv *c, char **f, int n) +{ + Udpcb *ucb; + + ucb = (Udpcb*)c->ptcl; + if(n == 1){ + if(strcmp(f[0], "oldheaders") == 0){ + ucb->headers = 6; + return nil; + } else if(strcmp(f[0], "headers") == 0){ + ucb->headers = 7; + return nil; + } + } + return "unknown control request"; +} + +void +udpadvise(Proto *udp, Block *bp, char *msg) +{ + Udp4hdr *h4; + Udp6hdr *h6; + uchar source[IPaddrlen], dest[IPaddrlen]; + ushort psource, pdest; + Conv *s, **p; + int version; + + h4 = (Udp4hdr*)(bp->rp); + version = ((h4->vihl&0xF0)==IP_VER6) ? 
V6 : V4; + + switch(version) { + case V4: + v4tov6(dest, h4->udpdst); + v4tov6(source, h4->udpsrc); + psource = nhgets(h4->udpsport); + pdest = nhgets(h4->udpdport); + break; + case V6: + h6 = (Udp6hdr*)(bp->rp); + ipmove(dest, h6->udpdst); + ipmove(source, h6->udpsrc); + psource = nhgets(h6->udpsport); + pdest = nhgets(h6->udpdport); + break; + default: + panic("udpadvise: version %d", version); + return; /* to avoid a warning */ + } + + /* Look for a connection */ + qlock(udp); + for(p = udp->conv; *p; p++) { + s = *p; + if(s->rport == pdest) + if(s->lport == psource) + if(ipcmp(s->raddr, dest) == 0) + if(ipcmp(s->laddr, source) == 0){ + if(s->ignoreadvice) + break; + qlock(s); + qunlock(udp); + qhangup(s->rq, msg); + qhangup(s->wq, msg); + qunlock(s); + freeblist(bp); + return; + } + } + qunlock(udp); + freeblist(bp); +} + +int +udpstats(Proto *udp, char *buf, int len) +{ + Udppriv *upriv; + + upriv = udp->priv; + return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n", + upriv->ustats.udpInDatagrams, + upriv->ustats.udpNoPorts, + upriv->ustats.udpInErrors, + upriv->ustats.udpOutDatagrams); +} + +void +udpinit(Fs *fs) +{ + Proto *udp; + + udp = smalloc(sizeof(Proto)); + udp->priv = smalloc(sizeof(Udppriv)); + udp->name = "udp"; + udp->connect = udpconnect; + udp->announce = udpannounce; + udp->ctl = udpctl; + udp->state = udpstate; + udp->create = udpcreate; + udp->close = udpclose; + udp->rcv = udpiput; + udp->advise = udpadvise; + udp->stats = udpstats; + udp->ipproto = IP_UDPPROTO; + udp->nc = Nchans; + udp->ptclsize = sizeof(Udpcb); + + Fsproto(fs, udp); +} |
