summaryrefslogtreecommitdiff
path: root/appl/lib/bloomfilter.b
blob: 8860c8b0a55bd9fadb7a9a229a38795a6579c4b7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
implement Bloomfilter;

include "sys.m";
	sys: Sys;
include "sets.m";
	sets: Sets;
	Set: import sets;
include "keyring.m";
	keyring: Keyring;
include "bloomfilter.m";

init()
{
	sys = load Sys Sys->PATH;
	sets = load Sets Sets->PATH;
	sets->init();
	keyring = load Keyring Keyring->PATH;
}

Bits: adt {
	d: array of byte;
	n: int;			# bits used
	get:	fn(bits: self ref Bits, n: int): int;
};

filter(d: array of byte, logm, k: int): Set
{
	if(logm < 3 || logm > 30)
		raise "invalid bloom filter size";
	nb := 1 << logm;
	f := array[nb / 8 + 1] of {* => byte 0};	# one extra zero to make sure set's not inverted.
	bits := hashbits(d, logm * k);
	while(k--){
		v := bits.get(logm);
		f[v >> 3] |= byte 1 << (v & 7);
	}
	return sets->bytes2set(f);
}

hashbits(data: array of byte, n: int): ref Bits
{
	d := array[((n + 7) / 8)] of byte;
	digest := array[Keyring->SHA1dlen] of byte;
	state := keyring->sha1(data, len data, nil, nil);
	extra := array[2] of byte;
	e := 0;
	for(i := 0; i < len d; i += Keyring->SHA1dlen){
		extra[0] = byte e;
		extra[1] = byte (e>>8);
		e++;
		state = keyring->sha1(extra, len extra, digest, state);
		if(i + Keyring->SHA1dlen > len d)
			digest = digest[0:len d - i];
		d[i:] = digest;
	}
	return ref Bits(d, 0);
}

# XXX could be more efficient.
Bits.get(bits: self ref Bits, n: int): int
{
	d := bits.d;
	v := 0;
	nb := bits.n;
	for(i := 0; i < n; i++){
		j := nb + i;
		if(int d[j >> 3] & (1 << (j & 7)))
			v |= (1 << i);
	}
	bits.n += n;
	return v;
}