Source file
src/hash/maphash/smhasher_test.go
1
2
3
4
5
6
7 package maphash
8
9 import (
10 "fmt"
11 "math"
12 "math/rand"
13 "runtime"
14 "strings"
15 "testing"
16 "unsafe"
17 )
18
19
20
21
22
23
24
25
26 var fixedSeed = MakeSeed()
27
28
29
30
31 func TestSmhasherSanity(t *testing.T) {
32 r := rand.New(rand.NewSource(1234))
33 const REP = 10
34 const KEYMAX = 128
35 const PAD = 16
36 const OFFMAX = 16
37 for k := 0; k < REP; k++ {
38 for n := 0; n < KEYMAX; n++ {
39 for i := 0; i < OFFMAX; i++ {
40 var b [KEYMAX + OFFMAX + 2*PAD]byte
41 var c [KEYMAX + OFFMAX + 2*PAD]byte
42 randBytes(r, b[:])
43 randBytes(r, c[:])
44 copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
45 if bytesHash(b[PAD:PAD+n]) != bytesHash(c[PAD+i:PAD+i+n]) {
46 t.Errorf("hash depends on bytes outside key")
47 }
48 }
49 }
50 }
51 }
52
53 func bytesHash(b []byte) uint64 {
54 var h Hash
55 h.SetSeed(fixedSeed)
56 h.Write(b)
57 return h.Sum64()
58 }
59 func stringHash(s string) uint64 {
60 var h Hash
61 h.SetSeed(fixedSeed)
62 h.WriteString(s)
63 return h.Sum64()
64 }
65
66 const hashSize = 64
67
68 func randBytes(r *rand.Rand, b []byte) {
69 r.Read(b)
70 }
71
72
73 type hashSet struct {
74 m map[uint64]struct{}
75 n int
76 }
77
78 func newHashSet() *hashSet {
79 return &hashSet{make(map[uint64]struct{}), 0}
80 }
81 func (s *hashSet) add(h uint64) {
82 s.m[h] = struct{}{}
83 s.n++
84 }
85 func (s *hashSet) addS(x string) {
86 s.add(stringHash(x))
87 }
88 func (s *hashSet) addB(x []byte) {
89 s.add(bytesHash(x))
90 }
91 func (s *hashSet) addS_seed(x string, seed Seed) {
92 var h Hash
93 h.SetSeed(seed)
94 h.WriteString(x)
95 s.add(h.Sum64())
96 }
97 func (s *hashSet) check(t *testing.T) {
98 const SLOP = 10.0
99 collisions := s.n - len(s.m)
100 pairs := int64(s.n) * int64(s.n-1) / 2
101 expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
102 stddev := math.Sqrt(expected)
103 if float64(collisions) > expected+SLOP*(3*stddev+1) {
104 t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev)
105 }
106 }
107
108
109 func TestSmhasherAppendedZeros(t *testing.T) {
110 s := "hello" + strings.Repeat("\x00", 256)
111 h := newHashSet()
112 for i := 0; i <= len(s); i++ {
113 h.addS(s[:i])
114 }
115 h.check(t)
116 }
117
118
119 func TestSmhasherSmallKeys(t *testing.T) {
120 h := newHashSet()
121 var b [3]byte
122 for i := 0; i < 256; i++ {
123 b[0] = byte(i)
124 h.addB(b[:1])
125 for j := 0; j < 256; j++ {
126 b[1] = byte(j)
127 h.addB(b[:2])
128 if !testing.Short() {
129 for k := 0; k < 256; k++ {
130 b[2] = byte(k)
131 h.addB(b[:3])
132 }
133 }
134 }
135 }
136 h.check(t)
137 }
138
139
140 func TestSmhasherZeros(t *testing.T) {
141 N := 256 * 1024
142 if testing.Short() {
143 N = 1024
144 }
145 h := newHashSet()
146 b := make([]byte, N)
147 for i := 0; i <= N; i++ {
148 h.addB(b[:i])
149 }
150 h.check(t)
151 }
152
153
154 func TestSmhasherTwoNonzero(t *testing.T) {
155 if runtime.GOARCH == "wasm" {
156 t.Skip("Too slow on wasm")
157 }
158 if testing.Short() {
159 t.Skip("Skipping in short mode")
160 }
161 h := newHashSet()
162 for n := 2; n <= 16; n++ {
163 twoNonZero(h, n)
164 }
165 h.check(t)
166 }
167 func twoNonZero(h *hashSet, n int) {
168 b := make([]byte, n)
169
170
171 h.addB(b)
172
173
174 for i := 0; i < n; i++ {
175 for x := 1; x < 256; x++ {
176 b[i] = byte(x)
177 h.addB(b)
178 b[i] = 0
179 }
180 }
181
182
183 for i := 0; i < n; i++ {
184 for x := 1; x < 256; x++ {
185 b[i] = byte(x)
186 for j := i + 1; j < n; j++ {
187 for y := 1; y < 256; y++ {
188 b[j] = byte(y)
189 h.addB(b)
190 b[j] = 0
191 }
192 }
193 b[i] = 0
194 }
195 }
196 }
197
198
199 func TestSmhasherCyclic(t *testing.T) {
200 if testing.Short() {
201 t.Skip("Skipping in short mode")
202 }
203 r := rand.New(rand.NewSource(1234))
204 const REPEAT = 8
205 const N = 1000000
206 for n := 4; n <= 12; n++ {
207 h := newHashSet()
208 b := make([]byte, REPEAT*n)
209 for i := 0; i < N; i++ {
210 b[0] = byte(i * 79 % 97)
211 b[1] = byte(i * 43 % 137)
212 b[2] = byte(i * 151 % 197)
213 b[3] = byte(i * 199 % 251)
214 randBytes(r, b[4:n])
215 for j := n; j < n*REPEAT; j++ {
216 b[j] = b[j-n]
217 }
218 h.addB(b)
219 }
220 h.check(t)
221 }
222 }
223
224
225 func TestSmhasherSparse(t *testing.T) {
226 if runtime.GOARCH == "wasm" {
227 t.Skip("Too slow on wasm")
228 }
229 if testing.Short() {
230 t.Skip("Skipping in short mode")
231 }
232 sparse(t, 32, 6)
233 sparse(t, 40, 6)
234 sparse(t, 48, 5)
235 sparse(t, 56, 5)
236 sparse(t, 64, 5)
237 sparse(t, 96, 4)
238 sparse(t, 256, 3)
239 sparse(t, 2048, 2)
240 }
241 func sparse(t *testing.T, n int, k int) {
242 b := make([]byte, n/8)
243 h := newHashSet()
244 setbits(h, b, 0, k)
245 h.check(t)
246 }
247
248
249 func setbits(h *hashSet, b []byte, i int, k int) {
250 h.addB(b)
251 if k == 0 {
252 return
253 }
254 for j := i; j < len(b)*8; j++ {
255 b[j/8] |= byte(1 << uint(j&7))
256 setbits(h, b, j+1, k-1)
257 b[j/8] &= byte(^(1 << uint(j&7)))
258 }
259 }
260
261
262
263 func TestSmhasherPermutation(t *testing.T) {
264 if runtime.GOARCH == "wasm" {
265 t.Skip("Too slow on wasm")
266 }
267 if testing.Short() {
268 t.Skip("Skipping in short mode")
269 }
270 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
271 permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
272 permutation(t, []uint32{0, 1}, 20)
273 permutation(t, []uint32{0, 1 << 31}, 20)
274 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
275 }
276 func permutation(t *testing.T, s []uint32, n int) {
277 b := make([]byte, n*4)
278 h := newHashSet()
279 genPerm(h, b, s, 0)
280 h.check(t)
281 }
282 func genPerm(h *hashSet, b []byte, s []uint32, n int) {
283 h.addB(b[:n])
284 if n == len(b) {
285 return
286 }
287 for _, v := range s {
288 b[n] = byte(v)
289 b[n+1] = byte(v >> 8)
290 b[n+2] = byte(v >> 16)
291 b[n+3] = byte(v >> 24)
292 genPerm(h, b, s, n+4)
293 }
294 }
295
296 type key interface {
297 clear()
298 random(r *rand.Rand)
299 bits() int
300 flipBit(i int)
301 hash() uint64
302 name() string
303 }
304
305 type bytesKey struct {
306 b []byte
307 }
308
309 func (k *bytesKey) clear() {
310 for i := range k.b {
311 k.b[i] = 0
312 }
313 }
314 func (k *bytesKey) random(r *rand.Rand) {
315 randBytes(r, k.b)
316 }
317 func (k *bytesKey) bits() int {
318 return len(k.b) * 8
319 }
320 func (k *bytesKey) flipBit(i int) {
321 k.b[i>>3] ^= byte(1 << uint(i&7))
322 }
323 func (k *bytesKey) hash() uint64 {
324 return bytesHash(k.b)
325 }
326 func (k *bytesKey) name() string {
327 return fmt.Sprintf("bytes%d", len(k.b))
328 }
329
330
331 func TestSmhasherAvalanche(t *testing.T) {
332 if runtime.GOARCH == "wasm" {
333 t.Skip("Too slow on wasm")
334 }
335 if testing.Short() {
336 t.Skip("Skipping in short mode")
337 }
338 avalancheTest1(t, &bytesKey{make([]byte, 2)})
339 avalancheTest1(t, &bytesKey{make([]byte, 4)})
340 avalancheTest1(t, &bytesKey{make([]byte, 8)})
341 avalancheTest1(t, &bytesKey{make([]byte, 16)})
342 avalancheTest1(t, &bytesKey{make([]byte, 32)})
343 avalancheTest1(t, &bytesKey{make([]byte, 200)})
344 }
345 func avalancheTest1(t *testing.T, k key) {
346 const REP = 100000
347 r := rand.New(rand.NewSource(1234))
348 n := k.bits()
349
350
351
352 grid := make([][hashSize]int, n)
353
354 for z := 0; z < REP; z++ {
355
356 k.random(r)
357 h := k.hash()
358
359
360 for i := 0; i < n; i++ {
361 k.flipBit(i)
362 d := h ^ k.hash()
363 k.flipBit(i)
364
365
366 g := &grid[i]
367 for j := 0; j < hashSize; j++ {
368 g[j] += int(d & 1)
369 d >>= 1
370 }
371 }
372 }
373
374
375
376
377
378
379 N := n * hashSize
380 var c float64
381
382 for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
383 }
384 c *= 11.0
385 mean := .5 * REP
386 stddev := .5 * math.Sqrt(REP)
387 low := int(mean - c*stddev)
388 high := int(mean + c*stddev)
389 for i := 0; i < n; i++ {
390 for j := 0; j < hashSize; j++ {
391 x := grid[i][j]
392 if x < low || x > high {
393 t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
394 }
395 }
396 }
397 }
398
399
400 func TestSmhasherWindowed(t *testing.T) {
401 windowed(t, &bytesKey{make([]byte, 128)})
402 }
403 func windowed(t *testing.T, k key) {
404 if runtime.GOARCH == "wasm" {
405 t.Skip("Too slow on wasm")
406 }
407 if testing.Short() {
408 t.Skip("Skipping in short mode")
409 }
410 const BITS = 16
411
412 for r := 0; r < k.bits(); r++ {
413 h := newHashSet()
414 for i := 0; i < 1<<BITS; i++ {
415 k.clear()
416 for j := 0; j < BITS; j++ {
417 if i>>uint(j)&1 != 0 {
418 k.flipBit((j + r) % k.bits())
419 }
420 }
421 h.add(k.hash())
422 }
423 h.check(t)
424 }
425 }
426
427
428 func TestSmhasherText(t *testing.T) {
429 if testing.Short() {
430 t.Skip("Skipping in short mode")
431 }
432 text(t, "Foo", "Bar")
433 text(t, "FooBar", "")
434 text(t, "", "FooBar")
435 }
436 func text(t *testing.T, prefix, suffix string) {
437 const N = 4
438 const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
439 const L = len(S)
440 b := make([]byte, len(prefix)+N+len(suffix))
441 copy(b, prefix)
442 copy(b[len(prefix)+N:], suffix)
443 h := newHashSet()
444 c := b[len(prefix):]
445 for i := 0; i < L; i++ {
446 c[0] = S[i]
447 for j := 0; j < L; j++ {
448 c[1] = S[j]
449 for k := 0; k < L; k++ {
450 c[2] = S[k]
451 for x := 0; x < L; x++ {
452 c[3] = S[x]
453 h.addB(b)
454 }
455 }
456 }
457 }
458 h.check(t)
459 }
460
461
462 func TestSmhasherSeed(t *testing.T) {
463 if unsafe.Sizeof(uintptr(0)) == 4 {
464 t.Skip("32-bit platforms don't have ideal seed-input distributions (see issue 33988)")
465 }
466 h := newHashSet()
467 const N = 100000
468 s := "hello"
469 for i := 0; i < N; i++ {
470 h.addS_seed(s, Seed{s: uint64(i + 1)})
471 h.addS_seed(s, Seed{s: uint64(i+1) << 32})
472 }
473 h.check(t)
474 }
475
View as plain text