...
Run Format

Source file src/runtime/hash_test.go

Documentation: runtime

  // Copyright 2013 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  
  package runtime_test
  
  import (
  	"fmt"
  	"math"
  	"math/rand"
  	. "runtime"
  	"strings"
  	"testing"
  	"unsafe"
  )
  
  // Smhasher is a torture test for hash functions.
  // https://code.google.com/p/smhasher/
  // This code is a port of some of the Smhasher tests to Go.
  //
  // The current AES hash function passes Smhasher. Our fallback
  // hash functions don't, so we only enable the difficult tests when
  // we know the AES implementation is available.
  
  // Sanity checks.
  // hash should not depend on values outside key.
  // hash should not depend on alignment.
  func TestSmhasherSanity(t *testing.T) {
  	r := rand.New(rand.NewSource(1234))
  	const REP = 10
  	const KEYMAX = 128
  	const PAD = 16
  	const OFFMAX = 16
  	for k := 0; k < REP; k++ {
  		for n := 0; n < KEYMAX; n++ {
  			for i := 0; i < OFFMAX; i++ {
  				var b [KEYMAX + OFFMAX + 2*PAD]byte
  				var c [KEYMAX + OFFMAX + 2*PAD]byte
  				randBytes(r, b[:])
  				randBytes(r, c[:])
  				copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
  				if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
  					t.Errorf("hash depends on bytes outside key")
  				}
  			}
  		}
  	}
  }
  
  type HashSet struct {
  	m map[uintptr]struct{} // set of hashes added
  	n int                  // number of hashes added
  }
  
  func newHashSet() *HashSet {
  	return &HashSet{make(map[uintptr]struct{}), 0}
  }
  func (s *HashSet) add(h uintptr) {
  	s.m[h] = struct{}{}
  	s.n++
  }
  func (s *HashSet) addS(x string) {
  	s.add(StringHash(x, 0))
  }
  func (s *HashSet) addB(x []byte) {
  	s.add(BytesHash(x, 0))
  }
  func (s *HashSet) addS_seed(x string, seed uintptr) {
  	s.add(StringHash(x, seed))
  }
  func (s *HashSet) check(t *testing.T) {
  	const SLOP = 10.0
  	collisions := s.n - len(s.m)
  	//fmt.Printf("%d/%d\n", len(s.m), s.n)
  	pairs := int64(s.n) * int64(s.n-1) / 2
  	expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
  	stddev := math.Sqrt(expected)
  	if float64(collisions) > expected+SLOP*(3*stddev+1) {
  		t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev)
  	}
  }
  
  // a string plus adding zeros must make distinct hashes
  func TestSmhasherAppendedZeros(t *testing.T) {
  	s := "hello" + strings.Repeat("\x00", 256)
  	h := newHashSet()
  	for i := 0; i <= len(s); i++ {
  		h.addS(s[:i])
  	}
  	h.check(t)
  }
  
  // All 0-3 byte strings have distinct hashes.
  func TestSmhasherSmallKeys(t *testing.T) {
  	h := newHashSet()
  	var b [3]byte
  	for i := 0; i < 256; i++ {
  		b[0] = byte(i)
  		h.addB(b[:1])
  		for j := 0; j < 256; j++ {
  			b[1] = byte(j)
  			h.addB(b[:2])
  			if !testing.Short() {
  				for k := 0; k < 256; k++ {
  					b[2] = byte(k)
  					h.addB(b[:3])
  				}
  			}
  		}
  	}
  	h.check(t)
  }
  
  // Different length strings of all zeros have distinct hashes.
  func TestSmhasherZeros(t *testing.T) {
  	N := 256 * 1024
  	if testing.Short() {
  		N = 1024
  	}
  	h := newHashSet()
  	b := make([]byte, N)
  	for i := 0; i <= N; i++ {
  		h.addB(b[:i])
  	}
  	h.check(t)
  }
  
  // Strings with up to two nonzero bytes all have distinct hashes.
  func TestSmhasherTwoNonzero(t *testing.T) {
  	if testing.Short() {
  		t.Skip("Skipping in short mode")
  	}
  	h := newHashSet()
  	for n := 2; n <= 16; n++ {
  		twoNonZero(h, n)
  	}
  	h.check(t)
  }
  func twoNonZero(h *HashSet, n int) {
  	b := make([]byte, n)
  
  	// all zero
  	h.addB(b[:])
  
  	// one non-zero byte
  	for i := 0; i < n; i++ {
  		for x := 1; x < 256; x++ {
  			b[i] = byte(x)
  			h.addB(b[:])
  			b[i] = 0
  		}
  	}
  
  	// two non-zero bytes
  	for i := 0; i < n; i++ {
  		for x := 1; x < 256; x++ {
  			b[i] = byte(x)
  			for j := i + 1; j < n; j++ {
  				for y := 1; y < 256; y++ {
  					b[j] = byte(y)
  					h.addB(b[:])
  					b[j] = 0
  				}
  			}
  			b[i] = 0
  		}
  	}
  }
  
  // Test strings with repeats, like "abcdabcdabcdabcd..."
  func TestSmhasherCyclic(t *testing.T) {
  	if testing.Short() {
  		t.Skip("Skipping in short mode")
  	}
  	r := rand.New(rand.NewSource(1234))
  	const REPEAT = 8
  	const N = 1000000
  	for n := 4; n <= 12; n++ {
  		h := newHashSet()
  		b := make([]byte, REPEAT*n)
  		for i := 0; i < N; i++ {
  			b[0] = byte(i * 79 % 97)
  			b[1] = byte(i * 43 % 137)
  			b[2] = byte(i * 151 % 197)
  			b[3] = byte(i * 199 % 251)
  			randBytes(r, b[4:n])
  			for j := n; j < n*REPEAT; j++ {
  				b[j] = b[j-n]
  			}
  			h.addB(b)
  		}
  		h.check(t)
  	}
  }
  
  // Test strings with only a few bits set
  func TestSmhasherSparse(t *testing.T) {
  	if testing.Short() {
  		t.Skip("Skipping in short mode")
  	}
  	sparse(t, 32, 6)
  	sparse(t, 40, 6)
  	sparse(t, 48, 5)
  	sparse(t, 56, 5)
  	sparse(t, 64, 5)
  	sparse(t, 96, 4)
  	sparse(t, 256, 3)
  	sparse(t, 2048, 2)
  }
  func sparse(t *testing.T, n int, k int) {
  	b := make([]byte, n/8)
  	h := newHashSet()
  	setbits(h, b, 0, k)
  	h.check(t)
  }
  
  // set up to k bits at index i and greater
  func setbits(h *HashSet, b []byte, i int, k int) {
  	h.addB(b)
  	if k == 0 {
  		return
  	}
  	for j := i; j < len(b)*8; j++ {
  		b[j/8] |= byte(1 << uint(j&7))
  		setbits(h, b, j+1, k-1)
  		b[j/8] &= byte(^(1 << uint(j&7)))
  	}
  }
  
  // Test all possible combinations of n blocks from the set s.
  // "permutation" is a bad name here, but it is what Smhasher uses.
  func TestSmhasherPermutation(t *testing.T) {
  	if testing.Short() {
  		t.Skip("Skipping in short mode")
  	}
  	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
  	permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
  	permutation(t, []uint32{0, 1}, 20)
  	permutation(t, []uint32{0, 1 << 31}, 20)
  	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
  }
  func permutation(t *testing.T, s []uint32, n int) {
  	b := make([]byte, n*4)
  	h := newHashSet()
  	genPerm(h, b, s, 0)
  	h.check(t)
  }
  func genPerm(h *HashSet, b []byte, s []uint32, n int) {
  	h.addB(b[:n])
  	if n == len(b) {
  		return
  	}
  	for _, v := range s {
  		b[n] = byte(v)
  		b[n+1] = byte(v >> 8)
  		b[n+2] = byte(v >> 16)
  		b[n+3] = byte(v >> 24)
  		genPerm(h, b, s, n+4)
  	}
  }
  
  type Key interface {
  	clear()              // set bits all to 0
  	random(r *rand.Rand) // set key to something random
  	bits() int           // how many bits key has
  	flipBit(i int)       // flip bit i of the key
  	hash() uintptr       // hash the key
  	name() string        // for error reporting
  }
  
  type BytesKey struct {
  	b []byte
  }
  
  func (k *BytesKey) clear() {
  	for i := range k.b {
  		k.b[i] = 0
  	}
  }
  func (k *BytesKey) random(r *rand.Rand) {
  	randBytes(r, k.b)
  }
  func (k *BytesKey) bits() int {
  	return len(k.b) * 8
  }
  func (k *BytesKey) flipBit(i int) {
  	k.b[i>>3] ^= byte(1 << uint(i&7))
  }
  func (k *BytesKey) hash() uintptr {
  	return BytesHash(k.b, 0)
  }
  func (k *BytesKey) name() string {
  	return fmt.Sprintf("bytes%d", len(k.b))
  }
  
  type Int32Key struct {
  	i uint32
  }
  
  func (k *Int32Key) clear() {
  	k.i = 0
  }
  func (k *Int32Key) random(r *rand.Rand) {
  	k.i = r.Uint32()
  }
  func (k *Int32Key) bits() int {
  	return 32
  }
  func (k *Int32Key) flipBit(i int) {
  	k.i ^= 1 << uint(i)
  }
  func (k *Int32Key) hash() uintptr {
  	return Int32Hash(k.i, 0)
  }
  func (k *Int32Key) name() string {
  	return "int32"
  }
  
  type Int64Key struct {
  	i uint64
  }
  
  func (k *Int64Key) clear() {
  	k.i = 0
  }
  func (k *Int64Key) random(r *rand.Rand) {
  	k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
  }
  func (k *Int64Key) bits() int {
  	return 64
  }
  func (k *Int64Key) flipBit(i int) {
  	k.i ^= 1 << uint(i)
  }
  func (k *Int64Key) hash() uintptr {
  	return Int64Hash(k.i, 0)
  }
  func (k *Int64Key) name() string {
  	return "int64"
  }
  
  type EfaceKey struct {
  	i interface{}
  }
  
  func (k *EfaceKey) clear() {
  	k.i = nil
  }
  func (k *EfaceKey) random(r *rand.Rand) {
  	k.i = uint64(r.Int63())
  }
  func (k *EfaceKey) bits() int {
  	// use 64 bits. This tests inlined interfaces
  	// on 64-bit targets and indirect interfaces on
  	// 32-bit targets.
  	return 64
  }
  func (k *EfaceKey) flipBit(i int) {
  	k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
  }
  func (k *EfaceKey) hash() uintptr {
  	return EfaceHash(k.i, 0)
  }
  func (k *EfaceKey) name() string {
  	return "Eface"
  }
  
  type IfaceKey struct {
  	i interface {
  		F()
  	}
  }
  type fInter uint64
  
  func (x fInter) F() {
  }
  
  func (k *IfaceKey) clear() {
  	k.i = nil
  }
  func (k *IfaceKey) random(r *rand.Rand) {
  	k.i = fInter(r.Int63())
  }
  func (k *IfaceKey) bits() int {
  	// use 64 bits. This tests inlined interfaces
  	// on 64-bit targets and indirect interfaces on
  	// 32-bit targets.
  	return 64
  }
  func (k *IfaceKey) flipBit(i int) {
  	k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
  }
  func (k *IfaceKey) hash() uintptr {
  	return IfaceHash(k.i, 0)
  }
  func (k *IfaceKey) name() string {
  	return "Iface"
  }
  
  // Flipping a single bit of a key should flip each output bit with 50% probability.
  func TestSmhasherAvalanche(t *testing.T) {
  	if testing.Short() {
  		t.Skip("Skipping in short mode")
  	}
  	avalancheTest1(t, &BytesKey{make([]byte, 2)})
  	avalancheTest1(t, &BytesKey{make([]byte, 4)})
  	avalancheTest1(t, &BytesKey{make([]byte, 8)})
  	avalancheTest1(t, &BytesKey{make([]byte, 16)})
  	avalancheTest1(t, &BytesKey{make([]byte, 32)})
  	avalancheTest1(t, &BytesKey{make([]byte, 200)})
  	avalancheTest1(t, &Int32Key{})
  	avalancheTest1(t, &Int64Key{})
  	avalancheTest1(t, &EfaceKey{})
  	avalancheTest1(t, &IfaceKey{})
  }
  func avalancheTest1(t *testing.T, k Key) {
  	const REP = 100000
  	r := rand.New(rand.NewSource(1234))
  	n := k.bits()
  
  	// grid[i][j] is a count of whether flipping
  	// input bit i affects output bit j.
  	grid := make([][hashSize]int, n)
  
  	for z := 0; z < REP; z++ {
  		// pick a random key, hash it
  		k.random(r)
  		h := k.hash()
  
  		// flip each bit, hash & compare the results
  		for i := 0; i < n; i++ {
  			k.flipBit(i)
  			d := h ^ k.hash()
  			k.flipBit(i)
  
  			// record the effects of that bit flip
  			g := &grid[i]
  			for j := 0; j < hashSize; j++ {
  				g[j] += int(d & 1)
  				d >>= 1
  			}
  		}
  	}
  
  	// Each entry in the grid should be about REP/2.
  	// More precisely, we did N = k.bits() * hashSize experiments where
  	// each is the sum of REP coin flips. We want to find bounds on the
  	// sum of coin flips such that a truly random experiment would have
  	// all sums inside those bounds with 99% probability.
  	N := n * hashSize
  	var c float64
  	// find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
  	for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
  	}
  	c *= 4.0 // allowed slack - we don't need to be perfectly random
  	mean := .5 * REP
  	stddev := .5 * math.Sqrt(REP)
  	low := int(mean - c*stddev)
  	high := int(mean + c*stddev)
  	for i := 0; i < n; i++ {
  		for j := 0; j < hashSize; j++ {
  			x := grid[i][j]
  			if x < low || x > high {
  				t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
  			}
  		}
  	}
  }
  
  // All bit rotations of a set of distinct keys
  func TestSmhasherWindowed(t *testing.T) {
  	windowed(t, &Int32Key{})
  	windowed(t, &Int64Key{})
  	windowed(t, &BytesKey{make([]byte, 128)})
  }
  func windowed(t *testing.T, k Key) {
  	if testing.Short() {
  		t.Skip("Skipping in short mode")
  	}
  	const BITS = 16
  
  	for r := 0; r < k.bits(); r++ {
  		h := newHashSet()
  		for i := 0; i < 1<<BITS; i++ {
  			k.clear()
  			for j := 0; j < BITS; j++ {
  				if i>>uint(j)&1 != 0 {
  					k.flipBit((j + r) % k.bits())
  				}
  			}
  			h.add(k.hash())
  		}
  		h.check(t)
  	}
  }
  
  // All keys of the form prefix + [A-Za-z0-9]*N + suffix.
  func TestSmhasherText(t *testing.T) {
  	if testing.Short() {
  		t.Skip("Skipping in short mode")
  	}
  	text(t, "Foo", "Bar")
  	text(t, "FooBar", "")
  	text(t, "", "FooBar")
  }
  func text(t *testing.T, prefix, suffix string) {
  	const N = 4
  	const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
  	const L = len(S)
  	b := make([]byte, len(prefix)+N+len(suffix))
  	copy(b, prefix)
  	copy(b[len(prefix)+N:], suffix)
  	h := newHashSet()
  	c := b[len(prefix):]
  	for i := 0; i < L; i++ {
  		c[0] = S[i]
  		for j := 0; j < L; j++ {
  			c[1] = S[j]
  			for k := 0; k < L; k++ {
  				c[2] = S[k]
  				for x := 0; x < L; x++ {
  					c[3] = S[x]
  					h.addB(b)
  				}
  			}
  		}
  	}
  	h.check(t)
  }
  
  // Make sure different seed values generate different hashes.
  func TestSmhasherSeed(t *testing.T) {
  	h := newHashSet()
  	const N = 100000
  	s := "hello"
  	for i := 0; i < N; i++ {
  		h.addS_seed(s, uintptr(i))
  	}
  	h.check(t)
  }
  
  // size of the hash output (32 or 64 bits)
  const hashSize = 32 + int(^uintptr(0)>>63<<5)
  
  func randBytes(r *rand.Rand, b []byte) {
  	for i := range b {
  		b[i] = byte(r.Uint32())
  	}
  }
  
  func benchmarkHash(b *testing.B, n int) {
  	s := strings.Repeat("A", n)
  
  	for i := 0; i < b.N; i++ {
  		StringHash(s, 0)
  	}
  	b.SetBytes(int64(n))
  }
  
  func BenchmarkHash5(b *testing.B)     { benchmarkHash(b, 5) }
  func BenchmarkHash16(b *testing.B)    { benchmarkHash(b, 16) }
  func BenchmarkHash64(b *testing.B)    { benchmarkHash(b, 64) }
  func BenchmarkHash1024(b *testing.B)  { benchmarkHash(b, 1024) }
  func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }
  
  func TestArrayHash(t *testing.T) {
  	// Make sure that "" in arrays hash correctly. The hash
  	// should at least scramble the input seed so that, e.g.,
  	// {"","foo"} and {"foo",""} have different hashes.
  
  	// If the hash is bad, then all (8 choose 4) = 70 keys
  	// have the same hash. If so, we allocate 70/8 = 8
  	// overflow buckets. If the hash is good we don't
  	// normally allocate any overflow buckets, and the
  	// probability of even one or two overflows goes down rapidly.
  	// (There is always 1 allocation of the bucket array. The map
  	// header is allocated on the stack.)
  	f := func() {
  		// Make the key type at most 128 bytes. Otherwise,
  		// we get an allocation per key.
  		type key [8]string
  		m := make(map[key]bool, 70)
  
  		// fill m with keys that have 4 "foo"s and 4 ""s.
  		for i := 0; i < 256; i++ {
  			var k key
  			cnt := 0
  			for j := uint(0); j < 8; j++ {
  				if i>>j&1 != 0 {
  					k[j] = "foo"
  					cnt++
  				}
  			}
  			if cnt == 4 {
  				m[k] = true
  			}
  		}
  		if len(m) != 70 {
  			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
  		}
  	}
  	if n := testing.AllocsPerRun(10, f); n > 6 {
  		t.Errorf("too many allocs %f - hash not balanced", n)
  	}
  }
  func TestStructHash(t *testing.T) {
  	// See the comment in TestArrayHash.
  	f := func() {
  		type key struct {
  			a, b, c, d, e, f, g, h string
  		}
  		m := make(map[key]bool, 70)
  
  		// fill m with keys that have 4 "foo"s and 4 ""s.
  		for i := 0; i < 256; i++ {
  			var k key
  			cnt := 0
  			if i&1 != 0 {
  				k.a = "foo"
  				cnt++
  			}
  			if i&2 != 0 {
  				k.b = "foo"
  				cnt++
  			}
  			if i&4 != 0 {
  				k.c = "foo"
  				cnt++
  			}
  			if i&8 != 0 {
  				k.d = "foo"
  				cnt++
  			}
  			if i&16 != 0 {
  				k.e = "foo"
  				cnt++
  			}
  			if i&32 != 0 {
  				k.f = "foo"
  				cnt++
  			}
  			if i&64 != 0 {
  				k.g = "foo"
  				cnt++
  			}
  			if i&128 != 0 {
  				k.h = "foo"
  				cnt++
  			}
  			if cnt == 4 {
  				m[k] = true
  			}
  		}
  		if len(m) != 70 {
  			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
  		}
  	}
  	if n := testing.AllocsPerRun(10, f); n > 6 {
  		t.Errorf("too many allocs %f - hash not balanced", n)
  	}
  }
  
  var sink uint64
  
  func BenchmarkAlignedLoad(b *testing.B) {
  	var buf [16]byte
  	p := unsafe.Pointer(&buf[0])
  	var s uint64
  	for i := 0; i < b.N; i++ {
  		s += ReadUnaligned64(p)
  	}
  	sink = s
  }
  
  func BenchmarkUnalignedLoad(b *testing.B) {
  	var buf [16]byte
  	p := unsafe.Pointer(&buf[1])
  	var s uint64
  	for i := 0; i < b.N; i++ {
  		s += ReadUnaligned64(p)
  	}
  	sink = s
  }
  
  func TestCollisions(t *testing.T) {
  	if testing.Short() {
  		t.Skip("Skipping in short mode")
  	}
  	for i := 0; i < 16; i++ {
  		for j := 0; j < 16; j++ {
  			if j == i {
  				continue
  			}
  			var a [16]byte
  			m := make(map[uint16]struct{}, 1<<16)
  			for n := 0; n < 1<<16; n++ {
  				a[i] = byte(n)
  				a[j] = byte(n >> 8)
  				m[uint16(BytesHash(a[:], 0))] = struct{}{}
  			}
  			if len(m) <= 1<<15 {
  				t.Errorf("too many collisions i=%d j=%d outputs=%d out of 65536\n", i, j, len(m))
  			}
  		}
  	}
  }
  

View as plain text