From d6d0da08e3ccd8bf84463fc2b30b8dcd31a3d42f Mon Sep 17 00:00:00 2001 From: Bob Hansen Date: Wed, 29 Aug 2018 11:53:43 -0400 Subject: [PATCH] Added prefix match for consistent hash --- consistenthash/consistenthash.go | 104 ++++++++++++++++++++++++-- consistenthash/consistenthash_test.go | 26 ++++--- 2 files changed, 114 insertions(+), 16 deletions(-) diff --git a/consistenthash/consistenthash.go b/consistenthash/consistenthash.go index a9c56f07..00b4110a 100644 --- a/consistenthash/consistenthash.go +++ b/consistenthash/consistenthash.go @@ -19,24 +19,78 @@ package consistenthash import ( "hash/crc32" + "math/bits" "sort" "strconv" ) type Hash func(data []byte) uint32 +const defaultHashExpansion = 6 + type Map struct { - hash Hash + // Inputs + + // hash is the hash function that will be applied to both added + // keys and fetched keys + hash Hash + + // replicas is the number of virtual nodes that will be inserted + // into the consistent hash ring for each key added replicas int - keys []int // Sorted - hashMap map[int]string + + // prefixTableExpansion is the multiple of virtual nodes that + // will be inserted into the internal hash table for O(1) lookups. + prefixTableExpansion int + + // Internal data + + // keys is the hash of the virtual nodes, sorted by hash value + keys []int // Sorted + + // hashMap maps the hashed keys back to the input strings. + // Note that all virtual nodes will map back to the same input + // string + hashMap map[int]string + + // prefixShift is the number of bits an input hash should + // be right-shifted to act as a lookup in the prefixTable + prefixShift uint32 + + // prefixTable is a map of the most significant bits of + // a hash value to output all hashes with that prefix + // map to. If the result is ambiguous (i.e. there is a + // hash range split within this prefix) the value will + // be blank and we should fall back to a binary search + // through keys to find the exact output + prefixTable []string } +// New returns a blank consistent hash ring that will return +// the key whose hash comes next after the hash of the input to +// Map.Get. +// Increasing the number of replicas will improve the smoothness +// of the hash ring and reduce the data moved when adding/removing +// nodes, at the cost of more memory. func New(replicas int, fn Hash) *Map { + return NewConsistentHash(replicas, defaultHashExpansion, fn) +} + +// NewConsistentHash returns a blank consistent hash ring that will return +// the key whose hash comes next after the hash of the input to +// Map.Get. +// Increasing the number of replicas will improve the smoothness +// of the hash ring and reduce the data moved when adding/removing +// nodes. +// Increasing the tableExpansion will allocate more entries in the +// internal hash table, reducing the frequency of lg(n) binary +// searches during calls to the Map.Get method. +func NewConsistentHash(replicas int, tableExpansion int, fn Hash) *Map { m := &Map{ - replicas: replicas, - hash: fn, - hashMap: make(map[int]string), + replicas: replicas, + hash: fn, + hashMap: make(map[int]string), + prefixTableExpansion: tableExpansion, } if m.hash == nil { m.hash = crc32.ChecksumIEEE @@ -59,6 +113,37 @@ func (m *Map) Add(keys ...string) { } } sort.Ints(m.keys) + + // Find minimum number of bits to hold |keys| * prefixTableExpansion + prefixBits := uint32(bits.Len32(uint32(len(m.keys) * m.prefixTableExpansion))) + m.prefixShift = 32 - prefixBits + + prefixTableSize := 1 << prefixBits + m.prefixTable = make([]string, prefixTableSize) + + previousKeyPrefix := -1 // Effectively -Inf + currentKeyIdx := 0 + currentKeyPrefix := m.keys[currentKeyIdx] >> m.prefixShift + + for i := range m.prefixTable { + if previousKeyPrefix < i && currentKeyPrefix > i { + // All keys with this prefix will map to a single value + m.prefixTable[i] = m.hashMap[m.keys[currentKeyIdx]] + } else { + // Several keys might have the same prefix. Walk + // over them until it changes + previousKeyPrefix = currentKeyPrefix + for currentKeyPrefix == previousKeyPrefix { + currentKeyIdx++ + if currentKeyIdx < len(m.keys) { + currentKeyPrefix = m.keys[currentKeyIdx] >> m.prefixShift + } else { + currentKeyIdx = 0 + currentKeyPrefix = prefixTableSize + 1 // Effectively +Inf + } + } + } + } } // Gets the closest item in the hash to the provided key. @@ -69,6 +154,13 @@ func (m *Map) Get(key string) string { hash := int(m.hash([]byte(key))) + // Look for the hash prefix in the prefix table + prefixSlot := hash >> m.prefixShift + tableResult := m.prefixTable[prefixSlot] + if len(tableResult) > 0 { + return tableResult + } + // Binary search for appropriate replica. idx := sort.Search(len(m.keys), func(i int) bool { return m.keys[i] >= hash }) diff --git a/consistenthash/consistenthash_test.go b/consistenthash/consistenthash_test.go index 1a37fd7f..228aa85c 100644 --- a/consistenthash/consistenthash_test.go +++ b/consistenthash/consistenthash_test.go @@ -26,7 +26,7 @@ func TestHashing(t *testing.T) { // Override the hash function to return easier to reason about values. Assumes // the keys can be converted to an integer. - hash := New(3, func(key []byte) uint32 { + hash := NewConsistentHash(3, 6, func(key []byte) uint32 { i, err := strconv.Atoi(string(key)) if err != nil { panic(err) @@ -66,8 +66,8 @@ func TestHashing(t *testing.T) { } func TestConsistency(t *testing.T) { - hash1 := New(1, nil) - hash2 := New(1, nil) + hash1 := NewConsistentHash(1, 6, nil) + hash2 := NewConsistentHash(1, 6, nil) hash1.Add("Bill", "Bob", "Bonny") hash2.Add("Bob", "Bonny", "Bill") @@ -86,25 +86,31 @@ func TestConsistency(t *testing.T) { } -func BenchmarkGet8(b *testing.B) { benchmarkGet(b, 8) } -func BenchmarkGet32(b *testing.B) { benchmarkGet(b, 32) } -func BenchmarkGet128(b *testing.B) { benchmarkGet(b, 128) } -func BenchmarkGet512(b *testing.B) { benchmarkGet(b, 512) } +func BenchmarkGet8(b *testing.B) { benchmarkGet(b, 8, 6) } +func BenchmarkGet32(b *testing.B) { benchmarkGet(b, 32, 6) } +func BenchmarkGet128(b *testing.B) { benchmarkGet(b, 128, 6) } +func BenchmarkGet512(b *testing.B) { benchmarkGet(b, 512, 6) } -func benchmarkGet(b *testing.B, shards int) { +func benchmarkGet(b *testing.B, shards int, expansion int) { - hash := New(50, nil) + hash := NewConsistentHash(50, expansion, nil) var buckets []string for i := 0; i < shards; i++ { buckets = append(buckets, fmt.Sprintf("shard-%d", i)) } + testStringCount := shards + var testStrings []string + for i := 0; i < testStringCount; i++ { + testStrings = append(testStrings, fmt.Sprintf("%d", i)) + } + hash.Add(buckets...) b.ResetTimer() for i := 0; i < b.N; i++ { - hash.Get(buckets[i&(shards-1)]) + hash.Get(testStrings[i&(testStringCount-1)]) } }