Skip to content

Commit 86a154e

Browse files
committed
Added prefix match for consistent hash
1 parent 24b0969 commit 86a154e

File tree

2 files changed

+118
-17
lines changed

2 files changed

+118
-17
lines changed

consistenthash/consistenthash.go

+102-7
Original file line numberDiff line numberDiff line change
@@ -19,24 +19,81 @@ package consistenthash
1919

2020
import (
2121
"hash/crc32"
22+
"math/bits"
2223
"sort"
2324
"strconv"
2425
)
2526

2627
type Hash func(data []byte) uint32
2728

29+
const defaultHashExpansion = 6
30+
2831
type Map struct {
29-
hash Hash
30-
replicas int
31-
keys []int // Sorted
32-
hashMap map[int]string
32+
// Inputs
33+
34+
// hash is the hash function that will be applied to both added
35+
// keys and fetched keys
36+
hash Hash
37+
38+
// replicas is the number of virtual nodes that will be inserted
39+
// into the consistent hash ring for each key added
40+
replicas int
41+
42+
// prefixTableExpansion is the multiple of virtual nodes that
43+
// will be inserted into the internal hash table for O(1) lookups.
44+
prefixTableExpansion int
45+
46+
47+
48+
// Internal data
49+
50+
// keys is the hash of the virtual nodes, sorted by hash value
51+
keys []int // Sorted
52+
53+
// hashMap maps the hashed keys back to the input strings.
54+
// Note that all virtual nodes will map back to the same input
55+
// string
56+
hashMap map[int]string
57+
58+
// prefixShift is the number of bits an input hash should
59+
// be right-shifted to act as a lookup in the prefixTable
60+
prefixShift uint32
61+
62+
// prefixTable is a map of the most significant bits of
63+
// a hash value to output all hashes with that prefix
64+
// map to. If the result is ambiguous (i.e. there is a
65+
// hash range split within this prefix) the value will
66+
// be blank and we should fall back to a binary search
67+
// through keys to find the exact output
68+
prefixTable []string
3369
}
3470

71+
// New returns a blank consistent hash ring that will return
72+
// the key whos hash comes next after the hash of the input to
73+
// Get().
74+
// Increasing the number of replicas will improve the smoothness
75+
// of the hash ring and reduce the data moved when adding/removing
76+
// nodes, at the cost of more memory.
3577
func New(replicas int, fn Hash) *Map {
78+
return NewConsistentHash(replicas, defaultHashExpansion, fn)
79+
}
80+
81+
82+
// NewConsistentHash returns a blank consistent hash ring that will return
83+
// the key whos hash comes next after the hash of the input to
84+
// Get().
85+
// Increasing the number of replicas will improve the smoothness
86+
// of the hash ring and reduce the data moved when adding/removing
87+
// nodes.
88+
// Increasing the tableExpansion will allocate more entries in the
89+
// internal hash table, reducing the frequency of lg(n) binary
90+
// searches during Get() calls.
91+
func NewConsistentHash(replicas int, tableExpansion int, fn Hash) *Map {
3692
m := &Map{
37-
replicas: replicas,
38-
hash: fn,
39-
hashMap: make(map[int]string),
93+
replicas: replicas,
94+
hash: fn,
95+
hashMap: make(map[int]string),
96+
prefixTableExpansion: tableExpansion,
4097
}
4198
if m.hash == nil {
4299
m.hash = crc32.ChecksumIEEE
@@ -59,6 +116,37 @@ func (m *Map) Add(keys ...string) {
59116
}
60117
}
61118
sort.Ints(m.keys)
119+
120+
// Find minimum number of bits to hold |keys| * prefixTableExpansion
121+
prefixBits := uint32(bits.Len32(uint32(len(m.keys) * m.prefixTableExpansion)))
122+
m.prefixShift = 32 - prefixBits
123+
124+
prefixTableSize := 1 << prefixBits
125+
m.prefixTable = make([]string, prefixTableSize)
126+
127+
previousKeyPrefix := -1 // Effectively -Inf
128+
currentKeyIdx := 0
129+
currentKeyPrefix := m.keys[currentKeyIdx] >> m.prefixShift
130+
131+
for i := range m.prefixTable {
132+
if previousKeyPrefix < i && currentKeyPrefix > i {
133+
// All keys with this prefix will map to a single value
134+
m.prefixTable[i] = m.hashMap[m.keys[currentKeyIdx]]
135+
} else {
136+
// Several keys might have the same prefix. Walk
137+
// over them until it changes
138+
previousKeyPrefix = currentKeyPrefix
139+
for currentKeyPrefix == previousKeyPrefix {
140+
currentKeyIdx++
141+
if currentKeyIdx < len(m.keys) {
142+
currentKeyPrefix = m.keys[currentKeyIdx] >> m.prefixShift
143+
} else {
144+
currentKeyIdx = 0
145+
currentKeyPrefix = prefixTableSize + 1 // Effectively +Inf
146+
}
147+
}
148+
}
149+
}
62150
}
63151

64152
// Gets the closest item in the hash to the provided key.
@@ -69,6 +157,13 @@ func (m *Map) Get(key string) string {
69157

70158
hash := int(m.hash([]byte(key)))
71159

160+
// Look for the hash prefix in the prefix table
161+
prefixSlot := hash >> m.prefixShift
162+
tableResult := m.prefixTable[prefixSlot]
163+
if len(tableResult) > 0 {
164+
return tableResult
165+
}
166+
72167
// Binary search for appropriate replica.
73168
idx := sort.Search(len(m.keys), func(i int) bool { return m.keys[i] >= hash })
74169

consistenthash/consistenthash_test.go

+16-10
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ func TestHashing(t *testing.T) {
2626

2727
// Override the hash function to return easier to reason about values. Assumes
2828
// the keys can be converted to an integer.
29-
hash := New(3, func(key []byte) uint32 {
29+
hash := New(3, 6, func(key []byte) uint32 {
3030
i, err := strconv.Atoi(string(key))
3131
if err != nil {
3232
panic(err)
@@ -66,8 +66,8 @@ func TestHashing(t *testing.T) {
6666
}
6767

6868
func TestConsistency(t *testing.T) {
69-
hash1 := New(1, nil)
70-
hash2 := New(1, nil)
69+
hash1 := New(1, 6, nil)
70+
hash2 := New(1, 6, nil)
7171

7272
hash1.Add("Bill", "Bob", "Bonny")
7373
hash2.Add("Bob", "Bonny", "Bill")
@@ -86,25 +86,31 @@ func TestConsistency(t *testing.T) {
8686

8787
}
8888

89-
func BenchmarkGet8(b *testing.B) { benchmarkGet(b, 8) }
90-
func BenchmarkGet32(b *testing.B) { benchmarkGet(b, 32) }
91-
func BenchmarkGet128(b *testing.B) { benchmarkGet(b, 128) }
92-
func BenchmarkGet512(b *testing.B) { benchmarkGet(b, 512) }
89+
func BenchmarkGet8(b *testing.B) { benchmarkGet(b, 8, 6) }
90+
func BenchmarkGet32(b *testing.B) { benchmarkGet(b, 32, 6) }
91+
func BenchmarkGet128(b *testing.B) { benchmarkGet(b, 128, 6) }
92+
func BenchmarkGet512(b *testing.B) { benchmarkGet(b, 512, 6) }
9393

94-
func benchmarkGet(b *testing.B, shards int) {
94+
func benchmarkGet(b *testing.B, shards int, expansion int) {
9595

96-
hash := New(50, nil)
96+
hash := New(50, expansion, nil)
9797

9898
var buckets []string
9999
for i := 0; i < shards; i++ {
100100
buckets = append(buckets, fmt.Sprintf("shard-%d", i))
101101
}
102102

103+
testStringCount := shards
104+
var testStrings []string
105+
for i := 0; i < testStringCount; i++ {
106+
testStrings = append(testStrings, fmt.Sprintf("%d", i))
107+
}
108+
103109
hash.Add(buckets...)
104110

105111
b.ResetTimer()
106112

107113
for i := 0; i < b.N; i++ {
108-
hash.Get(buckets[i&(shards-1)])
114+
hash.Get(testStrings[i&(testStringCount-1)])
109115
}
110116
}

0 commit comments

Comments
 (0)