Skip to content

Commit e8921ed

Browse files
committed
Added prefix match for consistent hash
1 parent 24b0969 commit e8921ed

File tree

2 files changed

+114
-16
lines changed

2 files changed

+114
-16
lines changed

consistenthash/consistenthash.go

+98-6
Original file line numberDiff line numberDiff line change
@@ -19,24 +19,78 @@ package consistenthash
1919

2020
import (
2121
"hash/crc32"
22+
"math/bits"
2223
"sort"
2324
"strconv"
2425
)
2526

2627
type Hash func(data []byte) uint32
2728

29+
const defaultHashExpansion = 6
30+
2831
type Map struct {
29-
hash Hash
32+
// Inputs
33+
34+
// hash is the hash function that will be applied to both added
35+
// keys and fetched keys
36+
hash Hash
37+
38+
// replicas is the number of virtual nodes that will be inserted
39+
// into the consistent hash ring for each key added
3040
replicas int
31-
keys []int // Sorted
32-
hashMap map[int]string
41+
42+
// prefixTableExpansion is the multiple of virtual nodes that
43+
// will be inserted into the internal hash table for O(1) lookups.
44+
prefixTableExpansion int
45+
46+
// Internal data
47+
48+
// keys is the hash of the virtual nodes, sorted by hash value
49+
keys []int // Sorted
50+
51+
// hashMap maps the hashed keys back to the input strings.
52+
// Note that all virtual nodes will map back to the same input
53+
// string
54+
hashMap map[int]string
55+
56+
// prefixShift is the number of bits an input hash should
57+
// be right-shifted to act as a lookup in the prefixTable
58+
prefixShift uint32
59+
60+
// prefixTable is a map of the most significant bits of
61+
// a hash value to output all hashes with that prefix
62+
// map to. If the result is ambiguous (i.e. there is a
63+
// hash range split within this prefix) the value will
64+
// be blank and we should fall back to a binary search
65+
// through keys to find the exact output
66+
prefixTable []string
3367
}
3468

69+
// New returns a blank consistent hash ring that will return
70+
// the key whose hash comes next after the hash of the input to
71+
// Get().
72+
// Increasing the number of replicas will improve the smoothness
73+
// of the hash ring and reduce the data moved when adding/removing
74+
// nodes, at the cost of more memory.
3575
func New(replicas int, fn Hash) *Map {
76+
return NewConsistentHash(replicas, defaultHashExpansion, fn)
77+
}
78+
79+
// NewConsistentHash returns a blank consistent hash ring that will return
80+
// the key whose hash comes next after the hash of the input to
81+
// Get().
82+
// Increasing the number of replicas will improve the smoothness
83+
// of the hash ring and reduce the data moved when adding/removing
84+
// nodes.
85+
// Increasing the tableExpansion will allocate more entries in the
86+
// internal hash table, reducing the frequency of lg(n) binary
87+
// searches during calls to the Map.Get method.
88+
func NewConsistentHash(replicas int, tableExpansion int, fn Hash) *Map {
3689
m := &Map{
37-
replicas: replicas,
38-
hash: fn,
39-
hashMap: make(map[int]string),
90+
replicas: replicas,
91+
hash: fn,
92+
hashMap: make(map[int]string),
93+
prefixTableExpansion: tableExpansion,
4094
}
4195
if m.hash == nil {
4296
m.hash = crc32.ChecksumIEEE
@@ -59,6 +113,37 @@ func (m *Map) Add(keys ...string) {
59113
}
60114
}
61115
sort.Ints(m.keys)
116+
117+
// Find minimum number of bits to hold |keys| * prefixTableExpansion
118+
prefixBits := uint32(bits.Len32(uint32(len(m.keys) * m.prefixTableExpansion)))
119+
m.prefixShift = 32 - prefixBits
120+
121+
prefixTableSize := 1 << prefixBits
122+
m.prefixTable = make([]string, prefixTableSize)
123+
124+
previousKeyPrefix := -1 // Effectively -Inf
125+
currentKeyIdx := 0
126+
currentKeyPrefix := m.keys[currentKeyIdx] >> m.prefixShift
127+
128+
for i := range m.prefixTable {
129+
if previousKeyPrefix < i && currentKeyPrefix > i {
130+
// All keys with this prefix will map to a single value
131+
m.prefixTable[i] = m.hashMap[m.keys[currentKeyIdx]]
132+
} else {
133+
// Several keys might have the same prefix. Walk
134+
// over them until it changes
135+
previousKeyPrefix = currentKeyPrefix
136+
for currentKeyPrefix == previousKeyPrefix {
137+
currentKeyIdx++
138+
if currentKeyIdx < len(m.keys) {
139+
currentKeyPrefix = m.keys[currentKeyIdx] >> m.prefixShift
140+
} else {
141+
currentKeyIdx = 0
142+
currentKeyPrefix = prefixTableSize + 1 // Effectively +Inf
143+
}
144+
}
145+
}
146+
}
62147
}
63148

64149
// Gets the closest item in the hash to the provided key.
@@ -69,6 +154,13 @@ func (m *Map) Get(key string) string {
69154

70155
hash := int(m.hash([]byte(key)))
71156

157+
// Look for the hash prefix in the prefix table
158+
prefixSlot := hash >> m.prefixShift
159+
tableResult := m.prefixTable[prefixSlot]
160+
if len(tableResult) > 0 {
161+
return tableResult
162+
}
163+
72164
// Binary search for appropriate replica.
73165
idx := sort.Search(len(m.keys), func(i int) bool { return m.keys[i] >= hash })
74166

consistenthash/consistenthash_test.go

+16-10
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ func TestHashing(t *testing.T) {
2626

2727
// Override the hash function to return easier to reason about values. Assumes
2828
// the keys can be converted to an integer.
29-
hash := New(3, func(key []byte) uint32 {
29+
hash := NewConsistentHash(3, 6, func(key []byte) uint32 {
3030
i, err := strconv.Atoi(string(key))
3131
if err != nil {
3232
panic(err)
@@ -66,8 +66,8 @@ func TestHashing(t *testing.T) {
6666
}
6767

6868
func TestConsistency(t *testing.T) {
69-
hash1 := New(1, nil)
70-
hash2 := New(1, nil)
69+
hash1 := NewConsistentHash(1, 6, nil)
70+
hash2 := NewConsistentHash(1, 6, nil)
7171

7272
hash1.Add("Bill", "Bob", "Bonny")
7373
hash2.Add("Bob", "Bonny", "Bill")
@@ -86,25 +86,31 @@ func TestConsistency(t *testing.T) {
8686

8787
}
8888

89-
func BenchmarkGet8(b *testing.B) { benchmarkGet(b, 8) }
90-
func BenchmarkGet32(b *testing.B) { benchmarkGet(b, 32) }
91-
func BenchmarkGet128(b *testing.B) { benchmarkGet(b, 128) }
92-
func BenchmarkGet512(b *testing.B) { benchmarkGet(b, 512) }
89+
func BenchmarkGet8(b *testing.B) { benchmarkGet(b, 8, 6) }
90+
func BenchmarkGet32(b *testing.B) { benchmarkGet(b, 32, 6) }
91+
func BenchmarkGet128(b *testing.B) { benchmarkGet(b, 128, 6) }
92+
func BenchmarkGet512(b *testing.B) { benchmarkGet(b, 512, 6) }
9393

94-
func benchmarkGet(b *testing.B, shards int) {
94+
func benchmarkGet(b *testing.B, shards int, expansion int) {
9595

96-
hash := New(50, nil)
96+
hash := NewConsistentHash(50, expansion, nil)
9797

9898
var buckets []string
9999
for i := 0; i < shards; i++ {
100100
buckets = append(buckets, fmt.Sprintf("shard-%d", i))
101101
}
102102

103+
testStringCount := shards
104+
var testStrings []string
105+
for i := 0; i < testStringCount; i++ {
106+
testStrings = append(testStrings, fmt.Sprintf("%d", i))
107+
}
108+
103109
hash.Add(buckets...)
104110

105111
b.ResetTimer()
106112

107113
for i := 0; i < b.N; i++ {
108-
hash.Get(buckets[i&(shards-1)])
114+
hash.Get(testStrings[i&(testStringCount-1)])
109115
}
110116
}

0 commit comments

Comments
 (0)