-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrigram_index.rb
93 lines (78 loc) · 1.53 KB
/
trigram_index.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Basic Trigram Indexing
#
# Usage:
# => t = TrigramIndex.new
#
# Insert into the index
# => t.insert('foo')
# => t.insert('food')
#
# Search the index
# => t.search('foo')
# ['foo', 'food']
#
class TrigramIndex
attr_accessor :index
attr_accessor :records
# Initialize the Trigram Index
def initialize
@index = {}
@records = []
@joins = []
end
# Insert a value into the index
# value: String
#
# Returns an Array of Trigrams
def insert(value)
records << value
record_id = records.count - 1
trigrams = hash(value)
trigrams.each do |tri|
index[tri] ||= []
index[tri] << record_id
end
end
# Search the index
# value: String
#
# Returns an Array of found Strings
def search(value)
record_ids = []
hash(value).each do |tri|
if ids = index[tri]
record_ids += ids
end
end
record_ids.uniq!
record_ids.collect { |id| records[id] }
end
private
def hash(value)
index = 0
chars = value.chars
chars.collect do |char|
tri = []
before_index = index - 1
after_index = index + 1
if before_index == -1
tri << "*"
else
tri << character_filter(chars[before_index])
end
tri << character_filter(chars[index])
tri << character_filter(chars[after_index])
index += 1
tri.join('')
end
end
def character_filter(char)
return '*' if char.nil?
char = char.downcase
if char.match /[a-z]/
return char
else
return '*'
end
end
end