-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwordmap.go
137 lines (114 loc) · 2.46 KB
/
wordmap.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
package wordpatterns
import (
"bufio"
"log"
"os"
"strings"
"unicode"
"github.com/mrap/stringutil"
)
// wordmap is the index type used by Wordmap: each key is a (filtered)
// substring and the value lists the words that were added under that key.
type wordmap map[string][]string
// Wordmap indexes words by their substrings so that the words containing a
// given substring can be looked up with WordsContaining.
type Wordmap struct {
// m maps each indexed substring to the words added under it by AddWord.
m wordmap
// wordSubstrs maps each added word to the substrings generated for it;
// RemoveWord uses it to find the index entries to clean up.
wordSubstrs map[string][]string
// opts is the copy of the options taken by NewWordmap.
opts WordmapOptions
// ignoreChars is the set of runes stripped from input before indexing
// or lookup.
ignoreChars map[rune]struct{}
}
// WordmapOptions configures how a Wordmap normalizes words and lookups.
type WordmapOptions struct {
// IgnoreCase lowercases words and lookup substrings (and the IgnoreChars
// runes) so matching is case-insensitive.
IgnoreCase bool
// IgnoreOrder passes the normalized string through stringutil.SortString
// before indexing or lookup — presumably sorting its characters so that
// character order does not matter; confirm against stringutil.
IgnoreOrder bool
// IgnoreChars lists runes that are removed from words and lookup
// substrings.
IgnoreChars []rune
// MinSubstrLen is handed to stringutil.Substrs as the minimum length when
// generating substrings; NewWordmap raises values below 1 to 1.
MinSubstrLen int
}
// NewWordmap returns an empty Wordmap configured by opts.
//
// A nil opts is treated as the zero WordmapOptions. A MinSubstrLen below 1 is
// raised to 1. When IgnoreCase is set, the IgnoreChars runes are lowercased so
// they match the lowercased input they are later compared against.
//
// The options are copied before being adjusted, so the struct the caller
// passed in is never modified.
func NewWordmap(opts *WordmapOptions) *Wordmap {
	// Work on a private copy: normalizing MinSubstrLen through the caller's
	// pointer would silently rewrite the caller's options.
	var o WordmapOptions
	if opts != nil {
		o = *opts
	}
	if o.MinSubstrLen < 1 {
		o.MinSubstrLen = 1
	}

	ignoreChars := make(map[rune]struct{}, len(o.IgnoreChars))
	for _, c := range o.IgnoreChars {
		if o.IgnoreCase {
			c = unicode.ToLower(c)
		}
		ignoreChars[c] = struct{}{}
	}

	return &Wordmap{
		m:           make(wordmap),
		wordSubstrs: make(map[string][]string),
		opts:        o,
		ignoreChars: ignoreChars,
	}
}
// Has reports whether word is currently recorded in the map, i.e. it has an
// entry in the per-word substring table. The word is looked up exactly as
// given; no option-based normalization is applied to it.
func (wm *Wordmap) Has(word string) bool {
	_, found := wm.wordSubstrs[word]
	return found
}
// Substrs returns the substrings recorded for word when it was added, or nil
// if word has no entry. The returned slice is the one stored internally, not
// a copy.
func (wm *Wordmap) Substrs(word string) []string {
	recorded := wm.wordSubstrs[word]
	return recorded
}
// AddWord indexes word under every substring generated from its filtered form
// (see filteredSubstr) with at least MinSubstrLen as the length argument, and
// records those substrings so RemoveWord can undo the insertion.
//
// Adding a word that is already present is a no-op: the options are fixed at
// construction, so its substrings would be identical, and appending it again
// would only produce duplicate entries in WordsContaining results.
func (wm *Wordmap) AddWord(word string) {
	if _, added := wm.wordSubstrs[word]; added {
		return
	}

	substrs := stringutil.Substrs(wm.filteredSubstr(word), wm.opts.MinSubstrLen)
	for _, s := range substrs {
		wm.m[s] = append(wm.m[s], word)
	}
	wm.wordSubstrs[word] = substrs
}
// RemoveWord removes word from the index entry of every substring recorded
// for it and then forgets the word itself. Removing a word that was never
// added does nothing.
//
// Index entries that no longer list any word are deleted, so SubstringCount
// only counts substrings that still map to at least one word and the index
// does not accumulate empty entries over time.
func (wm *Wordmap) RemoveWord(word string) {
	for _, s := range wm.wordSubstrs[word] {
		words, exists := wm.m[s]
		if !exists {
			continue
		}
		if remaining := removeStr(words, word); len(remaining) > 0 {
			wm.m[s] = remaining
		} else {
			// Drop the key entirely rather than keeping an empty slice.
			delete(wm.m, s)
		}
	}
	delete(wm.wordSubstrs, word)
}
// WordsContaining returns the words indexed under substr after substr has
// been normalized with the same filtering applied to added words. The result
// is the internally stored slice (nil when there is no entry), not a copy.
func (wm Wordmap) WordsContaining(substr string) []string {
	key := wm.filteredSubstr(substr)
	return wm.m[key]
}
// SubstringCount returns the number of distinct substring keys currently held
// in the index.
func (wm Wordmap) SubstringCount() int {
	keys := len(wm.m)
	return keys
}
// filteredSubstr normalizes substr according to the map's options, in this
// order: lowercase it when IgnoreCase is set, strip the ignored runes, and,
// when IgnoreOrder is set, pass the result through stringutil.SortString.
func (wm Wordmap) filteredSubstr(substr string) string {
	normalized := substr
	if wm.opts.IgnoreCase {
		normalized = strings.ToLower(normalized)
	}

	normalized = wm.removeIgnoredChars(normalized)

	if !wm.opts.IgnoreOrder {
		return normalized
	}
	return stringutil.SortString(normalized)
}
// removeIgnoredChars returns str with every rune that appears in the map's
// ignore set dropped; all other runes are passed through strings.Map
// unchanged.
func (wm Wordmap) removeIgnoredChars(str string) string {
	return strings.Map(func(r rune) rune {
		_, drop := wm.ignoreChars[r]
		if drop {
			// A negative result tells strings.Map to omit the rune.
			return -1
		}
		return r
	}, str)
}
// removeStr filters every occurrence of str out of arr and returns the
// shortened slice. Filtering happens in place: the result shares arr's
// backing array, so the leading elements of arr are overwritten with the
// kept values.
func removeStr(arr []string, str string) []string {
	kept := arr[:0]
	for idx := range arr {
		if arr[idx] == str {
			continue
		}
		kept = append(kept, arr[idx])
	}
	return kept
}
// PopulateFromFile reads filename line by line and adds each line to wm as a
// word. If the file cannot be opened, or scanning stops with an error, the
// error is reported through log.Fatal, which terminates the program.
func PopulateFromFile(wm *Wordmap, filename string) {
	f, err := os.Open(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	lines := bufio.NewScanner(f)
	for lines.Scan() {
		word := lines.Text()
		wm.AddWord(word)
	}

	// Scan returning false can mean EOF or a read failure; Err tells them apart.
	if scanErr := lines.Err(); scanErr != nil {
		log.Fatal(scanErr)
	}
}