32
32
import opennlp .tools .dictionary .serializer .DictionaryEntryPersistor ;
33
33
import opennlp .tools .dictionary .serializer .Entry ;
34
34
import opennlp .tools .util .StringList ;
35
- import opennlp .tools .util .StringUtil ;
36
35
import opennlp .tools .util .model .DictionarySerializer ;
37
36
import opennlp .tools .util .model .SerializableArtifact ;
38
37
43
42
* @see Iterable
44
43
*/
45
44
public class Dictionary implements Iterable <StringList >, SerializableArtifact {
46
-
47
- private class StringListWrapper {
48
-
49
- private final StringList stringList ;
50
-
51
- private StringListWrapper (StringList stringList ) {
52
- this .stringList = stringList ;
53
- }
54
-
55
- private StringList getStringList () {
56
- return stringList ;
57
- }
58
-
59
- @ Override
60
- public boolean equals (Object obj ) {
61
-
62
- boolean result ;
63
-
64
- if (obj == this ) {
65
- result = true ;
66
- }
67
- else if (obj instanceof StringListWrapper other ) {
68
-
69
- if (isCaseSensitive ) {
70
- result = this .stringList .equals (other .getStringList ());
71
- }
72
- else {
73
- result = this .stringList .compareToIgnoreCase (other .getStringList ());
74
- }
75
- }
76
- else {
77
- result = false ;
78
- }
79
-
80
- return result ;
81
- }
82
-
83
- @ Override
84
- public int hashCode () {
85
- // if lookup is too slow optimize this
86
- return StringUtil .toLowerCase (this .stringList .toString ()).hashCode ();
87
- }
88
-
89
- @ Override
90
- public String toString () {
91
- return this .stringList .toString ();
92
- }
93
- }
94
-
95
- private final Set <StringListWrapper > entrySet = new HashSet <>();
45
+ private final Set <StringList > entrySet = new HashSet <>();
96
46
private final boolean isCaseSensitive ;
97
47
private int minTokenCount = 99999 ;
98
48
private int maxTokenCount = 0 ;
@@ -131,7 +81,7 @@ public Dictionary(InputStream in) throws IOException {
131
81
* @param tokens the new entry
132
82
*/
133
83
public void put (StringList tokens ) {
134
- entrySet .add (new StringListWrapper (tokens ));
84
+ entrySet .add (applyCaseSensitivity (tokens ));
135
85
minTokenCount = StrictMath .min (minTokenCount , tokens .size ());
136
86
maxTokenCount = StrictMath .max (maxTokenCount , tokens .size ());
137
87
}
@@ -151,7 +101,7 @@ public int getMaxTokenCount() {
151
101
* @return {@code true} if it contains the entry, {@code false} otherwise.
152
102
*/
153
103
public boolean contains (StringList tokens ) {
154
- return entrySet .contains (new StringListWrapper (tokens ));
104
+ return entrySet .contains (applyCaseSensitivity (tokens ));
155
105
}
156
106
157
107
/**
@@ -160,15 +110,15 @@ public boolean contains(StringList tokens) {
160
110
* @param tokens The tokens to be filtered out (= removed).
161
111
*/
162
112
public void remove (StringList tokens ) {
163
- entrySet .remove (new StringListWrapper (tokens ));
113
+ entrySet .remove (applyCaseSensitivity (tokens ));
164
114
}
165
115
166
116
/**
167
117
* @return Retrieves a token-{@link Iterator} over all elements.
168
118
*/
169
119
@ Override
170
120
public Iterator <StringList > iterator () {
171
- final Iterator <StringListWrapper > entries = entrySet .iterator ();
121
+ final Iterator <StringList > entries = entrySet .iterator ();
172
122
173
123
return new Iterator <>() {
174
124
@@ -179,7 +129,7 @@ public boolean hasNext() {
179
129
180
130
@ Override
181
131
public StringList next () {
182
- return entries .next (). getStringList () ;
132
+ return entries .next ();
183
133
}
184
134
185
135
@ Override
@@ -308,7 +258,7 @@ public Set<String> asStringSet() {
308
258
309
259
@ Override
310
260
public Iterator <String > iterator () {
311
- final Iterator <StringListWrapper > entries = entrySet .iterator ();
261
+ final Iterator <StringList > entries = entrySet .iterator ();
312
262
313
263
return new Iterator <>() {
314
264
@ Override
@@ -317,7 +267,7 @@ public boolean hasNext() {
317
267
}
318
268
@ Override
319
269
public String next () {
320
- return entries .next ().getStringList (). getToken (0 );
270
+ return entries .next ().getToken (0 );
321
271
}
322
272
@ Override
323
273
public void remove () {
@@ -337,7 +287,7 @@ public boolean contains(Object obj) {
337
287
338
288
if (obj instanceof String str ) {
339
289
340
- result = entrySet .contains (new StringListWrapper (new StringList (str )));
290
+ result = entrySet .contains (applyCaseSensitivity (new StringList (isCaseSensitive , str )));
341
291
342
292
}
343
293
return result ;
@@ -353,13 +303,13 @@ public boolean equals(Object o) {
353
303
return false ;
354
304
}
355
305
Iterator <String > toCheckIter = toCheck .iterator ();
356
- for (StringListWrapper entry : entrySet ) {
306
+ for (StringList entry : entrySet ) {
357
307
if (isCaseSensitive ) {
358
- if (!entry .stringList . equals (new StringList (toCheckIter .next ()))) {
308
+ if (!entry .equals (new StringList (true , toCheckIter .next ()))) {
359
309
return false ;
360
310
}
361
311
} else {
362
- if (!entry .stringList . compareToIgnoreCase (new StringList (toCheckIter .next ()))) {
312
+ if (!entry .compareToIgnoreCase (new StringList (false , toCheckIter .next ()))) {
363
313
return false ;
364
314
}
365
315
}
@@ -383,4 +333,19 @@ public int hashCode() {
383
333
public Class <?> getArtifactSerializerClass () {
384
334
return DictionarySerializer .class ;
385
335
}
336
+
337
+ /**
338
+ * @return {@code true}, if this {@link Dictionary} is case-sensitive.
339
+ */
340
+ public boolean isCaseSensitive () {
341
+ return isCaseSensitive ;
342
+ }
343
+
344
+ private StringList applyCaseSensitivity (StringList list ) {
345
+ if (isCaseSensitive ) {
346
+ return list .toCaseSensitive ();
347
+ } else {
348
+ return list .toCaseInsensitive ();
349
+ }
350
+ }
386
351
}
0 commit comments