Skip to content

Commit 1b4940a

Browse files
committed
OPENNLP-421 - Remove StringListWrapper
1 parent 497a7f0 commit 1b4940a

File tree

3 files changed

+96
-85
lines changed

3 files changed

+96
-85
lines changed

opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java

+27-62
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
import opennlp.tools.dictionary.serializer.DictionaryEntryPersistor;
3333
import opennlp.tools.dictionary.serializer.Entry;
3434
import opennlp.tools.util.StringList;
35-
import opennlp.tools.util.StringUtil;
3635
import opennlp.tools.util.model.DictionarySerializer;
3736
import opennlp.tools.util.model.SerializableArtifact;
3837

@@ -43,56 +42,7 @@
4342
* @see Iterable
4443
*/
4544
public class Dictionary implements Iterable<StringList>, SerializableArtifact {
46-
47-
private class StringListWrapper {
48-
49-
private final StringList stringList;
50-
51-
private StringListWrapper(StringList stringList) {
52-
this.stringList = stringList;
53-
}
54-
55-
private StringList getStringList() {
56-
return stringList;
57-
}
58-
59-
@Override
60-
public boolean equals(Object obj) {
61-
62-
boolean result;
63-
64-
if (obj == this) {
65-
result = true;
66-
}
67-
else if (obj instanceof StringListWrapper other) {
68-
69-
if (isCaseSensitive) {
70-
result = this.stringList.equals(other.getStringList());
71-
}
72-
else {
73-
result = this.stringList.compareToIgnoreCase(other.getStringList());
74-
}
75-
}
76-
else {
77-
result = false;
78-
}
79-
80-
return result;
81-
}
82-
83-
@Override
84-
public int hashCode() {
85-
// if lookup is too slow optimize this
86-
return StringUtil.toLowerCase(this.stringList.toString()).hashCode();
87-
}
88-
89-
@Override
90-
public String toString() {
91-
return this.stringList.toString();
92-
}
93-
}
94-
95-
private final Set<StringListWrapper> entrySet = new HashSet<>();
45+
private final Set<StringList> entrySet = new HashSet<>();
9646
private final boolean isCaseSensitive;
9747
private int minTokenCount = 99999;
9848
private int maxTokenCount = 0;
@@ -131,7 +81,7 @@ public Dictionary(InputStream in) throws IOException {
13181
* @param tokens the new entry
13282
*/
13383
public void put(StringList tokens) {
134-
entrySet.add(new StringListWrapper(tokens));
84+
entrySet.add(applyCaseSensitivity(tokens));
13585
minTokenCount = StrictMath.min(minTokenCount, tokens.size());
13686
maxTokenCount = StrictMath.max(maxTokenCount, tokens.size());
13787
}
@@ -151,7 +101,7 @@ public int getMaxTokenCount() {
151101
* @return {@code true} if it contains the entry, {@code false} otherwise.
152102
*/
153103
public boolean contains(StringList tokens) {
154-
return entrySet.contains(new StringListWrapper(tokens));
104+
return entrySet.contains(applyCaseSensitivity(tokens));
155105
}
156106

157107
/**
@@ -160,15 +110,15 @@ public boolean contains(StringList tokens) {
160110
* @param tokens The tokens to be filtered out (= removed).
161111
*/
162112
public void remove(StringList tokens) {
163-
entrySet.remove(new StringListWrapper(tokens));
113+
entrySet.remove(applyCaseSensitivity(tokens));
164114
}
165115

166116
/**
167117
* @return Retrieves a token-{@link Iterator} over all elements.
168118
*/
169119
@Override
170120
public Iterator<StringList> iterator() {
171-
final Iterator<StringListWrapper> entries = entrySet.iterator();
121+
final Iterator<StringList> entries = entrySet.iterator();
172122

173123
return new Iterator<>() {
174124

@@ -179,7 +129,7 @@ public boolean hasNext() {
179129

180130
@Override
181131
public StringList next() {
182-
return entries.next().getStringList();
132+
return entries.next();
183133
}
184134

185135
@Override
@@ -308,7 +258,7 @@ public Set<String> asStringSet() {
308258

309259
@Override
310260
public Iterator<String> iterator() {
311-
final Iterator<StringListWrapper> entries = entrySet.iterator();
261+
final Iterator<StringList> entries = entrySet.iterator();
312262

313263
return new Iterator<>() {
314264
@Override
@@ -317,7 +267,7 @@ public boolean hasNext() {
317267
}
318268
@Override
319269
public String next() {
320-
return entries.next().getStringList().getToken(0);
270+
return entries.next().getToken(0);
321271
}
322272
@Override
323273
public void remove() {
@@ -337,7 +287,7 @@ public boolean contains(Object obj) {
337287

338288
if (obj instanceof String str) {
339289

340-
result = entrySet.contains(new StringListWrapper(new StringList(str)));
290+
result = entrySet.contains(new StringList(isCaseSensitive, str));
341291

342292
}
343293
return result;
@@ -353,13 +303,13 @@ public boolean equals(Object o) {
353303
return false;
354304
}
355305
Iterator<String> toCheckIter = toCheck.iterator();
356-
for (StringListWrapper entry : entrySet) {
306+
for (StringList entry : entrySet) {
357307
if (isCaseSensitive) {
358-
if (!entry.stringList.equals(new StringList(toCheckIter.next()))) {
308+
if (!entry.equals(new StringList(true, toCheckIter.next()))) {
359309
return false;
360310
}
361311
} else {
362-
if (!entry.stringList.compareToIgnoreCase(new StringList(toCheckIter.next()))) {
312+
if (!entry.compareToIgnoreCase(new StringList(false, toCheckIter.next()))) {
363313
return false;
364314
}
365315
}
@@ -383,4 +333,19 @@ public int hashCode() {
383333
public Class<?> getArtifactSerializerClass() {
384334
return DictionarySerializer.class;
385335
}
336+
337+
/**
338+
* @return {@code true}, if this {@link Dictionary} is case-sensitive.
339+
*/
340+
public boolean isCaseSensitive() {
341+
return isCaseSensitive;
342+
}
343+
344+
private StringList applyCaseSensitivity(StringList list) {
345+
if (isCaseSensitive) {
346+
return list.toCaseSensitive();
347+
} else {
348+
return list.toCaseInsensitive();
349+
}
350+
}
386351
}

opennlp-tools/src/main/java/opennlp/tools/namefind/DictionaryNameFinder.java

+5-4
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ public class DictionaryNameFinder implements TokenNameFinder {
4444
* @param type the name type used for the produced spans. Must not be {@code null}.
4545
*/
4646
public DictionaryNameFinder(Dictionary dictionary, String type) {
  // Validate both arguments up front (dictionary first, then type), then assign.
  Objects.requireNonNull(dictionary, "dictionary must not be null");
  Objects.requireNonNull(type, "type must not be null");
  this.mDictionary = dictionary;
  this.type = type;
}
5050

@@ -61,22 +61,23 @@ public DictionaryNameFinder(Dictionary dictionary) {
6161
@Override
6262
public Span[] find(String[] textTokenized) {
6363
List<Span> namesFound = new LinkedList<>();
64-
64+
final boolean caseSensitive = mDictionary.isCaseSensitive();
65+
final int maxTokenCount = mDictionary.getMaxTokenCount();
6566
for (int offsetFrom = 0; offsetFrom < textTokenized.length; offsetFrom++) {
6667
Span nameFound = null;
6768
String[] tokensSearching;
6869

6970
for (int offsetTo = offsetFrom; offsetTo < textTokenized.length; offsetTo++) {
7071
int lengthSearching = offsetTo - offsetFrom + 1;
7172

72-
if (lengthSearching > mDictionary.getMaxTokenCount()) {
73+
if (lengthSearching > maxTokenCount) {
7374
break;
7475
} else {
7576
tokensSearching = new String[lengthSearching];
7677
System.arraycopy(textTokenized, offsetFrom, tokensSearching, 0,
7778
lengthSearching);
7879

79-
StringList entryForSearch = new StringList(tokensSearching);
80+
StringList entryForSearch = new StringList(caseSensitive, tokensSearching);
8081

8182
if (mDictionary.contains(entryForSearch)) {
8283
nameFound = new Span(offsetFrom, offsetTo + 1, type);

opennlp-tools/src/main/java/opennlp/tools/util/StringList.java

+64-19
Original file line numberDiff line numberDiff line change
@@ -31,20 +31,22 @@ public class StringList implements Iterable<String> {
3131

3232
private final String[] tokens;
3333

34+
private final boolean caseSensitive;
35+
3436
/**
 * Creates a case-sensitive {@link StringList} that holds exactly one token.
 * <p>
 * Note: <br>
 * The token String will be interned via {@link StringInterners}.
 *
 * @param singleToken One single token
 */
public StringList(String singleToken) {
  // Delegate to the flag-taking constructor; 'true' selects case-sensitive behaviour.
  this(true, singleToken);
}
4547

4648
/**
47-
* Initializes a {@link StringList} instance.
49+
* Initializes a {@link StringList} instance. By default, this instance is case-sensitive.
4850
* <p>
4951
* Note: <br>
5052
* Token Strings will be interned via {@link StringInterners}.
@@ -55,6 +57,22 @@ public StringList(String singleToken) {
5557
* @throws IllegalArgumentException Thrown if parameters were invalid.
5658
*/
5759
public StringList(String... tokens) {
  // Delegate to the flag-taking constructor; 'true' selects case-sensitive behaviour.
  this(true, tokens);
}
62+
63+
/**
64+
* Initializes a {@link StringList} instance.
65+
* <p>
66+
* Note: <br>
67+
* Token Strings will be interned via {@link StringInterners}.
68+
*
69+
* @param isCaseSensitive Whether it will operate case-sensitive, or not.
70+
* @param tokens The string parts of the new {@link StringList}.
71+
* Must not be an empty tokens array or {@code null}.
72+
*
73+
* @throws IllegalArgumentException Thrown if parameters were invalid.
74+
*/
75+
public StringList(boolean isCaseSensitive, String... tokens) {
5876

5977
Objects.requireNonNull(tokens, "tokens must not be null");
6078

@@ -67,6 +85,8 @@ public StringList(String... tokens) {
6785
for (int i = 0; i < tokens.length; i++) {
6886
this.tokens[i] = StringInterners.intern(tokens[i]);
6987
}
88+
89+
this.caseSensitive = isCaseSensitive;
7090
}
7191

7292
/**
@@ -127,44 +147,40 @@ public void remove() {
127147
* @return {@code true} if both lists are identical when case is ignored, {@code false} otherwise.
128148
*/
129149
public boolean compareToIgnoreCase(StringList tokens) {
130-
131150
if (size() == tokens.size()) {
132151
for (int i = 0; i < size(); i++) {
133-
134-
if (getToken(i).compareToIgnoreCase(
135-
tokens.getToken(i)) != 0) {
152+
if (getToken(i).compareToIgnoreCase(tokens.getToken(i)) != 0) {
136153
return false;
137154
}
138155
}
139-
}
140-
else {
156+
} else {
141157
return false;
142158
}
143-
144159
return true;
145160
}
146161

147162
@Override
148163
public int hashCode() {
149-
return Arrays.hashCode(tokens);
164+
// if lookup is too slow optimize this
165+
return StringUtil.toLowerCase(toString()).hashCode();
150166
}
151167

152168
@Override
153169
public boolean equals(Object obj) {
154-
if (this == obj) {
170+
if (obj == this) {
155171
return true;
172+
} else if (obj instanceof StringList tokenList) {
173+
if (caseSensitive) {
174+
return Arrays.equals(tokens, tokenList.tokens);
175+
} else {
176+
return compareToIgnoreCase(tokenList);
177+
}
156178
}
157-
158-
if (obj instanceof StringList tokenList) {
159-
160-
return Arrays.equals(tokens, tokenList.tokens);
161-
}
162-
163179
return false;
164180
}
165181

166182
/**
167-
* @return A human-readable representation of this {@link Span}.
183+
* @return A human-readable representation of this {@link StringList}.
168184
*/
169185
@Override
170186
public String toString() {
@@ -184,4 +200,33 @@ public String toString() {
184200

185201
return string.toString();
186202
}
203+
204+
/**
205+
* @return {@code true}, if this {@link StringList} is case-sensitive.
206+
*/
207+
public boolean isCaseSensitive() {
208+
return caseSensitive;
209+
}
210+
211+
/**
212+
* @return If this {@link StringList} is case-insensitive,
213+
* the same instance is returned. Otherwise, a new object is returned.
214+
*/
215+
public StringList toCaseInsensitive() {
216+
if (isCaseSensitive()) {
217+
return new StringList(false, tokens);
218+
}
219+
return this;
220+
}
221+
222+
/**
223+
* @return If this {@link StringList} is case-sensitive,
224+
* the same instance is returned. Otherwise, a new object is returned.
225+
*/
226+
public StringList toCaseSensitive() {
227+
if (!isCaseSensitive()) {
228+
return new StringList(true, tokens);
229+
}
230+
return this;
231+
}
187232
}

0 commit comments

Comments
 (0)