Skip to content

Commit 4558088

Browse files
committed
OPENNLP-1654 Add thread-safe version of NameFinderME
- adds ThreadSafeNameFinderME - adds additional constructor to ThreadSafeTokenizerME & ThreadSafeSentenceDetectorME to be consistent with ThreadSafePOSTaggerME - improves existing JavaDoc along the path
1 parent ec09b7e commit 4558088

File tree

5 files changed

+143
-18
lines changed

5 files changed

+143
-18
lines changed

opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java

+4-3
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,18 @@
2424
/**
2525
* A thread-safe version of the {@link LemmatizerME}. Using it is completely transparent.
2626
* You can use it in a single-threaded context as well, it only incurs a minimal overhead.
27-
* <p>
28-
* Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is
27+
*
28+
* @implNote
29+
* This implementation uses a {@link ThreadLocal}. Although the implementation is
2930
* lightweight because the model is not duplicated, if you have many long-running threads,
3031
* you may run into memory problems.
31-
* </p>
3232
* <p>
3333
* Be careful when using this in a Jakarta EE application, for example.
3434
* </p>
3535
* The user is responsible for clearing the {@link ThreadLocal}.
3636
*
3737
* @see Lemmatizer
38+
* @see LemmatizerME
3839
*/
3940
@ThreadSafe
4041
public class ThreadSafeLemmatizerME implements Lemmatizer, AutoCloseable {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package opennlp.tools.namefind;
19+
20+
import opennlp.tools.commons.ThreadSafe;
21+
import opennlp.tools.util.Span;
22+
23+
/**
24+
* A thread-safe version of {@link NameFinderME}. Using it is completely transparent.
25+
* You can use it in a single-threaded context as well, it only incurs a minimal overhead.
26+
*
27+
* @implNote
28+
* This implementation uses a {@link ThreadLocal}. Although the implementation is
29+
* lightweight because the model is not duplicated, if you have many long-running threads,
30+
* you may run into memory problems.
31+
* <p>
32+
* Be careful when using this in a Jakarta EE application, for example.
33+
* </p>
34+
* The user is responsible for clearing the {@link ThreadLocal}.
35+
*
36+
* @see NameFinderME
37+
* @see TokenNameFinder
38+
*/
39+
@ThreadSafe
40+
public class ThreadSafeNameFinderME implements TokenNameFinder, AutoCloseable {
41+
42+
private final TokenNameFinderModel model;
43+
44+
private final ThreadLocal<NameFinderME> threadLocal = new ThreadLocal<>();
45+
46+
/**
47+
* Initializes a {@link ThreadSafeNameFinderME} with the specified {@code model}.
48+
*
49+
* @param model A valid {@link TokenNameFinderModel}.
50+
*/
51+
public ThreadSafeNameFinderME(TokenNameFinderModel model) {
52+
super();
53+
this.model = model;
54+
}
55+
56+
// If a thread-local version exists, return it. Otherwise, create, then return.
57+
private NameFinderME getNameFinder() {
58+
NameFinderME sd = threadLocal.get();
59+
if (sd == null) {
60+
sd = new NameFinderME(model);
61+
threadLocal.set(sd);
62+
}
63+
return sd;
64+
}
65+
66+
@Override
67+
public void close() {
68+
threadLocal.remove();
69+
}
70+
71+
@Override
72+
public Span[] find(String[] tokens) {
73+
return getNameFinder().find(tokens);
74+
}
75+
76+
@Override
77+
public void clearAdaptiveData() {
78+
getNameFinder().clearAdaptiveData();
79+
}
80+
}

opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java

+4-3
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,18 @@
2626
/**
2727
* A thread-safe version of the {@link POSTaggerME}. Using it is completely transparent.
2828
* You can use it in a single-threaded context as well, it only incurs a minimal overhead.
29-
* <p>
30-
* Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is
29+
*
30+
* @implNote
31+
* This implementation uses a {@link ThreadLocal}. Although the implementation is
3132
* lightweight because the model is not duplicated, if you have many long-running threads,
3233
* you may run into memory problems.
33-
* </p>
3434
* <p>
3535
* Be careful when using this in a Jakarta EE application, for example.
3636
* </p>
3737
* The user is responsible for clearing the {@link ThreadLocal}.
3838
*
3939
* @see POSTagger
40+
* @see POSTaggerME
4041
*/
4142
@ThreadSafe
4243
public class ThreadSafePOSTaggerME implements POSTagger, AutoCloseable {

opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java

+28-7
Original file line numberDiff line numberDiff line change
@@ -17,30 +17,51 @@
1717

1818
package opennlp.tools.sentdetect;
1919

20+
import java.io.IOException;
21+
2022
import opennlp.tools.commons.ThreadSafe;
23+
import opennlp.tools.util.DownloadUtil;
2124
import opennlp.tools.util.Span;
2225

2326
/**
24-
* A thread-safe version of SentenceDetectorME. Using it is completely transparent. You can use it in
25-
* a single-threaded context as well, it only incurs a minimal overhead.
26-
* <p>
27-
* Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is
27+
* A thread-safe version of {@link SentenceDetectorME}. Using it is completely transparent.
28+
* You can use it in a single-threaded context as well, it only incurs a minimal overhead.
29+
*
30+
* @implNote
31+
* This implementation uses a {@link ThreadLocal}. Although the implementation is
2832
* lightweight because the model is not duplicated, if you have many long-running threads,
2933
* you may run into memory problems.
30-
* </p>
3134
* <p>
3235
* Be careful when using this in a Jakarta EE application, for example.
3336
* </p>
3437
* The user is responsible for clearing the {@link ThreadLocal}.
38+
*
39+
* @see SentenceDetector
40+
* @see SentenceDetectorME
3541
*/
3642
@ThreadSafe
3743
public class ThreadSafeSentenceDetectorME implements SentenceDetector, AutoCloseable {
3844

3945
private final SentenceModel model;
4046

41-
private final ThreadLocal<SentenceDetectorME> threadLocal =
42-
new ThreadLocal<>();
47+
private final ThreadLocal<SentenceDetectorME> threadLocal = new ThreadLocal<>();
48+
49+
/**
50+
* Initializes a {@link ThreadSafeSentenceDetectorME} by downloading a default model
51+
* for a given {@code language}.
52+
*
53+
* @param language An ISO-conformant language code.
54+
* @throws IOException Thrown if the model could not be downloaded or saved.
55+
*/
56+
public ThreadSafeSentenceDetectorME(String language) throws IOException {
  // Obtain the default sentence detector model for the language, then hand it
  // to the model-based constructor.
  this(DownloadUtil.downloadModel(
      language,
      DownloadUtil.ModelType.SENTENCE_DETECTOR,
      SentenceModel.class));
}
4359

60+
/**
61+
* Initializes a {@link ThreadSafeSentenceDetectorME} with the specified {@code model}.
62+
*
63+
* @param model A valid {@link SentenceModel}.
64+
*/
4465
public ThreadSafeSentenceDetectorME(SentenceModel model) {
4566
super();
4667
this.model = model;

opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java

+27-5
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,27 @@
1717

1818
package opennlp.tools.tokenize;
1919

20+
import java.io.IOException;
21+
2022
import opennlp.tools.commons.ThreadSafe;
23+
import opennlp.tools.util.DownloadUtil;
2124
import opennlp.tools.util.Span;
2225

2326
/**
24-
* A thread-safe version of TokenizerME. Using it is completely transparent. You can use it in
25-
* a single-threaded context as well, it only incurs a minimal overhead.
26-
* <p>
27-
* Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is
27+
* A thread-safe version of {@link TokenizerME}. Using it is completely transparent.
28+
* You can use it in a single-threaded context as well, it only incurs a minimal overhead.
29+
*
30+
* @implNote
31+
* This implementation uses a {@link ThreadLocal}. Although the implementation is
2832
* lightweight because the model is not duplicated, if you have many long-running threads,
2933
* you may run into memory problems.
30-
* </p>
3134
* <p>
3235
* Be careful when using this in a Jakarta EE application, for example.
3336
* </p>
3437
* The user is responsible for clearing the {@link ThreadLocal}.
38+
*
39+
* @see Tokenizer
40+
* @see TokenizerME
3541
*/
3642
@ThreadSafe
3743
public class ThreadSafeTokenizerME implements Tokenizer, AutoCloseable {
@@ -40,6 +46,22 @@ public class ThreadSafeTokenizerME implements Tokenizer, AutoCloseable {
4046

4147
private final ThreadLocal<TokenizerME> threadLocal = new ThreadLocal<>();
4248

49+
/**
50+
* Initializes a {@link ThreadSafeTokenizerME} by downloading a default model
51+
* for a given {@code language}.
52+
*
53+
* @param language An ISO-conformant language code.
54+
* @throws IOException Thrown if the model could not be downloaded or saved.
55+
*/
56+
public ThreadSafeTokenizerME(String language) throws IOException {
  // Obtain the default tokenizer model for the language, then hand it
  // to the model-based constructor.
  this(DownloadUtil.downloadModel(
      language,
      DownloadUtil.ModelType.TOKENIZER,
      TokenizerModel.class));
}
59+
60+
/**
61+
* Initializes a {@link ThreadSafeTokenizerME} with the specified {@code model}.
62+
*
63+
* @param model A valid {@link TokenizerModel}.
64+
*/
4365
public ThreadSafeTokenizerME(TokenizerModel model) {
4466
super();
4567
this.model = model;

0 commit comments

Comments
 (0)