From 9e8ed5bfbd26fa8be1d15f908314548cdc0e4ad1 Mon Sep 17 00:00:00 2001 From: Martin Wiesner Date: Sat, 23 Nov 2024 23:11:16 +0100 Subject: [PATCH] OPENNLP-1653 Add thread-safe version of LemmatizerME --- .../lemmatizer/ThreadSafeLemmatizerME.java | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java new file mode 100644 index 000000000..e63c27d3f --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.lemmatizer; + +import java.util.List; + +import opennlp.tools.commons.ThreadSafe; + +/** + * A thread-safe version of the {@link LemmatizerME}. Using it is completely transparent. + * You can use it in a single-threaded context as well, it only incurs a minimal overhead. + *

+ * Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is + * lightweight because the model is not duplicated, if you have many long-running threads, + * you may run into memory problems. + *

+ *

+ * Be careful when using this in a Jakarta EE application, for example. + *

+ * The user is responsible for clearing the {@link ThreadLocal}. + * + * @see Lemmatizer + */ +@ThreadSafe +public class ThreadSafeLemmatizerME implements Lemmatizer, AutoCloseable { + + private final LemmatizerModel model; + + private final ThreadLocal threadLocal = new ThreadLocal<>(); + + /** + * Initializes a {@link ThreadSafeLemmatizerME} with the specified {@code model}. + * + * @param model A valid {@link LemmatizerModel}. + */ + public ThreadSafeLemmatizerME(LemmatizerModel model) { + super(); + this.model = model; + } + + private LemmatizerME getLemmatizer() { + LemmatizerME tagger = threadLocal.get(); + if (tagger == null) { + tagger = new LemmatizerME(model); + threadLocal.set(tagger); + } + return tagger; + } + + @Override + public String[] lemmatize(String[] toks, String[] tags) { + return getLemmatizer().lemmatize(toks, tags); + } + + @Override + public List> lemmatize(List toks, List tags) { + return getLemmatizer().lemmatize(toks, tags); + } + + @Override + public void close() { + threadLocal.remove(); + } + +}