OPENNLP-1541 Conduct cleanup in opennlp.tools.chunker package
mawiesne committed Jan 1, 2024
1 parent 08468cc commit e329113
Showing 7 changed files with 37 additions and 57 deletions.
@@ -19,7 +19,6 @@

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;

@@ -65,9 +64,9 @@ public ChunkSample(List<String> sentence, List<String> tags, List<String> preds)

validateArguments(sentence.size(), tags.size(), preds.size());

this.sentence = Collections.unmodifiableList(new ArrayList<>(sentence));
this.tags = Collections.unmodifiableList(new ArrayList<>(tags));
this.preds = Collections.unmodifiableList(new ArrayList<>(preds));
this.sentence = List.copyOf(sentence);
this.tags = List.copyOf(tags);
this.preds = List.copyOf(preds);
}

/**
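The hunk above is the ChunkSample constructor switching from Collections.unmodifiableList over a defensive ArrayList copy to List.copyOf. A minimal sketch of the behavioral difference, assuming a plain Java 10+ runtime (the variable names and sample tokens are illustrative, not taken from the diff):

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class CopyOfSketch {

  public static void main(String[] args) {
    List<String> tokens = new ArrayList<>(List.of("He", "reckons", "the", "deficit"));

    // Pre-change idiom: copy manually, then wrap the copy in an unmodifiable view.
    List<String> legacy = Collections.unmodifiableList(new ArrayList<>(tokens));

    // Post-change idiom: one call that copies and returns an unmodifiable list.
    // It additionally rejects null elements and may reuse the source when the
    // source is already an unmodifiable list created by List.of/List.copyOf.
    List<String> modern = List.copyOf(tokens);

    tokens.set(0, "She");            // neither snapshot observes this mutation
    System.out.println(legacy);      // [He, reckons, the, deficit]
    System.out.println(modern);      // [He, reckons, the, deficit]
    // modern.add("!");              // would throw UnsupportedOperationException
  }
}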
17 changes: 11 additions & 6 deletions opennlp-tools/src/main/java/opennlp/tools/chunker/Chunker.java
@@ -36,31 +36,36 @@ public interface Chunker {
String[] chunk(String[] toks, String[] tags);

/**
* Generates tagged chunk spans for the given sequence returning the result in a span array.
* Generates tagged chunk {@link Span spans} for the given sequence returning
* the result in a {@link Span span} array.
*
* @param toks an array of the tokens or words of the sequence.
* @param tags an array of the pos tags of the sequence.
*
* @return an array of spans with chunk tags for each chunk in the sequence.
* @return an array of {@link Span spans} with chunk tags for each chunk in the sequence.
*/
Span[] chunkAsSpans(String[] toks, String[] tags);

/**
* Returns the top k chunk sequences for the specified sentence with the specified pos-tags
* Computes the top k chunk {@link Sequence sequences} for the specified sentence with
* the specified pos-tags.
*
* @param sentence The tokens of the sentence.
* @param tags The pos-tags for the specified sentence.
*
* @return the top k chunk sequences for the specified sentence.
* @return the top k chunk {@link Sequence sequences} for the specified sentence.
*/
Sequence[] topKSequences(String[] sentence, String[] tags);

/**
* Returns the top k chunk sequences for the specified sentence with the specified pos-tags
* Computes the top k chunk {@link Sequence sequences} for the specified sentence with
* the specified pos-tags.
*
* @param sentence The tokens of the sentence.
* @param tags The pos-tags for the specified sentence.
* @param minSequenceScore A lower bound on the score of a returned sequence.
*
* @return the top k chunk sequences for the specified sentence.
* @return the top k chunk {@link Sequence sequences} for the specified sentence.
*/
Sequence[] topKSequences(String[] sentence, String[] tags, double minSequenceScore);
}
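To ground the reworked Javadoc, here is a hedged usage sketch of this interface through its ChunkerME implementation (touched in the next file). The model path is a placeholder and the token/POS arrays are illustrative:

import java.io.FileInputStream;
import java.io.InputStream;

import opennlp.tools.chunker.Chunker;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.util.Sequence;
import opennlp.tools.util.Span;

public class ChunkerUsageSketch {

  public static void main(String[] args) throws Exception {
    // "en-chunker.bin" stands in for any trained chunker model on disk.
    try (InputStream modelIn = new FileInputStream("en-chunker.bin")) {
      Chunker chunker = new ChunkerME(new ChunkerModel(modelIn));

      String[] tokens = {"Rockwell", "said", "the", "agreement", "calls"};
      String[] posTags = {"NNP", "VBD", "DT", "NN", "VBZ"};

      // One chunk tag per token, e.g. B-NP, I-NP, B-VP ...
      String[] chunkTags = chunker.chunk(tokens, posTags);

      // The same analysis grouped into tagged spans over the token indices
      // (Span ends are exclusive).
      for (Span span : chunker.chunkAsSpans(tokens, posTags)) {
        System.out.println(span.getType() + " covers tokens "
            + span.getStart() + ".." + (span.getEnd() - 1));
      }

      // Alternative decodings, best first, as described for topKSequences.
      Sequence[] top = chunker.topKSequences(tokens, posTags);
      System.out.println(chunkTags.length + " tags, " + top.length + " candidate sequences");
    }
  }
}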
46 changes: 15 additions & 31 deletions opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
@@ -51,37 +51,13 @@ public class ChunkerME implements Chunker {
/**
* The model used to assign chunk tags to a sequence of tokens.
*/
protected SequenceClassificationModel<TokenTag> model;
private final SequenceClassificationModel<TokenTag> model;

private final ChunkerContextGenerator contextGenerator;
private final SequenceValidator<TokenTag> sequenceValidator;

/**
* Initializes the current instance with the specified model and
* the specified beam size.
*
* @param model The model for this {@link Chunker}.
* @param beamSize The size of the beam that should be used when decoding sequences.
*
* @deprecated {@code beamSize} is now stored inside the model
*/
@Deprecated
private ChunkerME(ChunkerModel model, int beamSize) {

contextGenerator = model.getFactory().getContextGenerator();
sequenceValidator = model.getFactory().getSequenceValidator();

if (model.getChunkerSequenceModel() != null) {
this.model = model.getChunkerSequenceModel();
}
else {
this.model = new opennlp.tools.ml.BeamSearch<>(beamSize,
model.getChunkerModel(), 0);
}
}

/**
* Initializes the {@link Chunker} by downloading a default model.
* Initializes a {@link Chunker} by downloading a default model.
*
* @param language The language of the model.
* @throws IOException Thrown if the model cannot be downloaded or saved.
@@ -91,13 +67,21 @@ public ChunkerME(String language) throws IOException {
}

/**
* Initializes the current instance with the specified {@link ChunkerModel}.
* Initializes a {@link Chunker} with the specified {@link ChunkerModel}.
* The {@link #DEFAULT_BEAM_SIZE} is used.
*
* @param model A valid {@link ChunkerModel model} instance.
*/
public ChunkerME(ChunkerModel model) {
this(model, DEFAULT_BEAM_SIZE);
contextGenerator = model.getFactory().getContextGenerator();
sequenceValidator = model.getFactory().getSequenceValidator();

if (model.getChunkerSequenceModel() != null) {
this.model = model.getChunkerSequenceModel();
}
else {
this.model = new BeamSearch<>(DEFAULT_BEAM_SIZE, model.getChunkerModel(), 0);
}
}

@Override
@@ -143,7 +127,7 @@ public void probs(double[] probs) {

/**
* Returns an array with the probabilities of the last decoded sequence. The
* sequence was determined based on the previous call to {@code chunk}.
* sequence was determined based on the previous call to {@link #chunk(String[], String[])}.
*
* @return An array with the same number of probabilities as tokens when
* {@link ChunkerME#chunk(String[], String[])} was last called.
@@ -162,6 +146,7 @@ public double[] probs() {
*
* @return A valid, trained {@link ChunkerModel} instance.
* @throws IOException Thrown if IO errors occurred.
* @throws IllegalArgumentException Thrown if the specified {@link TrainerType} is not supported.
*/
public static ChunkerModel train(String lang, ObjectStream<ChunkSample> in,
TrainingParameters mlParams, ChunkerFactory factory) throws IOException {
@@ -176,8 +161,7 @@ public static ChunkerModel train(String lang, ObjectStream<ChunkSample> in,

if (TrainerType.EVENT_MODEL_TRAINER.equals(trainerType)) {
ObjectStream<Event> es = new ChunkerEventStream(in, factory.getContextGenerator());
EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams,
manifestInfoEntries);
EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams, manifestInfoEntries);
chunkerModel = trainer.train(es);
}
else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) {
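The train(...) entry point touched at the end of this file keeps its signature; only the Javadoc and line wrapping change. For orientation, a sketch of driving it with the default factory and parameters (the file name, data format and language code are assumptions, not part of the commit):

import java.io.File;
import java.nio.charset.StandardCharsets;

import opennlp.tools.chunker.ChunkSample;
import opennlp.tools.chunker.ChunkSampleStream;
import opennlp.tools.chunker.ChunkerFactory;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;

public class ChunkerTrainingSketch {

  public static void main(String[] args) throws Exception {
    // "train.txt" stands in for CoNLL-2000 style chunking data (token POS chunk per line).
    ObjectStream<String> lines = new PlainTextByLineStream(
        new MarkableFileInputStreamFactory(new File("train.txt")), StandardCharsets.UTF_8);

    try (ObjectStream<ChunkSample> samples = new ChunkSampleStream(lines)) {
      // An unsupported trainer type now surfaces as the documented IllegalArgumentException.
      ChunkerModel model = ChunkerME.train("en", samples,
          TrainingParameters.defaultParams(), new ChunkerFactory());
      System.out.println("Trained a chunker model for language: " + model.getLanguage());
    }
  }
}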
@@ -15,7 +15,6 @@
* limitations under the License.
*/


package opennlp.tools.chunker;

import java.io.File;
@@ -159,11 +158,10 @@ protected void validateArtifactMap() throws InvalidFormatException {

// Since 1.8.0 we changed the ChunkerFactory signature. This will check the if the model
// declares a not default factory, and if yes, check if it was created before 1.8
if ( (getManifestProperty(FACTORY_NAME) != null
&& !getManifestProperty(FACTORY_NAME).equals("opennlp.tools.chunker.ChunkerFactory") )
&& this.getVersion().getMajor() <= 1
&& this.getVersion().getMinor() < 8) {
throw new InvalidFormatException("The Chunker factory '" + getManifestProperty(FACTORY_NAME) +
final String factoryName = getManifestProperty(FACTORY_NAME);
if ( (factoryName != null && !factoryName.equals("opennlp.tools.chunker.ChunkerFactory") )
&& this.getVersion().getMajor() <= 1 && this.getVersion().getMinor() < 8) {
throw new InvalidFormatException("The Chunker factory '" + factoryName +
"' is no longer compatible. Please update it to match the latest ChunkerFactory.");
}

@@ -15,7 +15,6 @@
* limitations under the License.
*/


package opennlp.tools.chunker;

import opennlp.tools.util.TokenTag;
@@ -33,11 +32,6 @@ public class DefaultChunkerContextGenerator implements ChunkerContextGenerator {
public DefaultChunkerContextGenerator() {
}

public String[] getContext(int index, String[] tokens, String[] postags,
String[] priorDecisions, Object[] additionalContext) {
return getContext(index, tokens, postags, priorDecisions);
}

@Override
public String[] getContext(int i, String[] toks, String[] tags, String[] preds) {
// Words in a 5-word window
@@ -151,6 +145,6 @@ public String[] getContext(int index, TokenTag[] sequence, String[] priorDecisio
Object[] additionalContext) {
String[] token = TokenTag.extractTokens(sequence);
String[] tags = TokenTag.extractTags(sequence);
return getContext(index, token, tags, priorDecisions, additionalContext);
return getContext(index, token, tags, priorDecisions);
}
}
@@ -73,7 +73,7 @@ public void run(String[] args) {

// TODO: It should not just throw Exception.

EntityLinker entityLinker;
EntityLinker<? extends Span> entityLinker;
try {
entityLinker = EntityLinkerFactory.getLinker(entityType, properties);
}
@@ -124,7 +124,7 @@ public void run(String[] args) {
text.append("\n");
}

List<Span> linkedSpans =
List<? extends Span> linkedSpans =
entityLinker.find(text.toString(), sentences, tokensBySentence, namesBySentence);

for (Span linkedSpan : linkedSpans) {
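The two cmdline lines above only widen the declarations to wildcards so that a linker of an unknown Span subtype can still be consumed. The rule they rely on can be shown without any OpenNLP types; in this self-contained sketch, Shape and Circle are made-up stand-ins for Span and a subtype:

import java.util.List;

public class WildcardSketch {

  static class Shape { }
  static class Circle extends Shape { }

  interface Finder<T extends Shape> {
    List<T> find();
  }

  public static void main(String[] args) {
    // The concrete finder produces Circles ...
    Finder<Circle> circleFinder = () -> List.of(new Circle(), new Circle());

    // ... but callers that only know "some Finder of some Shape subtype"
    // must hold both the finder and its result with wildcards:
    Finder<? extends Shape> finder = circleFinder;
    List<? extends Shape> found = finder.find();

    for (Shape s : found) {          // reading as the upper bound is always safe
      System.out.println(s);
    }
    // found.add(new Shape());       // would not compile: exact element type unknown
  }
}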
@@ -29,7 +29,7 @@ public interface BeamSearchContextGenerator<T> {
* @param sequence The sequence of {@link T items} over which the beam search is performed.
* @param priorDecisions The sequence of decisions made prior to the context for
* which this decision is being made.
* @param additionalContext Any addition context specific to a class implementing this interface.
* @param additionalContext Any additional context specific to a class implementing this interface.
*
* @return The context for the specified {@code index} in the specified {@code sequence}.
*/
