Skip to content

Commit f6adbff

Browse files
committed
OPENNLP-1369 NPE when serializing a TokenNameFinder model trained with POSTaggerNameFeatureGeneratorFactory
- adds a first reproducer to check whether OPENNLP-1369 can be reproduced => yes, it can
- confirms that a basic workaround cures the issue; however, the workaround is not pretty
1 parent b74c6df commit f6adbff

File tree

3 files changed

+13
-3
lines changed

3 files changed

+13
-3
lines changed

opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java

+5
Original file line number | Diff line number | Diff line change
@@ -596,6 +596,11 @@ public final void serialize(OutputStream out) throws IOException {
596596
zip.putNextEntry(new ZipEntry(name));
597597

598598
Object artifact = entry.getValue();
599+
// TODO Discuss if this is the correct location to have this workaround in place
600+
if ("generator.featuregen".equals(name) && artifact == null) {
601+
// An old model format was detected, skipping the process for this entry, see: OPENNLP-1369
602+
continue;
603+
}
599604

600605
ArtifactSerializer serializer = getArtifactSerializer(name);
601606

opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderModelTest.java

+6-2
Original file line number | Diff line number | Diff line change
@@ -51,13 +51,17 @@ void testNERWithPOSModel() throws IOException {
5151
// create a resources folder
5252
Path resourcesFolder = Files.createTempDirectory("resources").toAbsolutePath();
5353

54+
// TODO Restore old test and provide a separate one which does it with the pt-pos-perceptron.bin
5455
// save a POS model there
5556
POSModel posModel = POSTaggerMETest.trainPOSModel(ModelType.MAXENT);
56-
File posModelFile = new File(resourcesFolder.toFile(), "pos-model.bin");
57+
File posModelFile = new File("pt-pos-perceptron.bin");
58+
Files.copy(posModelFile.toPath(), resourcesFolder.resolve("pt-pos-perceptron.bin"));
5759

58-
posModel.serialize(posModelFile);
60+
// posModel.serialize(posModelFile);
5961

6062
Assertions.assertTrue(posModelFile.exists());
63+
Assertions.assertTrue(resourcesFolder.resolve("pt-pos-perceptron.bin").toFile().exists());
64+
// end TODO
6165

6266
// load feature generator xml bytes
6367
InputStream fgInputStream = this.getClass().getResourceAsStream("ner-pos-features.xml");

opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml

+2-1
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,8 @@
3030
<int name="prevLength">2</int>
3131
<int name="nextLength">2</int>
3232
<generator class="opennlp.tools.util.featuregen.POSTaggerNameFeatureGeneratorFactory">
33-
<str name="model">pos-model.bin</str>
33+
<!-- FIXME Restore old state for TokenNameFinderModelTest -->
34+
<str name="model">pt-pos-perceptron.bin</str>
3435
</generator>
3536
</generator>
3637
<generator class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>

0 commit comments

Comments
 (0)