Skip to content

Commit

Permalink
OPENNLP-1447: Reenable Cmdline Tool execution tests (#720)
Browse files Browse the repository at this point in the history
- removes @Disabled from multiple cmdline execution tests
- adjusts TokenizerTrainerTool to handle existing yet "empty" abb-dict files better
  • Loading branch information
mawiesne authored Dec 23, 2024
1 parent 3fd914f commit ed2682c
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 99 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import opennlp.tools.cmdline.AbstractTrainerTool;
import opennlp.tools.cmdline.CmdLineUtil;
Expand All @@ -33,6 +34,7 @@
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.tokenize.TokenizerFactory;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.model.ModelUtil;

Expand All @@ -53,9 +55,15 @@ public String getShortDescription() {

static Dictionary loadDict(File f) throws IOException {
Dictionary dict = null;
if (f != null) {
if (f != null && f.exists()) {
CmdLineUtil.checkInputFile("abb dict", f);
dict = new Dictionary(new BufferedInputStream(new FileInputStream(f)));
try (InputStream in = new BufferedInputStream(new FileInputStream(f))) {
if (in.available() == 0) {
throw new InvalidFormatException("Encountered an empty dictionary file?!");
} else {
dict = new Dictionary(in);
}
}
}
return dict;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,22 @@

import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.Logger;
import ch.qos.logback.classic.LoggerContext;
import nl.altindag.log.LogCaptor;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.LoggerFactory;

import opennlp.tools.cmdline.namefind.TokenNameFinderTool;
import opennlp.tools.namefind.NameFinderME;
Expand All @@ -44,75 +47,80 @@
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;

public class TokenNameFinderToolTest {

@Test
//TODO OPENNLP-1447
@Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
"We need to find a way to redirect log output (i.e. implement " +
"a custom log adapter and plug it in, if we want to do such tests.")
void run() throws IOException {
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

File model1 = trainModel();
public class TokenNameFinderToolTest {

String[] args = new String[] {model1.getAbsolutePath()};
/*
* Programmatically raises the log level of the "opennlp.tools.cmdline.namefind"
* logger to INFO before any test in this class runs, so that INFO-level output is
* enabled for the LogCaptor-based assertions below. The test logging configuration
* keeps this logger OFF by default.
*
* Note: the cast to LoggerContext only works while Logback is the active SLF4J backend.
*/
@BeforeAll
public static void prepare() {
LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
Logger logger = context.getLogger("opennlp.tools.cmdline.namefind");
logger.setLevel(Level.INFO);
}

final String in = "It is Stefanie Schmidt.\n\nNothing in this sentence.";
InputStream stream = new ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8));
/*
* Programmatically restores the log level of the "opennlp.tools.cmdline.namefind"
* logger to its default (= OFF) after all tests in this class have run.
*/
@AfterAll
public static void cleanup() {
LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
Logger logger = context.getLogger("opennlp.tools.cmdline.namefind");
logger.setLevel(Level.OFF);
}

System.setIn(stream);
@Test
void run() throws IOException {
try (LogCaptor logCaptor = LogCaptor.forClass(TokenNameFinderTool.class)) {
File model1 = trainModel();
String[] args = new String[] {model1.getAbsolutePath()};

ByteArrayOutputStream baos = new ByteArrayOutputStream();
PrintStream ps = new PrintStream(baos);
System.setOut(ps);
final String in = "It is Stefanie Schmidt.\n";
InputStream stream = new ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8));

TokenNameFinderTool tool = new TokenNameFinderTool();
tool.run(args);
System.setIn(stream);

final String content = baos.toString(StandardCharsets.UTF_8);
Assertions.assertTrue(content.contains("It is <START:person> Stefanie Schmidt. <END>"));
TokenNameFinderTool tool = new TokenNameFinderTool();
tool.run(args);

Assertions.assertTrue(model1.delete());
assertEquals(1, logCaptor.getInfoLogs().size());
final String content = logCaptor.getInfoLogs().get(0);
logCaptor.clearLogs();
assertEquals("It is <START:person> Stefanie Schmidt. <END>", content);
assertTrue(model1.delete());
}
}

@Test
void invalidModel() {

Assertions.assertThrows(TerminateToolException.class, () -> {

assertThrows(TerminateToolException.class, () -> {
String[] args = new String[] {"invalidmodel.bin"};

TokenNameFinderTool tool = new TokenNameFinderTool();
tool.run(args);

});


}

@Test
//TODO OPENNLP-1447
@Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
"We need to find a way to redirect log output (i.e. implement " +
"a custom log adapter and plug it in, if we want to do such tests.")
void usage() {
try (LogCaptor logCaptor = LogCaptor.forClass(TokenNameFinderTool.class)) {
String[] args = new String[] {};

String[] args = new String[] {};

ByteArrayOutputStream baos = new ByteArrayOutputStream();
PrintStream ps = new PrintStream(baos);
System.setOut(ps);

TokenNameFinderTool tool = new TokenNameFinderTool();
tool.run(args);

final String content = baos.toString(StandardCharsets.UTF_8);
Assertions.assertEquals(tool.getHelp(), content.trim());
TokenNameFinderTool tool = new TokenNameFinderTool();
tool.run(args);

assertEquals(1, logCaptor.getInfoLogs().size());
final String content = logCaptor.getInfoLogs().get(0);
assertEquals(tool.getHelp(), content.trim());
}
}

private File trainModel() throws IOException {

ObjectStream<String> lineStream =
new PlainTextByLineStream(new MockInputStreamFactory(
new File("opennlp/tools/namefind/AnnotatedSentencesWithTypes.txt")),
Expand All @@ -123,7 +131,6 @@ private File trainModel() throws IOException {
params.put(TrainingParameters.CUTOFF_PARAM, 1);

TokenNameFinderModel model;

TokenNameFinderFactory nameFinderFactory = new TokenNameFinderFactory();

try (ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream)) {
Expand All @@ -132,12 +139,10 @@ private File trainModel() throws IOException {
}

File modelFile = Files.createTempFile("model", ".bin").toFile();

try (OutputStream modelOut =
new BufferedOutputStream(new FileOutputStream(modelFile))) {
model.serialize(modelOut);
}

return modelFile;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,40 @@
package opennlp.tools.cmdline.tokenizer;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.Logger;
import ch.qos.logback.classic.LoggerContext;
import nl.altindag.log.LogCaptor;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.LoggerFactory;

import opennlp.tools.AbstractTempDirTest;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.util.InvalidFormatException;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
* Tests for the {@link TokenizerTrainerTool} class.
*/
public class TokenizerTrainerToolTest extends AbstractTempDirTest {

private TokenizerTrainerTool tokenizerTrainerTool;

private final String sampleSuccessData =
"Pierre Vinken<SPLIT>, 61 years old<SPLIT>, will join the board as a nonexecutive " +
"director Nov. 29<SPLIT>.\n" +
Expand All @@ -54,55 +61,70 @@ public class TokenizerTrainerToolTest extends AbstractTempDirTest {

private final String sampleFailureData = "It is Fail Test Case.\n\nNothing in this sentence.";

/*
* Programmatically raises the log level of the "opennlp.tools.cmdline.CmdLineUtil"
* logger to INFO before any test in this class runs, so that INFO-level output is
* enabled for the LogCaptor-based assertions below. The test logging configuration
* keeps this logger OFF by default.
*
* Note: the cast to LoggerContext only works while Logback is the active SLF4J backend.
*/
@BeforeAll
public static void prepare() {
LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
Logger logger = context.getLogger("opennlp.tools.cmdline.CmdLineUtil");
logger.setLevel(Level.INFO);
}

/*
* Programmatically restores the log level of the "opennlp.tools.cmdline.CmdLineUtil"
* logger to its default (= OFF) after all tests in this class have run.
*/
@AfterAll
public static void cleanup() {
LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
Logger logger = context.getLogger("opennlp.tools.cmdline.CmdLineUtil");
logger.setLevel(Level.OFF);
}

@Test
public void testGetShortDescription() {
tokenizerTrainerTool = new TokenizerTrainerTool();
Assertions.assertEquals("Trainer for the learnable tokenizer",
TokenizerTrainerTool tokenizerTrainerTool = new TokenizerTrainerTool();
assertEquals("Trainer for the learnable tokenizer",
tokenizerTrainerTool.getShortDescription());
}

@Test
public void testLoadDictHappyCase() throws IOException {
File dictFile = new File("lang/ga/abb_GA.xml");
Dictionary dict = TokenizerTrainerTool.loadDict(dictFile);
Assertions.assertNotNull(dict);
assertNotNull(dict);
}

@Test
public void testLoadDictFailCase() {
Assertions.assertThrows(InvalidFormatException.class , () ->
assertThrows(InvalidFormatException.class , () ->
TokenizerTrainerTool.loadDict(prepareDataFile("")));
}

//TODO OPENNLP-1447
@Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
"We need to find a way to redirect log output (i.e. implement " +
"a custom log adapter and plug it in, if we want to do such tests.")
@Test
public void testTestRunHappyCase() throws IOException {
File model = tempDir.resolve("model-en.bin").toFile();

String[] args =
new String[] { "-model" , model.getAbsolutePath() , "-alphaNumOpt" , "false" , "-lang" , "en" ,
"-data" , String.valueOf(prepareDataFile(sampleSuccessData)) , "-encoding" , "UTF-8" };

InputStream stream = new ByteArrayInputStream(sampleSuccessData.getBytes(StandardCharsets.UTF_8));
System.setIn(stream);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PrintStream ps = new PrintStream(baos);
System.setOut(ps);

tokenizerTrainerTool = new TokenizerTrainerTool();
tokenizerTrainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);

final String content = baos.toString(StandardCharsets.UTF_8);
Assertions.assertTrue(content.contains("Number of Event Tokens: 171"));
Assertions.assertTrue(model.delete());
try (LogCaptor logCaptor = LogCaptor.forClass(CmdLineUtil.class)) {
File model = tempDir.resolve("model-en.bin").toFile();

String[] args =
new String[] { "-model" , model.getAbsolutePath() , "-alphaNumOpt" , "false" , "-lang" , "en" ,
"-data" , String.valueOf(prepareDataFile(sampleSuccessData)) , "-encoding" , "UTF-8" };

InputStream stream = new ByteArrayInputStream(sampleSuccessData.getBytes(StandardCharsets.UTF_8));
System.setIn(stream);

TokenizerTrainerTool trainerTool = new TokenizerTrainerTool();
trainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);

assertEquals(3, logCaptor.getInfoLogs().size());
final String content = logCaptor.getInfoLogs().get(2);
assertTrue(content.startsWith("Wrote tokenizer model to path:"));
assertTrue(model.delete());
}
}

//TODO OPENNLP-1447
@Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
"We need to find a way to redirect log output (i.e. implement " +
"a custom log adapter and plug it in, if we want to do such tests.")
@Test
public void testTestRunExceptionCase() throws IOException {
File model = tempDir.resolve("model-en.bin").toFile();
model.deleteOnExit();
Expand All @@ -111,17 +133,10 @@ public void testTestRunExceptionCase() throws IOException {
new String[] { "-model" , model.getAbsolutePath() , "-alphaNumOpt" , "false" , "-lang" , "en" ,
"-data" , String.valueOf(prepareDataFile(sampleFailureData)) , "-encoding" , "UTF-8" };

InputStream stream = new ByteArrayInputStream(sampleFailureData.getBytes(StandardCharsets.UTF_8));
System.setIn(stream);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PrintStream ps = new PrintStream(baos);
System.setOut(ps);

Assertions.assertThrows(TerminateToolException.class , () -> {
tokenizerTrainerTool = new TokenizerTrainerTool();
tokenizerTrainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
assertThrows(TerminateToolException.class , () -> {
TokenizerTrainerTool trainerTool = new TokenizerTrainerTool();
trainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
});

}

// This is guaranteed to be deleted after the test finishes.
Expand Down
6 changes: 5 additions & 1 deletion opennlp-tools/src/test/resources/logback-test.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,16 @@

<appender name="consoleAppender" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%date{HH:mm:ss.SSS} [%thread] %-5level %class{36}.%method:%line - %msg%n</pattern>
<pattern>%date{HH:mm:ss.SSS} [%thread] %-4level %class{36}.%method:%line - %msg%n</pattern>
</encoder>
</appender>

<logger name="opennlp" level="off"/>

<logger name="opennlp.tools.cmdline.namefind" level="off"/>

<logger name="opennlp.tools.cmdline.CmdLineUtil" level="off"/>

<root level="off">
<appender-ref ref="consoleAppender" />
</root>
Expand Down

0 comments on commit ed2682c

Please sign in to comment.