Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OPENNLP-1447: Re-enable Cmdline Tool execution tests #720

Merged
merged 1 commit into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import opennlp.tools.cmdline.AbstractTrainerTool;
import opennlp.tools.cmdline.CmdLineUtil;
Expand All @@ -33,6 +34,7 @@
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.tokenize.TokenizerFactory;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.model.ModelUtil;

Expand All @@ -53,9 +55,15 @@ public String getShortDescription() {

static Dictionary loadDict(File f) throws IOException {
Dictionary dict = null;
if (f != null) {
if (f != null && f.exists()) {
CmdLineUtil.checkInputFile("abb dict", f);
dict = new Dictionary(new BufferedInputStream(new FileInputStream(f)));
try (InputStream in = new BufferedInputStream(new FileInputStream(f))) {
if (in.available() == 0) {
throw new InvalidFormatException("Encountered an empty dictionary file?!");
} else {
dict = new Dictionary(in);
}
}
}
return dict;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,22 @@

import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.Logger;
import ch.qos.logback.classic.LoggerContext;
import nl.altindag.log.LogCaptor;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.LoggerFactory;

import opennlp.tools.cmdline.namefind.TokenNameFinderTool;
import opennlp.tools.namefind.NameFinderME;
Expand All @@ -44,75 +47,80 @@
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;

public class TokenNameFinderToolTest {

@Test
//TODO OPENNLP-1447
@Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
"We need to find a way to redirect log output (i.e. implement " +
"a custom log adapter and plug it in, if we want to do such tests.")
void run() throws IOException {
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

File model1 = trainModel();
public class TokenNameFinderToolTest {

String[] args = new String[] {model1.getAbsolutePath()};
/*
* Programmatically raises the level of the "opennlp.tools.cmdline.namefind" logger
* to INFO before any test of this class runs. The tests below assert on INFO
* messages captured via LogCaptor, while logback-test.xml configures this
* logger as "off".
*/
@BeforeAll
public static void prepare() {
LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
Logger logger = context.getLogger("opennlp.tools.cmdline.namefind");
logger.setLevel(Level.INFO);
}

final String in = "It is Stefanie Schmidt.\n\nNothing in this sentence.";
InputStream stream = new ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8));
/*
* Programmatically restores the level of the "opennlp.tools.cmdline.namefind"
* logger to OFF (the value configured in logback-test.xml) after all tests of
* this class have run.
*/
@AfterAll
public static void cleanup() {
LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
Logger logger = context.getLogger("opennlp.tools.cmdline.namefind");
logger.setLevel(Level.OFF);
}

System.setIn(stream);
@Test
void run() throws IOException {
try (LogCaptor logCaptor = LogCaptor.forClass(TokenNameFinderTool.class)) {
File model1 = trainModel();
String[] args = new String[] {model1.getAbsolutePath()};

ByteArrayOutputStream baos = new ByteArrayOutputStream();
PrintStream ps = new PrintStream(baos);
System.setOut(ps);
final String in = "It is Stefanie Schmidt.\n";
InputStream stream = new ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8));

TokenNameFinderTool tool = new TokenNameFinderTool();
tool.run(args);
System.setIn(stream);

final String content = baos.toString(StandardCharsets.UTF_8);
Assertions.assertTrue(content.contains("It is <START:person> Stefanie Schmidt. <END>"));
TokenNameFinderTool tool = new TokenNameFinderTool();
tool.run(args);

Assertions.assertTrue(model1.delete());
assertEquals(1, logCaptor.getInfoLogs().size());
final String content = logCaptor.getInfoLogs().get(0);
logCaptor.clearLogs();
assertEquals("It is <START:person> Stefanie Schmidt. <END>", content);
assertTrue(model1.delete());
}
}

@Test
void invalidModel() {

Assertions.assertThrows(TerminateToolException.class, () -> {

assertThrows(TerminateToolException.class, () -> {
String[] args = new String[] {"invalidmodel.bin"};

TokenNameFinderTool tool = new TokenNameFinderTool();
tool.run(args);

});


}

@Test
//TODO OPENNLP-1447
@Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
"We need to find a way to redirect log output (i.e. implement " +
"a custom log adapter and plug it in, if we want to do such tests.")
void usage() {
try (LogCaptor logCaptor = LogCaptor.forClass(TokenNameFinderTool.class)) {
String[] args = new String[] {};

String[] args = new String[] {};

ByteArrayOutputStream baos = new ByteArrayOutputStream();
PrintStream ps = new PrintStream(baos);
System.setOut(ps);

TokenNameFinderTool tool = new TokenNameFinderTool();
tool.run(args);

final String content = baos.toString(StandardCharsets.UTF_8);
Assertions.assertEquals(tool.getHelp(), content.trim());
TokenNameFinderTool tool = new TokenNameFinderTool();
tool.run(args);

assertEquals(1, logCaptor.getInfoLogs().size());
final String content = logCaptor.getInfoLogs().get(0);
assertEquals(tool.getHelp(), content.trim());
}
}

private File trainModel() throws IOException {

ObjectStream<String> lineStream =
new PlainTextByLineStream(new MockInputStreamFactory(
new File("opennlp/tools/namefind/AnnotatedSentencesWithTypes.txt")),
Expand All @@ -123,7 +131,6 @@ private File trainModel() throws IOException {
params.put(TrainingParameters.CUTOFF_PARAM, 1);

TokenNameFinderModel model;

TokenNameFinderFactory nameFinderFactory = new TokenNameFinderFactory();

try (ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream)) {
Expand All @@ -132,12 +139,10 @@ private File trainModel() throws IOException {
}

File modelFile = Files.createTempFile("model", ".bin").toFile();

try (OutputStream modelOut =
new BufferedOutputStream(new FileOutputStream(modelFile))) {
model.serialize(modelOut);
}

return modelFile;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,40 @@
package opennlp.tools.cmdline.tokenizer;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.Logger;
import ch.qos.logback.classic.LoggerContext;
import nl.altindag.log.LogCaptor;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.LoggerFactory;

import opennlp.tools.AbstractTempDirTest;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.util.InvalidFormatException;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
* Tests for the {@link TokenizerTrainerTool} class.
*/
public class TokenizerTrainerToolTest extends AbstractTempDirTest {

private TokenizerTrainerTool tokenizerTrainerTool;

private final String sampleSuccessData =
"Pierre Vinken<SPLIT>, 61 years old<SPLIT>, will join the board as a nonexecutive " +
"director Nov. 29<SPLIT>.\n" +
Expand All @@ -54,55 +61,70 @@ public class TokenizerTrainerToolTest extends AbstractTempDirTest {

private final String sampleFailureData = "It is Fail Test Case.\n\nNothing in this sentence.";

/*
* Programmatically raises the level of the "opennlp.tools.cmdline.CmdLineUtil"
* logger to INFO before any test of this class runs. The happy-case test asserts
* on INFO messages captured via LogCaptor, while logback-test.xml configures
* this logger as "off".
*/
@BeforeAll
public static void prepare() {
LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
Logger logger = context.getLogger("opennlp.tools.cmdline.CmdLineUtil");
logger.setLevel(Level.INFO);
}

/*
* Programmatically restores the level of the "opennlp.tools.cmdline.CmdLineUtil"
* logger to OFF (the value configured in logback-test.xml) after all tests of
* this class have run.
*/
@AfterAll
public static void cleanup() {
LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
Logger logger = context.getLogger("opennlp.tools.cmdline.CmdLineUtil");
logger.setLevel(Level.OFF);
}

@Test
public void testGetShortDescription() {
tokenizerTrainerTool = new TokenizerTrainerTool();
Assertions.assertEquals("Trainer for the learnable tokenizer",
TokenizerTrainerTool tokenizerTrainerTool = new TokenizerTrainerTool();
assertEquals("Trainer for the learnable tokenizer",
tokenizerTrainerTool.getShortDescription());
}

@Test
public void testLoadDictHappyCase() throws IOException {
File dictFile = new File("lang/ga/abb_GA.xml");
Dictionary dict = TokenizerTrainerTool.loadDict(dictFile);
Assertions.assertNotNull(dict);
assertNotNull(dict);
}

@Test
public void testLoadDictFailCase() {
Assertions.assertThrows(InvalidFormatException.class , () ->
assertThrows(InvalidFormatException.class , () ->
TokenizerTrainerTool.loadDict(prepareDataFile("")));
}

//TODO OPENNLP-1447
@Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
"We need to find a way to redirect log output (i.e. implement " +
"a custom log adapter and plug it in, if we want to do such tests.")
@Test
public void testTestRunHappyCase() throws IOException {
File model = tempDir.resolve("model-en.bin").toFile();

String[] args =
new String[] { "-model" , model.getAbsolutePath() , "-alphaNumOpt" , "false" , "-lang" , "en" ,
"-data" , String.valueOf(prepareDataFile(sampleSuccessData)) , "-encoding" , "UTF-8" };

InputStream stream = new ByteArrayInputStream(sampleSuccessData.getBytes(StandardCharsets.UTF_8));
System.setIn(stream);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PrintStream ps = new PrintStream(baos);
System.setOut(ps);

tokenizerTrainerTool = new TokenizerTrainerTool();
tokenizerTrainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);

final String content = baos.toString(StandardCharsets.UTF_8);
Assertions.assertTrue(content.contains("Number of Event Tokens: 171"));
Assertions.assertTrue(model.delete());
try (LogCaptor logCaptor = LogCaptor.forClass(CmdLineUtil.class)) {
File model = tempDir.resolve("model-en.bin").toFile();

String[] args =
new String[] { "-model" , model.getAbsolutePath() , "-alphaNumOpt" , "false" , "-lang" , "en" ,
"-data" , String.valueOf(prepareDataFile(sampleSuccessData)) , "-encoding" , "UTF-8" };

InputStream stream = new ByteArrayInputStream(sampleSuccessData.getBytes(StandardCharsets.UTF_8));
System.setIn(stream);

TokenizerTrainerTool trainerTool = new TokenizerTrainerTool();
trainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);

assertEquals(3, logCaptor.getInfoLogs().size());
final String content = logCaptor.getInfoLogs().get(2);
assertTrue(content.startsWith("Wrote tokenizer model to path:"));
assertTrue(model.delete());
}
}

//TODO OPENNLP-1447
@Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
"We need to find a way to redirect log output (i.e. implement " +
"a custom log adapter and plug it in, if we want to do such tests.")
@Test
public void testTestRunExceptionCase() throws IOException {
File model = tempDir.resolve("model-en.bin").toFile();
model.deleteOnExit();
Expand All @@ -111,17 +133,10 @@ public void testTestRunExceptionCase() throws IOException {
new String[] { "-model" , model.getAbsolutePath() , "-alphaNumOpt" , "false" , "-lang" , "en" ,
"-data" , String.valueOf(prepareDataFile(sampleFailureData)) , "-encoding" , "UTF-8" };

InputStream stream = new ByteArrayInputStream(sampleFailureData.getBytes(StandardCharsets.UTF_8));
System.setIn(stream);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PrintStream ps = new PrintStream(baos);
System.setOut(ps);

Assertions.assertThrows(TerminateToolException.class , () -> {
tokenizerTrainerTool = new TokenizerTrainerTool();
tokenizerTrainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
assertThrows(TerminateToolException.class , () -> {
TokenizerTrainerTool trainerTool = new TokenizerTrainerTool();
trainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
});

}

// This is guaranteed to be deleted after the test finishes.
Expand Down
6 changes: 5 additions & 1 deletion opennlp-tools/src/test/resources/logback-test.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,16 @@

<appender name="consoleAppender" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%date{HH:mm:ss.SSS} [%thread] %-5level %class{36}.%method:%line - %msg%n</pattern>
<pattern>%date{HH:mm:ss.SSS} [%thread] %-4level %class{36}.%method:%line - %msg%n</pattern>
</encoder>
</appender>

<logger name="opennlp" level="off"/>

<logger name="opennlp.tools.cmdline.namefind" level="off"/>

<logger name="opennlp.tools.cmdline.CmdLineUtil" level="off"/>

<root level="off">
<appender-ref ref="consoleAppender" />
</root>
Expand Down
Loading