Skip to content

Commit a0526b4

Browse files
amensikomawiesne
amensiko
authored and committed
OPENNLP-855: New SentimentAnalysisParser
1 parent a1b2ad1 commit a0526b4

20 files changed

+1832
-0
lines changed

opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java

+8
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@
7171
import opennlp.tools.cmdline.sentdetect.SentenceDetectorEvaluatorTool;
7272
import opennlp.tools.cmdline.sentdetect.SentenceDetectorTool;
7373
import opennlp.tools.cmdline.sentdetect.SentenceDetectorTrainerTool;
74+
import opennlp.tools.cmdline.sentiment.SentimentCrossValidatorTool;
75+
import opennlp.tools.cmdline.sentiment.SentimentEvaluatorTool;
76+
import opennlp.tools.cmdline.sentiment.SentimentTrainerTool;
7477
import opennlp.tools.cmdline.tokenizer.DictionaryDetokenizerTool;
7578
import opennlp.tools.cmdline.tokenizer.SimpleTokenizerTool;
7679
import opennlp.tools.cmdline.tokenizer.TokenizerConverterTool;
@@ -165,6 +168,11 @@ public final class CLI {
165168

166169
// Entity Linker
167170
tools.add(new EntityLinkerTool());
171+
172+
// Sentiment Analysis Parser
173+
tools.add(new SentimentTrainerTool());
174+
tools.add(new SentimentEvaluatorTool());
175+
tools.add(new SentimentCrossValidatorTool());
168176

169177
// Language Model
170178
tools.add(new NGramLanguageModelTool());

opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java

+3
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import opennlp.tools.formats.NameSampleDataStreamFactory;
3636
import opennlp.tools.formats.ParseSampleStreamFactory;
3737
import opennlp.tools.formats.SentenceSampleStreamFactory;
38+
import opennlp.tools.formats.SentimentSampleStreamFactory;
3839
import opennlp.tools.formats.TokenSampleStreamFactory;
3940
import opennlp.tools.formats.TwentyNewsgroupSampleStreamFactory;
4041
import opennlp.tools.formats.WordTagSampleStreamFactory;
@@ -140,6 +141,8 @@ public final class StreamFactoryRegistry {
140141
MascPOSSampleStreamFactory.registerFactory();
141142
MascSentenceSampleStreamFactory.registerFactory();
142143
MascTokenSampleStreamFactory.registerFactory();
144+
145+
SentimentSampleStreamFactory.registerFactory();
143146
}
144147

145148
public static final String DEFAULT_FORMAT = "opennlp";
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package opennlp.tools.cmdline.sentiment;
19+
20+
import java.io.IOException;
21+
import java.util.LinkedList;
22+
import java.util.List;
23+
24+
import opennlp.tools.cmdline.AbstractCrossValidatorTool;
25+
import opennlp.tools.cmdline.CmdLineUtil;
26+
import opennlp.tools.cmdline.TerminateToolException;
27+
import opennlp.tools.cmdline.params.BasicTrainingParams;
28+
import opennlp.tools.cmdline.params.CVParams;
29+
import opennlp.tools.cmdline.sentiment.SentimentCrossValidatorTool.CVToolParams;
30+
import opennlp.tools.sentiment.SentimentCrossValidator;
31+
import opennlp.tools.sentiment.SentimentEvaluationMonitor;
32+
import opennlp.tools.sentiment.SentimentFactory;
33+
import opennlp.tools.sentiment.SentimentSample;
34+
import opennlp.tools.util.eval.EvaluationMonitor;
35+
import opennlp.tools.util.model.ModelUtil;
36+
37+
/**
38+
* Class for helping perform cross validation on the Sentiment Analysis Parser.
39+
*/
40+
public class SentimentCrossValidatorTool
41+
extends AbstractCrossValidatorTool<SentimentSample, CVToolParams> {
42+
43+
/**
44+
* Interface for parameters
45+
*/
46+
interface CVToolParams extends BasicTrainingParams, CVParams {
47+
48+
}
49+
50+
/**
51+
* Constructor
52+
*/
53+
public SentimentCrossValidatorTool() {
54+
super(SentimentSample.class, CVToolParams.class);
55+
}
56+
57+
/**
58+
* Returns the short description of the tool
59+
*
60+
* @return short description
61+
*/
62+
public String getShortDescription() {
63+
return "K-fold cross validator for the learnable Sentiment Analysis Parser";
64+
}
65+
66+
/**
67+
* Runs the tool
68+
*
69+
* @param format
70+
* the format to be used
71+
* @param args
72+
* the arguments
73+
*/
74+
public void run(String format, String[] args) {
75+
super.run(format, args);
76+
77+
mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
78+
if (mlParams == null) {
79+
mlParams = ModelUtil.createDefaultTrainingParameters();
80+
}
81+
82+
List<EvaluationMonitor<SentimentSample>> listeners = new LinkedList<>();
83+
if (params.getMisclassified()) {
84+
listeners.add(new SentimentEvaluationErrorListener());
85+
}
86+
SentimentDetailedFMeasureListener detailedFListener = null;
87+
SentimentFactory sentimentFactory = new SentimentFactory();
88+
89+
SentimentCrossValidator validator;
90+
try {
91+
validator = new SentimentCrossValidator(params.getLang(), mlParams, sentimentFactory,
92+
listeners.toArray(new SentimentEvaluationMonitor[listeners.size()]));
93+
validator.evaluate(sampleStream, params.getFolds());
94+
} catch (IOException e) {
95+
throw new TerminateToolException(-1,
96+
"IO error while reading training data or indexing data: "
97+
+ e.getMessage(),
98+
e);
99+
} finally {
100+
try {
101+
sampleStream.close();
102+
} catch (IOException e) {
103+
// sorry that this can fail
104+
}
105+
}
106+
107+
System.out.println("done");
108+
109+
System.out.println();
110+
111+
if (detailedFListener == null) {
112+
System.out.println(validator.getFMeasure());
113+
} else {
114+
System.out.println(detailedFListener.toString());
115+
}
116+
}
117+
118+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package opennlp.tools.cmdline.sentiment;
19+
20+
import opennlp.tools.cmdline.DetailedFMeasureListener;
21+
import opennlp.tools.sentiment.SentimentEvaluationMonitor;
22+
import opennlp.tools.sentiment.SentimentSample;
23+
import opennlp.tools.util.Span;
24+
25+
/**
26+
* Class for creating a detailed F-Measure listener
27+
*/
28+
public class SentimentDetailedFMeasureListener
29+
extends DetailedFMeasureListener<SentimentSample>
30+
implements SentimentEvaluationMonitor {
31+
32+
/**
33+
* Returns the sentiment sample as a span array
34+
*
35+
* @param sample
36+
* the sentiment sample to be returned
37+
* @return span array of the sample
38+
*/
39+
@Override
40+
protected Span[] asSpanArray(SentimentSample sample) {
41+
return null;
42+
}
43+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package opennlp.tools.cmdline.sentiment;
19+
20+
import java.io.OutputStream;
21+
22+
import opennlp.tools.cmdline.EvaluationErrorPrinter;
23+
import opennlp.tools.sentiment.SentimentSample;
24+
import opennlp.tools.util.eval.EvaluationMonitor;
25+
26+
/**
27+
* Class for creating an evaluation error listener.
28+
*/
29+
public class SentimentEvaluationErrorListener
30+
extends EvaluationErrorPrinter<SentimentSample>
31+
implements EvaluationMonitor<SentimentSample> {
32+
33+
/**
34+
* Constructor
35+
*/
36+
public SentimentEvaluationErrorListener() {
37+
super(System.err);
38+
}
39+
40+
/**
41+
* Constructor
42+
*/
43+
protected SentimentEvaluationErrorListener(OutputStream outputStream) {
44+
super(outputStream);
45+
}
46+
47+
/**
48+
* Prints the error in case of a missclassification in the evaluator
49+
*
50+
* @param reference
51+
* the sentiment sample reference to be used
52+
* @param prediction
53+
* the sentiment sampple prediction
54+
*/
55+
@Override
56+
public void misclassified(SentimentSample reference,
57+
SentimentSample prediction) {
58+
printError(new String[] { reference.getSentiment() },
59+
new String[] { prediction.getSentiment() }, reference, prediction,
60+
reference.getSentence());
61+
}
62+
63+
}

0 commit comments

Comments
 (0)