From 832d5f1278a94594cb4ca177e2ed06a39e4b2157 Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Fri, 14 Apr 2023 15:03:46 +0200
Subject: [PATCH] POS - remove unsupported and unused StanfordPOSTagger
---
orangecontrib/text/tag/pos.py | 38 +---------------------
orangecontrib/text/tests/test_tags.py | 11 -------
orangecontrib/text/widgets/owpreprocess.py | 6 +---
3 files changed, 2 insertions(+), 53 deletions(-)
diff --git a/orangecontrib/text/tag/pos.py b/orangecontrib/text/tag/pos.py
index a9b665e05..3ff79bf0c 100644
--- a/orangecontrib/text/tag/pos.py
+++ b/orangecontrib/text/tag/pos.py
@@ -2,7 +2,6 @@
import nltk
import numpy as np
-
from Orange.util import wrap_callback, dummy_callback
from orangecontrib.text import Corpus
@@ -11,7 +10,7 @@
from orangecontrib.text.util import chunkable
-__all__ = ['POSTagger', 'StanfordPOSTagger', 'AveragedPerceptronTagger', 'MaxEntTagger']
+__all__ = ["POSTagger", "AveragedPerceptronTagger", "MaxEntTagger"]
class POSTagger(TokenizedPreprocessor):
@@ -38,41 +37,6 @@ def _preprocess(self, tokens: List[List[str]]) -> List[List[str]]:
self.tagger(tokens)))
-class StanfordPOSTaggerError(Exception):
- pass
-
-
-class StanfordPOSTagger(nltk.StanfordPOSTagger, POSTagger):
- name = 'Stanford POS Tagger'
-
- @classmethod
- def check(cls, path_to_model, path_to_jar):
- """ Checks whether provided `path_to_model` and `path_to_jar` are valid.
-
- Raises:
- ValueError: in case at least one of the paths is invalid.
-
- Notes:
- Can raise an exception if Java Development Kit is not installed or not properly configured.
-
- Examples:
- >>> try:
- ... StanfordPOSTagger.check('path/to/model', 'path/to/stanford.jar')
- ... except ValueError as e:
- ... print(e)
- Could not find stanford-postagger.jar jar file at path/to/stanford.jar
-
- """
- try:
- cls(path_to_model, path_to_jar).tag(())
- except OSError as e:
- raise StanfordPOSTaggerError(
- 'Either Java SDK not installed or some of the '
- 'files are invalid.\n' + str(e))
- except LookupError as e:
- raise StanfordPOSTaggerError(str(e).strip(' =\n'))
-
-
class AveragedPerceptronTagger(POSTagger):
name = 'Averaged Perceptron Tagger'
diff --git a/orangecontrib/text/tests/test_tags.py b/orangecontrib/text/tests/test_tags.py
index 0043c414f..9ecbb14a1 100644
--- a/orangecontrib/text/tests/test_tags.py
+++ b/orangecontrib/text/tests/test_tags.py
@@ -1,11 +1,9 @@
import pickle
import copy
-import tempfile
import unittest
from orangecontrib.text import tag
from orangecontrib.text.corpus import Corpus
-from orangecontrib.text.tag.pos import StanfordPOSTaggerError
class POSTaggerTests(unittest.TestCase):
@@ -19,15 +17,6 @@ def test_POSTagger(self):
for tokens, tags in zip(result.tokens, result.pos_tags):
self.assertEqual(len(tokens), len(tags))
- def test_Stanford_check(self):
- model = tempfile.NamedTemporaryFile()
- resource = tempfile.NamedTemporaryFile()
- with self.assertRaises(StanfordPOSTaggerError):
- tag.StanfordPOSTagger.check(model.name, resource.name)
-
- with self.assertRaises(StanfordPOSTaggerError):
- tag.StanfordPOSTagger.check('model', resource.name)
-
def test_str(self):
self.assertEqual('Averaged Perceptron Tagger', str(self.tagger))
diff --git a/orangecontrib/text/widgets/owpreprocess.py b/orangecontrib/text/widgets/owpreprocess.py
index adbb4d024..d42df3ed7 100644
--- a/orangecontrib/text/widgets/owpreprocess.py
+++ b/orangecontrib/text/widgets/owpreprocess.py
@@ -29,7 +29,6 @@
from orangecontrib.text.preprocess.normalize import UDPipeStopIteration
from orangecontrib.text.tag import AveragedPerceptronTagger, MaxEntTagger, \
POSTagger
-from orangecontrib.text.tag.pos import StanfordPOSTaggerError
_DEFAULT_NONE = "(none)"
@@ -973,7 +972,7 @@ def createinstance(params: Dict) -> NGrams:
class POSTaggingModule(SingleMethodModule):
- Averaged, MaxEnt, Stanford = range(3)
+ Averaged, MaxEnt = range(2)
Methods = {Averaged: AveragedPerceptronTagger,
MaxEnt: MaxEntTagger}
DEFAULT_METHOD = Averaged
@@ -1041,7 +1040,6 @@ class Error(Orange.widgets.data.owpreprocess.OWPreprocess.Error):
file_not_found = Msg("File not found.")
invalid_encoding = Msg("Invalid file encoding. Please save the "
"file as UTF-8 and try again.")
- stanford_tagger = Msg("Problem loading Stanford POS Tagger:\n{}")
class Warning(Orange.widgets.data.owpreprocess.OWPreprocess.Warning):
no_token_left = Msg("No tokens on the output.")
@@ -1162,8 +1160,6 @@ def apply(self):
self.Error.file_not_found()
except UnicodeError as e:
self.Error.invalid_encoding(e)
- except StanfordPOSTaggerError as e:
- self.Error.stanford_tagger(e)
except Exception as e:
self.Error.unknown_error(str(e))