Skip to content

Commit

Permalink
Merge pull request #1021 from PrimozGodec/remove-standfordpos
Browse files Browse the repository at this point in the history
POS - remove unsupported and unused StanfordPOSTagger
  • Loading branch information
ajdapretnar authored Nov 10, 2023
2 parents 3ebeaea + 832d5f1 commit 985fe05
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 53 deletions.
38 changes: 1 addition & 37 deletions orangecontrib/text/tag/pos.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import nltk
import numpy as np

from Orange.util import wrap_callback, dummy_callback

from orangecontrib.text import Corpus
Expand All @@ -11,7 +10,7 @@
from orangecontrib.text.util import chunkable


__all__ = ['POSTagger', 'StanfordPOSTagger', 'AveragedPerceptronTagger', 'MaxEntTagger']
__all__ = ["POSTagger", "AveragedPerceptronTagger", "MaxEntTagger"]


class POSTagger(TokenizedPreprocessor):
Expand All @@ -38,41 +37,6 @@ def _preprocess(self, tokens: List[List[str]]) -> List[List[str]]:
self.tagger(tokens)))


# Raised by StanfordPOSTagger.check when creating the tagger or tagging fails
# with OSError or LookupError.
class StanfordPOSTaggerError(Exception):
pass


class StanfordPOSTagger(nltk.StanfordPOSTagger, POSTagger):
# Tagger class deriving from both nltk.StanfordPOSTagger and this module's POSTagger.
name = 'Stanford POS Tagger'

@classmethod
def check(cls, path_to_model, path_to_jar):
""" Checks whether provided `path_to_model` and `path_to_jar` are valid.

The check instantiates the tagger with both paths and tags an empty sequence.

Raises:
StanfordPOSTaggerError: if creating the tagger or tagging raises
`OSError` or `LookupError`; the message of the original exception is
included in the error.
Notes:
Can raise an exception if Java Development Kit is not installed or not properly configured.
Examples:
>>> try:
... StanfordPOSTagger.check('path/to/model', 'path/to/stanford.jar')
... except StanfordPOSTaggerError as e:
... print(e)
Could not find stanford-postagger.jar jar file at path/to/stanford.jar
"""
try:
# Tagging an empty tuple exercises the tagger without any real input.
cls(path_to_model, path_to_jar).tag(())
except OSError as e:
# Re-raise as the module's own error type, prefixing a hint to the original message.
raise StanfordPOSTaggerError(
'Either Java SDK not installed or some of the '
'files are invalid.\n' + str(e))
except LookupError as e:
# Strip surrounding spaces, '=' characters and newlines from the message.
raise StanfordPOSTaggerError(str(e).strip(' =\n'))


class AveragedPerceptronTagger(POSTagger):
name = 'Averaged Perceptron Tagger'

Expand Down
11 changes: 0 additions & 11 deletions orangecontrib/text/tests/test_tags.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import pickle
import copy
import tempfile
import unittest

from orangecontrib.text import tag
from orangecontrib.text.corpus import Corpus
from orangecontrib.text.tag.pos import StanfordPOSTaggerError


class POSTaggerTests(unittest.TestCase):
Expand All @@ -19,15 +17,6 @@ def test_POSTagger(self):
for tokens, tags in zip(result.tokens, result.pos_tags):
self.assertEqual(len(tokens), len(tags))

def test_Stanford_check(self):
model = tempfile.NamedTemporaryFile()
resource = tempfile.NamedTemporaryFile()
with self.assertRaises(StanfordPOSTaggerError):
tag.StanfordPOSTagger.check(model.name, resource.name)

with self.assertRaises(StanfordPOSTaggerError):
tag.StanfordPOSTagger.check('model', resource.name)

def test_str(self):
self.assertEqual('Averaged Perceptron Tagger', str(self.tagger))

Expand Down
6 changes: 1 addition & 5 deletions orangecontrib/text/widgets/owpreprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
from orangecontrib.text.preprocess.normalize import UDPipeStopIteration
from orangecontrib.text.tag import AveragedPerceptronTagger, MaxEntTagger, \
POSTagger
from orangecontrib.text.tag.pos import StanfordPOSTaggerError

_DEFAULT_NONE = "(none)"

Expand Down Expand Up @@ -973,7 +972,7 @@ def createinstance(params: Dict) -> NGrams:


class POSTaggingModule(SingleMethodModule):
Averaged, MaxEnt, Stanford = range(3)
Averaged, MaxEnt = range(2)
Methods = {Averaged: AveragedPerceptronTagger,
MaxEnt: MaxEntTagger}
DEFAULT_METHOD = Averaged
Expand Down Expand Up @@ -1041,7 +1040,6 @@ class Error(Orange.widgets.data.owpreprocess.OWPreprocess.Error):
file_not_found = Msg("File not found.")
invalid_encoding = Msg("Invalid file encoding. Please save the "
"file as UTF-8 and try again.")
stanford_tagger = Msg("Problem loading Stanford POS Tagger:\n{}")

class Warning(Orange.widgets.data.owpreprocess.OWPreprocess.Warning):
no_token_left = Msg("No tokens on the output.")
Expand Down Expand Up @@ -1162,8 +1160,6 @@ def apply(self):
self.Error.file_not_found()
except UnicodeError as e:
self.Error.invalid_encoding(e)
except StanfordPOSTaggerError as e:
self.Error.stanford_tagger(e)
except Exception as e:
self.Error.unknown_error(str(e))

Expand Down

0 comments on commit 985fe05

Please sign in to comment.