Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

POS - remove unsupported and unused StanfordPOSTagger #1021

Merged
merged 1 commit into from
Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 1 addition & 37 deletions orangecontrib/text/tag/pos.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import nltk
import numpy as np

from Orange.util import wrap_callback, dummy_callback

from orangecontrib.text import Corpus
Expand All @@ -11,7 +10,7 @@
from orangecontrib.text.util import chunkable


__all__ = ['POSTagger', 'StanfordPOSTagger', 'AveragedPerceptronTagger', 'MaxEntTagger']
__all__ = ["POSTagger", "AveragedPerceptronTagger", "MaxEntTagger"]


class POSTagger(TokenizedPreprocessor):
Expand All @@ -38,41 +37,6 @@ def _preprocess(self, tokens: List[List[str]]) -> List[List[str]]:
self.tagger(tokens)))


class StanfordPOSTaggerError(Exception):
pass


class StanfordPOSTagger(nltk.StanfordPOSTagger, POSTagger):
name = 'Stanford POS Tagger'

@classmethod
def check(cls, path_to_model, path_to_jar):
""" Checks whether provided `path_to_model` and `path_to_jar` are valid.

Raises:
ValueError: in case at least one of the paths is invalid.

Notes:
Can raise an exception if Java Development Kit is not installed or not properly configured.

Examples:
>>> try:
... StanfordPOSTagger.check('path/to/model', 'path/to/stanford.jar')
... except ValueError as e:
... print(e)
Could not find stanford-postagger.jar jar file at path/to/stanford.jar

"""
try:
cls(path_to_model, path_to_jar).tag(())
except OSError as e:
raise StanfordPOSTaggerError(
'Either Java SDK not installed or some of the '
'files are invalid.\n' + str(e))
except LookupError as e:
raise StanfordPOSTaggerError(str(e).strip(' =\n'))


class AveragedPerceptronTagger(POSTagger):
name = 'Averaged Perceptron Tagger'

Expand Down
11 changes: 0 additions & 11 deletions orangecontrib/text/tests/test_tags.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import pickle
import copy
import tempfile
import unittest

from orangecontrib.text import tag
from orangecontrib.text.corpus import Corpus
from orangecontrib.text.tag.pos import StanfordPOSTaggerError


class POSTaggerTests(unittest.TestCase):
Expand All @@ -19,15 +17,6 @@ def test_POSTagger(self):
for tokens, tags in zip(result.tokens, result.pos_tags):
self.assertEqual(len(tokens), len(tags))

def test_Stanford_check(self):
model = tempfile.NamedTemporaryFile()
resource = tempfile.NamedTemporaryFile()
with self.assertRaises(StanfordPOSTaggerError):
tag.StanfordPOSTagger.check(model.name, resource.name)

with self.assertRaises(StanfordPOSTaggerError):
tag.StanfordPOSTagger.check('model', resource.name)

def test_str(self):
self.assertEqual('Averaged Perceptron Tagger', str(self.tagger))

Expand Down
6 changes: 1 addition & 5 deletions orangecontrib/text/widgets/owpreprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
from orangecontrib.text.preprocess.normalize import UDPipeStopIteration
from orangecontrib.text.tag import AveragedPerceptronTagger, MaxEntTagger, \
POSTagger
from orangecontrib.text.tag.pos import StanfordPOSTaggerError

_DEFAULT_NONE = "(none)"

Expand Down Expand Up @@ -973,7 +972,7 @@ def createinstance(params: Dict) -> NGrams:


class POSTaggingModule(SingleMethodModule):
Averaged, MaxEnt, Stanford = range(3)
Averaged, MaxEnt = range(2)
Methods = {Averaged: AveragedPerceptronTagger,
MaxEnt: MaxEntTagger}
DEFAULT_METHOD = Averaged
Expand Down Expand Up @@ -1041,7 +1040,6 @@ class Error(Orange.widgets.data.owpreprocess.OWPreprocess.Error):
file_not_found = Msg("File not found.")
invalid_encoding = Msg("Invalid file encoding. Please save the "
"file as UTF-8 and try again.")
stanford_tagger = Msg("Problem loading Stanford POS Tagger:\n{}")

class Warning(Orange.widgets.data.owpreprocess.OWPreprocess.Warning):
no_token_left = Msg("No tokens on the output.")
Expand Down Expand Up @@ -1162,8 +1160,6 @@ def apply(self):
self.Error.file_not_found()
except UnicodeError as e:
self.Error.invalid_encoding(e)
except StanfordPOSTaggerError as e:
self.Error.stanford_tagger(e)
except Exception as e:
self.Error.unknown_error(str(e))

Expand Down
Loading