Skip to content

Commit

Permalink
Preprocess widget - temp
Browse files: browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Apr 21, 2023
1 parent 8599d19 commit 6459369
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 69 deletions.
31 changes: 22 additions & 9 deletions orangecontrib/text/widgets/owpreprocess.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,9 +5,21 @@
import pkg_resources

from AnyQt.QtCore import Qt, pyqtSignal
from AnyQt.QtWidgets import QComboBox, QButtonGroup, QLabel, QCheckBox, \
QRadioButton, QGridLayout, QLineEdit, QSpinBox, QFormLayout, QHBoxLayout, \
QDoubleSpinBox, QFileDialog, QAbstractSpinBox
from AnyQt.QtWidgets import (
QComboBox,
QButtonGroup,
QLabel,
QCheckBox,
QRadioButton,
QGridLayout,
QLineEdit,
QSpinBox,
QFormLayout,
QHBoxLayout,
QDoubleSpinBox,
QFileDialog,
QAbstractSpinBox,
)
from AnyQt.QtWidgets import QWidget, QPushButton, QSizePolicy, QStyle
from AnyQt.QtGui import QBrush, QValidator

Expand All @@ -17,8 +29,12 @@
import Orange.widgets.data.owpreprocess
from Orange.widgets import gui
from Orange.widgets.data.owpreprocess import PreprocessAction, Description
from Orange.widgets.data.utils.preprocess import ParametersRole, \
DescriptionRole, BaseEditor, StandardItemModel
from Orange.widgets.data.utils.preprocess import (
ParametersRole,
DescriptionRole,
BaseEditor,
StandardItemModel,
)
from Orange.widgets.settings import Setting
from Orange.widgets.utils.concurrent import TaskState, ConcurrentWidgetMixin
from Orange.widgets.widget import Input, Output, Msg, Message
Expand All @@ -29,7 +45,6 @@
from orangecontrib.text.preprocess.normalize import UDPipeStopIteration
from orangecontrib.text.tag import AveragedPerceptronTagger, MaxEntTagger, \
POSTagger
from orangecontrib.text.tag.pos import StanfordPOSTaggerError

_DEFAULT_NONE = "(none)"

Expand Down Expand Up @@ -446,7 +461,7 @@ def __init__(self, parent=None, **kwargs):
checked=self.DEFAULT_USE_TOKE)
self.__check_use.clicked.connect(self.__set_use_tokenizer)
self.__combo_lemm = ComboBox(
self, LemmagenLemmatizer.lemmagen_languages,
self, LemmagenLemmatizer.supported_languages,
self.__lemmagen_lang, self.__set_lemmagen_lang
)

Expand Down Expand Up @@ -1162,8 +1177,6 @@ def apply(self):
self.Error.file_not_found()
except UnicodeError as e:
self.Error.invalid_encoding(e)
except StanfordPOSTaggerError as e:
self.Error.stanford_tagger(e)
except Exception as e:
self.Error.unknown_error(str(e))

Expand Down
102 changes: 42 additions & 60 deletions orangecontrib/text/widgets/tests/test_owpreprocess.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -165,8 +165,7 @@ def test_migrate_settings_normalize(self):
"udpipe_tokenizer": True}}
widget = self.create_widget(OWPreprocess, stored_settings=settings)
params = [("preprocess.normalize",
{"method": 2, "snowball_language": "French",
"udpipe_language": "German", "udpipe_tokenizer": True})]
{"method": 2, "udpipe_tokenizer": True})]
self.assertEqual(widget.storedsettings["preprocessors"], params)

def test_migrate_settings_filter(self):
Expand All @@ -180,7 +179,7 @@ def test_migrate_settings_filter(self):
"use_df": False, "use_keep_n": False}}
widget = self.create_widget(OWPreprocess, stored_settings=settings)
params = [("preprocess.filter",
{"methods": [0, 2, 4], "language": "Finnish",
{"methods": [0, 2, 4],
"sw_path": None, "sw_list": [],
"lx_path": None, "lx_list": [],
"pattern": "foo", "rel_start": 0.3,
Expand Down Expand Up @@ -365,29 +364,18 @@ def test_init(self):
self.assertTrue(self.buttons[0].isChecked())
for i in range(1, 4):
self.assertFalse(self.buttons[i].isChecked())
self.assertEqual(self.combo_sbl.currentText(), "English")
self.assertEqual(self.combo_udl.currentText(), "English")
self.assertFalse(self.check_use.isChecked())

def test_parameters(self):
params = {"method": NormalizationModule.Porter,
"snowball_language": "English",
"udpipe_language": "English",
"lemmagen_language": "English",
"udpipe_tokenizer": False}
self.assertDictEqual(self.editor.parameters(), params)

def test_set_parameters(self):
params = {"method": NormalizationModule.UDPipe,
"snowball_language": "Dutch",
"udpipe_language": "Slovenian",
"lemmagen_language": "Bulgarian",
"udpipe_tokenizer": True}
self.editor.setParameters(params)
self.assertDictEqual(self.editor.parameters(), params)
self.assertEqual(self.combo_sbl.currentText(), "Dutch")
self.assertEqual(self.combo_udl.currentText(), "Slovenian")
self.assertEqual(self.combo_lemm.currentText(), "Bulgarian")
self.assertTrue(self.check_use.isChecked())

def test_createinstance(self):
Expand All @@ -397,20 +385,11 @@ def test_createinstance(self):
params = {"method": NormalizationModule.Snowball}
pp = self.editor.createinstance(params)
self.assertIsInstance(pp, SnowballStemmer)
self.assertIn("<EnglishStemmer>", str(pp.normalizer))

params = {"method": NormalizationModule.Snowball,
"snowball_language": "Dutch"}
pp = self.editor.createinstance(params)
self.assertIsInstance(pp, SnowballStemmer)
self.assertIn("<DutchStemmer>", str(pp.normalizer))

params = {"method": NormalizationModule.UDPipe,
"udpipe_language": "Finnish",
"udpipe_tokenizer": True}
pp = self.editor.createinstance(params)
self.assertIsInstance(pp, UDPipeLemmatizer)
self.assertEqual(pp._UDPipeLemmatizer__language, "Finnish")
self.assertEqual(pp._UDPipeLemmatizer__use_tokenizer, True)

def test_repr(self):
Expand All @@ -424,8 +403,6 @@ def test_udpipe_no_models(self):
editor = NormalizationModule()
button = editor._SingleMethodModule__group.button(editor.UDPipe)
self.assertFalse(button.isEnabled())
combo = editor._NormalizationModule__combo_udl
self.assertFalse(combo.isEnabled())
check = editor._NormalizationModule__check_use
self.assertFalse(check.isEnabled())

Expand All @@ -438,10 +415,6 @@ def setUp(self):
def check_boxes(self):
return [cb for i, cb in self.editor._MultipleMethodModule__cbs]

@property
def combo(self):
return self.editor._FilteringModule__combo

@property
def sw_combo(self):
return self.editor._FilteringModule__sw_loader.file_combo
Expand Down Expand Up @@ -477,7 +450,6 @@ def test_init(self):
for i in range(1, len(check_boxes)):
self.assertFalse(check_boxes[i].isChecked())
self.assertGreater(len(check_boxes[i].toolTip()), 0)
self.assertEqual(self.combo.currentText(), "English")
self.assertEqual(self.sw_combo.currentText(), "(none)")
self.assertEqual(self.lx_combo.currentText(), "(none)")
self.assertEqual(self.line_edit.text(), FilteringModule.DEFAULT_PATTERN)
Expand All @@ -490,32 +462,47 @@ def test_init(self):
self.assertEqual(self.spin.value(), 100)

def test_parameters(self):
params = {"methods": [FilteringModule.Stopwords],
"language": "English", "sw_path": None, "lx_path": None,
"sw_list": [], "lx_list": [],
"incl_num": False,
"pattern": FilteringModule.DEFAULT_PATTERN,
"freq_type": 0,
"rel_start": 0.1, "rel_end": 0.9,
"abs_start": 1, "abs_end": 10,
"n_tokens": 100, "pos_tags": "NOUN,VERB",
"invalidated": False}
params = {
"methods": [FilteringModule.Stopwords],
"sw_path": None,
"lx_path": None,
"use_default_sw": True,
"sw_list": [],
"lx_list": [],
"incl_num": False,
"pattern": FilteringModule.DEFAULT_PATTERN,
"freq_type": 0,
"rel_start": 0.1,
"rel_end": 0.9,
"abs_start": 1,
"abs_end": 10,
"n_tokens": 100,
"pos_tags": "NOUN,VERB",
"invalidated": False,
}
self.assertDictEqual(self.editor.parameters(), params)

def test_set_parameters(self):
sw_path = RecentPath.create("Foo", [])
lx_path = RecentPath.create("Bar", [])
params = {"methods": [FilteringModule.Lexicon, FilteringModule.Regexp],
"language": "Finnish",
"sw_path": sw_path, "lx_path": lx_path,
"sw_list": [sw_path], "lx_list": [lx_path],
"incl_num": False,
"pattern": "foo",
"freq_type": 1,
"rel_start": 0.2, "rel_end": 0.7,
"abs_start": 2, "abs_end": 15,
"n_tokens": 10, "pos_tags": "JJ",
"invalidated": False}
params = {
"methods": [FilteringModule.Lexicon, FilteringModule.Regexp],
"use_default_sw": True,
"sw_path": sw_path,
"lx_path": lx_path,
"sw_list": [sw_path],
"lx_list": [lx_path],
"incl_num": False,
"pattern": "foo",
"freq_type": 1,
"rel_start": 0.2,
"rel_end": 0.7,
"abs_start": 2,
"abs_end": 15,
"n_tokens": 10,
"pos_tags": "JJ",
"invalidated": False,
}
self.editor.setParameters(params)
self.assertDictEqual(self.editor.parameters(), params)

Expand All @@ -527,7 +514,6 @@ def test_set_parameters(self):
self.assertFalse(check_boxes[4].isChecked())
self.assertFalse(check_boxes[5].isChecked())

self.assertEqual(self.combo.currentText(), "Finnish")
self.assertEqual(self.sw_combo.currentText(), "Foo")
self.assertEqual(self.lx_combo.currentText(), "Bar")
self.assertEqual(self.line_edit.text(), "foo")
Expand All @@ -550,10 +536,10 @@ def test_createinstance(self):
self.assertIsInstance(pp[1], MostFrequentTokensFilter)

def test_repr(self):
self.assertEqual(str(self.editor),
"Stopwords (Language: English, File: None)")
params = {"methods": [FilteringModule.Lexicon,
FilteringModule.Regexp]}
self.assertEqual(
str(self.editor), "Stopwords (Use default stopwords: Yes, File: None)"
)
params = {"methods": [FilteringModule.Lexicon, FilteringModule.Regexp]}
self.editor.setParameters(params)
self.assertEqual(
str(self.editor),
Expand Down Expand Up @@ -646,10 +632,6 @@ def test_createinstance(self):
pp = self.editor.createinstance({"method": POSTaggingModule.MaxEnt})
self.assertIsInstance(pp, MaxEntTagger)

# TODO - implement StanfordPOSTagger
# pp = self.editor.createinstance({"method": POSTaggingModule.Stanford})
# self.assertIsInstance(pp, StanfordPOSTagger)

def test_repr(self):
self.assertEqual(str(self.editor), "Averaged Perceptron Tagger")

Expand Down

0 comments on commit 6459369

Please sign in to comment.