diff --git a/orangecontrib/text/widgets/owpreprocess.py b/orangecontrib/text/widgets/owpreprocess.py index 8f939612b..592c85c85 100644 --- a/orangecontrib/text/widgets/owpreprocess.py +++ b/orangecontrib/text/widgets/owpreprocess.py @@ -12,6 +12,8 @@ from AnyQt.QtGui import QBrush, QValidator from Orange.util import wrap_callback +from orangecanvas.gui.utils import disconnected +from orangewidget.settings import SettingsHandler from orangewidget.utils.filedialogs import RecentPath import Orange.widgets.data.owpreprocess @@ -113,7 +115,8 @@ def set_current_language(self, iso_language: Optional[str]): The ISO language code of element to be selected. """ index = self.findData(iso_language) - self.setCurrentIndex(index) + if index >= 0: + self.setCurrentIndex(index) class UDPipeComboBox(LanguageComboBox): @@ -131,15 +134,9 @@ def items(self) -> List: def add_items(self, _, include_none: bool, language: str): self.__items = self.items super().add_items(self.__items, include_none, language) - - def set_current_language(self, iso_language: Optional[str]): iso_items = {iso for _, iso in self.__items} - if iso_language in iso_items: - super().set_current_language(iso_language) - elif self.__default_lang in iso_items: + if language not in iso_items and self.__default_lang in iso_items: super().set_current_language(self.__default_lang) - elif self.__items: - self.setCurrentIndex(0) def showPopup(self): if self.__items != self.items: @@ -538,13 +535,13 @@ def __enable_udpipe(self): def setParameters(self, params: Dict): super().setParameters(params) snowball_lang = params.get("snowball_language", self.DEFAULT_LANGUAGE) - self.__set_snowball_lang(snowball_lang) + self.__combo_sbl.set_current_language(snowball_lang) udpipe_lang = params.get("udpipe_language", self.DEFAULT_LANGUAGE) - self.__set_udpipe_lang(udpipe_lang) + self.__combo_udl.set_current_language(udpipe_lang) use_tokenizer = params.get("udpipe_tokenizer", self.DEFAULT_USE_TOKE) self.__set_use_tokenizer(use_tokenizer) 
lemmagen_lang = params.get("lemmagen_language", self.DEFAULT_LANGUAGE) - self.__set_lemmagen_lang(lemmagen_lang) + self.__combo_lemm.set_current_language(lemmagen_lang) def _set_method(self, method: int): super()._set_method(method) @@ -553,7 +550,6 @@ def _set_method(self, method: int): def __set_snowball_lang(self, language: str): if self.__snowball_lang != language: self.__snowball_lang = language - self.__combo_sbl.set_current_language(language) self.changed.emit() if self.method == self.Snowball: self.edited.emit() @@ -561,7 +557,6 @@ def __set_snowball_lang(self, language: str): def __set_udpipe_lang(self, language: str): if self.__udpipe_lang != language: self.__udpipe_lang = language - self.__combo_udl.set_current_language(language) self.changed.emit() if self.method == self.UDPipe: self.edited.emit() @@ -569,7 +564,6 @@ def __set_udpipe_lang(self, language: str): def __set_lemmagen_lang(self, language: str): if self.__lemmagen_lang != language: self.__lemmagen_lang = language - self.__combo_lemm.set_current_language(language) self.changed.emit() if self.method == self.Lemmagen: self.edited.emit() @@ -795,7 +789,7 @@ def __spin_n_edited(self): def setParameters(self, params: Dict): super().setParameters(params) - self.__set_language(params.get("language", self.DEFAULT_LANG)) + self.__combo.set_current_language(params.get("language", self.DEFAULT_LANG)) self.__set_sw_path(params.get("sw_path", self.DEFAULT_NONE), params.get("sw_list", [])) self.__set_lx_path(params.get("lx_path", self.DEFAULT_NONE), @@ -820,7 +814,6 @@ def setParameters(self, params: Dict): def __set_language(self, language: Optional[str]): if self.__sw_lang != language: self.__sw_lang = language - self.__combo.set_current_language(language) self.changed.emit() if self.Stopwords in self.methods: self.edited.emit() @@ -1045,6 +1038,21 @@ def createinstance(params: Dict) -> POSTagger: return POSTaggingModule.Methods[method]() +class PreprocessSettingsHandler(SettingsHandler): + """ + Settings 
handler, that makes all language settings, which are + a part of common preprocess settings, schema_only. It removes them when + settings are not loaded from schema but from common settings. + """ + def _remove_schema_only(self, settings_dict): + super()._remove_schema_only(settings_dict) + for setting, data, _ in self.provider.traverse_settings(data=settings_dict): + for pp_name, settings in data["storedsettings"]["preprocessors"]: + for key in list(settings): + if "language" in key: + settings.pop(key) + + PREPROCESS_ACTIONS = [ PreprocessAction( "Transformation", "preprocess.transform", "", @@ -1128,12 +1136,14 @@ class Warning(Orange.widgets.data.owpreprocess.OWPreprocess.Warning): ("preprocess.tokenize", {}), ("preprocess.filter", {})] } # type: Dict[str, List[Tuple[str, Dict]]] + settingsHandler = PreprocessSettingsHandler() storedsettings = Setting(DEFAULT_PP) buttons_area_orientation = Qt.Vertical def __init__(self): ConcurrentWidgetMixin.__init__(self) Orange.widgets.data.owpreprocess.OWPreprocess.__init__(self) + self.__store_pending_languages() box = gui.vBox(self.controlArea, "Preview") self.preview = "" @@ -1151,6 +1161,16 @@ def load(self, saved: Dict) -> StandardItemModel: saved["preprocessors"][i] = (name, params) return super().load(saved) + def set_model(self, pmodel): + """Connect the signal that sets the language from the corpus""" + super().set_model(pmodel) + if pmodel: + pmodel.rowsInserted.connect(self.__on_item_inserted) + + def __on_item_inserted(self, _, first: int, last: int): + for row in range(first, last + 1): # first..last is an inclusive row range + self.__set_languages_single_editor(row) + def __update_filtering_params(self, params: Dict): params["sw_path"] = self.__relocate_file(params.get("sw_path")) params["sw_list"] = self.__relocate_files(params.get("sw_list", [])) @@ -1180,6 +1200,49 @@ def __relocate_file(self, path: RecentPath) -> RecentPath: def set_data(self, data: Corpus): self.cancel() self.data = data + self.__set_languages() + + LANG_PARAMS = { + "preprocess.normalize": 
[ + "snowball_language", + "udpipe_language", + "lemmagen_language" + ], + "preprocess.filter": ["language"], + } + + def __store_pending_languages(self): + settings = self.storedsettings["preprocessors"] + self.__pending_languages = { + pp_name: {p for p in par if "language" in p} for pp_name, par in settings + } + + def __set_languages(self): + if self.data is not None: + for i in range(self.preprocessormodel.rowCount()): + self.__set_languages_single_editor(i) + self.__pending_languages = {} + + def __set_languages_single_editor(self, item_index: int): + """ + Set language from corpus for single editor/module, + keep language unchanged if it comes from schema (pending). + """ + if self.data and self.data.language: + model = self.preprocessormodel + item = model.item(item_index) + pp_name = item.data(DescriptionRole).qualname + params = item.data(ParametersRole) + pending = self.__pending_languages.get(pp_name, set()) + for param in self.LANG_PARAMS.get(pp_name, []): + if param not in pending: + # set language if not in pending - if pending, it means + # that it came from schema and should not be changed + params[param] = self.data.language + with disconnected(model.dataChanged, self.__on_modelchanged): + # disconnecting prevents a double apply call; it is already called + # on new data and on row insertion, both callers of this method + item.setData(params, ParametersRole) def buildpreproc(self) -> PreprocessorList: plist = [] diff --git a/orangecontrib/text/widgets/tests/test_owpreprocess.py b/orangecontrib/text/widgets/tests/test_owpreprocess.py index 49ccb1229..02c53ef66 100644 --- a/orangecontrib/text/widgets/tests/test_owpreprocess.py +++ b/orangecontrib/text/widgets/tests/test_owpreprocess.py @@ -3,6 +3,8 @@ import numpy as np from Orange.data import Domain, StringVariable +from Orange.widgets.data.utils.preprocess import DescriptionRole, ParametersRole +from AnyQt.QtGui import QStandardItem, QIcon from orangewidget.utils.filedialogs import RecentPath from 
Orange.widgets.tests.base import WidgetTest from Orange.widgets.tests.utils import simulate @@ -180,6 +182,123 @@ def test_no_tokens_left(self): self.wait_until_finished() self.assertFalse(self.widget.Warning.no_token_left.is_shown()) + def test_language_from_corpus(self): + """Test language from corpus is set correctly""" + initial = { + "name": "", + "preprocessors": [("preprocess.normalize", {}), ("preprocess.filter", {})], + } + self.widget.storedsettings = initial + self.widget._initialize() + self.assertDictEqual(initial, self.widget.storedsettings) + combos = self.widget.mainArea.findChildren(LanguageComboBox) + self.assertEqual( + ["English", "English", "English", "English"], + [c.currentText() for c in combos] + ) + + # test with Slovenian - language should be set for all preprocessors except + # Snowball that doesn't support Slovenian + self.corpus.attributes["language"] = "sl" + self.send_signal(self.widget.Inputs.corpus, self.corpus) + self.assertEqual( + ["English", "Slovenian", "Slovenian", "Slovenian"], + [c.currentText() for c in combos] + ) + + # test with Dutch that is supported by two preprocessors + self.corpus.attributes["language"] = "nl" + self.send_signal(self.widget.Inputs.corpus, self.corpus) + self.assertEqual( + ["Dutch", "Slovenian", "Slovenian", "Dutch"], + [c.currentText() for c in combos] + ) + + # language not supported by any preprocessor - language shouldn't change + self.corpus.attributes["language"] = "bo" + self.send_signal(self.widget.Inputs.corpus, self.corpus) + self.assertEqual( + ["Dutch", "Slovenian", "Slovenian", "Dutch"], + [c.currentText() for c in combos] + ) + + # test with missing language - language shouldn't change + self.corpus.attributes["language"] = None + self.send_signal(self.widget.Inputs.corpus, self.corpus) + self.assertEqual( + ["Dutch", "Slovenian", "Slovenian", "Dutch"], + [c.currentText() for c in combos] + ) + + def test_language_from_schema(self): + """Test language from schema/workflow is retained""" + 
initial = { + "name": "", + "preprocessors": [ + ( + "preprocess.normalize", + { + "lemmagen_language": "sl", + "snowball_language": "nl", + "udpipe_language": "lt", + }, + ), + ("preprocess.filter", {"language": "nl"}), + ], + } + self.widget.storedsettings = initial + + settings = self.widget.settingsHandler.pack_data(self.widget) + widget = self.create_widget(OWPreprocess, stored_settings=settings) + self.send_signal(widget.Inputs.corpus, self.corpus, widget=widget) + self.assertDictEqual(initial, widget.storedsettings) + combos = widget.mainArea.findChildren(LanguageComboBox) + self.assertEqual( + ["Dutch", "Lithuanian", "Slovenian", "Dutch"], + [c.currentText() for c in combos] + ) + + def test_language_from_corpus_editor_inserted(self): + """Test language from corpus is set to new editor too""" + initial = { + "name": "", + "preprocessors": [("preprocess.filter", {})], + } + self.widget.storedsettings = initial + self.widget._initialize() + self.assertDictEqual(initial, self.widget.storedsettings) + combos = self.widget.mainArea.findChildren(LanguageComboBox) + self.assertEqual( + ["English"], + [c.currentText() for c in combos] + ) + + # insert data - language of stopwords combo should change to Slovenian + self.corpus.attributes["language"] = "sl" + self.send_signal(self.widget.Inputs.corpus, self.corpus) + self.assertEqual( + ["Slovenian"], + [c.currentText() for c in combos] + ) + + # insert new editor - all languages except snowball should be set to Slovenian + pp_def = self.widget._qname2ppdef["preprocess.normalize"] + description = pp_def.description + item = QStandardItem(description.title) + icon = QIcon(description.icon) + item.setIcon(icon) + item.setToolTip(description.summary) + item.setData(pp_def, DescriptionRole) + item.setData({}, ParametersRole) + self.widget.preprocessormodel.insertRow(0, [item]) + self.wait_until_finished() + + combos = self.widget.mainArea.findChildren(LanguageComboBox) + self.assertEqual( + ['Slovenian', 'English', 
'Slovenian', 'Slovenian'], + [c.currentText() for c in combos] + ) + @patch(SF_LIST, new=Mock(return_value=SERVER_FILES)) class TestOWPreprocessMigrateSettings(WidgetTest): @@ -983,14 +1102,20 @@ def test_set_current_language(self): self.assertEqual("Portuguese", cb.currentText()) cb.set_current_language("sl") self.assertEqual("Slovenian", cb.currentText()) - cb.set_current_language("abc") # should set to default - self.assertEqual("English", cb.currentText()) + cb.set_current_language("abc") # language not in list - keep current selection + self.assertEqual("Slovenian", cb.currentText()) + + def test_set_language_to_default(self): + """If the current item is no longer in the dropdown, set language to default""" + mock = Mock() + cb = UDPipeComboBox(None, "pt", "en", mock) + self.assertEqual("Portuguese", cb.currentText()) # when no default language in the dropdown set to first cb.removeItem(0) x = cb._UDPipeComboBox__items cb._UDPipeComboBox__items = x[:3] + x[4:] - cb.set_current_language("abc") - self.assertEqual("English (lines)", cb.currentText()) + cb.showPopup() + self.assertEqual("English", cb.currentText()) def test_change_item(self): mock = Mock()