From d348f08121bec1579850a6fba5b80fb8dfc83f6f Mon Sep 17 00:00:00 2001 From: BLKSerene Date: Sat, 6 Jan 2024 23:35:35 +0800 Subject: [PATCH] Utils: Update custom stop word lists --- CHANGELOG.md | 3 +++ tests/tests_nlp/test_stop_word_lists.py | 3 --- wordless/wl_settings/wl_settings_default.py | 10 ++++++++-- wordless/wl_settings/wl_settings_global.py | 10 ++++++++++ wordless/wl_settings/wl_settings_stop_word_lists.py | 9 ++++----- 5 files changed, 25 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 196eb7068..0ff27e615 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,9 @@ - Utils: Add VADER's sentiment analyzers - Work Area: Add Profiler - Lexical Diversity - Brunét's Index / Honoré's statistic +### ✨ Improvements +- Utils: Update custom stop word lists + ### 📌 Bugfixes - Utils: Fix downloading of Stanza models - Work Area: Fix Dependency Parser - analysis of files whose first token is a punctuation mark diff --git a/tests/tests_nlp/test_stop_word_lists.py b/tests/tests_nlp/test_stop_word_lists.py index dc757b873..95413c77a 100644 --- a/tests/tests_nlp/test_stop_word_lists.py +++ b/tests/tests_nlp/test_stop_word_lists.py @@ -29,9 +29,6 @@ for stop_word_list in stop_word_lists: test_stop_word_lists.append((lang, stop_word_list)) - # Add custom lists - test_stop_word_lists.append((lang, 'custom')) - @pytest.mark.parametrize('lang, stop_word_list', test_stop_word_lists) def test_get_stop_word_list(lang, stop_word_list): stop_words = wl_stop_word_lists.wl_get_stop_word_list(main, lang, stop_word_list = stop_word_list) diff --git a/wordless/wl_settings/wl_settings_default.py b/wordless/wl_settings/wl_settings_default.py index a677251f2..e02babf6c 100644 --- a/wordless/wl_settings/wl_settings_default.py +++ b/wordless/wl_settings/wl_settings_default.py @@ -20,6 +20,7 @@ from PyQt5.QtCore import QCoreApplication from PyQt5.QtWidgets import QDesktopWidget +from wordless.wl_settings import wl_settings_global from wordless.wl_tagsets import ( wl_tagset_universal, wl_tagset_eng_penn_treebank, @@ -2340,7 +2341,12 @@ def init_settings_default(main): settings_default['pos_tagging']['tagsets']['preview_settings']['preview_pos_tagger'] = settings_default['pos_tagging']['pos_tagger_settings']['pos_taggers'] # Custom stop word lists - for lang in settings_default['stop_word_lists']['stop_word_list_settings']: - settings_default['stop_word_lists']['custom_lists'][lang] = [] + for lang in wl_settings_global.SETTINGS_GLOBAL['langs'].values(): + lang_code = lang[0] + + if lang_code not in settings_default['stop_word_lists']['stop_word_list_settings']: + settings_default['stop_word_lists']['stop_word_list_settings'][lang_code] = 'custom' + + settings_default['stop_word_lists']['custom_lists'][lang_code] = [] return settings_default diff --git a/wordless/wl_settings/wl_settings_global.py b/wordless/wl_settings/wl_settings_global.py index 798d862ab..9091177c6 100644 --- a/wordless/wl_settings/wl_settings_global.py +++ b/wordless/wl_settings/wl_settings_global.py @@ -4054,3 +4054,13 @@ ''' } } + +# Custom stop word lists (preserve order of language names) +stop_word_lists = SETTINGS_GLOBAL['stop_word_lists'].copy() + +SETTINGS_GLOBAL['stop_word_lists'].clear() + +for lang in SETTINGS_GLOBAL['langs'].values(): + lang_code = lang[0] + + SETTINGS_GLOBAL['stop_word_lists'][lang_code] = stop_word_lists.get(lang_code, []) + ['custom'] diff --git a/wordless/wl_settings/wl_settings_stop_word_lists.py b/wordless/wl_settings/wl_settings_stop_word_lists.py index ff77acceb..512fdfa29 100644 --- a/wordless/wl_settings/wl_settings_stop_word_lists.py +++ b/wordless/wl_settings/wl_settings_stop_word_lists.py @@ -63,8 +63,7 @@ def __init__(self, main): items = list(wl_nlp_utils.to_lang_util_texts( self.main, util_type = 'stop_word_lists', - # Add custom lists - util_codes = self.settings_global[lang] + ['custom'] + util_codes = self.settings_global[lang] )), col = 1 )) @@ -125,7 +124,7 @@ def preview_settings_changed(self): self.settings_custom['preview']['preview_lang'] = wl_conversion.to_lang_code(self.main, self.combo_box_preview_lang.currentText()) def preview_results_changed(self): - row = list(self.settings_global.keys()).index(self.settings_custom['preview']['preview_lang']) + row = list(self.settings_global).index(self.settings_custom['preview']['preview_lang']) lang = wl_conversion.to_lang_code(self.main, self.combo_box_preview_lang.currentText()) list_stop_words = wl_nlp_utils.to_lang_util_code( self.main, @@ -153,7 +152,7 @@ def load_settings(self, defaults = False): self.table_stop_word_lists.disable_updates() - for i, lang in enumerate(settings['stop_word_list_settings']): + for i, lang in enumerate(self.settings_global): self.table_stop_word_lists.model().item(i, 1).setText(wl_nlp_utils.to_lang_util_text( self.main, util_type = 'stop_word_lists', @@ -171,7 +170,7 @@ def load_settings(self, defaults = False): self.combo_box_preview_lang.currentTextChanged.emit(self.combo_box_preview_lang.currentText()) def apply_settings(self): - for i, lang in enumerate(self.settings_custom['stop_word_list_settings']): + for i, lang in enumerate(self.settings_global): self.settings_custom['stop_word_list_settings'][lang] = wl_nlp_utils.to_lang_util_code( self.main, util_type = 'stop_word_lists',