Skip to content

Commit

Permalink
Utils: Update custom stop word lists
Browse files (browse the repository at this point in the history)
  • Loading branch information
BLKSerene committed Jan 6, 2024
1 parent 815cc98 commit d348f08
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 10 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
- Utils: Add VADER's sentiment analyzers
- Work Area: Add Profiler - Lexical Diversity - Brunet's Index / Honoré's statistic

### ✨ Improvements
- Utils: Update custom stop word lists

### 📌 Bugfixes
- Utils: Fix downloading of Stanza models
- Work Area: Fix Dependency Parser - analysis of files whose first token is a punctuation mark
Expand Down
3 changes: 0 additions & 3 deletions tests/tests_nlp/test_stop_word_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@
for stop_word_list in stop_word_lists:
test_stop_word_lists.append((lang, stop_word_list))

# Add custom lists
test_stop_word_lists.append((lang, 'custom'))

@pytest.mark.parametrize('lang, stop_word_list', test_stop_word_lists)
def test_get_stop_word_list(lang, stop_word_list):
stop_words = wl_stop_word_lists.wl_get_stop_word_list(main, lang, stop_word_list = stop_word_list)
Expand Down
10 changes: 8 additions & 2 deletions wordless/wl_settings/wl_settings_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from PyQt5.QtCore import QCoreApplication
from PyQt5.QtWidgets import QDesktopWidget

from wordless.wl_settings import wl_settings_global
from wordless.wl_tagsets import (
wl_tagset_universal,
wl_tagset_eng_penn_treebank,
Expand Down Expand Up @@ -2340,7 +2341,12 @@ def init_settings_default(main):
settings_default['pos_tagging']['tagsets']['preview_settings']['preview_pos_tagger'] = settings_default['pos_tagging']['pos_tagger_settings']['pos_taggers']

# Custom stop word lists
for lang in settings_default['stop_word_lists']['stop_word_list_settings']:
settings_default['stop_word_lists']['custom_lists'][lang] = []
for lang in wl_settings_global.SETTINGS_GLOBAL['langs'].values():
lang_code = lang[0]

if lang_code not in settings_default['stop_word_lists']['stop_word_list_settings']:
settings_default['stop_word_lists']['stop_word_list_settings'][lang_code] = 'custom'

settings_default['stop_word_lists']['custom_lists'][lang_code] = []

return settings_default
10 changes: 10 additions & 0 deletions wordless/wl_settings/wl_settings_global.py
Original file line number Diff line number Diff line change
Expand Up @@ -4054,3 +4054,13 @@
'''
}
}

# Custom stop word lists (preserve order of language names)
#
# Rebuild SETTINGS_GLOBAL['stop_word_lists'] in place so that its keys follow
# the iteration order of SETTINGS_GLOBAL['langs'] and every language listed
# there ends with a 'custom' entry.
# Snapshot the current mapping first, because the original dict is emptied
# and refilled below.
stop_word_lists = dict(SETTINGS_GLOBAL['stop_word_lists'])

SETTINGS_GLOBAL['stop_word_lists'].clear()

for lang in SETTINGS_GLOBAL['langs'].values():
    # The first element of each language entry is used as the language code
    lang_code = lang[0]

    # Languages absent from the snapshot get 'custom' as their only entry;
    # codes in the snapshot that are not listed in 'langs' are not re-added.
    SETTINGS_GLOBAL['stop_word_lists'][lang_code] = [
        *stop_word_lists.get(lang_code, []),
        'custom'
    ]
9 changes: 4 additions & 5 deletions wordless/wl_settings/wl_settings_stop_word_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,7 @@ def __init__(self, main):
items = list(wl_nlp_utils.to_lang_util_texts(
self.main,
util_type = 'stop_word_lists',
# Add custom lists
util_codes = self.settings_global[lang] + ['custom']
util_codes = self.settings_global[lang]
)),
col = 1
))
Expand Down Expand Up @@ -125,7 +124,7 @@ def preview_settings_changed(self):
self.settings_custom['preview']['preview_lang'] = wl_conversion.to_lang_code(self.main, self.combo_box_preview_lang.currentText())

def preview_results_changed(self):
row = list(self.settings_global.keys()).index(self.settings_custom['preview']['preview_lang'])
row = list(self.settings_global).index(self.settings_custom['preview']['preview_lang'])
lang = wl_conversion.to_lang_code(self.main, self.combo_box_preview_lang.currentText())
list_stop_words = wl_nlp_utils.to_lang_util_code(
self.main,
Expand Down Expand Up @@ -153,7 +152,7 @@ def load_settings(self, defaults = False):

self.table_stop_word_lists.disable_updates()

for i, lang in enumerate(settings['stop_word_list_settings']):
for i, lang in enumerate(self.settings_global):
self.table_stop_word_lists.model().item(i, 1).setText(wl_nlp_utils.to_lang_util_text(
self.main,
util_type = 'stop_word_lists',
Expand All @@ -171,7 +170,7 @@ def load_settings(self, defaults = False):
self.combo_box_preview_lang.currentTextChanged.emit(self.combo_box_preview_lang.currentText())

def apply_settings(self):
for i, lang in enumerate(self.settings_custom['stop_word_list_settings']):
for i, lang in enumerate(self.settings_global):
self.settings_custom['stop_word_list_settings'][lang] = wl_nlp_utils.to_lang_util_code(
self.main,
util_type = 'stop_word_lists',
Expand Down

0 comments on commit d348f08

Please sign in to comment.