
Commit

Settings: Add Settings - Tables - Dependency Parser; Work Area: Dependency Parser - Sentence - Highlight heads and dependents
BLKSerene committed May 17, 2024
1 parent 76ab80a commit 875d196
Showing 13 changed files with 299 additions and 209 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -21,6 +21,7 @@
## [3.5.0](https://github.com/BLKSerene/Wordless/releases/tag/3.5.0) - ??/??/2024
### 🎉 New Features
- Settings: Add Settings - Stop Word Lists - Stop Word List Settings - Case-sensitive
- Settings: Add Settings - Tables - Dependency Parser
- Utils: Add Stanza's Sindhi part-of-speech tagger
- Utils: Add VADER's sentiment analyzers
- Work Area: Add Profiler - Lexical Diversity - Brunét's Index / Honoré's statistic
@@ -29,6 +30,7 @@
- Settings: Settings - Part-of-speech Tagging - Tagsets - Mapping Settings - Allow editing of tagset mapping of spaCy's Catalan, Danish, French, Greek (Modern), Macedonian, Norwegian (Bokmål), Portuguese, Russian, Spanish, and Ukrainian part-of-speech taggers
- Settings: Settings - Part-of-speech Tagging - Tagsets - Mapping Settings - Allow editing of tagset mapping of Stanza's Armenian (Eastern), Armenian (Western), Basque, Buryat (Russia), Danish, French, Greek (Modern), Hebrew (Modern), Hungarian, Ligurian, Manx, Marathi, Nigerian Pidgin, Pomak, Portuguese, Russian, Sanskrit, Sindhi, Sorbian (Upper), and Telugu part-of-speech taggers
- Utils: Update custom stop word lists
- Work Area: Dependency Parser - Sentence - Highlight heads and dependents

### 📌 Bugfixes
- Utils: Fix downloading of Stanza models
54 changes: 29 additions & 25 deletions doc/doc.md

Large diffs are not rendered by default.

40 changes: 26 additions & 14 deletions tests/tests_checks/test_checks_work_area.py
@@ -42,6 +42,9 @@ def test_wl_status_bar_msg_success_generate_table():
def test_wl_status_bar_msg_success_generate_fig():
wl_checks_work_area.wl_status_bar_msg_success_generate_fig(main)

def test_wl_status_bar_msg_success_exp_table():
wl_checks_work_area.wl_status_bar_msg_success_exp_table(main)

def test_wl_status_bar_msg_success_no_results():
wl_checks_work_area.wl_status_bar_msg_success_no_results(main)

@@ -51,6 +54,9 @@ def test_wl_status_bar_msg_err_download_model():
def test_wl_status_bar_msg_err_fatal():
wl_checks_work_area.wl_status_bar_msg_err_fatal(main)

def test_wl_status_bar_msg_file_access_denied():
wl_checks_work_area.wl_status_bar_msg_file_access_denied(main)

def test_check_search_terms():
assert wl_checks_work_area.check_search_terms(main, {
'multi_search_mode': False,
@@ -81,37 +87,40 @@ def test_check_nlp_support():
assert wl_checks_work_area.check_nlp_support(
main,
nlp_utils = ['pos_taggers'],
files = [file_eng_us],
test = True
files = [file_eng_us]
)
assert not wl_checks_work_area.check_nlp_support(
main,
nlp_utils = ['pos_taggers'],
files = [file_xxx],
test = True
files = [file_xxx]
)

main.settings_custom['file_area']['files_open'] = [file_eng_us]
main.settings_custom['file_area']['files_open_ref'] = [file_xxx]

assert wl_checks_work_area.check_nlp_support(main, nlp_utils = ['pos_taggers'], test = True)
assert not wl_checks_work_area.check_nlp_support(main, nlp_utils = ['pos_taggers'], ref = True, test = True)
assert wl_checks_work_area.check_nlp_support(main, nlp_utils = ['pos_taggers'])
assert not wl_checks_work_area.check_nlp_support(main, nlp_utils = ['pos_taggers'], ref = True)

def test_check_results():
assert wl_checks_work_area.check_results(main, '', 'test', test = True)
assert not wl_checks_work_area.check_results(main, 'test', '', test = True)
assert wl_checks_work_area.check_results(main, '', 'test')
assert not wl_checks_work_area.check_results(main, 'test', '')

def test_check_results_download_model():
wl_checks_work_area.check_results_download_model(main, '', 'test', test = True)
wl_checks_work_area.check_results_download_model(main, 'test', '', test = True)
wl_checks_work_area.check_results_download_model(main, '', 'test')
wl_checks_work_area.check_results_download_model(main, 'test', '')

def test_check_err_table():
wl_checks_work_area.check_err_table(main, '', test = True)
wl_checks_work_area.check_err_table(main, 'test', test = True)
wl_checks_work_area.check_err_table(main, '')
wl_checks_work_area.check_err_table(main, 'test')

def test_check_err_fig():
wl_checks_work_area.check_err_fig(main, '', test = True)
wl_checks_work_area.check_err_fig(main, 'test', test = True)
wl_checks_work_area.check_err_fig(main, '')
wl_checks_work_area.check_err_fig(main, 'test')

def test_check_err_exp_table():
wl_checks_work_area.check_err_exp_table(main, '', 'test')
wl_checks_work_area.check_err_exp_table(main, 'permission_err', 'test')
wl_checks_work_area.check_err_exp_table(main, 'err', 'test')

if __name__ == '__main__':
test_wl_msg_box_missing_search_terms()
@@ -122,13 +131,16 @@ def test_check_err_fig():
test_wl_status_bar_msg_missing_search_terms()
test_wl_status_bar_msg_success_generate_table()
test_wl_status_bar_msg_success_generate_fig()
test_wl_status_bar_msg_success_exp_table()
test_wl_status_bar_msg_success_no_results()
test_wl_status_bar_msg_err_download_model()
test_wl_status_bar_msg_err_fatal()
test_wl_status_bar_msg_file_access_denied()

test_check_search_terms()
test_check_nlp_support()
test_check_results()
test_check_results_download_model()
test_check_err_table()
test_check_err_fig()
test_check_err_exp_table()
7 changes: 7 additions & 0 deletions tests/tests_settings/test_settings_tables.py
@@ -39,7 +39,14 @@ def test_wl_settings_tables_parallel_concordancer():
settings_tables_parallel_concordancer.load_settings(defaults = True)
settings_tables_parallel_concordancer.apply_settings()

def test_wl_settings_tables_dependency_parser():
settings_tables_dependency_parser = wl_settings_tables.Wl_Settings_Tables_Dependency_Parser(main)
settings_tables_dependency_parser.load_settings()
settings_tables_dependency_parser.load_settings(defaults = True)
settings_tables_dependency_parser.apply_settings()

if __name__ == '__main__':
test_wl_settings_tables()
test_wl_settings_tables_concordancer()
test_wl_settings_tables_parallel_concordancer()
test_wl_settings_tables_dependency_parser()
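
The new settings page tested above manages the highlight colors that wl_dependency_parser.py reads further down in this commit. A minimal sketch of the presumed default structure — the key names match the lookups in the diff below, but the concrete color values are assumptions, not taken from this commit:

```python
# Hypothetical defaults for Settings - Tables - Dependency Parser; the keys
# mirror main.settings_custom['tables']['dependency_parser'] as read in
# wl_dependency_parser.py, the color values themselves are assumed.
settings_default_dependency_parser = {
    'highlight_color_settings': {
        'head_color': '#FF0000',      # assumed default for heads
        'dependent_color': '#0000FF'  # assumed default for dependents
    }
}
```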
71 changes: 42 additions & 29 deletions wordless/wl_checks/wl_checks_work_area.py
@@ -63,6 +63,9 @@ def wl_status_bar_msg_success_generate_table(main):
def wl_status_bar_msg_success_generate_fig(main):
main.statusBar().showMessage(_tr('wl_checks_work_area', 'Figure generated successfully.'))

def wl_status_bar_msg_success_exp_table(main):
main.statusBar().showMessage(_tr('wl_checks_work_area', 'Table exported successfully.'))

def wl_status_bar_msg_success_no_results(main):
main.statusBar().showMessage(_tr('wl_checks_work_area', 'No results to display.'))

@@ -72,6 +75,9 @@ def wl_status_bar_msg_err_download_model(main):
def wl_status_bar_msg_err_fatal(main):
main.statusBar().showMessage(_tr('wl_checks_work_area', 'A fatal error has just occurred!'))

def wl_status_bar_msg_file_access_denied(main):
main.statusBar().showMessage(_tr('wl_checks_work_area', 'File access denied!'))

def check_search_terms(main, search_settings, show_warning = True):
if (
(not search_settings['multi_search_mode'] and search_settings['search_term'])
@@ -94,7 +100,7 @@ def check_search_terms(main, search_settings, show_warning = True):
'dependency_parsers': _tr('wl_checks_work_area', 'Dependency parsing')
}

def check_nlp_support(main, nlp_utils, files = None, ref = False, test = False):
def check_nlp_support(main, nlp_utils, files = None, ref = False):
support_ok = True
nlp_utils_no_support = []

@@ -147,29 +153,21 @@ def check_nlp_support(main, nlp_utils, files = None, ref = False, test = False):
)

dialog_err_files.table_err_files.enable_updates()

if test:
dialog_err_files.open()
else:
dialog_err_files.exec_()
dialog_err_files.open()

wl_status_bar_msg_lang_support_unavailable(main)

support_ok = False

return support_ok

def check_results(main, err_msg, results, test = False):
def check_results(main, err_msg, results):
results_ok = True

if err_msg:
results_ok = False

if test:
wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open()
else:
wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).exec_()

wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open()
wl_status_bar_msg_err_fatal(main)
elif not any(results):
results_ok = False
@@ -179,7 +177,7 @@ def check_results(main, err_msg, results, test = False):

return results_ok

def check_results_download_model(main, err_msg, model_name = '', test = False):
def check_results_download_model(main, err_msg, model_name = ''):
results_ok = True

try:
@@ -195,35 +193,50 @@ def check_results_download_model(main, err_msg, model_name = '', test = False):
err_msg = traceback.format_exc()

if err_msg:
if test:
wl_dialogs_errs.Wl_Dialog_Err_Download_Model(main, err_msg).open()
else:
wl_dialogs_errs.Wl_Dialog_Err_Download_Model(main, err_msg).exec_()

wl_dialogs_errs.Wl_Dialog_Err_Download_Model(main, err_msg).open()
wl_status_bar_msg_err_download_model(main)

results_ok = False

return results_ok

def check_err_table(main, err_msg, test = False):
def check_err_table(main, err_msg):
if err_msg:
if test:
wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open()
else:
wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).exec_()

wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open()
wl_status_bar_msg_err_fatal(main)
else:
wl_status_bar_msg_success_generate_table(main)

def check_err_fig(main, err_msg, test = False):
def check_err_fig(main, err_msg):
if err_msg:
if test:
wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open()
else:
wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).exec_()
wl_status_bar_msg_err_fatal(main)

wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open()
wl_status_bar_msg_err_fatal(main)
else:
wl_status_bar_msg_success_generate_fig(main)

def check_err_exp_table(main, err_msg, file_path):
if err_msg:
if err_msg == 'permission_err':
wl_msg_boxes.Wl_Msg_Box_Info(
main,
title = _tr('wl_checks_work_area', 'File Access Denied'),
text = _tr('wl_checks_work_area', '''
<div>Access to "{}" is denied, please specify another location or close the file and try again.</div>
''').format(file_path)
).open()
else:
wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open()

wl_status_bar_msg_file_access_denied(main)
else:
wl_msg_boxes.Wl_Msg_Box_Info(
main,
title = _tr('wl_checks_work_area', 'Export Completed'),
text = _tr('wl_checks_work_area', '''
<div>The table has been successfully exported to "{}".</div>
''').format(file_path)
).open()

wl_status_bar_msg_success_exp_table(main)
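
For context, a hedged sketch of how a table export routine might feed the new check — only check_err_exp_table, the 'permission_err' sentinel, and the status bar messages come from this commit; the export helper and the surrounding flow are assumptions for illustration:

```python
import traceback

from wordless.wl_checks import wl_checks_work_area

# Hypothetical caller; save_table_to_disk() is an assumed export helper.
def exp_table(main, table, file_path):
    err_msg = ''

    try:
        save_table_to_disk(table, file_path)
    except PermissionError:
        # The file is open elsewhere or the location is not writable
        err_msg = 'permission_err'
    except Exception:
        err_msg = traceback.format_exc()

    # Shows the "File Access Denied" / fatal error dialog or the
    # "Export Completed" message box and updates the status bar
    wl_checks_work_area.check_err_exp_table(main, err_msg, file_path)
```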
22 changes: 10 additions & 12 deletions wordless/wl_concordancer.py
@@ -518,6 +518,16 @@ def update_gui_table(self, err_msg, concordance_lines):
no_para, len_paras = concordance_line[7]
file_name = concordance_line[8]

# Left
self.setIndexWidget(
self.model().index(i, 0),
wl_labels.Wl_Label_Html(' '.join(left_tokens_raw), self.main)
)
self.indexWidget(self.model().index(i, 0)).setAlignment(Qt.AlignRight | Qt.AlignVCenter)

self.indexWidget(self.model().index(i, 0)).tokens_raw = left_tokens_raw
self.indexWidget(self.model().index(i, 0)).tokens_search = left_tokens_search

# Node
label_node = wl_labels.Wl_Label_Html(
f'''
@@ -529,23 +539,11 @@
)

self.setIndexWidget(self.model().index(i, 1), label_node)

self.indexWidget(self.model().index(i, 1)).setAlignment(Qt.AlignHCenter | Qt.AlignVCenter)

self.indexWidget(self.model().index(i, 1)).tokens_raw = node_tokens_raw
self.indexWidget(self.model().index(i, 1)).tokens_search = node_tokens_search

# Left
self.setIndexWidget(
self.model().index(i, 0),
wl_labels.Wl_Label_Html(' '.join(left_tokens_raw), self.main)
)

self.indexWidget(self.model().index(i, 0)).setAlignment(Qt.AlignRight | Qt.AlignVCenter)

self.indexWidget(self.model().index(i, 0)).tokens_raw = left_tokens_raw
self.indexWidget(self.model().index(i, 0)).tokens_search = left_tokens_search

# Right
self.setIndexWidget(
self.model().index(i, 2),
40 changes: 34 additions & 6 deletions wordless/wl_dependency_parser.py
@@ -29,9 +29,9 @@

from wordless.wl_checks import wl_checks_work_area
from wordless.wl_dialogs import wl_dialogs_misc
from wordless.wl_nlp import wl_dependency_parsing, wl_matching, wl_texts, wl_token_processing
from wordless.wl_nlp import wl_dependency_parsing, wl_matching, wl_token_processing
from wordless.wl_utils import wl_misc, wl_threading
from wordless.wl_widgets import wl_layouts, wl_tables, wl_widgets
from wordless.wl_widgets import wl_labels, wl_layouts, wl_tables, wl_widgets

_tr = QCoreApplication.translate

@@ -390,18 +390,26 @@ def update_gui_table(self, err_msg, results):
self.model().setItem(i, 0, wl_tables.Wl_Table_Item(head))
# Dependent
self.model().setItem(i, 1, wl_tables.Wl_Table_Item(dependent))

# Dependency Relation
self.model().setItem(i, 2, wl_tables.Wl_Table_Item(dependency_relation))

# Dependency Distance
self.set_item_num(i, 3, dependency_len)
self.set_item_num(i, 4, numpy.abs(dependency_len))

# Sentence
self.model().setItem(i, 5, wl_tables.Wl_Table_Item(' '.join(sentence_tokens_raw)))
self.model().item(i, 5).tokens_raw = sentence_tokens_raw
self.model().item(i, 5).tokens_search = sentence_tokens_search
self.setIndexWidget(
self.model().index(i, 5),
wl_labels.Wl_Label_Html(' '.join(sentence_tokens_raw), self.main)
)
self.indexWidget(self.model().index(i, 5)).tokens_raw = sentence_tokens_raw
self.indexWidget(self.model().index(i, 5)).tokens_search = sentence_tokens_search

# Sentence No.
self.set_item_num(i, 6, no_sentence)
self.set_item_num(i, 7, no_sentence, len_sentences)

# File
self.model().setItem(i, 8, wl_tables.Wl_Table_Item(file))

@@ -495,6 +503,9 @@ def run(self):
len_sentences = len(offsets_sentences)
i_token = 0

head_color = self.main.settings_custom['tables']['dependency_parser']['highlight_color_settings']['head_color']
dependent_color = self.main.settings_custom['tables']['dependency_parser']['highlight_color_settings']['dependent_color']

for para in text.tokens_multilevel:
for sentence in para:
sentence = list(wl_misc.flatten_list(sentence))
@@ -523,7 +534,24 @@ def run(self):
no_sentence = bisect.bisect(offsets_sentences, j)

# Sentence
sentence_tokens_raw = wl_texts.to_display_texts(sentence)
sentence_tokens_raw = []

for sentence_token in sentence:
if sentence_token == head:
sentence_tokens_raw.append(f'''
<span style="color: {head_color}; font-weight: bold;">
{sentence_token.display_text()}
</span>
''')
elif sentence_token == token:
sentence_tokens_raw.append(f'''
<span style="color: {dependent_color}; font-weight: bold;">
{sentence_token.display_text()}
</span>
''')
else:
sentence_tokens_raw.append(sentence_token.display_text())

# Remove empty tokens for searching in results
sentence_tokens_search = [token for token in sentence if token]

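
Read in isolation, the new highlighting step is a small HTML-markup pass over the sentence tokens. A standalone sketch under the assumption that tokens expose display_text() and compare equal to the head/dependent being rendered, as in the loop above:

```python
# Minimal sketch of the head/dependent highlighting added above; the joined
# string is what gets handed to a Wl_Label_Html widget in the results table.
def highlight_sentence(sentence_tokens, head, dependent, head_color, dependent_color):
    tokens_html = []

    for token in sentence_tokens:
        if token == head:
            tokens_html.append(
                f'<span style="color: {head_color}; font-weight: bold;">{token.display_text()}</span>'
            )
        elif token == dependent:
            tokens_html.append(
                f'<span style="color: {dependent_color}; font-weight: bold;">{token.display_text()}</span>'
            )
        else:
            tokens_html.append(token.display_text())

    return ' '.join(tokens_html)
```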
1 change: 1 addition & 0 deletions wordless/wl_nlp/wl_nlp_utils.py
@@ -111,6 +111,7 @@ def get_langs_stanza(main, util_type):

return langs_stanza

@wl_misc.log_timing
def check_models(main, langs, lang_utils = None):
def update_gui_stanza(main, err_msg):
nonlocal models_ok
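
The only change here is the new @wl_misc.log_timing decorator on check_models. Its implementation is not part of this commit; a typical timing decorator of this shape might look like the following sketch (an assumption, not Wordless's actual code):

```python
import functools
import time

# Hypothetical stand-in for wl_misc.log_timing; the real implementation may
# differ, e.g. by routing the message to the application's logger.
def log_timing(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        time_start = time.time()
        result = func(*args, **kwargs)

        print(f'{func.__name__} finished in {time.time() - time_start:.2f} s')

        return result

    return wrapper
```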
2 changes: 1 addition & 1 deletion wordless/wl_results/wl_results_search.py
@@ -372,7 +372,7 @@ def run(self):
]

for col in cols_to_search:
# Concordancer - Left, Node, Right & Parallel Concordancer - Parallel Unit
# Concordancer - Left, Node, Right / Parallel Concordancer - Parallel Unit / Dependency Parser - Sentence
if table.indexWidget(table.model().index(0, col)):
for row in rows_to_search:
results[(row, col)] = table.indexWidget(table.model().index(row, col)).tokens_search
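
The updated comment reflects that the Dependency Parser's Sentence column is now rendered as an index widget as well. For cells that remain plain model items, the presumed counterpart reads tokens_search off the item itself, mirroring the attribute assignments this commit removes from wl_dependency_parser.py — a sketch under that assumption:

```python
# Sketch of both lookup paths; the widget branch matches the diff above, the
# item branch is assumed from the code this commit replaces.
def collect_cell_tokens(table, rows_to_search, cols_to_search):
    results = {}

    for col in cols_to_search:
        if table.indexWidget(table.model().index(0, col)):
            # HTML-label cells (Concordancer - Left/Node/Right, Parallel
            # Concordancer - Parallel Unit, Dependency Parser - Sentence)
            for row in rows_to_search:
                results[(row, col)] = table.indexWidget(table.model().index(row, col)).tokens_search
        else:
            # Plain table items keep tokens_search on the item itself
            for row in rows_to_search:
                results[(row, col)] = table.model().item(row, col).tokens_search

    return results
```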
