Skip to content

Commit

Permalink
Utils: Update spaCy's sentence recognizers, word tokenizers, part-of-speech taggers, lemmatizers, and dependency parsers
Browse files Browse the repository at this point in the history
  • Loading branch information
BLKSerene committed Aug 8, 2023
1 parent 047d5a4 commit aff2953
Show file tree
Hide file tree
Showing 24 changed files with 607 additions and 517 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
- Work Area: Add Profiler - Readability - Bormuth's Cloze Mean / Bormuth's Grade Placement / Coleman's Readability Formula / Danielson-Bryan's Readability Formula / Degrees of Reading Power / Easy Listening Formula / Fucks's Stilcharakteristik / Strain Index / Tränkle & Bailer's Readability Formula / Tuldava's Text Difficulty / Wheeler & Smith's Readability Formula

### ✨ Improvements
- Utils: Update spaCy's sentence recognizers, word tokenizers, part-of-speech taggers, lemmatizers, and dependency parsers
- Utils: Update Wordless's sentence and sentence segment splitters
- Work Area: Update Profiler - Readability - Automated Readability Index / Dale-Chall Readability Formula / Flesch Reading Ease / Flesch Reading Ease (Farr-Jenkins-Paterson) / Gunning Fog Index / Spache Grade Level

Expand Down
8 changes: 4 additions & 4 deletions tests/wl_tests_dialogs/test_dialogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@ def test_wl_dialog_frameless():
def test_wl_dialog_info():
    """Construct Wl_Dialog_Info with the module-level `main` object and a title.

    The test passes as long as the constructor does not raise.
    """
    wl_dialogs.Wl_Dialog_Info(main, title = 'test')

def test_wl_dialog_info_copy():
    """Construct Wl_Dialog_Info_Copy with the module-level `main` object and a title.

    The test passes as long as the constructor does not raise.
    """
    wl_dialogs.Wl_Dialog_Info_Copy(main, title = 'test')

def test_wl_dialog_settings():
    """Construct Wl_Dialog_Settings with the module-level `main` object and a title.

    The test passes as long as the constructor does not raise.
    """
    wl_dialogs.Wl_Dialog_Settings(main, title = 'test')

def test_wl_dialog_err():
    """Construct wl_dialogs.Wl_Dialog_Err with the module-level `main` object and a title.

    The test passes as long as the constructor does not raise.
    """
    wl_dialogs.Wl_Dialog_Err(main, title = 'test')

if __name__ == '__main__':
    # Run each dialog test once, in the listed order, when executed as a script
    for run_test in (
        test_wl_dialog,
        test_wl_dialog_frameless,
        test_wl_dialog_info,
        test_wl_dialog_info_copy,
        test_wl_dialog_settings,
        test_wl_dialog_err,
    ):
        run_test()
12 changes: 12 additions & 0 deletions tests/wl_tests_dialogs/test_dialogs_errs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,24 @@

main = wl_test_init.Wl_Test_Main()

def test_wl_dialog_err():
    """Construct wl_dialogs_errs.Wl_Dialog_Err with the module-level `main` object and a title.

    The test passes as long as the constructor does not raise.
    """
    wl_dialogs_errs.Wl_Dialog_Err(main, title = 'test')

def test_wl_dialog_err_info_copy():
    """Construct Wl_Dialog_Err_Info_Copy with the module-level `main` object and a title.

    The test passes as long as the constructor does not raise.
    """
    wl_dialogs_errs.Wl_Dialog_Err_Info_Copy(main, title = 'test')

def test_wl_dialog_err_fatal():
    """Construct Wl_Dialog_Err_Fatal with the module-level `main` object and an error message.

    The test passes as long as the constructor does not raise.
    """
    wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg = 'test')

def test_wl_dialog_err_download_model():
    """Construct Wl_Dialog_Err_Download_Model with the module-level `main` object and an error message.

    The test passes as long as the constructor does not raise.
    """
    wl_dialogs_errs.Wl_Dialog_Err_Download_Model(main, err_msg = 'test')

def test_wl_dialog_err_files():
    """Construct Wl_Dialog_Err_Files with the module-level `main` object and a title.

    The test passes as long as the constructor does not raise.
    """
    wl_dialogs_errs.Wl_Dialog_Err_Files(main, title = 'test')

if __name__ == '__main__':
    # Run each error-dialog test once, in the listed order, when executed as a script
    for run_test in (
        test_wl_dialog_err,
        test_wl_dialog_err_info_copy,
        test_wl_dialog_err_fatal,
        test_wl_dialog_err_download_model,
        test_wl_dialog_err_files,
    ):
        run_test()
8 changes: 6 additions & 2 deletions tests/wl_tests_dialogs/test_dialogs_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@
def test_wl_dialog_progress():
    """Construct Wl_Dialog_Progress with the module-level `main` object and a text.

    The test passes as long as the constructor does not raise.
    """
    wl_dialogs_misc.Wl_Dialog_Progress(main, text = 'test')

def test_wl_dialog_progress_data():
def test_wl_dialog_progress_process_data():
    """Construct Wl_Dialog_Progress_Process_Data with the module-level `main` object.

    The test passes as long as the constructor does not raise.
    """
    wl_dialogs_misc.Wl_Dialog_Progress_Process_Data(main)

def test_wl_dialog_progress_download_model():
    """Construct Wl_Dialog_Progress_Download_Model with the module-level `main` object.

    The test passes as long as the constructor does not raise.
    """
    wl_dialogs_misc.Wl_Dialog_Progress_Download_Model(main)

def test_wl_dialog_clr_table():
    """Construct Wl_Dialog_Clr_Table with the module-level `main` object.

    The test passes as long as the constructor does not raise.
    """
    wl_dialogs_misc.Wl_Dialog_Clr_Table(main)

Expand All @@ -38,7 +41,8 @@ def test_wl_dialog_restart_required():

if __name__ == '__main__':
test_wl_dialog_progress()
test_wl_dialog_progress_data()
test_wl_dialog_progress_process_data()
test_wl_dialog_progress_download_model()
test_wl_dialog_clr_table()
test_wl_dialog_clr_all_tables()
test_wl_dialog_restart_required()
44 changes: 28 additions & 16 deletions tests/wl_tests_utils/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,23 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------

import os
import platform
import re

from tests import wl_test_init
from wordless.wl_utils import wl_misc

main = wl_test_init.Wl_Test_Main()

def test_change_file_owner_to_user():
    """Check that wl_misc.change_file_owner_to_user accepts a freshly created file.

    An empty scratch file named 'test' is created in the current working
    directory, passed to the function under test, and removed afterwards.
    The removal happens even when the call raises, so a failing run does
    not leave a stray file behind.
    """
    file_path = 'test'

    # Create an empty file for the function under test to operate on
    with open(file_path, 'wb'):
        pass

    try:
        wl_misc.change_file_owner_to_user(file_path)
    finally:
        # Always clean up, otherwise a failure leaks the scratch file
        os.remove(file_path)

def test_check_os():
is_windows, is_macos, is_linux = wl_misc.check_os()

Expand All @@ -31,35 +43,35 @@ def test_check_os():
elif platform.system() == 'Linux':
assert not is_windows and not is_macos and is_linux

def test_flatten_list():
    """Check that wl_misc.flatten_list yields the items of a nested list in order."""
    nested = [1, 2, [3, 4, [5, 6]]]
    flattened = list(wl_misc.flatten_list(nested))

    assert flattened == [1, 2, 3, 4, 5, 6]

def test_get_linux_distro():
    """Check that wl_misc.get_linux_distro returns 'ubuntu' or 'debian'."""
    distro = wl_misc.get_linux_distro()

    assert distro in ['ubuntu', 'debian']

def test_get_wl_ver():
    """Check that the version reported by wl_misc.get_wl_ver looks like MAJOR.MINOR.PATCH."""
    # Every component, including the last one, may span several digits
    # (e.g. "3.10.12"), so each part is matched with "+".
    # str() keeps the check usable if the version is not a plain string
    # (presumably a version object — TODO confirm against wl_misc).
    assert re.search(r'^[0-9]+\.[0-9]+\.[0-9]+$', str(wl_misc.get_wl_ver()))

def test_split_ver():
    """Check wl_misc.split_ver against dotted version strings and their expected integer tuples."""
    cases = (
        ('1.2.3', (1, 2, 3)),
        ('0.0.0', (0, 0, 0)),
        ('10.100.1000', (10, 100, 1000)),
    )

    for ver, expected in cases:
        assert wl_misc.split_ver(ver) == expected
assert re.search(r'^[0-9]+\.[0-9]+\.[0-9]$', str(wl_misc.get_wl_ver()))

def test_flatten_list():
    """Check that wl_misc.flatten_list yields the items of a nested list in order."""
    nested = [1, 2, [3, 4, [5, 6]]]
    flattened = list(wl_misc.flatten_list(nested))

    assert flattened == [1, 2, 3, 4, 5, 6]
def test_merge_dicts():
    """Check wl_misc.merge_dicts on scalar values and on list values.

    In both expected results, a key missing from one input dict is filled
    with zeros (0 for scalars, [0, 0] for two-item lists).
    """
    merged_scalars = wl_misc.merge_dicts([{1: 10}, {1: 20, 2: 30}])
    assert merged_scalars == {1: [10, 20], 2: [0, 30]}

    merged_lists = wl_misc.merge_dicts([{1: [10, 20]}, {1: [30, 40], 2: [50, 60]}])
    assert merged_lists == {1: [[10, 20], [30, 40]], 2: [[0, 0], [50, 60]]}

def test_normalize_nums():
    """Check wl_misc.normalize_nums for the 0-100 range, both forward and reversed."""
    nums = [1, 2, 3, 4, 5]

    forward = wl_misc.normalize_nums(nums, 0, 100)
    assert forward == [0, 25, 50, 75, 100]

    backward = wl_misc.normalize_nums(nums, 0, 100, reverse = True)
    assert backward == [100, 75, 50, 25, 0]

def test_merge_dicts():
    """Check wl_misc.merge_dicts on scalar values and on list values.

    In both expected results, a key missing from one input dict is filled
    with zeros (0 for scalars, [0, 0] for two-item lists).
    """
    merged_scalars = wl_misc.merge_dicts([{1: 10}, {1: 20, 2: 30}])
    assert merged_scalars == {1: [10, 20], 2: [0, 30]}

    merged_lists = wl_misc.merge_dicts([{1: [10, 20]}, {1: [30, 40], 2: [50, 60]}])
    assert merged_lists == {1: [[10, 20], [30, 40]], 2: [[0, 0], [50, 60]]}
def test_wl_download():
    """Check that wl_misc.wl_download returns a truthy result and a truthy status flag.

    The call targets the VERSION file of the Wordless repository on
    raw.githubusercontent.com.
    """
    url = 'https://raw.githubusercontent.com/BLKSerene/Wordless/main/VERSION'
    response, is_ok = wl_misc.wl_download(main, url)

    assert response
    assert is_ok

if __name__ == '__main__':
test_change_file_owner_to_user()
test_check_os()
test_flatten_list()
test_get_linux_distro()

test_get_wl_ver()
test_split_ver()

test_flatten_list()
test_normalize_nums()
test_merge_dicts()
test_normalize_nums()
test_wl_download()
48 changes: 24 additions & 24 deletions utils/wl_downloader_ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,30 +20,30 @@
import spacy

# Download spaCy models
spacy.cli.download('ca_core_news_sm')
spacy.cli.download('zh_core_web_sm')
spacy.cli.download('hr_core_news_sm')
spacy.cli.download('da_core_news_sm')
spacy.cli.download('nl_core_news_sm')
spacy.cli.download('en_core_web_sm')
spacy.cli.download('fi_core_news_sm')
spacy.cli.download('fr_core_news_sm')
spacy.cli.download('de_core_news_sm')
spacy.cli.download('el_core_news_sm')
spacy.cli.download('it_core_news_sm')
spacy.cli.download('ja_core_news_sm')
spacy.cli.download('ko_core_news_sm')
spacy.cli.download('lt_core_news_sm')
spacy.cli.download('mk_core_news_sm')
spacy.cli.download('nb_core_news_sm')
spacy.cli.download('pl_core_news_sm')
spacy.cli.download('pt_core_news_sm')
spacy.cli.download('ro_core_news_sm')
spacy.cli.download('ru_core_news_sm')
spacy.cli.download('sl_core_news_sm')
spacy.cli.download('es_core_news_sm')
spacy.cli.download('sv_core_news_sm')
spacy.cli.download('uk_core_news_sm')
spacy.cli.download('ca_core_news_trf')
spacy.cli.download('zh_core_web_trf')
spacy.cli.download('hr_core_news_lg')
spacy.cli.download('da_core_news_trf')
spacy.cli.download('nl_core_news_lg')
spacy.cli.download('en_core_web_trf')
spacy.cli.download('fi_core_news_lg')
spacy.cli.download('fr_dep_news_trf')
spacy.cli.download('de_dep_news_trf')
spacy.cli.download('el_core_news_lg')
spacy.cli.download('it_core_news_lg')
spacy.cli.download('ja_core_news_trf')
spacy.cli.download('ko_core_news_lg')
spacy.cli.download('lt_core_news_lg')
spacy.cli.download('mk_core_news_lg')
spacy.cli.download('nb_core_news_lg')
spacy.cli.download('pl_core_news_lg')
spacy.cli.download('pt_core_news_lg')
spacy.cli.download('ro_core_news_lg')
spacy.cli.download('ru_core_news_lg')
spacy.cli.download('sl_core_news_trf')
spacy.cli.download('es_dep_news_trf')
spacy.cli.download('sv_core_news_lg')
spacy.cli.download('uk_core_news_trf')

# Download NLTK data
# Corpora
Expand Down
90 changes: 0 additions & 90 deletions utils/wl_downloader_spacy_models.py

This file was deleted.

63 changes: 9 additions & 54 deletions utils/wl_packaging.spec
Original file line number Diff line number Diff line change
Expand Up @@ -58,35 +58,11 @@ datas.extend(PyInstaller.utils.hooks.collect_data_files('spacy.lang', include_py
datas.extend(PyInstaller.utils.hooks.copy_metadata('spacy_lookups_data'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('spacy_lookups_data', include_py_files = True))
datas.extend(PyInstaller.utils.hooks.collect_data_files('spacy_pkuseg'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('ca_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('zh_core_web_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('hr_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('da_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('de_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('el_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('en_core_web_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('fi_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('fr_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('it_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('ja_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('ko_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('lt_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('mk_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('nb_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('nl_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('pl_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('pt_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('ro_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('ru_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('sl_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('es_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('sv_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('uk_core_news_sm'))
datas.extend(PyInstaller.utils.hooks.collect_data_files('pip', include_py_files = True))
datas.extend(PyInstaller.utils.hooks.collect_data_files('en_core_web_trf'))
# SudachiPy
datas.extend(PyInstaller.utils.hooks.collect_data_files('sudachipy', include_py_files = True))
datas.extend(PyInstaller.utils.hooks.collect_data_files('sudachidict_core'))
# TextBlob
datas.extend(PyInstaller.utils.hooks.collect_data_files('textblob'))
# Underthesea
datas.extend(PyInstaller.utils.hooks.collect_data_files('underthesea'))
# wordcloud
Expand Down Expand Up @@ -130,31 +106,8 @@ hiddenimports = [
'pymorphy3_dicts_ru',
'pymorphy3_dicts_uk',

# spaCy models
'ca_core_news_sm',
'zh_core_web_sm',
'hr_core_news_sm',
'da_core_news_sm',
'de_core_news_sm',
'el_core_news_sm',
'en_core_web_sm',
'fi_core_news_sm',
'fr_core_news_sm',
'it_core_news_sm',
'ja_core_news_sm',
'ko_core_news_sm',
'lt_core_news_sm',
'mk_core_news_sm',
'nb_core_news_sm',
'nl_core_news_sm',
'pl_core_news_sm',
'pt_core_news_sm',
'ro_core_news_sm',
'ru_core_news_sm',
'sl_core_news_sm',
'es_core_news_sm',
'sv_core_news_sm',
'uk_core_news_sm',
# spaCy
'en_core_web_trf',

# SudachiPy
'sudachidict_core',
Expand Down Expand Up @@ -229,6 +182,8 @@ coll = COLLECT(
# Bundle application on macOS
# Reference: https://pyinstaller.org/en/stable/spec-files.html#spec-file-options-for-a-macos-bundle
if is_macos:
wl_ver = wl_utils.get_wl_ver()

app = BUNDLE(
coll,
name = 'Wordless.app',
Expand All @@ -242,9 +197,9 @@ if is_macos:
'CFBundleDisplayName': 'Wordless',
'CFBundleExecutable': 'Wordless',
'CFBundlePackageType': 'APPL',
'CFBundleVersion': wl_utils.get_wl_ver(),
'CFBundleShortVersionString': wl_utils.get_wl_ver(),
'CFBundleInfoDictionaryVersion': wl_utils.get_wl_ver(),
'CFBundleVersion': wl_ver,
'CFBundleShortVersionString': wl_ver,
'CFBundleInfoDictionaryVersion': wl_ver,
# Required by Retina displays on macOS
# References:
# https://developer.apple.com/documentation/bundleresources/information_property_list/nshighresolutioncapable
Expand Down
Loading

0 comments on commit aff2953

Please sign in to comment.