Skip to content

Commit

Permalink
Utils: Update spaCy's sentence recognizers, word tokenizers, part-of-…
Browse files Browse the repository at this point in the history
…speech taggers, lemmatizers, and dependency parsers
  • Loading branch information
BLKSerene committed Aug 8, 2023
1 parent 047d5a4 commit 6eba084
Show file tree
Hide file tree
Showing 109 changed files with 3,075 additions and 133,160 deletions.
56 changes: 22 additions & 34 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,21 +53,17 @@ jobs:
# Run tests and collect coverage
- name: Run Tests and collect coverage
run: |
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_dependency_parsing.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_lemmatization.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_matching.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_nlp_utils.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_pos_tagging.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_sentence_tokenization.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_stop_word_lists.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_syl_tokenization.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_word_detokenization.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_word_tokenization.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_syl_tokenization.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_pos_tagging.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_lemmatization.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_file_area
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_work_area
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py
pytest --cov=./ --cov-report=xml --cov-append --ignore=tests/wl_tests_nlp --ignore=tests/wl_tests_file_area --ignore=tests/wl_tests_work_area
pytest --cov=./ --cov-report=xml --cov-append --ignore=tests/wl_tests_nlp
# Upload coverage to Codecov
- name: "Upload coverage to Codecov"
Expand Down Expand Up @@ -103,22 +99,18 @@ jobs:
# Run tests
- name: Run Tests
run: |
pytest tests/wl_tests_nlp/test_dependency_parsing.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_nlp/test_matching.py
pytest tests/wl_tests_nlp/test_nlp_utils.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py
pytest tests/wl_tests_nlp/test_sentence_tokenization.py
pytest tests/wl_tests_nlp/test_stop_word_lists.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_word_detokenization.py
pytest tests/wl_tests_nlp/test_word_tokenization.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_file_area
# Ignore tests of Profiler due to unknown errors
pytest tests/wl_tests_work_area --ignore=tests/wl_tests_work_area/test_profiler.py
pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py
pytest --ignore=tests/wl_tests_nlp --ignore=tests/wl_tests_file_area --ignore=tests/wl_tests_work_area
# Ignore tests of Profiler due to unknown errors
pytest --ignore=tests/wl_tests_nlp --ignore=tests/wl_tests_work_area/test_profiler.py
# Linux
build-linux:
Expand Down Expand Up @@ -153,18 +145,14 @@ jobs:
# Fix PyQt
export QT_QPA_PLATFORM=offscreen
pytest tests/wl_tests_nlp/test_dependency_parsing.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_nlp/test_matching.py
pytest tests/wl_tests_nlp/test_nlp_utils.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py
pytest tests/wl_tests_nlp/test_sentence_tokenization.py
pytest tests/wl_tests_nlp/test_stop_word_lists.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_word_detokenization.py
pytest tests/wl_tests_nlp/test_word_tokenization.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_file_area
pytest tests/wl_tests_work_area
pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py
pytest --ignore=tests/wl_tests_nlp --ignore=tests/wl_tests_file_area --ignore=tests/wl_tests_work_area
pytest --ignore=tests/wl_tests_nlp
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
- Work Area: Add Profiler - Readability - Bormuth's Cloze Mean / Bormuth's Grade Placement / Coleman's Readability Formula / Danielson-Bryan's Readability Formula / Degrees of Reading Power / Easy Listening Formula / Fucks's Stilcharakteristik / Strain Index / Tränkle & Bailer's Readability Formula / Tuldava's Text Difficulty / Wheeler & Smith's Readability Formula

### ✨ Improvements
- Utils: Update spaCy's sentence recognizers, word tokenizers, part-of-speech taggers, lemmatizers, and dependency parsers
- Utils: Update Wordless's sentence and sentence segment splitters
- Work Area: Update Profiler - Readability - Automated Readability Index / Dale-Chall Readability Formula / Flesch Reading Ease / Flesch Reading Ease (Farr-Jenkins-Paterson) / Gunning Fog Index / Spache Grade Level

Expand Down
56 changes: 22 additions & 34 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,17 @@ for:

# Run tests
test_script:
- pytest tests/wl_tests_nlp/test_dependency_parsing.py
- pytest tests/wl_tests_nlp/test_lemmatization.py
- pytest tests/wl_tests_nlp/test_matching.py
- pytest tests/wl_tests_nlp/test_nlp_utils.py
- pytest tests/wl_tests_nlp/test_pos_tagging.py
- pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py

- pytest tests/wl_tests_nlp/test_sentence_tokenization.py
- pytest tests/wl_tests_nlp/test_stop_word_lists.py
- pytest tests/wl_tests_nlp/test_syl_tokenization.py
- pytest tests/wl_tests_nlp/test_word_detokenization.py
- pytest tests/wl_tests_nlp/test_word_tokenization.py
- pytest tests/wl_tests_nlp/test_syl_tokenization.py
- pytest tests/wl_tests_nlp/test_pos_tagging.py
- pytest tests/wl_tests_nlp/test_lemmatization.py

- pytest tests/wl_tests_file_area
- pytest tests/wl_tests_work_area
- pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py

- pytest --ignore=tests/wl_tests_nlp --ignore=tests/wl_tests_file_area --ignore=tests/wl_tests_work_area
- pytest --ignore=tests/wl_tests_nlp

# macOS
-
Expand All @@ -94,22 +90,18 @@ for:

# Run tests
test_script:
- pytest tests/wl_tests_nlp/test_dependency_parsing.py
- pytest tests/wl_tests_nlp/test_lemmatization.py
- pytest tests/wl_tests_nlp/test_matching.py
- pytest tests/wl_tests_nlp/test_nlp_utils.py
- pytest tests/wl_tests_nlp/test_pos_tagging.py
- pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py

- pytest tests/wl_tests_nlp/test_sentence_tokenization.py
- pytest tests/wl_tests_nlp/test_stop_word_lists.py
- pytest tests/wl_tests_nlp/test_syl_tokenization.py
- pytest tests/wl_tests_nlp/test_word_detokenization.py
- pytest tests/wl_tests_nlp/test_word_tokenization.py
- pytest tests/wl_tests_nlp/test_syl_tokenization.py
- pytest tests/wl_tests_nlp/test_pos_tagging.py
- pytest tests/wl_tests_nlp/test_lemmatization.py

- pytest tests/wl_tests_file_area
# Ignore tests of Profiler due to unknown errors
- pytest tests/wl_tests_work_area --ignore=tests/wl_tests_work_area/test_profiler.py
- pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py

- pytest --ignore=tests/wl_tests_nlp --ignore=tests/wl_tests_file_area --ignore=tests/wl_tests_work_area
# Ignore tests of Profiler due to unknown errors
- pytest --ignore=tests/wl_tests_nlp --ignore=tests/wl_tests_work_area/test_profiler.py

# Ubuntu
-
Expand All @@ -132,18 +124,14 @@ for:
# Fix PyQt
- export QT_QPA_PLATFORM=offscreen

- pytest tests/wl_tests_nlp/test_dependency_parsing.py
- pytest tests/wl_tests_nlp/test_lemmatization.py
- pytest tests/wl_tests_nlp/test_matching.py
- pytest tests/wl_tests_nlp/test_nlp_utils.py
- pytest tests/wl_tests_nlp/test_pos_tagging.py
- pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py

- pytest tests/wl_tests_nlp/test_sentence_tokenization.py
- pytest tests/wl_tests_nlp/test_stop_word_lists.py
- pytest tests/wl_tests_nlp/test_syl_tokenization.py
- pytest tests/wl_tests_nlp/test_word_detokenization.py
- pytest tests/wl_tests_nlp/test_word_tokenization.py
- pytest tests/wl_tests_nlp/test_syl_tokenization.py
- pytest tests/wl_tests_nlp/test_pos_tagging.py
- pytest tests/wl_tests_nlp/test_lemmatization.py

- pytest tests/wl_tests_file_area
- pytest tests/wl_tests_work_area
- pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py

- pytest --ignore=tests/wl_tests_nlp --ignore=tests/wl_tests_file_area --ignore=tests/wl_tests_work_area
- pytest --ignore=tests/wl_tests_nlp
56 changes: 22 additions & 34 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,21 +54,17 @@ jobs:
# Run tests
- script: |
pytest tests/wl_tests_nlp/test_dependency_parsing.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_nlp/test_matching.py
pytest tests/wl_tests_nlp/test_nlp_utils.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py
pytest tests/wl_tests_nlp/test_sentence_tokenization.py
pytest tests/wl_tests_nlp/test_stop_word_lists.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_word_detokenization.py
pytest tests/wl_tests_nlp/test_word_tokenization.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_file_area
pytest tests/wl_tests_work_area
pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py
pytest --ignore=tests/wl_tests_nlp --ignore=tests/wl_tests_file_area --ignore=tests/wl_tests_work_area
pytest --ignore=tests/wl_tests_nlp
displayName: 'Run tests'
# macOS
Expand Down Expand Up @@ -104,22 +100,18 @@ jobs:
# Run tests
- script: |
pytest tests/wl_tests_nlp/test_dependency_parsing.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_nlp/test_matching.py
pytest tests/wl_tests_nlp/test_nlp_utils.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py
pytest tests/wl_tests_nlp/test_sentence_tokenization.py
pytest tests/wl_tests_nlp/test_stop_word_lists.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_word_detokenization.py
pytest tests/wl_tests_nlp/test_word_tokenization.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_file_area
# Ignore tests of Profiler due to unknown errors
pytest tests/wl_tests_work_area --ignore=tests/wl_tests_work_area/test_profiler.py
pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py
pytest --ignore=tests/wl_tests_nlp --ignore=tests/wl_tests_file_area --ignore=tests/wl_tests_work_area
# Ignore tests of Profiler due to unknown errors
pytest --ignore=tests/wl_tests_nlp --ignore=tests/wl_tests_work_area/test_profiler.py
displayName: 'Run tests'
# Linux
Expand Down Expand Up @@ -158,19 +150,15 @@ jobs:
# Fix PyQt
export QT_QPA_PLATFORM=offscreen
pytest tests/wl_tests_nlp/test_dependency_parsing.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_nlp/test_matching.py
pytest tests/wl_tests_nlp/test_nlp_utils.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py
pytest tests/wl_tests_nlp/test_sentence_tokenization.py
pytest tests/wl_tests_nlp/test_stop_word_lists.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_word_detokenization.py
pytest tests/wl_tests_nlp/test_word_tokenization.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_file_area
pytest tests/wl_tests_work_area
pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py
pytest --ignore=tests/wl_tests_nlp --ignore=tests/wl_tests_file_area --ignore=tests/wl_tests_work_area
pytest --ignore=tests/wl_tests_nlp
displayName: 'Run tests'
Loading

0 comments on commit 6eba084

Please sign in to comment.