Skip to content

Commit

Permalink
Utils: Update spaCy's sentence recognizers, word tokenizers, part-of-speech taggers, lemmatizers, and dependency parsers
Browse files Browse the repository at this point in the history
  • Loading branch information
BLKSerene committed Aug 8, 2023
1 parent 047d5a4 commit 33f8dd7
Show file tree
Hide file tree
Showing 108 changed files with 3,044 additions and 133,110 deletions.
45 changes: 21 additions & 24 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,15 @@ jobs:
# Run tests and collect coverage
- name: Run Tests and collect coverage
run: |
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_dependency_parsing.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_lemmatization.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_matching.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_nlp_utils.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_pos_tagging.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_sentence_tokenization.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_stop_word_lists.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_syl_tokenization.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_word_detokenization.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_word_tokenization.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_syl_tokenization.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_pos_tagging.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/test_lemmatization.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_file_area
pytest --cov=./ --cov-report=xml --cov-append tests/wl_tests_work_area
Expand Down Expand Up @@ -103,16 +102,15 @@ jobs:
# Run tests
- name: Run Tests
run: |
pytest tests/wl_tests_nlp/test_dependency_parsing.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_nlp/test_matching.py
pytest tests/wl_tests_nlp/test_nlp_utils.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py
pytest tests/wl_tests_nlp/test_sentence_tokenization.py
pytest tests/wl_tests_nlp/test_stop_word_lists.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_word_detokenization.py
pytest tests/wl_tests_nlp/test_word_tokenization.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_file_area
# Ignore tests of Profiler due to unknown errors
Expand Down Expand Up @@ -153,16 +151,15 @@ jobs:
# Fix PyQt
export QT_QPA_PLATFORM=offscreen
pytest tests/wl_tests_nlp/test_dependency_parsing.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_nlp/test_matching.py
pytest tests/wl_tests_nlp/test_nlp_utils.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py
pytest tests/wl_tests_nlp/test_sentence_tokenization.py
pytest tests/wl_tests_nlp/test_stop_word_lists.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_word_detokenization.py
pytest tests/wl_tests_nlp/test_word_tokenization.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_file_area
pytest tests/wl_tests_work_area
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
- Work Area: Add Profiler - Readability - Bormuth's Cloze Mean / Bormuth's Grade Placement / Coleman's Readability Formula / Danielson-Bryan's Readability Formula / Degrees of Reading Power / Easy Listening Formula / Fucks's Stilcharakteristik / Strain Index / Tränkle & Bailer's Readability Formula / Tuldava's Text Difficulty / Wheeler & Smith's Readability Formula

### ✨ Improvements
- Utils: Update spaCy's sentence recognizers, word tokenizers, part-of-speech taggers, lemmatizers, and dependency parsers
- Utils: Update Wordless's sentence and sentence segment splitters
- Work Area: Update Profiler - Readability - Automated Readability Index / Dale-Chall Readability Formula / Flesch Reading Ease / Flesch Reading Ease (Farr-Jenkins-Paterson) / Gunning Fog Index / Spache Grade Level

Expand Down
45 changes: 21 additions & 24 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,15 @@ for:

# Run tests
test_script:
- pytest tests/wl_tests_nlp/test_dependency_parsing.py
- pytest tests/wl_tests_nlp/test_lemmatization.py
- pytest tests/wl_tests_nlp/test_matching.py
- pytest tests/wl_tests_nlp/test_nlp_utils.py
- pytest tests/wl_tests_nlp/test_pos_tagging.py
- pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py

- pytest tests/wl_tests_nlp/test_sentence_tokenization.py
- pytest tests/wl_tests_nlp/test_stop_word_lists.py
- pytest tests/wl_tests_nlp/test_syl_tokenization.py
- pytest tests/wl_tests_nlp/test_word_detokenization.py
- pytest tests/wl_tests_nlp/test_word_tokenization.py
- pytest tests/wl_tests_nlp/test_syl_tokenization.py
- pytest tests/wl_tests_nlp/test_pos_tagging.py
- pytest tests/wl_tests_nlp/test_lemmatization.py

- pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py

- pytest tests/wl_tests_file_area
- pytest tests/wl_tests_work_area
Expand All @@ -94,16 +93,15 @@ for:

# Run tests
test_script:
- pytest tests/wl_tests_nlp/test_dependency_parsing.py
- pytest tests/wl_tests_nlp/test_lemmatization.py
- pytest tests/wl_tests_nlp/test_matching.py
- pytest tests/wl_tests_nlp/test_nlp_utils.py
- pytest tests/wl_tests_nlp/test_pos_tagging.py
- pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py

- pytest tests/wl_tests_nlp/test_sentence_tokenization.py
- pytest tests/wl_tests_nlp/test_stop_word_lists.py
- pytest tests/wl_tests_nlp/test_syl_tokenization.py
- pytest tests/wl_tests_nlp/test_word_detokenization.py
- pytest tests/wl_tests_nlp/test_word_tokenization.py
- pytest tests/wl_tests_nlp/test_syl_tokenization.py
- pytest tests/wl_tests_nlp/test_pos_tagging.py
- pytest tests/wl_tests_nlp/test_lemmatization.py

- pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py

- pytest tests/wl_tests_file_area
# Ignore tests of Profiler due to unknown errors
Expand Down Expand Up @@ -132,16 +130,15 @@ for:
# Fix PyQt
- export QT_QPA_PLATFORM=offscreen

- pytest tests/wl_tests_nlp/test_dependency_parsing.py
- pytest tests/wl_tests_nlp/test_lemmatization.py
- pytest tests/wl_tests_nlp/test_matching.py
- pytest tests/wl_tests_nlp/test_nlp_utils.py
- pytest tests/wl_tests_nlp/test_pos_tagging.py
- pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py

- pytest tests/wl_tests_nlp/test_sentence_tokenization.py
- pytest tests/wl_tests_nlp/test_stop_word_lists.py
- pytest tests/wl_tests_nlp/test_syl_tokenization.py
- pytest tests/wl_tests_nlp/test_word_detokenization.py
- pytest tests/wl_tests_nlp/test_word_tokenization.py
- pytest tests/wl_tests_nlp/test_syl_tokenization.py
- pytest tests/wl_tests_nlp/test_pos_tagging.py
- pytest tests/wl_tests_nlp/test_lemmatization.py

- pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py

- pytest tests/wl_tests_file_area
- pytest tests/wl_tests_work_area
Expand Down
45 changes: 21 additions & 24 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,15 @@ jobs:
# Run tests
- script: |
pytest tests/wl_tests_nlp/test_dependency_parsing.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_nlp/test_matching.py
pytest tests/wl_tests_nlp/test_nlp_utils.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py
pytest tests/wl_tests_nlp/test_sentence_tokenization.py
pytest tests/wl_tests_nlp/test_stop_word_lists.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_word_detokenization.py
pytest tests/wl_tests_nlp/test_word_tokenization.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_file_area
pytest tests/wl_tests_work_area
Expand Down Expand Up @@ -104,16 +103,15 @@ jobs:
# Run tests
- script: |
pytest tests/wl_tests_nlp/test_dependency_parsing.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_nlp/test_matching.py
pytest tests/wl_tests_nlp/test_nlp_utils.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py
pytest tests/wl_tests_nlp/test_sentence_tokenization.py
pytest tests/wl_tests_nlp/test_stop_word_lists.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_word_detokenization.py
pytest tests/wl_tests_nlp/test_word_tokenization.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_file_area
# Ignore tests of Profiler due to unknown errors
Expand Down Expand Up @@ -158,16 +156,15 @@ jobs:
# Fix PyQt
export QT_QPA_PLATFORM=offscreen
pytest tests/wl_tests_nlp/test_dependency_parsing.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_nlp/test_matching.py
pytest tests/wl_tests_nlp/test_nlp_utils.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/wl_tests_spacy/test_spacy_eng.py
pytest tests/wl_tests_nlp/test_sentence_tokenization.py
pytest tests/wl_tests_nlp/test_stop_word_lists.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_word_detokenization.py
pytest tests/wl_tests_nlp/test_word_tokenization.py
pytest tests/wl_tests_nlp/test_syl_tokenization.py
pytest tests/wl_tests_nlp/test_pos_tagging.py
pytest tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_nlp/ --ignore=tests/wl_tests_nlp/wl_tests_spacy --ignore=tests/wl_tests_nlp/test_sentence_tokenization.py --ignore=tests/wl_tests_nlp/test_word_tokenization.py --ignore=tests/wl_tests_nlp/test_syl_tokenization.py --ignore=tests/wl_tests_nlp/test_pos_tagging.py --ignore=tests/wl_tests_nlp/test_lemmatization.py
pytest tests/wl_tests_file_area
pytest tests/wl_tests_work_area
Expand Down
Loading

0 comments on commit 33f8dd7

Please sign in to comment.