Skip to content

Commit

Permalink
Utils: Add encoding detection - UTF-8 with BOM
Browse files Browse the repository at this point in the history
  • Loading branch information
BLKSerene committed May 19, 2024
1 parent 6f8aa74 commit 1157cc8
Show file tree
Hide file tree
Showing 26 changed files with 513 additions and 389 deletions.
11 changes: 3 additions & 8 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ jobs:
cache: 'pip'
cache-dependency-path: 'requirements/requirements_tests.txt'

# Install dependencies
- name: Install dependencies
run: |
python -m pip install --upgrade pip setuptools
Expand All @@ -46,7 +45,6 @@ jobs:
# Download models and data files
python utils/wl_downloader_ci.py
# Run tests and collect coverage
- name: Run Tests and collect coverage
run: |
pytest --cov=./ --cov-report=xml --cov-append tests/tests_nlp/tests_spacy/test_spacy_eng.py
Expand All @@ -59,9 +57,10 @@ jobs:
pytest --cov=./ --cov-report=xml --cov-append tests/tests_nlp/ --ignore=tests/tests_nlp/tests_spacy --ignore=tests/tests_nlp/tests_stanza --ignore=tests/tests_nlp/test_word_tokenization.py --ignore=tests/tests_nlp/test_pos_tagging.py --ignore=tests/tests_nlp/test_lemmatization.py
pytest --cov=./ --cov-report=xml --cov-append --ignore=tests/tests_nlp
# Upload coverage to Codecov
- name: "Upload coverage to Codecov"
uses: codecov/codecov-action@v3
uses: codecov/codecov-action@v4
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

# macOS
build-macos:
Expand All @@ -78,15 +77,13 @@ jobs:
cache: 'pip'
cache-dependency-path: 'requirements/requirements_tests.txt'

# Install dependencies
- name: Install dependencies
run: |
python -m pip install --upgrade pip setuptools
pip install --requirement requirements/requirements_tests.txt
# Download models and data files
python utils/wl_downloader_ci.py
# Run tests
- name: Run Tests
run: |
pytest tests/tests_nlp/tests_spacy/test_spacy_eng.py
Expand Down Expand Up @@ -114,15 +111,13 @@ jobs:
cache: 'pip'
cache-dependency-path: 'requirements/requirements_tests.txt'

# Install dependencies
- name: Install dependencies
run: |
python -m pip install --upgrade pip setuptools
pip install --requirement requirements/requirements_tests.txt
# Download models and data files
python utils/wl_downloader_ci.py
# Run tests
- name: Run Tests
run: |
# Fix PyQt
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
- Utils: Add Pyphen's Basque syllable tokenizer
- Utils: Add PyThaiNLP's Han-solo
- Utils: Add Stanza's Sindhi part-of-speech tagger
- Utils: Add encoding detection - UTF-8 with BOM
- Utils: Add VADER's sentiment analyzers
- Work Area: Add Collocation/Colligation Extractor - Filter results /
- Work Area: Add Profiler - Lexical Diversity - Brunét's Index / Honoré's statistic
Expand Down
8 changes: 3 additions & 5 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ for:
only:
- APPVEYOR_BUILD_WORKER_IMAGE: "Visual Studio 2019"

# Install dependencies
install:
- cmd: set PATH=%PYTHON%;%PYTHON%/Scripts;%PATH%
- python -m pip install --upgrade pip setuptools
- pip install --requirement requirements/requirements_tests.txt
# Download models and data files
- python utils/wl_downloader_ci.py

# Run tests
Expand All @@ -74,12 +74,11 @@ for:
only:
- APPVEYOR_BUILD_WORKER_IMAGE: "macos-catalina"

# Install dependencies
install:
- cmd: set PATH=%PYTHON%;%PYTHON%/Scripts;%PATH%
# Install dependencies
- python -m pip install --upgrade pip setuptools
- pip install --requirement requirements/requirements_tests.txt
# Download models and data files
- python utils/wl_downloader_ci.py

# Run tests
Expand All @@ -100,12 +99,11 @@ for:
only:
- APPVEYOR_BUILD_WORKER_IMAGE: "Ubuntu1804"

# Install dependencies
install:
- cmd: set PATH=%PYTHON%;%PYTHON%/Scripts;%PATH%
# Install dependencies
- python -m pip install --upgrade pip setuptools
- pip install --requirement requirements/requirements_tests.txt
# Download models and data files
- python utils/wl_downloader_ci.py

# Run tests
Expand Down
6 changes: 0 additions & 6 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,13 @@ jobs:
path: $(PIP_CACHE_DIR)
displayName: Cache pip packages

# Install dependencies
- script: |
pip install --upgrade pip setuptools
pip install --requirement requirements/requirements_tests.txt
:: Download models and data files
python utils/wl_downloader_ci.py
displayName: 'Install dependencies'
# Run tests
- script: |
pytest tests/tests_nlp/tests_spacy/test_spacy_eng.py
pytest tests/tests_nlp/tests_stanza/test_stanza_eng.py
Expand Down Expand Up @@ -80,15 +78,13 @@ jobs:
path: $(PIP_CACHE_DIR)
displayName: Cache pip packages

# Install dependencies
- script: |
pip3 install --upgrade pip setuptools
pip3 install --requirement requirements/requirements_tests.txt
# Download models and data files
python3 utils/wl_downloader_ci.py
displayName: 'Install dependencies'
# Run tests
- script: |
pytest tests/tests_nlp/tests_spacy/test_spacy_eng.py
pytest tests/tests_nlp/tests_stanza/test_stanza_eng.py
Expand Down Expand Up @@ -120,15 +116,13 @@ jobs:
path: $(PIP_CACHE_DIR)
displayName: Cache pip packages

# Install dependencies
- script: |
pip3.10 install --upgrade pip setuptools
pip3.10 install --requirement requirements/requirements_tests.txt
# Download models and data files
python3.10 utils/wl_downloader_ci.py
displayName: 'Install dependencies'
# Run tests
- script: |
# Fix PyQt
export QT_QPA_PLATFORM=offscreen
Expand Down
2 changes: 1 addition & 1 deletion doc/doc.md
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ XML files¹ |\*.xml
Language |File Encoding |Auto-detection
-----------------------|-----------------------|:------------:
All languages |UTF-8 without BOM |✔
All languages |UTF-8 with BOM |✖️
All languages |UTF-8 with BOM |✔
All languages |UTF-16 with BOM |✔
All languages |UTF-16BE without BOM |✔
All languages |UTF-16LE without BOM |✔
Expand Down

This file was deleted.

7 changes: 4 additions & 3 deletions tests/tests_checks/test_checks_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def get_normalized_file_path(file_name):
]

FILE_PATHS_UNSUPPORTED = [
get_normalized_file_path('unsupported_file_type.unsupported')
get_normalized_file_path('unsupported.unsupported')
]
FILE_PATHS_EMPTY = [
get_normalized_file_path('empty_txt.txt'),
Expand All @@ -44,12 +44,12 @@ def get_normalized_file_path(file_name):
]

def test_check_file_paths_unsupported():
_, files_unsupported = wl_checks_files.check_file_paths_unsupported(main, FILE_PATHS_UNSUPPORTED)
_, files_unsupported = wl_checks_files.check_file_paths_unsupported(main, ['supported.txt'] + FILE_PATHS_UNSUPPORTED)

assert files_unsupported == FILE_PATHS_UNSUPPORTED

def test_check_file_paths_empty():
_, files_empty = wl_checks_files.check_file_paths_empty(main, FILE_PATHS_EMPTY)
_, files_empty = wl_checks_files.check_file_paths_empty(main, [FILE_PATHS_DUP[0]] + FILE_PATHS_EMPTY)

assert files_empty == FILE_PATHS_EMPTY

Expand All @@ -60,6 +60,7 @@ def test_check_file_paths_duplicate():

def test_check_err_file_area():
assert wl_checks_files.check_err_file_area(main, '')
assert not wl_checks_files.check_err_file_area(main, 'test')

if __name__ == '__main__':
test_check_file_paths_unsupported()
Expand Down
4 changes: 4 additions & 0 deletions tests/tests_checks/test_checks_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ def test_check_dir():

def test_check_new_name():
assert wl_checks_misc.check_new_name('new_name', ['new_name', 'new_name (2)', 'new_name (4)']) == 'new_name (3)'
assert wl_checks_misc.check_new_name(
'new_name', ['new_name', 'new_name (2)', 'new_name (4)'],
separator = '/'
) == 'new_name/2'

def test_check_new_path():
if os.path.exists('temp'):
Expand Down
22 changes: 17 additions & 5 deletions tests/tests_checks/test_checks_work_area.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,32 +82,44 @@ def test_check_search_terms():

def test_check_nlp_support():
file_eng_us = {'selected': True, 'name': 'test', 'lang': 'eng_us', 'tagged': False}
file_xxx = {'selected': True, 'name': 'test', 'lang': 'xxx', 'tagged': False}
file_other = {'selected': True, 'name': 'test', 'lang': 'other', 'tagged': False}

assert wl_checks_work_area.check_nlp_support(
main,
nlp_utils = ['pos_taggers'],
files = [file_eng_us]
)
assert wl_checks_work_area.check_nlp_support(
main,
nlp_utils = ['lemmatizers'],
files = [file_eng_us]
)
assert not wl_checks_work_area.check_nlp_support(
main,
nlp_utils = ['pos_taggers'],
files = [file_xxx]
files = [file_other]
)
assert not wl_checks_work_area.check_nlp_support(
main,
nlp_utils = ['lemmatizers'],
files = [file_other]
)

main.settings_custom['file_area']['files_open'] = [file_eng_us]
main.settings_custom['file_area']['files_open_ref'] = [file_xxx]
main.settings_custom['file_area']['files_open_ref'] = [file_other]

assert wl_checks_work_area.check_nlp_support(main, nlp_utils = ['pos_taggers'])
assert not wl_checks_work_area.check_nlp_support(main, nlp_utils = ['pos_taggers'], ref = True)

def test_check_results():
assert wl_checks_work_area.check_results(main, '', 'test')
assert not wl_checks_work_area.check_results(main, 'test', '')
assert not wl_checks_work_area.check_results(main, '', '')

def test_check_results_download_model():
wl_checks_work_area.check_results_download_model(main, '', 'test')
wl_checks_work_area.check_results_download_model(main, 'test', '')
assert wl_checks_work_area.check_results_download_model(main, '', 'test')
assert not wl_checks_work_area.check_results_download_model(main, 'test', '')
assert not wl_checks_work_area.check_results_download_model(main, '', 'module_not_found')

def test_check_err_table():
wl_checks_work_area.check_err_table(main, '')
Expand Down
5 changes: 5 additions & 0 deletions tests/tests_dialogs/test_dialogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ def test_wl_dialog():
wl_dialog.set_fixed_height()
wl_dialog.move_to_center()

wl_dialog = wl_dialogs.Wl_Dialog(main, title = 'test', resizable = True)

def test_wl_dialog_frameless():
wl_dialogs.Wl_Dialog_Frameless(main).open()

Expand All @@ -40,6 +42,9 @@ def test_wl_dialog_info_copy():
wl_dialog_info_copy.get_info()
wl_dialog_info_copy.set_info('test')

wl_dialog_info_copy = wl_dialogs.Wl_Dialog_Info_Copy(main, title = 'test', is_plain_text = True)
wl_dialog_info_copy.set_info('test')

def test_wl_dialog_settings():
wl_dialog_settings = wl_dialogs.Wl_Dialog_Settings(main, title = 'test')
wl_dialog_settings.open()
Expand Down
2 changes: 2 additions & 0 deletions tests/tests_measures/test_measure_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ def test_to_measure_text():
for measure_text, measure_code in measures.items():
assert wl_measure_utils.to_measure_text(main, measure_type, measure_code) == measure_text

assert wl_measure_utils.to_measure_text(main, list(main.settings_global['mapping_measures'])[0], 'test') is None

def test_to_freqs_sections_1_sample():
assert wl_measure_utils.to_freqs_sections_1_sample(
ITEMS_TO_SEARCH, ITEMS,
Expand Down
Loading

0 comments on commit 1157cc8

Please sign in to comment.