From 9f98ac4d5250e278469cc7e687e1c5b557b824cc Mon Sep 17 00:00:00 2001 From: BLKSerene Date: Fri, 29 Sep 2023 15:29:47 +0800 Subject: [PATCH] Misc: Update translations --- CHANGELOG.md | 2 +- .../test_measures_readability.py | 4 +- tests/tests_nlp/test_matching.py | 4 +- tests/tests_nlp/test_pos_tagging.py | 8 +- tests/tests_utils/test_paths.py | 1 + trs/zho_cn.ts | 5426 +++++++++++------ trs/zho_tw.ts | 5426 +++++++++++------ utils/wl_packaging.py | 28 +- utils/wl_trs_translate.py | 49 +- wordless/wl_main.py | 35 +- .../wl_measures/wl_measures_readability.py | 2 +- wordless/wl_nlp/wl_dependency_parsing.py | 9 +- wordless/wl_nlp/wl_lemmatization.py | 2 +- wordless/wl_nlp/wl_pos_tagging.py | 2 +- wordless/wl_settings/wl_settings_default.py | 19 +- wordless/wl_settings/wl_settings_global.py | 7 +- wordless/wl_settings/wl_settings_measures.py | 2 +- wordless/wl_utils/wl_paths.py | 14 +- 18 files changed, 7501 insertions(+), 3539 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fa3021e2..db320cb6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@

📄 Changelog

-## [3.4.0](https://github.com/BLKSerene/Wordless/releases/tag/3.4.0) - ??/??/2023 +## [3.4.0](https://github.com/BLKSerene/Wordless/releases/tag/3.4.0) - 09/30/2023 ### 🎉 New Features - Settings: Add Settings - Measures - Lexical Diversity - Utils: Add LaoNLP's Lao sentence tokenizer, word tokenizer, part-of-speech taggers, and stop word list diff --git a/tests/tests_measures/test_measures_readability.py b/tests/tests_measures/test_measures_readability.py index 2b5d2284a..6aec2f688 100644 --- a/tests/tests_measures/test_measures_readability.py +++ b/tests/tests_measures/test_measures_readability.py @@ -595,8 +595,8 @@ def test_lorge_readability_index(): print(f'\tother/12: {lorge_other_12}') assert lorge_eng_0 == 'text_too_short' - assert lorge_eng_12_corrected == 12 / 3 * 0.06 + 1 / 12 * 0.1 + 2 / 12 * 0.1 + 1.99 - assert lorge_eng_12 == 12 / 3 * 0.07 + 1 / 12 * 13.01 + 2 / 12 * 10.73 + 1.6126 + assert lorge_eng_12_corrected == 12 / 3 * 0.06 + 2 / 12 * 0.1 + 2 / 12 * 0.1 + 1.99 + assert lorge_eng_12 == 12 / 3 * 0.07 + 2 / 12 * 13.01 + 2 / 12 * 10.73 + 1.6126 assert lorge_tha_12 != 'no_support' assert lorge_other_12 == 'no_support' diff --git a/tests/tests_nlp/test_matching.py b/tests/tests_nlp/test_matching.py index ecc267658..096db9b7e 100644 --- a/tests/tests_nlp/test_matching.py +++ b/tests/tests_nlp/test_matching.py @@ -235,7 +235,7 @@ def test_match_tokens(): lang = 'eng_us', tagged = False, settings = init_search_settings(match_inflected_forms = True) - ) == set(['take', 'takes', 'taked', 'taken']) + ) == set(['take', 'takes', 'taked', 'taken', 'taking']) assert wl_matching.match_tokens( main, @@ -303,7 +303,7 @@ def test_match_ngrams(): assert wl_matching.match_ngrams( main, - search_terms = ['take walked'], + search_terms = ['taking walked'], tokens = ['take', 'takes', 'walk', 'walked', 'test'], lang = 'eng_us', tagged = False, diff --git a/tests/tests_nlp/test_pos_tagging.py b/tests/tests_nlp/test_pos_tagging.py index 9e9b5aab3..4883f10cc 100644 --- a/tests/tests_nlp/test_pos_tagging.py +++ b/tests/tests_nlp/test_pos_tagging.py @@ -136,8 +136,12 @@ def test_pos_tag(lang, pos_tagger): assert tokens_tagged == [] assert tokens_tagged_universal == [] elif lang == 'lao': - assert tokens_tagged == [('ພາສາລາວ', 'N'), ('(', 'V'), ('Lao', 'PRN'), (':', 'PUNCT'), ('ລາວ', 'PRS'), (',', 'PUNCT'), ('[', 'PUNCT'), ('l', 'PRN'), ('áː', 'PRN'), ('w', 'PRN'), (']', 'PUNCT'), ('ຫຼື', 'COJ'), ('ພາສາລາວ', 'PRN'), (',', 'PUNCT'), ('[', 'N'), ('p', 'PRN'), ('ʰáː', 'PRN'), ('s', 'PRN'), ('ǎː', 'PRN'), ('l', 'PRN'), ('áː', 'PRN'), ('w', 'PRN'), ('])', 'PRN'), ('ເປັນ', 'V'), ('ພາສາ', 'N'), ('ຕະກູນ', 'PRN'), ('ໄທ', 'PRN'), ('-', 'PUNCT'), ('ກະໄດ', 'N'), ('ຂອງ', 'PRE'), ('ຄົນ', 'N'), ('ລາວ', 'PRS'), ('ໂດຍ', 'PRE'), ('ມີ', 'V'), ('ຄົນ', 'N'), ('ເວົ້າ', 'V'), ('ໃນປະເທດລາວ', 'N'), ('ເຊິ່ງ', 'REL'), ('ເປັນ', 'V'), ('ພາສາ', 'N'), ('ລັດຖະການ', 'N'), ('ຂອງ', 'PRE'), ('ສາທາລະນະລັດ', 'N'), ('ປະຊາທິປະໄຕ', 'N'), ('ປະຊາຊົນ', 'N'), ('ລາວ', 'PRS'), ('ຂອງ', 'PRE'), ('ປະຊາກອນ', 'N'), ('ປະມານ', 'IBQ'), ('7', 'V'), ('ລ້ານ', 'N'), ('ຄົນ', 'N'), ('ແລະ', 'COJ'), ('ໃນ', 'PRE'), ('ພື້ນທີ່', 'N'), ('ພາກ', 'N'), ('ຕາເວັນອອກສຽງ', 'N'), ('ເໜືອ', 'PRN'), ('ຂອງ', 'PRE'), ('ປະເທດໄທ', 'PRN'), ('ທີ່ມີ', 'V'), ('ຄົນ', 'N'), ('ເວົ້າ', 'V'), ('ປະມານ', 'IBQ'), ('23', 'V'), ('ລ້ານ', 'N'), ('ຄົນ', 'N'), ('ທາງ', 'PRE'), ('ລັດຖະບານ', 'N'), ('ປະເທດໄທ', 'PRN'), ('ມີການສະໜັບສະໜຸນ', 'V'), ('ໃຫ້', 'PVA'), ('ເອີ້ນ', 'V'), ('ພາສາລາວ', 'N'), ('ຖິ່ນ', 'N'), ('ໄທ', 'PRN'), ('ວ່າ', 'COJ'), ('ພາສາລາວ', 'PRN'), ('ຖິ່ນ', 'PRN'), ('ອີສານ', 'N'), ('ນອກຈາກ', 'PRE'), ('ນີ້', 'DMN'), (',', 'PUNCT'), ('ຢູ່', 'PRE'), ('ທາງ', 'N'), ('ພາກ', 'N'), ('ຕາເວັນອອກສຽງ', 'N'), ('ເໜືອ', 'N'), ('ຂອງ', 'PRE'), ('ປະເທດກຳປູເຈຍ', 'N'), ('ກໍ', 'IAC'), ('ມີ', 'V'), ('ຄົນ', 'N'), ('ເວົ້າ', 'V'), ('ພາສາລາວ', 'N'), ('ຄືກັນ', 'ADJ'), ('.', 'PUNCT')] - assert tokens_tagged_universal == [('ພາສາລາວ', 'NOUN'), ('(', 'VERB'), ('Lao', 'PROPN'), (':', 'PUNCT'), ('ລາວ', 'PRON'), (',', 'PUNCT'), ('[', 'PUNCT'), ('l', 'PROPN'), ('áː', 'PROPN'), ('w', 'PROPN'), (']', 'PUNCT'), ('ຫຼື', 'CONJ'), ('ພາສາລາວ', 'PROPN'), (',', 'PUNCT'), ('[', 'NOUN'), ('p', 'PROPN'), ('ʰáː', 'PROPN'), ('s', 'PROPN'), ('ǎː', 'PROPN'), ('l', 'PROPN'), ('áː', 'PROPN'), ('w', 'PROPN'), ('])', 'PROPN'), ('ເປັນ', 'VERB'), ('ພາສາ', 'NOUN'), ('ຕະກູນ', 'PROPN'), ('ໄທ', 'PROPN'), ('-', 'PUNCT'), ('ກະໄດ', 'NOUN'), ('ຂອງ', 'ADP'), ('ຄົນ', 'NOUN'), ('ລາວ', 'PRON'), ('ໂດຍ', 'ADP'), ('ມີ', 'VERB'), ('ຄົນ', 'NOUN'), ('ເວົ້າ', 'VERB'), ('ໃນປະເທດລາວ', 'NOUN'), ('ເຊິ່ງ', 'PRON'), ('ເປັນ', 'VERB'), ('ພາສາ', 'NOUN'), ('ລັດຖະການ', 'NOUN'), ('ຂອງ', 'ADP'), ('ສາທາລະນະລັດ', 'NOUN'), ('ປະຊາທິປະໄຕ', 'NOUN'), ('ປະຊາຊົນ', 'NOUN'), ('ລາວ', 'PRON'), ('ຂອງ', 'ADP'), ('ປະຊາກອນ', 'NOUN'), ('ປະມານ', 'DET'), ('7', 'VERB'), ('ລ້ານ', 'NOUN'), ('ຄົນ', 'NOUN'), ('ແລະ', 'CONJ'), ('ໃນ', 'ADP'), ('ພື້ນທີ່', 'NOUN'), ('ພາກ', 'NOUN'), ('ຕາເວັນອອກສຽງ', 'NOUN'), ('ເໜືອ', 'PROPN'), ('ຂອງ', 'ADP'), ('ປະເທດໄທ', 'PROPN'), ('ທີ່ມີ', 'VERB'), ('ຄົນ', 'NOUN'), ('ເວົ້າ', 'VERB'), ('ປະມານ', 'DET'), ('23', 'VERB'), ('ລ້ານ', 'NOUN'), ('ຄົນ', 'NOUN'), ('ທາງ', 'ADP'), ('ລັດຖະບານ', 'NOUN'), ('ປະເທດໄທ', 'PROPN'), ('ມີການສະໜັບສະໜຸນ', 'VERB'), ('ໃຫ້', 'AUX'), ('ເອີ້ນ', 'VERB'), ('ພາສາລາວ', 'NOUN'), ('ຖິ່ນ', 'NOUN'), ('ໄທ', 'PROPN'), ('ວ່າ', 'CONJ'), ('ພາສາລາວ', 'PROPN'), ('ຖິ່ນ', 'PROPN'), ('ອີສານ', 'NOUN'), ('ນອກຈາກ', 'ADP'), ('ນີ້', 'PRON'), (',', 'PUNCT'), ('ຢູ່', 'ADP'), ('ທາງ', 'NOUN'), ('ພາກ', 'NOUN'), ('ຕາເວັນອອກສຽງ', 'NOUN'), ('ເໜືອ', 'NOUN'), ('ຂອງ', 'ADP'), ('ປະເທດກຳປູເຈຍ', 'NOUN'), ('ກໍ', 'DET'), ('ມີ', 'VERB'), ('ຄົນ', 'NOUN'), ('ເວົ້າ', 'VERB'), ('ພາສາລາວ', 'NOUN'), ('ຄືກັນ', 'ADJ'), ('.', 'PUNCT')] + if pos_tagger == 'laonlp_seqlabeling': + assert tokens_tagged == [('ພາສາລາວ', 'N'), ('(', 'V'), ('Lao', 'PRN'), (':', 'PUNCT'), ('ລາວ', 'PRS'), (',', 'PUNCT'), ('[', 'PUNCT'), ('l', 'PRN'), ('áː', 'PRN'), ('w', 'PRN'), (']', 'PUNCT'), ('ຫຼື', 'COJ'), ('ພາສາລາວ', 'PRN'), (',', 'PUNCT'), ('[', 'N'), ('p', 'PRN'), ('ʰáː', 'PRN'), ('s', 'PRN'), ('ǎː', 'PRN'), ('l', 'PRN'), ('áː', 'PRN'), ('w', 'PRN'), ('])', 'PRN'), ('ເປັນ', 'V'), ('ພາສາ', 'N'), ('ຕະກູນ', 'PRN'), ('ໄທ', 'PRN'), ('-', 'PUNCT'), ('ກະໄດ', 'N'), ('ຂອງ', 'PRE'), ('ຄົນ', 'N'), ('ລາວ', 'PRS'), ('ໂດຍ', 'PRE'), ('ມີ', 'V'), ('ຄົນ', 'N'), ('ເວົ້າ', 'V'), ('ໃນປະເທດລາວ', 'N'), ('ເຊິ່ງ', 'REL'), ('ເປັນ', 'V'), ('ພາສາ', 'N'), ('ລັດຖະການ', 'N'), ('ຂອງ', 'PRE'), ('ສາທາລະນະລັດ', 'N'), ('ປະຊາທິປະໄຕ', 'N'), ('ປະຊາຊົນ', 'N'), ('ລາວ', 'PRS'), ('ຂອງ', 'PRE'), ('ປະຊາກອນ', 'N'), ('ປະມານ', 'IBQ'), ('7', 'V'), ('ລ້ານ', 'N'), ('ຄົນ', 'N'), ('ແລະ', 'COJ'), ('ໃນ', 'PRE'), ('ພື້ນທີ່', 'N'), ('ພາກ', 'N'), ('ຕາເວັນອອກສຽງ', 'N'), ('ເໜືອ', 'PRN'), ('ຂອງ', 'PRE'), ('ປະເທດໄທ', 'PRN'), ('ທີ່ມີ', 'V'), ('ຄົນ', 'N'), ('ເວົ້າ', 'V'), ('ປະມານ', 'IBQ'), ('23', 'V'), ('ລ້ານ', 'N'), ('ຄົນ', 'N'), ('ທາງ', 'PRE'), ('ລັດຖະບານ', 'N'), ('ປະເທດໄທ', 'PRN'), ('ມີການສະໜັບສະໜຸນ', 'V'), ('ໃຫ້', 'PVA'), ('ເອີ້ນ', 'V'), ('ພາສາລາວ', 'N'), ('ຖິ່ນ', 'N'), ('ໄທ', 'PRN'), ('ວ່າ', 'COJ'), ('ພາສາລາວ', 'PRN'), ('ຖິ່ນ', 'PRN'), ('ອີສານ', 'N'), ('ນອກຈາກ', 'PRE'), ('ນີ້', 'DMN'), (',', 'PUNCT'), ('ຢູ່', 'PRE'), ('ທາງ', 'N'), ('ພາກ', 'N'), ('ຕາເວັນອອກສຽງ', 'N'), ('ເໜືອ', 'N'), ('ຂອງ', 'PRE'), ('ປະເທດກຳປູເຈຍ', 'N'), ('ກໍ', 'IAC'), ('ມີ', 'V'), ('ຄົນ', 'N'), ('ເວົ້າ', 'V'), ('ພາສາລາວ', 'N'), ('ຄືກັນ', 'ADJ'), ('.', 'PUNCT')] + assert tokens_tagged_universal == [('ພາສາລາວ', 'NOUN'), ('(', 'VERB'), ('Lao', 'PROPN'), (':', 'PUNCT'), ('ລາວ', 'PRON'), (',', 'PUNCT'), ('[', 'PUNCT'), ('l', 'PROPN'), ('áː', 'PROPN'), ('w', 'PROPN'), (']', 'PUNCT'), ('ຫຼື', 'CONJ'), ('ພາສາລາວ', 'PROPN'), (',', 'PUNCT'), ('[', 'NOUN'), ('p', 'PROPN'), ('ʰáː', 'PROPN'), ('s', 'PROPN'), ('ǎː', 'PROPN'), ('l', 'PROPN'), ('áː', 'PROPN'), ('w', 'PROPN'), ('])', 'PROPN'), ('ເປັນ', 'VERB'), ('ພາສາ', 'NOUN'), ('ຕະກູນ', 'PROPN'), ('ໄທ', 'PROPN'), ('-', 'PUNCT'), ('ກະໄດ', 'NOUN'), ('ຂອງ', 'ADP'), ('ຄົນ', 'NOUN'), ('ລາວ', 'PRON'), ('ໂດຍ', 'ADP'), ('ມີ', 'VERB'), ('ຄົນ', 'NOUN'), ('ເວົ້າ', 'VERB'), ('ໃນປະເທດລາວ', 'NOUN'), ('ເຊິ່ງ', 'PRON'), ('ເປັນ', 'VERB'), ('ພາສາ', 'NOUN'), ('ລັດຖະການ', 'NOUN'), ('ຂອງ', 'ADP'), ('ສາທາລະນະລັດ', 'NOUN'), ('ປະຊາທິປະໄຕ', 'NOUN'), ('ປະຊາຊົນ', 'NOUN'), ('ລາວ', 'PRON'), ('ຂອງ', 'ADP'), ('ປະຊາກອນ', 'NOUN'), ('ປະມານ', 'DET'), ('7', 'VERB'), ('ລ້ານ', 'NOUN'), ('ຄົນ', 'NOUN'), ('ແລະ', 'CONJ'), ('ໃນ', 'ADP'), ('ພື້ນທີ່', 'NOUN'), ('ພາກ', 'NOUN'), ('ຕາເວັນອອກສຽງ', 'NOUN'), ('ເໜືອ', 'PROPN'), ('ຂອງ', 'ADP'), ('ປະເທດໄທ', 'PROPN'), ('ທີ່ມີ', 'VERB'), ('ຄົນ', 'NOUN'), ('ເວົ້າ', 'VERB'), ('ປະມານ', 'DET'), ('23', 'VERB'), ('ລ້ານ', 'NOUN'), ('ຄົນ', 'NOUN'), ('ທາງ', 'ADP'), ('ລັດຖະບານ', 'NOUN'), ('ປະເທດໄທ', 'PROPN'), ('ມີການສະໜັບສະໜຸນ', 'VERB'), ('ໃຫ້', 'AUX'), ('ເອີ້ນ', 'VERB'), ('ພາສາລາວ', 'NOUN'), ('ຖິ່ນ', 'NOUN'), ('ໄທ', 'PROPN'), ('ວ່າ', 'CONJ'), ('ພາສາລາວ', 'PROPN'), ('ຖິ່ນ', 'PROPN'), ('ອີສານ', 'NOUN'), ('ນອກຈາກ', 'ADP'), ('ນີ້', 'PRON'), (',', 'PUNCT'), ('ຢູ່', 'ADP'), ('ທາງ', 'NOUN'), ('ພາກ', 'NOUN'), ('ຕາເວັນອອກສຽງ', 'NOUN'), ('ເໜືອ', 'NOUN'), ('ຂອງ', 'ADP'), ('ປະເທດກຳປູເຈຍ', 'NOUN'), ('ກໍ', 'DET'), ('ມີ', 'VERB'), ('ຄົນ', 'NOUN'), ('ເວົ້າ', 'VERB'), ('ພາສາລາວ', 'NOUN'), ('ຄືກັນ', 'ADJ'), ('.', 'PUNCT')] + elif pos_tagger == 'laonlp_yunshan_cup_2020': + assert tokens_tagged == [('ພາສາລາວ', 'PRN'), ('(', 'PUNCT'), ('Lao', 'PRN'), (':', 'PUNCT'), ('ລາວ', 'PRS'), (',', 'PUNCT'), ('[', 'COJ'), ('l', 'N'), ('áː', 'N'), ('w', 'N'), (']', 'PUNCT'), ('ຫຼື', 'COJ'), ('ພາສາລາວ', 'PRN'), (',', 'PUNCT'), ('[', 'PUNCT'), ('p', 'PRN'), ('ʰáː', 'PRN'), ('s', 'PRN'), ('ǎː', 'PRN'), ('l', 'PRN'), ('áː', 'PRN'), ('w', 'PRN'), ('])', 'PRN'), ('ເປັນ', 'V'), ('ພາສາ', 'N'), ('ຕະກູນ', 'PRN'), ('ໄທ', 'PRN'), ('-', 'PUNCT'), ('ກະໄດ', 'N'), ('ຂອງ', 'PRE'), ('ຄົນ', 'N'), ('ລາວ', 'PRS'), ('ໂດຍ', 'PRE'), ('ມີ', 'V'), ('ຄົນ', 'N'), ('ເວົ້າ', 'V'), ('ໃນປະເທດລາວ', 'N'), ('ເຊິ່ງ', 'REL'), ('ເປັນ', 'V'), ('ພາສາ', 'N'), ('ລັດຖະການ', 'N'), ('ຂອງ', 'PRE'), ('ສາທາລະນະລັດ', 'N'), ('ປະຊາທິປະໄຕ', 'N'), ('ປະຊາຊົນ', 'N'), ('ລາວ', 'PRS'), ('ຂອງ', 'PRE'), ('ປະຊາກອນ', 'N'), ('ປະມານ', 'IBQ'), ('7', 'V'), ('ລ້ານ', 'V'), ('ຄົນ', 'N'), ('ແລະ', 'COJ'), ('ໃນ', 'PRE'), ('ພື້ນທີ່', 'N'), ('ພາກ', 'N'), ('ຕາເວັນອອກສຽງ', 'V'), ('ເໜືອ', 'PRN'), ('ຂອງ', 'PRE'), ('ປະເທດໄທ', 'PRN'), ('ທີ່ມີ', 'V'), ('ຄົນ', 'N'), ('ເວົ້າ', 'V'), ('ປະມານ', 'IBQ'), ('23', 'V'), ('ລ້ານ', 'CLF'), ('ຄົນ', 'N'), ('ທາງ', 'PRE'), ('ລັດຖະບານ', 'N'), ('ປະເທດໄທ', 'PRN'), ('ມີການສະໜັບສະໜຸນ', 'V'), ('ໃຫ້', 'PVA'), ('ເອີ້ນ', 'V'), ('ພາສາລາວ', 'N'), ('ຖິ່ນ', 'N'), ('ໄທ', 'PRN'), ('ວ່າ', 'COJ'), ('ພາສາລາວ', 'PRN'), ('ຖິ່ນ', 'PRN'), ('ອີສານ', 'N'), ('ນອກຈາກ', 'PRE'), ('ນີ້', 'DMN'), (',', 'PUNCT'), ('ຢູ່', 'ADV'), ('ທາງ', 'PRE'), ('ພາກ', 'N'), ('ຕາເວັນອອກສຽງ', 'N'), ('ເໜືອ', 'N'), ('ຂອງ', 'PRE'), ('ປະເທດກຳປູເຈຍ', 'N'), ('ກໍ', 'IAC'), ('ມີ', 'V'), ('ຄົນ', 'N'), ('ເວົ້າ', 'V'), ('ພາສາລາວ', 'N'), ('ຄືກັນ', 'ADJ'), ('.', 'PUNCT')] + assert tokens_tagged_universal == [('ພາສາລາວ', 'PROPN'), ('(', 'PUNCT'), ('Lao', 'PROPN'), (':', 'PUNCT'), ('ລາວ', 'PRON'), (',', 'PUNCT'), ('[', 'CONJ'), ('l', 'NOUN'), ('áː', 'NOUN'), ('w', 'NOUN'), (']', 'PUNCT'), ('ຫຼື', 'CONJ'), ('ພາສາລາວ', 'PROPN'), (',', 'PUNCT'), ('[', 'PUNCT'), ('p', 'PROPN'), ('ʰáː', 'PROPN'), ('s', 'PROPN'), ('ǎː', 'PROPN'), ('l', 'PROPN'), ('áː', 'PROPN'), ('w', 'PROPN'), ('])', 'PROPN'), ('ເປັນ', 'VERB'), ('ພາສາ', 'NOUN'), ('ຕະກູນ', 'PROPN'), ('ໄທ', 'PROPN'), ('-', 'PUNCT'), ('ກະໄດ', 'NOUN'), ('ຂອງ', 'ADP'), ('ຄົນ', 'NOUN'), ('ລາວ', 'PRON'), ('ໂດຍ', 'ADP'), ('ມີ', 'VERB'), ('ຄົນ', 'NOUN'), ('ເວົ້າ', 'VERB'), ('ໃນປະເທດລາວ', 'NOUN'), ('ເຊິ່ງ', 'PRON'), ('ເປັນ', 'VERB'), ('ພາສາ', 'NOUN'), ('ລັດຖະການ', 'NOUN'), ('ຂອງ', 'ADP'), ('ສາທາລະນະລັດ', 'NOUN'), ('ປະຊາທິປະໄຕ', 'NOUN'), ('ປະຊາຊົນ', 'NOUN'), ('ລາວ', 'PRON'), ('ຂອງ', 'ADP'), ('ປະຊາກອນ', 'NOUN'), ('ປະມານ', 'DET'), ('7', 'VERB'), ('ລ້ານ', 'VERB'), ('ຄົນ', 'NOUN'), ('ແລະ', 'CONJ'), ('ໃນ', 'ADP'), ('ພື້ນທີ່', 'NOUN'), ('ພາກ', 'NOUN'), ('ຕາເວັນອອກສຽງ', 'VERB'), ('ເໜືອ', 'PROPN'), ('ຂອງ', 'ADP'), ('ປະເທດໄທ', 'PROPN'), ('ທີ່ມີ', 'VERB'), ('ຄົນ', 'NOUN'), ('ເວົ້າ', 'VERB'), ('ປະມານ', 'DET'), ('23', 'VERB'), ('ລ້ານ', 'PART'), ('ຄົນ', 'NOUN'), ('ທາງ', 'ADP'), ('ລັດຖະບານ', 'NOUN'), ('ປະເທດໄທ', 'PROPN'), ('ມີການສະໜັບສະໜຸນ', 'VERB'), ('ໃຫ້', 'AUX'), ('ເອີ້ນ', 'VERB'), ('ພາສາລາວ', 'NOUN'), ('ຖິ່ນ', 'NOUN'), ('ໄທ', 'PROPN'), ('ວ່າ', 'CONJ'), ('ພາສາລາວ', 'PROPN'), ('ຖິ່ນ', 'PROPN'), ('ອີສານ', 'NOUN'), ('ນອກຈາກ', 'ADP'), ('ນີ້', 'PRON'), (',', 'PUNCT'), ('ຢູ່', 'ADV'), ('ທາງ', 'ADP'), ('ພາກ', 'NOUN'), ('ຕາເວັນອອກສຽງ', 'NOUN'), ('ເໜືອ', 'NOUN'), ('ຂອງ', 'ADP'), ('ປະເທດກຳປູເຈຍ', 'NOUN'), ('ກໍ', 'DET'), ('ມີ', 'VERB'), ('ຄົນ', 'NOUN'), ('ເວົ້າ', 'VERB'), ('ພາສາລາວ', 'NOUN'), ('ຄືກັນ', 'ADJ'), ('.', 'PUNCT')] elif lang == 'rus': if pos_tagger == 'nltk_perceptron_rus': assert tokens_tagged == [('Ру́сский', 'A=m'), ('язы́к', 'S'), ('(', 'NONLEX'), ('[', 'NONLEX'), ('ˈruskʲɪi̯', 'NONLEX'), ('jɪˈzɨk', 'NONLEX'), (']', 'NONLEX'), ('Информация', 'S'), ('о', 'PR'), ('файле', 'S'), ('слушать', 'V'), (')', 'NONLEX'), ('[', 'NONLEX'), ('~', 'NONLEX'), ('3', 'NUM=ciph'), (']', 'NONLEX'), ('[', 'NONLEX'), ('⇨', 'NONLEX'), (']', 'NONLEX'), ('—', 'NONLEX'), ('язык', 'S'), ('восточнославянской', 'A=f'), ('группы', 'S'), ('славянской', 'A=f'), ('ветви', 'S'), ('индоевропейской', 'A=f'), ('языковой', 'A=f'), ('семьи', 'S'), (',', 'NONLEX'), ('национальный', 'A=m'), ('язык', 'S'), ('русского', 'A=m'), ('народа', 'S'), ('.', 'NONLEX')] diff --git a/tests/tests_utils/test_paths.py b/tests/tests_utils/test_paths.py index 2b262c522..2f3679047 100644 --- a/tests/tests_utils/test_paths.py +++ b/tests/tests_utils/test_paths.py @@ -35,6 +35,7 @@ def test_get_normalized_dir(): def test_get_path_file(): assert wl_paths.get_path_file('') assert wl_paths.get_path_file('a', 'b', 'c').endswith(os.path.sep.join(['a', 'b', 'c'])) + assert wl_paths.get_path_file('a', '..', 'b').endswith('b') def test_get_path_data(): assert wl_paths.get_path_data('a').endswith(os.path.sep.join(['data', 'a'])) diff --git a/trs/zho_cn.ts b/trs/zho_cn.ts index c1a662abd..10b2fed07 100644 --- a/trs/zho_cn.ts +++ b/trs/zho_cn.ts @@ -169,85 +169,90 @@ Wl_Dialog_About - + About Wordless 关于 Wordless - + - <div style="text-align: center;"> - <h2>Wordless {}</h2> - <div> - An Integrated Corpus Tool with Multilingual Support<br> - for the Study of Language, Literature, and Translation - </div> - </div> - - - <div style="text-align: center;"> - <h2>Wordless {}</h2> - <div> - 一款拥有多语种支持的语料库集成工具<br> - 可用于语言学、文学及翻译研究 - </div> - </div> - + <div style="text-align: center;"> + <h2>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Wordless</h2> + <div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Version {}</div> + </div> + + <div style="text-align: center;"> + <h2>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Wordless</h2> + <div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;版本 {}</div> + </div> + - + - <hr> - <div style="text-align: center;"> - Copyright (C) 2018-2023&nbsp;&nbsp;Ye Lei (叶磊)<br> - Licensed Under GNU GPLv3<br> - All Other Rights Reserved - </div> - + <div style="text-align: center;"> + An Integrated Corpus Tool with Multilingual Support<br> + for the Study of Language, Literature, and Translation + </div> + + <hr> + + <div style="text-align: center;"> + Copyright (C) 2018-2023&nbsp;&nbsp;Ye Lei (叶磊)<br> + Licensed Under GNU GPLv3<br> + All Other Rights Reserved + </div> + - <hr> - <div style="text-align: center;"> - Copyright (C) 2018-2023&nbsp;&nbsp;叶磊<br> - 基于 GNU GPLv3 开源许可<br> - 保留所有其他权利 - </div> - + <div style="text-align: center;"> + 一款拥有多语种支持的语料库集成工具<br> + 可用于语言学、文学及翻译研究 + </div> + + <hr> + + <div style="text-align: center;"> + 版权所有 (C) 2018-2023&nbsp;&nbsp;Ye Lei (叶磊)<br> + 基于 GNU GPLv3 协议<br> + 保留其他所有权利 + </div> + Wl_Dialog_Acks - + Name 名称 - + Version 版本 - + Authors 作者 - + License 许可 - + Acknowledgments 致谢 - + ACKNOWLEDGMENTS.md ACKNOWLEDGMENTS_zho_cn.md - + <div> As Wordless stands on the shoulders of giants, I hereby extend my sincere gratitude to the following open-source projects without which this project would not have been possible: @@ -263,7 +268,7 @@ Wl_Dialog_Changelog - + Changelog 更新日志 @@ -271,17 +276,17 @@ Wl_Dialog_Check_Updates - + Check for updates on startup 启动时检查更新 - + Cancel 取消 - + <div> Checking for updates... @@ -294,7 +299,7 @@ - + <div> Hooray, you are using the latest version of Wordless! @@ -307,7 +312,7 @@ - + <div> A network error has occurred, please check your network settings and try again or <a href="https://github.com/BLKSerene/Wordless/releases">check for updates manually</a>. @@ -320,17 +325,17 @@ - + OK 确认 - + Check for Updates 检查更新 - + <div> Wordless {} is out, click <a href="https://github.com/BLKSerene/Wordless#download"><b>HERE</b></a> to download the latest version of Wordless. @@ -343,27 +348,27 @@ - + Current version: 当前版本: - + Try again 重试 - + Latest version: Checking... 最新版本:查询中…… - + Latest version: 最新版本: - + Latest version: Network error 最新版本:网络错误 @@ -371,27 +376,27 @@ Wl_Dialog_Citing - + Citing 引用 - + Select citation system: 选择引用体系: - + APA (7th edition) APA(第七版) - + MLA (8th edition) MLA(第八版) - + <div> If you are going to publish a work that uses Wordless, please cite as follows. @@ -453,7 +458,7 @@ Wl_Dialog_Confirm_Exit - + <div> Are you sure you want to exit Wordless? @@ -472,22 +477,22 @@ - + Always confirm on exit 关闭时总是提示确认 - + Exit 退出 - + Cancel 取消 - + Exit Wordless 退出 Wordless @@ -523,7 +528,7 @@ Wl_Dialog_Donating - + <div> If you would like to support the development of Wordless, you may donate via <a href="https://www.paypal.com/">PayPal</a>, <a href="https://global.alipay.com/">Alipay</a>, or <a href="https://pay.weixin.qq.com/index.php/public/wechatpay_en">WeChat Pay</a>. @@ -536,22 +541,22 @@ - + Donating via: 赞助途径: - + Alipay 支付宝 - + WeChat Pay 微信支付 - + Donating 赞助 @@ -582,12 +587,12 @@ Wl_Dialog_Need_Help - + Need Help? 需要帮助? - + <div> If you have any questions, find software bugs, need to provide feedback, or want to submit feature requests, you may seek support from the open-source community or contact me directly via any of the support channels listed below. @@ -600,62 +605,62 @@ - + Support Channel 支持渠道 - + Information 信息 - + <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/doc_eng.md">Documentation</a> <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/doc_eng.md">文档</a> - + <a href="https://github.com/BLKSerene/Wordless/issues">Gihub Issues</a> - + <a href="https://github.com/BLKSerene/Wordless/discussions">Gihub Discussions</a> - + Official documentation 官方文档 - + Tutorial videos 视频教程 - + Bug reports Bug 提交 - + Usage questions 使用问题 - + Email support 邮件咨询 - + <a href="https://www.wechat.com/en/">WeChat</a> official account <a href="https://www.wechat.com/en/">微信</a>公众号 - + <a href="https://www.youtube.com/@BLKSerene">YouTube</a> | <a href="https://space.bilibili.com/34963752/video">bilibili</a> <a href="https://www.youtube.com/@BLKSerene">YouTube</a> | <a href="https://space.bilibili.com/34963752/video">B 站</a> @@ -663,12 +668,12 @@ Wl_Dialog_Preview_Settings - + coarse-grained 粗分 - + fine-grained 细分 @@ -969,27 +974,27 @@ Wl_Dialog_Settings - + Save 保存 - + Cancel 取消 - + OK 确认 - + Copy 复制 - + Close 关闭 @@ -997,7 +1002,7 @@ Wl_Loading - + Initializing Wordless... 初始化 Wordless…… @@ -1005,242 +1010,242 @@ Wl_Main - + Loading settings... 载入设置…… - + Initializing main window... 初始化主窗体…… - + Ready! 就绪! - + Starting Wordless... 启动 Wordless…… - + &File 文件 - + &Preferences 偏好 - + &Help 帮助 - + &Open Files... 打开文件... - + Open files 打开文件 - + Open &Folder... 打开文件夹... - + Open all files in the folder 打开文件夹下的所有文件 - + &Reopen Closed Files 重新打开已关闭文件 - + Reopen closed files 重新打开已经关闭的文件 - + S&elect All 全选 - + Select all files 勾选所有文件 - + &Deselect All 取消选中所有 - + Deselect all files 取消勾选所有文件 - + &Invert Selection 反选 - + Invert file selection 反向勾选文件 - + &Close Selected 关闭选中 - + Close selected file(s) 关闭已选中的文件 - + C&lose All 关闭所有 - + Close all files 关闭所有文件 - + &Exit... 退出... - + Exit the program 退出程序 - + &Settings 设置 - + Change settings 更改设置 - + &Display Language 显示语言 - + &Reset Layouts 重置布局 - + Reset layouts 重置布局 - + &Show Status Bar 显示状态栏 - + Show/Hide the status bar 显示/隐藏状态栏 - + &Citing 引用 - + Show information about citing 显示引用信息 - + &Acknowledgments 致谢 - + Show acknowldgments 显示致谢 - + &Need Help? 需要帮助? - + Show help information 显示帮助信息 - + &Donating 赞助 - + Show information about donating 显示赞助相关信息 - + Check &for Updates 检查更新 - + Check for updates of Wordless 检查 Wordless 更新 - + C&hangelog 更新日志 - + Show Changelog 显示更新日志 - + About &Wordless 关于 Wordless - + Show information about Wordless 显示 Wordless 的相关信息 - + Reset Layouts 重置布局 - + <div>Do you want to reset all layouts to their default settings?</div> @@ -1249,57 +1254,57 @@ - + Profiler 分析工具 - + Concordancer 索引工具 - + Parallel Concordancer 平行索引工具 - + Wordlist Generator 词表生成器 - + N-gram Generator n 元组生成器 - + Collocation Extractor 搭配抽取器 - + Colligation Extractor 类联接抽取器 - + Keyword Extractor 关键词抽取器 - + Observed Files 观察文件 - + Reference Files 参照文件 - + Dependency Parser 依存分析器 @@ -1372,22 +1377,22 @@ 统计方法 - + Dispersion 分布 - + Adjusted Frequency 调整频数 - + Statistical Significance 统计显著性 - + Effect Size 效应量 @@ -1438,20 +1443,15 @@ 词性标注 - + Bayes Factor 贝叶斯因子 - + Tables 表格 - - - Profiler - 分析工具 - Dependency Parsing @@ -1492,6 +1492,11 @@ Sentiment Analysis 情感分析 + + + Lexical Diversity + 词汇多样性 + Wl_Settings_Dependency_Parsing @@ -1526,12 +1531,12 @@ 预览设置 - + Show preview 显示预览 - + Processing... 处理中…… @@ -1539,12 +1544,12 @@ Wl_Settings_Figs_Line_Charts - + General Settings 全局设置 - + Font: 字体: @@ -1552,132 +1557,132 @@ Wl_Settings_Figs_Network_Graphs - + Node Settings 节点设置 - + Node shape: 节点形状: - + Node size: 节点大小: - + Node color: 节点颜色: - + Node opacity: 节点不透明度: - + Node Label Settings 节点标签设置 - + Label font: 标签字体: - + Label font size: 标签字体大小: - + Label font weight: 标签字体粗细: - + Label font color: 标签字体颜色: - + Label opacity: 标签不透明度: - + Edge Settings 边设置 - + Connection style: 连接样式: - + Edge width: 边的宽度: - + Minimum 最小 - + Maximum 最大 - + Edge style: 边的样式: - + Edge color: 边的颜色: - + Edge opacity: 边的不透明度: - + Arrow style: 箭头样式: - + Arrow size: 箭头大小: - + Edge Label Settings 边的标签设置 - + Label position: 标签位置: - + Rotate labels to lie parallel to edges 旋转标签使其与边平行 - + Advanced Settings 高级设置 - + Layout: 布局: @@ -1685,112 +1690,112 @@ Wl_Settings_Figs_Word_Clouds - + Font Settings 字体设置 - + Font: 字体: - + Select Font 选择字体 - + Font size: 字体大小: - + Minimum 最小 - + Maximum 最大 - + Relative scaling: 相对缩放: - + Font color: 字体颜色: - + Custom 自定义 - + Monochrome 单色 - + Colormap 色谱 - + Auto 自动 - + Background Settings 背景设置 - + Background color: 背景颜色: - + Mask Settings 蒙版设置 - + Mask path: 蒙版路径: - + Select Mask 选择蒙版 - + Contour width: 轮廓宽度: - + Contour color: 轮廓颜色: - + Advanced Settings 高级设置 - + Prefer horizontal: 水平优先度: - + Allow repeated words 允许重复单词 @@ -2081,7 +2086,7 @@ Wl_Settings_Measures_Adjusted_Freq - + General Settings 全局设置 @@ -2089,17 +2094,17 @@ Wl_Settings_Measures_Bayes_Factor - + Log-likelihood Ratio Test 对数似然比检验 - + Apply Yates's correction for continuity 应用耶茨连续性校正 - + Student's t-test (2-sample) 学生 t 检验(双样本) @@ -2107,17 +2112,17 @@ Wl_Settings_Measures_Dispersion - + General Settings 全局设置 - + Gries's DP - + Apply normalization 应用归一化 @@ -2125,16 +2130,89 @@ Wl_Settings_Measures_Effect_Size - + Kilgarriff's Ratio - + Smoothing parameter: 平滑系数: + + Wl_Settings_Measures_Lexical_Diversity + + + HD-D + + + + + Sample size: + 样本大小: + + + + LogTTR + + + + + Mean Segmental TTR + + + + + Number of tokens in each segment: + 各部分中形符数: + + + + Measure of Textual Lexical Diversity + + + + + Factor size: + 因子大小: + + + + Moving-average TTR + + + + + Window size: + 窗口大小: + + + + Repeat Rate + + + + + Use data: + 使用数据: + + + + Rank-frequency distribution + 频数排序分布 + + + + Frequency spectrum + 频数谱 + + + + Shannon Entropy + 香农熵 + + Wl_Settings_Measures_Readability @@ -2175,7 +2253,7 @@ Cloze criterion score: - + 完型填空标准分: @@ -2215,17 +2293,12 @@ Dutch variant: - 荷兰语变体: + 荷兰语变体: Spanish variant: - 西班牙语变体: - - - - Brouwer's Leesindex A - Brouwer Leesindex A + 西班牙语变体: @@ -2245,7 +2318,7 @@ English variant: - 英语变体: + 英语变体: @@ -2291,57 +2364,57 @@ Wl_Settings_Measures_Statistical_Significance - + Fisher's Exact Test 费希尔精确检验 - + Log-likelihood Ratio Test 对数似然比检验 - + Apply Yates's correction for continuity 应用耶茨连续性校正 - + Mann-Whitney U Test 曼惠特尼 U 检验 - + Apply continuity correction 应用连续性校正 - + Pearson's Chi-squared Test 皮尔森卡方检验 - + Student's t-test (1-sample) 学生 t 检验(单样本) - + Student's t-test (2-sample) 学生 t 检验(双样本) - + Welch's t-test Welch t 检验 - + z-score z 值 - + z-score (Berry-Rogghe) z 值(Berry-Rogghe) @@ -2432,17 +2505,17 @@ 示例 - + Fetching data... 获取数据中…… - + Reset Mappings 重置映射 - + <div>Do you want to reset all mappings to their default settings?</div> <div><b>Note: This will only affect the mapping settings in the currently shown table.</b></div> @@ -2453,12 +2526,12 @@ - + Reset All Mappings 重置所有映射 - + <div>Do you want to reset all mappings to their default settings?</div> <div><b>Warning: This will affect the mapping settings in all tables!</b></div> @@ -2479,7 +2552,7 @@ 重置所有 - + Number of part-of-speech tags: 词性标签数: @@ -2631,7 +2704,7 @@ 选择语种: - + Show preview 显示预览 @@ -2677,42 +2750,42 @@ Wl_Settings_Tables_Concordancer - + Sorting Settings 排序设置 - + Highlight colors: 高亮颜色: - + Level 1 / Node: 第1级/节点: - + Level 2: 第2级: - + Level 3: 第3级: - + Level 4: 第4级: - + Level 5: 第5级: - + Level 6: 第6级: @@ -2720,29 +2793,16 @@ Wl_Settings_Tables_Parallel_Concordancer - + Color Settings 颜色设置 - + Search term color: 检索项颜色: - - Wl_Settings_Tables_Profiler - - - General Settings - 全局设置 - - - - Number of tokens in each section when calculating standardized type-token ratio: - 计算标准化类符形符比时各部分中的形符数: - - Wl_Settings_Word_Tokenization @@ -3629,47 +3689,47 @@ Frequency % Wl_Table_Profiler_Len_Breakdown - + Total 合计 - + Count of {}-token-long Sentences {}个形符长的句子数 - + Count of {}-token-long Sentences % {}个形符长的句子数% - + Count of {}-token-long Sentence Segment {}个形符长的句段数 - + Count of {}-token-long Sentence Segment % {}个形符长的句段数% - + Count of {}-character-long Tokens {}个字符长的形符数 - + Count of {}-character-long Tokens % {}个字符长的形符数% - + Count of {}-syllables-long Tokens {}个音节长的形符数 - + Count of {}-syllables-long Tokens % {}个音节长的形符数% @@ -3677,16 +3737,24 @@ Frequency % Wl_Table_Profiler_Lens - + Total 合计 - + No language support 无语种支持 + + Wl_Table_Profiler_Lexical_Diversity + + + Total + 合计 + + Wl_Table_Profiler_Readability @@ -3705,14 +3773,6 @@ Frequency % 文件过短 - - Wl_Table_Profiler_Ttrs - - - Total - 合计 - - Wl_Table_Results_Sort_Conordancer @@ -3986,7 +4046,7 @@ Frequency % Wl_Worker_Colligation_Extractor_Fig - + Rendering figure... 渲染图表中…… @@ -3994,7 +4054,7 @@ Frequency % Wl_Worker_Colligation_Extractor_Table - + Rendering table... 渲染表格中…… @@ -4002,7 +4062,7 @@ Frequency % Wl_Worker_Collocation_Extractor_Fig - + Rendering figure... 渲染图表中…… @@ -4010,7 +4070,7 @@ Frequency % Wl_Worker_Collocation_Extractor_Table - + Rendering table... 渲染表格中…… @@ -4087,32 +4147,45 @@ Frequency % Wl_Worker_Dependency_Parser - + Rendering table... 渲染表格中…… - Wl_Worker_Download_Spacy_Model + Wl_Worker_Download_Model_Spacy - + Fetching model information... 获取模型信息中…… - + + Downloading model ({:.2f} MB)... + 下载模型中({:.2f} MB)…… + + + Downloading model... 下载模型中…… - + Download completed successfully. 模型下载完毕。 + + + Wl_Worker_Download_Model_Stanza - - Downloading model ({:.2f} MB)... - 下载模型中({:.2f} MB)…… + + Downloading model... + 下载模型中…… + + + + Download completed successfully. + 模型下载完毕。 @@ -4131,7 +4204,7 @@ Frequency % Wl_Worker_Fetch_Data_Tagsets - + Updating table... 更新表格中…… @@ -4139,7 +4212,7 @@ Frequency % Wl_Worker_Keyword_Extractor_Fig - + Rendering figure... 渲染图表中…… @@ -4147,7 +4220,7 @@ Frequency % Wl_Worker_Keyword_Extractor_Table - + Rendering table... 渲染表格中…… @@ -4184,7 +4257,7 @@ Frequency % Wl_Worker_Profiler_Table - + Rendering table... 渲染表格中…… @@ -4717,11 +4790,6 @@ Frequency Counts 计数 - - - Type-token Ratios - 类符形符比 - Lengths @@ -4732,6 +4800,11 @@ Frequency Length Breakdown 细分长度 + + + Lexical Diversity + 词汇多样性 + Wrapper_Wordlist_Generator @@ -4793,164 +4866,16 @@ Frequency - init_settings_default - - - None - - + init_settings_global - - Total - 合计 + + Afrikaans + 南非语 - - Frequency - 频数 - - - - General - 全局 - - - - Non-embedded - 非嵌入式 - - - - Header - - - - - Embedded - 嵌入式 - - - - Others - 其他 - - - - Paragraph - 段落 - - - - Sentence - 句子 - - - - Word - 单词 - - - - Profiler - 分析工具 - - - - Token - 形符 - - - - File - 文件 - - - - Ascending - 升序 - - - - p-value - p 值 - - - - Two-tailed - 双尾 - - - - Observed Files - 观察文件 - - - - APA (7th edition) - APA(第七版) - - - - Counts - 计数 - - - - Token no. - 形符序号 - - - - Line chart - 折线图 - - - - Excel workbooks (*.xlsx) - Excel 工作簿 (*.xlsx) - - - - Part of speech - 词性 - - - - Relative frequency - 相对频数 - - - - Colormap - 色谱 - - - - Policy one - - - - - New - 新版 - - - - Original - 原版 - - - - init_settings_global - - - Afrikaans - 南非语 - - - - Albanian - 阿尔巴尼亚语 + + Albanian + 阿尔巴尼亚语 @@ -4963,1052 +4888,1017 @@ Frequency 阿拉伯语 - - Armenian - 亚美尼亚语 - - - + Assamese 阿萨姆语 - + Asturian 阿斯图里亚斯语 - + Azerbaijani 阿塞拜疆语 - + Basque 巴斯克语 - + Belarusian 白俄罗斯语 - + Bengali 孟加拉语 - - - Breton - 布列塔尼语 - Bulgarian 保加利亚语 - + Catalan 加泰罗尼亚语 - + Chinese (Simplified) 汉语(简体) - + Chinese (Traditional) 汉语(繁体) - + Croatian 克罗地亚语 - + Czech 捷克语 - + Danish 丹麦语 - + Dutch 荷兰语 - + English (United Kingdom) 英语(英国) - + English (United States) 英语(美国) - + Esperanto 世界语 - + Estonian 爱沙尼亚语 - + Finnish 芬兰语 - + French 法语 - + Galician 加里西亚语 - + German (Austria) 德语(奥地利) - + German (Germany) 德语(德国) - + German (Switzerland) 德语(瑞士) - + Greek (Ancient) 希腊语(古) - + Greek (Modern) 希腊语(现代) - + Gujarati 古吉拉特语 - - Hausa - 豪萨语 - - - - Hebrew - 希伯来语 - - - + Hindi 印地语 - + Hungarian 匈牙利语 - + Icelandic 冰岛语 - + Indonesian 印度尼西亚语 - + Irish 爱尔兰语 - + Italian 意大利语 - + Japanese 日语 - + Kannada 卡纳达语 - + Kazakh 哈萨克语 - + Korean 韩语 - - Kurdish - 库尔德语 - - - + Kyrgyz 吉尔吉斯语 - + Latin 拉丁语 - + Latvian 拉脱维亚语 - + Ligurian 利古里亚语 - + Lithuanian 立陶宛语 - + Luxembourgish 卢森堡语 - + Macedonian 马其顿语 - + Malay 马来语 - + Malayalam 马拉雅拉姆语 - + Manx 马恩语 - + Marathi 马拉地语 - + Meitei 曼尼普尔语 - + Mongolian 蒙古语 - + Nepali 尼泊尔语 - + Norwegian Bokmål 书面挪威语 - + Norwegian Nynorsk 新挪威语 - + Oriya 奥里亚语 - + Persian 波斯语 - + Polish 波兰语 - + Portuguese (Brazil) 葡萄牙语(巴西) - + Portuguese (Portugal) 葡萄牙语(葡萄牙) - + Romanian 罗马尼亚语 - + Russian 俄语 - + Sanskrit 梵语 - + Scottish Gaelic 苏格兰盖尔语 - + Serbian (Cyrillic) 塞尔维亚语(西里尔) - + Serbian (Latin) 塞尔维亚语(拉丁) - + Sinhala 僧伽罗语 - + Slovak 斯洛伐克语 - + Slovenian 斯洛文尼亚语 - - Somali - 索马里语 - - - - Sotho (Southern) - 塞索托语 - - - + Spanish 西班牙语 - + Swahili 斯瓦西里语 - + Swedish 瑞典语 - + Tagalog 他加禄语 - + Tajik 塔吉克语 - + Tamil 泰米尔语 - + Tatar 鞑靼语 - + Telugu 泰卢固语 - + Tetun Dili 帝力德顿语 - + Thai 泰语 - + Tibetan 藏语 - + Tigrinya 提格雷尼亚语 - + Tswana 茨瓦纳语 - + Turkish 土耳其语 - + Ukrainian 乌克兰语 - + Urdu 乌尔都语 - + Vietnamese 越南语 - + Welsh 威尔士语 - + Yoruba 约鲁巴语 - + Zulu 祖鲁语 - + Arabic (CP720) 阿拉伯语(CP720) - + Arabic (CP864) 阿拉伯语(CP864) - + Arabic (ISO-8859-6) 阿拉伯语(ISO-8859-6) - + Arabic (Mac OS Arabic) 阿拉伯语(Mac OS Arabic) - + Arabic (Windows-1256) 阿拉伯语(Windows-1256) - + Chinese (GB18030) 汉语(GB18030) - + Chinese (GBK) 汉语(GBK) - + Chinese (Simplified) (GB2312) 汉语(简体)(GB2312) - + Chinese (Simplified) (HZ) 汉语(简体)(HZ) - + Chinese (Traditional) (Big-5) 汉语(繁体)(Big-5) - + Chinese (Traditional) (Big5-HKSCS) 汉语(繁体)(Big5-HKSCS) - + Chinese (Traditional) (CP950) 汉语(繁体)(CP950) - + Croatian (Mac OS Croatian) 克罗地亚语(Mac OS Croatian) - + Cyrillic (CP855) 西里尔(CP855) - + Cyrillic (CP866) 西里尔(CP866) - + Cyrillic (ISO-8859-5) 西里尔(ISO-8859-5) - + Cyrillic (Mac OS Cyrillic) 西里尔(Mac OS Cyrillic) - + Cyrillic (Windows-1251) 西里尔(Windows-1251) - + English (ASCII) 英语(ASCII) - + English (EBCDIC 037) 英语(EBCDIC 037) - + English (CP437) 英语(CP437) - + European (HP Roman-8) 欧洲(HP Roman-8) - + European (Central) (CP852) 欧洲(中部)(CP852) - + European (Central) (ISO-8859-2) 欧洲(中部)(ISO-8859-2) - + European (Central) (Mac OS Central European) 欧洲(中部)(Mac OS Central European) - + European (Central) (Windows-1250) 欧洲(中部)(Windows-1250) - + European (Northern) (ISO-8859-4) 欧洲(北部)(ISO-8859-4) - + European (Southern) (ISO-8859-3) 欧洲(南部)(ISO-8859-3) - + European (Western) (EBCDIC 500) 欧洲(西部)(EBCDIC 500) - + European (Western) (CP850) 欧洲(西部)(CP850) - + European (Western) (CP858) 欧洲(西部)(CP858) - + European (Western) (CP1140) 欧洲(西部)(CP1140) - + European (Western) (ISO-8859-1) 欧洲(西部)(ISO-8859-1) - + European (Western) (ISO-8859-15) 欧洲(西部)(ISO-8859-15) - + European (Western) (Mac OS Roman) 欧洲(西部)(Mac OS Roman) - + European (Western) (Windows-1252) 欧洲(西部)(Windows-1252) - + French (CP863) 法语(CP863) - + German (EBCDIC 273) 德语(EBCDIC 273) - + Greek (CP737) 希腊语(CP737) - + Greek (CP869) 希腊语(CP869) - + Greek (CP875) 希腊语(CP875) - + Greek (ISO-8859-7) 希腊语(ISO-8859-7) - + Greek (Mac OS Greek) 希腊语(Mac OS Greek) - + Greek (Windows-1253) 希腊语(Windows-1253) - + Hebrew (CP856) 希伯来语(CP856) - + Hebrew (CP862) 希伯来语(CP862) - + Hebrew (EBCDIC 424) 希伯来语(EBCDIC 424) - + Hebrew (ISO-8859-8) 希伯来语(ISO-8859-8) - + Hebrew (Windows-1255) 希伯来语(Windows-1255) - + Icelandic (CP861) 冰岛语(CP861) - + Icelandic (Mac OS Icelandic) 冰岛语(Mac OS Icelandic) - + Japanese (CP932) 日语(CP932) - + Japanese (EUC-JP) 日语(EUC-JP) - + Japanese (EUC-JIS-2004) 日语(EUC-JIS-2004) - + Japanese (EUC-JISx0213) 日语(EUC-JISx0213) - + Japanese (ISO-2022-JP) 日语(ISO-2022-JP) - + Japanese (ISO-2022-JP-1) 日语(ISO-2022-JP-1) - + Japanese (ISO-2022-JP-2) 日语(ISO-2022-JP-2) - + Japanese (ISO-2022-JP-2004) 日语(ISO-2022-JP-2004) - + Japanese (ISO-2022-JP-3) 日语(ISO-2022-JP-3) - + Japanese (ISO-2022-JP-EXT) 日语(ISO-2022-JP-EXT) - + Japanese (Shift_JIS) 日语(Shift_JIS) - + Japanese (Shift_JIS-2004) 日语(Shift_JIS-2004) - + Japanese (Shift_JISx0213) 日语(Shift_JISx0213) - + Kazakh (KZ-1048) 哈萨克语(KZ-1048) - + Kazakh (PTCP154) 哈萨克语(PTCP154) - + Korean (EUC-KR) 韩语(EUC-KR) - + Korean (ISO-2022-KR) 韩语(ISO-2022-KR) - + Korean (JOHAB) 韩语(JOHAB) - + Korean (UHC) 韩语(UHC) - + Persian/Urdu (Mac OS Farsi) 波斯语/乌尔都语(Mac OS Farsi) - + Portuguese (CP860) 葡萄牙语(CP860) - + Romanian (Mac OS Romanian) 罗马尼亚语(Mac OS Romanian) - + Russian (KOI8-R) 俄语(KOI8-R) - + Tajik (KOI8-T) 塔吉克语(KOI8-T) - + Thai (CP874) 泰语(CP874) - + Thai (ISO-8859-11) 泰语(ISO-8859-11) - + Turkish (CP857) 土耳其语(CP857) - + Turkish (EBCDIC 1026) 土耳其语(EBCDIC 1026) - + Turkish (ISO-8859-9) 土耳其语(ISO-8859-9) - + Turkish (Mac OS Turkish) 土耳其语(Mac OS Turkish) - + Turkish (Windows-1254) 土耳其语(Windows-1254) - + Ukrainian (CP1125) 乌克兰语(CP1125) - + Ukrainian (KOI8-U) 乌克兰语(KOI8-U) - + Urdu (CP1006) 乌尔都语(CP1006) - + Vietnamese (CP1258) 越南语(CP1258) - + spaCy - Sentencizer spaCy - 分句器 - + Student's t-test (1-sample) 学生 t 检验(单样本) - + z-score z 值 - + Mann-Whitney U Test 曼惠特尼 U 检验 - + Student's t-test (2-sample) 学生 t 检验(双样本) - + Log-likelihood Ratio 对数似然比 - + t-statistic t 值 - + Dice's Coefficient Dice 系数 - + Jaccard Index 雅卡尔指数 - + Sorbian (Lower) 索布语(下) - + Sorbian (Upper) 索布语(上) - + None - + Welch's t-test Welch t 检验 - + z-score (Berry-Rogghe) z 值(Berry-Rogghe) - + Carroll's D₂ - + Gries's DP - + Juilland's D - + Lyne's D₃ - + Rosengren's S - + Zhang's Distributional Consistency - + Zhang's DC - + Engwall's FM - + Juilland's U - + Kromer's UR - + Rosengren's KF - + Difference Coefficient - + Kilgarriff's Ratio - + Log Ratio - + Minimum Sensitivity - + Poisson Collocation Measure @@ -6018,2549 +5908,4349 @@ Frequency 缅甸语 - + English (Middle) 英语(中古) - + Ganda 干达语 - + Georgian 格鲁吉亚语 - - Lugbara - 卢格巴拉语 - - - + Punjabi (Gurmukhi) 旁遮普语(古木基) - + Sámi (Northern) 萨米语(北) - + Other languages 其他语种 - + All languages (UTF-8 without BOM) 所有语种(UTF-8 无签名) - + All languages (UTF-8 with BOM) 所有语种(UTF-8 带签名) - + All languages (UTF-16 with BOM) 所有语种(UTF-16 带签名) - + All languages (UTF-16BE without BOM) 所有语种(UTF-16 大端无签名) - + All languages (UTF-16LE without BOM) 所有语种(UTF-16 小端无签名) - + All languages (UTF-32 with BOM) 所有语种(UTF-32 带签名) - + All languages (UTF-32BE without BOM) 所有语种(UTF-32 大端无签名) - + All languages (UTF-32LE without BOM) 所有语种(UTF-32 小端无签名) - + All languages (UTF-7) 所有语种(UTF-7) - + Baltic languages (CP775) 波罗的海诸语(CP775) - + Baltic languages (ISO-8859-13) 波罗的海诸语(ISO-8859-13) - + Baltic languages (Windows-1257) 波罗的海诸语(Windows-1257) - + Celtic languages (ISO-8859-14) 凯尔特语(ISO-8859-14) - + European (Southeastern) (ISO-8859-16) 欧洲(东南部)(ISO-8859-16) - + Nordic languages (CP865) 北欧诸语(CP865) - + Nordic languages (ISO-8859-10) 北欧诸语(ISO-8859-10) - + Thai (TIS-620) 泰语(TIS-620) - + CSV files (*.csv) CSV 文件 (*.csv) - + Excel workbooks (*.xlsx) Excel 工作簿 (*.xlsx) - + HTML pages (*.htm; *.html) HTML 页面 (*.htm; *.html) - + PDF files (*.pdf) PDF 文件 (*.pdf) - + Text files (*.txt) 文本文件 (*.txt) - + Translation memory files (*.tmx) 翻译记忆库文件 (*.tmx) - + Word documents (*.docx) Word 文档 (*.docx) - + XML files (*.xml) XML 文件 (*.xml) - + All files (*.*) 所有文件 (*.*) - + OpenType fonts (*.otf) OpenType 字体 (*.otf) - + TrueType fonts (*.ttf) TrueType 字体 (*.ttf) - + Blizzard mipmap format (*.blp) - + Windows bitmaps (*.bmp) Windows 位图 (*.bmp) - + Window cursor files (*.cur) Window 光标文件 (*.cur) - + Multi-page PCX files (*.dcx) 多页 PCX 文件 (*.dcx) - + DirectDraw surface (*.dds) - + Device-independent bitmaps (*.dib) 设备无关位图 (*.dib) - + Encapsulated PostScript (*.eps, *.ps) - + Flexible image transport system (*.fit, *.fits) - + Autodesk animation files (*.flc, *.fli) Autodesk 动画文件 (*.flc, *.fli) - + Fox Engine textures (*.ftex) Fox Engine 纹理 (*.ftex) - + GIMP brush files (*.gbr) GIMP 笔刷文件 (*.gbr) - + Graphics interchange format (*.gif) - + Apple icon images (*.icns) 苹果图标图片 (*.icns) - + Windows icon files (*.ico) Windows 图标文件 (*.ico) - + IPTC/NAA newsphoto files (*.iim) IPTC/NAA newsphoto 文件 (*.iim) - + IM files (*.im) IM 文件 (*.im) - + Image Tools image files (*) Image Tools 图像文件 (*) - + JPEG files (*.jfif, *.jpe, *.jpeg, *.jpg) JPEG 文件 (*.jfif, *.jpe, *.jpeg, *.jpg) - + JPEG 2000 files (*.j2c, *.j2k, *.jp2, *.jpc, *.jpf, *.jpx) JPEG 2000 文件 (*.j2c, *.j2k, *.jp2, *.jpc, *.jpf, *.jpx) - + McIDAS area files (*) McIDAS area 文件 (*) - + PhotoCD files (*.pcd) PhotoCD 文件 (*.pcd) - + Picture exchange (*.pcx) - + PIXAR raster files (*.pxr) PIXAR 栅格文件 (*.pxr) - + Portable network graphics (*.apng, *.png) - + Portable pixmap format (*.pbm, *.pgm, *.pnm, *.ppm) - + Photoshop PSD files (*.psd) Photoshop PSD 文件 (*.psd) - + Sun raster files (*.ras) Sun 栅格文件 (*.ras) - + Silicon graphics images (*.bw, *.rgb, *.rgba, *.sgi) - + SPIDER files (*) SPIDER 文件 (*) - + Truevision TGA (*.icb, *.tga, *.vda, *.vst) - + TIFF files (*.tif, *.tiff) TIFF 文件 (*.tif, *.tiff) - + WebP files (*.webp) WebP 文件 (*.webp) - + Windows metafiles (*.emf, *.wmf) Windows 元文件 (*.emf, *.wmf) - + X bitmaps (*.xbm) - + X pixmaps (*.xpm) - + XV thumbnails (*) XV 缩略图 (*) - + botok - Tibetan sentence tokenizer botok - 藏语分句器 - + NLTK - Czech Punkt sentence tokenizer NLTK - 捷克语 Punkt 分句器 - + NLTK - Danish Punkt sentence tokenizer NLTK - 丹麦语 Punkt 分句器 - + NLTK - Dutch Punkt sentence tokenizer NLTK - 荷兰语 Punkt 分句器 - + NLTK - English Punkt sentence tokenizer NLTK - 英语 Punkt 分句器 - + NLTK - Estonian Punkt sentence tokenizer NLTK - 爱沙尼亚语 Punkt 分句器 - + NLTK - Finnish Punkt sentence tokenizer NLTK - 芬兰语 Punkt 分句器 - + NLTK - French Punkt sentence tokenizer NLTK - 法语 Punkt 分句器 - + NLTK - German Punkt sentence tokenizer NLTK - 德语 Punkt 分句器 - + NLTK - Greek Punkt sentence tokenizer NLTK - 希腊语 Punkt 分句器 - + NLTK - Italian Punkt sentence tokenizer NLTK - 意大利语 Punkt 分句器 - + NLTK - Malayalam Punkt sentence tokenizer NLTK - 马拉雅拉姆语 Punkt 分句器 - + NLTK - Norwegian Punkt sentence tokenizer NLTK - 挪威语 Punkt 分句器 - + NLTK - Polish Punkt sentence tokenizer NLTK - 波兰语 Punkt 分句器 - + NLTK - Portuguese Punkt sentence tokenizer NLTK - 葡萄牙语 Punkt 分句器 - + NLTK - Russian Punkt sentence tokenizer NLTK - 俄语 Punkt 分句器 - + NLTK - Slovenian Punkt sentence tokenizer NLTK - 斯洛文尼亚语 Punkt 分句器 - + NLTK - Spanish Punkt sentence tokenizer NLTK - 西班牙语 Punkt 分句器 - + NLTK - Swedish Punkt sentence tokenizer NLTK - 瑞典语 Punkt 分句器 - + NLTK - Turkish Punkt sentence tokenizer NLTK - 土耳其语 Punkt 分句器 - + spaCy - Croatian sentence recognizer spaCy - 克罗地亚语句子识别器 - + spaCy - Dutch sentence recognizer spaCy - 荷兰语句子识别器 - + spaCy - Finnish sentence recognizer spaCy - 芬兰语句子识别器 - + spaCy - Greek (Modern) sentence recognizer spaCy - 希腊语(现代)句子识别器 - + spaCy - Italian sentence recognizer spaCy - 意大利语句子识别器 - + spaCy - Lithuanian sentence recognizer spaCy - 立陶宛语句子识别器 - + spaCy - Macedonian sentence recognizer spaCy - 马其顿语句子识别器 - + spaCy - Norwegian Bokmål sentence recognizer spaCy - 书面挪威语句子识别器 - + spaCy - Polish sentence recognizer spaCy - 波兰语句子识别器 - + spaCy - Portuguese sentence recognizer spaCy - 葡萄牙语句子识别器 - + spaCy - Romanian sentence recognizer spaCy - 罗马尼亚语句子识别器 - + spaCy - Russian sentence recognizer spaCy - 俄语句子识别器 - + spaCy - Swedish sentence recognizer spaCy - 瑞典语句子识别器 - + Underthesea - Vietnamese sentence tokenizer Underthesea - 越南语分句器 - + botok - Tibetan word tokenizer botok - 藏语分词器 - + jieba - Chinese word tokenizer jieba - 汉语分词器 - + NLTK - NIST tokenizer NLTK - NIST 分词器 - + NLTK - NLTK tokenizer NLTK - NLTK 分词器 - + NLTK - Penn Treebank tokenizer NLTK - 宾州树库分词器 - + NLTK - Regular-expression tokenizer NLTK - 正则表达式分词器 - + NLTK - Tok-tok tokenizer NLTK - Tok-tok 分词器 - + NLTK - Twitter tokenizer NLTK - 推特分词器 - + pkuseg - Chinese word tokenizer pkuseg - 汉语分词器 - + PyThaiNLP - Longest matching PyThaiNLP - 最长匹配 - + PyThaiNLP - Maximum matching PyThaiNLP - 最大匹配 - + PyThaiNLP - Maximum matching + TCC PyThaiNLP - 最大匹配 + TCC - + Sacremoses - Moses tokenizer Sacremoses - Moses 分词器 - + spaCy - Afrikaans word tokenizer spaCy - 南非语分词器 - + spaCy - Albanian word tokenizer spaCy - 阿尔巴尼亚语分词器 - + spaCy - Amharic word tokenizer spaCy - 阿姆哈拉语分词器 - + spaCy - Arabic word tokenizer spaCy - 阿拉伯语分词器 - + spaCy - Armenian word tokenizer - spaCy - 亚美尼亚语分词器 + spaCy - Armenian 分词器 - + spaCy - Azerbaijani word tokenizer spaCy - 阿塞拜疆语分词器 - + spaCy - Basque word tokenizer spaCy - 巴斯克语分词器 - + spaCy - Bengali word tokenizer spaCy - 孟加拉语分词器 - + spaCy - Bulgarian word tokenizer spaCy - 保加利亚语分词器 - + spaCy - Catalan word tokenizer spaCy - 加泰罗尼亚语分词器 - + spaCy - Chinese word tokenizer spaCy - 汉语分词器 - + spaCy - Croatian word tokenizer spaCy - 克罗地亚语分词器 - + spaCy - Czech word tokenizer spaCy - 捷克语分词器 - + spaCy - Danish word tokenizer spaCy - 丹麦语分词器 - + spaCy - Dutch word tokenizer spaCy - 荷兰语分词器 - + spaCy - English word tokenizer spaCy - 英语分词器 - + spaCy - Estonian word tokenizer spaCy - 爱沙尼亚语分词器 - + spaCy - Finnish word tokenizer spaCy - 芬兰语分词器 - + spaCy - French word tokenizer spaCy - 法语分词器 - + spaCy - Ganda word tokenizer spaCy - 干达语分词器 - + spaCy - German word tokenizer spaCy - 德语分词器 - + spaCy - Greek (Ancient) word tokenizer spaCy - 希腊语(古)分词器 - + spaCy - Greek (Modern) word tokenizer spaCy - 希腊语(现代)分词器 - + spaCy - Gujarati word tokenizer spaCy - 古吉拉特语分词器 - - spaCy - Hebrew word tokenizer - spaCy - 希伯来语分词器 - - - + spaCy - Hindi word tokenizer spaCy - 印地语分词器 - + spaCy - Hungarian word tokenizer spaCy - 匈牙利语分词器 - + spaCy - Icelandic word tokenizer spaCy - 冰岛语分词器 - + spaCy - Indonesian word tokenizer spaCy - 印度尼西亚语分词器 - + spaCy - Irish word tokenizer spaCy - 爱尔兰语分词器 - + spaCy - Italian word tokenizer spaCy - 意大利语分词器 - + spaCy - Japanese word tokenizer spaCy - 日语分词器 - + spaCy - Kannada word tokenizer spaCy - 卡纳达语分词器 - + spaCy - Kyrgyz word tokenizer spaCy - 吉尔吉斯语分词器 - + spaCy - Latin word tokenizer spaCy - 拉丁语分词器 - + spaCy - Latvian word tokenizer spaCy - 拉脱维亚语分词器 - + spaCy - Ligurian word tokenizer spaCy - 利古里亚语分词器 - + spaCy - Lithuanian word tokenizer spaCy - 立陶宛语分词器 - + spaCy - Luxembourgish word tokenizer spaCy - 卢森堡语分词器 - + spaCy - Macedonian word tokenizer spaCy - 马其顿语分词器 - + spaCy - Malayalam word tokenizer spaCy - 马拉雅拉姆语分词器 - + spaCy - Marathi word tokenizer spaCy - 马拉地语分词器 - + spaCy - Nepali word tokenizer spaCy - 尼泊尔语分词器 - - spaCy - Norwegian word tokenizer - spaCy - 挪威语分词器 - - - + spaCy - Persian word tokenizer spaCy - 波斯语分词器 - + spaCy - Polish word tokenizer spaCy - 波兰语分词器 - + spaCy - Portuguese word tokenizer spaCy - 葡萄牙语分词器 - + spaCy - Romanian word tokenizer spaCy - 罗马尼亚语分词器 - + spaCy - Russian word tokenizer spaCy - 俄语分词器 - + spaCy - Sanskrit word tokenizer spaCy - 梵语分词器 - + spaCy - Serbian word tokenizer spaCy - 塞尔维亚语分词器 - + spaCy - Sinhala word tokenizer spaCy - 僧伽罗语分词器 - + spaCy - Slovak word tokenizer spaCy - 斯洛伐克语分词器 - + spaCy - Slovenian word tokenizer spaCy - 斯洛文尼亚语分词器 - + spaCy - Sorbian (Lower) word tokenizer spaCy - 索布语(下)分词器 - + spaCy - Sorbian (Upper) word tokenizer spaCy - 索布语(上)分词器 - + spaCy - Spanish word tokenizer spaCy - 西班牙语分词器 - + spaCy - Swedish word tokenizer spaCy - 瑞典语分词器 - + spaCy - Tagalog word tokenizer spaCy - 他加禄语分词器 - + spaCy - Tamil word tokenizer spaCy - 泰米尔语分词器 - + spaCy - Tatar word tokenizer spaCy - 鞑靼语分词器 - + spaCy - Telugu word tokenizer spaCy - 泰卢固语分词器 - + spaCy - Tigrinya word tokenizer spaCy - 提格雷尼亚语分词器 - + spaCy - Tswana word tokenizer spaCy - 茨瓦纳语分词器 - + spaCy - Turkish word tokenizer spaCy - 土耳其语分词器 - + spaCy - Ukrainian word tokenizer spaCy - 乌克兰语分词器 - + spaCy - Urdu word tokenizer spaCy - 乌尔都语分词器 - + spaCy - Yoruba word tokenizer spaCy - 约鲁巴语分词器 - + SudachiPy - Japanese word tokenizer (split mode A) SudachiPy - 日语分词器(切分模式 A) - + SudachiPy - Japanese word tokenizer (split mode B) SudachiPy - 日语分词器(切分模式 B) - + SudachiPy - Japanese word tokenizer (split mode C) SudachiPy - 日语分词器(切分模式 C) - + Underthesea - Vietnamese word tokenizer Underthesea - 越南语分词器 - + Wordless - Chinese character tokenizer Wordless - 汉语分字器 - + Wordless - Japanese kanji tokenizer Wordless - 日语分字器 - + NLTK - Legality syllable tokenizer NLTK - 合法性分音节器 - + NLTK - Sonority sequencing syllable tokenizer NLTK - 响度顺序分音节器 - + Pyphen - Afrikaans syllable tokenizer Pyphen - 南非语分音节器 - + Pyphen - Albanian syllable tokenizer Pyphen - 阿尔巴尼亚语分音节器 - + Pyphen - Belarusian syllable tokenizer Pyphen - 白俄罗斯语分音节器 - + Pyphen - Bulgarian syllable tokenizer Pyphen - 保加利亚语分音节器 - + Pyphen - Catalan syllable tokenizer Pyphen - 加泰罗尼亚语分音节器 - + Pyphen - Croatian syllable tokenizer Pyphen - 克罗地亚语分音节器 - + Pyphen - Czech syllable tokenizer Pyphen - 捷克语分音节器 - + Pyphen - Danish syllable tokenizer Pyphen - 丹麦语分音节器 - + Pyphen - Dutch syllable tokenizer Pyphen - 荷兰语分音节器 - + Pyphen - English (United Kingdom) syllable tokenizer Pyphen - 英语(英国)分音节器 - + Pyphen - English (United States) syllable tokenizer Pyphen - 英语(美国)分音节器 - + Pyphen - Esperanto syllable tokenizer Pyphen - 世界语分音节器 - + Pyphen - Estonian syllable tokenizer Pyphen - 爱沙尼亚语分音节器 - + Pyphen - French syllable tokenizer Pyphen - 法语分音节器 - + Pyphen - Galician syllable tokenizer Pyphen - 加里西亚语分音节器 - + Pyphen - German (Austria) syllable tokenizer Pyphen - 德语(奥地利)分音节器 - + Pyphen - German (Germany) syllable tokenizer Pyphen - 德语(德国)分音节器 - + Pyphen - German (Switzerland) syllable tokenizer Pyphen - 德语(瑞士)分音节器 - + Pyphen - Greek (Modern) syllable tokenizer Pyphen - 希腊语(现代)分音节器 - + Pyphen - Hungarian syllable tokenizer Pyphen - 匈牙利语分音节器 - + Pyphen - Icelandic syllable tokenizer Pyphen - 冰岛语分音节器 - + Pyphen - Indonesian syllable tokenizer Pyphen - 印度尼西亚语分音节器 - + Pyphen - Italian syllable tokenizer Pyphen - 意大利语分音节器 - + Pyphen - Lithuanian syllable tokenizer Pyphen - 立陶宛语分音节器 - + Pyphen - Latvian syllable tokenizer Pyphen - 拉脱维亚语分音节器 - + Pyphen - Mongolian syllable tokenizer Pyphen - 蒙古语分音节器 - + Pyphen - Norwegian Bokmål syllable tokenizer Pyphen - 书面挪威语分音节器 - + Pyphen - Norwegian Nynorsk syllable tokenizer Pyphen - 新挪威语分音节器 - + Pyphen - Polish syllable tokenizer Pyphen - 波兰语分音节器 - + Pyphen - Portuguese (Brazil) syllable tokenizer Pyphen - 葡萄牙语(巴西)分音节器 - + Pyphen - Portuguese (Portugal) syllable tokenizer Pyphen - 葡萄牙语(葡萄牙)分音节器 - + Pyphen - Romanian syllable tokenizer Pyphen - 罗马尼亚语分音节器 - + Pyphen - Russian syllable tokenizer Pyphen - 俄语分音节器 - + Pyphen - Serbian (Cyrillic) syllable tokenizer Pyphen - 塞尔维亚语(西里尔)分音节器 - + Pyphen - Serbian (Latin) syllable tokenizer Pyphen - 塞尔维亚语(拉丁)分音节器 - + Pyphen - Slovak syllable tokenizer Pyphen - 斯洛伐克语分音节器 - + Pyphen - Slovenian syllable tokenizer Pyphen - 斯洛文尼亚语分音节器 - + Pyphen - Spanish syllable tokenizer Pyphen - 西班牙语分音节器 - + Pyphen - Swedish syllable tokenizer Pyphen - 瑞典语分音节器 - + Pyphen - Telugu syllable tokenizer Pyphen - 泰卢固语分音节器 - + Pyphen - Thai syllable tokenizer Pyphen - 泰语分音节器 - + Pyphen - Ukrainian syllable tokenizer Pyphen - 乌克兰语分音节器 - + Pyphen - Zulu syllable tokenizer Pyphen - 祖鲁语分音节器 - + PyThaiNLP - Thai syllable tokenizer PyThaiNLP - 泰语分音节器 - + botok - Tibetan part-of-speech tagger botok - 藏语词性标注器 - + jieba - Chinese part-of-speech tagger jieba - 汉语词性标注器 - + NLTK - English perceptron part-of-speech tagger NLTK - 英语感知机词性标注器 - + NLTK - Russian perceptron part-of-speech tagger NLTK - 俄语感知机词性标注器 - + pymorphy3 - Morphological analyzer pymorphy3 - 形态分析器 - + PyThaiNLP - Perceptron part-of-speech tagger (ORCHID) PyThaiNLP - 感知机词性标注器(ORCHID) - + PyThaiNLP - Perceptron part-of-speech tagger (PUD) PyThaiNLP - 感知机词性标注器(PUD) - + spaCy - Catalan part-of-speech tagger spaCy - 加泰罗尼亚语词性标注器 - + spaCy - Chinese part-of-speech tagger spaCy - 汉语词性标注器 - + spaCy - Croatian part-of-speech tagger spaCy - 克罗地亚语词性标注器 - + spaCy - Danish part-of-speech tagger spaCy - 丹麦语词性标注器 - + spaCy - Dutch part-of-speech tagger spaCy - 荷兰语词性标注器 - + spaCy - English part-of-speech tagger spaCy - 英语词性标注器 - + spaCy - Finnish part-of-speech tagger spaCy - 芬兰语词性标注器 - + spaCy - French part-of-speech tagger spaCy - 法语词性标注器 - + spaCy - German part-of-speech tagger spaCy - 德语词性标注器 - + spaCy - Greek (Modern) part-of-speech tagger spaCy - 希腊语(现代)词性标注器 - + spaCy - Italian part-of-speech tagger spaCy - 意大利语词性标注器 - + spaCy - Japanese part-of-speech tagger spaCy - 日语词性标注器 - + spaCy - Lithuanian part-of-speech tagger spaCy - 立陶宛语词性标注器 - + spaCy - Macedonian part-of-speech tagger spaCy - 马其顿语词性标注器 - + spaCy - Norwegian Bokmål part-of-speech tagger spaCy - 书面挪威语词性标注器 - + spaCy - Polish part-of-speech tagger spaCy - 波兰语词性标注器 - + spaCy - Portuguese part-of-speech tagger spaCy - 葡萄牙语词性标注器 - + spaCy - Romanian part-of-speech tagger spaCy - 罗马尼亚语词性标注器 - + spaCy - Russian part-of-speech tagger spaCy - 俄语词性标注器 - + spaCy - Spanish part-of-speech tagger spaCy - 西班牙语词性标注器 - + spaCy - Swedish part-of-speech tagger spaCy - 瑞典语词性标注器 - + spaCy - Ukrainian part-of-speech tagger spaCy - 乌克兰语词性标注器 - + SudachiPy - Japanese part-of-speech tagger SudachiPy - 日语词性标注器 - + Underthesea - Vietnamese part-of-speech tagger Underthesea - 越南语词性标注器 - + botok - Tibetan lemmatizer botok - 藏语词形还原器 - + NLTK - WordNet lemmatizer NLTK - WordNet 词形还原器 - + simplemma - Albanian lemmatizer simplemma - 阿尔巴尼亚语词形还原器 - + simplemma - Armenian lemmatizer - simplemma - 亚美尼亚语词形还原器 + simplemma - Armenian 词形还原器 - + simplemma - Asturian lemmatizer simplemma - 阿斯图里亚斯语词形还原器 - + simplemma - Bulgarian lemmatizer simplemma - 保加利亚语词形还原器 - + simplemma - Catalan lemmatizer simplemma - 加泰罗尼亚语词形还原器 - + simplemma - Czech lemmatizer simplemma - 捷克语词形还原器 - + simplemma - Danish lemmatizer simplemma - 丹麦语词形还原器 - + simplemma - Dutch lemmatizer simplemma - 荷兰语词形还原器 - + simplemma - English lemmatizer simplemma - 英语词形还原器 - + simplemma - English (Middle) lemmatizer simplemma - 英语(中古)词形还原器 - + simplemma - Estonian lemmatizer simplemma - 爱沙尼亚语词形还原器 - + simplemma - Finnish lemmatizer simplemma - 芬兰语词形还原器 - + simplemma - French lemmatizer simplemma - 法语词形还原器 - + simplemma - Galician lemmatizer simplemma - 加里西亚语词形还原器 - + simplemma - Georgian lemmatizer simplemma - 格鲁吉亚语词形还原器 - + simplemma - German lemmatizer simplemma - 德语词形还原器 - + simplemma - Greek (Modern) lemmatizer simplemma - 希腊语(现代)词形还原器 - + simplemma - Hindi lemmatizer simplemma - 印地语词形还原器 - + simplemma - Hungarian lemmatizer simplemma - 匈牙利语词形还原器 - + simplemma - Icelandic lemmatizer simplemma - 冰岛语词形还原器 - + simplemma - Indonesian lemmatizer simplemma - 印度尼西亚语词形还原器 - + simplemma - Irish lemmatizer simplemma - 爱尔兰语词形还原器 - + simplemma - Italian lemmatizer simplemma - 意大利语词形还原器 - + simplemma - Latin lemmatizer simplemma - 拉丁语词形还原器 - + simplemma - Latvian lemmatizer simplemma - 拉脱维亚语词形还原器 - + simplemma - Lithuanian lemmatizer simplemma - 立陶宛语词形还原器 - + simplemma - Luxembourgish lemmatizer simplemma - 卢森堡语词形还原器 - + simplemma - Macedonian lemmatizer simplemma - 马其顿语词形还原器 - + simplemma - Malay lemmatizer simplemma - 马来语词形还原器 - + simplemma - Manx lemmatizer simplemma - 马恩语词形还原器 - + simplemma - Norwegian Bokmål lemmatizer simplemma - 书面挪威语词形还原器 - + simplemma - Norwegian Nynorsk lemmatizer simplemma - 新挪威语词形还原器 - + simplemma - Persian lemmatizer simplemma - 波斯语词形还原器 - + simplemma - Polish lemmatizer simplemma - 波兰语词形还原器 - + simplemma - Portuguese lemmatizer simplemma - 葡萄牙语词形还原器 - + simplemma - Romanian lemmatizer simplemma - 罗马尼亚语词形还原器 - + simplemma - Russian lemmatizer simplemma - 俄语词形还原器 - + simplemma - Sámi (Northern) lemmatizer simplemma - 萨米语(北)词形还原器 - + simplemma - Scottish Gaelic lemmatizer simplemma - 苏格兰盖尔语词形还原器 - + simplemma - Serbo-Croatian lemmatizer simplemma - 塞尔维亚-克罗地亚语词形还原器 - + simplemma - Slovak lemmatizer simplemma - 斯洛伐克语词形还原器 - + simplemma - Slovenian lemmatizer simplemma - 斯洛文尼亚语词形还原器 - + simplemma - Spanish lemmatizer simplemma - 西班牙语词形还原器 - + simplemma - Swahili lemmatizer simplemma - 斯瓦西里语词形还原器 - + simplemma - Swedish lemmatizer simplemma - 瑞典语词形还原器 - + simplemma - Tagalog lemmatizer simplemma - 他加禄语词形还原器 - + simplemma - Turkish lemmatizer simplemma - 土耳其语词形还原器 - + simplemma - Ukrainian lemmatizer simplemma - 乌克兰语词形还原器 - + simplemma - Welsh lemmatizer simplemma - 威尔士语词形还原器 - + spaCy - Bengali lemmatizer spaCy - 孟加拉语词形还原器 - + spaCy - Catalan lemmatizer spaCy - 加泰罗尼亚语词形还原器 - + spaCy - Croatian lemmatizer spaCy - 克罗地亚语词形还原器 - + spaCy - Czech lemmatizer spaCy - 捷克语词形还原器 - + spaCy - Danish lemmatizer spaCy - 丹麦语词形还原器 - + spaCy - Dutch lemmatizer spaCy - 荷兰语词形还原器 - + spaCy - English lemmatizer spaCy - 英语词形还原器 - + spaCy - Finnish lemmatizer spaCy - 芬兰语词形还原器 - + spaCy - French lemmatizer spaCy - 法语词形还原器 - + spaCy - German lemmatizer spaCy - 德语词形还原器 - + spaCy - Greek (Ancient) lemmatizer spaCy - 希腊语(古)词形还原器 - + spaCy - Greek (Modern) lemmatizer spaCy - 希腊语(现代)词形还原器 - + spaCy - Hungarian lemmatizer spaCy - 匈牙利语词形还原器 - + spaCy - Indonesian lemmatizer spaCy - 印度尼西亚语词形还原器 - + spaCy - Irish lemmatizer spaCy - 爱尔兰语词形还原器 - + spaCy - Italian lemmatizer spaCy - 意大利语词形还原器 - + spaCy - Japanese lemmatizer spaCy - 日语词形还原器 - + spaCy - Lithuanian lemmatizer spaCy - 立陶宛语词形还原器 - + spaCy - Luxembourgish lemmatizer spaCy - 卢森堡语词形还原器 - + spaCy - Macedonian lemmatizer spaCy - 马其顿语词形还原器 - + spaCy - Norwegian Bokmål lemmatizer spaCy - 书面挪威语词形还原器 - + spaCy - Persian lemmatizer spaCy - 波斯语词形还原器 - + spaCy - Polish lemmatizer spaCy - 波兰语词形还原器 - + spaCy - Portuguese lemmatizer spaCy - 葡萄牙语词形还原器 - + spaCy - Romanian lemmatizer spaCy - 罗马尼亚语词形还原器 - + spaCy - Russian lemmatizer spaCy - 俄语词形还原器 - + spaCy - Serbian lemmatizer spaCy - 塞尔维亚语词形还原器 - + spaCy - Spanish lemmatizer spaCy - 西班牙语词形还原器 - + spaCy - Swedish lemmatizer spaCy - 瑞典语词形还原器 - + spaCy - Tagalog lemmatizer spaCy - 他加禄语词形还原器 - + spaCy - Turkish lemmatizer spaCy - 土耳其语词形还原器 - + spaCy - Ukrainian lemmatizer spaCy - 乌克兰语词形还原器 - + spaCy - Urdu lemmatizer spaCy - 乌尔都语词形还原器 - + SudachiPy - Japanese lemmatizer SudachiPy - 日语词形还原器 - + NLTK - Arabic stop word list NLTK - 阿拉伯语停用词表 - + NLTK - Azerbaijani stop word list NLTK - 阿塞拜疆语停用词表 - + NLTK - Basque stop word list NLTK - 巴斯克语停用词表 - + NLTK - Bengali stop word list NLTK - 孟加拉语停用词表 - + NLTK - Catalan stop word list NLTK - 加泰罗尼亚语停用词表 - + NLTK - Chinese (Simplified) stop word list NLTK - 汉语(简体)停用词表 - + NLTK - Chinese (Traditional) stop word list NLTK - 汉语(繁体)停用词表 - + NLTK - Danish stop word list NLTK - 丹麦语停用词表 - + NLTK - Dutch stop word list NLTK - 荷兰语停用词表 - + NLTK - English stop word list NLTK - 英语停用词表 - + NLTK - Finnish stop word list NLTK - 芬兰语停用词表 - + NLTK - French stop word list NLTK - 法语停用词表 - + NLTK - German stop word list NLTK - 德语停用词表 - + NLTK - Greek (Modern) stop word list NLTK - 希腊语(现代)停用词表 - - NLTK - Hebrew stop word list - NLTK - 希伯来语停用词表 - - - + NLTK - Hungarian stop word list NLTK - 匈牙利语停用词表 - + NLTK - Indonesian stop word list NLTK - 印度尼西亚语停用词表 - + NLTK - Italian stop word list NLTK - 意大利语停用词表 - - NLTK - Kazakh stop word list - NLTK - 哈萨克语停用词表 + + NLTK - Kazakh stop word list + NLTK - 哈萨克语停用词表 + + + + NLTK - Nepali stop word list + NLTK - 尼泊尔语停用词表 + + + + NLTK - Norwegian stop word list + NLTK - 挪威语停用词表 + + + + NLTK - Portuguese stop word list + NLTK - 葡萄牙语停用词表 + + + + NLTK - Romanian stop word list + NLTK - 罗马尼亚语停用词表 + + + + NLTK - Russian stop word list + NLTK - 俄语停用词表 + + + + NLTK - Slovenian stop word list + NLTK - 斯洛文尼亚语停用词表 + + + + NLTK - Spanish stop word list + NLTK - 西班牙语停用词表 + + + + NLTK - Swedish stop word list + NLTK - 瑞典语停用词表 + + + + NLTK - Tajik stop word list + NLTK - 塔吉克语停用词表 + + + + NLTK - Turkish stop word list + NLTK - 土耳其语停用词表 + + + + PyThaiNLP - Thai stop word list + PyThaiNLP - 泰语停用词表 + + + + Custom stop word list + 自定义停用词表 + + + + spaCy - Catalan dependency parser + spaCy - 加泰罗尼亚语依存分析器 + + + + spaCy - Chinese dependency parser + spaCy - 汉语依存分析器 + + + + spaCy - Croatian dependency parser + spaCy - 克罗地亚语依存分析器 + + + + spaCy - Danish dependency parser + spaCy - 丹麦语依存分析器 + + + + spaCy - Dutch dependency parser + spaCy - 荷兰语依存分析器 + + + + spaCy - English dependency parser + spaCy - 英语依存分析器 + + + + spaCy - Finnish dependency parser + spaCy - 芬兰语依存分析器 + + + + spaCy - French dependency parser + spaCy - 法语依存分析器 + + + + spaCy - German dependency parser + spaCy - 德语依存分析器 + + + + spaCy - Greek (Modern) dependency parser + spaCy - 希腊语(现代)依存分析器 + + + + spaCy - Italian dependency parser + spaCy - 意大利语依存分析器 + + + + spaCy - Japanese dependency parser + spaCy - 日语依存分析器 + + + + spaCy - Lithuanian dependency parser + spaCy - 立陶宛语依存分析器 + + + + spaCy - Macedonian dependency parser + spaCy - 马其顿语依存分析器 + + + + spaCy - Norwegian Bokmål dependency parser + spaCy - 书面挪威语依存分析器 + + + + spaCy - Polish dependency parser + spaCy - 波兰语依存分析器 + + + + spaCy - Portuguese dependency parser + spaCy - 葡萄牙语依存分析器 + + + + spaCy - Romanian dependency parser + spaCy - 罗马尼亚语依存分析器 + + + + spaCy - Russian dependency parser + spaCy - 俄语依存分析器 + + + + spaCy - Spanish dependency parser + spaCy - 西班牙语依存分析器 + + + + spaCy - Swedish dependency parser + spaCy - 瑞典语依存分析器 + + + + spaCy - Ukrainian dependency parser + spaCy - 乌克兰语依存分析器 + + + + Average logarithmic distance + + + + + Average reduced frequency + + + + + Average waiting time + + + + + Carroll's Uₘ + + + + + Fisher's exact test + 费希尔精确检验 + + + + Log-likelihood ratio test + 对数似然比检验 + + + + Pearson's chi-squared test + 皮尔森卡方检验 + + + + Cubic association ratio + + + + + Dice's coefficient + Dice 系数 + + + + Difference coefficient + + + + + Jaccard index + 雅卡尔指数 + + + + Log-frequency biased MD + + + + + Kilgarriff's ratio + + + + + Log ratio + + + + + Minimum sensitivity + + + + + Mutual dependency + + + + + Mutual expectation + + + + + Mutual information + 互信息 + + + + Odds ratio + 比值比 + + + + Pointwise mutual information + 点互信息 + + + + Poisson collocation measure + + + + + Squared phi coefficient + Phi 系数的平方 + + + + Microsoft Paint files (*.msp) + Microsoft Paint 文件 (*.msp) + + + + Khmer + 柬埔寨语 + + + + khmer-nltk - Khmer sentence tokenizer + khmer-nltk - 柬埔寨语分句器 + + + + spaCy - Korean dependency parser + spaCy - 韩语依存分析器 + + + + spaCy - Slovenian dependency parser + spaCy - 斯洛文尼亚语依存分析器 + + + + spaCy - Korean sentence recognizer + spaCy - 韩语句子识别器 + + + + khmer-nltk - Khmer word tokenizer + khmer-nltk - 柬埔寨语分词器 + + + + spaCy - Korean word tokenizer + spaCy - 韩语分词器 + + + + spaCy - Malay word tokenizer + spaCy - 马来语分词器 + + + + khmer-nltk - Khmer part-of-speech tagger + khmer-nltk - 柬埔寨语词性标注器 + + + + PyThaiNLP - Perceptron part-of-speech tagger (Blackboard) + PyThaiNLP - 感知机词性标注器(Blackboard) + + + + spaCy - Korean part-of-speech tagger + spaCy - 韩语词性标注器 + + + + spaCy - Slovenian part-of-speech tagger + spaCy - 斯洛文尼亚语词性标注器 + + + + spaCy - Korean lemmatizer + spaCy - 韩语词形还原器 + + + + spaCy - Slovenian lemmatizer + spaCy - 斯洛文尼亚语词形还原器 + + + + Dostoevsky - Russian sentiment analyzer + Dostoevsky - 俄语情感分析器 + + + + Underthesea - Vietnamese sentiment analyzer + Underthesea - 越南语情感分析器 + + + + Armenian (Eastern) + 亚美尼亚语(东) + + + + Armenian (Western) + 亚美尼亚语(西) + + + + Buryat (Russia) + 布里亚特语(俄罗斯) + + + + Chinese (Classical) + 汉语(文言) + + + + Church Slavonic (Old) + 教会斯拉夫语(古) + + + + Coptic + 科普特语 + + + + Erzya + 埃尔齐亚语 + + + + Faroese + 法罗语 + + + + French (Old) + 法语(古) + + + + Gothic + 哥特语 + + + + Hebrew (Ancient) + 希伯来语(古) + + + + Hebrew (Modern) + 希伯来语(现代) + + + + Kurdish (Kurmanji) + 库尔德语(库尔曼吉语) + + + + Lao + 老挝语 + + + + Maltese + 马耳他语 + + + + Nigerian Pidgin + 尼日利亚皮钦语 + + + + Pomak + 波马克语 + + + + Russian (Old) + 俄语(古) + + + + Sindhi + 信德语 + + + + Uyghur + 维吾尔语 + + + + Wolof + 沃洛夫语 + + + + LaoNLP - Lao sentence tokenizer + LaoNLP - 老挝语分句器 + + + + Stanza - Afrikaans sentence tokenizer + Stanza - 南非语分句器 + + + + Stanza - Arabic sentence tokenizer + Stanza - 阿拉伯语分句器 + + + + Stanza - Armenian (Eastern) sentence tokenizer + Stanza - 亚美尼亚语(东)分句器 + + + + Stanza - Armenian (Western) sentence tokenizer + Stanza - 亚美尼亚语(西)分句器 + + + + Stanza - Basque sentence tokenizer + Stanza - 巴斯克语分句器 + + + + Stanza - Belarusian sentence tokenizer + Stanza - 白俄罗斯语分句器 + + + + Stanza - Bulgarian sentence tokenizer + Stanza - 保加利亚语分句器 + + + + Stanza - Burmese sentence tokenizer + Stanza - 缅甸语分句器 + + + + Stanza - Buryat (Russia) sentence tokenizer + Stanza - 布里亚特语(俄罗斯)分句器 + + + + Stanza - Catalan sentence tokenizer + Stanza - 加泰罗尼亚语分句器 + + + + Stanza - Chinese (Classical) sentence tokenizer + Stanza - 汉语(文言)分句器 + + + + Stanza - Chinese (Simplified) sentence tokenizer + Stanza - 汉语(简体)分句器 + + + + Stanza - Chinese (Traditional) sentence tokenizer + Stanza - 汉语(繁体)分句器 + + + + Stanza - Church Slavonic (Old) sentence tokenizer + Stanza - 教会斯拉夫语(古)分句器 + + + + Stanza - Coptic sentence tokenizer + Stanza - 科普特语分句器 + + + + Stanza - Croatian sentence tokenizer + Stanza - 克罗地亚语分句器 + + + + Stanza - Czech sentence tokenizer + Stanza - 捷克语分句器 + + + + Stanza - Danish sentence tokenizer + Stanza - 丹麦语分句器 + + + + Stanza - Dutch sentence tokenizer + Stanza - 荷兰语分句器 + + + + Stanza - English sentence tokenizer + Stanza - 英语分句器 + + + + Stanza - Erzya sentence tokenizer + Stanza - 埃尔齐亚语分句器 + + + + Stanza - Estonian sentence tokenizer + Stanza - 爱沙尼亚语分句器 + + + + Stanza - Faroese sentence tokenizer + Stanza - 法罗语分句器 + + + + Stanza - Finnish sentence tokenizer + Stanza - 芬兰语分句器 + + + + Stanza - French sentence tokenizer + Stanza - 法语分句器 + + + + Stanza - French (Old) sentence tokenizer + Stanza - 法语(古)分句器 + + + + Stanza - Galician sentence tokenizer + Stanza - 加里西亚语分句器 + + + + Stanza - German sentence tokenizer + Stanza - 德语分句器 + + + + Stanza - Gothic sentence tokenizer + Stanza - 哥特语分句器 + + + + Stanza - Greek (Ancient) sentence tokenizer + Stanza - 希腊语(古)分句器 + + + + Stanza - Greek (Modern) sentence tokenizer + Stanza - 希腊语(现代)分句器 + + + + Stanza - Hebrew (Ancient) sentence tokenizer + Stanza - 希伯来语(古)分句器 + + + + Stanza - Hebrew (Modern) sentence tokenizer + Stanza - 希伯来语(现代)分句器 + + + + Stanza - Hindi sentence tokenizer + Stanza - 印地语分句器 + + + + Stanza - Hungarian sentence tokenizer + Stanza - 匈牙利语分句器 + + + + Stanza - Icelandic sentence tokenizer + Stanza - 冰岛语分句器 + + + + Stanza - Indonesian sentence tokenizer + Stanza - 印度尼西亚语分句器 + + + + Stanza - Irish sentence tokenizer + Stanza - 爱尔兰语分句器 + + + + Stanza - Italian sentence tokenizer + Stanza - 意大利语分句器 + + + + Stanza - Japanese sentence tokenizer + Stanza - 日语分句器 + + + + Stanza - Kazakh sentence tokenizer + Stanza - 哈萨克语分句器 + + + + Stanza - Korean sentence tokenizer + Stanza - 韩语分句器 + + + + Stanza - Kurdish (Kurmanji) sentence tokenizer + Stanza - 库尔德语(库尔曼吉语)分句器 + + + + Stanza - Kyrgyz sentence tokenizer + Stanza - 吉尔吉斯语分句器 + + + + Stanza - Latin sentence tokenizer + Stanza - 拉丁语分句器 + + + + Stanza - Latvian sentence tokenizer + Stanza - 拉脱维亚语分句器 + + + + Stanza - Ligurian sentence tokenizer + Stanza - 利古里亚语分句器 + + + + Stanza - Lithuanian sentence tokenizer + Stanza - 立陶宛语分句器 + + + + Stanza - Maltese sentence tokenizer + Stanza - 马耳他语分句器 + + + + Stanza - Manx sentence tokenizer + Stanza - 马恩语分句器 + + + + Stanza - Marathi sentence tokenizer + Stanza - 马拉地语分句器 + + + + Stanza - Nigerian Pidgin sentence tokenizer + Stanza - 尼日利亚皮钦语分句器 + + + + Stanza - Norwegian Bokmål sentence tokenizer + Stanza - 书面挪威语分句器 + + + + Stanza - Norwegian Nynorsk sentence tokenizer + Stanza - 新挪威语分句器 + + + + Stanza - Persian sentence tokenizer + Stanza - 波斯语分句器 + + + + Stanza - Polish sentence tokenizer + Stanza - 波兰语分句器 + + + + Stanza - Pomak sentence tokenizer + Stanza - 波马克语分句器 + + + + Stanza - Portuguese sentence tokenizer + Stanza - 葡萄牙语分句器 + + + + Stanza - Romanian sentence tokenizer + Stanza - 罗马尼亚语分句器 + + + + Stanza - Russian sentence tokenizer + Stanza - 俄语分句器 + + + + Stanza - Russian (Old) sentence tokenizer + Stanza - 俄语(古)分句器 + + + + Stanza - Sámi (Northern) sentence tokenizer + Stanza - 萨米语(北)分句器 + + + + Stanza - Sanskrit sentence tokenizer + Stanza - 梵语分句器 + + + + Stanza - Scottish Gaelic sentence tokenizer + Stanza - 苏格兰盖尔语分句器 + + + + Stanza - Serbian (Latin) sentence tokenizer + Stanza - 塞尔维亚语(拉丁)分句器 + + + + Stanza - Sindhi sentence tokenizer + Stanza - 信德语分句器 + + + + Stanza - Slovak sentence tokenizer + Stanza - 斯洛伐克语分句器 + + + + Stanza - Slovenian sentence tokenizer + Stanza - 斯洛文尼亚语分句器 + + + + Stanza - Sorbian (Upper) sentence tokenizer + Stanza - 索布语(上)分句器 + + + + Stanza - Spanish sentence tokenizer + Stanza - 西班牙语分句器 + + + + Stanza - Swedish sentence tokenizer + Stanza - 瑞典语分句器 + + + + Stanza - Tamil sentence tokenizer + Stanza - 泰米尔语分句器 + + + + Stanza - Telugu sentence tokenizer + Stanza - 泰卢固语分句器 + + + + Stanza - Thai sentence tokenizer + Stanza - 泰语分句器 + + + + Stanza - Turkish sentence tokenizer + Stanza - 土耳其语分句器 + + + + Stanza - Ukrainian sentence tokenizer + Stanza - 乌克兰语分句器 + + + + Stanza - Urdu sentence tokenizer + Stanza - 乌尔都语分句器 + + + + Stanza - Uyghur sentence tokenizer + Stanza - 维吾尔语分句器 + + + + Stanza - Vietnamese sentence tokenizer + Stanza - 越南语分句器 + + + + Stanza - Welsh sentence tokenizer + Stanza - 威尔士语分句器 + + + + Stanza - Wolof sentence tokenizer + Stanza - 沃洛夫语分句器 + + + + LaoNLP - Lao word tokenizer + LaoNLP - 老挝语分词器 + + + + spaCy - Hebrew (Modern) word tokenizer + spaCy - 希伯来语(现代)分词器 + + + + spaCy - Norwegian Bokmål word tokenizer + spaCy - 书面挪威语分词器 + + + + Stanza - Afrikaans word tokenizer + Stanza - 南非语分词器 + + + + Stanza - Arabic word tokenizer + Stanza - 阿拉伯语分词器 + + + + Stanza - Armenian (Eastern) word tokenizer + Stanza - 亚美尼亚语(东)分词器 + + + + Stanza - Armenian (Western) word tokenizer + Stanza - 亚美尼亚语(西)分词器 + + + + Stanza - Basque word tokenizer + Stanza - 巴斯克语分词器 + + + + Stanza - Belarusian word tokenizer + Stanza - 白俄罗斯语分词器 + + + + Stanza - Bulgarian word tokenizer + Stanza - 保加利亚语分词器 + + + + Stanza - Burmese word tokenizer + Stanza - 缅甸语分词器 + + + + Stanza - Buryat (Russia) word tokenizer + Stanza - 布里亚特语(俄罗斯)分词器 + + + + Stanza - Catalan word tokenizer + Stanza - 加泰罗尼亚语分词器 + + + + Stanza - Chinese (Classical) word tokenizer + Stanza - 汉语(文言)分词器 + + + + Stanza - Chinese (Simplified) word tokenizer + Stanza - 汉语(简体)分词器 + + + + Stanza - Chinese (Traditional) word tokenizer + Stanza - 汉语(繁体)分词器 + + + + Stanza - Church Slavonic (Old) word tokenizer + Stanza - 教会斯拉夫语(古)分词器 + + + + Stanza - Coptic word tokenizer + Stanza - 科普特语分词器 + + + + Stanza - Croatian word tokenizer + Stanza - 克罗地亚语分词器 + + + + Stanza - Czech word tokenizer + Stanza - 捷克语分词器 + + + + Stanza - Danish word tokenizer + Stanza - 丹麦语分词器 + + + + Stanza - Dutch word tokenizer + Stanza - 荷兰语分词器 + + + + Stanza - English word tokenizer + Stanza - 英语分词器 + + + + Stanza - Erzya word tokenizer + Stanza - 埃尔齐亚语分词器 + + + + Stanza - Estonian word tokenizer + Stanza - 爱沙尼亚语分词器 + + + + Stanza - Faroese word tokenizer + Stanza - 法罗语分词器 + + + + Stanza - Finnish word tokenizer + Stanza - 芬兰语分词器 + + + + Stanza - French word tokenizer + Stanza - 法语分词器 + + + + Stanza - French (Old) word tokenizer + Stanza - 法语(古)分词器 + + + + Stanza - Galician word tokenizer + Stanza - 加里西亚语分词器 + + + + Stanza - German word tokenizer + Stanza - 德语分词器 + + + + Stanza - Gothic word tokenizer + Stanza - 哥特语分词器 + + + + Stanza - Greek (Ancient) word tokenizer + Stanza - 希腊语(古)分词器 + + + + Stanza - Greek (Modern) word tokenizer + Stanza - 希腊语(现代)分词器 + + + + Stanza - Hebrew (Ancient) word tokenizer + Stanza - 希伯来语(古)分词器 + + + + Stanza - Hebrew (Modern) word tokenizer + Stanza - 希伯来语(现代)分词器 + + + + Stanza - Hindi word tokenizer + Stanza - 印地语分词器 + + + + Stanza - Hungarian word tokenizer + Stanza - 匈牙利语分词器 + + + + Stanza - Icelandic word tokenizer + Stanza - 冰岛语分词器 + + + + Stanza - Indonesian word tokenizer + Stanza - 印度尼西亚语分词器 + + + + Stanza - Irish word tokenizer + Stanza - 爱尔兰语分词器 + + + + Stanza - Italian word tokenizer + Stanza - 意大利语分词器 + + + + Stanza - Japanese word tokenizer + Stanza - 日语分词器 + + + + Stanza - Kazakh word tokenizer + Stanza - 哈萨克语分词器 + + + + Stanza - Korean word tokenizer + Stanza - 韩语分词器 + + + + Stanza - Kurdish (Kurmanji) word tokenizer + Stanza - 库尔德语(库尔曼吉语)分词器 + + + + Stanza - Kyrgyz word tokenizer + Stanza - 吉尔吉斯语分词器 + + + + Stanza - Latin word tokenizer + Stanza - 拉丁语分词器 + + + + Stanza - Latvian word tokenizer + Stanza - 拉脱维亚语分词器 + + + + Stanza - Ligurian word tokenizer + Stanza - 利古里亚语分词器 + + + + Stanza - Lithuanian word tokenizer + Stanza - 立陶宛语分词器 + + + + Stanza - Maltese word tokenizer + Stanza - 马耳他语分词器 + + + + Stanza - Manx word tokenizer + Stanza - 马恩语分词器 + + + + Stanza - Marathi word tokenizer + Stanza - 马拉地语分词器 + + + + Stanza - Nigerian Pidgin word tokenizer + Stanza - 尼日利亚皮钦语分词器 + + + + Stanza - Norwegian Bokmål word tokenizer + Stanza - 书面挪威语分词器 + + + + Stanza - Norwegian Nynorsk word tokenizer + Stanza - 新挪威语分词器 + + + + Stanza - Persian word tokenizer + Stanza - 波斯语分词器 + + + + Stanza - Polish word tokenizer + Stanza - 波兰语分词器 + + + + Stanza - Pomak word tokenizer + Stanza - 波马克语分词器 + + + + Stanza - Portuguese word tokenizer + Stanza - 葡萄牙语分词器 + + + + Stanza - Romanian word tokenizer + Stanza - 罗马尼亚语分词器 + + + + Stanza - Russian word tokenizer + Stanza - 俄语分词器 + + + + Stanza - Russian (Old) word tokenizer + Stanza - 俄语(古)分词器 + + + + Stanza - Sámi (Northern) word tokenizer + Stanza - 萨米语(北)分词器 + + + + Stanza - Sanskrit word tokenizer + Stanza - 梵语分词器 + + + + Stanza - Scottish Gaelic word tokenizer + Stanza - 苏格兰盖尔语分词器 + + + + Stanza - Serbian (Latin) word tokenizer + Stanza - 塞尔维亚语(拉丁)分词器 + + + + Stanza - Sindhi word tokenizer + Stanza - 信德语分词器 + + + + Stanza - Slovak word tokenizer + Stanza - 斯洛伐克语分词器 + + + + Stanza - Slovenian word tokenizer + Stanza - 斯洛文尼亚语分词器 + + + + Stanza - Sorbian (Upper) word tokenizer + Stanza - 索布语(上)分词器 + + + + Stanza - Spanish word tokenizer + Stanza - 西班牙语分词器 + + + + Stanza - Swedish word tokenizer + Stanza - 瑞典语分词器 + + + + Stanza - Tamil word tokenizer + Stanza - 泰米尔语分词器 + + + + Stanza - Telugu word tokenizer + Stanza - 泰卢固语分词器 + + + + Stanza - Thai word tokenizer + Stanza - 泰语分词器 + + + + Stanza - Turkish word tokenizer + Stanza - 土耳其语分词器 + + + + Stanza - Ukrainian word tokenizer + Stanza - 乌克兰语分词器 + + + + Stanza - Urdu word tokenizer + Stanza - 乌尔都语分词器 + + + + Stanza - Uyghur word tokenizer + Stanza - 维吾尔语分词器 + + + + Stanza - Vietnamese word tokenizer + Stanza - 越南语分词器 + + + + Stanza - Welsh word tokenizer + Stanza - 威尔士语分词器 + + + + Stanza - Wolof word tokenizer + Stanza - 沃洛夫语分词器 + + + + LaoNLP - SeqLabeling + + + + + LaoNLP - Yunshan Cup 2020 + + + + + Stanza - Afrikaans part-of-speech tagger + Stanza - 南非语词性标注器 + + + + Stanza - Arabic part-of-speech tagger + Stanza - 阿拉伯语词性标注器 + + + + Stanza - Armenian (Eastern) part-of-speech tagger + Stanza - 亚美尼亚语(东)词性标注器 + + + + Stanza - Armenian (Western) part-of-speech tagger + Stanza - 亚美尼亚语(西)词性标注器 + + + + Stanza - Basque part-of-speech tagger + Stanza - 巴斯克语词性标注器 + + + + Stanza - Belarusian part-of-speech tagger + Stanza - 白俄罗斯语词性标注器 + + + + Stanza - Bulgarian part-of-speech tagger + Stanza - 保加利亚语词性标注器 + + + + Stanza - Buryat (Russia) part-of-speech tagger + Stanza - 布里亚特语(俄罗斯)词性标注器 + + + + Stanza - Catalan part-of-speech tagger + Stanza - 加泰罗尼亚语词性标注器 + + + + Stanza - Chinese (Classical) part-of-speech tagger + Stanza - 汉语(文言)词性标注器 + + + + Stanza - Chinese (Simplified) part-of-speech tagger + Stanza - 汉语(简体)词性标注器 + + + + Stanza - Chinese (Traditional) part-of-speech tagger + Stanza - 汉语(繁体)词性标注器 + + + + Stanza - Church Slavonic (Old) part-of-speech tagger + Stanza - 教会斯拉夫语(古)词性标注器 + + + + Stanza - Coptic part-of-speech tagger + Stanza - 科普特语词性标注器 + + + + Stanza - Croatian part-of-speech tagger + Stanza - 克罗地亚语词性标注器 + + + + Stanza - Czech part-of-speech tagger + Stanza - 捷克语词性标注器 + + + + Stanza - Danish part-of-speech tagger + Stanza - 丹麦语词性标注器 + + + + Stanza - Dutch part-of-speech tagger + Stanza - 荷兰语词性标注器 + + + + Stanza - English part-of-speech tagger + Stanza - 英语词性标注器 + + + + Stanza - Erzya part-of-speech tagger + Stanza - 埃尔齐亚语词性标注器 + + + + Stanza - Estonian part-of-speech tagger + Stanza - 爱沙尼亚语词性标注器 + + + + Stanza - Faroese part-of-speech tagger + Stanza - 法罗语词性标注器 + + + + Stanza - Finnish part-of-speech tagger + Stanza - 芬兰语词性标注器 + + + + Stanza - French part-of-speech tagger + Stanza - 法语词性标注器 + + + + Stanza - French (Old) part-of-speech tagger + Stanza - 法语(古)词性标注器 + + + + Stanza - Galician part-of-speech tagger + Stanza - 加里西亚语词性标注器 + + + + Stanza - German part-of-speech tagger + Stanza - 德语词性标注器 + + + + Stanza - Gothic part-of-speech tagger + Stanza - 哥特语词性标注器 + + + + Stanza - Greek (Ancient) part-of-speech tagger + Stanza - 希腊语(古)词性标注器 + + + + Stanza - Greek (Modern) part-of-speech tagger + Stanza - 希腊语(现代)词性标注器 + + + + Stanza - Hebrew (Ancient) part-of-speech tagger + Stanza - 希伯来语(古)词性标注器 + + + + Stanza - Hebrew (Modern) part-of-speech tagger + Stanza - 希伯来语(现代)词性标注器 + + + + Stanza - Hindi part-of-speech tagger + Stanza - 印地语词性标注器 + + + + Stanza - Hungarian part-of-speech tagger + Stanza - 匈牙利语词性标注器 + + + + Stanza - Icelandic part-of-speech tagger + Stanza - 冰岛语词性标注器 + + + + Stanza - Indonesian part-of-speech tagger + Stanza - 印度尼西亚语词性标注器 + + + + Stanza - Irish part-of-speech tagger + Stanza - 爱尔兰语词性标注器 + + + + Stanza - Italian part-of-speech tagger + Stanza - 意大利语词性标注器 + + + + Stanza - Japanese part-of-speech tagger + Stanza - 日语词性标注器 + + + + Stanza - Kazakh part-of-speech tagger + Stanza - 哈萨克语词性标注器 + + + + Stanza - Korean part-of-speech tagger + Stanza - 韩语词性标注器 + + + + Stanza - Kurdish (Kurmanji) part-of-speech tagger + Stanza - 库尔德语(库尔曼吉语)词性标注器 + + + + Stanza - Kyrgyz part-of-speech tagger + Stanza - 吉尔吉斯语词性标注器 + + + + Stanza - Latin part-of-speech tagger + Stanza - 拉丁语词性标注器 + + + + Stanza - Latvian part-of-speech tagger + Stanza - 拉脱维亚语词性标注器 + + + + Stanza - Ligurian part-of-speech tagger + Stanza - 利古里亚语词性标注器 + + + + Stanza - Lithuanian part-of-speech tagger + Stanza - 立陶宛语词性标注器 + + + + Stanza - Maltese part-of-speech tagger + Stanza - 马耳他语词性标注器 + + + + Stanza - Manx part-of-speech tagger + Stanza - 马恩语词性标注器 + + + + Stanza - Marathi part-of-speech tagger + Stanza - 马拉地语词性标注器 + + + + Stanza - Nigerian Pidgin part-of-speech tagger + Stanza - 尼日利亚皮钦语词性标注器 + + + + Stanza - Norwegian Bokmål part-of-speech tagger + Stanza - 书面挪威语词性标注器 + + + + Stanza - Norwegian Nynorsk part-of-speech tagger + Stanza - 新挪威语词性标注器 + + + + Stanza - Persian part-of-speech tagger + Stanza - 波斯语词性标注器 + + + + Stanza - Polish part-of-speech tagger + Stanza - 波兰语词性标注器 + + + + Stanza - Pomak part-of-speech tagger + Stanza - 波马克语词性标注器 + + + + Stanza - Portuguese part-of-speech tagger + Stanza - 葡萄牙语词性标注器 + + + + Stanza - Romanian part-of-speech tagger + Stanza - 罗马尼亚语词性标注器 + + + + Stanza - Russian part-of-speech tagger + Stanza - 俄语词性标注器 + + + + Stanza - Russian (Old) part-of-speech tagger + Stanza - 俄语(古)词性标注器 + + + + Stanza - Sámi (Northern) part-of-speech tagger + Stanza - 萨米语(北)词性标注器 + + + + Stanza - Sanskrit part-of-speech tagger + Stanza - 梵语词性标注器 + + + + Stanza - Scottish Gaelic part-of-speech tagger + Stanza - 苏格兰盖尔语词性标注器 + + + + Stanza - Serbian (Latin) part-of-speech tagger + Stanza - 塞尔维亚语(拉丁)词性标注器 + + + + Stanza - Slovak part-of-speech tagger + Stanza - 斯洛伐克语词性标注器 + + + + Stanza - Slovenian part-of-speech tagger + Stanza - 斯洛文尼亚语词性标注器 + + + + Stanza - Sorbian (Upper) part-of-speech tagger + Stanza - 索布语(上)词性标注器 + + + + Stanza - Spanish part-of-speech tagger + Stanza - 西班牙语词性标注器 + + + + Stanza - Swedish part-of-speech tagger + Stanza - 瑞典语词性标注器 + + + + Stanza - Tamil part-of-speech tagger + Stanza - 泰米尔语词性标注器 + + + + Stanza - Telugu part-of-speech tagger + Stanza - 泰卢固语词性标注器 + + + + Stanza - Turkish part-of-speech tagger + Stanza - 土耳其语词性标注器 + + + + Stanza - Ukrainian part-of-speech tagger + Stanza - 乌克兰语词性标注器 + + + + Stanza - Urdu part-of-speech tagger + Stanza - 乌尔都语词性标注器 + + + + Stanza - Uyghur part-of-speech tagger + Stanza - 维吾尔语词性标注器 + + + + Stanza - Vietnamese part-of-speech tagger + Stanza - 越南语词性标注器 + + + + Stanza - Welsh part-of-speech tagger + Stanza - 威尔士语词性标注器 + + + + Stanza - Wolof part-of-speech tagger + Stanza - 沃洛夫语词性标注器 + + + + Stanza - Afrikaans lemmatizer + Stanza - 南非语词形还原器 + + + + Stanza - Arabic lemmatizer + Stanza - 阿拉伯语词形还原器 + + + + Stanza - Armenian (Eastern) lemmatizer + Stanza - 亚美尼亚语(东)词形还原器 + + + + Stanza - Armenian (Western) lemmatizer + Stanza - 亚美尼亚语(西)词形还原器 + + + + Stanza - Basque lemmatizer + Stanza - 巴斯克语词形还原器 + + + + Stanza - Belarusian lemmatizer + Stanza - 白俄罗斯语词形还原器 + + + + Stanza - Bulgarian lemmatizer + Stanza - 保加利亚语词形还原器 + + + + Stanza - Buryat (Russia) lemmatizer + Stanza - 布里亚特语(俄罗斯)词形还原器 + + + + Stanza - Catalan lemmatizer + Stanza - 加泰罗尼亚语词形还原器 + + + + Stanza - Chinese (Classical) lemmatizer + Stanza - 汉语(文言)词形还原器 + + + + Stanza - Chinese (Simplified) lemmatizer + Stanza - 汉语(简体)词形还原器 + + + + Stanza - Chinese (Traditional) lemmatizer + Stanza - 汉语(繁体)词形还原器 + + + + Stanza - Church Slavonic (Old) lemmatizer + Stanza - 教会斯拉夫语(古)词形还原器 + + + + Stanza - Coptic lemmatizer + Stanza - 科普特语词形还原器 + + + + Stanza - Croatian lemmatizer + Stanza - 克罗地亚语词形还原器 + + + + Stanza - Czech lemmatizer + Stanza - 捷克语词形还原器 + + + + Stanza - Danish lemmatizer + Stanza - 丹麦语词形还原器 + + + + Stanza - Dutch lemmatizer + Stanza - 荷兰语词形还原器 + + + + Stanza - English lemmatizer + Stanza - 英语词形还原器 + + + + Stanza - Erzya lemmatizer + Stanza - 埃尔齐亚语词形还原器 + + + + Stanza - Estonian lemmatizer + Stanza - 爱沙尼亚语词形还原器 + + + + Stanza - Finnish lemmatizer + Stanza - 芬兰语词形还原器 + + + + Stanza - French lemmatizer + Stanza - 法语词形还原器 + + + + Stanza - French (Old) lemmatizer + Stanza - 法语(古)词形还原器 - - NLTK - Nepali stop word list - NLTK - 尼泊尔语停用词表 + + Stanza - Galician lemmatizer + Stanza - 加里西亚语词形还原器 - - NLTK - Norwegian stop word list - NLTK - 挪威语停用词表 + + Stanza - German lemmatizer + Stanza - 德语词形还原器 - - NLTK - Portuguese stop word list - NLTK - 葡萄牙语停用词表 + + Stanza - Gothic lemmatizer + Stanza - 哥特语词形还原器 - - NLTK - Romanian stop word list - NLTK - 罗马尼亚语停用词表 + + Stanza - Greek (Ancient) lemmatizer + Stanza - 希腊语(古)词形还原器 - - NLTK - Russian stop word list - NLTK - 俄语停用词表 + + Stanza - Greek (Modern) lemmatizer + Stanza - 希腊语(现代)词形还原器 - - NLTK - Slovenian stop word list - NLTK - 斯洛文尼亚语停用词表 + + Stanza - Hebrew (Ancient) lemmatizer + Stanza - 希伯来语(古)词形还原器 - - NLTK - Spanish stop word list - NLTK - 西班牙语停用词表 + + Stanza - Hebrew (Modern) lemmatizer + Stanza - 希伯来语(现代)词形还原器 - - NLTK - Swedish stop word list - NLTK - 瑞典语停用词表 + + Stanza - Hindi lemmatizer + Stanza - 印地语词形还原器 - - NLTK - Tajik stop word list - NLTK - 塔吉克语停用词表 + + Stanza - Hungarian lemmatizer + Stanza - 匈牙利语词形还原器 - - NLTK - Turkish stop word list - NLTK - 土耳其语停用词表 + + Stanza - Icelandic lemmatizer + Stanza - 冰岛语词形还原器 - - PyThaiNLP - Thai stop word list - PyThaiNLP - 泰语停用词表 + + Stanza - Indonesian lemmatizer + Stanza - 印度尼西亚语词形还原器 - - stopword - Afrikaans stop word list - stopword - 南非语停用词表 + + Stanza - Irish lemmatizer + Stanza - 爱尔兰语词形还原器 - - stopword - Arabic stop word list - stopword - 阿拉伯语停用词表 + + Stanza - Italian lemmatizer + Stanza - 意大利语词形还原器 - - stopword - Armenian stop word list - stopword - 亚美尼亚语停用词表 + + Stanza - Japanese lemmatizer + Stanza - 日语词形还原器 - - stopword - Basque stop word list - stopword - 巴斯克语停用词表 + + Stanza - Kazakh lemmatizer + Stanza - 哈萨克语词形还原器 - - stopword - Bengali stop word list - stopword - 孟加拉语停用词表 + + Stanza - Korean lemmatizer + Stanza - 韩语词形还原器 - - stopword - Breton stop word list - stopword - 布列塔尼语停用词表 + + Stanza - Kurdish (Kurmanji) lemmatizer + Stanza - 库尔德语(库尔曼吉语)词形还原器 - - stopword - Bulgarian stop word list - stopword - 保加利亚语停用词表 + + Stanza - Kyrgyz lemmatizer + Stanza - 吉尔吉斯语词形还原器 - - stopword - Catalan stop word list - stopword - 加泰罗尼亚语停用词表 + + Stanza - Latin lemmatizer + Stanza - 拉丁语词形还原器 - - stopword - Chinese (Simplified) stop word list - stopword - 汉语(简体)停用词表 + + Stanza - Latvian lemmatizer + Stanza - 拉脱维亚语词形还原器 - - stopword - Chinese (Traditional) stop word list - stopword - 汉语(繁体)停用词表 + + Stanza - Ligurian lemmatizer + Stanza - 利古里亚语词形还原器 - - stopword - Croatian stop word list - stopword - 克罗地亚语停用词表 + + Stanza - Lithuanian lemmatizer + Stanza - 立陶宛语词形还原器 - - stopword - Czech stop word list - stopword - 捷克语停用词表 + + Stanza - Manx lemmatizer + Stanza - 马恩语词形还原器 - - stopword - Danish stop word list - stopword - 丹麦语停用词表 + + Stanza - Marathi lemmatizer + Stanza - 马拉地语词形还原器 - - stopword - Dutch stop word list - stopword - 荷兰语停用词表 + + Stanza - Nigerian Pidgin lemmatizer + Stanza - 尼日利亚皮钦语词形还原器 - - stopword - English stop word list - stopword - 英语停用词表 + + Stanza - Norwegian Bokmål lemmatizer + Stanza - 书面挪威语词形还原器 - - stopword - Esperanto stop word list - stopword - 世界语停用词表 + + Stanza - Norwegian Nynorsk lemmatizer + Stanza - 新挪威语词形还原器 - - stopword - Estonian stop word list - stopword - 爱沙尼亚语停用词表 + + Stanza - Persian lemmatizer + Stanza - 波斯语词形还原器 - - stopword - Finnish stop word list - stopword - 芬兰语停用词表 + + Stanza - Polish lemmatizer + Stanza - 波兰语词形还原器 - - stopword - French stop word list - stopword - 法语停用词表 + + Stanza - Pomak lemmatizer + Stanza - 波马克语词形还原器 - - stopword - Galician stop word list - stopword - 加里西亚语停用词表 + + Stanza - Portuguese lemmatizer + Stanza - 葡萄牙语词形还原器 - - stopword - German stop word list - stopword - 德语停用词表 + + Stanza - Romanian lemmatizer + Stanza - 罗马尼亚语词形还原器 - - stopword - Greek (Modern) stop word list - stopword - 希腊语(现代)停用词表 + + Stanza - Russian lemmatizer + Stanza - 俄语词形还原器 - - stopword - Gujarati stop word list - stopword - 古吉拉特语停用词表 + + Stanza - Russian (Old) lemmatizer + Stanza - 俄语(古)词形还原器 - - stopword - Hausa stop word list - stopword - 豪萨语停用词表 + + Stanza - Sámi (Northern) lemmatizer + Stanza - 萨米语(北)词形还原器 - - stopword - Hebrew stop word list - stopword - 希伯来语停用词表 + + Stanza - Sanskrit lemmatizer + Stanza - 梵语词形还原器 - - stopword - Hindi stop word list - stopword - 印地语停用词表 + + Stanza - Scottish Gaelic lemmatizer + Stanza - 苏格兰盖尔语词形还原器 - - stopword - Hungarian stop word list - stopword - 匈牙利语停用词表 + + Stanza - Serbian (Latin) lemmatizer + Stanza - 塞尔维亚语(拉丁)词形还原器 - - stopword - Indonesian stop word list - stopword - 印度尼西亚语停用词表 + + Stanza - Slovak lemmatizer + Stanza - 斯洛伐克语词形还原器 - - stopword - Irish stop word list - stopword - 爱尔兰语停用词表 + + Stanza - Slovenian lemmatizer + Stanza - 斯洛文尼亚语词形还原器 - - stopword - Italian stop word list - stopword - 意大利语停用词表 + + Stanza - Sorbian (Upper) lemmatizer + Stanza - 索布语(上)词形还原器 - - stopword - Japanese stop word list - stopword - 日语停用词表 + + Stanza - Spanish lemmatizer + Stanza - 西班牙语词形还原器 - - stopword - Korean stop word list - stopword - 韩语停用词表 + + Stanza - Swedish lemmatizer + Stanza - 瑞典语词形还原器 - - stopword - Kurdish stop word list - stopword - 库尔德语停用词表 + + Stanza - Tamil lemmatizer + Stanza - 泰米尔语词形还原器 - - stopword - Latin stop word list - stopword - 拉丁语停用词表 + + Stanza - Turkish lemmatizer + Stanza - 土耳其语词形还原器 - - stopword - Latvian stop word list - stopword - 拉脱维亚语停用词表 + + Stanza - Ukrainian lemmatizer + Stanza - 乌克兰语词形还原器 - - stopword - Lithuanian stop word list - stopword - 立陶宛语停用词表 + + Stanza - Urdu lemmatizer + Stanza - 乌尔都语词形还原器 - - stopword - Lugbara stop word list - stopword - 卢格巴拉语停用词表 + + Stanza - Uyghur lemmatizer + Stanza - 维吾尔语词形还原器 - - stopword - Malay stop word list - stopword - 马来语停用词表 + + Stanza - Welsh lemmatizer + Stanza - 威尔士语词形还原器 - - stopword - Marathi stop word list - stopword - 马拉地语停用词表 + + Stanza - Wolof lemmatizer + Stanza - 沃洛夫语词形还原器 - - stopword - Norwegian Bokmål stop word list - stopword - 书面挪威语停用词表 + + LaoNLP - Lao stop word list + LaoNLP - 老挝语停用词表 - - stopword - Persian stop word list - stopword - 波斯语停用词表 + + NLTK - Hebrew (Modern) stop word list + NLTK - 希伯来语(现代)停用词表 - - stopword - Polish stop word list - stopword - 波兰语停用词表 + + Stanza - Afrikaans dependency parser + Stanza - 南非语依存分析器 - - stopword - Portuguese (Brazil) stop word list - stopword - 葡萄牙语(巴西)停用词表 + + Stanza - Arabic dependency parser + Stanza - 阿拉伯语依存分析器 - - stopword - Portuguese (Portugal) stop word list - stopword - 葡萄牙语(葡萄牙)停用词表 + + Stanza - Armenian (Eastern) dependency parser + Stanza - 亚美尼亚语(东)依存分析器 - - stopword - Punjabi (Gurmukhi) stop word list - stopword - 旁遮普语(古木基)停用词表 + + Stanza - Armenian (Western) dependency parser + Stanza - 亚美尼亚语(西)依存分析器 - - stopword - Romanian stop word list - stopword - 罗马尼亚语停用词表 + + Stanza - Basque dependency parser + Stanza - 巴斯克语依存分析器 - - stopword - Russian stop word list - stopword - 俄语停用词表 + + Stanza - Belarusian dependency parser + Stanza - 白俄罗斯语依存分析器 - - stopword - Slovak stop word list - stopword - 斯洛伐克语停用词表 + + Stanza - Bulgarian dependency parser + Stanza - 保加利亚语依存分析器 - - stopword - Slovenian stop word list - stopword - 斯洛文尼亚语停用词表 + + Stanza - Buryat (Russia) dependency parser + Stanza - 布里亚特语(俄罗斯)依存分析器 - - stopword - Somali stop word list - stopword - 索马里语停用词表 + + Stanza - Catalan dependency parser + Stanza - 加泰罗尼亚语依存分析器 - - stopword - Sotho (Southern) stop word list - stopword - 塞索托语停用词表 + + Stanza - Chinese (Classical) dependency parser + Stanza - 汉语(文言)依存分析器 - - stopword - Spanish stop word list - stopword - 西班牙语停用词表 + + Stanza - Chinese (Simplified) dependency parser + Stanza - 汉语(简体)依存分析器 - - stopword - Swahili stop word list - stopword - 斯瓦西里语停用词表 + + Stanza - Chinese (Traditional) dependency parser + Stanza - 汉语(繁体)依存分析器 - - stopword - Swedish stop word list - stopword - 瑞典语停用词表 + + Stanza - Church Slavonic (Old) dependency parser + Stanza - 教会斯拉夫语(古)依存分析器 - - stopword - Tagalog stop word list - stopword - 他加禄语停用词表 + + Stanza - Coptic dependency parser + Stanza - 科普特语依存分析器 - - stopword - Thai stop word list - stopword - 泰语停用词表 + + Stanza - Croatian dependency parser + Stanza - 克罗地亚语依存分析器 - - stopword - Turkish stop word list - stopword - 土耳其语停用词表 + + Stanza - Czech dependency parser + Stanza - 捷克语依存分析器 - - stopword - Ukrainian stop word list - stopword - 乌克兰语停用词表 + + Stanza - Danish dependency parser + Stanza - 丹麦语依存分析器 - - stopword - Urdu stop word list - stopword - 乌尔都语停用词表 + + Stanza - Dutch dependency parser + Stanza - 荷兰语依存分析器 - - stopword - Vietnamese stop word list - stopword - 越南语停用词表 + + Stanza - English dependency parser + Stanza - 英语依存分析器 - - stopword - Yoruba stop word list - stopword - 约鲁巴语停用词表 + + Stanza - Erzya dependency parser + Stanza - 埃尔齐亚语依存分析器 - - stopword - Zulu stop word list - stopword - 祖鲁语停用词表 + + Stanza - Estonian dependency parser + Stanza - 爱沙尼亚语依存分析器 - - Custom stop word list - 自定义停用词表 + + Stanza - Faroese dependency parser + Stanza - 法罗语依存分析器 - - spaCy - Catalan dependency parser - spaCy - 加泰罗尼亚语依存分析器 + + Stanza - Finnish dependency parser + Stanza - 芬兰语依存分析器 - - spaCy - Chinese dependency parser - spaCy - 汉语依存分析器 + + Stanza - French dependency parser + Stanza - 法语依存分析器 - - spaCy - Croatian dependency parser - spaCy - 克罗地亚语依存分析器 + + Stanza - French (Old) dependency parser + Stanza - 法语(古)依存分析器 - - spaCy - Danish dependency parser - spaCy - 丹麦语依存分析器 + + Stanza - Galician dependency parser + Stanza - 加里西亚语依存分析器 - - spaCy - Dutch dependency parser - spaCy - 荷兰语依存分析器 + + Stanza - German dependency parser + Stanza - 德语依存分析器 - - spaCy - English dependency parser - spaCy - 英语依存分析器 + + Stanza - Gothic dependency parser + Stanza - 哥特语依存分析器 - - spaCy - Finnish dependency parser - spaCy - 芬兰语依存分析器 + + Stanza - Greek (Ancient) dependency parser + Stanza - 希腊语(古)依存分析器 - - spaCy - French dependency parser - spaCy - 法语依存分析器 + + Stanza - Greek (Modern) dependency parser + Stanza - 希腊语(现代)依存分析器 - - spaCy - German dependency parser - spaCy - 德语依存分析器 + + Stanza - Hebrew (Ancient) dependency parser + Stanza - 希伯来语(古)依存分析器 - - spaCy - Greek (Modern) dependency parser - spaCy - 希腊语(现代)依存分析器 + + Stanza - Hebrew (Modern) dependency parser + Stanza - 希伯来语(现代)依存分析器 - - spaCy - Italian dependency parser - spaCy - 意大利语依存分析器 + + Stanza - Hindi dependency parser + Stanza - 印地语依存分析器 - - spaCy - Japanese dependency parser - spaCy - 日语依存分析器 + + Stanza - Hungarian dependency parser + Stanza - 匈牙利语依存分析器 - - spaCy - Lithuanian dependency parser - spaCy - 立陶宛语依存分析器 + + Stanza - Icelandic dependency parser + Stanza - 冰岛语依存分析器 - - spaCy - Macedonian dependency parser - spaCy - 马其顿语依存分析器 + + Stanza - Indonesian dependency parser + Stanza - 印度尼西亚语依存分析器 - - spaCy - Norwegian Bokmål dependency parser - spaCy - 书面挪威语依存分析器 + + Stanza - Irish dependency parser + Stanza - 爱尔兰语依存分析器 - - spaCy - Polish dependency parser - spaCy - 波兰语依存分析器 + + Stanza - Italian dependency parser + Stanza - 意大利语依存分析器 - - spaCy - Portuguese dependency parser - spaCy - 葡萄牙语依存分析器 + + Stanza - Japanese dependency parser + Stanza - 日语依存分析器 - - spaCy - Romanian dependency parser - spaCy - 罗马尼亚语依存分析器 + + Stanza - Kazakh dependency parser + Stanza - 哈萨克语依存分析器 - - spaCy - Russian dependency parser - spaCy - 俄语依存分析器 + + Stanza - Korean dependency parser + Stanza - 韩语依存分析器 - - spaCy - Spanish dependency parser - spaCy - 西班牙语依存分析器 + + Stanza - Kurdish (Kurmanji) dependency parser + Stanza - 库尔德语(库尔曼吉语)依存分析器 - - spaCy - Swedish dependency parser - spaCy - 瑞典语依存分析器 + + Stanza - Kyrgyz dependency parser + Stanza - 吉尔吉斯语依存分析器 - - spaCy - Ukrainian dependency parser - spaCy - 乌克兰语依存分析器 + + Stanza - Latin dependency parser + Stanza - 拉丁语依存分析器 - - Average logarithmic distance - + + Stanza - Latvian dependency parser + Stanza - 拉脱维亚语依存分析器 - - Average reduced frequency - + + Stanza - Ligurian dependency parser + Stanza - 利古里亚语依存分析器 - - Average waiting time - + + Stanza - Lithuanian dependency parser + Stanza - 立陶宛语依存分析器 - - Carroll's Uₘ - + + Stanza - Maltese dependency parser + Stanza - 马耳他语依存分析器 - - Fisher's exact test - 费希尔精确检验 + + Stanza - Manx dependency parser + Stanza - 马恩语依存分析器 - - Log-likelihood ratio test - 对数似然比检验 + + Stanza - Marathi dependency parser + Stanza - 马拉地语依存分析器 - - Pearson's chi-squared test - 皮尔森卡方检验 + + Stanza - Nigerian Pidgin dependency parser + Stanza - 尼日利亚皮钦语依存分析器 - - Cubic association ratio - + + Stanza - Norwegian Bokmål dependency parser + Stanza - 书面挪威语依存分析器 - - Dice's coefficient - Dice 系数 + + Stanza - Norwegian Nynorsk dependency parser + Stanza - 新挪威语依存分析器 - - Difference coefficient - + + Stanza - Persian dependency parser + Stanza - 波斯语依存分析器 - - Jaccard index - 雅卡尔指数 + + Stanza - Polish dependency parser + Stanza - 波兰语依存分析器 - - Log-frequency biased MD - + + Stanza - Pomak dependency parser + Stanza - 波马克语依存分析器 - - Kilgarriff's ratio - + + Stanza - Portuguese dependency parser + Stanza - 葡萄牙语依存分析器 - - Log ratio - + + Stanza - Romanian dependency parser + Stanza - 罗马尼亚语依存分析器 - - Minimum sensitivity - + + Stanza - Russian dependency parser + Stanza - 俄语依存分析器 - - Mutual dependency - + + Stanza - Russian (Old) dependency parser + Stanza - 俄语(古)依存分析器 - - Mutual expectation - + + Stanza - Sámi (Northern) dependency parser + Stanza - 萨米语(北)依存分析器 - - Mutual information - 互信息 + + Stanza - Sanskrit dependency parser + Stanza - 梵语依存分析器 - - Odds ratio - 比值比 + + Stanza - Scottish Gaelic dependency parser + Stanza - 苏格兰盖尔语依存分析器 - - Pointwise mutual information - 点互信息 + + Stanza - Serbian (Latin) dependency parser + Stanza - 塞尔维亚语(拉丁)依存分析器 - - Poisson collocation measure - + + Stanza - Slovak dependency parser + Stanza - 斯洛伐克语依存分析器 - - Squared phi coefficient - Phi 系数的平方 + + Stanza - Slovenian dependency parser + Stanza - 斯洛文尼亚语依存分析器 - - Microsoft Paint files (*.msp) - Microsoft Paint 文件 (*.msp) + + Stanza - Sorbian (Upper) dependency parser + Stanza - 索布语(上)依存分析器 - - stopword - Burmese stop word list - stopword - 缅甸语停用词表 + + Stanza - Spanish dependency parser + Stanza - 西班牙语依存分析器 - - Khmer - 柬埔寨语 + + Stanza - Swedish dependency parser + Stanza - 瑞典语依存分析器 - - khmer-nltk - Khmer sentence tokenizer - khmer-nltk - 柬埔寨语分句器 + + Stanza - Tamil dependency parser + Stanza - 泰米尔语依存分析器 - - spaCy - Korean dependency parser - spaCy - 韩语依存分析器 + + Stanza - Telugu dependency parser + Stanza - 泰卢固语依存分析器 - - spaCy - Slovenian dependency parser - spaCy - 斯洛文尼亚语依存分析器 + + Stanza - Turkish dependency parser + Stanza - 土耳其语依存分析器 - - spaCy - Korean sentence recognizer - spaCy - 韩语句子识别器 + + Stanza - Ukrainian dependency parser + Stanza - 乌克兰语依存分析器 - - khmer-nltk - Khmer word tokenizer - khmer-nltk - 柬埔寨语分词器 + + Stanza - Urdu dependency parser + Stanza - 乌尔都语依存分析器 - - spaCy - Korean word tokenizer - spaCy - 韩语分词器 + + Stanza - Uyghur dependency parser + Stanza - 维吾尔语依存分析器 - - spaCy - Malay word tokenizer - spaCy - 马来语分词器 + + Stanza - Vietnamese dependency parser + Stanza - 越南语依存分析器 - - khmer-nltk - Khmer part-of-speech tagger - khmer-nltk - 柬埔寨语词性标注器 + + Stanza - Welsh dependency parser + Stanza - 威尔士语依存分析器 - - PyThaiNLP - Perceptron part-of-speech tagger (Blackboard) - PyThaiNLP - 感知机词性标注器(Blackboard) + + Stanza - Wolof dependency parser + Stanza - 沃洛夫语依存分析器 - - spaCy - Korean part-of-speech tagger - spaCy - 韩语词性标注器 + + Stanza - Chinese (Simplified) sentiment analyzer + Stanza - 汉语(简体)情感分析器 - - spaCy - Slovenian part-of-speech tagger - spaCy - 斯洛文尼亚语词性标注器 + + Stanza - German sentiment analyzer + Stanza - 德语情感分析器 - - spaCy - Korean lemmatizer - spaCy - 韩语词形还原器 + + Stanza - English sentiment analyzer + Stanza - 英语情感分析器 - - spaCy - Slovenian lemmatizer - spaCy - 斯洛文尼亚语词形还原器 + + Stanza - Marathi sentiment analyzer + Stanza - 马拉地语情感分析器 - - Dostoevsky - Russian sentiment analyzer - Dostoevsky - 俄语情感分析器 + + Stanza - Spanish sentiment analyzer + Stanza - 西班牙语情感分析器 - - Underthesea - Vietnamese sentiment analyzer - Underthesea - 越南语情感分析器 + + Stanza - Vietnamese sentiment analyzer + Stanza - 越南语情感分析器 @@ -8808,12 +10498,12 @@ Frequency wl_conversion - + Yes - + No @@ -8821,12 +10511,12 @@ Frequency wl_dependency_parsing - + Dependency Graphs Generated Successfully 成功生成依存图 - + <div>Dependency graphs has been successfully generated and exported under folder: {}</div> @@ -9153,16 +10843,29 @@ Frequency wl_measure_utils - + Absolute frequency 绝对频数 - + Relative frequency 相对频数 + + wl_measures_lexical_diversity + + + Rank-frequency distribution + 频数排序分布 + + + + Frequency spectrum + 频数谱 + + wl_measures_readability @@ -9185,11 +10888,6 @@ Frequency New 新版 - - - Brouwer's Leesindex A - Brouwer Leesindex A - Navy @@ -9199,17 +10897,17 @@ Frequency wl_measures_statistical_significance - + Two-tailed 双尾 - + Left-tailed 左尾 - + Right-tailed 右尾 @@ -9217,22 +10915,22 @@ Frequency wl_misc - + minute - + minutes - + (In {} {} {:.2f} seconds) (耗时 {} {} {:.2f} 秒) - + (In (耗时 @@ -9400,668 +11098,783 @@ Frequency 字符数% - + Type-token Ratio 类符形符比 - - Type-token Ratio (Standardized) - 类符形符比(标准化) - - - + Paragraph Length in Sentences (Mean) 段落长(单位:句子)(均值) - + Paragraph Length in Sentences (Standard Deviation) 段落长(单位:句子)(标准差) - + Paragraph Length in Sentences (Variance) 段落长(单位:句子)(方差) - + Paragraph Length in Sentences (Minimum) 段落长(单位:句子)(最小值) - + Paragraph Length in Sentences (25th Percentile) 段落长(单位:句子)(25分位数) - + Paragraph Length in Sentences (Median) 段落长(单位:句子)(中位数) - + Paragraph Length in Sentences (75th Percentile) 段落长(单位:句子)(75分位数) - + Paragraph Length in Sentences (Maximum) 段落长(单位:句子)(最大值) - + Paragraph Length in Sentences (Range) 段落长(单位:句子)(极差) - + Paragraph Length in Sentences (Interquartile Range) 段落长(单位:句子)(四分位差) - + Paragraph Length in Sentences (Modes) 段落长(单位:句子)(众数) - + Paragraph Length in Sentence Segments (Mean) 段落长(单位:句段)(均值) - + Paragraph Length in Sentence Segments (Standard Deviation) 段落长(单位:句段)(标准差) - + Paragraph Length in Sentence Segments (Variance) 段落长(单位:句段)(方差) - + Paragraph Length in Sentence Segments (Minimum) 段落长(单位:句段)(最小值) - + Paragraph Length in Sentence Segments (25th Percentile) 段落长(单位:句段)(25分位数) - + Paragraph Length in Sentence Segments (Median) 段落长(单位:句段)(中位数) - + Paragraph Length in Sentence Segments (75th Percentile) 段落长(单位:句段)(75分位数) - + Paragraph Length in Sentence Segments (Maximum) 段落长(单位:句段)(最大值) - + Paragraph Length in Sentence Segments (Range) 段落长(单位:句段)(极差) - + Paragraph Length in Sentence Segments (Interquartile Range) 段落长(单位:句段)(四分位差) - + Paragraph Length in Sentence Segments (Modes) 段落长(单位:句段)(众数) - + Paragraph Length in Tokens (Mean) 段落长(单位:形符)(均值) - + Paragraph Length in Tokens (Standard Deviation) 段落长(单位:形符)(标准差) - + Paragraph Length in Tokens (Variance) 段落长(单位:形符)(方差) - + Paragraph Length in Tokens (Minimum) 段落长(单位:形符)(最小值) - + Paragraph Length in Tokens (25th Percentile) 段落长(单位:形符)(25分位数) - + Paragraph Length in Tokens (Median) 段落长(单位:形符)(中位数) - + Paragraph Length in Tokens (75th Percentile) 段落长(单位:形符)(75分位数) - + Paragraph Length in Tokens (Maximum) 段落长(单位:形符)(最大值) - + Paragraph Length in Tokens (Range) 段落长(单位:形符)(极差) - + Paragraph Length in Tokens (Interquartile Range) 段落长(单位:形符)(四分位差) - + Paragraph Length in Tokens (Modes) 段落长(单位:形符)(众数) - + Sentence Length in Tokens (Mean) 句长(单位:形符)(均值) - + Sentence Length in Tokens (Standard Deviation) 句长(单位:形符)(标准差) - + Sentence Length in Tokens (Variance) 句长(单位:形符)(方差) - + Sentence Length in Tokens (Minimum) 句长(单位:形符)(最小值) - + Sentence Length in Tokens (25th Percentile) 句长(单位:形符)(25分位数) - + Sentence Length in Tokens (Median) 句长(单位:形符)(中位数) - + Sentence Length in Tokens (75th Percentile) 句长(单位:形符)(75分位数) - + Sentence Length in Tokens (Maximum) 句长(单位:形符)(最大值) - + Sentence Length in Tokens (Range) 句长(单位:形符)(极差) - + Sentence Length in Tokens (Interquartile Range) 句长(单位:形符)(四分位差) - + Sentence Length in Tokens (Modes) 句长(单位:形符)(众数) - + Sentence Segment Length in Tokens (Mean) 句段长(单位:形符)(均值) - + Sentence Segment Length in Tokens (Standard Deviation) 句段长(单位:形符)(标准差) - + Sentence Segment Length in Tokens (Variance) 句段长(单位:形符)(方差) - + Sentence Segment Length in Tokens (Minimum) 句段长(单位:形符)(最小值) - + Sentence Segment Length in Tokens (25th Percentile) 句段长(单位:形符)(25分位数) - + Sentence Segment Length in Tokens (Median) 句段长(单位:形符)(中位数) - + Sentence Segment Length in Tokens (75th Percentile) 句段长(单位:形符)(75分位数) - + Sentence Segment Length in Tokens (Maximum) 句段长(单位:形符)(最大值) - + Sentence Segment Length in Tokens (Range) 句段长(单位:形符)(极差) - + Sentence Segment Length in Tokens (Interquartile Range) 句段长(单位:形符)(四分位数) - + Sentence Segment Length in Tokens (Modes) 句段长(单位:形符)(众数) - + Token Length in Syllables (Mean) 形符长(单位:音节)(均值) - + Token Length in Syllables (Standard Deviation) 形符长(单位:音节)(标准差) - + Token Length in Syllables (Variance) 形符长(单位:音节)(方差) - + Token Length in Syllables (Minimum) 形符长(单位:音节)(最小值) - + Token Length in Syllables (25th Percentile) 形符长(单位:音节)(25分位数) - + Token Length in Syllables (Median) 形符长(单位:音节)(中位数) - + Token Length in Syllables (75th Percentile) 形符长(单位:音节)(75分位数) - + Token Length in Syllables (Maximum) 形符长(单位:音节)(最大值) - + Token Length in Syllables (Range) 形符长(单位:音节)(极差) - + Token Length in Syllables (Interquartile Range) 形符长(单位:音节)(四分位差) - + Token Length in Syllables (Modes) 形符长(单位:音节)(众数) - + Token Length in Characters (Mean) 形符长(单位:字符)(均值) - + Token Length in Characters (Standard Deviation) 形符长(单位:字符)(标准差) - + Token Length in Characters (Variance) 形符长(单位:字符)(方差) - + Token Length in Characters (Minimum) 形符长(单位:字符)(最小值) - + Token Length in Characters (25th Percentile) 形符长(单位:字符)(25分位值) - + Token Length in Characters (Median) 形符长(单位:字符)(中位数) - + Token Length in Characters (75th Percentile) 形符长(单位:字符)(75分位数) - + Token Length in Characters (Maximum) 形符长(单位:字符)(最大值) - + Token Length in Characters (Range) 形符长(单位:字符)(极差) - + Token Length in Characters (Interquartile Range) 形符长(单位:字符)(四分位差) - + Token Length in Characters (Modes) 形符长(单位:字符)(众数) - + Type Length in Syllables (Mean) 类符长(单位:音节)(均值) - + Type Length in Syllables (Standard Deviation) 类符长(单位:音节)(标准差) - + Type Length in Syllables (Variance) 类符长(单位:音节)(方差) - + Type Length in Syllables (Minimum) 类符长(单位:音节)(最小值) - + Type Length in Syllables (25th Percentile) 类符长(单位:音节)(25分位数) - + Type Length in Syllables (Median) 类符长(单位:音节)(中位数) - + Type Length in Syllables (75th Percentile) 类符长(单位:音节)(75分位数) - + Type Length in Syllables (Maximum) 类符长(单位:音节)(最大值) - + Type Length in Syllables (Range) 类符长(单位:音节)(极差) - + Type Length in Syllables (Interquartile Range) 类符长(单位:音节)(四分位差) - + Type Length in Syllables (Modes) 类符长(单位:音节)(众数) - + Type Length in Characters (Mean) 类符长(单位:字符)(均值) - + Type Length in Characters (Standard Deviation) 类符长(单位:字符)(标准差) - + Type Length in Characters (Variance) 类符长(单位:字符)(方差) - + Type Length in Characters (Minimum) 类符长(单位:字符)(最小值) - + Type Length in Characters (25th Percentile) 类符长(单位:字符)(25分位数) - + Type Length in Characters (Median) 类符长(单位:字符)(中位数) - + Type Length in Characters (75th Percentile) 类符长(单位:字符)(75分位数) - + Type Length in Characters (Maximum) 类符长(单位:字符)(最大值) - + Type Length in Characters (Range) 类符长(单位:字符)(极差) - + Type Length in Characters (Interquartile Range) 类符长(单位:字符)(四分位差) - + Type Length in Characters (Modes) 类符长(单位:字符)(众数) - + Syllable Length in Characters (Mean) 音节长(单位:字符)(均值) - + Syllable Length in Characters (Standard Deviation) 音节长(单位:字符)(标准差) - + Syllable Length in Characters (Variance) 音节长(单位:字符)(方差) - + Syllable Length in Characters (Minimum) 音节长(单位:字符)(最小值) - + Syllable Length in Characters (25th Percentile) 音节长(单位:字符)(25分位数) - + Syllable Length in Characters (Median) 音节长(单位:字符)(中位数) - + Syllable Length in Characters (75th Percentile) 音节长(单位:字符)(75分位数) - + Syllable Length in Characters (Maximum) 音节长(单位:字符)(最大值) - - Syllable Length in Characters (Range) - 音节长(单位:字符)(极差) + + Syllable Length in Characters (Range) + 音节长(单位:字符)(极差) + + + + Syllable Length in Characters (Interquartile Range) + 音节长(单位:字符)(四分位差) + + + + Syllable Length in Characters (Modes) + 音节长(单位:字符)(众数) + + + + Al-Heeti's Readability Prediction Formula + + + + + Bormuth's Cloze Mean + + + + + Bormuth's Grade Placement + + + + + Coleman's Readability Formula + + + + + Dale-Chall Readability Formula + + + + + Danielson-Bryan's Readability Formula + + + + + Dawood's Readability Formula + + + + + Degrees of Reading Power + + + + + Dickes-Steiwer Handformel + + + + + Easy Listening Formula + + + + + Flesch Reading Ease (Farr-Jenkins-Paterson) + + + + + Fórmula de Comprensibilidad de Gutiérrez de Polini + + + + + Fucks's Stilcharakteristik + + + + + Lorge Readability Index + + + + + Luong-Nguyen-Dinh's Readability Formula + + + + + neue Wiener Literaturformeln + + + + + neue Wiener Sachtextformel + + + + + Strain Index + + + + + Tränkle & Bailer's Readability Formula + + + + + Tuldava's Text Difficulty + + + + + Wheeler & Smith's Readability Formula + - - Syllable Length in Characters (Interquartile Range) - 音节长(单位:字符)(四分位差) + + Corrected TTR + - - Syllable Length in Characters (Modes) - 音节长(单位:字符)(众数) + + Fisher's Index of Diversity + - - Al-Heeti's Readability Prediction Formula + + Herdan's Vₘ - - Bormuth's Cloze Mean + + HD-D - - Bormuth's Grade Placement + + LogTTR - - Coleman's Readability Formula + + Mean Segmental TTR - - Dale-Chall Readability Formula + + Measure of Textual Lexical Diversity - - Danielson-Bryan's Readability Formula + + Moving-average TTR - - Dawood's Readability Formula + + Popescu-Mačutek-Altmann's B₁ - - Degrees of Reading Power + + Popescu-Mačutek-Altmann's B₂ - - Dickes-Steiwer Handformel + + Popescu-Mačutek-Altmann's B₃ - - Easy Listening Formula + + Popescu-Mačutek-Altmann's B₄ - - Flesch Reading Ease (Farr-Jenkins-Paterson) + + Popescu-Mačutek-Altmann's B₅ - - Fórmula de Comprensibilidad de Gutiérrez de Polini + + Popescu's R₁ - - Fucks's Stilcharakteristik + + Popescu's R₂ - - Lorge Readability Index + + Popescu's R₃ - - Luong-Nguyen-Dinh's Readability Formula + + Popescu's R₄ - - neue Wiener Literaturformeln + + Repeat Rate - - neue Wiener Sachtextformel + + Root TTR - - Strain Index + + Shannon Entropy + 香农熵 + + + + Simpson's l - - Tränkle & Bailer's Readability Formula + + vocd-D - - Tuldava's Text Difficulty + + Yule's Characteristic K - - Wheeler & Smith's Readability Formula + + Yule's Index of Diversity @@ -10185,215 +11998,368 @@ Frequency + + wl_settings_default + + + Observed Files + 观察文件 + + + + Profiler + 分析工具 + + + + APA (7th edition) + APA(第七版) + + + + Counts + 计数 + + + + Token + 形符 + + + + File + 文件 + + + + Ascending + 升序 + + + + Token no. + 形符序号 + + + + Line chart + 折线图 + + + + Total + 合计 + + + + Frequency + 频数 + + + + None + + + + + p-value + p 值 + + + + General + 全局 + + + + Excel workbooks (*.xlsx) + Excel 工作簿 (*.xlsx) + + + + Non-embedded + 非嵌入式 + + + + Header + + + + + Embedded + 嵌入式 + + + + Part of speech + 词性 + + + + Others + 其他 + + + + Paragraph + 段落 + + + + Sentence + 句子 + + + + Word + 单词 + + + + Policy one + + + + + New + 新版 + + + + Original + 原版 + + + + Rank-frequency distribution + 频数排序分布 + + + + Two-tailed + 双尾 + + + + Relative frequency + 相对频数 + + + + Colormap + 色谱 + + wl_settings_figs - + Square 方形 - + Circle 圆形 - + Triangle up 朝上三角形 - + Triangle right 朝右三角形 - + Triangle down 朝下三角形 - + Triangle left 朝左三角形 - + Thin diamond 薄菱形 - + Pentagon 五角形 - + Hexagon 六边形 - + Octagon 八边形 - + Arc3 - + Arc - + Angle3 - + Angle - + Bar - + Solid 实线 - + Dashed 虚线 - + Dash-dotted 点画线 - + Dotted 点线 - + Curve 圆弧 - + Curve A 圆弧 A - + Curve B 圆弧 B - + Curve AB 圆弧 AB - + Curve filled A 实心圆弧 A - + Curve filled B 实心圆弧 B - + Curve filled AB 实心圆弧 AB - + Bracket A 方括号 A - + Bracket B 方括号 B - + Bracket AB 方括号 AB - + Bar AB 横条 AB - + Bracket curve 方括号加圆弧 - + Simple 朴素 - + Fancy 绚丽 - + Wedge 楔形 - + Circular 环形 - + Kamada-Kawai - + Planar 平面 - + Random 随机 - + Shell 同心 - + Spring 弹簧 - + Spectral 谱图 - + Spiral 螺旋 diff --git a/trs/zho_tw.ts b/trs/zho_tw.ts index e9cd08e8e..2e9eacff4 100644 --- a/trs/zho_tw.ts +++ b/trs/zho_tw.ts @@ -168,85 +168,90 @@ Wl_Dialog_About - + About Wordless 關於 Wordless - + - <div style="text-align: center;"> - <h2>Wordless {}</h2> - <div> - An Integrated Corpus Tool with Multilingual Support<br> - for the Study of Language, Literature, and Translation - </div> - </div> - - - <div style="text-align: center;"> - <h2>Wordless {}</h2> - <div> - 一款擁有多語種支援的語料庫整合工具<br> - 可用於語言學、文學及翻譯研究 - </div> - </div> - + <div style="text-align: center;"> + <h2>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Wordless</h2> + <div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Version {}</div> + </div> + + <div style="text-align: center;"> + <h2>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Wordless</h2> + <div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;版本 {}</div> + </div> + - + - <hr> - <div style="text-align: center;"> - Copyright (C) 2018-2023&nbsp;&nbsp;Ye Lei (叶磊)<br> - Licensed Under GNU GPLv3<br> - All Other Rights Reserved - </div> - + <div style="text-align: center;"> + An Integrated Corpus Tool with Multilingual Support<br> + for the Study of Language, Literature, and Translation + </div> + + <hr> + + <div style="text-align: center;"> + Copyright (C) 2018-2023&nbsp;&nbsp;Ye Lei (叶磊)<br> + Licensed Under GNU GPLv3<br> + All Other Rights Reserved + </div> + - <hr> - <div style="text-align: center;"> - Copyright (C) 2018-2023&nbsp;&nbsp;葉磊<br> - 基於 GNU GPLv3 開源許可<br> - 保留所有其他權利 - </div> - + <div style="text-align: center;"> + 一款擁有多語種支援的語料庫整合工具<br> + 可用於語言學、文學及翻譯研究 + </div> + + <hr> + + <div style="text-align: center;"> + 版權所有 (C) 2018-2023&nbsp;&nbsp;Ye Lei (葉磊)<br> + 基於 GNU GPLv3 協議<br> + 保留其他所有權利 + </div> + Wl_Dialog_Acks - + Name 名稱 - + Version 版本 - + Authors 作者 - + License 許可 - + Acknowledgments 致謝 - + ACKNOWLEDGMENTS.md ACKNOWLEDGMENTS_zho_tw.md - + <div> As Wordless stands on the shoulders of giants, I hereby extend my sincere gratitude to the following open-source projects without which this project would not have been possible: @@ -262,7 +267,7 @@ Wl_Dialog_Changelog - + Changelog 更新日誌 @@ -270,17 +275,17 @@ Wl_Dialog_Check_Updates - + Check for updates on startup 啟動時檢查更新 - + Cancel 取消 - + <div> Checking for updates... @@ -293,7 +298,7 @@ - + <div> Hooray, you are using the latest version of Wordless! @@ -306,7 +311,7 @@ - + <div> A network error has occurred, please check your network settings and try again or <a href="https://github.com/BLKSerene/Wordless/releases">check for updates manually</a>. @@ -319,17 +324,17 @@ - + OK 確認 - + Check for Updates 檢查更新 - + <div> Wordless {} is out, click <a href="https://github.com/BLKSerene/Wordless#download"><b>HERE</b></a> to download the latest version of Wordless. @@ -342,27 +347,27 @@ - + Current version: 當前版本: - + Try again 重試 - + Latest version: Checking... 最新版本:查詢中…… - + Latest version: 最新版本: - + Latest version: Network error 最新版本:網路錯誤 @@ -370,27 +375,27 @@ Wl_Dialog_Citing - + Citing 引用 - + Select citation system: 選擇引用體系: - + APA (7th edition) APA(第七版) - + MLA (8th edition) MLA(第八版) - + <div> If you are going to publish a work that uses Wordless, please cite as follows. @@ -452,7 +457,7 @@ Wl_Dialog_Confirm_Exit - + <div> Are you sure you want to exit Wordless? @@ -471,22 +476,22 @@ - + Always confirm on exit 關閉時總是提示確認 - + Exit 退出 - + Cancel 取消 - + Exit Wordless 退出 Wordless @@ -522,7 +527,7 @@ Wl_Dialog_Donating - + <div> If you would like to support the development of Wordless, you may donate via <a href="https://www.paypal.com/">PayPal</a>, <a href="https://global.alipay.com/">Alipay</a>, or <a href="https://pay.weixin.qq.com/index.php/public/wechatpay_en">WeChat Pay</a>. @@ -535,22 +540,22 @@ - + Donating via: 贊助途徑: - + Alipay 支付寶 - + WeChat Pay 微信支付 - + Donating 贊助 @@ -581,12 +586,12 @@ Wl_Dialog_Need_Help - + Need Help? 需要幫助? - + <div> If you have any questions, find software bugs, need to provide feedback, or want to submit feature requests, you may seek support from the open-source community or contact me directly via any of the support channels listed below. @@ -599,62 +604,62 @@ - + Support Channel 支援渠道 - + Information 資訊 - + <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/doc_eng.md">Documentation</a> <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/doc_eng.md">文件</a> - + <a href="https://github.com/BLKSerene/Wordless/issues">Gihub Issues</a> - + <a href="https://github.com/BLKSerene/Wordless/discussions">Gihub Discussions</a> - + Official documentation 官方文件 - + Tutorial videos 影片教程 - + Bug reports Bug 提交 - + Usage questions 使用問題 - + Email support 郵件諮詢 - + <a href="https://www.wechat.com/en/">WeChat</a> official account <a href="https://www.wechat.com/en/">微信</a>公眾號 - + <a href="https://www.youtube.com/@BLKSerene">YouTube</a> | <a href="https://space.bilibili.com/34963752/video">bilibili</a> <a href="https://www.youtube.com/@BLKSerene">YouTube</a> | <a href="https://space.bilibili.com/34963752/video">B 站</a> @@ -662,12 +667,12 @@ Wl_Dialog_Preview_Settings - + coarse-grained 粗分 - + fine-grained 細分 @@ -968,27 +973,27 @@ Wl_Dialog_Settings - + Save 儲存 - + Cancel 取消 - + OK 確認 - + Copy 複製 - + Close 關閉 @@ -996,7 +1001,7 @@ Wl_Loading - + Initializing Wordless... 初始化 Wordless…… @@ -1004,242 +1009,242 @@ Wl_Main - + Loading settings... 載入設定…… - + Initializing main window... 初始化主窗體…… - + Ready! 就緒! - + Starting Wordless... 啟動 Wordless…… - + &File 檔案 - + &Preferences 偏好 - + &Help 幫助 - + &Open Files... 開啟檔案... - + Open files 開啟檔案 - + Open &Folder... 開啟資料夾... - + Open all files in the folder 開啟資料夾下的所有檔案 - + &Reopen Closed Files 重新開啟已關閉檔案 - + Reopen closed files 重新開啟已經關閉的檔案 - + S&elect All 全選 - + Select all files 勾選所有檔案 - + &Deselect All 取消選中所有 - + Deselect all files 取消勾選所有檔案 - + &Invert Selection 反選 - + Invert file selection 反向勾選檔案 - + &Close Selected 關閉選中 - + Close selected file(s) 關閉已選中的檔案 - + C&lose All 關閉所有 - + Close all files 關閉所有檔案 - + &Exit... 退出... - + Exit the program 退出程式 - + &Settings 設定 - + Change settings 更改設定 - + &Display Language 顯示語言 - + &Reset Layouts 重置佈局 - + Reset layouts 重置佈局 - + &Show Status Bar 顯示狀態列 - + Show/Hide the status bar 顯示/隱藏狀態列 - + &Citing 引用 - + Show information about citing 顯示引用資訊 - + &Acknowledgments 致謝 - + Show acknowldgments 顯示致謝 - + &Need Help? 需要幫助? - + Show help information 顯示幫助資訊 - + &Donating 贊助 - + Show information about donating 顯示贊助相關資訊 - + Check &for Updates 檢查更新 - + Check for updates of Wordless 檢查 Wordless 更新 - + C&hangelog 更新日誌 - + Show Changelog 顯示更新日誌 - + About &Wordless 關於 Wordless - + Show information about Wordless 顯示 Wordless 的相關資訊 - + Reset Layouts 重置佈局 - + <div>Do you want to reset all layouts to their default settings?</div> @@ -1248,57 +1253,57 @@ - + Profiler 分析工具 - + Concordancer 索引工具 - + Parallel Concordancer 平行索引工具 - + Wordlist Generator 詞表生成器 - + N-gram Generator n 元組生成器 - + Collocation Extractor 搭配抽取器 - + Colligation Extractor 類聯接抽取器 - + Keyword Extractor 關鍵詞抽取器 - + Observed Files 觀察檔案 - + Reference Files 參照檔案 - + Dependency Parser 依存分析器 @@ -1371,22 +1376,22 @@ 統計方法 - + Dispersion 分佈 - + Adjusted Frequency 調整頻數 - + Statistical Significance 統計顯著性 - + Effect Size 效應量 @@ -1437,21 +1442,16 @@ 詞性標註 - + Bayes Factor 貝葉斯因子 - + Tables 表格 - -Profiler -分析工具 - - Dependency Parsing 依存分析 @@ -1491,6 +1491,11 @@ Sentiment Analysis 情感分析 + + +Lexical Diversity +詞彙多樣性 + Wl_Settings_Dependency_Parsing @@ -1525,12 +1530,12 @@ 預覽設定 - + Show preview 顯示預覽 - + Processing... 處理中…… @@ -1538,12 +1543,12 @@ Wl_Settings_Figs_Line_Charts - + General Settings 全域性設定 - + Font: 字型: @@ -1551,132 +1556,132 @@ Wl_Settings_Figs_Network_Graphs - + Node Settings 節點設定 - + Node shape: 節點形狀: - + Node size: 節點大小: - + Node color: 節點顏色: - + Node opacity: 節點不透明度: - + Node Label Settings 節點標籤設定 - + Label font: 標籤字型: - + Label font size: 標籤字型大小: - + Label font weight: 標籤字型粗細: - + Label font color: 標籤字型顏色: - + Label opacity: 標籤不透明度: - + Edge Settings 邊設定 - + Connection style: 連線樣式: - + Edge width: 邊的寬度: - + Minimum 最小 - + Maximum 最大 - + Edge style: 邊的樣式: - + Edge color: 邊的顏色: - + Edge opacity: 邊的不透明度: - + Arrow style: 箭頭樣式: - + Arrow size: 箭頭大小: - + Edge Label Settings 邊的標籤設定 - + Label position: 標籤位置: - + Rotate labels to lie parallel to edges 旋轉標籤使其與邊平行 - + Advanced Settings 高階設定 - + Layout: 佈局: @@ -1684,112 +1689,112 @@ Wl_Settings_Figs_Word_Clouds - + Font Settings 字型設定 - + Font: 字型: - + Select Font 選擇字型 - + Font size: 字型大小: - + Minimum 最小 - + Maximum 最大 - + Relative scaling: 相對縮放: - + Font color: 字型顏色: - + Custom 自定義 - + Monochrome 單色 - + Colormap 色譜 - + Auto 自動 - + Background Settings 背景設定 - + Background color: 背景顏色: - + Mask Settings 蒙版設定 - + Mask path: 蒙版路徑: - + Select Mask 選擇蒙版 - + Contour width: 輪廓寬度: - + Contour color: 輪廓顏色: - + Advanced Settings 高階設定 - + Prefer horizontal: 水平優先度: - + Allow repeated words 允許重複單詞 @@ -2080,7 +2085,7 @@ Wl_Settings_Measures_Adjusted_Freq - + General Settings 全域性設定 @@ -2088,17 +2093,17 @@ Wl_Settings_Measures_Bayes_Factor - + Log-likelihood Ratio Test 對數似然比檢驗 - + Apply Yates's correction for continuity 應用耶茨連續性校正 - + Student's t-test (2-sample) 學生 t 檢驗(雙樣本) @@ -2106,17 +2111,17 @@ Wl_Settings_Measures_Dispersion - + General Settings 全域性設定 - + Gries's DP - + Apply normalization 應用歸一化 @@ -2124,17 +2129,90 @@ Wl_Settings_Measures_Effect_Size - + Kilgarriff's Ratio - + Smoothing parameter: 平滑係數: +Wl_Settings_Measures_Lexical_Diversity + + +HD-D + + + + +Sample size: +樣本大小: + + + +LogTTR + + + + +Mean Segmental TTR + + + + +Number of tokens in each segment: +各部分中形符數: + + + +Measure of Textual Lexical Diversity + + + + +Factor size: +因子大小: + + + +Moving-average TTR + + + + +Window size: +視窗大小: + + + +Repeat Rate + + + + +Use data: +使用資料: + + + +Rank-frequency distribution +頻數排序分佈 + + + +Frequency spectrum +頻數譜 + + + +Shannon Entropy +夏農熵 + + + Wl_Settings_Measures_Readability @@ -2174,7 +2252,7 @@ Cloze criterion score: - +完型填空標準分: @@ -2214,17 +2292,12 @@ Dutch variant: -荷蘭語變體: +荷蘭語變體: Spanish variant: -西班牙語變體: - - - -Brouwer's Leesindex A -Brouwer Leesindex A +西班牙語變體: @@ -2244,7 +2317,7 @@ English variant: -英語變體: +英語變體: @@ -2290,57 +2363,57 @@ Wl_Settings_Measures_Statistical_Significance - + Fisher's Exact Test 費希爾精確檢驗 - + Log-likelihood Ratio Test 對數似然比檢驗 - + Apply Yates's correction for continuity 應用耶茨連續性校正 - + Mann-Whitney U Test 曼惠特尼 U 檢驗 - + Apply continuity correction 應用連續性校正 - + Pearson's Chi-squared Test 皮爾森卡方檢驗 - + Student's t-test (1-sample) 學生 t 檢驗(單樣本) - + Student's t-test (2-sample) 學生 t 檢驗(雙樣本) - + Welch's t-test Welch t 檢驗 - + z-score z 值 - + z-score (Berry-Rogghe) z 值(Berry-Rogghe) @@ -2431,17 +2504,17 @@ 示例 - + Fetching data... 獲取資料中…… - + Reset Mappings 重置對映 - + <div>Do you want to reset all mappings to their default settings?</div> <div><b>Note: This will only affect the mapping settings in the currently shown table.</b></div> @@ -2452,12 +2525,12 @@ - + Reset All Mappings 重置所有對映 - + <div>Do you want to reset all mappings to their default settings?</div> <div><b>Warning: This will affect the mapping settings in all tables!</b></div> @@ -2478,7 +2551,7 @@ 重置所有 - + Number of part-of-speech tags: 詞性標籤數: @@ -2630,7 +2703,7 @@ 選擇語種: - + Show preview 顯示預覽 @@ -2676,42 +2749,42 @@ Wl_Settings_Tables_Concordancer - + Sorting Settings 排序設定 - + Highlight colors: 高亮顏色: - + Level 1 / Node: 第1級/節點: - + Level 2: 第2級: - + Level 3: 第3級: - + Level 4: 第4級: - + Level 5: 第5級: - + Level 6: 第6級: @@ -2719,30 +2792,17 @@ Wl_Settings_Tables_Parallel_Concordancer - + Color Settings 顏色設定 - + Search term color: 檢索項顏色: -Wl_Settings_Tables_Profiler - - -General Settings -全域性設定 - - - -Number of tokens in each section when calculating standardized type-token ratio: -計算標準化類符形符比時各部分中的形符數: - - - Wl_Settings_Word_Tokenization @@ -3628,47 +3688,47 @@ Frequency % Wl_Table_Profiler_Len_Breakdown - + Total 合計 - + Count of {}-token-long Sentences {}個形符長的句子數 - + Count of {}-token-long Sentences % {}個形符長的句子數% - + Count of {}-token-long Sentence Segment {}個形符長的句段數 - + Count of {}-token-long Sentence Segment % {}個形符長的句段數% - + Count of {}-character-long Tokens {}個字元長的形符數 - + Count of {}-character-long Tokens % {}個字元長的形符數% - + Count of {}-syllables-long Tokens {}個音節長的形符數 - + Count of {}-syllables-long Tokens % {}個音節長的形符數% @@ -3676,17 +3736,25 @@ Frequency % Wl_Table_Profiler_Lens - + Total 合計 - + No language support 無語種支援 +Wl_Table_Profiler_Lexical_Diversity + + +Total +合計 + + + Wl_Table_Profiler_Readability @@ -3705,14 +3773,6 @@ Frequency % -Wl_Table_Profiler_Ttrs - - -Total -合計 - - - Wl_Table_Results_Sort_Conordancer @@ -3985,7 +4045,7 @@ Frequency % Wl_Worker_Colligation_Extractor_Fig - + Rendering figure... 渲染圖表中…… @@ -3993,7 +4053,7 @@ Frequency % Wl_Worker_Colligation_Extractor_Table - + Rendering table... 渲染表格中…… @@ -4001,7 +4061,7 @@ Frequency % Wl_Worker_Collocation_Extractor_Fig - + Rendering figure... 渲染圖表中…… @@ -4009,7 +4069,7 @@ Frequency % Wl_Worker_Collocation_Extractor_Table - + Rendering table... 渲染表格中…… @@ -4086,32 +4146,45 @@ Frequency % Wl_Worker_Dependency_Parser - + Rendering table... 渲染表格中…… -Wl_Worker_Download_Spacy_Model +Wl_Worker_Download_Model_Spacy - + Fetching model information... 獲取模型資訊中…… - + +Downloading model ({:.2f} MB)... +下載模型中({:.2f} MB)…… + + + Downloading model... 下載模型中…… - + Download completed successfully. 模型下載完畢。 + + +Wl_Worker_Download_Model_Stanza - -Downloading model ({:.2f} MB)... -下載模型中({:.2f} MB)…… + +Downloading model... +下載模型中…… + + + +Download completed successfully. +模型下載完畢。 @@ -4130,7 +4203,7 @@ Frequency % Wl_Worker_Fetch_Data_Tagsets - + Updating table... 更新表格中…… @@ -4138,7 +4211,7 @@ Frequency % Wl_Worker_Keyword_Extractor_Fig - + Rendering figure... 渲染圖表中…… @@ -4146,7 +4219,7 @@ Frequency % Wl_Worker_Keyword_Extractor_Table - + Rendering table... 渲染表格中…… @@ -4183,7 +4256,7 @@ Frequency % Wl_Worker_Profiler_Table - + Rendering table... 渲染表格中…… @@ -4717,11 +4790,6 @@ Frequency 計數 - -Type-token Ratios -類符形符比 - - Lengths 長度 @@ -4731,6 +4799,11 @@ Frequency Length Breakdown 細分長度 + + +Lexical Diversity +詞彙多樣性 + Wrapper_Wordlist_Generator @@ -4792,164 +4865,16 @@ Frequency -init_settings_default - - -None - - +init_settings_global - -Total -合計 + +Afrikaans +南非語 - -Frequency -頻數 - - - -General -全域性 - - - -Non-embedded -非嵌入式 - - - -Header - - - - -Embedded -嵌入式 - - - -Others -其他 - - - -Paragraph -段落 - - - -Sentence -句子 - - - -Word -單詞 - - - -Profiler -分析工具 - - - -Token -形符 - - - -File -檔案 - - - -Ascending -升序 - - - -p-value -p 值 - - - -Two-tailed -雙尾 - - - -Observed Files -觀察檔案 - - - -APA (7th edition) -APA(第七版) - - - -Counts -計數 - - - -Token no. -形符序號 - - - -Line chart -折線圖 - - - -Excel workbooks (*.xlsx) -Excel 工作簿 (*.xlsx) - - - -Part of speech -詞性 - - - -Relative frequency -相對頻數 - - - -Colormap -色譜 - - - -Policy one - - - - -New -新版 - - - -Original -原版 - - - -init_settings_global - - -Afrikaans -南非語 - - - -Albanian -阿爾巴尼亞語 + +Albanian +阿爾巴尼亞語 @@ -4962,1052 +4887,1017 @@ Frequency 阿拉伯語 - -Armenian -亞美尼亞語 - - - + Assamese 阿薩姆語 - + Asturian 阿斯圖里亞斯語 - + Azerbaijani 亞塞拜然語 - + Basque 巴斯克語 - + Belarusian 白俄羅斯語 - + Bengali 孟加拉語 - -Breton -布列塔尼語 - - Bulgarian 保加利亞語 - + Catalan 加泰羅尼亞語 - + Chinese (Simplified) 漢語(簡體) - + Chinese (Traditional) 漢語(繁體) - + Croatian 克羅埃西亞語 - + Czech 捷克語 - + Danish 丹麥語 - + Dutch 荷蘭語 - + English (United Kingdom) 英語(英國) - + English (United States) 英語(美國) - + Esperanto 世界語 - + Estonian 愛沙尼亞語 - + Finnish 芬蘭語 - + French 法語 - + Galician 加里西亞語 - + German (Austria) 德語(奧地利) - + German (Germany) 德語(德國) - + German (Switzerland) 德語(瑞士) - + Greek (Ancient) 希臘語(古) - + Greek (Modern) 希臘語(現代) - + Gujarati 古吉拉特語 - -Hausa -豪薩語 - - - -Hebrew -希伯來語 - - - + Hindi 印地語 - + Hungarian 匈牙利語 - + Icelandic 冰島語 - + Indonesian 印度尼西亞語 - + Irish 愛爾蘭語 - + Italian 義大利語 - + Japanese 日語 - + Kannada 卡納達語 - + Kazakh 哈薩克語 - + Korean 韓語 - -Kurdish -庫爾德語 - - - + Kyrgyz 吉爾吉斯語 - + Latin 拉丁語 - + Latvian 拉脫維亞語 - + Ligurian 利古里亞語 - + Lithuanian 立陶宛語 - + Luxembourgish 盧森堡語 - + Macedonian 馬其頓語 - + Malay 馬來語 - + Malayalam 馬拉雅拉姆語 - + Manx 馬恩語 - + Marathi 馬拉地語 - + Meitei 曼尼普爾語 - + Mongolian 蒙古語 - + Nepali 尼泊爾語 - + Norwegian Bokmål 書面挪威語 - + Norwegian Nynorsk 新挪威語 - + Oriya 奧里亞語 - + Persian 波斯語 - + Polish 波蘭語 - + Portuguese (Brazil) 葡萄牙語(巴西) - + Portuguese (Portugal) 葡萄牙語(葡萄牙) - + Romanian 羅馬尼亞語 - + Russian 俄語 - + Sanskrit 梵語 - + Scottish Gaelic 蘇格蘭蓋爾語 - + Serbian (Cyrillic) 塞爾維亞語(西里爾) - + Serbian (Latin) 塞爾維亞語(拉丁) - + Sinhala 僧伽羅語 - + Slovak 斯洛伐克語 - + Slovenian 斯洛維尼亞語 - -Somali -索馬利亞語 - - - -Sotho (Southern) -塞索托語 - - - + Spanish 西班牙語 - + Swahili 斯瓦西里語 - + Swedish 瑞典語 - + Tagalog 他加祿語 - + Tajik 塔吉克語 - + Tamil 泰米爾語 - + Tatar 韃靼語 - + Telugu 泰盧固語 - + Tetun Dili 帝力德頓語 - + Thai 泰語 - + Tibetan 藏語 - + Tigrinya 提格雷尼亞語 - + Tswana 茨瓦納語 - + Turkish 土耳其語 - + Ukrainian 烏克蘭語 - + Urdu 烏爾都語 - + Vietnamese 越南語 - + Welsh 威爾士語 - + Yoruba 約魯巴語 - + Zulu 祖魯語 - + Arabic (CP720) 阿拉伯語(CP720) - + Arabic (CP864) 阿拉伯語(CP864) - + Arabic (ISO-8859-6) 阿拉伯語(ISO-8859-6) - + Arabic (Mac OS Arabic) 阿拉伯語(Mac OS Arabic) - + Arabic (Windows-1256) 阿拉伯語(Windows-1256) - + Chinese (GB18030) 漢語(GB18030) - + Chinese (GBK) 漢語(GBK) - + Chinese (Simplified) (GB2312) 漢語(簡體)(GB2312) - + Chinese (Simplified) (HZ) 漢語(簡體)(HZ) - + Chinese (Traditional) (Big-5) 漢語(繁體)(Big-5) - + Chinese (Traditional) (Big5-HKSCS) 漢語(繁體)(Big5-HKSCS) - + Chinese (Traditional) (CP950) 漢語(繁體)(CP950) - + Croatian (Mac OS Croatian) 克羅埃西亞語(Mac OS Croatian) - + Cyrillic (CP855) 西里爾(CP855) - + Cyrillic (CP866) 西里爾(CP866) - + Cyrillic (ISO-8859-5) 西里爾(ISO-8859-5) - + Cyrillic (Mac OS Cyrillic) 西里爾(Mac OS Cyrillic) - + Cyrillic (Windows-1251) 西里爾(Windows-1251) - + English (ASCII) 英語(ASCII) - + English (EBCDIC 037) 英語(EBCDIC 037) - + English (CP437) 英語(CP437) - + European (HP Roman-8) 歐洲(HP Roman-8) - + European (Central) (CP852) 歐洲(中部)(CP852) - + European (Central) (ISO-8859-2) 歐洲(中部)(ISO-8859-2) - + European (Central) (Mac OS Central European) 歐洲(中部)(Mac OS Central European) - + European (Central) (Windows-1250) 歐洲(中部)(Windows-1250) - + European (Northern) (ISO-8859-4) 歐洲(北部)(ISO-8859-4) - + European (Southern) (ISO-8859-3) 歐洲(南部)(ISO-8859-3) - + European (Western) (EBCDIC 500) 歐洲(西部)(EBCDIC 500) - + European (Western) (CP850) 歐洲(西部)(CP850) - + European (Western) (CP858) 歐洲(西部)(CP858) - + European (Western) (CP1140) 歐洲(西部)(CP1140) - + European (Western) (ISO-8859-1) 歐洲(西部)(ISO-8859-1) - + European (Western) (ISO-8859-15) 歐洲(西部)(ISO-8859-15) - + European (Western) (Mac OS Roman) 歐洲(西部)(Mac OS Roman) - + European (Western) (Windows-1252) 歐洲(西部)(Windows-1252) - + French (CP863) 法語(CP863) - + German (EBCDIC 273) 德語(EBCDIC 273) - + Greek (CP737) 希臘語(CP737) - + Greek (CP869) 希臘語(CP869) - + Greek (CP875) 希臘語(CP875) - + Greek (ISO-8859-7) 希臘語(ISO-8859-7) - + Greek (Mac OS Greek) 希臘語(Mac OS Greek) - + Greek (Windows-1253) 希臘語(Windows-1253) - + Hebrew (CP856) 希伯來語(CP856) - + Hebrew (CP862) 希伯來語(CP862) - + Hebrew (EBCDIC 424) 希伯來語(EBCDIC 424) - + Hebrew (ISO-8859-8) 希伯來語(ISO-8859-8) - + Hebrew (Windows-1255) 希伯來語(Windows-1255) - + Icelandic (CP861) 冰島語(CP861) - + Icelandic (Mac OS Icelandic) 冰島語(Mac OS Icelandic) - + Japanese (CP932) 日語(CP932) - + Japanese (EUC-JP) 日語(EUC-JP) - + Japanese (EUC-JIS-2004) 日語(EUC-JIS-2004) - + Japanese (EUC-JISx0213) 日語(EUC-JISx0213) - + Japanese (ISO-2022-JP) 日語(ISO-2022-JP) - + Japanese (ISO-2022-JP-1) 日語(ISO-2022-JP-1) - + Japanese (ISO-2022-JP-2) 日語(ISO-2022-JP-2) - + Japanese (ISO-2022-JP-2004) 日語(ISO-2022-JP-2004) - + Japanese (ISO-2022-JP-3) 日語(ISO-2022-JP-3) - + Japanese (ISO-2022-JP-EXT) 日語(ISO-2022-JP-EXT) - + Japanese (Shift_JIS) 日語(Shift_JIS) - + Japanese (Shift_JIS-2004) 日語(Shift_JIS-2004) - + Japanese (Shift_JISx0213) 日語(Shift_JISx0213) - + Kazakh (KZ-1048) 哈薩克語(KZ-1048) - + Kazakh (PTCP154) 哈薩克語(PTCP154) - + Korean (EUC-KR) 韓語(EUC-KR) - + Korean (ISO-2022-KR) 韓語(ISO-2022-KR) - + Korean (JOHAB) 韓語(JOHAB) - + Korean (UHC) 韓語(UHC) - + Persian/Urdu (Mac OS Farsi) 波斯語/烏爾都語(Mac OS Farsi) - + Portuguese (CP860) 葡萄牙語(CP860) - + Romanian (Mac OS Romanian) 羅馬尼亞語(Mac OS Romanian) - + Russian (KOI8-R) 俄語(KOI8-R) - + Tajik (KOI8-T) 塔吉克語(KOI8-T) - + Thai (CP874) 泰語(CP874) - + Thai (ISO-8859-11) 泰語(ISO-8859-11) - + Turkish (CP857) 土耳其語(CP857) - + Turkish (EBCDIC 1026) 土耳其語(EBCDIC 1026) - + Turkish (ISO-8859-9) 土耳其語(ISO-8859-9) - + Turkish (Mac OS Turkish) 土耳其語(Mac OS Turkish) - + Turkish (Windows-1254) 土耳其語(Windows-1254) - + Ukrainian (CP1125) 烏克蘭語(CP1125) - + Ukrainian (KOI8-U) 烏克蘭語(KOI8-U) - + Urdu (CP1006) 烏爾都語(CP1006) - + Vietnamese (CP1258) 越南語(CP1258) - + spaCy - Sentencizer spaCy - 分句器 - + Student's t-test (1-sample) 學生 t 檢驗(單樣本) - + z-score z 值 - + Mann-Whitney U Test 曼惠特尼 U 檢驗 - + Student's t-test (2-sample) 學生 t 檢驗(雙樣本) - + Log-likelihood Ratio 對數似然比 - + t-statistic t 值 - + Dice's Coefficient Dice 係數 - + Jaccard Index 雅卡爾指數 - + Sorbian (Lower) 索布語(下) - + Sorbian (Upper) 索布語(上) - + None - + Welch's t-test Welch t 檢驗 - + z-score (Berry-Rogghe) z 值(Berry-Rogghe) - + Carroll's D₂ - + Gries's DP - + Juilland's D - + Lyne's D₃ - + Rosengren's S - + Zhang's Distributional Consistency - + Zhang's DC - + Engwall's FM - + Juilland's U - + Kromer's UR - + Rosengren's KF - + Difference Coefficient - + Kilgarriff's Ratio - + Log Ratio - + Minimum Sensitivity - + Poisson Collocation Measure @@ -6017,2549 +5907,4349 @@ Frequency 緬甸語 - + English (Middle) 英語(中古) - + Ganda 幹達語 - + Georgian 喬治亞語 - -Lugbara -盧格巴拉語 - - - + Punjabi (Gurmukhi) 旁遮普語(古木基) - + Sámi (Northern) 薩米語(北) - + Other languages 其他語種 - + All languages (UTF-8 without BOM) 所有語種(UTF-8 無簽名) - + All languages (UTF-8 with BOM) 所有語種(UTF-8 帶簽名) - + All languages (UTF-16 with BOM) 所有語種(UTF-16 帶簽名) - + All languages (UTF-16BE without BOM) 所有語種(UTF-16 大端無簽名) - + All languages (UTF-16LE without BOM) 所有語種(UTF-16 小端無簽名) - + All languages (UTF-32 with BOM) 所有語種(UTF-32 帶簽名) - + All languages (UTF-32BE without BOM) 所有語種(UTF-32 大端無簽名) - + All languages (UTF-32LE without BOM) 所有語種(UTF-32 小端無簽名) - + All languages (UTF-7) 所有語種(UTF-7) - + Baltic languages (CP775) 波羅的海諸語(CP775) - + Baltic languages (ISO-8859-13) 波羅的海諸語(ISO-8859-13) - + Baltic languages (Windows-1257) 波羅的海諸語(Windows-1257) - + Celtic languages (ISO-8859-14) 凱爾特語(ISO-8859-14) - + European (Southeastern) (ISO-8859-16) 歐洲(東南部)(ISO-8859-16) - + Nordic languages (CP865) 北歐諸語(CP865) - + Nordic languages (ISO-8859-10) 北歐諸語(ISO-8859-10) - + Thai (TIS-620) 泰語(TIS-620) - + CSV files (*.csv) CSV 檔案 (*.csv) - + Excel workbooks (*.xlsx) Excel 工作簿 (*.xlsx) - + HTML pages (*.htm; *.html) HTML 頁面 (*.htm; *.html) - + PDF files (*.pdf) PDF 檔案 (*.pdf) - + Text files (*.txt) 文字檔案 (*.txt) - + Translation memory files (*.tmx) 翻譯記憶庫檔案 (*.tmx) - + Word documents (*.docx) Word 文件 (*.docx) - + XML files (*.xml) XML 檔案 (*.xml) - + All files (*.*) 所有檔案 (*.*) - + OpenType fonts (*.otf) OpenType 字型 (*.otf) - + TrueType fonts (*.ttf) TrueType 字型 (*.ttf) - + Blizzard mipmap format (*.blp) - + Windows bitmaps (*.bmp) Windows 點陣圖 (*.bmp) - + Window cursor files (*.cur) Window 游標檔案 (*.cur) - + Multi-page PCX files (*.dcx) 多頁 PCX 檔案 (*.dcx) - + DirectDraw surface (*.dds) - + Device-independent bitmaps (*.dib) 裝置無關點陣圖 (*.dib) - + Encapsulated PostScript (*.eps, *.ps) - + Flexible image transport system (*.fit, *.fits) - + Autodesk animation files (*.flc, *.fli) Autodesk 動畫檔案 (*.flc, *.fli) - + Fox Engine textures (*.ftex) Fox Engine 紋理 (*.ftex) - + GIMP brush files (*.gbr) GIMP 筆刷檔案 (*.gbr) - + Graphics interchange format (*.gif) - + Apple icon images (*.icns) 蘋果圖示圖片 (*.icns) - + Windows icon files (*.ico) Windows 圖示檔案 (*.ico) - + IPTC/NAA newsphoto files (*.iim) IPTC/NAA newsphoto 檔案 (*.iim) - + IM files (*.im) IM 檔案 (*.im) - + Image Tools image files (*) Image Tools 影象檔案 (*) - + JPEG files (*.jfif, *.jpe, *.jpeg, *.jpg) JPEG 檔案 (*.jfif, *.jpe, *.jpeg, *.jpg) - + JPEG 2000 files (*.j2c, *.j2k, *.jp2, *.jpc, *.jpf, *.jpx) JPEG 2000 檔案 (*.j2c, *.j2k, *.jp2, *.jpc, *.jpf, *.jpx) - + McIDAS area files (*) McIDAS area 檔案 (*) - + PhotoCD files (*.pcd) PhotoCD 檔案 (*.pcd) - + Picture exchange (*.pcx) - + PIXAR raster files (*.pxr) PIXAR 柵格檔案 (*.pxr) - + Portable network graphics (*.apng, *.png) - + Portable pixmap format (*.pbm, *.pgm, *.pnm, *.ppm) - + Photoshop PSD files (*.psd) Photoshop PSD 檔案 (*.psd) - + Sun raster files (*.ras) Sun 柵格檔案 (*.ras) - + Silicon graphics images (*.bw, *.rgb, *.rgba, *.sgi) - + SPIDER files (*) SPIDER 檔案 (*) - + Truevision TGA (*.icb, *.tga, *.vda, *.vst) - + TIFF files (*.tif, *.tiff) TIFF 檔案 (*.tif, *.tiff) - + WebP files (*.webp) WebP 檔案 (*.webp) - + Windows metafiles (*.emf, *.wmf) Windows 元檔案 (*.emf, *.wmf) - + X bitmaps (*.xbm) - + X pixmaps (*.xpm) - + XV thumbnails (*) XV 縮圖 (*) - + botok - Tibetan sentence tokenizer botok - 藏語分句器 - + NLTK - Czech Punkt sentence tokenizer NLTK - 捷克語 Punkt 分句器 - + NLTK - Danish Punkt sentence tokenizer NLTK - 丹麥語 Punkt 分句器 - + NLTK - Dutch Punkt sentence tokenizer NLTK - 荷蘭語 Punkt 分句器 - + NLTK - English Punkt sentence tokenizer NLTK - 英語 Punkt 分句器 - + NLTK - Estonian Punkt sentence tokenizer NLTK - 愛沙尼亞語 Punkt 分句器 - + NLTK - Finnish Punkt sentence tokenizer NLTK - 芬蘭語 Punkt 分句器 - + NLTK - French Punkt sentence tokenizer NLTK - 法語 Punkt 分句器 - + NLTK - German Punkt sentence tokenizer NLTK - 德語 Punkt 分句器 - + NLTK - Greek Punkt sentence tokenizer NLTK - 希臘語 Punkt 分句器 - + NLTK - Italian Punkt sentence tokenizer NLTK - 義大利語 Punkt 分句器 - + NLTK - Malayalam Punkt sentence tokenizer NLTK - 馬拉雅拉姆語 Punkt 分句器 - + NLTK - Norwegian Punkt sentence tokenizer NLTK - 挪威語 Punkt 分句器 - + NLTK - Polish Punkt sentence tokenizer NLTK - 波蘭語 Punkt 分句器 - + NLTK - Portuguese Punkt sentence tokenizer NLTK - 葡萄牙語 Punkt 分句器 - + NLTK - Russian Punkt sentence tokenizer NLTK - 俄語 Punkt 分句器 - + NLTK - Slovenian Punkt sentence tokenizer NLTK - 斯洛維尼亞語 Punkt 分句器 - + NLTK - Spanish Punkt sentence tokenizer NLTK - 西班牙語 Punkt 分句器 - + NLTK - Swedish Punkt sentence tokenizer NLTK - 瑞典語 Punkt 分句器 - + NLTK - Turkish Punkt sentence tokenizer NLTK - 土耳其語 Punkt 分句器 - + spaCy - Croatian sentence recognizer spaCy - 克羅埃西亞語句子識別器 - + spaCy - Dutch sentence recognizer spaCy - 荷蘭語句子識別器 - + spaCy - Finnish sentence recognizer spaCy - 芬蘭語句子識別器 - + spaCy - Greek (Modern) sentence recognizer spaCy - 希臘語(現代)句子識別器 - + spaCy - Italian sentence recognizer spaCy - 義大利語句子識別器 - + spaCy - Lithuanian sentence recognizer spaCy - 立陶宛語句子識別器 - + spaCy - Macedonian sentence recognizer spaCy - 馬其頓語句子識別器 - + spaCy - Norwegian Bokmål sentence recognizer spaCy - 書面挪威語句子識別器 - + spaCy - Polish sentence recognizer spaCy - 波蘭語句子識別器 - + spaCy - Portuguese sentence recognizer spaCy - 葡萄牙語句子識別器 - + spaCy - Romanian sentence recognizer spaCy - 羅馬尼亞語句子識別器 - + spaCy - Russian sentence recognizer spaCy - 俄語句子識別器 - + spaCy - Swedish sentence recognizer spaCy - 瑞典語句子識別器 - + Underthesea - Vietnamese sentence tokenizer Underthesea - 越南語分句器 - + botok - Tibetan word tokenizer botok - 藏語分詞器 - + jieba - Chinese word tokenizer jieba - 漢語分詞器 - + NLTK - NIST tokenizer NLTK - NIST 分詞器 - + NLTK - NLTK tokenizer NLTK - NLTK 分詞器 - + NLTK - Penn Treebank tokenizer NLTK - 賓州樹庫分詞器 - + NLTK - Regular-expression tokenizer NLTK - 正規表示式分詞器 - + NLTK - Tok-tok tokenizer NLTK - Tok-tok 分詞器 - + NLTK - Twitter tokenizer NLTK - 推特分詞器 - + pkuseg - Chinese word tokenizer pkuseg - 漢語分詞器 - + PyThaiNLP - Longest matching PyThaiNLP - 最長匹配 - + PyThaiNLP - Maximum matching PyThaiNLP - 最大匹配 - + PyThaiNLP - Maximum matching + TCC PyThaiNLP - 最大匹配 + TCC - + Sacremoses - Moses tokenizer Sacremoses - Moses 分詞器 - + spaCy - Afrikaans word tokenizer spaCy - 南非語分詞器 - + spaCy - Albanian word tokenizer spaCy - 阿爾巴尼亞語分詞器 - + spaCy - Amharic word tokenizer spaCy - 阿姆哈拉語分詞器 - + spaCy - Arabic word tokenizer spaCy - 阿拉伯語分詞器 - + spaCy - Armenian word tokenizer -spaCy - 亞美尼亞語分詞器 +spaCy - Armenian 分詞器 - + spaCy - Azerbaijani word tokenizer spaCy - 亞塞拜然語分詞器 - + spaCy - Basque word tokenizer spaCy - 巴斯克語分詞器 - + spaCy - Bengali word tokenizer spaCy - 孟加拉語分詞器 - + spaCy - Bulgarian word tokenizer spaCy - 保加利亞語分詞器 - + spaCy - Catalan word tokenizer spaCy - 加泰羅尼亞語分詞器 - + spaCy - Chinese word tokenizer spaCy - 漢語分詞器 - + spaCy - Croatian word tokenizer spaCy - 克羅埃西亞語分詞器 - + spaCy - Czech word tokenizer spaCy - 捷克語分詞器 - + spaCy - Danish word tokenizer spaCy - 丹麥語分詞器 - + spaCy - Dutch word tokenizer spaCy - 荷蘭語分詞器 - + spaCy - English word tokenizer spaCy - 英語分詞器 - + spaCy - Estonian word tokenizer spaCy - 愛沙尼亞語分詞器 - + spaCy - Finnish word tokenizer spaCy - 芬蘭語分詞器 - + spaCy - French word tokenizer spaCy - 法語分詞器 - + spaCy - Ganda word tokenizer spaCy - 幹達語分詞器 - + spaCy - German word tokenizer spaCy - 德語分詞器 - + spaCy - Greek (Ancient) word tokenizer spaCy - 希臘語(古)分詞器 - + spaCy - Greek (Modern) word tokenizer spaCy - 希臘語(現代)分詞器 - + spaCy - Gujarati word tokenizer spaCy - 古吉拉特語分詞器 - -spaCy - Hebrew word tokenizer -spaCy - 希伯來語分詞器 - - - + spaCy - Hindi word tokenizer spaCy - 印地語分詞器 - + spaCy - Hungarian word tokenizer spaCy - 匈牙利語分詞器 - + spaCy - Icelandic word tokenizer spaCy - 冰島語分詞器 - + spaCy - Indonesian word tokenizer spaCy - 印度尼西亞語分詞器 - + spaCy - Irish word tokenizer spaCy - 愛爾蘭語分詞器 - + spaCy - Italian word tokenizer spaCy - 義大利語分詞器 - + spaCy - Japanese word tokenizer spaCy - 日語分詞器 - + spaCy - Kannada word tokenizer spaCy - 卡納達語分詞器 - + spaCy - Kyrgyz word tokenizer spaCy - 吉爾吉斯語分詞器 - + spaCy - Latin word tokenizer spaCy - 拉丁語分詞器 - + spaCy - Latvian word tokenizer spaCy - 拉脫維亞語分詞器 - + spaCy - Ligurian word tokenizer spaCy - 利古里亞語分詞器 - + spaCy - Lithuanian word tokenizer spaCy - 立陶宛語分詞器 - + spaCy - Luxembourgish word tokenizer spaCy - 盧森堡語分詞器 - + spaCy - Macedonian word tokenizer spaCy - 馬其頓語分詞器 - + spaCy - Malayalam word tokenizer spaCy - 馬拉雅拉姆語分詞器 - + spaCy - Marathi word tokenizer spaCy - 馬拉地語分詞器 - + spaCy - Nepali word tokenizer spaCy - 尼泊爾語分詞器 - -spaCy - Norwegian word tokenizer -spaCy - 挪威語分詞器 - - - + spaCy - Persian word tokenizer spaCy - 波斯語分詞器 - + spaCy - Polish word tokenizer spaCy - 波蘭語分詞器 - + spaCy - Portuguese word tokenizer spaCy - 葡萄牙語分詞器 - + spaCy - Romanian word tokenizer spaCy - 羅馬尼亞語分詞器 - + spaCy - Russian word tokenizer spaCy - 俄語分詞器 - + spaCy - Sanskrit word tokenizer spaCy - 梵語分詞器 - + spaCy - Serbian word tokenizer spaCy - 塞爾維亞語分詞器 - + spaCy - Sinhala word tokenizer spaCy - 僧伽羅語分詞器 - + spaCy - Slovak word tokenizer spaCy - 斯洛伐克語分詞器 - + spaCy - Slovenian word tokenizer spaCy - 斯洛維尼亞語分詞器 - + spaCy - Sorbian (Lower) word tokenizer spaCy - 索布語(下)分詞器 - + spaCy - Sorbian (Upper) word tokenizer spaCy - 索布語(上)分詞器 - + spaCy - Spanish word tokenizer spaCy - 西班牙語分詞器 - + spaCy - Swedish word tokenizer spaCy - 瑞典語分詞器 - + spaCy - Tagalog word tokenizer spaCy - 他加祿語分詞器 - + spaCy - Tamil word tokenizer spaCy - 泰米爾語分詞器 - + spaCy - Tatar word tokenizer spaCy - 韃靼語分詞器 - + spaCy - Telugu word tokenizer spaCy - 泰盧固語分詞器 - + spaCy - Tigrinya word tokenizer spaCy - 提格雷尼亞語分詞器 - + spaCy - Tswana word tokenizer spaCy - 茨瓦納語分詞器 - + spaCy - Turkish word tokenizer spaCy - 土耳其語分詞器 - + spaCy - Ukrainian word tokenizer spaCy - 烏克蘭語分詞器 - + spaCy - Urdu word tokenizer spaCy - 烏爾都語分詞器 - + spaCy - Yoruba word tokenizer spaCy - 約魯巴語分詞器 - + SudachiPy - Japanese word tokenizer (split mode A) SudachiPy - 日語分詞器(切分模式 A) - + SudachiPy - Japanese word tokenizer (split mode B) SudachiPy - 日語分詞器(切分模式 B) - + SudachiPy - Japanese word tokenizer (split mode C) SudachiPy - 日語分詞器(切分模式 C) - + Underthesea - Vietnamese word tokenizer Underthesea - 越南語分詞器 - + Wordless - Chinese character tokenizer Wordless - 漢語分字器 - + Wordless - Japanese kanji tokenizer Wordless - 日語分字器 - + NLTK - Legality syllable tokenizer NLTK - 合法性分音節器 - + NLTK - Sonority sequencing syllable tokenizer NLTK - 響度順序分音節器 - + Pyphen - Afrikaans syllable tokenizer Pyphen - 南非語分音節器 - + Pyphen - Albanian syllable tokenizer Pyphen - 阿爾巴尼亞語分音節器 - + Pyphen - Belarusian syllable tokenizer Pyphen - 白俄羅斯語分音節器 - + Pyphen - Bulgarian syllable tokenizer Pyphen - 保加利亞語分音節器 - + Pyphen - Catalan syllable tokenizer Pyphen - 加泰羅尼亞語分音節器 - + Pyphen - Croatian syllable tokenizer Pyphen - 克羅埃西亞語分音節器 - + Pyphen - Czech syllable tokenizer Pyphen - 捷克語分音節器 - + Pyphen - Danish syllable tokenizer Pyphen - 丹麥語分音節器 - + Pyphen - Dutch syllable tokenizer Pyphen - 荷蘭語分音節器 - + Pyphen - English (United Kingdom) syllable tokenizer Pyphen - 英語(英國)分音節器 - + Pyphen - English (United States) syllable tokenizer Pyphen - 英語(美國)分音節器 - + Pyphen - Esperanto syllable tokenizer Pyphen - 世界語分音節器 - + Pyphen - Estonian syllable tokenizer Pyphen - 愛沙尼亞語分音節器 - + Pyphen - French syllable tokenizer Pyphen - 法語分音節器 - + Pyphen - Galician syllable tokenizer Pyphen - 加里西亞語分音節器 - + Pyphen - German (Austria) syllable tokenizer Pyphen - 德語(奧地利)分音節器 - + Pyphen - German (Germany) syllable tokenizer Pyphen - 德語(德國)分音節器 - + Pyphen - German (Switzerland) syllable tokenizer Pyphen - 德語(瑞士)分音節器 - + Pyphen - Greek (Modern) syllable tokenizer Pyphen - 希臘語(現代)分音節器 - + Pyphen - Hungarian syllable tokenizer Pyphen - 匈牙利語分音節器 - + Pyphen - Icelandic syllable tokenizer Pyphen - 冰島語分音節器 - + Pyphen - Indonesian syllable tokenizer Pyphen - 印度尼西亞語分音節器 - + Pyphen - Italian syllable tokenizer Pyphen - 義大利語分音節器 - + Pyphen - Lithuanian syllable tokenizer Pyphen - 立陶宛語分音節器 - + Pyphen - Latvian syllable tokenizer Pyphen - 拉脫維亞語分音節器 - + Pyphen - Mongolian syllable tokenizer Pyphen - 蒙古語分音節器 - + Pyphen - Norwegian Bokmål syllable tokenizer Pyphen - 書面挪威語分音節器 - + Pyphen - Norwegian Nynorsk syllable tokenizer Pyphen - 新挪威語分音節器 - + Pyphen - Polish syllable tokenizer Pyphen - 波蘭語分音節器 - + Pyphen - Portuguese (Brazil) syllable tokenizer Pyphen - 葡萄牙語(巴西)分音節器 - + Pyphen - Portuguese (Portugal) syllable tokenizer Pyphen - 葡萄牙語(葡萄牙)分音節器 - + Pyphen - Romanian syllable tokenizer Pyphen - 羅馬尼亞語分音節器 - + Pyphen - Russian syllable tokenizer Pyphen - 俄語分音節器 - + Pyphen - Serbian (Cyrillic) syllable tokenizer Pyphen - 塞爾維亞語(西里爾)分音節器 - + Pyphen - Serbian (Latin) syllable tokenizer Pyphen - 塞爾維亞語(拉丁)分音節器 - + Pyphen - Slovak syllable tokenizer Pyphen - 斯洛伐克語分音節器 - + Pyphen - Slovenian syllable tokenizer Pyphen - 斯洛維尼亞語分音節器 - + Pyphen - Spanish syllable tokenizer Pyphen - 西班牙語分音節器 - + Pyphen - Swedish syllable tokenizer Pyphen - 瑞典語分音節器 - + Pyphen - Telugu syllable tokenizer Pyphen - 泰盧固語分音節器 - + Pyphen - Thai syllable tokenizer Pyphen - 泰語分音節器 - + Pyphen - Ukrainian syllable tokenizer Pyphen - 烏克蘭語分音節器 - + Pyphen - Zulu syllable tokenizer Pyphen - 祖魯語分音節器 - + PyThaiNLP - Thai syllable tokenizer PyThaiNLP - 泰語分音節器 - + botok - Tibetan part-of-speech tagger botok - 藏語詞性標註器 - + jieba - Chinese part-of-speech tagger jieba - 漢語詞性標註器 - + NLTK - English perceptron part-of-speech tagger NLTK - 英語感知機詞性標註器 - + NLTK - Russian perceptron part-of-speech tagger NLTK - 俄語感知機詞性標註器 - + pymorphy3 - Morphological analyzer pymorphy3 - 形態分析器 - + PyThaiNLP - Perceptron part-of-speech tagger (ORCHID) PyThaiNLP - 感知機詞性標註器(ORCHID) - + PyThaiNLP - Perceptron part-of-speech tagger (PUD) PyThaiNLP - 感知機詞性標註器(PUD) - + spaCy - Catalan part-of-speech tagger spaCy - 加泰羅尼亞語詞性標註器 - + spaCy - Chinese part-of-speech tagger spaCy - 漢語詞性標註器 - + spaCy - Croatian part-of-speech tagger spaCy - 克羅埃西亞語詞性標註器 - + spaCy - Danish part-of-speech tagger spaCy - 丹麥語詞性標註器 - + spaCy - Dutch part-of-speech tagger spaCy - 荷蘭語詞性標註器 - + spaCy - English part-of-speech tagger spaCy - 英語詞性標註器 - + spaCy - Finnish part-of-speech tagger spaCy - 芬蘭語詞性標註器 - + spaCy - French part-of-speech tagger spaCy - 法語詞性標註器 - + spaCy - German part-of-speech tagger spaCy - 德語詞性標註器 - + spaCy - Greek (Modern) part-of-speech tagger spaCy - 希臘語(現代)詞性標註器 - + spaCy - Italian part-of-speech tagger spaCy - 義大利語詞性標註器 - + spaCy - Japanese part-of-speech tagger spaCy - 日語詞性標註器 - + spaCy - Lithuanian part-of-speech tagger spaCy - 立陶宛語詞性標註器 - + spaCy - Macedonian part-of-speech tagger spaCy - 馬其頓語詞性標註器 - + spaCy - Norwegian Bokmål part-of-speech tagger spaCy - 書面挪威語詞性標註器 - + spaCy - Polish part-of-speech tagger spaCy - 波蘭語詞性標註器 - + spaCy - Portuguese part-of-speech tagger spaCy - 葡萄牙語詞性標註器 - + spaCy - Romanian part-of-speech tagger spaCy - 羅馬尼亞語詞性標註器 - + spaCy - Russian part-of-speech tagger spaCy - 俄語詞性標註器 - + spaCy - Spanish part-of-speech tagger spaCy - 西班牙語詞性標註器 - + spaCy - Swedish part-of-speech tagger spaCy - 瑞典語詞性標註器 - + spaCy - Ukrainian part-of-speech tagger spaCy - 烏克蘭語詞性標註器 - + SudachiPy - Japanese part-of-speech tagger SudachiPy - 日語詞性標註器 - + Underthesea - Vietnamese part-of-speech tagger Underthesea - 越南語詞性標註器 - + botok - Tibetan lemmatizer botok - 藏語詞形還原器 - + NLTK - WordNet lemmatizer NLTK - WordNet 詞形還原器 - + simplemma - Albanian lemmatizer simplemma - 阿爾巴尼亞語詞形還原器 - + simplemma - Armenian lemmatizer -simplemma - 亞美尼亞語詞形還原器 +simplemma - Armenian 詞形還原器 - + simplemma - Asturian lemmatizer simplemma - 阿斯圖里亞斯語詞形還原器 - + simplemma - Bulgarian lemmatizer simplemma - 保加利亞語詞形還原器 - + simplemma - Catalan lemmatizer simplemma - 加泰羅尼亞語詞形還原器 - + simplemma - Czech lemmatizer simplemma - 捷克語詞形還原器 - + simplemma - Danish lemmatizer simplemma - 丹麥語詞形還原器 - + simplemma - Dutch lemmatizer simplemma - 荷蘭語詞形還原器 - + simplemma - English lemmatizer simplemma - 英語詞形還原器 - + simplemma - English (Middle) lemmatizer simplemma - 英語(中古)詞形還原器 - + simplemma - Estonian lemmatizer simplemma - 愛沙尼亞語詞形還原器 - + simplemma - Finnish lemmatizer simplemma - 芬蘭語詞形還原器 - + simplemma - French lemmatizer simplemma - 法語詞形還原器 - + simplemma - Galician lemmatizer simplemma - 加里西亞語詞形還原器 - + simplemma - Georgian lemmatizer simplemma - 喬治亞語詞形還原器 - + simplemma - German lemmatizer simplemma - 德語詞形還原器 - + simplemma - Greek (Modern) lemmatizer simplemma - 希臘語(現代)詞形還原器 - + simplemma - Hindi lemmatizer simplemma - 印地語詞形還原器 - + simplemma - Hungarian lemmatizer simplemma - 匈牙利語詞形還原器 - + simplemma - Icelandic lemmatizer simplemma - 冰島語詞形還原器 - + simplemma - Indonesian lemmatizer simplemma - 印度尼西亞語詞形還原器 - + simplemma - Irish lemmatizer simplemma - 愛爾蘭語詞形還原器 - + simplemma - Italian lemmatizer simplemma - 義大利語詞形還原器 - + simplemma - Latin lemmatizer simplemma - 拉丁語詞形還原器 - + simplemma - Latvian lemmatizer simplemma - 拉脫維亞語詞形還原器 - + simplemma - Lithuanian lemmatizer simplemma - 立陶宛語詞形還原器 - + simplemma - Luxembourgish lemmatizer simplemma - 盧森堡語詞形還原器 - + simplemma - Macedonian lemmatizer simplemma - 馬其頓語詞形還原器 - + simplemma - Malay lemmatizer simplemma - 馬來語詞形還原器 - + simplemma - Manx lemmatizer simplemma - 馬恩語詞形還原器 - + simplemma - Norwegian Bokmål lemmatizer simplemma - 書面挪威語詞形還原器 - + simplemma - Norwegian Nynorsk lemmatizer simplemma - 新挪威語詞形還原器 - + simplemma - Persian lemmatizer simplemma - 波斯語詞形還原器 - + simplemma - Polish lemmatizer simplemma - 波蘭語詞形還原器 - + simplemma - Portuguese lemmatizer simplemma - 葡萄牙語詞形還原器 - + simplemma - Romanian lemmatizer simplemma - 羅馬尼亞語詞形還原器 - + simplemma - Russian lemmatizer simplemma - 俄語詞形還原器 - + simplemma - Sámi (Northern) lemmatizer simplemma - 薩米語(北)詞形還原器 - + simplemma - Scottish Gaelic lemmatizer simplemma - 蘇格蘭蓋爾語詞形還原器 - + simplemma - Serbo-Croatian lemmatizer simplemma - 塞爾維亞-克羅埃西亞語詞形還原器 - + simplemma - Slovak lemmatizer simplemma - 斯洛伐克語詞形還原器 - + simplemma - Slovenian lemmatizer simplemma - 斯洛維尼亞語詞形還原器 - + simplemma - Spanish lemmatizer simplemma - 西班牙語詞形還原器 - + simplemma - Swahili lemmatizer simplemma - 斯瓦西里語詞形還原器 - + simplemma - Swedish lemmatizer simplemma - 瑞典語詞形還原器 - + simplemma - Tagalog lemmatizer simplemma - 他加祿語詞形還原器 - + simplemma - Turkish lemmatizer simplemma - 土耳其語詞形還原器 - + simplemma - Ukrainian lemmatizer simplemma - 烏克蘭語詞形還原器 - + simplemma - Welsh lemmatizer simplemma - 威爾士語詞形還原器 - + spaCy - Bengali lemmatizer spaCy - 孟加拉語詞形還原器 - + spaCy - Catalan lemmatizer spaCy - 加泰羅尼亞語詞形還原器 - + spaCy - Croatian lemmatizer spaCy - 克羅埃西亞語詞形還原器 - + spaCy - Czech lemmatizer spaCy - 捷克語詞形還原器 - + spaCy - Danish lemmatizer spaCy - 丹麥語詞形還原器 - + spaCy - Dutch lemmatizer spaCy - 荷蘭語詞形還原器 - + spaCy - English lemmatizer spaCy - 英語詞形還原器 - + spaCy - Finnish lemmatizer spaCy - 芬蘭語詞形還原器 - + spaCy - French lemmatizer spaCy - 法語詞形還原器 - + spaCy - German lemmatizer spaCy - 德語詞形還原器 - + spaCy - Greek (Ancient) lemmatizer spaCy - 希臘語(古)詞形還原器 - + spaCy - Greek (Modern) lemmatizer spaCy - 希臘語(現代)詞形還原器 - + spaCy - Hungarian lemmatizer spaCy - 匈牙利語詞形還原器 - + spaCy - Indonesian lemmatizer spaCy - 印度尼西亞語詞形還原器 - + spaCy - Irish lemmatizer spaCy - 愛爾蘭語詞形還原器 - + spaCy - Italian lemmatizer spaCy - 義大利語詞形還原器 - + spaCy - Japanese lemmatizer spaCy - 日語詞形還原器 - + spaCy - Lithuanian lemmatizer spaCy - 立陶宛語詞形還原器 - + spaCy - Luxembourgish lemmatizer spaCy - 盧森堡語詞形還原器 - + spaCy - Macedonian lemmatizer spaCy - 馬其頓語詞形還原器 - + spaCy - Norwegian Bokmål lemmatizer spaCy - 書面挪威語詞形還原器 - + spaCy - Persian lemmatizer spaCy - 波斯語詞形還原器 - + spaCy - Polish lemmatizer spaCy - 波蘭語詞形還原器 - + spaCy - Portuguese lemmatizer spaCy - 葡萄牙語詞形還原器 - + spaCy - Romanian lemmatizer spaCy - 羅馬尼亞語詞形還原器 - + spaCy - Russian lemmatizer spaCy - 俄語詞形還原器 - + spaCy - Serbian lemmatizer spaCy - 塞爾維亞語詞形還原器 - + spaCy - Spanish lemmatizer spaCy - 西班牙語詞形還原器 - + spaCy - Swedish lemmatizer spaCy - 瑞典語詞形還原器 - + spaCy - Tagalog lemmatizer spaCy - 他加祿語詞形還原器 - + spaCy - Turkish lemmatizer spaCy - 土耳其語詞形還原器 - + spaCy - Ukrainian lemmatizer spaCy - 烏克蘭語詞形還原器 - + spaCy - Urdu lemmatizer spaCy - 烏爾都語詞形還原器 - + SudachiPy - Japanese lemmatizer SudachiPy - 日語詞形還原器 - + NLTK - Arabic stop word list NLTK - 阿拉伯語停用詞表 - + NLTK - Azerbaijani stop word list NLTK - 亞塞拜然語停用詞表 - + NLTK - Basque stop word list NLTK - 巴斯克語停用詞表 - + NLTK - Bengali stop word list NLTK - 孟加拉語停用詞表 - + NLTK - Catalan stop word list NLTK - 加泰羅尼亞語停用詞表 - + NLTK - Chinese (Simplified) stop word list NLTK - 漢語(簡體)停用詞表 - + NLTK - Chinese (Traditional) stop word list NLTK - 漢語(繁體)停用詞表 - + NLTK - Danish stop word list NLTK - 丹麥語停用詞表 - + NLTK - Dutch stop word list NLTK - 荷蘭語停用詞表 - + NLTK - English stop word list NLTK - 英語停用詞表 - + NLTK - Finnish stop word list NLTK - 芬蘭語停用詞表 - + NLTK - French stop word list NLTK - 法語停用詞表 - + NLTK - German stop word list NLTK - 德語停用詞表 - + NLTK - Greek (Modern) stop word list NLTK - 希臘語(現代)停用詞表 - -NLTK - Hebrew stop word list -NLTK - 希伯來語停用詞表 - - - + NLTK - Hungarian stop word list NLTK - 匈牙利語停用詞表 - + NLTK - Indonesian stop word list NLTK - 印度尼西亞語停用詞表 - + NLTK - Italian stop word list NLTK - 義大利語停用詞表 - -NLTK - Kazakh stop word list -NLTK - 哈薩克語停用詞表 + +NLTK - Kazakh stop word list +NLTK - 哈薩克語停用詞表 + + + +NLTK - Nepali stop word list +NLTK - 尼泊爾語停用詞表 + + + +NLTK - Norwegian stop word list +NLTK - 挪威語停用詞表 + + + +NLTK - Portuguese stop word list +NLTK - 葡萄牙語停用詞表 + + + +NLTK - Romanian stop word list +NLTK - 羅馬尼亞語停用詞表 + + + +NLTK - Russian stop word list +NLTK - 俄語停用詞表 + + + +NLTK - Slovenian stop word list +NLTK - 斯洛維尼亞語停用詞表 + + + +NLTK - Spanish stop word list +NLTK - 西班牙語停用詞表 + + + +NLTK - Swedish stop word list +NLTK - 瑞典語停用詞表 + + + +NLTK - Tajik stop word list +NLTK - 塔吉克語停用詞表 + + + +NLTK - Turkish stop word list +NLTK - 土耳其語停用詞表 + + + +PyThaiNLP - Thai stop word list +PyThaiNLP - 泰語停用詞表 + + + +Custom stop word list +自定義停用詞表 + + + +spaCy - Catalan dependency parser +spaCy - 加泰羅尼亞語依存分析器 + + + +spaCy - Chinese dependency parser +spaCy - 漢語依存分析器 + + + +spaCy - Croatian dependency parser +spaCy - 克羅埃西亞語依存分析器 + + + +spaCy - Danish dependency parser +spaCy - 丹麥語依存分析器 + + + +spaCy - Dutch dependency parser +spaCy - 荷蘭語依存分析器 + + + +spaCy - English dependency parser +spaCy - 英語依存分析器 + + + +spaCy - Finnish dependency parser +spaCy - 芬蘭語依存分析器 + + + +spaCy - French dependency parser +spaCy - 法語依存分析器 + + + +spaCy - German dependency parser +spaCy - 德語依存分析器 + + + +spaCy - Greek (Modern) dependency parser +spaCy - 希臘語(現代)依存分析器 + + + +spaCy - Italian dependency parser +spaCy - 義大利語依存分析器 + + + +spaCy - Japanese dependency parser +spaCy - 日語依存分析器 + + + +spaCy - Lithuanian dependency parser +spaCy - 立陶宛語依存分析器 + + + +spaCy - Macedonian dependency parser +spaCy - 馬其頓語依存分析器 + + + +spaCy - Norwegian Bokmål dependency parser +spaCy - 書面挪威語依存分析器 + + + +spaCy - Polish dependency parser +spaCy - 波蘭語依存分析器 + + + +spaCy - Portuguese dependency parser +spaCy - 葡萄牙語依存分析器 + + + +spaCy - Romanian dependency parser +spaCy - 羅馬尼亞語依存分析器 + + + +spaCy - Russian dependency parser +spaCy - 俄語依存分析器 + + + +spaCy - Spanish dependency parser +spaCy - 西班牙語依存分析器 + + + +spaCy - Swedish dependency parser +spaCy - 瑞典語依存分析器 + + + +spaCy - Ukrainian dependency parser +spaCy - 烏克蘭語依存分析器 + + + +Average logarithmic distance + + + + +Average reduced frequency + + + + +Average waiting time + + + + +Carroll's Uₘ + + + + +Fisher's exact test +費希爾精確檢驗 + + + +Log-likelihood ratio test +對數似然比檢驗 + + + +Pearson's chi-squared test +皮爾森卡方檢驗 + + + +Cubic association ratio + + + + +Dice's coefficient +Dice 係數 + + + +Difference coefficient + + + + +Jaccard index +雅卡爾指數 + + + +Log-frequency biased MD + + + + +Kilgarriff's ratio + + + + +Log ratio + + + + +Minimum sensitivity + + + + +Mutual dependency + + + + +Mutual expectation + + + + +Mutual information +互資訊 + + + +Odds ratio +比值比 + + + +Pointwise mutual information +點互資訊 + + + +Poisson collocation measure + + + + +Squared phi coefficient +Phi 係數的平方 + + + +Microsoft Paint files (*.msp) +Microsoft Paint 檔案 (*.msp) + + + +Khmer +柬埔寨語 + + + +khmer-nltk - Khmer sentence tokenizer +khmer-nltk - 柬埔寨語分句器 + + + +spaCy - Korean dependency parser +spaCy - 韓語依存分析器 + + + +spaCy - Slovenian dependency parser +spaCy - 斯洛維尼亞語依存分析器 + + + +spaCy - Korean sentence recognizer +spaCy - 韓語句子識別器 + + + +khmer-nltk - Khmer word tokenizer +khmer-nltk - 柬埔寨語分詞器 + + + +spaCy - Korean word tokenizer +spaCy - 韓語分詞器 + + + +spaCy - Malay word tokenizer +spaCy - 馬來語分詞器 + + + +khmer-nltk - Khmer part-of-speech tagger +khmer-nltk - 柬埔寨語詞性標註器 + + + +PyThaiNLP - Perceptron part-of-speech tagger (Blackboard) +PyThaiNLP - 感知機詞性標註器(Blackboard) + + + +spaCy - Korean part-of-speech tagger +spaCy - 韓語詞性標註器 + + + +spaCy - Slovenian part-of-speech tagger +spaCy - 斯洛維尼亞語詞性標註器 + + + +spaCy - Korean lemmatizer +spaCy - 韓語詞形還原器 + + + +spaCy - Slovenian lemmatizer +spaCy - 斯洛維尼亞語詞形還原器 + + + +Dostoevsky - Russian sentiment analyzer +Dostoevsky - 俄語情感分析器 + + + +Underthesea - Vietnamese sentiment analyzer +Underthesea - 越南語情感分析器 + + + +Armenian (Eastern) +亞美尼亞語(東) + + + +Armenian (Western) +亞美尼亞語(西) + + + +Buryat (Russia) +布里亞特語(俄羅斯) + + + +Chinese (Classical) +漢語(文言) + + + +Church Slavonic (Old) +教會斯拉夫語(古) + + + +Coptic +科普特語 + + + +Erzya +埃爾齊亞語 + + + +Faroese +法羅語 + + + +French (Old) +法語(古) + + + +Gothic +哥特語 + + + +Hebrew (Ancient) +希伯來語(古) + + + +Hebrew (Modern) +希伯來語(現代) + + + +Kurdish (Kurmanji) +庫爾德語(庫爾曼吉語) + + + +Lao +寮國語 + + + +Maltese +馬耳他語 + + + +Nigerian Pidgin +奈及利亞皮欽語 + + + +Pomak +波馬克語 + + + +Russian (Old) +俄語(古) + + + +Sindhi +信德語 + + + +Uyghur +維吾爾語 + + + +Wolof +沃洛夫語 + + + +LaoNLP - Lao sentence tokenizer +LaoNLP - 寮國語分句器 + + + +Stanza - Afrikaans sentence tokenizer +Stanza - 南非語分句器 + + + +Stanza - Arabic sentence tokenizer +Stanza - 阿拉伯語分句器 + + + +Stanza - Armenian (Eastern) sentence tokenizer +Stanza - 亞美尼亞語(東)分句器 + + + +Stanza - Armenian (Western) sentence tokenizer +Stanza - 亞美尼亞語(西)分句器 + + + +Stanza - Basque sentence tokenizer +Stanza - 巴斯克語分句器 + + + +Stanza - Belarusian sentence tokenizer +Stanza - 白俄羅斯語分句器 + + + +Stanza - Bulgarian sentence tokenizer +Stanza - 保加利亞語分句器 + + + +Stanza - Burmese sentence tokenizer +Stanza - 緬甸語分句器 + + + +Stanza - Buryat (Russia) sentence tokenizer +Stanza - 布里亞特語(俄羅斯)分句器 + + + +Stanza - Catalan sentence tokenizer +Stanza - 加泰羅尼亞語分句器 + + + +Stanza - Chinese (Classical) sentence tokenizer +Stanza - 漢語(文言)分句器 + + + +Stanza - Chinese (Simplified) sentence tokenizer +Stanza - 漢語(簡體)分句器 + + + +Stanza - Chinese (Traditional) sentence tokenizer +Stanza - 漢語(繁體)分句器 + + + +Stanza - Church Slavonic (Old) sentence tokenizer +Stanza - 教會斯拉夫語(古)分句器 + + + +Stanza - Coptic sentence tokenizer +Stanza - 科普特語分句器 + + + +Stanza - Croatian sentence tokenizer +Stanza - 克羅埃西亞語分句器 + + + +Stanza - Czech sentence tokenizer +Stanza - 捷克語分句器 + + + +Stanza - Danish sentence tokenizer +Stanza - 丹麥語分句器 + + + +Stanza - Dutch sentence tokenizer +Stanza - 荷蘭語分句器 + + + +Stanza - English sentence tokenizer +Stanza - 英語分句器 + + + +Stanza - Erzya sentence tokenizer +Stanza - 埃爾齊亞語分句器 + + + +Stanza - Estonian sentence tokenizer +Stanza - 愛沙尼亞語分句器 + + + +Stanza - Faroese sentence tokenizer +Stanza - 法羅語分句器 + + + +Stanza - Finnish sentence tokenizer +Stanza - 芬蘭語分句器 + + + +Stanza - French sentence tokenizer +Stanza - 法語分句器 + + + +Stanza - French (Old) sentence tokenizer +Stanza - 法語(古)分句器 + + + +Stanza - Galician sentence tokenizer +Stanza - 加里西亞語分句器 + + + +Stanza - German sentence tokenizer +Stanza - 德語分句器 + + + +Stanza - Gothic sentence tokenizer +Stanza - 哥特語分句器 + + + +Stanza - Greek (Ancient) sentence tokenizer +Stanza - 希臘語(古)分句器 + + + +Stanza - Greek (Modern) sentence tokenizer +Stanza - 希臘語(現代)分句器 + + + +Stanza - Hebrew (Ancient) sentence tokenizer +Stanza - 希伯來語(古)分句器 + + + +Stanza - Hebrew (Modern) sentence tokenizer +Stanza - 希伯來語(現代)分句器 + + + +Stanza - Hindi sentence tokenizer +Stanza - 印地語分句器 + + + +Stanza - Hungarian sentence tokenizer +Stanza - 匈牙利語分句器 + + + +Stanza - Icelandic sentence tokenizer +Stanza - 冰島語分句器 + + + +Stanza - Indonesian sentence tokenizer +Stanza - 印度尼西亞語分句器 + + + +Stanza - Irish sentence tokenizer +Stanza - 愛爾蘭語分句器 + + + +Stanza - Italian sentence tokenizer +Stanza - 義大利語分句器 + + + +Stanza - Japanese sentence tokenizer +Stanza - 日語分句器 + + + +Stanza - Kazakh sentence tokenizer +Stanza - 哈薩克語分句器 + + + +Stanza - Korean sentence tokenizer +Stanza - 韓語分句器 + + + +Stanza - Kurdish (Kurmanji) sentence tokenizer +Stanza - 庫爾德語(庫爾曼吉語)分句器 + + + +Stanza - Kyrgyz sentence tokenizer +Stanza - 吉爾吉斯語分句器 + + + +Stanza - Latin sentence tokenizer +Stanza - 拉丁語分句器 + + + +Stanza - Latvian sentence tokenizer +Stanza - 拉脫維亞語分句器 + + + +Stanza - Ligurian sentence tokenizer +Stanza - 利古里亞語分句器 + + + +Stanza - Lithuanian sentence tokenizer +Stanza - 立陶宛語分句器 + + + +Stanza - Maltese sentence tokenizer +Stanza - 馬耳他語分句器 + + + +Stanza - Manx sentence tokenizer +Stanza - 馬恩語分句器 + + + +Stanza - Marathi sentence tokenizer +Stanza - 馬拉地語分句器 + + + +Stanza - Nigerian Pidgin sentence tokenizer +Stanza - 奈及利亞皮欽語分句器 + + + +Stanza - Norwegian Bokmål sentence tokenizer +Stanza - 書面挪威語分句器 + + + +Stanza - Norwegian Nynorsk sentence tokenizer +Stanza - 新挪威語分句器 + + + +Stanza - Persian sentence tokenizer +Stanza - 波斯語分句器 + + + +Stanza - Polish sentence tokenizer +Stanza - 波蘭語分句器 + + + +Stanza - Pomak sentence tokenizer +Stanza - 波馬克語分句器 + + + +Stanza - Portuguese sentence tokenizer +Stanza - 葡萄牙語分句器 + + + +Stanza - Romanian sentence tokenizer +Stanza - 羅馬尼亞語分句器 + + + +Stanza - Russian sentence tokenizer +Stanza - 俄語分句器 + + + +Stanza - Russian (Old) sentence tokenizer +Stanza - 俄語(古)分句器 + + + +Stanza - Sámi (Northern) sentence tokenizer +Stanza - 薩米語(北)分句器 + + + +Stanza - Sanskrit sentence tokenizer +Stanza - 梵語分句器 + + + +Stanza - Scottish Gaelic sentence tokenizer +Stanza - 蘇格蘭蓋爾語分句器 + + + +Stanza - Serbian (Latin) sentence tokenizer +Stanza - 塞爾維亞語(拉丁)分句器 + + + +Stanza - Sindhi sentence tokenizer +Stanza - 信德語分句器 + + + +Stanza - Slovak sentence tokenizer +Stanza - 斯洛伐克語分句器 + + + +Stanza - Slovenian sentence tokenizer +Stanza - 斯洛維尼亞語分句器 + + + +Stanza - Sorbian (Upper) sentence tokenizer +Stanza - 索布語(上)分句器 + + + +Stanza - Spanish sentence tokenizer +Stanza - 西班牙語分句器 + + + +Stanza - Swedish sentence tokenizer +Stanza - 瑞典語分句器 + + + +Stanza - Tamil sentence tokenizer +Stanza - 泰米爾語分句器 + + + +Stanza - Telugu sentence tokenizer +Stanza - 泰盧固語分句器 + + + +Stanza - Thai sentence tokenizer +Stanza - 泰語分句器 + + + +Stanza - Turkish sentence tokenizer +Stanza - 土耳其語分句器 + + + +Stanza - Ukrainian sentence tokenizer +Stanza - 烏克蘭語分句器 + + + +Stanza - Urdu sentence tokenizer +Stanza - 烏爾都語分句器 + + + +Stanza - Uyghur sentence tokenizer +Stanza - 維吾爾語分句器 + + + +Stanza - Vietnamese sentence tokenizer +Stanza - 越南語分句器 + + + +Stanza - Welsh sentence tokenizer +Stanza - 威爾士語分句器 + + + +Stanza - Wolof sentence tokenizer +Stanza - 沃洛夫語分句器 + + + +LaoNLP - Lao word tokenizer +LaoNLP - 寮國語分詞器 + + + +spaCy - Hebrew (Modern) word tokenizer +spaCy - 希伯來語(現代)分詞器 + + + +spaCy - Norwegian Bokmål word tokenizer +spaCy - 書面挪威語分詞器 + + + +Stanza - Afrikaans word tokenizer +Stanza - 南非語分詞器 + + + +Stanza - Arabic word tokenizer +Stanza - 阿拉伯語分詞器 + + + +Stanza - Armenian (Eastern) word tokenizer +Stanza - 亞美尼亞語(東)分詞器 + + + +Stanza - Armenian (Western) word tokenizer +Stanza - 亞美尼亞語(西)分詞器 + + + +Stanza - Basque word tokenizer +Stanza - 巴斯克語分詞器 + + + +Stanza - Belarusian word tokenizer +Stanza - 白俄羅斯語分詞器 + + + +Stanza - Bulgarian word tokenizer +Stanza - 保加利亞語分詞器 + + + +Stanza - Burmese word tokenizer +Stanza - 緬甸語分詞器 + + + +Stanza - Buryat (Russia) word tokenizer +Stanza - 布里亞特語(俄羅斯)分詞器 + + + +Stanza - Catalan word tokenizer +Stanza - 加泰羅尼亞語分詞器 + + + +Stanza - Chinese (Classical) word tokenizer +Stanza - 漢語(文言)分詞器 + + + +Stanza - Chinese (Simplified) word tokenizer +Stanza - 漢語(簡體)分詞器 + + + +Stanza - Chinese (Traditional) word tokenizer +Stanza - 漢語(繁體)分詞器 + + + +Stanza - Church Slavonic (Old) word tokenizer +Stanza - 教會斯拉夫語(古)分詞器 + + + +Stanza - Coptic word tokenizer +Stanza - 科普特語分詞器 + + + +Stanza - Croatian word tokenizer +Stanza - 克羅埃西亞語分詞器 + + + +Stanza - Czech word tokenizer +Stanza - 捷克語分詞器 + + + +Stanza - Danish word tokenizer +Stanza - 丹麥語分詞器 + + + +Stanza - Dutch word tokenizer +Stanza - 荷蘭語分詞器 + + + +Stanza - English word tokenizer +Stanza - 英語分詞器 + + + +Stanza - Erzya word tokenizer +Stanza - 埃爾齊亞語分詞器 + + + +Stanza - Estonian word tokenizer +Stanza - 愛沙尼亞語分詞器 + + + +Stanza - Faroese word tokenizer +Stanza - 法羅語分詞器 + + + +Stanza - Finnish word tokenizer +Stanza - 芬蘭語分詞器 + + + +Stanza - French word tokenizer +Stanza - 法語分詞器 + + + +Stanza - French (Old) word tokenizer +Stanza - 法語(古)分詞器 + + + +Stanza - Galician word tokenizer +Stanza - 加里西亞語分詞器 + + + +Stanza - German word tokenizer +Stanza - 德語分詞器 + + + +Stanza - Gothic word tokenizer +Stanza - 哥特語分詞器 + + + +Stanza - Greek (Ancient) word tokenizer +Stanza - 希臘語(古)分詞器 + + + +Stanza - Greek (Modern) word tokenizer +Stanza - 希臘語(現代)分詞器 + + + +Stanza - Hebrew (Ancient) word tokenizer +Stanza - 希伯來語(古)分詞器 + + + +Stanza - Hebrew (Modern) word tokenizer +Stanza - 希伯來語(現代)分詞器 + + + +Stanza - Hindi word tokenizer +Stanza - 印地語分詞器 + + + +Stanza - Hungarian word tokenizer +Stanza - 匈牙利語分詞器 + + + +Stanza - Icelandic word tokenizer +Stanza - 冰島語分詞器 + + + +Stanza - Indonesian word tokenizer +Stanza - 印度尼西亞語分詞器 + + + +Stanza - Irish word tokenizer +Stanza - 愛爾蘭語分詞器 + + + +Stanza - Italian word tokenizer +Stanza - 義大利語分詞器 + + + +Stanza - Japanese word tokenizer +Stanza - 日語分詞器 + + + +Stanza - Kazakh word tokenizer +Stanza - 哈薩克語分詞器 + + + +Stanza - Korean word tokenizer +Stanza - 韓語分詞器 + + + +Stanza - Kurdish (Kurmanji) word tokenizer +Stanza - 庫爾德語(庫爾曼吉語)分詞器 + + + +Stanza - Kyrgyz word tokenizer +Stanza - 吉爾吉斯語分詞器 + + + +Stanza - Latin word tokenizer +Stanza - 拉丁語分詞器 + + + +Stanza - Latvian word tokenizer +Stanza - 拉脫維亞語分詞器 + + + +Stanza - Ligurian word tokenizer +Stanza - 利古里亞語分詞器 + + + +Stanza - Lithuanian word tokenizer +Stanza - 立陶宛語分詞器 + + + +Stanza - Maltese word tokenizer +Stanza - 馬耳他語分詞器 + + + +Stanza - Manx word tokenizer +Stanza - 馬恩語分詞器 + + + +Stanza - Marathi word tokenizer +Stanza - 馬拉地語分詞器 + + + +Stanza - Nigerian Pidgin word tokenizer +Stanza - 奈及利亞皮欽語分詞器 + + + +Stanza - Norwegian Bokmål word tokenizer +Stanza - 書面挪威語分詞器 + + + +Stanza - Norwegian Nynorsk word tokenizer +Stanza - 新挪威語分詞器 + + + +Stanza - Persian word tokenizer +Stanza - 波斯語分詞器 + + + +Stanza - Polish word tokenizer +Stanza - 波蘭語分詞器 + + + +Stanza - Pomak word tokenizer +Stanza - 波馬克語分詞器 + + + +Stanza - Portuguese word tokenizer +Stanza - 葡萄牙語分詞器 + + + +Stanza - Romanian word tokenizer +Stanza - 羅馬尼亞語分詞器 + + + +Stanza - Russian word tokenizer +Stanza - 俄語分詞器 + + + +Stanza - Russian (Old) word tokenizer +Stanza - 俄語(古)分詞器 + + + +Stanza - Sámi (Northern) word tokenizer +Stanza - 薩米語(北)分詞器 + + + +Stanza - Sanskrit word tokenizer +Stanza - 梵語分詞器 + + + +Stanza - Scottish Gaelic word tokenizer +Stanza - 蘇格蘭蓋爾語分詞器 + + + +Stanza - Serbian (Latin) word tokenizer +Stanza - 塞爾維亞語(拉丁)分詞器 + + + +Stanza - Sindhi word tokenizer +Stanza - 信德語分詞器 + + + +Stanza - Slovak word tokenizer +Stanza - 斯洛伐克語分詞器 + + + +Stanza - Slovenian word tokenizer +Stanza - 斯洛維尼亞語分詞器 + + + +Stanza - Sorbian (Upper) word tokenizer +Stanza - 索布語(上)分詞器 + + + +Stanza - Spanish word tokenizer +Stanza - 西班牙語分詞器 + + + +Stanza - Swedish word tokenizer +Stanza - 瑞典語分詞器 + + + +Stanza - Tamil word tokenizer +Stanza - 泰米爾語分詞器 + + + +Stanza - Telugu word tokenizer +Stanza - 泰盧固語分詞器 + + + +Stanza - Thai word tokenizer +Stanza - 泰語分詞器 + + + +Stanza - Turkish word tokenizer +Stanza - 土耳其語分詞器 + + + +Stanza - Ukrainian word tokenizer +Stanza - 烏克蘭語分詞器 + + + +Stanza - Urdu word tokenizer +Stanza - 烏爾都語分詞器 + + + +Stanza - Uyghur word tokenizer +Stanza - 維吾爾語分詞器 + + + +Stanza - Vietnamese word tokenizer +Stanza - 越南語分詞器 + + + +Stanza - Welsh word tokenizer +Stanza - 威爾士語分詞器 + + + +Stanza - Wolof word tokenizer +Stanza - 沃洛夫語分詞器 + + + +LaoNLP - SeqLabeling + + + + +LaoNLP - Yunshan Cup 2020 + + + + +Stanza - Afrikaans part-of-speech tagger +Stanza - 南非語詞性標註器 + + + +Stanza - Arabic part-of-speech tagger +Stanza - 阿拉伯語詞性標註器 + + + +Stanza - Armenian (Eastern) part-of-speech tagger +Stanza - 亞美尼亞語(東)詞性標註器 + + + +Stanza - Armenian (Western) part-of-speech tagger +Stanza - 亞美尼亞語(西)詞性標註器 + + + +Stanza - Basque part-of-speech tagger +Stanza - 巴斯克語詞性標註器 + + + +Stanza - Belarusian part-of-speech tagger +Stanza - 白俄羅斯語詞性標註器 + + + +Stanza - Bulgarian part-of-speech tagger +Stanza - 保加利亞語詞性標註器 + + + +Stanza - Buryat (Russia) part-of-speech tagger +Stanza - 布里亞特語(俄羅斯)詞性標註器 + + + +Stanza - Catalan part-of-speech tagger +Stanza - 加泰羅尼亞語詞性標註器 + + + +Stanza - Chinese (Classical) part-of-speech tagger +Stanza - 漢語(文言)詞性標註器 + + + +Stanza - Chinese (Simplified) part-of-speech tagger +Stanza - 漢語(簡體)詞性標註器 + + + +Stanza - Chinese (Traditional) part-of-speech tagger +Stanza - 漢語(繁體)詞性標註器 + + + +Stanza - Church Slavonic (Old) part-of-speech tagger +Stanza - 教會斯拉夫語(古)詞性標註器 + + + +Stanza - Coptic part-of-speech tagger +Stanza - 科普特語詞性標註器 + + + +Stanza - Croatian part-of-speech tagger +Stanza - 克羅埃西亞語詞性標註器 + + + +Stanza - Czech part-of-speech tagger +Stanza - 捷克語詞性標註器 + + + +Stanza - Danish part-of-speech tagger +Stanza - 丹麥語詞性標註器 + + + +Stanza - Dutch part-of-speech tagger +Stanza - 荷蘭語詞性標註器 + + + +Stanza - English part-of-speech tagger +Stanza - 英語詞性標註器 + + + +Stanza - Erzya part-of-speech tagger +Stanza - 埃爾齊亞語詞性標註器 + + + +Stanza - Estonian part-of-speech tagger +Stanza - 愛沙尼亞語詞性標註器 + + + +Stanza - Faroese part-of-speech tagger +Stanza - 法羅語詞性標註器 + + + +Stanza - Finnish part-of-speech tagger +Stanza - 芬蘭語詞性標註器 + + + +Stanza - French part-of-speech tagger +Stanza - 法語詞性標註器 + + + +Stanza - French (Old) part-of-speech tagger +Stanza - 法語(古)詞性標註器 + + + +Stanza - Galician part-of-speech tagger +Stanza - 加里西亞語詞性標註器 + + + +Stanza - German part-of-speech tagger +Stanza - 德語詞性標註器 + + + +Stanza - Gothic part-of-speech tagger +Stanza - 哥特語詞性標註器 + + + +Stanza - Greek (Ancient) part-of-speech tagger +Stanza - 希臘語(古)詞性標註器 + + + +Stanza - Greek (Modern) part-of-speech tagger +Stanza - 希臘語(現代)詞性標註器 + + + +Stanza - Hebrew (Ancient) part-of-speech tagger +Stanza - 希伯來語(古)詞性標註器 + + + +Stanza - Hebrew (Modern) part-of-speech tagger +Stanza - 希伯來語(現代)詞性標註器 + + + +Stanza - Hindi part-of-speech tagger +Stanza - 印地語詞性標註器 + + + +Stanza - Hungarian part-of-speech tagger +Stanza - 匈牙利語詞性標註器 + + + +Stanza - Icelandic part-of-speech tagger +Stanza - 冰島語詞性標註器 + + + +Stanza - Indonesian part-of-speech tagger +Stanza - 印度尼西亞語詞性標註器 + + + +Stanza - Irish part-of-speech tagger +Stanza - 愛爾蘭語詞性標註器 + + + +Stanza - Italian part-of-speech tagger +Stanza - 義大利語詞性標註器 + + + +Stanza - Japanese part-of-speech tagger +Stanza - 日語詞性標註器 + + + +Stanza - Kazakh part-of-speech tagger +Stanza - 哈薩克語詞性標註器 + + + +Stanza - Korean part-of-speech tagger +Stanza - 韓語詞性標註器 + + + +Stanza - Kurdish (Kurmanji) part-of-speech tagger +Stanza - 庫爾德語(庫爾曼吉語)詞性標註器 + + + +Stanza - Kyrgyz part-of-speech tagger +Stanza - 吉爾吉斯語詞性標註器 + + + +Stanza - Latin part-of-speech tagger +Stanza - 拉丁語詞性標註器 + + + +Stanza - Latvian part-of-speech tagger +Stanza - 拉脫維亞語詞性標註器 + + + +Stanza - Ligurian part-of-speech tagger +Stanza - 利古里亞語詞性標註器 + + + +Stanza - Lithuanian part-of-speech tagger +Stanza - 立陶宛語詞性標註器 + + + +Stanza - Maltese part-of-speech tagger +Stanza - 馬耳他語詞性標註器 + + + +Stanza - Manx part-of-speech tagger +Stanza - 馬恩語詞性標註器 + + + +Stanza - Marathi part-of-speech tagger +Stanza - 馬拉地語詞性標註器 + + + +Stanza - Nigerian Pidgin part-of-speech tagger +Stanza - 奈及利亞皮欽語詞性標註器 + + + +Stanza - Norwegian Bokmål part-of-speech tagger +Stanza - 書面挪威語詞性標註器 + + + +Stanza - Norwegian Nynorsk part-of-speech tagger +Stanza - 新挪威語詞性標註器 + + + +Stanza - Persian part-of-speech tagger +Stanza - 波斯語詞性標註器 + + + +Stanza - Polish part-of-speech tagger +Stanza - 波蘭語詞性標註器 + + + +Stanza - Pomak part-of-speech tagger +Stanza - 波馬克語詞性標註器 + + + +Stanza - Portuguese part-of-speech tagger +Stanza - 葡萄牙語詞性標註器 + + + +Stanza - Romanian part-of-speech tagger +Stanza - 羅馬尼亞語詞性標註器 + + + +Stanza - Russian part-of-speech tagger +Stanza - 俄語詞性標註器 + + + +Stanza - Russian (Old) part-of-speech tagger +Stanza - 俄語(古)詞性標註器 + + + +Stanza - Sámi (Northern) part-of-speech tagger +Stanza - 薩米語(北)詞性標註器 + + + +Stanza - Sanskrit part-of-speech tagger +Stanza - 梵語詞性標註器 + + + +Stanza - Scottish Gaelic part-of-speech tagger +Stanza - 蘇格蘭蓋爾語詞性標註器 + + + +Stanza - Serbian (Latin) part-of-speech tagger +Stanza - 塞爾維亞語(拉丁)詞性標註器 + + + +Stanza - Slovak part-of-speech tagger +Stanza - 斯洛伐克語詞性標註器 + + + +Stanza - Slovenian part-of-speech tagger +Stanza - 斯洛維尼亞語詞性標註器 + + + +Stanza - Sorbian (Upper) part-of-speech tagger +Stanza - 索布語(上)詞性標註器 + + + +Stanza - Spanish part-of-speech tagger +Stanza - 西班牙語詞性標註器 + + + +Stanza - Swedish part-of-speech tagger +Stanza - 瑞典語詞性標註器 + + + +Stanza - Tamil part-of-speech tagger +Stanza - 泰米爾語詞性標註器 + + + +Stanza - Telugu part-of-speech tagger +Stanza - 泰盧固語詞性標註器 + + + +Stanza - Turkish part-of-speech tagger +Stanza - 土耳其語詞性標註器 + + + +Stanza - Ukrainian part-of-speech tagger +Stanza - 烏克蘭語詞性標註器 + + + +Stanza - Urdu part-of-speech tagger +Stanza - 烏爾都語詞性標註器 + + + +Stanza - Uyghur part-of-speech tagger +Stanza - 維吾爾語詞性標註器 + + + +Stanza - Vietnamese part-of-speech tagger +Stanza - 越南語詞性標註器 + + + +Stanza - Welsh part-of-speech tagger +Stanza - 威爾士語詞性標註器 + + + +Stanza - Wolof part-of-speech tagger +Stanza - 沃洛夫語詞性標註器 + + + +Stanza - Afrikaans lemmatizer +Stanza - 南非語詞形還原器 + + + +Stanza - Arabic lemmatizer +Stanza - 阿拉伯語詞形還原器 + + + +Stanza - Armenian (Eastern) lemmatizer +Stanza - 亞美尼亞語(東)詞形還原器 + + + +Stanza - Armenian (Western) lemmatizer +Stanza - 亞美尼亞語(西)詞形還原器 + + + +Stanza - Basque lemmatizer +Stanza - 巴斯克語詞形還原器 + + + +Stanza - Belarusian lemmatizer +Stanza - 白俄羅斯語詞形還原器 + + + +Stanza - Bulgarian lemmatizer +Stanza - 保加利亞語詞形還原器 + + + +Stanza - Buryat (Russia) lemmatizer +Stanza - 布里亞特語(俄羅斯)詞形還原器 + + + +Stanza - Catalan lemmatizer +Stanza - 加泰羅尼亞語詞形還原器 + + + +Stanza - Chinese (Classical) lemmatizer +Stanza - 漢語(文言)詞形還原器 + + + +Stanza - Chinese (Simplified) lemmatizer +Stanza - 漢語(簡體)詞形還原器 + + + +Stanza - Chinese (Traditional) lemmatizer +Stanza - 漢語(繁體)詞形還原器 + + + +Stanza - Church Slavonic (Old) lemmatizer +Stanza - 教會斯拉夫語(古)詞形還原器 + + + +Stanza - Coptic lemmatizer +Stanza - 科普特語詞形還原器 + + + +Stanza - Croatian lemmatizer +Stanza - 克羅埃西亞語詞形還原器 + + + +Stanza - Czech lemmatizer +Stanza - 捷克語詞形還原器 + + + +Stanza - Danish lemmatizer +Stanza - 丹麥語詞形還原器 + + + +Stanza - Dutch lemmatizer +Stanza - 荷蘭語詞形還原器 + + + +Stanza - English lemmatizer +Stanza - 英語詞形還原器 + + + +Stanza - Erzya lemmatizer +Stanza - 埃爾齊亞語詞形還原器 + + + +Stanza - Estonian lemmatizer +Stanza - 愛沙尼亞語詞形還原器 + + + +Stanza - Finnish lemmatizer +Stanza - 芬蘭語詞形還原器 + + + +Stanza - French lemmatizer +Stanza - 法語詞形還原器 + + + +Stanza - French (Old) lemmatizer +Stanza - 法語(古)詞形還原器 - -NLTK - Nepali stop word list -NLTK - 尼泊爾語停用詞表 + +Stanza - Galician lemmatizer +Stanza - 加里西亞語詞形還原器 - -NLTK - Norwegian stop word list -NLTK - 挪威語停用詞表 + +Stanza - German lemmatizer +Stanza - 德語詞形還原器 - -NLTK - Portuguese stop word list -NLTK - 葡萄牙語停用詞表 + +Stanza - Gothic lemmatizer +Stanza - 哥特語詞形還原器 - -NLTK - Romanian stop word list -NLTK - 羅馬尼亞語停用詞表 + +Stanza - Greek (Ancient) lemmatizer +Stanza - 希臘語(古)詞形還原器 - -NLTK - Russian stop word list -NLTK - 俄語停用詞表 + +Stanza - Greek (Modern) lemmatizer +Stanza - 希臘語(現代)詞形還原器 - -NLTK - Slovenian stop word list -NLTK - 斯洛維尼亞語停用詞表 + +Stanza - Hebrew (Ancient) lemmatizer +Stanza - 希伯來語(古)詞形還原器 - -NLTK - Spanish stop word list -NLTK - 西班牙語停用詞表 + +Stanza - Hebrew (Modern) lemmatizer +Stanza - 希伯來語(現代)詞形還原器 - -NLTK - Swedish stop word list -NLTK - 瑞典語停用詞表 + +Stanza - Hindi lemmatizer +Stanza - 印地語詞形還原器 - -NLTK - Tajik stop word list -NLTK - 塔吉克語停用詞表 + +Stanza - Hungarian lemmatizer +Stanza - 匈牙利語詞形還原器 - -NLTK - Turkish stop word list -NLTK - 土耳其語停用詞表 + +Stanza - Icelandic lemmatizer +Stanza - 冰島語詞形還原器 - -PyThaiNLP - Thai stop word list -PyThaiNLP - 泰語停用詞表 + +Stanza - Indonesian lemmatizer +Stanza - 印度尼西亞語詞形還原器 - -stopword - Afrikaans stop word list -stopword - 南非語停用詞表 + +Stanza - Irish lemmatizer +Stanza - 愛爾蘭語詞形還原器 - -stopword - Arabic stop word list -stopword - 阿拉伯語停用詞表 + +Stanza - Italian lemmatizer +Stanza - 義大利語詞形還原器 - -stopword - Armenian stop word list -stopword - 亞美尼亞語停用詞表 + +Stanza - Japanese lemmatizer +Stanza - 日語詞形還原器 - -stopword - Basque stop word list -stopword - 巴斯克語停用詞表 + +Stanza - Kazakh lemmatizer +Stanza - 哈薩克語詞形還原器 - -stopword - Bengali stop word list -stopword - 孟加拉語停用詞表 + +Stanza - Korean lemmatizer +Stanza - 韓語詞形還原器 - -stopword - Breton stop word list -stopword - 布列塔尼語停用詞表 + +Stanza - Kurdish (Kurmanji) lemmatizer +Stanza - 庫爾德語(庫爾曼吉語)詞形還原器 - -stopword - Bulgarian stop word list -stopword - 保加利亞語停用詞表 + +Stanza - Kyrgyz lemmatizer +Stanza - 吉爾吉斯語詞形還原器 - -stopword - Catalan stop word list -stopword - 加泰羅尼亞語停用詞表 + +Stanza - Latin lemmatizer +Stanza - 拉丁語詞形還原器 - -stopword - Chinese (Simplified) stop word list -stopword - 漢語(簡體)停用詞表 + +Stanza - Latvian lemmatizer +Stanza - 拉脫維亞語詞形還原器 - -stopword - Chinese (Traditional) stop word list -stopword - 漢語(繁體)停用詞表 + +Stanza - Ligurian lemmatizer +Stanza - 利古里亞語詞形還原器 - -stopword - Croatian stop word list -stopword - 克羅埃西亞語停用詞表 + +Stanza - Lithuanian lemmatizer +Stanza - 立陶宛語詞形還原器 - -stopword - Czech stop word list -stopword - 捷克語停用詞表 + +Stanza - Manx lemmatizer +Stanza - 馬恩語詞形還原器 - -stopword - Danish stop word list -stopword - 丹麥語停用詞表 + +Stanza - Marathi lemmatizer +Stanza - 馬拉地語詞形還原器 - -stopword - Dutch stop word list -stopword - 荷蘭語停用詞表 + +Stanza - Nigerian Pidgin lemmatizer +Stanza - 奈及利亞皮欽語詞形還原器 - -stopword - English stop word list -stopword - 英語停用詞表 + +Stanza - Norwegian Bokmål lemmatizer +Stanza - 書面挪威語詞形還原器 - -stopword - Esperanto stop word list -stopword - 世界語停用詞表 + +Stanza - Norwegian Nynorsk lemmatizer +Stanza - 新挪威語詞形還原器 - -stopword - Estonian stop word list -stopword - 愛沙尼亞語停用詞表 + +Stanza - Persian lemmatizer +Stanza - 波斯語詞形還原器 - -stopword - Finnish stop word list -stopword - 芬蘭語停用詞表 + +Stanza - Polish lemmatizer +Stanza - 波蘭語詞形還原器 - -stopword - French stop word list -stopword - 法語停用詞表 + +Stanza - Pomak lemmatizer +Stanza - 波馬克語詞形還原器 - -stopword - Galician stop word list -stopword - 加里西亞語停用詞表 + +Stanza - Portuguese lemmatizer +Stanza - 葡萄牙語詞形還原器 - -stopword - German stop word list -stopword - 德語停用詞表 + +Stanza - Romanian lemmatizer +Stanza - 羅馬尼亞語詞形還原器 - -stopword - Greek (Modern) stop word list -stopword - 希臘語(現代)停用詞表 + +Stanza - Russian lemmatizer +Stanza - 俄語詞形還原器 - -stopword - Gujarati stop word list -stopword - 古吉拉特語停用詞表 + +Stanza - Russian (Old) lemmatizer +Stanza - 俄語(古)詞形還原器 - -stopword - Hausa stop word list -stopword - 豪薩語停用詞表 + +Stanza - Sámi (Northern) lemmatizer +Stanza - 薩米語(北)詞形還原器 - -stopword - Hebrew stop word list -stopword - 希伯來語停用詞表 + +Stanza - Sanskrit lemmatizer +Stanza - 梵語詞形還原器 - -stopword - Hindi stop word list -stopword - 印地語停用詞表 + +Stanza - Scottish Gaelic lemmatizer +Stanza - 蘇格蘭蓋爾語詞形還原器 - -stopword - Hungarian stop word list -stopword - 匈牙利語停用詞表 + +Stanza - Serbian (Latin) lemmatizer +Stanza - 塞爾維亞語(拉丁)詞形還原器 - -stopword - Indonesian stop word list -stopword - 印度尼西亞語停用詞表 + +Stanza - Slovak lemmatizer +Stanza - 斯洛伐克語詞形還原器 - -stopword - Irish stop word list -stopword - 愛爾蘭語停用詞表 + +Stanza - Slovenian lemmatizer +Stanza - 斯洛維尼亞語詞形還原器 - -stopword - Italian stop word list -stopword - 義大利語停用詞表 + +Stanza - Sorbian (Upper) lemmatizer +Stanza - 索布語(上)詞形還原器 - -stopword - Japanese stop word list -stopword - 日語停用詞表 + +Stanza - Spanish lemmatizer +Stanza - 西班牙語詞形還原器 - -stopword - Korean stop word list -stopword - 韓語停用詞表 + +Stanza - Swedish lemmatizer +Stanza - 瑞典語詞形還原器 - -stopword - Kurdish stop word list -stopword - 庫爾德語停用詞表 + +Stanza - Tamil lemmatizer +Stanza - 泰米爾語詞形還原器 - -stopword - Latin stop word list -stopword - 拉丁語停用詞表 + +Stanza - Turkish lemmatizer +Stanza - 土耳其語詞形還原器 - -stopword - Latvian stop word list -stopword - 拉脫維亞語停用詞表 + +Stanza - Ukrainian lemmatizer +Stanza - 烏克蘭語詞形還原器 - -stopword - Lithuanian stop word list -stopword - 立陶宛語停用詞表 + +Stanza - Urdu lemmatizer +Stanza - 烏爾都語詞形還原器 - -stopword - Lugbara stop word list -stopword - 盧格巴拉語停用詞表 + +Stanza - Uyghur lemmatizer +Stanza - 維吾爾語詞形還原器 - -stopword - Malay stop word list -stopword - 馬來語停用詞表 + +Stanza - Welsh lemmatizer +Stanza - 威爾士語詞形還原器 - -stopword - Marathi stop word list -stopword - 馬拉地語停用詞表 + +Stanza - Wolof lemmatizer +Stanza - 沃洛夫語詞形還原器 - -stopword - Norwegian Bokmål stop word list -stopword - 書面挪威語停用詞表 + +LaoNLP - Lao stop word list +LaoNLP - 寮國語停用詞表 - -stopword - Persian stop word list -stopword - 波斯語停用詞表 + +NLTK - Hebrew (Modern) stop word list +NLTK - 希伯來語(現代)停用詞表 - -stopword - Polish stop word list -stopword - 波蘭語停用詞表 + +Stanza - Afrikaans dependency parser +Stanza - 南非語依存分析器 - -stopword - Portuguese (Brazil) stop word list -stopword - 葡萄牙語(巴西)停用詞表 + +Stanza - Arabic dependency parser +Stanza - 阿拉伯語依存分析器 - -stopword - Portuguese (Portugal) stop word list -stopword - 葡萄牙語(葡萄牙)停用詞表 + +Stanza - Armenian (Eastern) dependency parser +Stanza - 亞美尼亞語(東)依存分析器 - -stopword - Punjabi (Gurmukhi) stop word list -stopword - 旁遮普語(古木基)停用詞表 + +Stanza - Armenian (Western) dependency parser +Stanza - 亞美尼亞語(西)依存分析器 - -stopword - Romanian stop word list -stopword - 羅馬尼亞語停用詞表 + +Stanza - Basque dependency parser +Stanza - 巴斯克語依存分析器 - -stopword - Russian stop word list -stopword - 俄語停用詞表 + +Stanza - Belarusian dependency parser +Stanza - 白俄羅斯語依存分析器 - -stopword - Slovak stop word list -stopword - 斯洛伐克語停用詞表 + +Stanza - Bulgarian dependency parser +Stanza - 保加利亞語依存分析器 - -stopword - Slovenian stop word list -stopword - 斯洛維尼亞語停用詞表 + +Stanza - Buryat (Russia) dependency parser +Stanza - 布里亞特語(俄羅斯)依存分析器 - -stopword - Somali stop word list -stopword - 索馬利亞語停用詞表 + +Stanza - Catalan dependency parser +Stanza - 加泰羅尼亞語依存分析器 - -stopword - Sotho (Southern) stop word list -stopword - 塞索托語停用詞表 + +Stanza - Chinese (Classical) dependency parser +Stanza - 漢語(文言)依存分析器 - -stopword - Spanish stop word list -stopword - 西班牙語停用詞表 + +Stanza - Chinese (Simplified) dependency parser +Stanza - 漢語(簡體)依存分析器 - -stopword - Swahili stop word list -stopword - 斯瓦西里語停用詞表 + +Stanza - Chinese (Traditional) dependency parser +Stanza - 漢語(繁體)依存分析器 - -stopword - Swedish stop word list -stopword - 瑞典語停用詞表 + +Stanza - Church Slavonic (Old) dependency parser +Stanza - 教會斯拉夫語(古)依存分析器 - -stopword - Tagalog stop word list -stopword - 他加祿語停用詞表 + +Stanza - Coptic dependency parser +Stanza - 科普特語依存分析器 - -stopword - Thai stop word list -stopword - 泰語停用詞表 + +Stanza - Croatian dependency parser +Stanza - 克羅埃西亞語依存分析器 - -stopword - Turkish stop word list -stopword - 土耳其語停用詞表 + +Stanza - Czech dependency parser +Stanza - 捷克語依存分析器 - -stopword - Ukrainian stop word list -stopword - 烏克蘭語停用詞表 + +Stanza - Danish dependency parser +Stanza - 丹麥語依存分析器 - -stopword - Urdu stop word list -stopword - 烏爾都語停用詞表 + +Stanza - Dutch dependency parser +Stanza - 荷蘭語依存分析器 - -stopword - Vietnamese stop word list -stopword - 越南語停用詞表 + +Stanza - English dependency parser +Stanza - 英語依存分析器 - -stopword - Yoruba stop word list -stopword - 約魯巴語停用詞表 + +Stanza - Erzya dependency parser +Stanza - 埃爾齊亞語依存分析器 - -stopword - Zulu stop word list -stopword - 祖魯語停用詞表 + +Stanza - Estonian dependency parser +Stanza - 愛沙尼亞語依存分析器 - -Custom stop word list -自定義停用詞表 + +Stanza - Faroese dependency parser +Stanza - 法羅語依存分析器 - -spaCy - Catalan dependency parser -spaCy - 加泰羅尼亞語依存分析器 + +Stanza - Finnish dependency parser +Stanza - 芬蘭語依存分析器 - -spaCy - Chinese dependency parser -spaCy - 漢語依存分析器 + +Stanza - French dependency parser +Stanza - 法語依存分析器 - -spaCy - Croatian dependency parser -spaCy - 克羅埃西亞語依存分析器 + +Stanza - French (Old) dependency parser +Stanza - 法語(古)依存分析器 - -spaCy - Danish dependency parser -spaCy - 丹麥語依存分析器 + +Stanza - Galician dependency parser +Stanza - 加里西亞語依存分析器 - -spaCy - Dutch dependency parser -spaCy - 荷蘭語依存分析器 + +Stanza - German dependency parser +Stanza - 德語依存分析器 - -spaCy - English dependency parser -spaCy - 英語依存分析器 + +Stanza - Gothic dependency parser +Stanza - 哥特語依存分析器 - -spaCy - Finnish dependency parser -spaCy - 芬蘭語依存分析器 + +Stanza - Greek (Ancient) dependency parser +Stanza - 希臘語(古)依存分析器 - -spaCy - French dependency parser -spaCy - 法語依存分析器 + +Stanza - Greek (Modern) dependency parser +Stanza - 希臘語(現代)依存分析器 - -spaCy - German dependency parser -spaCy - 德語依存分析器 + +Stanza - Hebrew (Ancient) dependency parser +Stanza - 希伯來語(古)依存分析器 - -spaCy - Greek (Modern) dependency parser -spaCy - 希臘語(現代)依存分析器 + +Stanza - Hebrew (Modern) dependency parser +Stanza - 希伯來語(現代)依存分析器 - -spaCy - Italian dependency parser -spaCy - 義大利語依存分析器 + +Stanza - Hindi dependency parser +Stanza - 印地語依存分析器 - -spaCy - Japanese dependency parser -spaCy - 日語依存分析器 + +Stanza - Hungarian dependency parser +Stanza - 匈牙利語依存分析器 - -spaCy - Lithuanian dependency parser -spaCy - 立陶宛語依存分析器 + +Stanza - Icelandic dependency parser +Stanza - 冰島語依存分析器 - -spaCy - Macedonian dependency parser -spaCy - 馬其頓語依存分析器 + +Stanza - Indonesian dependency parser +Stanza - 印度尼西亞語依存分析器 - -spaCy - Norwegian Bokmål dependency parser -spaCy - 書面挪威語依存分析器 + +Stanza - Irish dependency parser +Stanza - 愛爾蘭語依存分析器 - -spaCy - Polish dependency parser -spaCy - 波蘭語依存分析器 + +Stanza - Italian dependency parser +Stanza - 義大利語依存分析器 - -spaCy - Portuguese dependency parser -spaCy - 葡萄牙語依存分析器 + +Stanza - Japanese dependency parser +Stanza - 日語依存分析器 - -spaCy - Romanian dependency parser -spaCy - 羅馬尼亞語依存分析器 + +Stanza - Kazakh dependency parser +Stanza - 哈薩克語依存分析器 - -spaCy - Russian dependency parser -spaCy - 俄語依存分析器 + +Stanza - Korean dependency parser +Stanza - 韓語依存分析器 - -spaCy - Spanish dependency parser -spaCy - 西班牙語依存分析器 + +Stanza - Kurdish (Kurmanji) dependency parser +Stanza - 庫爾德語(庫爾曼吉語)依存分析器 - -spaCy - Swedish dependency parser -spaCy - 瑞典語依存分析器 + +Stanza - Kyrgyz dependency parser +Stanza - 吉爾吉斯語依存分析器 - -spaCy - Ukrainian dependency parser -spaCy - 烏克蘭語依存分析器 + +Stanza - Latin dependency parser +Stanza - 拉丁語依存分析器 - -Average logarithmic distance - + +Stanza - Latvian dependency parser +Stanza - 拉脫維亞語依存分析器 - -Average reduced frequency - + +Stanza - Ligurian dependency parser +Stanza - 利古里亞語依存分析器 - -Average waiting time - + +Stanza - Lithuanian dependency parser +Stanza - 立陶宛語依存分析器 - -Carroll's Uₘ - + +Stanza - Maltese dependency parser +Stanza - 馬耳他語依存分析器 - -Fisher's exact test -費希爾精確檢驗 + +Stanza - Manx dependency parser +Stanza - 馬恩語依存分析器 - -Log-likelihood ratio test -對數似然比檢驗 + +Stanza - Marathi dependency parser +Stanza - 馬拉地語依存分析器 - -Pearson's chi-squared test -皮爾森卡方檢驗 + +Stanza - Nigerian Pidgin dependency parser +Stanza - 奈及利亞皮欽語依存分析器 - -Cubic association ratio - + +Stanza - Norwegian Bokmål dependency parser +Stanza - 書面挪威語依存分析器 - -Dice's coefficient -Dice 係數 + +Stanza - Norwegian Nynorsk dependency parser +Stanza - 新挪威語依存分析器 - -Difference coefficient - + +Stanza - Persian dependency parser +Stanza - 波斯語依存分析器 - -Jaccard index -雅卡爾指數 + +Stanza - Polish dependency parser +Stanza - 波蘭語依存分析器 - -Log-frequency biased MD - + +Stanza - Pomak dependency parser +Stanza - 波馬克語依存分析器 - -Kilgarriff's ratio - + +Stanza - Portuguese dependency parser +Stanza - 葡萄牙語依存分析器 - -Log ratio - + +Stanza - Romanian dependency parser +Stanza - 羅馬尼亞語依存分析器 - -Minimum sensitivity - + +Stanza - Russian dependency parser +Stanza - 俄語依存分析器 - -Mutual dependency - + +Stanza - Russian (Old) dependency parser +Stanza - 俄語(古)依存分析器 - -Mutual expectation - + +Stanza - Sámi (Northern) dependency parser +Stanza - 薩米語(北)依存分析器 - -Mutual information -互資訊 + +Stanza - Sanskrit dependency parser +Stanza - 梵語依存分析器 - -Odds ratio -比值比 + +Stanza - Scottish Gaelic dependency parser +Stanza - 蘇格蘭蓋爾語依存分析器 - -Pointwise mutual information -點互資訊 + +Stanza - Serbian (Latin) dependency parser +Stanza - 塞爾維亞語(拉丁)依存分析器 - -Poisson collocation measure - + +Stanza - Slovak dependency parser +Stanza - 斯洛伐克語依存分析器 - -Squared phi coefficient -Phi 係數的平方 + +Stanza - Slovenian dependency parser +Stanza - 斯洛維尼亞語依存分析器 - -Microsoft Paint files (*.msp) -Microsoft Paint 檔案 (*.msp) + +Stanza - Sorbian (Upper) dependency parser +Stanza - 索布語(上)依存分析器 - -stopword - Burmese stop word list -stopword - 緬甸語停用詞表 + +Stanza - Spanish dependency parser +Stanza - 西班牙語依存分析器 - -Khmer -柬埔寨語 + +Stanza - Swedish dependency parser +Stanza - 瑞典語依存分析器 - -khmer-nltk - Khmer sentence tokenizer -khmer-nltk - 柬埔寨語分句器 + +Stanza - Tamil dependency parser +Stanza - 泰米爾語依存分析器 - -spaCy - Korean dependency parser -spaCy - 韓語依存分析器 + +Stanza - Telugu dependency parser +Stanza - 泰盧固語依存分析器 - -spaCy - Slovenian dependency parser -spaCy - 斯洛維尼亞語依存分析器 + +Stanza - Turkish dependency parser +Stanza - 土耳其語依存分析器 - -spaCy - Korean sentence recognizer -spaCy - 韓語句子識別器 + +Stanza - Ukrainian dependency parser +Stanza - 烏克蘭語依存分析器 - -khmer-nltk - Khmer word tokenizer -khmer-nltk - 柬埔寨語分詞器 + +Stanza - Urdu dependency parser +Stanza - 烏爾都語依存分析器 - -spaCy - Korean word tokenizer -spaCy - 韓語分詞器 + +Stanza - Uyghur dependency parser +Stanza - 維吾爾語依存分析器 - -spaCy - Malay word tokenizer -spaCy - 馬來語分詞器 + +Stanza - Vietnamese dependency parser +Stanza - 越南語依存分析器 - -khmer-nltk - Khmer part-of-speech tagger -khmer-nltk - 柬埔寨語詞性標註器 + +Stanza - Welsh dependency parser +Stanza - 威爾士語依存分析器 - -PyThaiNLP - Perceptron part-of-speech tagger (Blackboard) -PyThaiNLP - 感知機詞性標註器(Blackboard) + +Stanza - Wolof dependency parser +Stanza - 沃洛夫語依存分析器 - -spaCy - Korean part-of-speech tagger -spaCy - 韓語詞性標註器 + +Stanza - Chinese (Simplified) sentiment analyzer +Stanza - 漢語(簡體)情感分析器 - -spaCy - Slovenian part-of-speech tagger -spaCy - 斯洛維尼亞語詞性標註器 + +Stanza - German sentiment analyzer +Stanza - 德語情感分析器 - -spaCy - Korean lemmatizer -spaCy - 韓語詞形還原器 + +Stanza - English sentiment analyzer +Stanza - 英語情感分析器 - -spaCy - Slovenian lemmatizer -spaCy - 斯洛維尼亞語詞形還原器 + +Stanza - Marathi sentiment analyzer +Stanza - 馬拉地語情感分析器 - -Dostoevsky - Russian sentiment analyzer -Dostoevsky - 俄語情感分析器 + +Stanza - Spanish sentiment analyzer +Stanza - 西班牙語情感分析器 - -Underthesea - Vietnamese sentiment analyzer -Underthesea - 越南語情感分析器 + +Stanza - Vietnamese sentiment analyzer +Stanza - 越南語情感分析器 @@ -8807,12 +10497,12 @@ Frequency wl_conversion - + Yes - + No @@ -8820,12 +10510,12 @@ Frequency wl_dependency_parsing - + Dependency Graphs Generated Successfully 成功生成依存圖 - + <div>Dependency graphs has been successfully generated and exported under folder: {}</div> @@ -9152,17 +10842,30 @@ Frequency wl_measure_utils - + Absolute frequency 絕對頻數 - + Relative frequency 相對頻數 +wl_measures_lexical_diversity + + +Rank-frequency distribution +頻數排序分佈 + + + +Frequency spectrum +頻數譜 + + + wl_measures_readability @@ -9185,11 +10888,6 @@ Frequency 新版 - -Brouwer's Leesindex A -Brouwer Leesindex A - - Navy 海軍版 @@ -9198,17 +10896,17 @@ Frequency wl_measures_statistical_significance - + Two-tailed 雙尾 - + Left-tailed 左尾 - + Right-tailed 右尾 @@ -9216,22 +10914,22 @@ Frequency wl_misc - + minute - + minutes - + (In {} {} {:.2f} seconds) (耗時 {} {} {:.2f} 秒) - + (In (耗時 @@ -9399,668 +11097,783 @@ Frequency 字元數% - + Type-token Ratio 類符形符比 - -Type-token Ratio (Standardized) -類符形符比(標準化) - - - + Paragraph Length in Sentences (Mean) 段落長(單位:句子)(均值) - + Paragraph Length in Sentences (Standard Deviation) 段落長(單位:句子)(標準差) - + Paragraph Length in Sentences (Variance) 段落長(單位:句子)(方差) - + Paragraph Length in Sentences (Minimum) 段落長(單位:句子)(最小值) - + Paragraph Length in Sentences (25th Percentile) 段落長(單位:句子)(25分位數) - + Paragraph Length in Sentences (Median) 段落長(單位:句子)(中位數) - + Paragraph Length in Sentences (75th Percentile) 段落長(單位:句子)(75分位數) - + Paragraph Length in Sentences (Maximum) 段落長(單位:句子)(最大值) - + Paragraph Length in Sentences (Range) 段落長(單位:句子)(極差) - + Paragraph Length in Sentences (Interquartile Range) 段落長(單位:句子)(四分位差) - + Paragraph Length in Sentences (Modes) 段落長(單位:句子)(眾數) - + Paragraph Length in Sentence Segments (Mean) 段落長(單位:句段)(均值) - + Paragraph Length in Sentence Segments (Standard Deviation) 段落長(單位:句段)(標準差) - + Paragraph Length in Sentence Segments (Variance) 段落長(單位:句段)(方差) - + Paragraph Length in Sentence Segments (Minimum) 段落長(單位:句段)(最小值) - + Paragraph Length in Sentence Segments (25th Percentile) 段落長(單位:句段)(25分位數) - + Paragraph Length in Sentence Segments (Median) 段落長(單位:句段)(中位數) - + Paragraph Length in Sentence Segments (75th Percentile) 段落長(單位:句段)(75分位數) - + Paragraph Length in Sentence Segments (Maximum) 段落長(單位:句段)(最大值) - + Paragraph Length in Sentence Segments (Range) 段落長(單位:句段)(極差) - + Paragraph Length in Sentence Segments (Interquartile Range) 段落長(單位:句段)(四分位差) - + Paragraph Length in Sentence Segments (Modes) 段落長(單位:句段)(眾數) - + Paragraph Length in Tokens (Mean) 段落長(單位:形符)(均值) - + Paragraph Length in Tokens (Standard Deviation) 段落長(單位:形符)(標準差) - + Paragraph Length in Tokens (Variance) 段落長(單位:形符)(方差) - + Paragraph Length in Tokens (Minimum) 段落長(單位:形符)(最小值) - + Paragraph Length in Tokens (25th Percentile) 段落長(單位:形符)(25分位數) - + Paragraph Length in Tokens (Median) 段落長(單位:形符)(中位數) - + Paragraph Length in Tokens (75th Percentile) 段落長(單位:形符)(75分位數) - + Paragraph Length in Tokens (Maximum) 段落長(單位:形符)(最大值) - + Paragraph Length in Tokens (Range) 段落長(單位:形符)(極差) - + Paragraph Length in Tokens (Interquartile Range) 段落長(單位:形符)(四分位差) - + Paragraph Length in Tokens (Modes) 段落長(單位:形符)(眾數) - + Sentence Length in Tokens (Mean) 句長(單位:形符)(均值) - + Sentence Length in Tokens (Standard Deviation) 句長(單位:形符)(標準差) - + Sentence Length in Tokens (Variance) 句長(單位:形符)(方差) - + Sentence Length in Tokens (Minimum) 句長(單位:形符)(最小值) - + Sentence Length in Tokens (25th Percentile) 句長(單位:形符)(25分位數) - + Sentence Length in Tokens (Median) 句長(單位:形符)(中位數) - + Sentence Length in Tokens (75th Percentile) 句長(單位:形符)(75分位數) - + Sentence Length in Tokens (Maximum) 句長(單位:形符)(最大值) - + Sentence Length in Tokens (Range) 句長(單位:形符)(極差) - + Sentence Length in Tokens (Interquartile Range) 句長(單位:形符)(四分位差) - + Sentence Length in Tokens (Modes) 句長(單位:形符)(眾數) - + Sentence Segment Length in Tokens (Mean) 句段長(單位:形符)(均值) - + Sentence Segment Length in Tokens (Standard Deviation) 句段長(單位:形符)(標準差) - + Sentence Segment Length in Tokens (Variance) 句段長(單位:形符)(方差) - + Sentence Segment Length in Tokens (Minimum) 句段長(單位:形符)(最小值) - + Sentence Segment Length in Tokens (25th Percentile) 句段長(單位:形符)(25分位數) - + Sentence Segment Length in Tokens (Median) 句段長(單位:形符)(中位數) - + Sentence Segment Length in Tokens (75th Percentile) 句段長(單位:形符)(75分位數) - + Sentence Segment Length in Tokens (Maximum) 句段長(單位:形符)(最大值) - + Sentence Segment Length in Tokens (Range) 句段長(單位:形符)(極差) - + Sentence Segment Length in Tokens (Interquartile Range) 句段長(單位:形符)(四分位數) - + Sentence Segment Length in Tokens (Modes) 句段長(單位:形符)(眾數) - + Token Length in Syllables (Mean) 形符長(單位:音節)(均值) - + Token Length in Syllables (Standard Deviation) 形符長(單位:音節)(標準差) - + Token Length in Syllables (Variance) 形符長(單位:音節)(方差) - + Token Length in Syllables (Minimum) 形符長(單位:音節)(最小值) - + Token Length in Syllables (25th Percentile) 形符長(單位:音節)(25分位數) - + Token Length in Syllables (Median) 形符長(單位:音節)(中位數) - + Token Length in Syllables (75th Percentile) 形符長(單位:音節)(75分位數) - + Token Length in Syllables (Maximum) 形符長(單位:音節)(最大值) - + Token Length in Syllables (Range) 形符長(單位:音節)(極差) - + Token Length in Syllables (Interquartile Range) 形符長(單位:音節)(四分位差) - + Token Length in Syllables (Modes) 形符長(單位:音節)(眾數) - + Token Length in Characters (Mean) 形符長(單位:字元)(均值) - + Token Length in Characters (Standard Deviation) 形符長(單位:字元)(標準差) - + Token Length in Characters (Variance) 形符長(單位:字元)(方差) - + Token Length in Characters (Minimum) 形符長(單位:字元)(最小值) - + Token Length in Characters (25th Percentile) 形符長(單位:字元)(25分位值) - + Token Length in Characters (Median) 形符長(單位:字元)(中位數) - + Token Length in Characters (75th Percentile) 形符長(單位:字元)(75分位數) - + Token Length in Characters (Maximum) 形符長(單位:字元)(最大值) - + Token Length in Characters (Range) 形符長(單位:字元)(極差) - + Token Length in Characters (Interquartile Range) 形符長(單位:字元)(四分位差) - + Token Length in Characters (Modes) 形符長(單位:字元)(眾數) - + Type Length in Syllables (Mean) 類符長(單位:音節)(均值) - + Type Length in Syllables (Standard Deviation) 類符長(單位:音節)(標準差) - + Type Length in Syllables (Variance) 類符長(單位:音節)(方差) - + Type Length in Syllables (Minimum) 類符長(單位:音節)(最小值) - + Type Length in Syllables (25th Percentile) 類符長(單位:音節)(25分位數) - + Type Length in Syllables (Median) 類符長(單位:音節)(中位數) - + Type Length in Syllables (75th Percentile) 類符長(單位:音節)(75分位數) - + Type Length in Syllables (Maximum) 類符長(單位:音節)(最大值) - + Type Length in Syllables (Range) 類符長(單位:音節)(極差) - + Type Length in Syllables (Interquartile Range) 類符長(單位:音節)(四分位差) - + Type Length in Syllables (Modes) 類符長(單位:音節)(眾數) - + Type Length in Characters (Mean) 類符長(單位:字元)(均值) - + Type Length in Characters (Standard Deviation) 類符長(單位:字元)(標準差) - + Type Length in Characters (Variance) 類符長(單位:字元)(方差) - + Type Length in Characters (Minimum) 類符長(單位:字元)(最小值) - + Type Length in Characters (25th Percentile) 類符長(單位:字元)(25分位數) - + Type Length in Characters (Median) 類符長(單位:字元)(中位數) - + Type Length in Characters (75th Percentile) 類符長(單位:字元)(75分位數) - + Type Length in Characters (Maximum) 類符長(單位:字元)(最大值) - + Type Length in Characters (Range) 類符長(單位:字元)(極差) - + Type Length in Characters (Interquartile Range) 類符長(單位:字元)(四分位差) - + Type Length in Characters (Modes) 類符長(單位:字元)(眾數) - + Syllable Length in Characters (Mean) 音節長(單位:字元)(均值) - + Syllable Length in Characters (Standard Deviation) 音節長(單位:字元)(標準差) - + Syllable Length in Characters (Variance) 音節長(單位:字元)(方差) - + Syllable Length in Characters (Minimum) 音節長(單位:字元)(最小值) - + Syllable Length in Characters (25th Percentile) 音節長(單位:字元)(25分位數) - + Syllable Length in Characters (Median) 音節長(單位:字元)(中位數) - + Syllable Length in Characters (75th Percentile) 音節長(單位:字元)(75分位數) - + Syllable Length in Characters (Maximum) 音節長(單位:字元)(最大值) - -Syllable Length in Characters (Range) -音節長(單位:字元)(極差) + +Syllable Length in Characters (Range) +音節長(單位:字元)(極差) + + + +Syllable Length in Characters (Interquartile Range) +音節長(單位:字元)(四分位差) + + + +Syllable Length in Characters (Modes) +音節長(單位:字元)(眾數) + + + +Al-Heeti's Readability Prediction Formula + + + + +Bormuth's Cloze Mean + + + + +Bormuth's Grade Placement + + + + +Coleman's Readability Formula + + + + +Dale-Chall Readability Formula + + + + +Danielson-Bryan's Readability Formula + + + + +Dawood's Readability Formula + + + + +Degrees of Reading Power + + + + +Dickes-Steiwer Handformel + + + + +Easy Listening Formula + + + + +Flesch Reading Ease (Farr-Jenkins-Paterson) + + + + +Fórmula de Comprensibilidad de Gutiérrez de Polini + + + + +Fucks's Stilcharakteristik + + + + +Lorge Readability Index + + + + +Luong-Nguyen-Dinh's Readability Formula + + + + +neue Wiener Literaturformeln + + + + +neue Wiener Sachtextformel + + + + +Strain Index + + + + +Tränkle & Bailer's Readability Formula + + + + +Tuldava's Text Difficulty + + + + +Wheeler & Smith's Readability Formula + - -Syllable Length in Characters (Interquartile Range) -音節長(單位:字元)(四分位差) + +Corrected TTR + - -Syllable Length in Characters (Modes) -音節長(單位:字元)(眾數) + +Fisher's Index of Diversity + - -Al-Heeti's Readability Prediction Formula + +Herdan's Vₘ - -Bormuth's Cloze Mean + +HD-D - -Bormuth's Grade Placement + +LogTTR - -Coleman's Readability Formula + +Mean Segmental TTR - -Dale-Chall Readability Formula + +Measure of Textual Lexical Diversity - -Danielson-Bryan's Readability Formula + +Moving-average TTR - -Dawood's Readability Formula + +Popescu-Mačutek-Altmann's B₁ - -Degrees of Reading Power + +Popescu-Mačutek-Altmann's B₂ - -Dickes-Steiwer Handformel + +Popescu-Mačutek-Altmann's B₃ - -Easy Listening Formula + +Popescu-Mačutek-Altmann's B₄ - -Flesch Reading Ease (Farr-Jenkins-Paterson) + +Popescu-Mačutek-Altmann's B₅ - -Fórmula de Comprensibilidad de Gutiérrez de Polini + +Popescu's R₁ - -Fucks's Stilcharakteristik + +Popescu's R₂ - -Lorge Readability Index + +Popescu's R₃ - -Luong-Nguyen-Dinh's Readability Formula + +Popescu's R₄ - -neue Wiener Literaturformeln + +Repeat Rate - -neue Wiener Sachtextformel + +Root TTR - -Strain Index + +Shannon Entropy +夏農熵 + + + +Simpson's l - -Tränkle & Bailer's Readability Formula + +vocd-D - -Tuldava's Text Difficulty + +Yule's Characteristic K - -Wheeler & Smith's Readability Formula + +Yule's Index of Diversity @@ -10185,214 +11998,367 @@ Frequency +wl_settings_default + + +Observed Files +觀察檔案 + + + +Profiler +分析工具 + + + +APA (7th edition) +APA(第七版) + + + +Counts +計數 + + + +Token +形符 + + + +File +檔案 + + + +Ascending +升序 + + + +Token no. +形符序號 + + + +Line chart +折線圖 + + + +Total +合計 + + + +Frequency +頻數 + + + +None + + + + +p-value +p 值 + + + +General +全域性 + + + +Excel workbooks (*.xlsx) +Excel 工作簿 (*.xlsx) + + + +Non-embedded +非嵌入式 + + + +Header + + + + +Embedded +嵌入式 + + + +Part of speech +詞性 + + + +Others +其他 + + + +Paragraph +段落 + + + +Sentence +句子 + + + +Word +單詞 + + + +Policy one + + + + +New +新版 + + + +Original +原版 + + + +Rank-frequency distribution +頻數排序分佈 + + + +Two-tailed +雙尾 + + + +Relative frequency +相對頻數 + + + +Colormap +色譜 + + + wl_settings_figs - + Square 方形 - + Circle 圓形 - + Triangle up 朝上三角形 - + Triangle right 朝右三角形 - + Triangle down 朝下三角形 - + Triangle left 朝左三角形 - + Thin diamond 薄菱形 - + Pentagon 五角形 - + Hexagon 六邊形 - + Octagon 八邊形 - + Arc3 - + Arc - + Angle3 - + Angle - + Bar - + Solid 實線 - + Dashed 虛線 - + Dash-dotted 點畫線 - + Dotted 點線 - + Curve 圓弧 - + Curve A 圓弧 A - + Curve B 圓弧 B - + Curve AB 圓弧 AB - + Curve filled A 實心圓弧 A - + Curve filled B 實心圓弧 B - + Curve filled AB 實心圓弧 AB - + Bracket A 方括號 A - + Bracket B 方括號 B - + Bracket AB 方括號 AB - + Bar AB 橫條 AB - + Bracket curve 方括號加圓弧 - + Simple 樸素 - + Fancy 絢麗 - + Wedge 楔形 - + Circular 環形 - + Kamada-Kawai - + Planar 平面 - + Random 隨機 - + Shell 同心 - + Spring 彈簧 - + Spectral 譜圖 - + Spiral 螺旋 diff --git a/utils/wl_packaging.py b/utils/wl_packaging.py index 7d7329c19..6b9cfcf6d 100644 --- a/utils/wl_packaging.py +++ b/utils/wl_packaging.py @@ -46,8 +46,8 @@ def print_with_elapsed_time(message): os.makedirs('dist/Wordless/imports') os.makedirs('dist/Wordless/exports') elif is_macos: - os.makedirs('dist/Wordless.app/Contents/Macos/imports') - os.makedirs('dist/Wordless.app/Contents/Macos/exports') + os.makedirs('dist/Wordless.app/Contents/MacOS/imports') + os.makedirs('dist/Wordless.app/Contents/MacOS/exports') if is_linux: # Fix GLib-GIO-ERROR, Gtk-WARNING, and many other errors/warnings on Linux @@ -73,23 +73,25 @@ def print_with_elapsed_time(message): if is_windows: subprocess.run([os.path.join(os.getcwd(), 'Wordless.exe')], check = True) elif is_macos: - subprocess.run([os.path.join(os.getcwd(), 'Wordless.app/Contents/Macos/Wordless')], check = True) + subprocess.run([os.path.join(os.getcwd(), 'Wordless.app/Contents/MacOS/Wordless')], check = True) elif is_linux: subprocess.run(['./Wordless'], check = True) # Remove custom settings file if is_windows or is_linux: - if os.path.exists('wl_settings.pickle'): - os.remove('wl_settings.pickle') - - if os.path.exists('wl_settings_display_lang.pickle'): - os.remove('wl_settings_display_lang.pickle') + files_settings = [ + 'wl_settings.pickle', + 'wl_settings_display_lang.pickle' + ] elif is_macos: - if os.path.exists('Wordless.app/Contents/Macos/wl_settings.pickle'): - os.remove('Wordless.app/Contents/Macos/wl_settings.pickle') - - if os.path.exists('Wordless.app/Contents/Macos/wl_settings_display_lang.pickle'): - os.remove('Wordless.app/Contents/Macos/wl_settings_display_lang.pickle') + files_settings = [ + 'Wordless.app/Contents/MacOS/wl_settings.pickle', + 'Wordless.app/Contents/MacOS/wl_settings_display_lang.pickle' + ] + +for file_settings in files_settings: + if os.path.exists(file_settings): + os.remove(file_settings) print_with_elapsed_time('Tests passed!') diff --git a/utils/wl_trs_translate.py b/utils/wl_trs_translate.py index b95a6640a..221750956 100644 --- a/utils/wl_trs_translate.py +++ b/utils/wl_trs_translate.py @@ -61,8 +61,10 @@ 'Estonian': ['爱沙尼亚语'], 'Faroese': ['法罗语'], 'Finnish': ['芬兰语'], - 'French': ['法语'], + 'French (Old)': ['法语(古)'], + 'French': ['法语'], + 'Galician': ['加里西亚语'], 'Ganda': ['干达语'], 'Georgian': ['格鲁吉亚语'], @@ -108,6 +110,7 @@ 'Meitei': ['曼尼普尔语'], 'Mongolian': ['蒙古语'], 'Nepali': ['尼泊尔语'], + 'Nigerian Pidgin': ['尼日利亚皮钦语'], 'Norwegian Bokmål': ['书面挪威语'], 'Norwegian Nynorsk': ['新挪威语'], 'Oriya': ['奥里亚语'], @@ -118,8 +121,10 @@ 'Portuguese (Portugal)': ['葡萄牙语(葡萄牙)'], 'Punjabi (Gurmukhi)': ['旁遮普语(古木基)'], 'Romanian': ['罗马尼亚语'], - 'Russian': ['俄语'], + 'Russian (Old)': ['俄语(古)'], + 'Russian': ['俄语'], + 'Sámi (Northern)': ['萨米语(北)'], 'Sanskrit': ['梵语'], 'Scottish Gaelic': ['苏格兰盖尔语'], @@ -324,27 +329,29 @@ or ('type' in element_tr.attrs and element_tr['type'] != 'obsolete') ): tr = element_src.text + tr_raw = tr # Languages for lang, trs in TRS_LANGS.items(): - # Only replace language names at the beginning of the text, after hyphens in names of language utils, or after slashes in encoding names - if tr.startswith(lang) or f' - {lang} ' in tr or f'/{lang}' in tr: - if tr.startswith(lang): - tr = tr.replace(lang, trs[0], 1) - elif f' - {lang}' in tr: - tr = tr.replace(f' - {lang} ', f' - {trs[0]} ', 1) - elif f'/{lang}' in tr: - tr = tr.replace(f'/{lang}', f'/{trs[0]}', 1) - - tr_hit = True + # Language names + if tr == lang: + tr = trs[0] + # Encoding names + elif tr.startswith(f'{lang} ('): + tr = tr.replace(f'{lang} (', f'{trs[0]} (', 1) + elif tr.startswith(f'{lang}/'): + tr = tr.replace(f'{lang}/', f'{trs[0]}/', 1) + elif f'/{lang} (' in tr: + tr = tr.replace(f'/{lang} (', f'/{trs[0]} (', 1) + # Names of language utils + elif f' - {lang} ' in tr: + tr = tr.replace(f' - {lang} ', f' - {trs[0]} ', 1) # Encodings for encoding, trs in TRS_ENCODINGS.items(): if encoding in tr: tr = tr.replace(encoding, trs[0]) - tr_hit = True - break # File types @@ -352,8 +359,6 @@ if tr == file_type: tr = trs[0] - tr_hit = True - break # NLP utils @@ -365,8 +370,6 @@ elif tr.endswith(util): tr = tr.replace(util, trs[0], 1) - tr_hit = True - break # Misc @@ -374,20 +377,14 @@ if tr == item: tr = trs[0] - tr_hit = True - break # Exceptions - if any((text in tr for text in [ - '语 variant:' - ])): - tr = tr.replace('语 variant:', '语变体:') - + if any((text in tr for text in [])): # Flag translation as unfinished to be reviewed manually unfinished = True - if tr_hit: + if tr_raw != tr: # Do not replace parentheses in file type filters if element_src.text not in TRS_FILE_TYPES: # Parentheses diff --git a/wordless/wl_main.py b/wordless/wl_main.py index 34997a5f1..10bd0e976 100644 --- a/wordless/wl_main.py +++ b/wordless/wl_main.py @@ -197,8 +197,8 @@ def __init__(self, loading_window): self.settings_default = wl_settings_default.init_settings_default(self) # Custom settings - if os.path.exists('wl_settings.pickle'): - with open('wl_settings.pickle', 'rb') as f: + if os.path.exists(file_settings): + with open(file_settings, 'rb') as f: settings_custom = pickle.load(f) if wl_checks_misc.check_custom_settings(settings_custom, self.settings_default): @@ -208,8 +208,8 @@ def __init__(self, loading_window): else: self.settings_custom = copy.deepcopy(self.settings_default) - if os.path.exists('wl_settings_display_lang.pickle'): - with open('wl_settings_display_lang.pickle', 'rb') as f: + if os.path.exists(file_settings_display_lang): + with open(file_settings_display_lang, 'rb') as f: self.settings_custom['menu']['prefs']['display_lang'] = pickle.load(f) # Global settings @@ -385,15 +385,17 @@ def prefs_display_lang(self): if action.isChecked(): if action.lang != self.settings_custom['menu']['prefs']['display_lang']: if wl_dialogs_misc.Wl_Dialog_Restart_Required(self).exec_() == QDialog.Accepted: - with open('wl_settings_display_lang.pickle', 'wb') as f: + with open(file_settings_display_lang, 'wb') as f: pickle.dump(action.lang, f) # Remove settings file - if os.path.exists('wl_settings.pickle'): - os.remove('wl_settings.pickle') + if os.path.exists(file_settings): + os.remove(file_settings) # Remove file caches - for file in glob.glob('imports/*.*'): + for file in glob.glob(os.path.join( + self.settings_custom['general']['imp']['temp_files']['default_path'], '*.*' + )): os.remove(file) self.restart(save_settings = False) @@ -587,7 +589,7 @@ def save_settings(self): # Layouts self.settings_custom['menu']['prefs']['layouts']['central_widget'] = self.centralWidget().sizes() - with open('wl_settings.pickle', 'wb') as f: + with open(file_settings, 'wb') as f: pickle.dump(self.settings_custom, f) def restart(self, save_settings = True): @@ -598,9 +600,9 @@ def restart(self, save_settings = True): if getattr(sys, '_MEIPASS', False): if is_windows: - subprocess.Popen([wl_paths.get_normalized_path('Wordless.exe')]) + subprocess.Popen([wl_paths.get_path_file('Wordless.exe', internal = False)]) elif is_macos or is_linux: - subprocess.Popen([wl_paths.get_normalized_path('Wordless')]) + subprocess.Popen([wl_paths.get_path_file('Wordless', internal = False)]) else: if is_windows: subprocess.Popen(['python', '-m', 'wordless.wl_main']) @@ -1218,9 +1220,12 @@ def __init__(self, main): self.wrapper_info.layout().setColumnStretch(1, 5) if __name__ == '__main__': + file_settings = wl_paths.get_path_file('wl_settings.pickle', internal = False) + file_settings_display_lang = wl_paths.get_path_file('wl_settings_display_lang.pickle', internal = False) + # UI scaling - if os.path.exists('wl_settings.pickle'): - with open('wl_settings.pickle', 'rb') as f: + if os.path.exists(file_settings): + with open(file_settings, 'rb') as f: settings_custom = pickle.load(f) ui_scaling = settings_custom['general']['ui_settings']['interface_scaling'] @@ -1237,8 +1242,8 @@ def __init__(self, main): wl_app = QApplication(sys.argv) # Translations - if os.path.exists('wl_settings_display_lang.pickle'): - with open('wl_settings_display_lang.pickle', 'rb') as f: + if os.path.exists(file_settings_display_lang): + with open(file_settings_display_lang, 'rb') as f: display_lang = pickle.load(f) else: display_lang = 'eng_us' diff --git a/wordless/wl_measures/wl_measures_readability.py b/wordless/wl_measures/wl_measures_readability.py index 24eafdd2b..b3f405278 100644 --- a/wordless/wl_measures/wl_measures_readability.py +++ b/wordless/wl_measures/wl_measures_readability.py @@ -577,7 +577,7 @@ def re_flesch(main, text): - 77 * (text.num_syls / text.num_words) - 0.93 * (text.num_words / text.num_sentences) ) - elif settings['variant_nld'] == _tr('wl_measures_readability', "Brouwer's Leesindex A"): + elif settings['variant_nld'] == "Brouwer's Leesindex A": re = ( 195 - (200 / 3) * (text.num_syls / text.num_words) diff --git a/wordless/wl_nlp/wl_dependency_parsing.py b/wordless/wl_nlp/wl_dependency_parsing.py index 00557aa01..6832e38bb 100644 --- a/wordless/wl_nlp/wl_dependency_parsing.py +++ b/wordless/wl_nlp/wl_dependency_parsing.py @@ -27,6 +27,7 @@ from wordless.wl_checks import wl_checks_misc from wordless.wl_dialogs import wl_msg_boxes from wordless.wl_nlp import wl_matching, wl_nlp_utils +from wordless.wl_settings import wl_settings_default from wordless.wl_utils import wl_conversion, wl_misc, wl_paths _tr = QCoreApplication.translate @@ -105,11 +106,11 @@ def wl_dependency_parse_tokens(main, inputs, lang, dependency_parser, tagged): with nlp.select_pipes(disable = [ pipeline - for pipeline in ['tagger', 'morphologizer', 'lemmatizer', 'attribute_ruler', 'senter', 'sentencizer'] + for pipeline in ['senter', 'sentencizer'] if nlp.has_pipe(pipeline) ]): for doc in nlp.pipe([ - spacy.tokens.Doc(nlp.vocab, words = tokens, spaces = [False] * len(tokens)) + spacy.tokens.Doc(nlp.vocab, words = tokens, spaces = [True] * len(tokens)) for tokens in wl_nlp_utils.split_token_list(main, inputs, dependency_parser) ]): for token in doc: @@ -392,7 +393,7 @@ def wl_dependency_parse_fig_tokens( lens_docs = [] for tokens in wl_nlp_utils.split_token_list(main, inputs, dependency_parser): - docs.append(spacy.tokens.Doc(nlp.vocab, words = tokens, spaces = [False] * len(tokens))) + docs.append(spacy.tokens.Doc(nlp.vocab, words = tokens, spaces = [True] * len(tokens))) # Record length of each section if tagged: @@ -480,7 +481,7 @@ def wl_dependency_parse_fig_tokens( return htmls def wl_show_dependency_graphs(main, htmls, show_in_separate_tab): - DIR_PATH = 'exports/_dependency_parsing_figs' + DIR_PATH = os.path.join(wl_settings_default.DEFAULT_DIR_EXPS, '_dependency_parsing_figs') # Clean cache if os.path.exists(DIR_PATH): diff --git a/wordless/wl_nlp/wl_lemmatization.py b/wordless/wl_nlp/wl_lemmatization.py index fcc9b4a78..61ebbb35c 100644 --- a/wordless/wl_nlp/wl_lemmatization.py +++ b/wordless/wl_nlp/wl_lemmatization.py @@ -192,7 +192,7 @@ def wl_lemmatize_tokens(main, inputs, lang, lemmatizer, tagged): if lang == 'jpn': docs.append(''.join(tokens)) else: - docs.append(spacy.tokens.Doc(nlp.vocab, words = tokens, spaces = [False] * len(tokens))) + docs.append(spacy.tokens.Doc(nlp.vocab, words = tokens, spaces = [True] * len(tokens))) for doc in nlp.pipe(docs): for token in doc: diff --git a/wordless/wl_nlp/wl_pos_tagging.py b/wordless/wl_nlp/wl_pos_tagging.py index 1e40e7afd..7bce91306 100644 --- a/wordless/wl_nlp/wl_pos_tagging.py +++ b/wordless/wl_nlp/wl_pos_tagging.py @@ -117,7 +117,7 @@ def wl_pos_tag(main, inputs, lang, pos_tagger = 'default', tagset = 'default'): if lang == 'jpn': docs.append(''.join(tokens)) else: - docs.append(spacy.tokens.Doc(nlp.vocab, words = tokens, spaces = [False] * len(tokens))) + docs.append(spacy.tokens.Doc(nlp.vocab, words = tokens, spaces = [True] * len(tokens))) for doc in nlp.pipe(docs): if tagset in ['default', 'raw']: diff --git a/wordless/wl_settings/wl_settings_default.py b/wordless/wl_settings/wl_settings_default.py index 9a900aef6..123e5d7af 100644 --- a/wordless/wl_settings/wl_settings_default.py +++ b/wordless/wl_settings/wl_settings_default.py @@ -68,6 +68,11 @@ elif is_linux: DEFAULT_FONT_SIZE = 11 +# Directories for imports and exports +DEFAULT_DIR_WORDLESS = wl_paths.get_path_file('', internal = False) +DEFAULT_DIR_IMPS = wl_paths.get_path_file('imports', internal = False) +DEFAULT_DIR_EXPS = wl_paths.get_path_file('exports', internal = False) + def init_settings_default(main): desktop_widget = QDesktopWidget() @@ -1098,41 +1103,41 @@ def init_settings_default(main): # Settings - General - Import 'imp': { 'files': { - 'default_path': wl_paths.get_normalized_path('.') + 'default_path': DEFAULT_DIR_WORDLESS }, 'search_terms': { - 'default_path': wl_paths.get_normalized_path('.'), + 'default_path': DEFAULT_DIR_WORDLESS, 'default_encoding': 'utf_8', 'detect_encodings': True }, 'stop_words': { - 'default_path': wl_paths.get_normalized_path('.'), + 'default_path': DEFAULT_DIR_WORDLESS, 'default_encoding': 'utf_8', 'detect_encodings': True }, 'temp_files': { - 'default_path': wl_paths.get_normalized_path('imports/'), + 'default_path': DEFAULT_DIR_IMPS, } }, # Settings - General - Export 'exp': { 'tables': { - 'default_path': wl_paths.get_normalized_path('exports/'), + 'default_path': DEFAULT_DIR_EXPS, 'default_type': _tr('wl_settings_default', 'Excel workbooks (*.xlsx)'), 'default_encoding': 'utf_8' }, 'search_terms': { - 'default_path': wl_paths.get_normalized_path('exports/'), + 'default_path': DEFAULT_DIR_EXPS, 'default_encoding': 'utf_8' }, 'stop_words': { - 'default_path': wl_paths.get_normalized_path('exports/'), + 'default_path': DEFAULT_DIR_EXPS, 'default_encoding': 'utf_8' } } diff --git a/wordless/wl_settings/wl_settings_global.py b/wordless/wl_settings/wl_settings_global.py index bdf25bf02..04d93182e 100644 --- a/wordless/wl_settings/wl_settings_global.py +++ b/wordless/wl_settings/wl_settings_global.py @@ -2565,7 +2565,12 @@ def init_settings_global(): 'kmr': ['stanza_kmr'], 'kir': ['stanza_kir'], - 'lao': ['laonlp_seqlabeling'], + + 'lao': [ + 'laonlp_seqlabeling', + 'laonlp_yunshan_cup_2020' + ], + 'lat': ['stanza_lat'], 'lav': ['stanza_lav'], 'lij': ['stanza_lij'], diff --git a/wordless/wl_settings/wl_settings_measures.py b/wordless/wl_settings/wl_settings_measures.py index 3726cd604..3ffd8e2f3 100644 --- a/wordless/wl_settings/wl_settings_measures.py +++ b/wordless/wl_settings/wl_settings_measures.py @@ -126,7 +126,7 @@ def __init__(self, main): self.combo_box_re_variant_spa = wl_boxes.Wl_Combo_Box(self) self.combo_box_re_variant_nld.addItems([ - self.tr("Brouwer's Leesindex A"), + "Brouwer's Leesindex A", 'Douma', ]) self.combo_box_re_variant_spa.addItems([ diff --git a/wordless/wl_utils/wl_paths.py b/wordless/wl_utils/wl_paths.py index e54cda977..2e146cbdf 100644 --- a/wordless/wl_utils/wl_paths.py +++ b/wordless/wl_utils/wl_paths.py @@ -19,6 +19,8 @@ import os import sys +from wordless.wl_utils import wl_misc + def get_normalized_path(path): path = os.path.realpath(path) path = os.path.normpath(path) @@ -30,9 +32,17 @@ def get_normalized_dir(path): return os.path.dirname(path) -def get_path_file(*paths): +def get_path_file(*paths, internal = True): if getattr(sys, '_MEIPASS', False): - path = os.path.join(sys._MEIPASS, *paths) + is_windows, is_macos, is_linux = wl_misc.check_os() + + if internal: + path = os.path.join(sys._MEIPASS, *paths) + else: + if is_windows or is_linux: + path = os.path.join(sys._MEIPASS, '..', *paths) + elif is_macos: + path = os.path.join(sys._MEIPASS, '..', 'MacOS', *paths) else: path = os.path.join(*paths)