diff --git a/data/unifont-15.1.02.otf b/data/unifont-15.1.02.otf deleted file mode 100644 index aa212bf72..000000000 Binary files a/data/unifont-15.1.02.otf and /dev/null differ diff --git a/data/unifont-15.1.05.otf b/data/unifont-15.1.05.otf new file mode 100644 index 000000000..3d0dcd3c1 Binary files /dev/null and b/data/unifont-15.1.05.otf differ diff --git a/doc/doc.md b/doc/doc.md index 418313b97..1a0fc839a 100644 --- a/doc/doc.md +++ b/doc/doc.md @@ -946,8 +946,8 @@ The following variables would be used in formulas:
- + + + - @@ -52,63 +53,63 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -117,119 +118,119 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/test_profiler.py b/tests/test_profiler.py index 370fa2acc..d791f3365 100644 --- a/tests/test_profiler.py +++ b/tests/test_profiler.py @@ -178,9 +178,6 @@ def update_gui(err_msg, texts_stats_files): assert len_sentences_total == numpy.sum(stats[3]) - # Sentence length should never be zero - assert 0 not in count_sentences_lens - # Count of n-token-long Sentence Segments if any(count_sentence_segs_lens): count_sentence_segs_lens_files = wl_misc.merge_dicts(count_sentence_segs_lens) @@ -195,9 +192,6 @@ def update_gui(err_msg, texts_stats_files): assert len_sentence_segs_total == numpy.sum(stats[3]) - # Sentence segment length should never be zero - assert 0 not in count_sentence_segs_lens - # Count of n-syllable-long Tokens if len_tokens_syls is not None: count_tokens_lens_files = wl_misc.merge_dicts(count_tokens_lens_syls) diff --git a/tests/tests_measures/test_measures_readability.py b/tests/tests_measures/test_measures_readability.py index b77fff36f..32dcf60b8 100644 --- a/tests/tests_measures/test_measures_readability.py +++ b/tests/tests_measures/test_measures_readability.py @@ -83,9 +83,9 @@ def test_rd(): rd_ara_0 = wl_measures_readability.rd(main, test_text_ara_0) - 
settings['rd']['variant'] = 'Policy one' + settings['rd']['variant'] = 'Policy One' rd_ara_12_policy_1 = wl_measures_readability.rd(main, test_text_ara_12) - settings['rd']['variant'] = 'Policy two' + settings['rd']['variant'] = 'Policy Two' rd_ara_12_policy_2 = wl_measures_readability.rd(main, test_text_ara_12) rd_eng_12 = wl_measures_readability.rd(main, test_text_eng_12) diff --git a/tests/tests_nlp/test_texts.py b/tests/tests_nlp/test_texts.py index 35b2357ca..968edd0f6 100644 --- a/tests/tests_nlp/test_texts.py +++ b/tests/tests_nlp/test_texts.py @@ -26,6 +26,13 @@ wl_token = wl_texts.Wl_Token('test', tag = '_NN') wl_tokens = [wl_texts.Wl_Token('test', tag = '_NN')] +def test_check_text(): + assert wl_texts.check_text('test') == 'test' + assert wl_texts.check_text(None) == '' + +def test_check_texts(): + assert wl_texts.check_texts(['test', None]) == ['test', ''] + def test_wl_token(): wl_token = wl_texts.Wl_Token('test') hash(wl_token) @@ -35,7 +42,7 @@ def test_wl_token(): wl_token.update_properties(wl_token) def test_to_tokens(): - assert wl_texts.to_tokens(['test_NN']) == wl_tokens + assert wl_texts.to_display_texts(wl_texts.to_tokens(['test', None], tags = ['_NN', None])) == ['test_NN', ''] def test_display_texts_to_tokens(): assert wl_texts.display_texts_to_tokens(main, ['test_NN'])[0].display_text() == 'test_NN' @@ -68,13 +75,18 @@ def test_to_display_texts(): def test_set_token_text(): assert wl_texts.set_token_text(wl_token, 'tests').display_text() == 'tests_NN' + assert wl_texts.set_token_text(wl_token, None).display_text() == '_NN' def test_set_token_texts(): wl_tokens_copy = copy.deepcopy(wl_tokens) - wl_texts.set_token_texts(wl_tokens_copy, ['test1']) + wl_texts.set_token_texts(wl_tokens_copy, ['test1']) assert wl_texts.to_display_texts(wl_tokens_copy) == ['test1_NN'] + # If the token property is None, the token text should be set to empty + wl_texts.set_token_texts(wl_tokens_copy, [None]) + assert wl_texts.to_display_texts(wl_tokens_copy) == 
['_NN'] + def test_has_token_properties(): assert wl_texts.has_token_properties(wl_tokens, 'tag') assert not wl_texts.has_token_properties(wl_tokens, 'lemma') @@ -112,6 +124,9 @@ def test_wl_text_total(): assert text_total_2.tagged if __name__ == '__main__': + test_check_text() + test_check_texts() + test_wl_token() test_to_tokens() test_display_texts_to_tokens() diff --git a/tests/tests_results/test_results_sort.py b/tests/tests_results/test_results_sort.py index 6bb158e95..ef0c604c9 100644 --- a/tests/tests_results/test_results_sort.py +++ b/tests/tests_results/test_results_sort.py @@ -36,7 +36,7 @@ def test_wl_dialog_results_sort_concordancer(): main.settings_custom['concordancer']['sort_results']['sorting_rules'] = [ ['Node', 'Ascending'], ['Sentiment', 'Ascending'], - ['Token no.', 'Ascending'], + ['Token No.', 'Ascending'], ['File', 'Ascending'], ['R1', 'Ascending'], ['L1', 'Ascending'] @@ -45,7 +45,7 @@ def test_wl_dialog_results_sort_concordancer(): dialog_results_sort_concordancer.update_gui([]) def test_table_results_sort_concordancer(): - table_results_sort_concordancer = wl_results_sort.Table_Results_Sort_Conordancer( + table_results_sort_concordancer = wl_results_sort.Wl_Table_Results_Sort_Conordancer( main, table = table ) diff --git a/tests/tests_settings/test_settings_lemmatization.py b/tests/tests_settings/test_settings_lemmatization.py index 96a8d428f..dc4fa0264 100644 --- a/tests/tests_settings/test_settings_lemmatization.py +++ b/tests/tests_settings/test_settings_lemmatization.py @@ -28,7 +28,6 @@ def test_wl_settings_lemmatization(): settings_lemmatization.apply_settings() settings_lemmatization.preview_changed() - settings_lemmatization.preview_results_changed() settings_lemmatization.update_gui('test') settings_lemmatization.update_gui_err() diff --git a/tests/tests_settings/test_settings_pos_tagging.py b/tests/tests_settings/test_settings_pos_tagging.py index 9992dfb30..305482736 100644 --- 
a/tests/tests_settings/test_settings_pos_tagging.py +++ b/tests/tests_settings/test_settings_pos_tagging.py @@ -29,7 +29,6 @@ def test_wl_settings_pos_tagging(): settings_pos_tagging.apply_settings() settings_pos_tagging.preview_changed() - settings_pos_tagging.preview_results_changed() settings_pos_tagging.update_gui('test') settings_pos_tagging.update_gui_err() diff --git a/tests/tests_settings/test_settings_sentence_tokenization.py b/tests/tests_settings/test_settings_sentence_tokenization.py index 8f973611d..9a36bdd33 100644 --- a/tests/tests_settings/test_settings_sentence_tokenization.py +++ b/tests/tests_settings/test_settings_sentence_tokenization.py @@ -28,7 +28,6 @@ def test_wl_settings_sentence_tokenization(): settings_sentence_tokenization.apply_settings() settings_sentence_tokenization.preview_changed() - settings_sentence_tokenization.preview_results_changed() settings_sentence_tokenization.update_gui('test') settings_sentence_tokenization.update_gui_err() diff --git a/tests/tests_settings/test_settings_sentiment_analysis.py b/tests/tests_settings/test_settings_sentiment_analysis.py index 0ba8620ab..b9166cd6a 100644 --- a/tests/tests_settings/test_settings_sentiment_analysis.py +++ b/tests/tests_settings/test_settings_sentiment_analysis.py @@ -28,7 +28,6 @@ def test_wl_settings_sentiment_analysis(): settings_sentiment_analysis.apply_settings() settings_sentiment_analysis.preview_changed() - settings_sentiment_analysis.preview_results_changed() settings_sentiment_analysis.update_gui('test') settings_sentiment_analysis.update_gui_err() diff --git a/tests/tests_settings/test_settings_syl_tokenization.py b/tests/tests_settings/test_settings_syl_tokenization.py index d5d5efa98..d5a9ce631 100644 --- a/tests/tests_settings/test_settings_syl_tokenization.py +++ b/tests/tests_settings/test_settings_syl_tokenization.py @@ -28,7 +28,6 @@ def test_wl_settings_syl_tokenization(): settings_syl_tokenization.apply_settings() 
settings_syl_tokenization.preview_changed() - settings_syl_tokenization.preview_results_changed() settings_syl_tokenization.update_gui('test') settings_syl_tokenization.update_gui_err() diff --git a/tests/tests_settings/test_settings_word_tokenization.py b/tests/tests_settings/test_settings_word_tokenization.py index 41454aca1..49f6aff2e 100644 --- a/tests/tests_settings/test_settings_word_tokenization.py +++ b/tests/tests_settings/test_settings_word_tokenization.py @@ -28,7 +28,6 @@ def test_wl_settings_word_tokenization(): settings_word_tokenization.apply_settings() settings_word_tokenization.preview_changed() - settings_word_tokenization.preview_results_changed() settings_word_tokenization.update_gui('test') settings_word_tokenization.update_gui_err() diff --git a/tests/tests_utils/test_threading.py b/tests/tests_utils/test_threading.py index e1cc0dd79..75e6b8dc1 100644 --- a/tests/tests_utils/test_threading.py +++ b/tests/tests_utils/test_threading.py @@ -44,7 +44,7 @@ def test_wl_thread_no_progress(): worker = wl_threading.Wl_Worker_No_Progress(main, lambda: None) worker.run = lambda: None - wl_threading.Wl_Thread_No_Progress(worker).start_worker() + wl_threading.Wl_Thread_No_Progress(worker) if __name__ == '__main__': test_wl_worker() diff --git a/tests/wl_test_file_area.py b/tests/wl_test_file_area.py index b4dd226fd..5f407dd76 100644 --- a/tests/wl_test_file_area.py +++ b/tests/wl_test_file_area.py @@ -37,6 +37,12 @@ def wl_test_file_area(main): def open_file(err_msg, files_to_open): assert not err_msg + if files_to_open[-1]['name'] == '[amh] No language support': + files_to_open[-1]['lang'] = files_to_open[-1]['lang'] = 'amh' + + if files_to_open[-1]['name'] == '[eng_gb] Tagged': + files_to_open[-1]['tagged'] = files_to_open[-1]['tagged'] = True + wl_file_area.Wl_Worker_Open_Files( main, dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress(main, text = ''), @@ -48,6 +54,12 @@ def open_file(err_msg, files_to_open): def open_file_ref(err_msg, files_to_open): 
assert not err_msg + if files_to_open[-1]['name'] == '[amh] No language support': + files_to_open[-1]['lang'] = files_to_open[-1]['lang'] = 'amh' + + if files_to_open[-1]['name'] == '[eng_gb] Tagged': + files_to_open[-1]['tagged'] = files_to_open[-1]['tagged'] = True + wl_file_area.Wl_Worker_Open_Files( main, dialog_progress = wl_dialogs_misc.Wl_Dialog_Progress(main, text = ''), @@ -124,18 +136,16 @@ def update_gui_ref(err_msg, new_files): assert new_file['encoding'] == 'ascii' if new_file['name'] == '[amh] No language support': - assert new_file['lang'] == 'other' + assert new_file['lang'] == 'amh' else: assert new_file['lang'] == 'eng_us' assert not new_file['tokenized'] - assert not new_file['tagged'] - - if new_file['name'] == '[amh] No language support': - new_file['lang'] = new_file['text'].lang = 'amh' if new_file['name'] == '[eng_gb] Tagged': - new_file['tagged'] = new_file['text'].tagged = True + assert new_file['tagged'] + else: + assert not new_file['tagged'] print(f'done! (In {round(time.time() - time_start, 2)} seconds)') diff --git a/tests/wl_test_init.py b/tests/wl_test_init.py index e31102e66..71ce7b15f 100644 --- a/tests/wl_test_init.py +++ b/tests/wl_test_init.py @@ -61,7 +61,8 @@ def __init__(self, switch_lang_utils = 'default'): self.email = 'blkserene@gmail.com' self.email_html = 'blkserene@gmail.com' - # Default settings + # Global and default settings + self.settings_global = wl_settings_global.init_settings_global() self.settings_default = wl_settings_default.init_settings_default(self) # Custom settings @@ -76,9 +77,6 @@ def __init__(self, switch_lang_utils = 'default'): else: self.settings_custom = copy.deepcopy(self.settings_default) - # Global settings - self.settings_global = wl_settings_global.SETTINGS_GLOBAL - match switch_lang_utils: case 'fast': self.switch_lang_utils_fast() @@ -234,6 +232,8 @@ def __init__(self, parent, tab = ''): self.tab = tab self.header_orientation = 'hor' + + self.settings_global = 
wl_settings_global.init_settings_global() self.settings = wl_settings_default.init_settings_default(self) self.setModel(QStandardItemModel()) diff --git a/trs/zho_cn.ts b/trs/zho_cn.ts index 10b2fed07..de5d57b21 100644 --- a/trs/zho_cn.ts +++ b/trs/zho_cn.ts @@ -1,142 +1,46 @@ - - Dialog_Open_Files - - - Add files... - 添加文件... - - - - Add folder... - 添加文件夹... - - - - Auto-detect encodings - 自动检测编码 - - - - Auto-detect languages - 自动检测语种 - - - - Include files in subfolders - 包含子文件夹下文件 - - - - Open - 打开 - - - - Cancel - 取消 - - - - Error Adding Files - 添加文件时出错 - - - - Checking files... - 检查文件中…… - - - - Open Files - 打开文件 - - - - Open Folder - 打开文件夹 - - - - Remove files - 移除文件 - - - - Clear table - 清空表格 - - - - - <div> - An error occurred while adding files, so the following files are not added to the table. - </div> - - - <div> - 添加文件时发生了一个错误,因此下列文件未被添加至表格中。 - </div> - - - - - Empty file - 空文件 - - - - Unsupported file type - 文件类型不支持 - - - - Duplicate file - 重复文件 - - Table_Open_Files - + Language 语种 - + Path 路径 - + Encoding 编码 - + Tokenized 已分词 - + Tagged 已标注 - WL_Dialog_Clear_Table + Wl_Button_Color - - Clear Table - 清空表格 + + Pick Color + 选择颜色 Wl_Button_Restore_Defaults - + <div>Are you sure you want to reset all settings to their defaults?</div> @@ -145,15 +49,20 @@ - + Restore Defaults 恢复默认值 + + + Restore defaults + 恢复默认值 + - Wl_Combo_Box_File_Figure_Settings + Wl_Combo_Box_File_Fig_Settings - + Total 合计 @@ -161,7 +70,7 @@ Wl_Combo_Box_File_To_Filter - + Total 合计 @@ -169,106 +78,99 @@ Wl_Dialog_About - + About Wordless 关于 Wordless - + - <div style="text-align: center;"> - <h2>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Wordless</h2> - <div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Version {}</div> - </div> - - <div style="text-align: center;"> - <h2>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Wordless</h2> - <div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;版本 {}</div> - </div> - 
+ <div align="center"> + <h2>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Wordless</h2> + <div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Version {}</div> + </div> + + + <div align="center"> + <h2>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Wordless</h2> + <div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;版本 {}</div> + </div> + - + - <div style="text-align: center;"> - An Integrated Corpus Tool with Multilingual Support<br> - for the Study of Language, Literature, and Translation - </div> - - <hr> - - <div style="text-align: center;"> - Copyright (C) 2018-2023&nbsp;&nbsp;Ye Lei (叶磊)<br> - Licensed Under GNU GPLv3<br> - All Other Rights Reserved - </div> - + <div align="center"> + An Integrated Corpus Tool with Multilingual Support<br> + for the Study of Language, Literature, and Translation + </div> + <hr> + <div align="center"> + Copyright (C) 2018-{}&nbsp;&nbsp;Ye Lei (叶磊)<br> + Licensed Under GNU GPLv3<br> + All Other Rights Reserved + </div> + - <div style="text-align: center;"> - 一款拥有多语种支持的语料库集成工具<br> - 可用于语言学、文学及翻译研究 - </div> - - <hr> - - <div style="text-align: center;"> - 版权所有 (C) 2018-2023&nbsp;&nbsp;Ye Lei (叶磊)<br> - 基于 GNU GPLv3 协议<br> - 保留其他所有权利 - </div> - + <div align="center"> + 一款拥有多语种支持的语料库集成工具<br> + 可用于语言学、文学及翻译研究 + </div> + <hr> + <div align="center"> + 版权所有 (C) 2018-{}&nbsp;&nbsp;叶磊<br> + 基于 GNU GPLv3 协议<br> + 保留其他所有权利 + </div> + Wl_Dialog_Acks - + Name 名称 - + Version 版本 - + Authors 作者 - + License 许可 - + Acknowledgments 致谢 - - ACKNOWLEDGMENTS.md - ACKNOWLEDGMENTS_zho_cn.md + + ACKS.md + doc/trs/zho_cn/ACKS.md - + - <div> - As Wordless stands on the shoulders of giants, I hereby extend my sincere gratitude to the following open-source projects without which this project would not have been possible: - </div> + <div>As Wordless stands on the shoulders of giants, I hereby extend my sincere gratitude to the following open-source projects without which this project 
would not have been possible:</div> - <div> - 鉴于 Wordless 立于巨人的肩膀之上,我谨在此向下列开源项目致以本人诚挚的感谢,若没有它们,本项目将无法完成: - </div> + <div>鉴于 Wordless 立于巨人的肩膀之上,我谨在此向下列开源项目致以本人诚挚的感谢,若没有它们,本项目将无法完成:</div> Wl_Dialog_Changelog - + Changelog 更新日志 @@ -276,310 +178,235 @@ Wl_Dialog_Check_Updates - + Check for updates on startup 启动时检查更新 - + Cancel 取消 - - - <div> - Checking for updates... - </div> - - - <div> - 检查更新中…… - </div> - + + OK + 确认 - - - <div> - Hooray, you are using the latest version of Wordless! - </div> - - - <div> - 好耶,你使用的是 Wordless 的最新版! - </div> - + + Check for Updates + 检查更新 - - - <div> - A network error has occurred, please check your network settings and try again or <a href="https://github.com/BLKSerene/Wordless/releases">check for updates manually</a>. - </div> - - - <div> - 刚才发生了一个网络错误,请检查你的网络设置并重试或<a href="https://github.com/BLKSerene/Wordless/releases">手动检查更新</a>。 - </div> - + + Try again + 重试 - - OK - 确认 + + <div>Current version: </div> + 当前版本: - - Check for Updates - 检查更新 + + <div>Checking for updates...</div> + <div>检查更新中……</div> - + + <div>Latest version: Checking...</div> + <div>最新版本:查询中……</div> + + + - <div> - Wordless {} is out, click <a href="https://github.com/BLKSerene/Wordless#download"><b>HERE</b></a> to download the latest version of Wordless. - </div> + <div>Wordless {} is out, click <a href="https://github.com/BLKSerene/Wordless#download"><b>HERE</b></a> to download the latest version of Wordless.</div> - <div> - Wordless {} 已发布,点击<a href="https://github.com/BLKSerene/Wordless#download"><b>此处</b></a>下载 Wordless 的最新版。 - </div> + <div> Wordless {} 已发布,点击<a href="https://github.com/BLKSerene/Wordless#download"><b>此处</b></a>下载最新版Wordless。</div> - - Current version: - 当前版本: - - - - Try again - 重试 + + <div>Latest version: </div> + <div>最新版本:</div> - - Latest version: Checking... 
- 最新版本:查询中…… + + + <div>Hooray, you are using the latest version of Wordless!</div> + + + <div>好耶,你使用的是 Wordless 的最新版!</div> + - - Latest version: - 最新版本: + + + <div>A network error has occurred, please check your network settings and try again or <a href="https://github.com/BLKSerene/Wordless/releases">check for updates manually</a>.</div> + + + <div>刚才发生了一个网络错误,请检查你的网络设置并重试或<a href="https://github.com/BLKSerene/Wordless/releases">手动检查更新</a>。</div> + - - Latest version: Network error - 最新版本:网络错误 + + <div>Latest version: Network error</div> + <div>最新版本:网络错误</div> Wl_Dialog_Citing - + Citing 引用 - + Select citation system: 选择引用体系: - + APA (7th edition) APA(第七版) - + MLA (8th edition) MLA(第八版) - - - <div> - If you are going to publish a work that uses Wordless, please cite as follows. - </div> - - <div> - 如果你准备发表的成果中使用了 Wordless,请按如下格式进行引用。 - </div> - - - - - Wl_Dialog_Clr_All_Tables - - - Clear All Tables - 清空所有表格 - - - - - <div> - The results in some of the tables have yet been exported. Do you really want to clear all tables? - </div> - - - <div> - 部分表格中的结果尚未保存。你是否确认清空所有表格? - </div> - - - - - Wl_Dialog_Clr_Table - - + - <div> - The results in the table have yet been exported. Do you really want to clear the table? - </div> + <div>If you are going to publish a work that uses Wordless, please cite as follows.</div> - <div> - 表格中的结果尚未导出。你确认要清空表格吗? - </div> + <div>如果你准备发表的成果中使用了 Wordless,请按如下格式进行引用。</div> - - - Yes - - - - - No - - Wl_Dialog_Confirm_Exit - - - <div> - Are you sure you want to exit Wordless? - </div> - <div style="font-weight: bold;"> - Note: All unsaved data and figures will be lost. - </div> - - - <div> - 你确认你要退出 Wordless 吗? 
- </div> - <div style="font-weight: bold;"> - 注:所有未保存的数据和图表都将丢失。 - </div> - - - - + Always confirm on exit 关闭时总是提示确认 - + Exit 退出 - + Cancel 取消 - + Exit Wordless 退出 Wordless + + + + <div>Are you sure you want to exit Wordless?</div> + <br> + <div><b>Note: All unsaved data and figures will be lost.</b></div> + + + <div>你确认你要退出 Wordless 吗?</div> + <br> + <div><b> 注意:所有未保存的数据和图表都将丢失。</b></div> + + Wl_Dialog_Context_Settings - + Inclusion 包含 - + Exclusion 排除 - + L - + R - + Context window: - 上下文检索范围: + 上下文限制范围: + + + + Context Settings + 上下文设置 Wl_Dialog_Donating - - - <div> - If you would like to support the development of Wordless, you may donate via <a href="https://www.paypal.com/">PayPal</a>, <a href="https://global.alipay.com/">Alipay</a>, or <a href="https://pay.weixin.qq.com/index.php/public/wechatpay_en">WeChat Pay</a>. - </div> - - - <div> - 如果你愿意支持 Wordless 的开发工作,你可以通过<a href="https://www.paypal.com/">PayPal</a>、<a href="https://global.alipay.com/">支付宝</a>或<a href="https://pay.weixin.qq.com/index.php/public/wechatpay_en">微信支付</a>进行赞助。 - </div> - - - - + Donating via: 赞助途径: - + Alipay 支付宝 - + WeChat Pay 微信支付 - + Donating 赞助 + + + + <div>If you would like to support the development of Wordless, you may donate via <a href="https://www.paypal.com/">PayPal</a>, <a href="https://global.alipay.com/">Alipay</a>, or <a href="https://pay.weixin.qq.com/index.php/public/wechatpay_en">WeChat Pay</a>.</div> + + + <div>如果你愿意支持 Wordless 的开发工作,你可以通过<a href="https://www.paypal.com/">PayPal</a>、<a href="https://global.alipay.com/">支付宝</a>或<a href="https://pay.weixin.qq.com/index.php/public/wechatpay_en">微信支付</a>进行赞助。</div> + + Wl_Dialog_Err_Files - + Error Type 错误类型 - + OK 确认 - + File Path 文件路径 - + Export table... 导出表格... @@ -587,93 +414,211 @@ Wl_Dialog_Need_Help - + Need Help? 需要帮助? 
- - - <div> - If you have any questions, find software bugs, need to provide feedback, or want to submit feature requests, you may seek support from the open-source community or contact me directly via any of the support channels listed below. - </div> - - - <div> - 如果你有任何问题、发现了软件错误、需要提供反馈信息或想要提交功能需求,你可以通过下方所列的任一支持渠道来获取开源社区的支持或直接与我联络。 - </div> - - - - + Support Channel 支持渠道 - + Information 信息 - - <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/doc_eng.md">Documentation</a> - <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/doc_eng.md">文档</a> - - - - <a href="https://github.com/BLKSerene/Wordless/issues">Gihub Issues</a> - - - - - <a href="https://github.com/BLKSerene/Wordless/discussions">Gihub Discussions</a> - - - - + Official documentation 官方文档 - + Tutorial videos 视频教程 - + Bug reports Bug 提交 - + Usage questions 使用问题 - + Email support 邮件咨询 - + <a href="https://www.wechat.com/en/">WeChat</a> official account <a href="https://www.wechat.com/en/">微信</a>公众号 - + <a href="https://www.youtube.com/@BLKSerene">YouTube</a> | <a href="https://space.bilibili.com/34963752/video">bilibili</a> <a href="https://www.youtube.com/@BLKSerene">YouTube</a> | <a href="https://space.bilibili.com/34963752/video">B 站</a> + + + + <div>If you have any questions, find software bugs, need to provide feedback, or want to submit feature requests, you may seek support from the open-source community or contact me directly via any of the support channels listed below.</div> + + + <div>如果你有任何问题、发现了软件错误、需要提供反馈信息或想要提交功能需求,你可以通过下方所列的任一支持渠道来获取开源社区的支持或直接与我联络。</div> + + + + + <a href="https://github.com/BLKSerene/Wordless/blob/{self.main.ver}/doc/doc.md">Stable Version</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/doc.md">Development Version</a> + <a href="https://github.com/BLKSerene/Wordless/blob/{self.main.ver}/doc/doc.md">稳定版</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/doc.md">开发版</a> + + + + Wl_Dialog_Open_Files + + + Open Files + 
打开文件 + + + + Add files... + 添加文件... + + + + Add folder... + 添加文件夹... + + + + Remove files + 移除文件 + + + + Clear table + 清空表格 + + + + Auto-detect encodings + 自动检测编码 + + + + Auto-detect languages + 自动检测语种 + + + + Include files in subfolders + 包含子文件夹下文件 + + + + Open + 打开 + + + + Cancel + 取消 + + + + Checking files... + 检查文件中…… + + + + Error Adding Files + 添加文件时出错 + + + + + <div> + An error occurred while adding files, so the following files are not added to the table. + </div> + + + <div> + 添加文件时发生了一个错误,因此下列文件未被添加至表格中。 + </div> + + + + + Empty file + 空文件 + + + + Unsupported file type + 文件类型不支持 + + + + Duplicate file + 重复文件 + + + + Open Folder + 打开文件夹 + + + + Wl_Dialog_Opening_Nontext_Files + + + Opening Non-text Files + 打开非文本文件 + + + + + <div>It is <b>not recommended to directly import non-text files into <i>Wordless</i></b> and the support for doing so is provided only for convenience, since accuracy of text extraction could never be guaranteed and unintended data loss might occur, for which reason users are encouraged to <b>convert their files using specialized tools and make their own choices</b> on which part of the data should be kept or discarded.</div> + <br> + <div>Do you want to proceed to open non-text files anyway?</div> + + + <div><b>不建议将非文本文件直接导入<i>Wordless</i></b>,提供该功能也只是为了方便用户,因为文本提取的精度始终无法保证且数据可能会在无意中丢失。鉴于该原因,建议用户<b>使用专用工具转换文件并自行决定</b>应该保留或丢第哪部分数据。</div> + <br> + <div>你是否无论如何仍要继续打开非文本文件?</div> + + + + + Do not show this again + 下次不再显示该提示 + + + + Proceed + 继续 + + + + Abort + 中止 + Wl_Dialog_Preview_Settings - + coarse-grained 粗分 - + fine-grained 细分 @@ -682,144 +627,197 @@ Wl_Dialog_Progress - Please wait. It may take a few seconds to several minutes for the operation to be completed. - 请稍后。本操作可能需要数秒至数分钟完成。 + <div>Elapsed time: 0:00:00</div> + <div>已用时间:0:00:00</div> - - Elapsed time: 0:00:00 - 已用时间:0:00:00 + + + <div>Please wait. 
It may take a few seconds to several minutes for the operation to be completed.</div> + + + <div>请稍候。该操作可能会在几秒至数分钟内完成。</div> + - - Elapsed time: - 已用时间: + + <div>Elapsed time: {}</div> + <div>已用时间:{}</div> - Wl_Dialog_Progress_Process_Data + Wl_Dialog_Restart_Required - - Processing data... - 处理数据中…… + + Restart + 重启 - - Downloading model... - 下载模型中…… + + Cancel + 取消 + + + + Restart Wordless + 重启 Wordless - - - Wl_Dialog_Restart_Required - + <div> Restart is required for the settings to take effect. Do you want to restart Wordless now? </div> - - <div style="font-weight: bold;"> - Note: All unsaved data and figures will be lost. - </div> + <br> + <div><b>Note: All unsaved data and figures will be lost.</b></div> <div> - 需要重启本程序使设置生效。你想要现在重启 Wordless 吗? - </div> - - <div style="font-weight: bold;"> - 注:所有未保存的数据和图表都将丢失。 + 需要重启本程序才能使设置生效。你想要现在重启 Wordless 吗? </div> + <br> + <div><b>注意:所有未保存的数据和图表都将丢失。</b></div> + + + Wl_Dialog_Results_Filter - - Restart - 重启 + + Filter Results + 筛选结果 - - Cancel - 取消 + + File to filter: + 待筛选文件: - - Restart Wordless - 重启 Wordless + + Filter + 筛选 + + + + Close + 关闭 + + + + Filtering results... + 筛选结果中…… + + + + The results in the data table has been successfully filtered. 
+ 已成功筛选数据表格中的结果。 Wl_Dialog_Results_Filter_Collocation_Extractor - + Frequency: 频数: - - p-value: - p 值: - - - + Total 合计 - - L - - - - - R - - - - + Collocate length: 搭配词长度: - + Keyword length: 关键词长度: - + Bayes factor: 贝叶斯因子: - + Number of files found: 查找到的文件数 + + + Node length: + 节点词长度: + + + + Collocation length: + 搭配长度: + + + + Colligation length: + 类联接长度: + + + + L{} + 左 {} + + + + R{} + 右 {} + + + + Wl_Dialog_Results_Filter_Dependency_Parser + + + Head length: + 核心词长度: + + + + Dependent length: + 依存词长度: + + + + Dependency length: + 依存距离: + + + + Dependency length (absolute): + 依存距离(绝对): + Wl_Dialog_Results_Filter_Wordlist_Generator - + Frequency: 频数: - + Token length: 形符长度: - + N-gram length: n 元组长度: - + Number of files found: 查找到的文件数: - + Number of syllables: 音节数: @@ -827,7 +825,7 @@ Wl_Dialog_Results_Search - + Close 关闭 @@ -842,42 +840,42 @@ 无查找结果 - + Search in Results 在结果中查找 - + item - + items - + Found {} {}. 查找到 {} {}。 - + Find next 查找下一个 - + Find previous 查找上一个 - + Find all 查找所有 - + Clear highlights 清除高亮 @@ -893,7 +891,7 @@ - + Highlights cleared. 高亮已清除。 @@ -901,108 +899,80 @@ Wl_Dialog_Results_Sort_Concordancer - + Sort 排序 - + Close 关闭 - + Ascending 升序 - + Node 节点词 - + Sentiment 情感 - + File 文件 - + Sorting results... 结果排序中…… - + Sort Results 对结果排序 - - The results in the table has been successfully sorted. - 已成功对表格中的结果进行排序。 - - - + ^L[0-9]+$ - ^左[0-9]+$ + ^左 [0-9]+$ - + ^R[0-9]+$ - ^右[0-9]+$ - - - - Token no. - 形符序号 + ^右 [0-9]+$ - + ^Ld+$ - ^左d+$ + ^左 d+$ - + ^Rd+$ - ^右d+$ - - - - Wl_Dialog_Settings - - - Save - 保存 - - - - Cancel - 取消 - - - - OK - 确认 + ^右 d+$ - - Copy - 复制 + + The results in the data table has been successfully sorted. + 已成功对数据表格中的结果进行排序。 - - Close - 关闭 + + Token No. + 形符序号 Wl_Loading - + Initializing Wordless... 初始化 Wordless…… @@ -1010,242 +980,222 @@ Wl_Main - + Loading settings... 载入设置…… - + Initializing main window... 初始化主窗体…… - + Ready! 就绪! - + Starting Wordless... 
启动 Wordless…… - + &File 文件 - + &Preferences 偏好 - + &Help 帮助 - + &Open Files... 打开文件... - - Open files - 打开文件 - - - - Open &Folder... - 打开文件夹... - - - - Open all files in the folder - 打开文件夹下的所有文件 - - - + &Reopen Closed Files 重新打开已关闭文件 - + Reopen closed files 重新打开已经关闭的文件 - + S&elect All 全选 - + Select all files 勾选所有文件 - + &Deselect All - 取消选中所有 + 取消勾选所有 - + Deselect all files 取消勾选所有文件 - + &Invert Selection - 反选 + 反向勾选 - + Invert file selection 反向勾选文件 - + &Close Selected 关闭选中 - + Close selected file(s) 关闭已选中的文件 - + C&lose All 关闭所有 - + Close all files 关闭所有文件 - + &Exit... 退出... - + Exit the program 退出程序 - + &Settings 设置 - + Change settings 更改设置 - + &Display Language 显示语言 - + &Reset Layouts 重置布局 - + Reset layouts 重置布局 - + &Show Status Bar 显示状态栏 - + Show/Hide the status bar 显示/隐藏状态栏 - + &Citing 引用 - + Show information about citing 显示引用信息 - + &Acknowledgments 致谢 - - Show acknowldgments - 显示致谢 - - - + &Need Help? 需要帮助? - + Show help information 显示帮助信息 - + &Donating 赞助 - + Show information about donating 显示赞助相关信息 - + Check &for Updates 检查更新 - + Check for updates of Wordless 检查 Wordless 更新 - + C&hangelog 更新日志 - + Show Changelog 显示更新日志 - + About &Wordless 关于 Wordless - + Show information about Wordless 显示 Wordless 的相关信息 - + Reset Layouts 重置布局 - + <div>Do you want to reset all layouts to their default settings?</div> @@ -1254,289 +1204,306 @@ - + Profiler 分析工具 - + Concordancer 索引工具 - + Parallel Concordancer 平行索引工具 - + Wordlist Generator 词表生成器 - + N-gram Generator n 元组生成器 - + Collocation Extractor 搭配抽取器 - + Colligation Extractor 类联接抽取器 - + Keyword Extractor 关键词抽取器 - + Observed Files 观察文件 - + Reference Files 参照文件 - + Dependency Parser 依存分析器 + + + Open file(s) + 打开文件 + + + + Show acknowledgments + 显示致谢 + Wl_Settings - + Settings 设置 - + General 全局 - + Import 导入 - + Export 导出 - + Files 文件 - + Tags 标签 - + Sentence Tokenization 分句 - + Word Tokenization 分词 - + Syllable Tokenization 分音节 - + Tagsets 标签集 - + Lemmatization 词形还原 - + Stop Word Lists 停用词表 - + Measures 统计方法 - + Dispersion 分布 - 
+ Adjusted Frequency 调整频数 - + Statistical Significance 统计显著性 - + Effect Size 效应量 - + Figures 图表 - + Reset all settings 重置所有设置 - + Save 保存 - + Apply 应用 - + Cancel 取消 - + Reset All Settings 重置所有设置 - - - <div>Do you want to reset all settings to their defaults?</div> - <div><b>Warning: This will affect settings on all pages!</b></div> - - - <div>你想要将所有设置重置为默认值吗?</div> - <div><b>警告:本操作将影响所有页面上的设置选项!</b></div> - - - - + Part-of-speech Tagging 词性标注 - + Bayes Factor 贝叶斯因子 - + Tables 表格 - + Dependency Parsing 依存分析 - + Readability 可读性 - + Concordancer 索引工具 - + Parallel Concordancer 平行索引工具 - + Line Charts 折线图 - + Word Clouds 词云图 - + Network Graphs 网络图 - + Sentiment Analysis 情感分析 - - Lexical Diversity - 词汇多样性 + + Lexical Density/Diversity + 词汇密度/多样性 + + + + Dependency Parser + 依存分析器 + + + + + <div>Do you want to reset all settings to their defaults?</div> + <br> + <div><b>Warning: This will affect settings on all pages!</b></div> + + + <div>你想要将所有设置重置为默认值吗?</div> + <br> + <div><b>警告:本操作将影响所有页面上的设置选项!</b></div> + Wl_Settings_Dependency_Parsing - + Dependency Parser Settings 依存分析器设置 - + Language 语种 - + Dependency Parsers 依存分析器 - + Preview 预览 - + Select language: 选择语种: - + Preview settings 预览设置 - + Show preview 显示预览 - + Processing... 
处理中…… @@ -1544,12 +1511,12 @@ Wl_Settings_Figs_Line_Charts - + General Settings 全局设置 - + Font: 字体: @@ -1557,132 +1524,132 @@ Wl_Settings_Figs_Network_Graphs - + Node Settings 节点设置 - + Node shape: 节点形状: - + Node size: 节点大小: - + Node color: 节点颜色: - + Node opacity: 节点不透明度: - + Node Label Settings 节点标签设置 - + Label font: 标签字体: - + Label font size: 标签字体大小: - + Label font weight: 标签字体粗细: - + Label font color: 标签字体颜色: - + Label opacity: 标签不透明度: - + Edge Settings 边设置 - + Connection style: 连接样式: - + Edge width: 边的宽度: - + Minimum 最小 - + Maximum 最大 - + Edge style: 边的样式: - + Edge color: 边的颜色: - + Edge opacity: 边的不透明度: - + Arrow style: 箭头样式: - + Arrow size: 箭头大小: - + Edge Label Settings 边的标签设置 - + Label position: 标签位置: - + Rotate labels to lie parallel to edges 旋转标签使其与边平行 - + Advanced Settings 高级设置 - + Layout: 布局: @@ -1690,112 +1657,112 @@ Wl_Settings_Figs_Word_Clouds - + Font Settings 字体设置 - + Font: 字体: - + Select Font 选择字体 - + Font size: 字体大小: - + Minimum 最小 - + Maximum 最大 - + Relative scaling: 相对缩放: - + Font color: 字体颜色: - + Custom 自定义 - + Monochrome 单色 - + Colormap 色谱 - + Auto 自动 - + Background Settings 背景设置 - + Background color: 背景颜色: - + Mask Settings 蒙版设置 - + Mask path: 蒙版路径: - + Select Mask 选择蒙版 - + Contour width: 轮廓宽度: - + Contour color: 轮廓颜色: - + Advanced Settings 高级设置 - + Prefer horizontal: 水平优先度: - + Allow repeated words 允许重复单词 @@ -1803,80 +1770,85 @@ Wl_Settings_Files - + Default Settings 默认设置 - + Encoding: 编码: - + Language: 语种: - + Tokenized: 已分词: - + Tagged: 已标注: - + Auto-detection Settings 自动检测设置 - + Number of lines to scan in each file: 文件扫描行数: - + Read files in chunks of 读取文件时每 - + lines 行为一段 - + Miscellaneous Settings 其他设置 + + + Display warning when opening non-text files + 打开非文本文件时显示警告 + Wl_Settings_Files_Tags - + Header Tag Settings 头标签设置 - + Note: All contents surrounded by header tags will be discarded during text processing! 注意:所有被头标签括起来的文本在文本处理时都将被忽略! 
- + Body Tag Settings 正文标签设置 - + * Use asterisk character (*) to indicate any number of characters * 使用星号(*)来表示任意数量的字符 - + XML Tag Settings XML 标签设置 @@ -1884,72 +1856,72 @@ Wl_Settings_General - + Proxy Settings 代理设置 - + Address: 地址: - + Port: 端口: - + Username: 用户名: - + Password: 密码: - + Update Settings 更新设置 - + Check for updates on startup 启动时检查更新 - + Always confirm on exit 退出时总是提示确认 - + Miscellaneous Settings 其他设置 - + User Interface Settings 用户界面设置 - + Interface scaling: 界面缩放: - + Font family: 字体集: - + Font size: 字体大小: - + Use proxy 使用代理 @@ -1957,42 +1929,42 @@ Wl_Settings_General_Exp - + Tables 表格 - + Browse... 浏览... - + Search Terms 检索项 - + Stop Words 停用词 - + Select Folder 选择文件夹 - + Default path: 默认路径: - + Default type: 默认类型: - + Default encoding: 默认编码: @@ -2000,47 +1972,47 @@ Wl_Settings_General_Imp - + Files 文件 - + Browse... 浏览... - + Search Terms 检索项 - + Auto-detect encodings 自动检测编码 - + Stop Words 停用词 - + Temporary Files 临时文件 - + Select Folder 选择文件夹 - + Default path: 默认路径: - + Default encoding: 默认编码: @@ -2048,37 +2020,37 @@ Wl_Settings_Lemmatization - + Lemmatizer Settings 词形还原器设置 - + Language 语种 - + Lemmatizers 词形还原器 - + Preview 预览 - + Select language: 选择语种: - + Show preview 显示预览 - + Processing... 
处理中…… @@ -2094,17 +2066,17 @@ Wl_Settings_Measures_Bayes_Factor - + Log-likelihood Ratio Test 对数似然比检验 - + Apply Yates's correction for continuity 应用耶茨连续性校正 - + Student's t-test (2-sample) 学生 t 检验(双样本) @@ -2130,23 +2102,18 @@ Wl_Settings_Measures_Effect_Size - + Kilgarriff's Ratio - + Smoothing parameter: 平滑系数: - Wl_Settings_Measures_Lexical_Diversity - - - HD-D - - + Wl_Settings_Measures_Lexical_Density_Diversity Sample size: @@ -2225,16 +2192,6 @@ Al-Heeti's Readability Prediction Formula - - - Policy one - - - - - Policy two - - Automated Readability Index @@ -2360,6 +2317,16 @@ Tränkle & Bailer's Readability Formula + + + Policy One + 策略一 + + + + Policy Two + 策略二 + Wl_Settings_Measures_Statistical_Significance @@ -2405,59 +2372,141 @@ - Welch's t-test - Welch t 检验 - - - z-score z 值 - + z-score (Berry-Rogghe) z 值(Berry-Rogghe) + + Wl_Settings_Node + + + Empty Path + 空路径 + + + + + <div>The path should not be left empty!</div> + + + <div>路径不可为空!</div> + + + + + Path not Found + 未找到路径 + + + + + <div>The specified path "{}" could not be found!</div> + <div>Please check your settings and try again.</div> + + + <div>未找到指定的路径“{}”!</div> + <div>请检查你的设置后重试。</div> + + + + + Invalid File Path + 无效文件路径 + + + + + <div>The specified path "{}" should be a file, not a directory!</div> + <div>Please check your settings and try again.</div> + + + <div>指定的路径“{}”应是一个文件,而不是目录!</div> + <div>请检查你的设置后重试。</div> + + + + + Invalid Directory Path + 无效文件夹路径 + + + + + <div>The specified path "{}" should be a directory, not a file!</div> + <div>Please check your settings and try again.</div> + + + <div>指定的路径“{}”应是一个目录,而不是文件!</div> + <div>请检查你的设置后重试。</div> + + + + + Path Not Exist + 路径不存在 + + + + + {} + <body> + <div>The specified path "{}" does not exist.</div> + <div>Do you want to create the directory?</div> + </body> + + + {} + <body> + <div>指定的路径“{}”不存在。</div> + <div>你想要新建该文件夹吗?</div> + </body> + + + Wl_Settings_Pos_Tagging - + Language 语种 - + Preview 预览 - + Select language: 选择语种: - + Show 
preview 显示预览 - + Processing... 处理中…… - + Part-of-speech Tagger Settings 词性标注器设置 - + Part-of-speech Taggers 词性标注器 - + Convert all part-of-speech tags to universal part-of-speech tags 将所有词性标签转换为通用词性标签 @@ -2465,137 +2514,156 @@ Wl_Settings_Pos_Tagging_Tagsets - + Preview Settings: 预览设置: - + Language: 语种: - + Mapping Settings 映射设置 - + Reset 重置 - + Part-of-speech Tag 词性标签 - + Universal Part-of-speech Tag 通用词性标签 - + Description 描述 - + Examples 示例 - + Fetching data... 获取数据中…… - + Reset Mappings 重置映射 - + + Reset All Mappings + 重置所有映射 + + + + * This part-of-speech tagger does not support custom mapping. + * 该词性标注器不支持自定义映射。 + + + + Reset all + 重置所有 + + + + Number of part-of-speech tags: + 词性标签数: + + + + Part-of-speech tagger: + 词性标注器: + + + + Content/Function Words + 实/虚词 + + + <div>Do you want to reset all mappings to their default settings?</div> + <br> <div><b>Note: This will only affect the mapping settings in the currently shown table.</b></div> <div>你想要将所有映射都重置为默认设置吗?</div> - <div><b>注:这只会影响当前所显示表格中的映射设置。</b></div> + <br> + <div><b>注意:这只会影响当前所显示表格中的映射设置。</b></div> - - Reset All Mappings - 重置所有映射 - - - + <div>Do you want to reset all mappings to their default settings?</div> + <br> <div><b>Warning: This will affect the mapping settings in all tables!</b></div> <div>你想要将所有映射重置为默认值吗?</div> + <br> <div><b>警告:本操作将影响所有表格中的映射设置!</b></div> - - * This part-of-speech tagger does not support custom mapping. - * 该词性标注器不支持自定义映射。 - - - - Reset all - 重置所有 - - - - Number of part-of-speech tags: - 词性标签数: + + Content words + 实词 - - Part-of-speech tagger: - 词性标注器: + + Function words + 虚词 Wl_Settings_Sentence_Tokenization - + Sentence Tokenizer Settings 分句器设置 - + Language 语种 - + Sentence Tokenizers 分句器 - + Preview 预览 - + Select language: 选择语种: - + Show preview 显示预览 - + Processing... 
处理中…… @@ -2603,42 +2671,42 @@ Wl_Settings_Sentiment_Analysis - + Sentiment Analyzer Settings 情感分析器设置 - + Language 语种 - + Sentiment Analyzer 情感分析器 - + Preview 预览 - + Select language: 选择语种: - + Show preview 显示预览 - + Sentiment score: 情感分数: - + Processing... 处理中…… @@ -2646,70 +2714,75 @@ Wl_Settings_Stop_Word_Lists - + Language 语种 - + Stop Word List 停用词表 - + Preview 预览 - + Select language: 选择语种: - + Stop Word List Settings 停用词表设置 - + Number of stop words: 停用词数: + + + Case-sensitive + 大小写敏感 + Wl_Settings_Syl_Tokenization - + Syllable Tokenizer Settings 分音节器设置 - + Language 语种 - + Syllable Tokenizers 分音节器 - + Preview 预览 - + Select language: 选择语种: - + Show preview 显示预览 - + Processing... 处理中…… @@ -2791,52 +2864,70 @@ - Wl_Settings_Tables_Parallel_Concordancer + Wl_Settings_Tables_Dependency_Parser - - Color Settings - 颜色设置 + + Highlight Color Settings + 高亮颜色设置 + + + + Head color: + 核心词颜色: + + + + Dependent color: + 依存词颜色: + + + Wl_Settings_Tables_Parallel_Concordancer Search term color: 检索项颜色: + + + Highlight Color Settings + 高亮颜色设置 + Wl_Settings_Word_Tokenization - + Word Tokenizer Settings 分词器设置 - + Language 语种 - + Word Tokenizers 分词器 - + Preview 预览 - + Select language: 选择语种: - + Show preview 显示预览 - + Processing... 
处理中…… @@ -2844,12 +2935,12 @@ Wl_Spin_Box_Window - + L - + R @@ -2857,155 +2948,145 @@ Wl_Table_Colligation_Extractor - + Rank 序号 - + Node 节点词 - + Collocate 搭配词 - + Number of Files Found 查找到的 文件数 - + Number of Files Found % 查找到的 文件数% - + [{}] L{} [{}] -左{} +左 {} - + [{}] R{} [{}] -右{} +右 {} - + ^[LR][0-9]+$ - ^[左右][0-9]+$ + ^[左右] [0-9]+$ - + L - + [{}] L{} % [{}] -左{} % +左 {} % - + [{}] R{} % [{}] -右{} % +右 {} % - + [{}] Frequency [{}] 频数 - + [{}] Frequency % [{}] 频数% - + [{}] p-value [{}] p 值 - + [{}] Bayes Factor [{}] 贝叶斯因子 - + Frequency 频数 - + Frequency % 频数% - + p-value p 值 - + Bayes Factor 贝叶斯因子 - - Network Graph - 网络图 - - - + Frequency 频数 - + p-value p 值 - + Total 合计 - - Network graph - 网络图 - - - + Bayes factor 贝叶斯因子 @@ -3013,150 +3094,145 @@ Bayes Factor Wl_Table_Collocation_Extractor - + Rank 序号 - + Node 节点词 - + Collocate 搭配词 - + Number of Files Found 查找到的 文件数 - + Number of Files Found % 查找到的 文件数% - + [{}] L{} [{}] -左{} +左 {} - + [{}] L{} % [{}] -左{} % +左 {} % - + [{}] R{} [{}] -右{} +右 {} - + [{}] R{} % [{}] -右{} % +右 {} % - + [{}] Frequency [{}] 频数 - + [{}] Frequency % [{}] 频数% - + [{}] p-value [{}] p 值 - + [{}] Bayes Factor [{}] 贝叶斯因子 - + Frequency 频数 - + Frequency % 频数% - + p-value p 值 - + Bayes Factor 贝叶斯因子 - + ^[LR][0-9]+$ - ^[左右][0-9]+$ + ^[左右] [0-9]+$ - + L - + Frequency 频数 - + p-value p 值 - + Total 合计 - - Network graph - 网络图 - - - + Bayes factor 贝叶斯因子 @@ -3164,82 +3240,82 @@ Bayes Factor Wl_Table_Concordancer - + Left - + Node 节点词 - + Right - + Sentiment 情感 - + Token No. 形符序号 - + Token No. % 形符序号% - + Sentence No. 句子序号 - + Sentence No. % 句子序号% - + Paragraph No. 段落序号 - + Paragraph No. % 段落序号% - + File 文件 - + Sentence Segment No. 句段序号 - + Sentence Segment No. % 句段序号% - + Search Term 检索项 - + Search term 检索项 - + Dispersion Plot 分布图 @@ -3247,22 +3323,22 @@ Bayes Factor Wl_Table_Concordancer_Parallel - + Parallel Unit No. 平行单位序号 - + Parallel Unit No. % 平行单位序号% - + Missing Search Terms 缺少检索项 - + <div>You have not specified any search terms. 
Do you want to search for additions and deletions?</div> @@ -3271,83 +3347,50 @@ Bayes Factor - - Wl_Table_Data - - - Rank - 序号 - - - - Generate table - 生成表格 - - - - Generate figure - 生成图表 - - - - Export selected cells... - 导出选中单元格... - - - - Export all cells... - 导出所有单元格... - - - - Clear table - 清空表格 - - Wl_Table_Dependency_Parser - + Head 核心词 - + Dependent 依存词 - + Dependency Relation 依存关系 - + Dependency Length 依存距离 - + Dependency Length (Absolute) 依存距离(绝对) - + Sentence 句子 - + Sentence No. 句子序号 - + Sentence No. % 句子序号% - + File 文件 @@ -3355,72 +3398,72 @@ Bayes Factor Wl_Table_Files - + Checking files... 检查文件中…… - + Path 路径 - + Encoding 编码 - + Language 语种 - + Tokenized 已分词 - + Tagged 已标注 - + file 文件 - + files 文件 - + {} {} has been successfully opened. 已成功打开 {} 个{}。 - + Observed Files 观察文件 - + Reference Files 参照文件 - + Name 名称 - + Empty File Name 空文件名 - + <div>The file name should not be left empty!</div> @@ -3429,12 +3472,12 @@ Bayes Factor - + Duplicate File Names 重复文件名 - + <div>There is already a file with the same name in the file area.</div> <div>Please specify a different file name.</div> @@ -3448,116 +3491,116 @@ Bayes Factor Wl_Table_Keyword_Extractor - + Rank 序号 - + Keyword 关键词 - + Number of Files Found 查找到的 文件数 - + Number of Files Found % 查找到的 文件数% - + [Reference Files] Frequency [参照文件] 频数 - + [Reference Files] Frequency % [参照文件] 频数% - + [{}] Frequency [{}] 频数 - + [{}] Frequency % [{}] 频数% - + [{}] p-value [{}] p 值 - + [{}] Bayes Factor [{}] 贝叶斯因子 - + Frequency 频数 - + Frequency % 频数% - + p-value p 值 - + Bayes Factor 贝叶斯因子 - + Frequency 频数 - + p-value p 值 - + Missing Observed Files 缺少观察文件 - + <div>You have not specified any observed files yet.</div> @@ -3566,12 +3609,12 @@ Bayes Factor - + Missing Reference Files 缺少参照文件 - + <div>You have not specified any reference files yet.</div> @@ -3580,22 +3623,22 @@ Bayes Factor - + Missing observed files! 缺少观察文件! - + Missing reference files! 缺少参照文件! 
- + Total 合计 - + Bayes factor 贝叶斯因子 @@ -3603,64 +3646,64 @@ Bayes Factor Wl_Table_Ngram_Generator - + Rank 序号 - + N-gram n 元组 - + Number of Files Found 查找到的 文件数 - + Number of Files Found % 查找到的 文件数% - + Total 合计 - + [{}] Frequency [{}] 频数 - + [{}] Frequency % [{}] 频数% - + Frequency 频数 - + Frequency % 频数% - + Frequency 频数 @@ -3668,7 +3711,7 @@ Frequency % Wl_Table_Profiler - + Total 合计 @@ -3676,60 +3719,130 @@ Frequency % Wl_Table_Profiler_Counts - + Total 合计 - + No language support 无语种支持 + + + Count of Paragraphs + 段落数 + + + + Count of Paragraphs % + 段落数% + + + + Count of Sentences + 句子数 + + + + Count of Sentences % + 句子数% + + + + Count of Sentence Segments + 句段数 + + + + Count of Sentence Segments % + 句段数% + + + + Count of Tokens + 形符数 + + + + Count of Tokens % + 形符数% + + + + Count of Types + 类符数 + + + + Count of Types % + 类符数% + + + + Count of Syllables + 音节数 + + + + Count of Syllables % + 音节数% + + + + Count of Characters + 字符数 + + + + Count of Characters % + 字符数% + Wl_Table_Profiler_Len_Breakdown - + Total 合计 - + Count of {}-token-long Sentences {}个形符长的句子数 - + Count of {}-token-long Sentences % {}个形符长的句子数% - + Count of {}-token-long Sentence Segment {}个形符长的句段数 - + Count of {}-token-long Sentence Segment % {}个形符长的句段数% - + Count of {}-character-long Tokens {}个字符长的形符数 - + Count of {}-character-long Tokens % {}个字符长的形符数% - + Count of {}-syllables-long Tokens {}个音节长的形符数 - + Count of {}-syllables-long Tokens % {}个音节长的形符数% @@ -3737,8823 +3850,9323 @@ Frequency % Wl_Table_Profiler_Lens - + Total 合计 - + No language support 无语种支持 - - - Wl_Table_Profiler_Lexical_Diversity - - Total - 合计 + + Paragraph Length in Sentences (Mean) + 段落长(单位:句子)(均值) - - - Wl_Table_Profiler_Readability - - Total - 合计 + + Paragraph Length in Sentences (Standard Deviation) + 段落长(单位:句子)(标准差) - - No language support - 无语种支持 + + Paragraph Length in Sentences (Variance) + 段落长(单位:句子)(方差) - - Text is too short - 文件过短 + + Paragraph Length in Sentences (Minimum) + 段落长(单位:句子)(最小值) - - - 
Wl_Table_Results_Sort_Conordancer - - Node - 节点词 + + Paragraph Length in Sentences (25th Percentile) + 段落长(单位:句子)(25分位数) - - Sentiment - 情感 + + Paragraph Length in Sentences (Median) + 段落长(单位:句子)(中位数) - - File - 文件 + + Paragraph Length in Sentences (75th Percentile) + 段落长(单位:句子)(75分位数) - - Ascending - 升序 + + Paragraph Length in Sentences (Maximum) + 段落长(单位:句子)(最大值) - - Descending - 降序 + + Paragraph Length in Sentences (Range) + 段落长(单位:句子)(极差) - - Column Sorted More Than Once - 同列排序超过一次 + + Paragraph Length in Sentences (Interquartile Range) + 段落长(单位:句子)(四分位差) - - Token - 形符 + + Paragraph Length in Sentences (Modes) + 段落长(单位:句子)(众数) - - Column - + + Paragraph Length in Sentence Segments (Mean) + 段落长(单位:句段)(均值) - - Order - 顺序 + + Paragraph Length in Sentence Segments (Standard Deviation) + 段落长(单位:句段)(标准差) - - - <body> - <div>Please refrain from sorting the same column more than once!</div> - </body> - - - <body> - <div>请勿对同一列排序一次以上!</div> - </body> - + + Paragraph Length in Sentence Segments (Variance) + 段落长(单位:句段)(方差) - - R - + + Paragraph Length in Sentence Segments (Minimum) + 段落长(单位:句段)(最小值) - - L - + + Paragraph Length in Sentence Segments (25th Percentile) + 段落长(单位:句段)(25分位数) - - L1 - 左1 + + Paragraph Length in Sentence Segments (Median) + 段落长(单位:句段)(中位数) - - ^L[0-9]+$ - ^左[0-9]+$ + + Paragraph Length in Sentence Segments (75th Percentile) + 段落长(单位:句段)(75分位数) - - R1 - 右1 + + Paragraph Length in Sentence Segments (Maximum) + 段落长(单位:句段)(最大值) - - ^R[0-9]+$ - ^右[0-9]+$ + + Paragraph Length in Sentence Segments (Range) + 段落长(单位:句段)(极差) - - Token no. 
- 形符序号 + + Paragraph Length in Sentence Segments (Interquartile Range) + 段落长(单位:句段)(四分位差) - - - Wl_Table_Tags - - TAG - 标签 + + Paragraph Length in Sentence Segments (Modes) + 段落长(单位:句段)(众数) - - - Wl_Table_Tags_Body - - Others - 其他 + + Paragraph Length in Tokens (Mean) + 段落长(单位:形符)(均值) - - Non-embedded - 非嵌入式 + + Paragraph Length in Tokens (Standard Deviation) + 段落长(单位:形符)(标准差) - - <TAG> - <标签> + + Paragraph Length in Tokens (Variance) + 段落长(单位:形符)(方差) - - Part of speech - 词性 + + Paragraph Length in Tokens (Minimum) + 段落长(单位:形符)(最小值) - - - Wl_Table_Tags_Header - - Header - + + Paragraph Length in Tokens (25th Percentile) + 段落长(单位:形符)(25分位数) - - Non-embedded - 非嵌入式 + + Paragraph Length in Tokens (Median) + 段落长(单位:形符)(中位数) - - <TAG> - <标签> + + Paragraph Length in Tokens (75th Percentile) + 段落长(单位:形符)(75分位数) - - - Wl_Table_Tags_Xml - - Non-embedded - 非嵌入式 + + Paragraph Length in Tokens (Maximum) + 段落长(单位:形符)(最大值) - - Paragraph - 段落 + + Paragraph Length in Tokens (Range) + 段落长(单位:形符)(极差) - - Sentence - 句子 + + Paragraph Length in Tokens (Interquartile Range) + 段落长(单位:形符)(四分位差) - - Word - 单词 + + Paragraph Length in Tokens (Modes) + 段落长(单位:形符)(众数) - - Invalid XML Tag - 无效 XML 标签 + + Sentence Length in Tokens (Mean) + 句长(单位:形符)(均值) - - - <div>The specified XML tag is invalid, please check and try again!</div> - - - <div>指定的 XML 标签无效,请检查后重试!</div> - + + Sentence Length in Tokens (Standard Deviation) + 句长(单位:形符)(标准差) - - <TAG> - <标签> + + Sentence Length in Tokens (Variance) + 句长(单位:形符)(方差) - - - Wl_Table_Wordlist_Generator - - Rank - 序号 + + Sentence Length in Tokens (Minimum) + 句长(单位:形符)(最小值) - - Token - 形符 + + Sentence Length in Tokens (25th Percentile) + 句长(单位:形符)(25分位数) - - Number of -Files Found - 查找到的 -文件数 + + Sentence Length in Tokens (Median) + 句长(单位:形符)(中位数) - - Number of -Files Found % - 查找到的 -文件数% + + Sentence Length in Tokens (75th Percentile) + 句长(单位:形符)(75分位数) - - Total - 合计 + + Sentence Length in Tokens (Maximum) + 句长(单位:形符)(最大值) - - [{}] -Frequency - [{}] -频数 + 
+ Sentence Length in Tokens (Range) + 句长(单位:形符)(极差) - - [{}] -Frequency % - [{}] -频数% + + Sentence Length in Tokens (Interquartile Range) + 句长(单位:形符)(四分位差) - - -Frequency - -频数 + + Sentence Length in Tokens (Modes) + 句长(单位:形符)(众数) - - -Frequency % - -频数% + + Sentence Segment Length in Tokens (Mean) + 句段长(单位:形符)(均值) - - Frequency - 频数 + + Sentence Segment Length in Tokens (Standard Deviation) + 句段长(单位:形符)(标准差) - - Syllabification - 音节划分 + + Sentence Segment Length in Tokens (Variance) + 句段长(单位:形符)(方差) - - - Wl_Worker_Add_Files - - Updating table... - 更新表格中…… + + Sentence Segment Length in Tokens (Minimum) + 句段长(单位:形符)(最小值) - - Adding files... ({}/{}) - 添加文件中……({}/{}) + + Sentence Segment Length in Tokens (25th Percentile) + 句段长(单位:形符)(25分位数) - - - Wl_Worker_Colligation_Extractor_Fig - - Rendering figure... - 渲染图表中…… + + Sentence Segment Length in Tokens (Median) + 句段长(单位:形符)(中位数) - - - Wl_Worker_Colligation_Extractor_Table - - Rendering table... - 渲染表格中…… + + Sentence Segment Length in Tokens (75th Percentile) + 句段长(单位:形符)(75分位数) - - - Wl_Worker_Collocation_Extractor_Fig - - Rendering figure... - 渲染图表中…… + + Sentence Segment Length in Tokens (Maximum) + 句段长(单位:形符)(最大值) - - - Wl_Worker_Collocation_Extractor_Table - - Rendering table... - 渲染表格中…… + + Sentence Segment Length in Tokens (Range) + 句段长(单位:形符)(极差) - - - Wl_Worker_Concordancer_Fig - - File - 文件 + + Sentence Segment Length in Tokens (Interquartile Range) + 句段长(单位:形符)(四分位数) - - Total - 合计 + + Sentence Segment Length in Tokens (Modes) + 句段长(单位:形符)(众数) - - Rendering figure... - 渲染图表中…… + + Token Length in Syllables (Mean) + 形符长(单位:音节)(均值) - - Search term - 检索项 + + Token Length in Syllables (Standard Deviation) + 形符长(单位:音节)(标准差) - - - Wl_Worker_Concordancer_Parallel_Table - - Rendering table... 
- 渲染表格中…… + + Token Length in Syllables (Variance) + 形符长(单位:音节)(方差) - - - Wl_Worker_Concordancer_Table - - Paragraph - 段落 + + Token Length in Syllables (Minimum) + 形符长(单位:音节)(最小值) - - Sentence - 句子 + + Token Length in Syllables (25th Percentile) + 形符长(单位:音节)(25分位数) - - Token - 形符 + + Token Length in Syllables (Median) + 形符长(单位:音节)(中位数) - - Character - 字符 + + Token Length in Syllables (75th Percentile) + 形符长(单位:音节)(75分位数) - - Rendering table... - 渲染表格中…… + + Token Length in Syllables (Maximum) + 形符长(单位:音节)(最大值) - - Sentence segment - 句段 + + Token Length in Syllables (Range) + 形符长(单位:音节)(极差) - - No language support - 无语种支持 + + Token Length in Syllables (Interquartile Range) + 形符长(单位:音节)(四分位差) - - - Wl_Worker_Dependency_Parser - - Rendering table... - 渲染表格中…… + + Token Length in Syllables (Modes) + 形符长(单位:音节)(众数) - - - Wl_Worker_Download_Model_Spacy - - Fetching model information... - 获取模型信息中…… + + Token Length in Characters (Mean) + 形符长(单位:字符)(均值) - - Downloading model ({:.2f} MB)... - 下载模型中({:.2f} MB)…… + + Token Length in Characters (Standard Deviation) + 形符长(单位:字符)(标准差) - - Downloading model... - 下载模型中…… + + Token Length in Characters (Variance) + 形符长(单位:字符)(方差) - - Download completed successfully. - 模型下载完毕。 + + Token Length in Characters (Minimum) + 形符长(单位:字符)(最小值) - - - Wl_Worker_Download_Model_Stanza - - Downloading model... - 下载模型中…… + + Token Length in Characters (25th Percentile) + 形符长(单位:字符)(25分位值) - - Download completed successfully. - 模型下载完毕。 + + Token Length in Characters (Median) + 形符长(单位:字符)(中位数) - - - Wl_Worker_Exp_Table - - Saving file... - 保存文件中…… + + Token Length in Characters (75th Percentile) + 形符长(单位:字符)(75分位数) - - Exporting table... ({} / {}) - 导出表格中……({} / {}) + + Token Length in Characters (Maximum) + 形符长(单位:字符)(最大值) - - - Wl_Worker_Fetch_Data_Tagsets - - Updating table... - 更新表格中…… + + Token Length in Characters (Range) + 形符长(单位:字符)(极差) - - - Wl_Worker_Keyword_Extractor_Fig - - Rendering figure... 
- 渲染图表中…… + + Token Length in Characters (Interquartile Range) + 形符长(单位:字符)(四分位差) - - - Wl_Worker_Keyword_Extractor_Table - - Rendering table... - 渲染表格中…… + + Token Length in Characters (Modes) + 形符长(单位:字符)(众数) - - - Wl_Worker_Ngram_Generator_Fig - - Rendering figure... - 渲染图表中…… + + Type Length in Syllables (Mean) + 类符长(单位:音节)(均值) - - - Wl_Worker_Ngram_Generator_Table - - Rendering table... - 渲染表格中…… + + Type Length in Syllables (Standard Deviation) + 类符长(单位:音节)(标准差) - - - Wl_Worker_Open_Files - - Updating table... - 更新表格中…… + + Type Length in Syllables (Variance) + 类符长(单位:音节)(方差) - - Opening files... ({}/{}) - 打开文件中……({}/{}) + + Type Length in Syllables (Minimum) + 类符长(单位:音节)(最小值) - - - Wl_Worker_Profiler_Table - - Rendering table... - 渲染表格中…… + + Type Length in Syllables (25th Percentile) + 类符长(单位:音节)(25分位数) - - - Wl_Worker_Results_Filter_Collocation_Extractor - - Collocate - 搭配词 + + Type Length in Syllables (Median) + 类符长(单位:音节)(中位数) - - Total - 合计 + + Type Length in Syllables (75th Percentile) + 类符长(单位:音节)(75分位数) - - Number of -Files Found - 查找到的 -文件数 + + Type Length in Syllables (Maximum) + 类符长(单位:音节)(最大值) - - Updating table... - 更新表格中…… + + Type Length in Syllables (Range) + 类符长(单位:音节)(极差) - - [{}] -Frequency - [{}] -频数 + + Type Length in Syllables (Interquartile Range) + 类符长(单位:音节)(四分位差) - - [{}] -p-value - [{}] -p 值 + + Type Length in Syllables (Modes) + 类符长(单位:音节)(众数) - - [{}] -Bayes Factor - [{}] -贝叶斯因子 + + Type Length in Characters (Mean) + 类符长(单位:字符)(均值) - - Keyword - 关键词 + + Type Length in Characters (Standard Deviation) + 类符长(单位:字符)(标准差) - - - Wl_Worker_Results_Filter_Wordlist_Generator - - Token - 形符 + + Type Length in Characters (Variance) + 类符长(单位:字符)(方差) - - N-gram - n 元组 + + Type Length in Characters (Minimum) + 类符长(单位:字符)(最小值) - - Number of -Files Found - 查找到的 -文件数 + + Type Length in Characters (25th Percentile) + 类符长(单位:字符)(25分位数) - - Updating table... 
- 更新表格中…… + + Type Length in Characters (Median) + 类符长(单位:字符)(中位数) - - [{}] -Frequency - [{}] -频数 + + Type Length in Characters (75th Percentile) + 类符长(单位:字符)(75分位数) - - Syllabification - 音节划分 + + Type Length in Characters (Maximum) + 类符长(单位:字符)(最大值) - - - Wl_Worker_Results_Search - - Highlighting found items... - 高亮查得项中…… + + Type Length in Characters (Range) + 类符长(单位:字符)(极差) - - - Wl_Worker_Results_Sort_Concordancer - - Updating table... - 更新表格中…… + + Type Length in Characters (Interquartile Range) + 类符长(单位:字符)(四分位差) - - - Wl_Worker_Wordlist_Generator_Fig - - Rendering figure... - 渲染图表中…… + + Type Length in Characters (Modes) + 类符长(单位:字符)(众数) - - - Wl_Worker_Wordlist_Generator_Table - - Rendering table... - 渲染表格中…… + + Syllable Length in Characters (Mean) + 音节长(单位:字符)(均值) - - - Wrapper_Colligation_Extractor - - Token Settings - 形符设置 + + Syllable Length in Characters (Standard Deviation) + 音节长(单位:字符)(标准差) - - Search Settings - 搜索设置 + + Syllable Length in Characters (Variance) + 音节长(单位:字符)(方差) - - Generation Settings - 生成设置 + + Syllable Length in Characters (Minimum) + 音节长(单位:字符)(最小值) - - None - + + Syllable Length in Characters (25th Percentile) + 音节长(单位:字符)(25分位数) - - Table Settings - 表格设置 + + Syllable Length in Characters (Median) + 音节长(单位:字符)(中位数) - - Figure Settings - 图表设置 + + Syllable Length in Characters (75th Percentile) + 音节长(单位:字符)(75分位数) - - Rank: - 序号: + + Syllable Length in Characters (Maximum) + 音节长(单位:字符)(最大值) - - L - + + Syllable Length in Characters (Range) + 音节长(单位:字符)(极差) - - R - + + Syllable Length in Characters (Interquartile Range) + 音节长(单位:字符)(四分位差) - - Collocational window: - 搭配检索范围: + + Syllable Length in Characters (Modes) + 音节长(单位:字符)(众数) + + + Wl_Table_Profiler_Lexical_Density_Diversity - - Limit searching: - 限制检索: + + Total + 合计 - - Within sentence segments - 句段内 + + No language support + 无语种支持 - - Within sentences - 句子内 + + Brunét's Index + - - Within paragraphs - 段落内 + + Corrected TTR + - - - Wrapper_Collocation_Extractor - - Token 
Settings - 形符设置 + + Fisher's Index of Diversity + - - Search Settings - 搜索设置 + + Herdan's Vₘ + - - Generation Settings - 生成设置 + + Honoré's Statistic + - - None - + + Lexical Density + 词汇密度 - - Table Settings - 表格设置 + + Mean Segmental TTR + - - Figure Settings - 图表设置 + + Measure of Textual Lexical Diversity + - - Rank: - 序号: + + Moving-average TTR + - - L - + + Popescu-Mačutek-Altmann's B₁ + - - R - + + Popescu-Mačutek-Altmann's B₂ + - - Collocational window: - 搭配检索范围: + + Popescu-Mačutek-Altmann's B₃ + - - Limit searching: - 限制检索: + + Popescu-Mačutek-Altmann's B₄ + - - Within sentence segments - 句段内 + + Popescu-Mačutek-Altmann's B₅ + - - Within sentences - 句子内 + + Popescu's R₁ + - - Within paragraphs - 段落内 + + Popescu's R₂ + - - - Wrapper_Concordancer - - Token Settings - 形符设置 + + Popescu's R₃ + - - Search Settings - 搜索设置 + + Popescu's R₄ + - - Generation Settings - 生成设置 + + Repeat Rate + - - Paragraph - 段落 + + Root TTR + - - Sentence - 句子 + + Shannon Entropy + 香农熵 - - Token - 形符 + + Simpson's l + - - Character - 字符 + + Type-token Ratio + 类符形符比 - - Table Settings - 表格设置 + + Yule's Characteristic K + - - Figure Settings - 图表设置 + + Yule's Index of Diversity + + + + Wl_Table_Profiler_Readability - - Sort results by: - 结果排序依据: + + Total + 合计 - - File - 文件 + + No language support + 无语种支持 - - Zapping Settings - 检索词遮蔽设置 + + Text is too short + 文件过短 - - Replace keywords with - 将关键词替换为 + + Al-Heeti's Readability Prediction Formula + - - Add line numbers - 添加行号 + + Automated Arabic Readability Index + - - Randomize outputs - 随机化输出 + + Automated Readability Index + - - Width (left): - 长度(左): + + Bormuth's Cloze Mean + - - Width (right): - 长度(右): + + Bormuth's Grade Placement + - - Width unit: - 长度单位: + + Coleman-Liau Index + - - Sentence segment - 句段 + + Coleman's Readability Formula + - - Search term - 检索项 + + Dale-Chall Readability Formula + - - - Wrapper_Concordancer_Parallel - - Token Settings - 形符设置 + + Danielson-Bryan's Readability Formula + - - Search Settings - 搜索设置 + 
+ Dawood's Readability Formula + - - Table Settings - 表格设置 + + Degrees of Reading Power + - - - Wrapper_Dependency_Parser - - Token Settings - 形符设置 + + Devereaux Readability Index + - - Search Settings - 搜索设置 + + Dickes-Steiwer Handformel + - - Table Settings - 表格设置 + + Easy Listening Formula + - - Figure Settings - 图表设置 + + Flesch-Kincaid Grade Level + - - coarse-grained - 粗分 + + Flesch Reading Ease + - - fine-grained - 细分 + + Flesch Reading Ease (Farr-Jenkins-Paterson) + - - - Wrapper_Keyword_Extractor - - Token Settings - 形符设置 + + FORCAST Grade Level + - - Generation Settings - 生成设置 + + Fórmula de Comprensibilidad de Gutiérrez de Polini + - - Table Settings - 表格设置 + + Fórmula de Crawford + - - Figure Settings - 图表设置 + + Fucks's Stilcharakteristik + - - Rank: - 序号: + + Gulpease Index + - - - Wrapper_Ngram_Generator - - Token Settings - 形符设置 - - - - Search Settings - 搜索设置 + + Gunning Fog Index + - - Generation Settings - 生成设置 + + Legibilidad μ + - - Allow skipped tokens: - 允许跳过形符数: + + Lensear Write + - - Table Settings - 表格设置 + + Lix + - - Figure Settings - 图表设置 + + Lorge Readability Index + - - Rank: - 序号: + + Luong-Nguyen-Dinh's Readability Formula + - - Search term position: - 检索项位置: + + McAlpine EFLAW Readability Score + - - N-gram size: - n 元组长度: + + neue Wiener Literaturformeln + - - - Wrapper_Profiler - - Token Settings - 形符设置 + + neue Wiener Sachtextformel + - - Table Settings - 表格设置 + + OSMAN + - - Generate all tables - 生成所有表格 + + Rix + - - Clear all tables - 清空所有表格 + + SMOG Grade + - - Readability - 可读性 + + Spache Grade Level + - - Counts - 计数 + + Strain Index + - - Lengths - 长度 + + Tränkle & Bailer's Readability Formula + - - Length Breakdown - 细分长度 + + Tuldava's Text Difficulty + - - Lexical Diversity - 词汇多样性 + + Wheeler & Smith's Readability Formula + - Wrapper_Wordlist_Generator + Wl_Table_Results_Sort_Conordancer - - Token Settings - 形符设置 + + Column + - - Generation Settings - 生成设置 + + Order + 顺序 - - Table Settings - 表格设置 + + Node + 节点词 - - Figure 
Settings - 图表设置 + + Sentiment + 情感 - - Rank: - 序号: + + Token No. + 形符序号 - - Syllabification - 音节划分 + + File + 文件 - - - get_re_tags - - Embedded - 嵌入式 + + Ascending + 升序 - - Non-embedded - 非嵌入式 + + Descending + 降序 - - - get_re_tags_with_tokens - - Embedded - 嵌入式 + + Column Sorted More Than Once + 同列排序超过一次 - - Non-embedded - 非嵌入式 + + + <body> + <div>Please refrain from sorting the same column more than once!</div> + </body> + + + <body> + <div>请勿对同一列排序一次以上!</div> + </body> + - - - init_settings_global - - Afrikaans - 南非语 + + Token + 形符 - - Albanian - 阿尔巴尼亚语 + + R{} + 右 {} - - Amharic - 阿姆哈拉语 + + L{} + 左 {} - - Arabic - 阿拉伯语 + + L1 + 左 1 - - Assamese - 阿萨姆语 + + ^L[0-9]+$ + ^左 [0-9]+$ - - Asturian - 阿斯图里亚斯语 + + R1 + 右 1 - - Azerbaijani - 阿塞拜疆语 + + ^R[0-9]+$ + ^右 [0-9]+$ + + + Wl_Table_Tags - - Basque - 巴斯克语 + + TAG + 标签 - - Belarusian - 白俄罗斯语 + + Type + 类型 - - Bengali - 孟加拉语 + + Level + 层级 - - Bulgarian - 保加利亚语 + + Opening Tag + 开始标签 - - Catalan - 加泰罗尼亚语 + + Closing Tag + 结束标签 - - Chinese (Simplified) - 汉语(简体) + + Preview + 预览 - - Chinese (Traditional) - 汉语(繁体) + + Embedded + 嵌入式 - - Croatian - 克罗地亚语 + + Non-embedded + 非嵌入式 - - Czech - 捷克语 + + Reset + 重置 - - Danish - 丹麦语 + + + <div>Embedded tags must begin with a punctuation mark, e.g. an underscore or a slash!</div> + + + <div>嵌入式标签必须以一个标点符号,如下划线或斜杠,开头!</div> + - - Dutch - 荷兰语 + + + <div>Non-embedded tags must begin and end with a punctuation mark, e.g. 
brackets!</div> + + + <div>非嵌入式标签必须以一个标点,如括号,开头和结尾!</div> + - - English (United Kingdom) - 英语(英国) + + Invalid Opening Tag + 无效开始标签 - - English (United States) - 英语(美国) + + Duplicate Tags + 重复标签 - - Esperanto - 世界语 + + + <div>The tag that you have specified already exists in the table!</div> + + + <div>你指定的标签已存在于表格中!</div> + - - Estonian - 爱沙尼亚语 + + N/A + 不适用 - - Finnish - 芬兰语 + + token + 形符 + + + Wl_Table_Tags_Body - - French - 法语 + + Others + 其他 - - Galician - 加里西亚语 + + Non-embedded + 非嵌入式 - - German (Austria) - 德语(奥地利) + + <TAG> + <标签> - - German (Germany) - 德语(德国) + + Part of speech + 词性 + + + Wl_Table_Tags_Header - - German (Switzerland) - 德语(瑞士) + + Header + - - Greek (Ancient) - 希腊语(古) + + Non-embedded + 非嵌入式 - - Greek (Modern) - 希腊语(现代) + + <TAG> + <标签> + + + Wl_Table_Tags_Xml - - Gujarati - 古吉拉特语 + + Non-embedded + 非嵌入式 - - Hindi - 印地语 + + Paragraph + 段落 - - Hungarian - 匈牙利语 + + Sentence + 句子 - - Icelandic - 冰岛语 + + Word + 单词 - - Indonesian - 印度尼西亚语 + + Invalid XML Tag + 无效 XML 标签 - - Irish - 爱尔兰语 + + + <div>The specified XML tag is invalid, please check and try again!</div> + + + <div>指定的 XML 标签无效,请检查后重试!</div> + - - Italian - 意大利语 + + <TAG> + <标签> + + + Wl_Table_Wordlist_Generator - - Japanese - 日语 + + Rank + 序号 - - Kannada - 卡纳达语 + + Token + 形符 - - Kazakh - 哈萨克语 + + Number of +Files Found + 查找到的 +文件数 - - Korean - 韩语 + + Number of +Files Found % + 查找到的 +文件数% - - Kyrgyz - 吉尔吉斯语 + + Total + 合计 - - Latin - 拉丁语 + + [{}] +Frequency + [{}] +频数 - - Latvian - 拉脱维亚语 + + [{}] +Frequency % + [{}] +频数% - - Ligurian - 利古里亚语 + + +Frequency + +频数 - - Lithuanian - 立陶宛语 + + +Frequency % + +频数% - - Luxembourgish - 卢森堡语 + + Frequency + 频数 - - Macedonian - 马其顿语 + + Syllabification + 音节划分 - - Malay - 马来语 + + N/A + 不适用 - - Malayalam - 马拉雅拉姆语 + + No language support + 无语种支持 + + + Wl_Worker_Add_Files - - Manx - 马恩语 + + Updating table... + 更新表格中…… - - Marathi - 马拉地语 + + Adding files... 
({}/{}) + 添加文件中……({}/{}) + + + Wl_Worker_Colligation_Extractor - - Meitei - 曼尼普尔语 + + None + - - Mongolian - 蒙古语 + + Within sentence segments + 句段内 - - Nepali - 尼泊尔语 + + Within sentences + 句子内 - - Norwegian Bokmål - 书面挪威语 + + Within paragraphs + 段落内 + + + Wl_Worker_Colligation_Extractor_Fig - - Norwegian Nynorsk - 新挪威语 + + Rendering figure... + 渲染图表中…… + + + Wl_Worker_Colligation_Extractor_Table - - Oriya - 奥里亚语 + + Rendering table... + 渲染表格中…… + + + Wl_Worker_Collocation_Extractor - - Persian - 波斯语 + + None + - - Polish - 波兰语 + + Within sentence segments + 句段内 - - Portuguese (Brazil) - 葡萄牙语(巴西) + + Within sentences + 句子内 - - Portuguese (Portugal) - 葡萄牙语(葡萄牙) + + Within paragraphs + 段落内 + + + Wl_Worker_Collocation_Extractor_Fig - - Romanian - 罗马尼亚语 + + Rendering figure... + 渲染图表中…… + + + Wl_Worker_Collocation_Extractor_Table - - Russian - 俄语 + + Rendering table... + 渲染表格中…… + + + Wl_Worker_Concordancer_Fig - - Sanskrit - 梵语 + + File + 文件 - - Scottish Gaelic - 苏格兰盖尔语 + + Total + 合计 - - Serbian (Cyrillic) - 塞尔维亚语(西里尔) + + Rendering figure... + 渲染图表中…… - - Serbian (Latin) - 塞尔维亚语(拉丁) + + Search term + 检索项 + + + Wl_Worker_Concordancer_Parallel_Table - - Sinhala - 僧伽罗语 + + Rendering table... + 渲染表格中…… + + + Wl_Worker_Concordancer_Table - - Slovak - 斯洛伐克语 + + Paragraph + 段落 - - Slovenian - 斯洛文尼亚语 + + Sentence + 句子 - - Spanish - 西班牙语 + + Token + 形符 - - Swahili - 斯瓦西里语 + + Character + 字符 - - Swedish - 瑞典语 + + Rendering table... + 渲染表格中…… - - Tagalog - 他加禄语 + + Sentence segment + 句段 - - Tajik - 塔吉克语 + + No language support + 无语种支持 + + + Wl_Worker_Dependency_Parser - - Tamil - 泰米尔语 + + Rendering table... + 渲染表格中…… + + + Wl_Worker_Download_Model_Spacy - - Tatar - 鞑靼语 + + Fetching model information... + 获取模型信息中…… - - Telugu - 泰卢固语 + + Downloading model ({:.2f} MB)... + 下载模型中({:.2f} MB)…… - - Tetun Dili - 帝力德顿语 + + Downloading model... + 下载模型中…… - - Thai - 泰语 + + Download completed successfully. 
+ 模型下载完毕。 + + + Wl_Worker_Download_Model_Stanza - - Tibetan - 藏语 + + Downloading model... + 下载模型中…… - - Tigrinya - 提格雷尼亚语 + + Download completed successfully. + 模型下载完毕。 + + + Wl_Worker_Exp_Table - - Tswana - 茨瓦纳语 + + Saving file... + 保存文件中…… - - Turkish - 土耳其语 + + Exporting table... ({} / {}) + 导出表格中……({} / {}) + + + Wl_Worker_Fetch_Data_Tagsets - - Ukrainian - 乌克兰语 + + Updating table... + 更新表格中…… + + + Wl_Worker_Keyword_Extractor_Fig - - Urdu - 乌尔都语 + + Rendering figure... + 渲染图表中…… + + + Wl_Worker_Keyword_Extractor_Table - - Vietnamese - 越南语 + + Rendering table... + 渲染表格中…… + + + Wl_Worker_Ngram_Generator_Fig - - Welsh - 威尔士语 + + Rendering figure... + 渲染图表中…… + + + Wl_Worker_Ngram_Generator_Table - - Yoruba - 约鲁巴语 + + Rendering table... + 渲染表格中…… + + + Wl_Worker_Open_Files - - Zulu - 祖鲁语 + + Updating table... + 更新表格中…… - - Arabic (CP720) - 阿拉伯语(CP720) + + Opening files... ({}/{}) + 打开文件中……({}/{}) + + + Wl_Worker_Profiler_Table - - Arabic (CP864) - 阿拉伯语(CP864) + + Rendering table... + 渲染表格中…… + + + Wl_Worker_Results_Filter_Collocation_Extractor - - Arabic (ISO-8859-6) - 阿拉伯语(ISO-8859-6) + + Collocate + 搭配词 - - Arabic (Mac OS Arabic) - 阿拉伯语(Mac OS Arabic) + + Total + 合计 - - Arabic (Windows-1256) - 阿拉伯语(Windows-1256) + + Number of +Files Found + 查找到的 +文件数 - - Chinese (GB18030) - 汉语(GB18030) + + Updating table... 
+ 更新表格中…… - - Chinese (GBK) - 汉语(GBK) + + [{}] +Frequency + [{}] +频数 - - Chinese (Simplified) (GB2312) - 汉语(简体)(GB2312) + + [{}] +p-value + [{}] +p 值 - - Chinese (Simplified) (HZ) - 汉语(简体)(HZ) + + [{}] +Bayes Factor + [{}] +贝叶斯因子 - - Chinese (Traditional) (Big-5) - 汉语(繁体)(Big-5) + + Keyword + 关键词 - - Chinese (Traditional) (Big5-HKSCS) - 汉语(繁体)(Big5-HKSCS) + + Node + 节点词 + + + Wl_Worker_Results_Filter_Dependency_Parser - - Chinese (Traditional) (CP950) - 汉语(繁体)(CP950) + + Head + 核心词 - - Croatian (Mac OS Croatian) - 克罗地亚语(Mac OS Croatian) + + Dependent + 依存词 - - Cyrillic (CP855) - 西里尔(CP855) + + Dependency Length + 依存距离 - - Cyrillic (CP866) - 西里尔(CP866) + + Dependency Length (Absolute) + 依存距离(绝对) - - Cyrillic (ISO-8859-5) - 西里尔(ISO-8859-5) + + File + 文件 - - Cyrillic (Mac OS Cyrillic) - 西里尔(Mac OS Cyrillic) + + Total + 合计 - - Cyrillic (Windows-1251) - 西里尔(Windows-1251) + + Updating table... + 更新表格中…… + + + Wl_Worker_Results_Filter_Wordlist_Generator - - English (ASCII) - 英语(ASCII) + + Token + 形符 - - English (EBCDIC 037) - 英语(EBCDIC 037) + + N-gram + n 元组 - - English (CP437) - 英语(CP437) + + Number of +Files Found + 查找到的 +文件数 - - European (HP Roman-8) - 欧洲(HP Roman-8) + + Updating table... + 更新表格中…… - - European (Central) (CP852) - 欧洲(中部)(CP852) + + [{}] +Frequency + [{}] +频数 - - European (Central) (ISO-8859-2) - 欧洲(中部)(ISO-8859-2) + + Syllabification + 音节划分 + + + Wl_Worker_Results_Search - - European (Central) (Mac OS Central European) - 欧洲(中部)(Mac OS Central European) + + Highlighting found items... + 高亮查得项中…… + + + Wl_Worker_Results_Sort_Concordancer - - European (Central) (Windows-1250) - 欧洲(中部)(Windows-1250) + + Updating table... + 更新表格中…… + + + Wl_Worker_Wordlist_Generator - - European (Northern) (ISO-8859-4) - 欧洲(北部)(ISO-8859-4) + + No language support + 无语种支持 + + + Wl_Worker_Wordlist_Generator_Fig - - European (Southern) (ISO-8859-3) - 欧洲(南部)(ISO-8859-3) + + Rendering figure... 
+ 渲染图表中…… + + + Wl_Worker_Wordlist_Generator_Table - - European (Western) (EBCDIC 500) - 欧洲(西部)(EBCDIC 500) + + Rendering table... + 渲染表格中…… + + + Wrapper_Colligation_Extractor - - European (Western) (CP850) - 欧洲(西部)(CP850) + + Token Settings + 形符设置 - - European (Western) (CP858) - 欧洲(西部)(CP858) + + Search Settings + 搜索设置 - - European (Western) (CP1140) - 欧洲(西部)(CP1140) + + Generation Settings + 生成设置 - - European (Western) (ISO-8859-1) - 欧洲(西部)(ISO-8859-1) + + None + - - European (Western) (ISO-8859-15) - 欧洲(西部)(ISO-8859-15) + + Table Settings + 表格设置 - - European (Western) (Mac OS Roman) - 欧洲(西部)(Mac OS Roman) + + Figure Settings + 图表设置 - - European (Western) (Windows-1252) - 欧洲(西部)(Windows-1252) + + Rank: + 序号: - - French (CP863) - 法语(CP863) + + L + - - German (EBCDIC 273) - 德语(EBCDIC 273) + + R + - - Greek (CP737) - 希腊语(CP737) + + Collocational window: + 搭配检索范围: - - Greek (CP869) - 希腊语(CP869) + + Limit searching: + 限制检索: - - Greek (CP875) - 希腊语(CP875) + + Within sentence segments + 句段内 - - Greek (ISO-8859-7) - 希腊语(ISO-8859-7) + + Within sentences + 句子内 - - Greek (Mac OS Greek) - 希腊语(Mac OS Greek) + + Within paragraphs + 段落内 + + + Wrapper_Collocation_Extractor - - Greek (Windows-1253) - 希腊语(Windows-1253) + + Token Settings + 形符设置 - - Hebrew (CP856) - 希伯来语(CP856) + + Search Settings + 搜索设置 - - Hebrew (CP862) - 希伯来语(CP862) + + Generation Settings + 生成设置 - - Hebrew (EBCDIC 424) - 希伯来语(EBCDIC 424) + + None + - - Hebrew (ISO-8859-8) - 希伯来语(ISO-8859-8) + + Table Settings + 表格设置 - - Hebrew (Windows-1255) - 希伯来语(Windows-1255) + + Figure Settings + 图表设置 - - Icelandic (CP861) - 冰岛语(CP861) + + Rank: + 序号: - - Icelandic (Mac OS Icelandic) - 冰岛语(Mac OS Icelandic) + + L + - - Japanese (CP932) - 日语(CP932) + + R + - - Japanese (EUC-JP) - 日语(EUC-JP) + + Collocational window: + 搭配检索范围: - - Japanese (EUC-JIS-2004) - 日语(EUC-JIS-2004) + + Limit searching: + 限制检索: - - Japanese (EUC-JISx0213) - 日语(EUC-JISx0213) + + Within sentence segments + 句段内 - - Japanese (ISO-2022-JP) - 
日语(ISO-2022-JP) + + Within sentences + 句子内 - - Japanese (ISO-2022-JP-1) - 日语(ISO-2022-JP-1) + + Within paragraphs + 段落内 + + + Wrapper_Concordancer - - Japanese (ISO-2022-JP-2) - 日语(ISO-2022-JP-2) + + Token Settings + 形符设置 - - Japanese (ISO-2022-JP-2004) - 日语(ISO-2022-JP-2004) + + Search Settings + 搜索设置 - - Japanese (ISO-2022-JP-3) - 日语(ISO-2022-JP-3) + + Generation Settings + 生成设置 - - Japanese (ISO-2022-JP-EXT) - 日语(ISO-2022-JP-EXT) + + Paragraph + 段落 - - Japanese (Shift_JIS) - 日语(Shift_JIS) + + Sentence + 句子 - - Japanese (Shift_JIS-2004) - 日语(Shift_JIS-2004) + + Token + 形符 - - Japanese (Shift_JISx0213) - 日语(Shift_JISx0213) + + Character + 字符 - - Kazakh (KZ-1048) - 哈萨克语(KZ-1048) + + Table Settings + 表格设置 - - Kazakh (PTCP154) - 哈萨克语(PTCP154) + + Figure Settings + 图表设置 - - Korean (EUC-KR) - 韩语(EUC-KR) + + Sort results by: + 结果排序依据: - - Korean (ISO-2022-KR) - 韩语(ISO-2022-KR) + + File + 文件 - - Korean (JOHAB) - 韩语(JOHAB) + + Zapping Settings + 检索项遮蔽设置 - - Korean (UHC) - 韩语(UHC) + + Replace keywords with + 将检索项替换为 - - Persian/Urdu (Mac OS Farsi) - 波斯语/乌尔都语(Mac OS Farsi) + + Add line numbers + 添加行号 - - Portuguese (CP860) - 葡萄牙语(CP860) + + Randomize outputs + 随机化输出 - - Romanian (Mac OS Romanian) - 罗马尼亚语(Mac OS Romanian) + + Sentence segment + 句段 - - Russian (KOI8-R) - 俄语(KOI8-R) + + Search term + 检索项 - - Tajik (KOI8-T) - 塔吉克语(KOI8-T) + + Context length (left): + 上下文长度(左): - - Thai (CP874) - 泰语(CP874) + + Context length (right): + 上下文长度(右): - - Thai (ISO-8859-11) - 泰语(ISO-8859-11) + + Unit of context length: + 上下文长度单位: + + + Wrapper_Concordancer_Parallel - - Turkish (CP857) - 土耳其语(CP857) + + Token Settings + 形符设置 - - Turkish (EBCDIC 1026) - 土耳其语(EBCDIC 1026) + + Search Settings + 搜索设置 - - Turkish (ISO-8859-9) - 土耳其语(ISO-8859-9) + + Table Settings + 表格设置 + + + Wrapper_Dependency_Parser - - Turkish (Mac OS Turkish) - 土耳其语(Mac OS Turkish) + + Token Settings + 形符设置 - - Turkish (Windows-1254) - 土耳其语(Windows-1254) + + Search Settings + 搜索设置 - - Ukrainian (CP1125) - 乌克兰语(CP1125) + 
+ Table Settings + 表格设置 - - Ukrainian (KOI8-U) - 乌克兰语(KOI8-U) + + Figure Settings + 图表设置 - - Urdu (CP1006) - 乌尔都语(CP1006) + + coarse-grained + 粗分 - - Vietnamese (CP1258) - 越南语(CP1258) + + fine-grained + 细分 - - spaCy - Sentencizer - spaCy - 分句器 + + Match dependency relations + 匹配依存关系 + + + Wrapper_Keyword_Extractor - - Student's t-test (1-sample) - 学生 t 检验(单样本) + + Token Settings + 形符设置 - - z-score - z 值 + + Generation Settings + 生成设置 - - Mann-Whitney U Test - 曼惠特尼 U 检验 + + Table Settings + 表格设置 - - Student's t-test (2-sample) - 学生 t 检验(双样本) + + Figure Settings + 图表设置 - - Log-likelihood Ratio - 对数似然比 + + Rank: + 序号: + + + Wrapper_Ngram_Generator - - t-statistic - t 值 + + Token Settings + 形符设置 - - Dice's Coefficient - Dice 系数 + + Search Settings + 搜索设置 - - Jaccard Index - 雅卡尔指数 + + Generation Settings + 生成设置 - - Sorbian (Lower) - 索布语(下) + + Allow skipped tokens: + 允许跳过形符数: - - Sorbian (Upper) - 索布语(上) + + Table Settings + 表格设置 - - None - + + Figure Settings + 图表设置 - - Welch's t-test - Welch t 检验 + + Rank: + 序号: - - z-score (Berry-Rogghe) - z 值(Berry-Rogghe) + + Search term position: + 检索项位置: - - Carroll's D₂ - + + N-gram size: + n 元组长度: + + + Wrapper_Profiler - - Gries's DP - + + Token Settings + 形符设置 - - Juilland's D - + + Table Settings + 表格设置 - - Lyne's D₃ - + + Generate all tables + 生成所有表格 - - Rosengren's S - + + Clear all tables + 清空所有表格 - - Zhang's Distributional Consistency - + + Readability + 可读性 - - Zhang's DC - + + Counts + 计数 - - Engwall's FM - + + Lengths + 长度 - - Juilland's U - + + Length Breakdown + 长度明细 - - Kromer's UR - + + Lexical Density/Diversity + 词汇密度/多样性 - - Rosengren's KF - + + Clear All Tables + 清空所有表格 - - Difference Coefficient - + + + <div> + The results in some of the tables have yet been exported. Do you really want to clear all tables? + </div> + + + <div> + 部分表格中的结果尚未保存。你是否确认清空所有表格? 
+ </div> + + + + Wrapper_Wordlist_Generator - - Kilgarriff's Ratio - + + Token Settings + 形符设置 - - Log Ratio - + + Generation Settings + 生成设置 - - Minimum Sensitivity - + + Table Settings + 表格设置 - - Poisson Collocation Measure - + + Figure Settings + 图表设置 - - Burmese - 缅甸语 + + Rank: + 序号: - - English (Middle) - 英语(中古) + + Syllabification + 音节划分 + + + wl_boxes - - Ganda - 干达语 + + Yes + - - Georgian - 格鲁吉亚语 + + No + - - Punjabi (Gurmukhi) - 旁遮普语(古木基) + + No limit + 无限制 - - Sámi (Northern) - 萨米语(北) + + From + - - Other languages - 其他语种 + + to + - - All languages (UTF-8 without BOM) - 所有语种(UTF-8 无签名) + + Sync + 同步 - - All languages (UTF-8 with BOM) - 所有语种(UTF-8 带签名) + + L + - - All languages (UTF-16 with BOM) - 所有语种(UTF-16 带签名) + + R + + + + wl_buttons - - All languages (UTF-16BE without BOM) - 所有语种(UTF-16 大端无签名) + + Browse... + 浏览... - - All languages (UTF-16LE without BOM) - 所有语种(UTF-16 小端无签名) + + Transparent + 透明 + + + wl_checks_work_area - - All languages (UTF-32 with BOM) - 所有语种(UTF-32 带签名) + + Missing Search Terms + 缺少检索项 - - All languages (UTF-32BE without BOM) - 所有语种(UTF-32 大端无签名) + + + <div> + You have not specified any search terms yet, please enter one in the input box under "<span style="color: #F00; font-weight: bold;">Search term</span>" first. + </div> + + + <div> + 你还未指定任何检索项,请先在“<span style="color: #F00; font-weight: bold;">检索项</span>”下的输入框中指定一项。 + </div> + - - All languages (UTF-32LE without BOM) - 所有语种(UTF-32 小端无签名) + + No Results + 无结果 - - All languages (UTF-7) - 所有语种(UTF-7) + + + <div>Data processing has completed successfully, but there are no results to display.</div> + <div>You can change your settings and try again.</div> + + + <div>数据处理操作已完成,但没有可显示的结果。</div> + <div>你可以更改你的设置后重试。</div> + - - Baltic languages (CP775) - 波罗的海诸语(CP775) + + Language support unavailable! + 语种支持不可用! - - Baltic languages (ISO-8859-13) - 波罗的海诸语(ISO-8859-13) + + Missing search terms! + 缺少检索项! 
- - Baltic languages (Windows-1257) - 波罗的海诸语(Windows-1257) + + Table generated successfully. + 已成功生成表格。 - - Celtic languages (ISO-8859-14) - 凯尔特语(ISO-8859-14) + + Figure generated successfully. + 已成功生成图表。 - - European (Southeastern) (ISO-8859-16) - 欧洲(东南部)(ISO-8859-16) + + No results to display. + 无结果可供显示。 - - Nordic languages (CP865) - 北欧诸语(CP865) + + A fatal error has just occurred! + 刚才发生了一个致命错误! - - Nordic languages (ISO-8859-10) - 北欧诸语(ISO-8859-10) + + Syllable tokenization + 分音节 - - Thai (TIS-620) - 泰语(TIS-620) + + Part-of-speech tagging + 词性标注 - - CSV files (*.csv) - CSV 文件 (*.csv) + + Lemmatization + 词形还原 - - Excel workbooks (*.xlsx) - Excel 工作簿 (*.xlsx) + + Dependency parsing + 依存分析 - - HTML pages (*.htm; *.html) - HTML 页面 (*.htm; *.html) + + No Language Support + 无语种支持 - - PDF files (*.pdf) - PDF 文件 (*.pdf) + + Type of Language Support + 语种支持类型 - - Text files (*.txt) - 文本文件 (*.txt) + + File Name + 文件名 - - Translation memory files (*.tmx) - 翻译记忆库文件 (*.tmx) + + Language + 语种 - - Word documents (*.docx) - Word 文档 (*.docx) + + + <div> + The process cannot be done because language support is unavailable for the following files. Please check your language settings or try again with files of different languages. + </div> + + + <div> + 由于下列文件缺少语种支持,因此操作无法完成。请检查你的语种设置或使用其他语种的文件重试。 + </div> + - - XML files (*.xml) - XML 文件 (*.xml) + + Model downloaded successfully. + 已成功下载模型。 - - All files (*.*) - 所有文件 (*.*) + + A network error occurred while downloading the model! + 下载模型时发生网络错误! - - OpenType fonts (*.otf) - OpenType 字体 (*.otf) + + Table exported successfully. + 已成功导出表格。 - - TrueType fonts (*.ttf) - TrueType 字体 (*.ttf) + + File access denied! + 文件请求被拒绝! 
- - Blizzard mipmap format (*.blp) - + + File Access Denied + 文件请求被拒绝 - - Windows bitmaps (*.bmp) - Windows 位图 (*.bmp) + + + <div>Access to "{}" is denied, please specify another location or close the file and try again.</div> + + + <div>访问“{}”时被拒绝,请指定其他位置或关闭文件后重试。</div> + - - Window cursor files (*.cur) - Window 光标文件 (*.cur) + + Export Completed + 导出已完成 - - Multi-page PCX files (*.dcx) - 多页 PCX 文件 (*.dcx) + + + <div>The table has been successfully exported to "{}".</div> + + + <div>已成功导出表格至“{}”。</div> + + + + wl_conversion - - DirectDraw surface (*.dds) - + + Yes + - - Device-independent bitmaps (*.dib) - 设备无关位图 (*.dib) + + No + + + + wl_dependency_parsing - - Encapsulated PostScript (*.eps, *.ps) - + + Dependency Graphs Generated Successfully + 成功生成依存图 - - Flexible image transport system (*.fit, *.fits) - + + + <div>Dependency graphs has been successfully generated and exported under folder: {}</div> + + <div>If the figures are not displayed automatically, you may try opening them manually using web browsers or image viewers installed on your computer, or save copies of them in other locations for later use.</div> + + + <div>已成功生成依存图并导出至该文件夹下:{}</div> + + <div>若图表未自动显示,你可使用电脑上已安装的浏览器或看图软件将其手动打开,或将图表副本保存至其他位置以待后用。</div> + + + + wl_dialogs - - Autodesk animation files (*.flc, *.fli) - Autodesk 动画文件 (*.flc, *.fli) + + Copy + 复制 - - Fox Engine textures (*.ftex) - Fox Engine 纹理 (*.ftex) + + Close + 关闭 - - GIMP brush files (*.gbr) - GIMP 笔刷文件 (*.gbr) + + Save + 保存 - - Graphics interchange format (*.gif) - + + Cancel + 取消 - - Apple icon images (*.icns) - 苹果图标图片 (*.icns) + + OK + 确认 + + + wl_dialogs_errs - - Windows icon files (*.ico) - Windows 图标文件 (*.ico) + + Fatal Error + 致命错误 - - IPTC/NAA newsphoto files (*.iim) - IPTC/NAA newsphoto 文件 (*.iim) + + + <div>A fatal error has occurred, please <b>send the following error messages</b> to {} in order to <b>contact the author for support</b>!</div> + + + <div>刚才发生了一个致命错误,请<b>将下方错误信息</b>发送至{}来获取<b>作者的帮助</b>!</div> + - - IM 
files (*.im) - IM 文件 (*.im) + + Network Error + 网络错误 - - Image Tools image files (*) - Image Tools 图像文件 (*) + + + <div>A network error occurred while downloading the model, please check your internet connections and proxy settings in <b>Menu → Preferences → General → Proxy Settings</b> if you are using a proxy.</div> + <div>If the network issue persists, please <b>send the following error messages</b> to {} in order to <b>contact the author for support</b>.</div> + + + <div>下载模型时发生了网络错误,请检查你的网络连接,如果你使用了代理,那么请一并检查<b>菜单 → 偏好 → 全局 → 代理设置</b>中的代理设置。</div> + <div>如果网络问题仍然存在,请<b>将下方错误信息</b>发送至{}来获取<b>作者的帮助</b>。</div> + + + + wl_dialogs_misc - - JPEG files (*.jfif, *.jpe, *.jpeg, *.jpg) - JPEG 文件 (*.jfif, *.jpe, *.jpeg, *.jpg) + + Processing data... + 处理数据中…… - - JPEG 2000 files (*.j2c, *.j2k, *.jp2, *.jpc, *.jpf, *.jpx) - JPEG 2000 文件 (*.j2c, *.j2k, *.jp2, *.jpc, *.jpf, *.jpx) + + Downloading model... + 下载模型中…… + + + wl_figs - - McIDAS area files (*) - McIDAS area 文件 (*) + + Frequency + 频数 - - PhotoCD files (*.pcd) - PhotoCD 文件 (*.pcd) + + ^[LR][1-9][0-9]*$ + ^[左右] [1-9][0-9]*$ - - Picture exchange (*.pcx) - + + Cumulative Percentage Frequency + 累加百分比频数 - - PIXAR raster files (*.pxr) - PIXAR 栅格文件 (*.pxr) + + Cumulative Frequency + 累加频数 - - Portable network graphics (*.apng, *.png) - + + Percentage Frequency + 百分比频数 - - Portable pixmap format (*.pbm, *.pgm, *.pnm, *.ppm) - + + p-value + p 值 - - Photoshop PSD files (*.psd) - Photoshop PSD 文件 (*.psd) + + Custom + 自定义 - - Sun raster files (*.ras) - Sun 栅格文件 (*.ras) + + Monochrome + 单色 - - Silicon graphics images (*.bw, *.rgb, *.rgba, *.sgi) - + + Colormap + 色谱 + + + wl_figs_freqs - - SPIDER files (*) - SPIDER 文件 (*) + + Network graph + 网络图 - - Truevision TGA (*.icb, *.tga, *.vda, *.vst) - + + Reference files + 参照文件 - - TIFF files (*.tif, *.tiff) - TIFF 文件 (*.tif, *.tiff) + + Total + 合计 - - WebP files (*.webp) - WebP 文件 (*.webp) + + Line chart + 折线图 - - Windows metafiles (*.emf, *.wmf) - Windows 元文件 (*.emf, *.wmf) + + Token + 
形符 - - X bitmaps (*.xbm) - + + N-gram + n 元组 - - X pixmaps (*.xpm) - + + Collocate + 搭配词 - - XV thumbnails (*) - XV 缩略图 (*) + + Keyword + 关键词 - - botok - Tibetan sentence tokenizer - botok - 藏语分句器 + + Word cloud + 词云图 + + + wl_figs_stats - - NLTK - Czech Punkt sentence tokenizer - NLTK - 捷克语 Punkt 分句器 + + Total + 合计 - - NLTK - Danish Punkt sentence tokenizer - NLTK - 丹麦语 Punkt 分句器 + + p-value + p 值 - - NLTK - Dutch Punkt sentence tokenizer - NLTK - 荷兰语 Punkt 分句器 + + Line chart + 折线图 - - NLTK - English Punkt sentence tokenizer - NLTK - 英语 Punkt 分句器 + + Token + 形符 - - NLTK - Estonian Punkt sentence tokenizer - NLTK - 爱沙尼亚语 Punkt 分句器 + + N-gram + n 元组 - - NLTK - Finnish Punkt sentence tokenizer - NLTK - 芬兰语 Punkt 分句器 + + Collocate + 搭配词 - - NLTK - French Punkt sentence tokenizer - NLTK - 法语 Punkt 分句器 + + Keyword + 关键词 - - NLTK - German Punkt sentence tokenizer - NLTK - 德语 Punkt 分句器 + + Word cloud + 词云图 - - NLTK - Greek Punkt sentence tokenizer - NLTK - 希腊语 Punkt 分句器 + + Network graph + 网络图 + + + wl_lists - - NLTK - Italian Punkt sentence tokenizer - NLTK - 意大利语 Punkt 分句器 + + New search term + 新检索项 - - NLTK - Malayalam Punkt sentence tokenizer - NLTK - 马拉雅拉姆语 Punkt 分句器 + + New stop word + 新停用词 - - NLTK - Norwegian Punkt sentence tokenizer - NLTK - 挪威语 Punkt 分句器 + + New item + 新列表项 - - NLTK - Polish Punkt sentence tokenizer - NLTK - 波兰语 Punkt 分句器 + + Add + 添加 - - NLTK - Portuguese Punkt sentence tokenizer - NLTK - 葡萄牙语 Punkt 分句器 + + Insert + 插入 - - NLTK - Russian Punkt sentence tokenizer - NLTK - 俄语 Punkt 分句器 + + Remove + 移除 - - NLTK - Slovenian Punkt sentence tokenizer - NLTK - 斯洛文尼亚语 Punkt 分句器 + + Clear + 清空 - - NLTK - Spanish Punkt sentence tokenizer - NLTK - 西班牙语 Punkt 分句器 + + Duplicates Found + 发现重复项 - - NLTK - Swedish Punkt sentence tokenizer - NLTK - 瑞典语 Punkt 分句器 + + + <div>The item that you have just edited already exists in the list, please specify another one!</div> + + + <div>你刚才编辑的列表项已存在于列表中,请另外指定一项!</div> + - - NLTK - Turkish Punkt sentence tokenizer - 
NLTK - 土耳其语 Punkt 分句器 + + Import + 导入 - - spaCy - Croatian sentence recognizer - spaCy - 克罗地亚语句子识别器 + + Export + 导出 - - spaCy - Dutch sentence recognizer - spaCy - 荷兰语句子识别器 + + Import from Files + 从文件导入 - - spaCy - Finnish sentence recognizer - spaCy - 芬兰语句子识别器 + + Text files (*.txt) + 文本文件 (*.txt) - - spaCy - Greek (Modern) sentence recognizer - spaCy - 希腊语(现代)句子识别器 + + Import Error + 导入时出错 - - spaCy - Italian sentence recognizer - spaCy - 意大利语句子识别器 + + + <div> + An error occurred during import, please check the following files and try again. + </div> + + + <div> + 导入时发生了一个错误,请检查下列文件后重试。 + </div> + - - spaCy - Lithuanian sentence recognizer - spaCy - 立陶宛语句子识别器 + + Empty file + 空文件 - - spaCy - Macedonian sentence recognizer - spaCy - 马其顿语句子识别器 + + An error occurred during import! + 导入时发生了一个错误! - - spaCy - Norwegian Bokmål sentence recognizer - spaCy - 书面挪威语句子识别器 + + item + - - spaCy - Polish sentence recognizer - spaCy - 波兰语句子识别器 + + items + - - spaCy - Portuguese sentence recognizer - spaCy - 葡萄牙语句子识别器 + + {} {} has been successfully imported into the list. 
+ 已成功导入 {} {}至列表中。 - - spaCy - Romanian sentence recognizer - spaCy - 罗马尼亚语句子识别器 + + Export to File + 导出至文件 - - spaCy - Russian sentence recognizer - spaCy - 俄语句子识别器 + + Export Completed + 导出完成 - - spaCy - Swedish sentence recognizer - spaCy - 瑞典语句子识别器 + + + <div>The list has been successfully exported to "{}".</div> + + + <div>已成功导出列表至“{}”。</div> + + + + wl_matching - - Underthesea - Vietnamese sentence tokenizer - Underthesea - 越南语分句器 + + Embedded + 嵌入式 - - botok - Tibetan word tokenizer - botok - 藏语分词器 + + Non-embedded + 非嵌入式 + + + wl_measure_utils - - jieba - Chinese word tokenizer - jieba - 汉语分词器 + + Absolute frequency + 绝对频数 - - NLTK - NIST tokenizer - NLTK - NIST 分词器 + + Relative frequency + 相对频数 + + + wl_measures_lexical_density_diversity - - NLTK - NLTK tokenizer - NLTK - NLTK 分词器 + + Content words + 实词 - - NLTK - Penn Treebank tokenizer - NLTK - 宾州树库分词器 + + Rank-frequency distribution + 频数排序分布 - - NLTK - Regular-expression tokenizer - NLTK - 正则表达式分词器 + + Frequency spectrum + 频数谱 + + + wl_measures_readability - - NLTK - Tok-tok tokenizer - NLTK - Tok-tok 分词器 + + Original + 原版 - - NLTK - Twitter tokenizer - NLTK - 推特分词器 + + New + 新版 - - pkuseg - Chinese word tokenizer - pkuseg - 汉语分词器 + + Navy + 海军版 - - PyThaiNLP - Longest matching - PyThaiNLP - 最长匹配 + + Policy One + 策略一 - - PyThaiNLP - Maximum matching - PyThaiNLP - 最大匹配 + + Policy Two + 策略二 + + + wl_measures_statistical_significance - - PyThaiNLP - Maximum matching + TCC - PyThaiNLP - 最大匹配 + TCC + + Two-tailed + 双尾 - - Sacremoses - Moses tokenizer - Sacremoses - Moses 分词器 + + Left-tailed + 左尾 - - spaCy - Afrikaans word tokenizer - spaCy - 南非语分词器 + + Right-tailed + 右尾 + + + wl_misc - - spaCy - Albanian word tokenizer - spaCy - 阿尔巴尼亚语分词器 + + minute + - - spaCy - Amharic word tokenizer - spaCy - 阿姆哈拉语分词器 + + minutes + - - spaCy - Arabic word tokenizer - spaCy - 阿拉伯语分词器 + + (In {} {} {:.2f} seconds) + (耗时 {} {} {:.2f} 秒) - - spaCy - Armenian word tokenizer - spaCy - Armenian 分词器 + + (In + (耗时 + + + 
wl_pos_tagging - - spaCy - Azerbaijani word tokenizer - spaCy - 阿塞拜疆语分词器 + + Content words + 实词 - - spaCy - Basque word tokenizer - spaCy - 巴斯克语分词器 + + Function words + 虚词 + + + wl_results_filter - - spaCy - Bengali word tokenizer - spaCy - 孟加拉语分词器 + + p-value: + p 值: + + + wl_settings_default - - spaCy - Bulgarian word tokenizer - spaCy - 保加利亚语分词器 + + Observed Files + 观察文件 - - spaCy - Catalan word tokenizer - spaCy - 加泰罗尼亚语分词器 + + Profiler + 分析工具 - - spaCy - Chinese word tokenizer - spaCy - 汉语分词器 + + APA (7th edition) + APA(第七版) - - spaCy - Croatian word tokenizer - spaCy - 克罗地亚语分词器 + + Counts + 计数 - - spaCy - Czech word tokenizer - spaCy - 捷克语分词器 + + Token + 形符 - - spaCy - Danish word tokenizer - spaCy - 丹麦语分词器 + + File + 文件 - - spaCy - Dutch word tokenizer - spaCy - 荷兰语分词器 + + Ascending + 升序 - - spaCy - English word tokenizer - spaCy - 英语分词器 + + Token no. + 形符序号 - - spaCy - Estonian word tokenizer - spaCy - 爱沙尼亚语分词器 + + Line chart + 折线图 - - spaCy - Finnish word tokenizer - spaCy - 芬兰语分词器 + + Total + 合计 - - spaCy - French word tokenizer - spaCy - 法语分词器 + + Frequency + 频数 - - spaCy - Ganda word tokenizer - spaCy - 干达语分词器 + + None + - - spaCy - German word tokenizer - spaCy - 德语分词器 + + p-value + p 值 - - spaCy - Greek (Ancient) word tokenizer - spaCy - 希腊语(古)分词器 + + General + 全局 - - spaCy - Greek (Modern) word tokenizer - spaCy - 希腊语(现代)分词器 - - - - spaCy - Gujarati word tokenizer - spaCy - 古吉拉特语分词器 + + Excel workbooks (*.xlsx) + Excel 工作簿 (*.xlsx) - - spaCy - Hindi word tokenizer - spaCy - 印地语分词器 + + Non-embedded + 非嵌入式 - - spaCy - Hungarian word tokenizer - spaCy - 匈牙利语分词器 + + Header + - - spaCy - Icelandic word tokenizer - spaCy - 冰岛语分词器 + + Embedded + 嵌入式 - - spaCy - Indonesian word tokenizer - spaCy - 印度尼西亚语分词器 + + Part of speech + 词性 - - spaCy - Irish word tokenizer - spaCy - 爱尔兰语分词器 + + Others + 其他 - - spaCy - Italian word tokenizer - spaCy - 意大利语分词器 + + Paragraph + 段落 - - spaCy - Japanese word tokenizer - spaCy - 日语分词器 + + Sentence + 句子 - - spaCy - Kannada 
word tokenizer - spaCy - 卡纳达语分词器 + + Word + 单词 - - spaCy - Kyrgyz word tokenizer - spaCy - 吉尔吉斯语分词器 + + New + 新版 - - spaCy - Latin word tokenizer - spaCy - 拉丁语分词器 + + Original + 原版 - - spaCy - Latvian word tokenizer - spaCy - 拉脱维亚语分词器 + + Rank-frequency distribution + 频数排序分布 - - spaCy - Ligurian word tokenizer - spaCy - 利古里亚语分词器 + + Two-tailed + 双尾 - - spaCy - Lithuanian word tokenizer - spaCy - 立陶宛语分词器 + + Relative frequency + 相对频数 - - spaCy - Luxembourgish word tokenizer - spaCy - 卢森堡语分词器 + + Colormap + 色谱 - - spaCy - Macedonian word tokenizer - spaCy - 马其顿语分词器 + + Policy One + 策略一 + + + wl_settings_figs - - spaCy - Malayalam word tokenizer - spaCy - 马拉雅拉姆语分词器 + + Square + 方形 - - spaCy - Marathi word tokenizer - spaCy - 马拉地语分词器 + + Circle + 圆形 - - spaCy - Nepali word tokenizer - spaCy - 尼泊尔语分词器 + + Triangle up + 朝上三角形 - - spaCy - Persian word tokenizer - spaCy - 波斯语分词器 + + Triangle right + 朝右三角形 - - spaCy - Polish word tokenizer - spaCy - 波兰语分词器 + + Triangle down + 朝下三角形 - - spaCy - Portuguese word tokenizer - spaCy - 葡萄牙语分词器 + + Triangle left + 朝左三角形 - - spaCy - Romanian word tokenizer - spaCy - 罗马尼亚语分词器 + + Thin diamond + 薄菱形 - - spaCy - Russian word tokenizer - spaCy - 俄语分词器 + + Pentagon + 五角形 - - spaCy - Sanskrit word tokenizer - spaCy - 梵语分词器 + + Hexagon + 六边形 - - spaCy - Serbian word tokenizer - spaCy - 塞尔维亚语分词器 + + Octagon + 八边形 - - spaCy - Sinhala word tokenizer - spaCy - 僧伽罗语分词器 + + Arc3 + 弧3 - - spaCy - Slovak word tokenizer - spaCy - 斯洛伐克语分词器 + + Arc + - - spaCy - Slovenian word tokenizer - spaCy - 斯洛文尼亚语分词器 + + Angle3 + 角3 - - spaCy - Sorbian (Lower) word tokenizer - spaCy - 索布语(下)分词器 + + Angle + - - spaCy - Sorbian (Upper) word tokenizer - spaCy - 索布语(上)分词器 + + Bar + 横条 - - spaCy - Spanish word tokenizer - spaCy - 西班牙语分词器 + + Solid + 实线 - - spaCy - Swedish word tokenizer - spaCy - 瑞典语分词器 + + Dashed + 虚线 - - spaCy - Tagalog word tokenizer - spaCy - 他加禄语分词器 + + Dash-dotted + 点画线 - - spaCy - Tamil word tokenizer - spaCy - 泰米尔语分词器 + + Dotted + 点线 - - 
spaCy - Tatar word tokenizer - spaCy - 鞑靼语分词器 + + Curve + 圆弧 - - spaCy - Telugu word tokenizer - spaCy - 泰卢固语分词器 + + Curve A + 圆弧 A - - spaCy - Tigrinya word tokenizer - spaCy - 提格雷尼亚语分词器 + + Curve B + 圆弧 B - - spaCy - Tswana word tokenizer - spaCy - 茨瓦纳语分词器 + + Curve AB + 圆弧 AB - - spaCy - Turkish word tokenizer - spaCy - 土耳其语分词器 + + Curve filled A + 实心圆弧 A - - spaCy - Ukrainian word tokenizer - spaCy - 乌克兰语分词器 + + Curve filled B + 实心圆弧 B - - spaCy - Urdu word tokenizer - spaCy - 乌尔都语分词器 + + Curve filled AB + 实心圆弧 AB - - spaCy - Yoruba word tokenizer - spaCy - 约鲁巴语分词器 + + Bracket A + 方括号 A - - SudachiPy - Japanese word tokenizer (split mode A) - SudachiPy - 日语分词器(切分模式 A) + + Bracket B + 方括号 B - - SudachiPy - Japanese word tokenizer (split mode B) - SudachiPy - 日语分词器(切分模式 B) + + Bracket AB + 方括号 AB - - SudachiPy - Japanese word tokenizer (split mode C) - SudachiPy - 日语分词器(切分模式 C) + + Bar AB + 横条 AB - - Underthesea - Vietnamese word tokenizer - Underthesea - 越南语分词器 + + Bracket curve + 方括号加圆弧 - - Wordless - Chinese character tokenizer - Wordless - 汉语分字器 + + Simple + 朴素 - - Wordless - Japanese kanji tokenizer - Wordless - 日语分字器 + + Fancy + 绚丽 - - NLTK - Legality syllable tokenizer - NLTK - 合法性分音节器 + + Wedge + 楔形 - - NLTK - Sonority sequencing syllable tokenizer - NLTK - 响度顺序分音节器 + + Circular + 环形 - - Pyphen - Afrikaans syllable tokenizer - Pyphen - 南非语分音节器 + + Planar + 平面 - - Pyphen - Albanian syllable tokenizer - Pyphen - 阿尔巴尼亚语分音节器 + + Random + 随机 - - Pyphen - Belarusian syllable tokenizer - Pyphen - 白俄罗斯语分音节器 + + Shell + 同心 - - Pyphen - Bulgarian syllable tokenizer - Pyphen - 保加利亚语分音节器 + + Spring + 弹簧 - - Pyphen - Catalan syllable tokenizer - Pyphen - 加泰罗尼亚语分音节器 + + Spectral + 谱图 - - Pyphen - Croatian syllable tokenizer - Pyphen - 克罗地亚语分音节器 + + Spiral + 螺旋 + + + wl_settings_global - - Pyphen - Czech syllable tokenizer - Pyphen - 捷克语分音节器 + + Afrikaans + 南非语 - - Pyphen - Danish syllable tokenizer - Pyphen - 丹麦语分音节器 + + Albanian + 阿尔巴尼亚语 - - Pyphen - Dutch syllable 
tokenizer - Pyphen - 荷兰语分音节器 + + Amharic + 阿姆哈拉语 - - Pyphen - English (United Kingdom) syllable tokenizer - Pyphen - 英语(英国)分音节器 + + Arabic + 阿拉伯语 - - Pyphen - English (United States) syllable tokenizer - Pyphen - 英语(美国)分音节器 + + Armenian (Classical) + 亚美尼亚语(古) - - Pyphen - Esperanto syllable tokenizer - Pyphen - 世界语分音节器 + + Armenian (Eastern) + 亚美尼亚语(东) - - Pyphen - Estonian syllable tokenizer - Pyphen - 爱沙尼亚语分音节器 + + Armenian (Western) + 亚美尼亚语(西) - - Pyphen - French syllable tokenizer - Pyphen - 法语分音节器 + + Assamese + 阿萨姆语 - - Pyphen - Galician syllable tokenizer - Pyphen - 加里西亚语分音节器 + + Asturian + 阿斯图里亚斯语 - - Pyphen - German (Austria) syllable tokenizer - Pyphen - 德语(奥地利)分音节器 + + Azerbaijani + 阿塞拜疆语 - - Pyphen - German (Germany) syllable tokenizer - Pyphen - 德语(德国)分音节器 + + Basque + 巴斯克语 - - Pyphen - German (Switzerland) syllable tokenizer - Pyphen - 德语(瑞士)分音节器 + + Belarusian + 白俄罗斯语 - - Pyphen - Greek (Modern) syllable tokenizer - Pyphen - 希腊语(现代)分音节器 + + Bengali + 孟加拉语 - - Pyphen - Hungarian syllable tokenizer - Pyphen - 匈牙利语分音节器 + + Bulgarian + 保加利亚语 - - Pyphen - Icelandic syllable tokenizer - Pyphen - 冰岛语分音节器 + + Burmese + 缅甸语 - - Pyphen - Indonesian syllable tokenizer - Pyphen - 印度尼西亚语分音节器 + + Buryat (Russia) + 布里亚特语(俄罗斯) - - Pyphen - Italian syllable tokenizer - Pyphen - 意大利语分音节器 + + Catalan + 加泰罗尼亚语 - - Pyphen - Lithuanian syllable tokenizer - Pyphen - 立陶宛语分音节器 + + Chinese (Classical) + 汉语(文言) - - Pyphen - Latvian syllable tokenizer - Pyphen - 拉脱维亚语分音节器 + + Chinese (Simplified) + 汉语(简体) - - Pyphen - Mongolian syllable tokenizer - Pyphen - 蒙古语分音节器 + + Chinese (Traditional) + 汉语(繁体) - - Pyphen - Norwegian Bokmål syllable tokenizer - Pyphen - 书面挪威语分音节器 + + Church Slavonic (Old) + 教会斯拉夫语(古) - - Pyphen - Norwegian Nynorsk syllable tokenizer - Pyphen - 新挪威语分音节器 + + Coptic + 科普特语 - - Pyphen - Polish syllable tokenizer - Pyphen - 波兰语分音节器 + + Croatian + 克罗地亚语 - - Pyphen - Portuguese (Brazil) syllable tokenizer - Pyphen - 葡萄牙语(巴西)分音节器 + + Czech + 捷克语 - - Pyphen - 
Portuguese (Portugal) syllable tokenizer - Pyphen - 葡萄牙语(葡萄牙)分音节器 + + Danish + 丹麦语 - - Pyphen - Romanian syllable tokenizer - Pyphen - 罗马尼亚语分音节器 + + Dutch + 荷兰语 - - Pyphen - Russian syllable tokenizer - Pyphen - 俄语分音节器 + + English (Middle) + 英语(中古) - - Pyphen - Serbian (Cyrillic) syllable tokenizer - Pyphen - 塞尔维亚语(西里尔)分音节器 + + English (Old) + 英语(古) - - Pyphen - Serbian (Latin) syllable tokenizer - Pyphen - 塞尔维亚语(拉丁)分音节器 + + English (United Kingdom) + 英语(英国) - - Pyphen - Slovak syllable tokenizer - Pyphen - 斯洛伐克语分音节器 + + English (United States) + 英语(美国) - - Pyphen - Slovenian syllable tokenizer - Pyphen - 斯洛文尼亚语分音节器 + + Erzya + 埃尔齐亚语 - - Pyphen - Spanish syllable tokenizer - Pyphen - 西班牙语分音节器 + + Esperanto + 世界语 - - Pyphen - Swedish syllable tokenizer - Pyphen - 瑞典语分音节器 + + Estonian + 爱沙尼亚语 - - Pyphen - Telugu syllable tokenizer - Pyphen - 泰卢固语分音节器 + + Faroese + 法罗语 - - Pyphen - Thai syllable tokenizer - Pyphen - 泰语分音节器 + + Finnish + 芬兰语 - - Pyphen - Ukrainian syllable tokenizer - Pyphen - 乌克兰语分音节器 + + French + 法语 - - Pyphen - Zulu syllable tokenizer - Pyphen - 祖鲁语分音节器 + + French (Old) + 法语(古) - - PyThaiNLP - Thai syllable tokenizer - PyThaiNLP - 泰语分音节器 + + Galician + 加里西亚语 - - botok - Tibetan part-of-speech tagger - botok - 藏语词性标注器 + + Georgian + 格鲁吉亚语 - - jieba - Chinese part-of-speech tagger - jieba - 汉语词性标注器 + + German (Austria) + 德语(奥地利) - - NLTK - English perceptron part-of-speech tagger - NLTK - 英语感知机词性标注器 + + German (Germany) + 德语(德国) - - NLTK - Russian perceptron part-of-speech tagger - NLTK - 俄语感知机词性标注器 + + German (Switzerland) + 德语(瑞士) - - pymorphy3 - Morphological analyzer - pymorphy3 - 形态分析器 + + Gothic + 哥特语 - - PyThaiNLP - Perceptron part-of-speech tagger (ORCHID) - PyThaiNLP - 感知机词性标注器(ORCHID) + + Greek (Ancient) + 希腊语(古) - - PyThaiNLP - Perceptron part-of-speech tagger (PUD) - PyThaiNLP - 感知机词性标注器(PUD) + + Greek (Modern) + 希腊语(现代) - - spaCy - Catalan part-of-speech tagger - spaCy - 加泰罗尼亚语词性标注器 + + Gujarati + 古吉拉特语 - - spaCy - Chinese part-of-speech 
tagger - spaCy - 汉语词性标注器 + + Hebrew (Ancient) + 希伯来语(古) - - spaCy - Croatian part-of-speech tagger - spaCy - 克罗地亚语词性标注器 + + Hebrew (Modern) + 希伯来语(现代) - - spaCy - Danish part-of-speech tagger - spaCy - 丹麦语词性标注器 + + Hindi + 印地语 - - spaCy - Dutch part-of-speech tagger - spaCy - 荷兰语词性标注器 + + Hungarian + 匈牙利语 - - spaCy - English part-of-speech tagger - spaCy - 英语词性标注器 + + Icelandic + 冰岛语 - - spaCy - Finnish part-of-speech tagger - spaCy - 芬兰语词性标注器 + + Indonesian + 印度尼西亚语 - - spaCy - French part-of-speech tagger - spaCy - 法语词性标注器 + + Irish + 爱尔兰语 - - spaCy - German part-of-speech tagger - spaCy - 德语词性标注器 + + Italian + 意大利语 - - spaCy - Greek (Modern) part-of-speech tagger - spaCy - 希腊语(现代)词性标注器 + + Japanese + 日语 - - spaCy - Italian part-of-speech tagger - spaCy - 意大利语词性标注器 + + Kannada + 卡纳达语 - - spaCy - Japanese part-of-speech tagger - spaCy - 日语词性标注器 + + Kazakh + 哈萨克语 - - spaCy - Lithuanian part-of-speech tagger - spaCy - 立陶宛语词性标注器 + + Khmer + 柬埔寨语 - - spaCy - Macedonian part-of-speech tagger - spaCy - 马其顿语词性标注器 + + Korean + 韩语 - - spaCy - Norwegian Bokmål part-of-speech tagger - spaCy - 书面挪威语词性标注器 + + Kurdish (Kurmanji) + 库尔德语(库尔曼吉语) - - spaCy - Polish part-of-speech tagger - spaCy - 波兰语词性标注器 + + Kyrgyz + 吉尔吉斯语 - - spaCy - Portuguese part-of-speech tagger - spaCy - 葡萄牙语词性标注器 + + Lao + 老挝语 - - spaCy - Romanian part-of-speech tagger - spaCy - 罗马尼亚语词性标注器 + + Latin + 拉丁语 - - spaCy - Russian part-of-speech tagger - spaCy - 俄语词性标注器 + + Latvian + 拉脱维亚语 - - spaCy - Spanish part-of-speech tagger - spaCy - 西班牙语词性标注器 + + Ligurian + 利古里亚语 - - spaCy - Swedish part-of-speech tagger - spaCy - 瑞典语词性标注器 + + Lithuanian + 立陶宛语 - - spaCy - Ukrainian part-of-speech tagger - spaCy - 乌克兰语词性标注器 + + Luganda + 卢干达语 - - SudachiPy - Japanese part-of-speech tagger - SudachiPy - 日语词性标注器 + + Luxembourgish + 卢森堡语 - - Underthesea - Vietnamese part-of-speech tagger - Underthesea - 越南语词性标注器 + + Macedonian + 马其顿语 - - botok - Tibetan lemmatizer - botok - 藏语词形还原器 + + Malay + 马来语 - - NLTK - WordNet 
lemmatizer - NLTK - WordNet 词形还原器 + + Malayalam + 马拉雅拉姆语 - - simplemma - Albanian lemmatizer - simplemma - 阿尔巴尼亚语词形还原器 + + Maltese + 马耳他语 - - simplemma - Armenian lemmatizer - simplemma - Armenian 词形还原器 + + Manx + 马恩语 - - simplemma - Asturian lemmatizer - simplemma - 阿斯图里亚斯语词形还原器 + + Marathi + 马拉地语 - - simplemma - Bulgarian lemmatizer - simplemma - 保加利亚语词形还原器 + + Meitei (Meitei script) + 曼尼普尔语(曼尼普尔文) - - simplemma - Catalan lemmatizer - simplemma - 加泰罗尼亚语词形还原器 + + Mongolian + 蒙古语 - - simplemma - Czech lemmatizer - simplemma - 捷克语词形还原器 + + Nepali + 尼泊尔语 - - simplemma - Danish lemmatizer - simplemma - 丹麦语词形还原器 + + Nigerian Pidgin + 尼日利亚皮钦语 - - simplemma - Dutch lemmatizer - simplemma - 荷兰语词形还原器 + + Norwegian (Bokmål) + 挪威语(书面) - - simplemma - English lemmatizer - simplemma - 英语词形还原器 + + Norwegian (Nynorsk) + 挪威语(新) - - simplemma - English (Middle) lemmatizer - simplemma - 英语(中古)词形还原器 + + Odia + 奥里亚语 - - simplemma - Estonian lemmatizer - simplemma - 爱沙尼亚语词形还原器 + + Persian + 波斯语 - - simplemma - Finnish lemmatizer - simplemma - 芬兰语词形还原器 + + Polish + 波兰语 - - simplemma - French lemmatizer - simplemma - 法语词形还原器 + + Pomak + 波马克语 - - simplemma - Galician lemmatizer - simplemma - 加里西亚语词形还原器 + + Portuguese (Brazil) + 葡萄牙语(巴西) - - simplemma - Georgian lemmatizer - simplemma - 格鲁吉亚语词形还原器 + + Portuguese (Portugal) + 葡萄牙语(葡萄牙) - - simplemma - German lemmatizer - simplemma - 德语词形还原器 + + Punjabi (Gurmukhi script) + 旁遮普语(古木基文) - - simplemma - Greek (Modern) lemmatizer - simplemma - 希腊语(现代)词形还原器 + + Romanian + 罗马尼亚语 - - simplemma - Hindi lemmatizer - simplemma - 印地语词形还原器 + + Russian + 俄语 - - simplemma - Hungarian lemmatizer - simplemma - 匈牙利语词形还原器 + + Russian (Old) + 俄语(古) - - simplemma - Icelandic lemmatizer - simplemma - 冰岛语词形还原器 + + Sámi (Northern) + 萨米语(北) - - simplemma - Indonesian lemmatizer - simplemma - 印度尼西亚语词形还原器 + + Sanskrit + 梵语 - - simplemma - Irish lemmatizer - simplemma - 爱尔兰语词形还原器 + + Scottish Gaelic + 苏格兰盖尔语 - - simplemma - Italian lemmatizer - simplemma - 意大利语词形还原器 + 
+ Serbian (Cyrillic script) + 塞尔维亚语(西里尔文) - - simplemma - Latin lemmatizer - simplemma - 拉丁语词形还原器 + + Serbian (Latin script) + 塞尔维亚语(拉丁文) - - simplemma - Latvian lemmatizer - simplemma - 拉脱维亚语词形还原器 + + Sindhi + 信德语 - - simplemma - Lithuanian lemmatizer - simplemma - 立陶宛语词形还原器 + + Sinhala + 僧伽罗语 - - simplemma - Luxembourgish lemmatizer - simplemma - 卢森堡语词形还原器 + + Slovak + 斯洛伐克语 - - simplemma - Macedonian lemmatizer - simplemma - 马其顿语词形还原器 + + Slovene + 斯洛文尼亚语 - - simplemma - Malay lemmatizer - simplemma - 马来语词形还原器 + + Sorbian (Lower) + 索布语(下) - - simplemma - Manx lemmatizer - simplemma - 马恩语词形还原器 + + Sorbian (Upper) + 索布语(上) - - simplemma - Norwegian Bokmål lemmatizer - simplemma - 书面挪威语词形还原器 + + Spanish + 西班牙语 - - simplemma - Norwegian Nynorsk lemmatizer - simplemma - 新挪威语词形还原器 + + Swahili + 斯瓦西里语 - - simplemma - Persian lemmatizer - simplemma - 波斯语词形还原器 + + Swedish + 瑞典语 - - simplemma - Polish lemmatizer - simplemma - 波兰语词形还原器 + + Tagalog + 他加禄语 - - simplemma - Portuguese lemmatizer - simplemma - 葡萄牙语词形还原器 + + Tajik + 塔吉克语 - - simplemma - Romanian lemmatizer - simplemma - 罗马尼亚语词形还原器 + + Tamil + 泰米尔语 - - simplemma - Russian lemmatizer - simplemma - 俄语词形还原器 + + Tatar + 鞑靼语 - - simplemma - Sámi (Northern) lemmatizer - simplemma - 萨米语(北)词形还原器 + + Telugu + 泰卢固语 - - simplemma - Scottish Gaelic lemmatizer - simplemma - 苏格兰盖尔语词形还原器 + + Tetun (Dili) + 德顿语(帝力) - - simplemma - Serbo-Croatian lemmatizer - simplemma - 塞尔维亚-克罗地亚语词形还原器 + + Thai + 泰语 - - simplemma - Slovak lemmatizer - simplemma - 斯洛伐克语词形还原器 + + Tibetan + 藏语 - - simplemma - Slovenian lemmatizer - simplemma - 斯洛文尼亚语词形还原器 + + Tigrinya + 提格雷尼亚语 - - simplemma - Spanish lemmatizer - simplemma - 西班牙语词形还原器 + + Tswana + 茨瓦纳语 - - simplemma - Swahili lemmatizer - simplemma - 斯瓦西里语词形还原器 + + Turkish + 土耳其语 - - simplemma - Swedish lemmatizer - simplemma - 瑞典语词形还原器 + + Ukrainian + 乌克兰语 - - simplemma - Tagalog lemmatizer - simplemma - 他加禄语词形还原器 + + Urdu + 乌尔都语 - - simplemma - Turkish lemmatizer - simplemma - 土耳其语词形还原器 + + 
Uyghur + 维吾尔语 - - simplemma - Ukrainian lemmatizer - simplemma - 乌克兰语词形还原器 + + Vietnamese + 越南语 - - simplemma - Welsh lemmatizer - simplemma - 威尔士语词形还原器 + + Welsh + 威尔士语 - - spaCy - Bengali lemmatizer - spaCy - 孟加拉语词形还原器 + + Wolof + 沃洛夫语 - - spaCy - Catalan lemmatizer - spaCy - 加泰罗尼亚语词形还原器 + + Yoruba + 约鲁巴语 - - spaCy - Croatian lemmatizer - spaCy - 克罗地亚语词形还原器 + + Zulu + 祖鲁语 - - spaCy - Czech lemmatizer - spaCy - 捷克语词形还原器 + + Other languages + 其他语种 - - spaCy - Danish lemmatizer - spaCy - 丹麦语词形还原器 + + All languages (UTF-8 without BOM) + 所有语种(UTF-8 无签名) - - spaCy - Dutch lemmatizer - spaCy - 荷兰语词形还原器 + + All languages (UTF-8 with BOM) + 所有语种(UTF-8 带签名) - - spaCy - English lemmatizer - spaCy - 英语词形还原器 + + All languages (UTF-16 with BOM) + 所有语种(UTF-16 带签名) - - spaCy - Finnish lemmatizer - spaCy - 芬兰语词形还原器 + + All languages (UTF-16BE without BOM) + 所有语种(UTF-16 大端无签名) - - spaCy - French lemmatizer - spaCy - 法语词形还原器 + + All languages (UTF-16LE without BOM) + 所有语种(UTF-16 小端无签名) - - spaCy - German lemmatizer - spaCy - 德语词形还原器 - - - - spaCy - Greek (Ancient) lemmatizer - spaCy - 希腊语(古)词形还原器 + + All languages (UTF-32 with BOM) + 所有语种(UTF-32 带签名) - - spaCy - Greek (Modern) lemmatizer - spaCy - 希腊语(现代)词形还原器 + + All languages (UTF-32BE without BOM) + 所有语种(UTF-32 大端无签名) - - spaCy - Hungarian lemmatizer - spaCy - 匈牙利语词形还原器 + + All languages (UTF-32LE without BOM) + 所有语种(UTF-32 小端无签名) - - spaCy - Indonesian lemmatizer - spaCy - 印度尼西亚语词形还原器 + + All languages (UTF-7) + 所有语种(UTF-7) - - spaCy - Irish lemmatizer - spaCy - 爱尔兰语词形还原器 + + Arabic (CP720) + 阿拉伯语(CP720) - - spaCy - Italian lemmatizer - spaCy - 意大利语词形还原器 + + Arabic (CP864) + 阿拉伯语(CP864) - - spaCy - Japanese lemmatizer - spaCy - 日语词形还原器 + + Arabic (ISO-8859-6) + 阿拉伯语(ISO-8859-6) - - spaCy - Lithuanian lemmatizer - spaCy - 立陶宛语词形还原器 + + Arabic (Mac OS Arabic) + 阿拉伯语(Mac OS Arabic) - - spaCy - Luxembourgish lemmatizer - spaCy - 卢森堡语词形还原器 + + Arabic (Windows-1256) + 阿拉伯语(Windows-1256) - - spaCy - Macedonian lemmatizer - spaCy - 
马其顿语词形还原器 + + Baltic languages (CP775) + 波罗的海诸语(CP775) - - spaCy - Norwegian Bokmål lemmatizer - spaCy - 书面挪威语词形还原器 + + Baltic languages (ISO-8859-13) + 波罗的海诸语(ISO-8859-13) - - spaCy - Persian lemmatizer - spaCy - 波斯语词形还原器 + + Baltic languages (Windows-1257) + 波罗的海诸语(Windows-1257) - - spaCy - Polish lemmatizer - spaCy - 波兰语词形还原器 + + Celtic languages (ISO-8859-14) + 凯尔特语(ISO-8859-14) - - spaCy - Portuguese lemmatizer - spaCy - 葡萄牙语词形还原器 + + Chinese (GB18030) + 汉语(GB18030) - - spaCy - Romanian lemmatizer - spaCy - 罗马尼亚语词形还原器 + + Chinese (GBK) + 汉语(GBK) - - spaCy - Russian lemmatizer - spaCy - 俄语词形还原器 + + Chinese (Simplified) (GB2312) + 汉语(简体)(GB2312) - - spaCy - Serbian lemmatizer - spaCy - 塞尔维亚语词形还原器 + + Chinese (Simplified) (HZ) + 汉语(简体)(HZ) - - spaCy - Spanish lemmatizer - spaCy - 西班牙语词形还原器 + + Chinese (Traditional) (Big-5) + 汉语(繁体)(Big-5) - - spaCy - Swedish lemmatizer - spaCy - 瑞典语词形还原器 + + Chinese (Traditional) (Big5-HKSCS) + 汉语(繁体)(Big5-HKSCS) - - spaCy - Tagalog lemmatizer - spaCy - 他加禄语词形还原器 + + Chinese (Traditional) (CP950) + 汉语(繁体)(CP950) - - spaCy - Turkish lemmatizer - spaCy - 土耳其语词形还原器 + + Croatian (Mac OS Croatian) + 克罗地亚语(Mac OS Croatian) - - spaCy - Ukrainian lemmatizer - spaCy - 乌克兰语词形还原器 + + Cyrillic (CP855) + 西里尔(CP855) - - spaCy - Urdu lemmatizer - spaCy - 乌尔都语词形还原器 + + Cyrillic (CP866) + 西里尔(CP866) - - SudachiPy - Japanese lemmatizer - SudachiPy - 日语词形还原器 + + Cyrillic (ISO-8859-5) + 西里尔(ISO-8859-5) - - NLTK - Arabic stop word list - NLTK - 阿拉伯语停用词表 + + Cyrillic (Mac OS Cyrillic) + 西里尔(Mac OS Cyrillic) - - NLTK - Azerbaijani stop word list - NLTK - 阿塞拜疆语停用词表 + + Cyrillic (Windows-1251) + 西里尔(Windows-1251) - - NLTK - Basque stop word list - NLTK - 巴斯克语停用词表 + + English (ASCII) + 英语(ASCII) - - NLTK - Bengali stop word list - NLTK - 孟加拉语停用词表 + + English (EBCDIC 037) + 英语(EBCDIC 037) - - NLTK - Catalan stop word list - NLTK - 加泰罗尼亚语停用词表 + + English (CP437) + 英语(CP437) - - NLTK - Chinese (Simplified) stop word list - NLTK - 汉语(简体)停用词表 + + European (HP 
Roman-8) + 欧洲(HP Roman-8) - - NLTK - Chinese (Traditional) stop word list - NLTK - 汉语(繁体)停用词表 + + European (Central) (CP852) + 欧洲(中部)(CP852) - - NLTK - Danish stop word list - NLTK - 丹麦语停用词表 + + European (Central) (ISO-8859-2) + 欧洲(中部)(ISO-8859-2) - - NLTK - Dutch stop word list - NLTK - 荷兰语停用词表 + + European (Central) (Mac OS Central European) + 欧洲(中部)(Mac OS Central European) - - NLTK - English stop word list - NLTK - 英语停用词表 + + European (Central) (Windows-1250) + 欧洲(中部)(Windows-1250) - - NLTK - Finnish stop word list - NLTK - 芬兰语停用词表 + + European (Northern) (ISO-8859-4) + 欧洲(北部)(ISO-8859-4) - - NLTK - French stop word list - NLTK - 法语停用词表 + + European (Southern) (ISO-8859-3) + 欧洲(南部)(ISO-8859-3) - - NLTK - German stop word list - NLTK - 德语停用词表 + + European (Southeastern) (ISO-8859-16) + 欧洲(东南部)(ISO-8859-16) - - NLTK - Greek (Modern) stop word list - NLTK - 希腊语(现代)停用词表 + + European (Western) (EBCDIC 500) + 欧洲(西部)(EBCDIC 500) - - NLTK - Hungarian stop word list - NLTK - 匈牙利语停用词表 + + European (Western) (CP850) + 欧洲(西部)(CP850) - - NLTK - Indonesian stop word list - NLTK - 印度尼西亚语停用词表 + + European (Western) (CP858) + 欧洲(西部)(CP858) - - NLTK - Italian stop word list - NLTK - 意大利语停用词表 + + European (Western) (CP1140) + 欧洲(西部)(CP1140) - - NLTK - Kazakh stop word list - NLTK - 哈萨克语停用词表 + + European (Western) (ISO-8859-1) + 欧洲(西部)(ISO-8859-1) - - NLTK - Nepali stop word list - NLTK - 尼泊尔语停用词表 + + European (Western) (ISO-8859-15) + 欧洲(西部)(ISO-8859-15) - - NLTK - Norwegian stop word list - NLTK - 挪威语停用词表 + + European (Western) (Mac OS Roman) + 欧洲(西部)(Mac OS Roman) - - NLTK - Portuguese stop word list - NLTK - 葡萄牙语停用词表 + + European (Western) (Windows-1252) + 欧洲(西部)(Windows-1252) - - NLTK - Romanian stop word list - NLTK - 罗马尼亚语停用词表 + + French (CP863) + 法语(CP863) - - NLTK - Russian stop word list - NLTK - 俄语停用词表 + + German (EBCDIC 273) + 德语(EBCDIC 273) - - NLTK - Slovenian stop word list - NLTK - 斯洛文尼亚语停用词表 + + Greek (CP737) + 希腊语(CP737) - - NLTK - Spanish stop word list - NLTK - 
西班牙语停用词表 + + Greek (CP869) + 希腊语(CP869) - - NLTK - Swedish stop word list - NLTK - 瑞典语停用词表 + + Greek (CP875) + 希腊语(CP875) - - NLTK - Tajik stop word list - NLTK - 塔吉克语停用词表 + + Greek (ISO-8859-7) + 希腊语(ISO-8859-7) - - NLTK - Turkish stop word list - NLTK - 土耳其语停用词表 + + Greek (Mac OS Greek) + 希腊语(Mac OS Greek) - - PyThaiNLP - Thai stop word list - PyThaiNLP - 泰语停用词表 + + Greek (Windows-1253) + 希腊语(Windows-1253) - - Custom stop word list - 自定义停用词表 + + Hebrew (CP856) + 希伯来语(CP856) - - spaCy - Catalan dependency parser - spaCy - 加泰罗尼亚语依存分析器 + + Hebrew (CP862) + 希伯来语(CP862) - - spaCy - Chinese dependency parser - spaCy - 汉语依存分析器 + + Hebrew (EBCDIC 424) + 希伯来语(EBCDIC 424) - - spaCy - Croatian dependency parser - spaCy - 克罗地亚语依存分析器 + + Hebrew (ISO-8859-8) + 希伯来语(ISO-8859-8) - - spaCy - Danish dependency parser - spaCy - 丹麦语依存分析器 + + Hebrew (Windows-1255) + 希伯来语(Windows-1255) - - spaCy - Dutch dependency parser - spaCy - 荷兰语依存分析器 + + Icelandic (CP861) + 冰岛语(CP861) - - spaCy - English dependency parser - spaCy - 英语依存分析器 + + Icelandic (Mac OS Icelandic) + 冰岛语(Mac OS Icelandic) - - spaCy - Finnish dependency parser - spaCy - 芬兰语依存分析器 + + Japanese (CP932) + 日语(CP932) - - spaCy - French dependency parser - spaCy - 法语依存分析器 + + Japanese (EUC-JP) + 日语(EUC-JP) - - spaCy - German dependency parser - spaCy - 德语依存分析器 + + Japanese (EUC-JIS-2004) + 日语(EUC-JIS-2004) - - spaCy - Greek (Modern) dependency parser - spaCy - 希腊语(现代)依存分析器 + + Japanese (EUC-JISx0213) + 日语(EUC-JISx0213) - - spaCy - Italian dependency parser - spaCy - 意大利语依存分析器 + + Japanese (ISO-2022-JP) + 日语(ISO-2022-JP) - - spaCy - Japanese dependency parser - spaCy - 日语依存分析器 + + Japanese (ISO-2022-JP-1) + 日语(ISO-2022-JP-1) - - spaCy - Lithuanian dependency parser - spaCy - 立陶宛语依存分析器 + + Japanese (ISO-2022-JP-2) + 日语(ISO-2022-JP-2) - - spaCy - Macedonian dependency parser - spaCy - 马其顿语依存分析器 + + Japanese (ISO-2022-JP-2004) + 日语(ISO-2022-JP-2004) - - spaCy - Norwegian Bokmål dependency parser - spaCy - 书面挪威语依存分析器 + + Japanese 
(ISO-2022-JP-3) + 日语(ISO-2022-JP-3) - - spaCy - Polish dependency parser - spaCy - 波兰语依存分析器 + + Japanese (ISO-2022-JP-EXT) + 日语(ISO-2022-JP-EXT) - - spaCy - Portuguese dependency parser - spaCy - 葡萄牙语依存分析器 + + Japanese (Shift_JIS) + 日语(Shift_JIS) - - spaCy - Romanian dependency parser - spaCy - 罗马尼亚语依存分析器 + + Japanese (Shift_JIS-2004) + 日语(Shift_JIS-2004) - - spaCy - Russian dependency parser - spaCy - 俄语依存分析器 + + Japanese (Shift_JISx0213) + 日语(Shift_JISx0213) - - spaCy - Spanish dependency parser - spaCy - 西班牙语依存分析器 + + Kazakh (KZ-1048) + 哈萨克语(KZ-1048) - - spaCy - Swedish dependency parser - spaCy - 瑞典语依存分析器 + + Kazakh (PTCP154) + 哈萨克语(PTCP154) - - spaCy - Ukrainian dependency parser - spaCy - 乌克兰语依存分析器 + + Korean (EUC-KR) + 韩语(EUC-KR) - - Average logarithmic distance - + + Korean (ISO-2022-KR) + 韩语(ISO-2022-KR) - - Average reduced frequency - + + Korean (JOHAB) + 韩语(JOHAB) - - Average waiting time - + + Korean (UHC) + 韩语(UHC) - - Carroll's Uₘ - + + Nordic languages (CP865) + 北欧诸语(CP865) - - Fisher's exact test - 费希尔精确检验 + + Nordic languages (ISO-8859-10) + 北欧诸语(ISO-8859-10) - - Log-likelihood ratio test - 对数似然比检验 + + Persian/Urdu (Mac OS Farsi) + 波斯语/乌尔都语(Mac OS Farsi) - - Pearson's chi-squared test - 皮尔森卡方检验 + + Portuguese (CP860) + 葡萄牙语(CP860) - - Cubic association ratio - + + Romanian (Mac OS Romanian) + 罗马尼亚语(Mac OS Romanian) - - Dice's coefficient - Dice 系数 + + Russian (KOI8-R) + 俄语(KOI8-R) - - Difference coefficient - + + Tajik (KOI8-T) + 塔吉克语(KOI8-T) - - Jaccard index - 雅卡尔指数 + + Thai (CP874) + 泰语(CP874) - - Log-frequency biased MD - + + Thai (ISO-8859-11) + 泰语(ISO-8859-11) - - Kilgarriff's ratio - + + Thai (TIS-620) + 泰语(TIS-620) - - Log ratio - + + Turkish (CP857) + 土耳其语(CP857) - - Minimum sensitivity - + + Turkish (EBCDIC 1026) + 土耳其语(EBCDIC 1026) - - Mutual dependency - + + Turkish (ISO-8859-9) + 土耳其语(ISO-8859-9) - - Mutual expectation - + + Turkish (Mac OS Turkish) + 土耳其语(Mac OS Turkish) - - Mutual information - 互信息 + + Turkish (Windows-1254) + 
土耳其语(Windows-1254) - - Odds ratio - 比值比 + + Ukrainian (CP1125) + 乌克兰语(CP1125) - - Pointwise mutual information - 点互信息 + + Ukrainian (KOI8-U) + 乌克兰语(KOI8-U) - - Poisson collocation measure - - + + Urdu (CP1006) + 乌尔都语(CP1006) + - - Squared phi coefficient - Phi 系数的平方 + + Vietnamese (CP1258) + 越南语(CP1258) - - Microsoft Paint files (*.msp) - Microsoft Paint 文件 (*.msp) + + CSV files (*.csv) + CSV 文件 (*.csv) - - Khmer - 柬埔寨语 + + Excel workbooks (*.xlsx) + Excel 工作簿 (*.xlsx) - - khmer-nltk - Khmer sentence tokenizer - khmer-nltk - 柬埔寨语分句器 + + HTML pages (*.htm; *.html) + HTML 页面 (*.htm; *.html) - - spaCy - Korean dependency parser - spaCy - 韩语依存分析器 + + Lyrics files (*.lrc) + 歌词文件(*.lrc) - - spaCy - Slovenian dependency parser - spaCy - 斯洛文尼亚语依存分析器 + + PDF files (*.pdf) + PDF 文件 (*.pdf) - - spaCy - Korean sentence recognizer - spaCy - 韩语句子识别器 + + PowerPoint presentations (*.pptx) + PowerPoint 演示文稿 (*.pptx) - - khmer-nltk - Khmer word tokenizer - khmer-nltk - 柬埔寨语分词器 + + Text files (*.txt) + 文本文件 (*.txt) - - spaCy - Korean word tokenizer - spaCy - 韩语分词器 + + Translation memory files (*.tmx) + 翻译记忆库文件 (*.tmx) - - spaCy - Malay word tokenizer - spaCy - 马来语分词器 + + Word documents (*.docx) + Word 文档 (*.docx) - - khmer-nltk - Khmer part-of-speech tagger - khmer-nltk - 柬埔寨语词性标注器 + + XML files (*.xml) + XML 文件 (*.xml) - - PyThaiNLP - Perceptron part-of-speech tagger (Blackboard) - PyThaiNLP - 感知机词性标注器(Blackboard) + + All files (*.*) + 所有文件 (*.*) - - spaCy - Korean part-of-speech tagger - spaCy - 韩语词性标注器 + + OpenType fonts (*.otf) + OpenType 字体 (*.otf) - - spaCy - Slovenian part-of-speech tagger - spaCy - 斯洛文尼亚语词性标注器 + + TrueType fonts (*.ttf) + TrueType 字体 (*.ttf) - - spaCy - Korean lemmatizer - spaCy - 韩语词形还原器 + + Blizzard mipmap format (*.blp) + - - spaCy - Slovenian lemmatizer - spaCy - 斯洛文尼亚语词形还原器 + + Windows bitmaps (*.bmp) + Windows 位图 (*.bmp) - - Dostoevsky - Russian sentiment analyzer - Dostoevsky - 俄语情感分析器 + + Window cursor files (*.cur) + Window 光标文件 (*.cur) - - 
Underthesea - Vietnamese sentiment analyzer - Underthesea - 越南语情感分析器 + + Multi-page PCX files (*.dcx) + 多页 PCX 文件 (*.dcx) - - Armenian (Eastern) - 亚美尼亚语(东) + + DirectDraw surface (*.dds) + - - Armenian (Western) - 亚美尼亚语(西) + + Device-independent bitmaps (*.dib) + 设备无关位图 (*.dib) - - Buryat (Russia) - 布里亚特语(俄罗斯) + + Encapsulated PostScript (*.eps, *.ps) + - - Chinese (Classical) - 汉语(文言) + + Flexible image transport system (*.fit, *.fits) + - - Church Slavonic (Old) - 教会斯拉夫语(古) + + Autodesk animation files (*.flc, *.fli) + Autodesk 动画文件 (*.flc, *.fli) - - Coptic - 科普特语 + + Fox Engine textures (*.ftex) + Fox Engine 纹理 (*.ftex) - - Erzya - 埃尔齐亚语 + + GIMP brush files (*.gbr) + GIMP 笔刷文件 (*.gbr) - - Faroese - 法罗语 + + Graphics interchange format (*.gif) + - - French (Old) - 法语(古) + + Apple icon images (*.icns) + 苹果图标图片 (*.icns) - - Gothic - 哥特语 + + Windows icon files (*.ico) + Windows 图标文件 (*.ico) - - Hebrew (Ancient) - 希伯来语(古) + + IPTC/NAA newsphoto files (*.iim) + IPTC/NAA newsphoto 文件 (*.iim) - - Hebrew (Modern) - 希伯来语(现代) + + IM files (*.im) + IM 文件 (*.im) - - Kurdish (Kurmanji) - 库尔德语(库尔曼吉语) + + Image Tools image files (*) + Image Tools 图像文件 (*) - - Lao - 老挝语 + + JPEG files (*.jfif, *.jpe, *.jpeg, *.jpg) + JPEG 文件 (*.jfif, *.jpe, *.jpeg, *.jpg) - - Maltese - 马耳他语 + + JPEG 2000 files (*.j2c, *.j2k, *.jp2, *.jpc, *.jpf, *.jpx) + JPEG 2000 文件 (*.j2c, *.j2k, *.jp2, *.jpc, *.jpf, *.jpx) - - Nigerian Pidgin - 尼日利亚皮钦语 + + McIDAS area files (*) + McIDAS area 文件 (*) - - Pomak - 波马克语 + + Microsoft Paint files (*.msp) + Microsoft Paint 文件 (*.msp) - - Russian (Old) - 俄语(古) + + PhotoCD files (*.pcd) + PhotoCD 文件 (*.pcd) - - Sindhi - 信德语 + + Picture exchange (*.pcx) + - - Uyghur - 维吾尔语 + + PIXAR raster files (*.pxr) + PIXAR 栅格文件 (*.pxr) - - Wolof - 沃洛夫语 + + Portable network graphics (*.apng, *.png) + - - LaoNLP - Lao sentence tokenizer - LaoNLP - 老挝语分句器 + + Portable pixmap format (*.pbm, *.pgm, *.pnm, *.ppm) + - - Stanza - Afrikaans sentence tokenizer - Stanza - 南非语分句器 + + 
Photoshop PSD files (*.psd) + Photoshop PSD 文件 (*.psd) - - Stanza - Arabic sentence tokenizer - Stanza - 阿拉伯语分句器 + + Sun raster files (*.ras) + Sun 栅格文件 (*.ras) - - Stanza - Armenian (Eastern) sentence tokenizer - Stanza - 亚美尼亚语(东)分句器 + + Silicon graphics images (*.bw, *.rgb, *.rgba, *.sgi) + - - Stanza - Armenian (Western) sentence tokenizer - Stanza - 亚美尼亚语(西)分句器 + + SPIDER files (*) + SPIDER 文件 (*) - - Stanza - Basque sentence tokenizer - Stanza - 巴斯克语分句器 + + Truevision TGA (*.icb, *.tga, *.vda, *.vst) + - - Stanza - Belarusian sentence tokenizer - Stanza - 白俄罗斯语分句器 + + TIFF files (*.tif, *.tiff) + TIFF 文件 (*.tif, *.tiff) - - Stanza - Bulgarian sentence tokenizer - Stanza - 保加利亚语分句器 + + WebP files (*.webp) + WebP 文件 (*.webp) - - Stanza - Burmese sentence tokenizer - Stanza - 缅甸语分句器 + + Windows metafiles (*.emf, *.wmf) + Windows 元文件 (*.emf, *.wmf) - - Stanza - Buryat (Russia) sentence tokenizer - Stanza - 布里亚特语(俄罗斯)分句器 + + X bitmaps (*.xbm) + - - Stanza - Catalan sentence tokenizer - Stanza - 加泰罗尼亚语分句器 + + X pixmaps (*.xpm) + - - Stanza - Chinese (Classical) sentence tokenizer - Stanza - 汉语(文言)分句器 + + XV thumbnails (*) + XV 缩略图 (*) - - Stanza - Chinese (Simplified) sentence tokenizer - Stanza - 汉语(简体)分句器 + + botok - Tibetan sentence tokenizer + botok - 藏语分句器 - - Stanza - Chinese (Traditional) sentence tokenizer - Stanza - 汉语(繁体)分句器 + + khmer-nltk - Khmer sentence tokenizer + khmer-nltk - 柬埔寨语分句器 - - Stanza - Church Slavonic (Old) sentence tokenizer - Stanza - 教会斯拉夫语(古)分句器 + + LaoNLP - Lao sentence tokenizer + LaoNLP - 老挝语分句器 - - Stanza - Coptic sentence tokenizer - Stanza - 科普特语分句器 + + NLTK - Czech Punkt sentence tokenizer + NLTK - 捷克语 Punkt 分句器 - - Stanza - Croatian sentence tokenizer - Stanza - 克罗地亚语分句器 + + NLTK - Danish Punkt sentence tokenizer + NLTK - 丹麦语 Punkt 分句器 - - Stanza - Czech sentence tokenizer - Stanza - 捷克语分句器 + + NLTK - Dutch Punkt sentence tokenizer + NLTK - 荷兰语 Punkt 分句器 - - Stanza - Danish sentence tokenizer - Stanza - 丹麦语分句器 + + NLTK - English 
Punkt sentence tokenizer + NLTK - 英语 Punkt 分句器 - - Stanza - Dutch sentence tokenizer - Stanza - 荷兰语分句器 + + NLTK - Estonian Punkt sentence tokenizer + NLTK - 爱沙尼亚语 Punkt 分句器 - - Stanza - English sentence tokenizer - Stanza - 英语分句器 + + NLTK - Finnish Punkt sentence tokenizer + NLTK - 芬兰语 Punkt 分句器 - - Stanza - Erzya sentence tokenizer - Stanza - 埃尔齐亚语分句器 + + NLTK - French Punkt sentence tokenizer + NLTK - 法语 Punkt 分句器 - - Stanza - Estonian sentence tokenizer - Stanza - 爱沙尼亚语分句器 + + NLTK - German Punkt sentence tokenizer + NLTK - 德语 Punkt 分句器 - - Stanza - Faroese sentence tokenizer - Stanza - 法罗语分句器 + + NLTK - Greek Punkt sentence tokenizer + NLTK - 希腊语 Punkt 分句器 - - Stanza - Finnish sentence tokenizer - Stanza - 芬兰语分句器 + + NLTK - Italian Punkt sentence tokenizer + NLTK - 意大利语 Punkt 分句器 - - Stanza - French sentence tokenizer - Stanza - 法语分句器 + + NLTK - Malayalam Punkt sentence tokenizer + NLTK - 马拉雅拉姆语 Punkt 分句器 - - Stanza - French (Old) sentence tokenizer - Stanza - 法语(古)分句器 + + NLTK - Norwegian (Bokmål) Punkt sentence tokenizer + NLTK - 挪威语(书面) Punkt 分句器 - - Stanza - Galician sentence tokenizer - Stanza - 加里西亚语分句器 + + NLTK - Polish Punkt sentence tokenizer + NLTK - 波兰语 Punkt 分句器 - - Stanza - German sentence tokenizer - Stanza - 德语分句器 + + NLTK - Portuguese Punkt sentence tokenizer + NLTK - 葡萄牙语 Punkt 分句器 - - Stanza - Gothic sentence tokenizer - Stanza - 哥特语分句器 + + NLTK - Russian Punkt sentence tokenizer + NLTK - 俄语 Punkt 分句器 - - Stanza - Greek (Ancient) sentence tokenizer - Stanza - 希腊语(古)分句器 + + NLTK - Slovene Punkt sentence tokenizer + NLTK - 斯洛文尼亚语 Punkt 分句器 - - Stanza - Greek (Modern) sentence tokenizer - Stanza - 希腊语(现代)分句器 + + NLTK - Spanish Punkt sentence tokenizer + NLTK - 西班牙语 Punkt 分句器 - - Stanza - Hebrew (Ancient) sentence tokenizer - Stanza - 希伯来语(古)分句器 + + NLTK - Swedish Punkt sentence tokenizer + NLTK - 瑞典语 Punkt 分句器 - - Stanza - Hebrew (Modern) sentence tokenizer - Stanza - 希伯来语(现代)分句器 + + NLTK - Turkish Punkt sentence tokenizer + NLTK - 土耳其语 Punkt 分句器 
- - Stanza - Hindi sentence tokenizer - Stanza - 印地语分句器 + + spaCy - Catalan dependency parser + spaCy - 加泰罗尼亚语依存分析器 - - Stanza - Hungarian sentence tokenizer - Stanza - 匈牙利语分句器 + + spaCy - Chinese dependency parser + spaCy - 汉语依存分析器 - - Stanza - Icelandic sentence tokenizer - Stanza - 冰岛语分句器 + + spaCy - Croatian dependency parser + spaCy - 克罗地亚语依存分析器 - - Stanza - Indonesian sentence tokenizer - Stanza - 印度尼西亚语分句器 + + spaCy - Danish dependency parser + spaCy - 丹麦语依存分析器 - - Stanza - Irish sentence tokenizer - Stanza - 爱尔兰语分句器 + + spaCy - Dutch dependency parser + spaCy - 荷兰语依存分析器 - - Stanza - Italian sentence tokenizer - Stanza - 意大利语分句器 + + spaCy - English dependency parser + spaCy - 英语依存分析器 - - Stanza - Japanese sentence tokenizer - Stanza - 日语分句器 + + spaCy - Finnish dependency parser + spaCy - 芬兰语依存分析器 - - Stanza - Kazakh sentence tokenizer - Stanza - 哈萨克语分句器 + + spaCy - French dependency parser + spaCy - 法语依存分析器 - - Stanza - Korean sentence tokenizer - Stanza - 韩语分句器 + + spaCy - German dependency parser + spaCy - 德语依存分析器 - - Stanza - Kurdish (Kurmanji) sentence tokenizer - Stanza - 库尔德语(库尔曼吉语)分句器 + + spaCy - Greek (Modern) dependency parser + spaCy - 希腊语(现代)依存分析器 - - Stanza - Kyrgyz sentence tokenizer - Stanza - 吉尔吉斯语分句器 + + spaCy - Italian dependency parser + spaCy - 意大利语依存分析器 - - Stanza - Latin sentence tokenizer - Stanza - 拉丁语分句器 + + spaCy - Japanese dependency parser + spaCy - 日语依存分析器 - - Stanza - Latvian sentence tokenizer - Stanza - 拉脱维亚语分句器 + + spaCy - Korean dependency parser + spaCy - 韩语依存分析器 - - Stanza - Ligurian sentence tokenizer - Stanza - 利古里亚语分句器 + + spaCy - Lithuanian dependency parser + spaCy - 立陶宛语依存分析器 - - Stanza - Lithuanian sentence tokenizer - Stanza - 立陶宛语分句器 + + spaCy - Macedonian dependency parser + spaCy - 马其顿语依存分析器 - - Stanza - Maltese sentence tokenizer - Stanza - 马耳他语分句器 + + spaCy - Norwegian (Bokmål) dependency parser + spaCy - 挪威语(书面)依存分析器 - - Stanza - Manx sentence tokenizer - Stanza - 马恩语分句器 + + spaCy - Polish dependency parser + 
spaCy - 波兰语依存分析器 - - Stanza - Marathi sentence tokenizer - Stanza - 马拉地语分句器 + + spaCy - Portuguese dependency parser + spaCy - 葡萄牙语依存分析器 - - Stanza - Nigerian Pidgin sentence tokenizer - Stanza - 尼日利亚皮钦语分句器 + + spaCy - Romanian dependency parser + spaCy - 罗马尼亚语依存分析器 - - Stanza - Norwegian Bokmål sentence tokenizer - Stanza - 书面挪威语分句器 + + spaCy - Russian dependency parser + spaCy - 俄语依存分析器 - - Stanza - Norwegian Nynorsk sentence tokenizer - Stanza - 新挪威语分句器 + + spaCy - Slovene dependency parser + spaCy - 斯洛文尼亚语依存分析器 - - Stanza - Persian sentence tokenizer - Stanza - 波斯语分句器 + + spaCy - Spanish dependency parser + spaCy - 西班牙语依存分析器 - - Stanza - Polish sentence tokenizer - Stanza - 波兰语分句器 + + spaCy - Swedish dependency parser + spaCy - 瑞典语依存分析器 - - Stanza - Pomak sentence tokenizer - Stanza - 波马克语分句器 + + spaCy - Ukrainian dependency parser + spaCy - 乌克兰语依存分析器 - - Stanza - Portuguese sentence tokenizer - Stanza - 葡萄牙语分句器 + + spaCy - Croatian sentence recognizer + spaCy - 克罗地亚语句子识别器 - - Stanza - Romanian sentence tokenizer - Stanza - 罗马尼亚语分句器 + + spaCy - Dutch sentence recognizer + spaCy - 荷兰语句子识别器 - - Stanza - Russian sentence tokenizer - Stanza - 俄语分句器 + + spaCy - Finnish sentence recognizer + spaCy - 芬兰语句子识别器 - - Stanza - Russian (Old) sentence tokenizer - Stanza - 俄语(古)分句器 + + spaCy - Greek (Modern) sentence recognizer + spaCy - 希腊语(现代)句子识别器 - - Stanza - Sámi (Northern) sentence tokenizer - Stanza - 萨米语(北)分句器 + + spaCy - Italian sentence recognizer + spaCy - 意大利语句子识别器 - - Stanza - Sanskrit sentence tokenizer - Stanza - 梵语分句器 + + spaCy - Korean sentence recognizer + spaCy - 韩语句子识别器 - - Stanza - Scottish Gaelic sentence tokenizer - Stanza - 苏格兰盖尔语分句器 + + spaCy - Lithuanian sentence recognizer + spaCy - 立陶宛语句子识别器 - - Stanza - Serbian (Latin) sentence tokenizer - Stanza - 塞尔维亚语(拉丁)分句器 + + spaCy - Macedonian sentence recognizer + spaCy - 马其顿语句子识别器 - - Stanza - Sindhi sentence tokenizer - Stanza - 信德语分句器 + + spaCy - Norwegian (Bokmål) sentence recognizer + spaCy - 
挪威语(书面)句子识别器 - - Stanza - Slovak sentence tokenizer - Stanza - 斯洛伐克语分句器 + + spaCy - Polish sentence recognizer + spaCy - 波兰语句子识别器 - - Stanza - Slovenian sentence tokenizer - Stanza - 斯洛文尼亚语分句器 + + spaCy - Portuguese sentence recognizer + spaCy - 葡萄牙语句子识别器 - - Stanza - Sorbian (Upper) sentence tokenizer - Stanza - 索布语(上)分句器 + + spaCy - Romanian sentence recognizer + spaCy - 罗马尼亚语句子识别器 - - Stanza - Spanish sentence tokenizer - Stanza - 西班牙语分句器 + + spaCy - Russian sentence recognizer + spaCy - 俄语句子识别器 - - Stanza - Swedish sentence tokenizer - Stanza - 瑞典语分句器 + + spaCy - Swedish sentence recognizer + spaCy - 瑞典语句子识别器 - - Stanza - Tamil sentence tokenizer - Stanza - 泰米尔语分句器 + + spaCy - Sentencizer + spaCy - 分句器 - - Stanza - Telugu sentence tokenizer - Stanza - 泰卢固语分句器 + + Stanza - Afrikaans sentence tokenizer + Stanza - 南非语分句器 - - Stanza - Thai sentence tokenizer - Stanza - 泰语分句器 + + Stanza - Arabic sentence tokenizer + Stanza - 阿拉伯语分句器 - - Stanza - Turkish sentence tokenizer - Stanza - 土耳其语分句器 + + Stanza - Armenian (Classical) sentence tokenizer + Stanza - 亚美尼亚语(古)分句器 - - Stanza - Ukrainian sentence tokenizer - Stanza - 乌克兰语分句器 + + Stanza - Armenian (Eastern) sentence tokenizer + Stanza - 亚美尼亚语(东)分句器 - - Stanza - Urdu sentence tokenizer - Stanza - 乌尔都语分句器 + + Stanza - Armenian (Western) sentence tokenizer + Stanza - 亚美尼亚语(西)分句器 - - Stanza - Uyghur sentence tokenizer - Stanza - 维吾尔语分句器 + + Stanza - Basque sentence tokenizer + Stanza - 巴斯克语分句器 - - Stanza - Vietnamese sentence tokenizer - Stanza - 越南语分句器 + + Stanza - Belarusian sentence tokenizer + Stanza - 白俄罗斯语分句器 - - Stanza - Welsh sentence tokenizer - Stanza - 威尔士语分句器 + + Stanza - Bulgarian sentence tokenizer + Stanza - 保加利亚语分句器 - - Stanza - Wolof sentence tokenizer - Stanza - 沃洛夫语分句器 + + Stanza - Burmese sentence tokenizer + Stanza - 缅甸语分句器 - - LaoNLP - Lao word tokenizer - LaoNLP - 老挝语分词器 + + Stanza - Buryat (Russia) sentence tokenizer + Stanza - 布里亚特语(俄罗斯)分句器 - - spaCy - Hebrew (Modern) word tokenizer - spaCy - 
希伯来语(现代)分词器 + + Stanza - Catalan sentence tokenizer + Stanza - 加泰罗尼亚语分句器 - - spaCy - Norwegian Bokmål word tokenizer - spaCy - 书面挪威语分词器 + + Stanza - Chinese (Classical) sentence tokenizer + Stanza - 汉语(文言)分句器 - - Stanza - Afrikaans word tokenizer - Stanza - 南非语分词器 + + Stanza - Chinese (Simplified) sentence tokenizer + Stanza - 汉语(简体)分句器 - - Stanza - Arabic word tokenizer - Stanza - 阿拉伯语分词器 + + Stanza - Chinese (Traditional) sentence tokenizer + Stanza - 汉语(繁体)分句器 - - Stanza - Armenian (Eastern) word tokenizer - Stanza - 亚美尼亚语(东)分词器 + + Stanza - Church Slavonic (Old) sentence tokenizer + Stanza - 教会斯拉夫语(古)分句器 - - Stanza - Armenian (Western) word tokenizer - Stanza - 亚美尼亚语(西)分词器 + + Stanza - Coptic sentence tokenizer + Stanza - 科普特语分句器 - - Stanza - Basque word tokenizer - Stanza - 巴斯克语分词器 + + Stanza - Croatian sentence tokenizer + Stanza - 克罗地亚语分句器 - - Stanza - Belarusian word tokenizer - Stanza - 白俄罗斯语分词器 + + Stanza - Czech sentence tokenizer + Stanza - 捷克语分句器 - - Stanza - Bulgarian word tokenizer - Stanza - 保加利亚语分词器 + + Stanza - Danish sentence tokenizer + Stanza - 丹麦语分句器 - - Stanza - Burmese word tokenizer - Stanza - 缅甸语分词器 + + Stanza - Dutch sentence tokenizer + Stanza - 荷兰语分句器 - - Stanza - Buryat (Russia) word tokenizer - Stanza - 布里亚特语(俄罗斯)分词器 + + Stanza - English sentence tokenizer + Stanza - 英语分句器 - - Stanza - Catalan word tokenizer - Stanza - 加泰罗尼亚语分词器 + + Stanza - English (Old) sentence tokenizer + Stanza - 英语(古)分句器 - - Stanza - Chinese (Classical) word tokenizer - Stanza - 汉语(文言)分词器 + + Stanza - Erzya sentence tokenizer + Stanza - 埃尔齐亚语分句器 - - Stanza - Chinese (Simplified) word tokenizer - Stanza - 汉语(简体)分词器 + + Stanza - Estonian sentence tokenizer + Stanza - 爱沙尼亚语分句器 - - Stanza - Chinese (Traditional) word tokenizer - Stanza - 汉语(繁体)分词器 + + Stanza - Faroese sentence tokenizer + Stanza - 法罗语分句器 - - Stanza - Church Slavonic (Old) word tokenizer - Stanza - 教会斯拉夫语(古)分词器 + + Stanza - Finnish sentence tokenizer + Stanza - 芬兰语分句器 - - Stanza - Coptic word 
tokenizer - Stanza - 科普特语分词器 + + Stanza - French sentence tokenizer + Stanza - 法语分句器 - - Stanza - Croatian word tokenizer - Stanza - 克罗地亚语分词器 + + Stanza - French (Old) sentence tokenizer + Stanza - 法语(古)分句器 - - Stanza - Czech word tokenizer - Stanza - 捷克语分词器 + + Stanza - Galician sentence tokenizer + Stanza - 加里西亚语分句器 - - Stanza - Danish word tokenizer - Stanza - 丹麦语分词器 + + Stanza - German sentence tokenizer + Stanza - 德语分句器 - - Stanza - Dutch word tokenizer - Stanza - 荷兰语分词器 + + Stanza - Gothic sentence tokenizer + Stanza - 哥特语分句器 - - Stanza - English word tokenizer - Stanza - 英语分词器 + + Stanza - Greek (Ancient) sentence tokenizer + Stanza - 希腊语(古)分句器 - - Stanza - Erzya word tokenizer - Stanza - 埃尔齐亚语分词器 + + Stanza - Greek (Modern) sentence tokenizer + Stanza - 希腊语(现代)分句器 - - Stanza - Estonian word tokenizer - Stanza - 爱沙尼亚语分词器 + + Stanza - Hebrew (Ancient) sentence tokenizer + Stanza - 希伯来语(古)分句器 - - Stanza - Faroese word tokenizer - Stanza - 法罗语分词器 + + Stanza - Hebrew (Modern) sentence tokenizer + Stanza - 希伯来语(现代)分句器 - - Stanza - Finnish word tokenizer - Stanza - 芬兰语分词器 + + Stanza - Hindi sentence tokenizer + Stanza - 印地语分句器 - - Stanza - French word tokenizer - Stanza - 法语分词器 + + Stanza - Hungarian sentence tokenizer + Stanza - 匈牙利语分句器 - - Stanza - French (Old) word tokenizer - Stanza - 法语(古)分词器 + + Stanza - Icelandic sentence tokenizer + Stanza - 冰岛语分句器 - - Stanza - Galician word tokenizer - Stanza - 加里西亚语分词器 + + Stanza - Indonesian sentence tokenizer + Stanza - 印度尼西亚语分句器 - - Stanza - German word tokenizer - Stanza - 德语分词器 + + Stanza - Irish sentence tokenizer + Stanza - 爱尔兰语分句器 - - Stanza - Gothic word tokenizer - Stanza - 哥特语分词器 + + Stanza - Italian sentence tokenizer + Stanza - 意大利语分句器 - - Stanza - Greek (Ancient) word tokenizer - Stanza - 希腊语(古)分词器 + + Stanza - Japanese sentence tokenizer + Stanza - 日语分句器 - - Stanza - Greek (Modern) word tokenizer - Stanza - 希腊语(现代)分词器 + + Stanza - Kazakh sentence tokenizer + Stanza - 哈萨克语分句器 - - Stanza - Hebrew (Ancient) 
word tokenizer - Stanza - 希伯来语(古)分词器 + + Stanza - Korean sentence tokenizer + Stanza - 韩语分句器 - - Stanza - Hebrew (Modern) word tokenizer - Stanza - 希伯来语(现代)分词器 + + Stanza - Kurdish (Kurmanji) sentence tokenizer + Stanza - 库尔德语(库尔曼吉语)分句器 - - Stanza - Hindi word tokenizer - Stanza - 印地语分词器 + + Stanza - Kyrgyz sentence tokenizer + Stanza - 吉尔吉斯语分句器 - - Stanza - Hungarian word tokenizer - Stanza - 匈牙利语分词器 + + Stanza - Latin sentence tokenizer + Stanza - 拉丁语分句器 - - Stanza - Icelandic word tokenizer - Stanza - 冰岛语分词器 + + Stanza - Latvian sentence tokenizer + Stanza - 拉脱维亚语分句器 - - Stanza - Indonesian word tokenizer - Stanza - 印度尼西亚语分词器 + + Stanza - Ligurian sentence tokenizer + Stanza - 利古里亚语分句器 - - Stanza - Irish word tokenizer - Stanza - 爱尔兰语分词器 + + Stanza - Lithuanian sentence tokenizer + Stanza - 立陶宛语分句器 - - Stanza - Italian word tokenizer - Stanza - 意大利语分词器 + + Stanza - Maltese sentence tokenizer + Stanza - 马耳他语分句器 - - Stanza - Japanese word tokenizer - Stanza - 日语分词器 + + Stanza - Manx sentence tokenizer + Stanza - 马恩语分句器 - - Stanza - Kazakh word tokenizer - Stanza - 哈萨克语分词器 + + Stanza - Marathi sentence tokenizer + Stanza - 马拉地语分句器 - - Stanza - Korean word tokenizer - Stanza - 韩语分词器 + + Stanza - Nigerian Pidgin sentence tokenizer + Stanza - 尼日利亚皮钦语分句器 - - Stanza - Kurdish (Kurmanji) word tokenizer - Stanza - 库尔德语(库尔曼吉语)分词器 + + Stanza - Norwegian (Bokmål) sentence tokenizer + Stanza - 挪威语(书面)分句器 - - Stanza - Kyrgyz word tokenizer - Stanza - 吉尔吉斯语分词器 + + Stanza - Norwegian (Nynorsk) sentence tokenizer + Stanza - 挪威语(新)分句器 - - Stanza - Latin word tokenizer - Stanza - 拉丁语分词器 + + Stanza - Persian sentence tokenizer + Stanza - 波斯语分句器 - - Stanza - Latvian word tokenizer - Stanza - 拉脱维亚语分词器 + + Stanza - Polish sentence tokenizer + Stanza - 波兰语分句器 - - Stanza - Ligurian word tokenizer - Stanza - 利古里亚语分词器 + + Stanza - Pomak sentence tokenizer + Stanza - 波马克语分句器 - - Stanza - Lithuanian word tokenizer - Stanza - 立陶宛语分词器 + + Stanza - Portuguese sentence tokenizer + Stanza - 
葡萄牙语分句器 - - Stanza - Maltese word tokenizer - Stanza - 马耳他语分词器 + + Stanza - Romanian sentence tokenizer + Stanza - 罗马尼亚语分句器 - - Stanza - Manx word tokenizer - Stanza - 马恩语分词器 + + Stanza - Russian sentence tokenizer + Stanza - 俄语分句器 - - Stanza - Marathi word tokenizer - Stanza - 马拉地语分词器 + + Stanza - Russian (Old) sentence tokenizer + Stanza - 俄语(古)分句器 - - Stanza - Nigerian Pidgin word tokenizer - Stanza - 尼日利亚皮钦语分词器 + + Stanza - Sámi (Northern) sentence tokenizer + Stanza - 萨米语(北)分句器 - - Stanza - Norwegian Bokmål word tokenizer - Stanza - 书面挪威语分词器 + + Stanza - Sanskrit sentence tokenizer + Stanza - 梵语分句器 - - Stanza - Norwegian Nynorsk word tokenizer - Stanza - 新挪威语分词器 + + Stanza - Scottish Gaelic sentence tokenizer + Stanza - 苏格兰盖尔语分句器 - - Stanza - Persian word tokenizer - Stanza - 波斯语分词器 + + Stanza - Serbian (Latin script) sentence tokenizer + Stanza - 塞尔维亚语(拉丁文)分句器 - - Stanza - Polish word tokenizer - Stanza - 波兰语分词器 + + Stanza - Sindhi sentence tokenizer + Stanza - 信德语分句器 - - Stanza - Pomak word tokenizer - Stanza - 波马克语分词器 + + Stanza - Slovak sentence tokenizer + Stanza - 斯洛伐克语分句器 - - Stanza - Portuguese word tokenizer - Stanza - 葡萄牙语分词器 - - - - Stanza - Romanian word tokenizer - Stanza - 罗马尼亚语分词器 + + Stanza - Slovene sentence tokenizer + Stanza - 斯洛文尼亚语分句器 - - Stanza - Russian word tokenizer - Stanza - 俄语分词器 + + Stanza - Sorbian (Upper) sentence tokenizer + Stanza - 索布语(上)分句器 - - Stanza - Russian (Old) word tokenizer - Stanza - 俄语(古)分词器 + + Stanza - Spanish sentence tokenizer + Stanza - 西班牙语分句器 - - Stanza - Sámi (Northern) word tokenizer - Stanza - 萨米语(北)分词器 + + Stanza - Swedish sentence tokenizer + Stanza - 瑞典语分句器 - - Stanza - Sanskrit word tokenizer - Stanza - 梵语分词器 + + Stanza - Tamil sentence tokenizer + Stanza - 泰米尔语分句器 - - Stanza - Scottish Gaelic word tokenizer - Stanza - 苏格兰盖尔语分词器 + + Stanza - Telugu sentence tokenizer + Stanza - 泰卢固语分句器 - - Stanza - Serbian (Latin) word tokenizer - Stanza - 塞尔维亚语(拉丁)分词器 + + Stanza - Thai sentence tokenizer + Stanza - 
泰语分句器 - - Stanza - Sindhi word tokenizer - Stanza - 信德语分词器 + + Stanza - Turkish sentence tokenizer + Stanza - 土耳其语分句器 - - Stanza - Slovak word tokenizer - Stanza - 斯洛伐克语分词器 + + Stanza - Ukrainian sentence tokenizer + Stanza - 乌克兰语分句器 - - Stanza - Slovenian word tokenizer - Stanza - 斯洛文尼亚语分词器 + + Stanza - Urdu sentence tokenizer + Stanza - 乌尔都语分句器 - - Stanza - Sorbian (Upper) word tokenizer - Stanza - 索布语(上)分词器 + + Stanza - Uyghur sentence tokenizer + Stanza - 维吾尔语分句器 - - Stanza - Spanish word tokenizer - Stanza - 西班牙语分词器 + + Stanza - Vietnamese sentence tokenizer + Stanza - 越南语分句器 - - Stanza - Swedish word tokenizer - Stanza - 瑞典语分词器 + + Stanza - Welsh sentence tokenizer + Stanza - 威尔士语分句器 - - Stanza - Tamil word tokenizer - Stanza - 泰米尔语分词器 + + Stanza - Wolof sentence tokenizer + Stanza - 沃洛夫语分句器 - - Stanza - Telugu word tokenizer - Stanza - 泰卢固语分词器 + + Underthesea - Vietnamese sentence tokenizer + Underthesea - 越南语分句器 - - Stanza - Thai word tokenizer - Stanza - 泰语分词器 + + botok - Tibetan word tokenizer + botok - 藏语分词器 - - Stanza - Turkish word tokenizer - Stanza - 土耳其语分词器 + + khmer-nltk - Khmer word tokenizer + khmer-nltk - 柬埔寨语分词器 - - Stanza - Ukrainian word tokenizer - Stanza - 乌克兰语分词器 + + LaoNLP - Lao word tokenizer + LaoNLP - 老挝语分词器 - - Stanza - Urdu word tokenizer - Stanza - 乌尔都语分词器 + + NLTK - NIST tokenizer + NLTK - NIST 分词器 - - Stanza - Uyghur word tokenizer - Stanza - 维吾尔语分词器 + + NLTK - NLTK tokenizer + NLTK - NLTK 分词器 - - Stanza - Vietnamese word tokenizer - Stanza - 越南语分词器 + + NLTK - Penn Treebank tokenizer + NLTK - 宾州树库分词器 - - Stanza - Welsh word tokenizer - Stanza - 威尔士语分词器 + + NLTK - Regular-expression tokenizer + NLTK - 正则表达式分词器 - - Stanza - Wolof word tokenizer - Stanza - 沃洛夫语分词器 + + NLTK - Tok-tok tokenizer + NLTK - Tok-tok 分词器 - - LaoNLP - SeqLabeling - + + NLTK - Twitter tokenizer + NLTK - 推特分词器 - - LaoNLP - Yunshan Cup 2020 - + + pkuseg - Chinese word tokenizer + pkuseg - 汉语分词器 - - Stanza - Afrikaans part-of-speech tagger - Stanza - 南非语词性标注器 + + 
PyThaiNLP - Longest matching + PyThaiNLP - 最长匹配 - - Stanza - Arabic part-of-speech tagger - Stanza - 阿拉伯语词性标注器 + + PyThaiNLP - Maximum matching + PyThaiNLP - 最大匹配 - - Stanza - Armenian (Eastern) part-of-speech tagger - Stanza - 亚美尼亚语(东)词性标注器 + + PyThaiNLP - Maximum matching + TCC + PyThaiNLP - 最大匹配 + TCC - - Stanza - Armenian (Western) part-of-speech tagger - Stanza - 亚美尼亚语(西)词性标注器 + + Sacremoses - Moses tokenizer + Sacremoses - Moses 分词器 - - Stanza - Basque part-of-speech tagger - Stanza - 巴斯克语词性标注器 + + spaCy - Afrikaans word tokenizer + spaCy - 南非语分词器 - - Stanza - Belarusian part-of-speech tagger - Stanza - 白俄罗斯语词性标注器 + + spaCy - Albanian word tokenizer + spaCy - 阿尔巴尼亚语分词器 - - Stanza - Bulgarian part-of-speech tagger - Stanza - 保加利亚语词性标注器 + + spaCy - Amharic word tokenizer + spaCy - 阿姆哈拉语分词器 - - Stanza - Buryat (Russia) part-of-speech tagger - Stanza - 布里亚特语(俄罗斯)词性标注器 + + spaCy - Arabic word tokenizer + spaCy - 阿拉伯语分词器 - - Stanza - Catalan part-of-speech tagger - Stanza - 加泰罗尼亚语词性标注器 + + spaCy - Armenian word tokenizer + spaCy - 亚美尼亚语分词器 - - Stanza - Chinese (Classical) part-of-speech tagger - Stanza - 汉语(文言)词性标注器 + + spaCy - Azerbaijani word tokenizer + spaCy - 阿塞拜疆语分词器 - - Stanza - Chinese (Simplified) part-of-speech tagger - Stanza - 汉语(简体)词性标注器 + + spaCy - Basque word tokenizer + spaCy - 巴斯克语分词器 - - Stanza - Chinese (Traditional) part-of-speech tagger - Stanza - 汉语(繁体)词性标注器 + + spaCy - Bengali word tokenizer + spaCy - 孟加拉语分词器 - - Stanza - Church Slavonic (Old) part-of-speech tagger - Stanza - 教会斯拉夫语(古)词性标注器 + + spaCy - Bulgarian word tokenizer + spaCy - 保加利亚语分词器 - - Stanza - Coptic part-of-speech tagger - Stanza - 科普特语词性标注器 + + spaCy - Catalan word tokenizer + spaCy - 加泰罗尼亚语分词器 - - Stanza - Croatian part-of-speech tagger - Stanza - 克罗地亚语词性标注器 + + spaCy - Chinese word tokenizer + spaCy - 汉语分词器 - - Stanza - Czech part-of-speech tagger - Stanza - 捷克语词性标注器 + + spaCy - Croatian word tokenizer + spaCy - 克罗地亚语分词器 - - Stanza - Danish part-of-speech tagger - Stanza - 
丹麦语词性标注器 + + spaCy - Czech word tokenizer + spaCy - 捷克语分词器 - - Stanza - Dutch part-of-speech tagger - Stanza - 荷兰语词性标注器 + + spaCy - Danish word tokenizer + spaCy - 丹麦语分词器 - - Stanza - English part-of-speech tagger - Stanza - 英语词性标注器 + + spaCy - Dutch word tokenizer + spaCy - 荷兰语分词器 - - Stanza - Erzya part-of-speech tagger - Stanza - 埃尔齐亚语词性标注器 + + spaCy - English word tokenizer + spaCy - 英语分词器 - - Stanza - Estonian part-of-speech tagger - Stanza - 爱沙尼亚语词性标注器 + + spaCy - Estonian word tokenizer + spaCy - 爱沙尼亚语分词器 - - Stanza - Faroese part-of-speech tagger - Stanza - 法罗语词性标注器 + + spaCy - Faroese word tokenizer + spaCy - 法罗语分词器 - - Stanza - Finnish part-of-speech tagger - Stanza - 芬兰语词性标注器 + + spaCy - Finnish word tokenizer + spaCy - 芬兰语分词器 - - Stanza - French part-of-speech tagger - Stanza - 法语词性标注器 + + spaCy - French word tokenizer + spaCy - 法语分词器 - - Stanza - French (Old) part-of-speech tagger - Stanza - 法语(古)词性标注器 + + spaCy - German word tokenizer + spaCy - 德语分词器 - - Stanza - Galician part-of-speech tagger - Stanza - 加里西亚语词性标注器 + + spaCy - Greek (Ancient) word tokenizer + spaCy - 希腊语(古)分词器 - - Stanza - German part-of-speech tagger - Stanza - 德语词性标注器 + + spaCy - Greek (Modern) word tokenizer + spaCy - 希腊语(现代)分词器 - - Stanza - Gothic part-of-speech tagger - Stanza - 哥特语词性标注器 + + spaCy - Gujarati word tokenizer + spaCy - 古吉拉特语分词器 - - Stanza - Greek (Ancient) part-of-speech tagger - Stanza - 希腊语(古)词性标注器 + + spaCy - Hebrew (Modern) word tokenizer + spaCy - 希伯来语(现代)分词器 - - Stanza - Greek (Modern) part-of-speech tagger - Stanza - 希腊语(现代)词性标注器 + + spaCy - Hindi word tokenizer + spaCy - 印地语分词器 - - Stanza - Hebrew (Ancient) part-of-speech tagger - Stanza - 希伯来语(古)词性标注器 + + spaCy - Hungarian word tokenizer + spaCy - 匈牙利语分词器 - - Stanza - Hebrew (Modern) part-of-speech tagger - Stanza - 希伯来语(现代)词性标注器 + + spaCy - Icelandic word tokenizer + spaCy - 冰岛语分词器 - - Stanza - Hindi part-of-speech tagger - Stanza - 印地语词性标注器 + + spaCy - Indonesian word tokenizer + spaCy - 印度尼西亚语分词器 - - 
Stanza - Hungarian part-of-speech tagger - Stanza - 匈牙利语词性标注器 + + spaCy - Irish word tokenizer + spaCy - 爱尔兰语分词器 - - Stanza - Icelandic part-of-speech tagger - Stanza - 冰岛语词性标注器 + + spaCy - Italian word tokenizer + spaCy - 意大利语分词器 - - Stanza - Indonesian part-of-speech tagger - Stanza - 印度尼西亚语词性标注器 + + spaCy - Japanese word tokenizer + spaCy - 日语分词器 - - Stanza - Irish part-of-speech tagger - Stanza - 爱尔兰语词性标注器 + + spaCy - Kannada word tokenizer + spaCy - 卡纳达语分词器 - - Stanza - Italian part-of-speech tagger - Stanza - 意大利语词性标注器 + + spaCy - Korean word tokenizer + spaCy - 韩语分词器 - - Stanza - Japanese part-of-speech tagger - Stanza - 日语词性标注器 + + spaCy - Kyrgyz word tokenizer + spaCy - 吉尔吉斯语分词器 - - Stanza - Kazakh part-of-speech tagger - Stanza - 哈萨克语词性标注器 + + spaCy - Latin word tokenizer + spaCy - 拉丁语分词器 - - Stanza - Korean part-of-speech tagger - Stanza - 韩语词性标注器 + + spaCy - Latvian word tokenizer + spaCy - 拉脱维亚语分词器 - - Stanza - Kurdish (Kurmanji) part-of-speech tagger - Stanza - 库尔德语(库尔曼吉语)词性标注器 + + spaCy - Ligurian word tokenizer + spaCy - 利古里亚语分词器 - - Stanza - Kyrgyz part-of-speech tagger - Stanza - 吉尔吉斯语词性标注器 + + spaCy - Lithuanian word tokenizer + spaCy - 立陶宛语分词器 - - Stanza - Latin part-of-speech tagger - Stanza - 拉丁语词性标注器 + + spaCy - Luganda word tokenizer + spaCy - 卢干达语分词器 - - Stanza - Latvian part-of-speech tagger - Stanza - 拉脱维亚语词性标注器 + + spaCy - Luxembourgish word tokenizer + spaCy - 卢森堡语分词器 - - Stanza - Ligurian part-of-speech tagger - Stanza - 利古里亚语词性标注器 + + spaCy - Macedonian word tokenizer + spaCy - 马其顿语分词器 - - Stanza - Lithuanian part-of-speech tagger - Stanza - 立陶宛语词性标注器 + + spaCy - Malay word tokenizer + spaCy - 马来语分词器 - - Stanza - Maltese part-of-speech tagger - Stanza - 马耳他语词性标注器 + + spaCy - Malayalam word tokenizer + spaCy - 马拉雅拉姆语分词器 - - Stanza - Manx part-of-speech tagger - Stanza - 马恩语词性标注器 + + spaCy - Marathi word tokenizer + spaCy - 马拉地语分词器 - - Stanza - Marathi part-of-speech tagger - Stanza - 马拉地语词性标注器 + + spaCy - Nepali word tokenizer + spaCy 
- 尼泊尔语分词器 - - Stanza - Nigerian Pidgin part-of-speech tagger - Stanza - 尼日利亚皮钦语词性标注器 + + spaCy - Norwegian (Bokmål) word tokenizer + spaCy - 挪威语(书面)分词器 - - Stanza - Norwegian Bokmål part-of-speech tagger - Stanza - 书面挪威语词性标注器 + + spaCy - Norwegian (Nynorsk) word tokenizer + spaCy - 挪威语(新)分词器 - - Stanza - Norwegian Nynorsk part-of-speech tagger - Stanza - 新挪威语词性标注器 + + spaCy - Persian word tokenizer + spaCy - 波斯语分词器 - - Stanza - Persian part-of-speech tagger - Stanza - 波斯语词性标注器 + + spaCy - Polish word tokenizer + spaCy - 波兰语分词器 - - Stanza - Polish part-of-speech tagger - Stanza - 波兰语词性标注器 + + spaCy - Portuguese word tokenizer + spaCy - 葡萄牙语分词器 - - Stanza - Pomak part-of-speech tagger - Stanza - 波马克语词性标注器 + + spaCy - Romanian word tokenizer + spaCy - 罗马尼亚语分词器 - - Stanza - Portuguese part-of-speech tagger - Stanza - 葡萄牙语词性标注器 + + spaCy - Russian word tokenizer + spaCy - 俄语分词器 - - Stanza - Romanian part-of-speech tagger - Stanza - 罗马尼亚语词性标注器 + + spaCy - Sanskrit word tokenizer + spaCy - 梵语分词器 - - Stanza - Russian part-of-speech tagger - Stanza - 俄语词性标注器 + + spaCy - Serbian (Cyrillic script) word tokenizer + spaCy - 塞尔维亚语(西里尔文)分词器 - - Stanza - Russian (Old) part-of-speech tagger - Stanza - 俄语(古)词性标注器 + + spaCy - Sinhala word tokenizer + spaCy - 僧伽罗语分词器 - - Stanza - Sámi (Northern) part-of-speech tagger - Stanza - 萨米语(北)词性标注器 + + spaCy - Slovak word tokenizer + spaCy - 斯洛伐克语分词器 - - Stanza - Sanskrit part-of-speech tagger - Stanza - 梵语词性标注器 + + spaCy - Slovene word tokenizer + spaCy - 斯洛文尼亚语分词器 - - Stanza - Scottish Gaelic part-of-speech tagger - Stanza - 苏格兰盖尔语词性标注器 + + spaCy - Sorbian (Lower) word tokenizer + spaCy - 索布语(下)分词器 - - Stanza - Serbian (Latin) part-of-speech tagger - Stanza - 塞尔维亚语(拉丁)词性标注器 + + spaCy - Sorbian (Upper) word tokenizer + spaCy - 索布语(上)分词器 - - Stanza - Slovak part-of-speech tagger - Stanza - 斯洛伐克语词性标注器 + + spaCy - Spanish word tokenizer + spaCy - 西班牙语分词器 - - Stanza - Slovenian part-of-speech tagger - Stanza - 斯洛文尼亚语词性标注器 - + + spaCy - Swedish 
word tokenizer + spaCy - 瑞典语分词器 + - - Stanza - Sorbian (Upper) part-of-speech tagger - Stanza - 索布语(上)词性标注器 + + spaCy - Tagalog word tokenizer + spaCy - 他加禄语分词器 - - Stanza - Spanish part-of-speech tagger - Stanza - 西班牙语词性标注器 + + spaCy - Tamil word tokenizer + spaCy - 泰米尔语分词器 - - Stanza - Swedish part-of-speech tagger - Stanza - 瑞典语词性标注器 + + spaCy - Tatar word tokenizer + spaCy - 鞑靼语分词器 - - Stanza - Tamil part-of-speech tagger - Stanza - 泰米尔语词性标注器 + + spaCy - Telugu word tokenizer + spaCy - 泰卢固语分词器 - - Stanza - Telugu part-of-speech tagger - Stanza - 泰卢固语词性标注器 + + spaCy - Tigrinya word tokenizer + spaCy - 提格雷尼亚语分词器 - - Stanza - Turkish part-of-speech tagger - Stanza - 土耳其语词性标注器 + + spaCy - Tswana word tokenizer + spaCy - 茨瓦纳语分词器 - - Stanza - Ukrainian part-of-speech tagger - Stanza - 乌克兰语词性标注器 + + spaCy - Turkish word tokenizer + spaCy - 土耳其语分词器 - - Stanza - Urdu part-of-speech tagger - Stanza - 乌尔都语词性标注器 + + spaCy - Ukrainian word tokenizer + spaCy - 乌克兰语分词器 - - Stanza - Uyghur part-of-speech tagger - Stanza - 维吾尔语词性标注器 + + spaCy - Urdu word tokenizer + spaCy - 乌尔都语分词器 - - Stanza - Vietnamese part-of-speech tagger - Stanza - 越南语词性标注器 + + spaCy - Yoruba word tokenizer + spaCy - 约鲁巴语分词器 - - Stanza - Welsh part-of-speech tagger - Stanza - 威尔士语词性标注器 + + Stanza - Afrikaans word tokenizer + Stanza - 南非语分词器 - - Stanza - Wolof part-of-speech tagger - Stanza - 沃洛夫语词性标注器 + + Stanza - Arabic word tokenizer + Stanza - 阿拉伯语分词器 - - Stanza - Afrikaans lemmatizer - Stanza - 南非语词形还原器 + + Stanza - Armenian (Classical) word tokenizer + Stanza - 亚美尼亚语(古)分词器 - - Stanza - Arabic lemmatizer - Stanza - 阿拉伯语词形还原器 + + Stanza - Armenian (Eastern) word tokenizer + Stanza - 亚美尼亚语(东)分词器 - - Stanza - Armenian (Eastern) lemmatizer - Stanza - 亚美尼亚语(东)词形还原器 + + Stanza - Armenian (Western) word tokenizer + Stanza - 亚美尼亚语(西)分词器 - - Stanza - Armenian (Western) lemmatizer - Stanza - 亚美尼亚语(西)词形还原器 + + Stanza - Basque word tokenizer + Stanza - 巴斯克语分词器 - - Stanza - Basque lemmatizer - Stanza - 巴斯克语词形还原器 + 
+ Stanza - Belarusian word tokenizer + Stanza - 白俄罗斯语分词器 - - Stanza - Belarusian lemmatizer - Stanza - 白俄罗斯语词形还原器 + + Stanza - Bulgarian word tokenizer + Stanza - 保加利亚语分词器 - - Stanza - Bulgarian lemmatizer - Stanza - 保加利亚语词形还原器 + + Stanza - Burmese word tokenizer + Stanza - 缅甸语分词器 - - Stanza - Buryat (Russia) lemmatizer - Stanza - 布里亚特语(俄罗斯)词形还原器 + + Stanza - Buryat (Russia) word tokenizer + Stanza - 布里亚特语(俄罗斯)分词器 - - Stanza - Catalan lemmatizer - Stanza - 加泰罗尼亚语词形还原器 + + Stanza - Catalan word tokenizer + Stanza - 加泰罗尼亚语分词器 - - Stanza - Chinese (Classical) lemmatizer - Stanza - 汉语(文言)词形还原器 + + Stanza - Chinese (Classical) word tokenizer + Stanza - 汉语(文言)分词器 - - Stanza - Chinese (Simplified) lemmatizer - Stanza - 汉语(简体)词形还原器 + + Stanza - Chinese (Simplified) word tokenizer + Stanza - 汉语(简体)分词器 - - Stanza - Chinese (Traditional) lemmatizer - Stanza - 汉语(繁体)词形还原器 + + Stanza - Chinese (Traditional) word tokenizer + Stanza - 汉语(繁体)分词器 - - Stanza - Church Slavonic (Old) lemmatizer - Stanza - 教会斯拉夫语(古)词形还原器 + + Stanza - Church Slavonic (Old) word tokenizer + Stanza - 教会斯拉夫语(古)分词器 - - Stanza - Coptic lemmatizer - Stanza - 科普特语词形还原器 + + Stanza - Coptic word tokenizer + Stanza - 科普特语分词器 - - Stanza - Croatian lemmatizer - Stanza - 克罗地亚语词形还原器 + + Stanza - Croatian word tokenizer + Stanza - 克罗地亚语分词器 - - Stanza - Czech lemmatizer - Stanza - 捷克语词形还原器 + + Stanza - Czech word tokenizer + Stanza - 捷克语分词器 - - Stanza - Danish lemmatizer - Stanza - 丹麦语词形还原器 + + Stanza - Danish word tokenizer + Stanza - 丹麦语分词器 - - Stanza - Dutch lemmatizer - Stanza - 荷兰语词形还原器 + + Stanza - Dutch word tokenizer + Stanza - 荷兰语分词器 - - Stanza - English lemmatizer - Stanza - 英语词形还原器 + + Stanza - English word tokenizer + Stanza - 英语分词器 - - Stanza - Erzya lemmatizer - Stanza - 埃尔齐亚语词形还原器 + + Stanza - English (Old) word tokenizer + Stanza - 英语(古)分词器 - - Stanza - Estonian lemmatizer - Stanza - 爱沙尼亚语词形还原器 + + Stanza - Erzya word tokenizer + Stanza - 埃尔齐亚语分词器 - - Stanza - Finnish lemmatizer - Stanza - 芬兰语词形还原器 + + 
Stanza - Estonian word tokenizer + Stanza - 爱沙尼亚语分词器 - - Stanza - French lemmatizer - Stanza - 法语词形还原器 + + Stanza - Faroese word tokenizer + Stanza - 法罗语分词器 - - Stanza - French (Old) lemmatizer - Stanza - 法语(古)词形还原器 + + Stanza - Finnish word tokenizer + Stanza - 芬兰语分词器 - - Stanza - Galician lemmatizer - Stanza - 加里西亚语词形还原器 + + Stanza - French word tokenizer + Stanza - 法语分词器 - - Stanza - German lemmatizer - Stanza - 德语词形还原器 + + Stanza - French (Old) word tokenizer + Stanza - 法语(古)分词器 - - Stanza - Gothic lemmatizer - Stanza - 哥特语词形还原器 + + Stanza - Galician word tokenizer + Stanza - 加里西亚语分词器 - - Stanza - Greek (Ancient) lemmatizer - Stanza - 希腊语(古)词形还原器 + + Stanza - German word tokenizer + Stanza - 德语分词器 - - Stanza - Greek (Modern) lemmatizer - Stanza - 希腊语(现代)词形还原器 + + Stanza - Gothic word tokenizer + Stanza - 哥特语分词器 - - Stanza - Hebrew (Ancient) lemmatizer - Stanza - 希伯来语(古)词形还原器 + + Stanza - Greek (Ancient) word tokenizer + Stanza - 希腊语(古)分词器 - - Stanza - Hebrew (Modern) lemmatizer - Stanza - 希伯来语(现代)词形还原器 + + Stanza - Greek (Modern) word tokenizer + Stanza - 希腊语(现代)分词器 - - Stanza - Hindi lemmatizer - Stanza - 印地语词形还原器 + + Stanza - Hebrew (Ancient) word tokenizer + Stanza - 希伯来语(古)分词器 - - Stanza - Hungarian lemmatizer - Stanza - 匈牙利语词形还原器 + + Stanza - Hebrew (Modern) word tokenizer + Stanza - 希伯来语(现代)分词器 - - Stanza - Icelandic lemmatizer - Stanza - 冰岛语词形还原器 + + Stanza - Hindi word tokenizer + Stanza - 印地语分词器 - - Stanza - Indonesian lemmatizer - Stanza - 印度尼西亚语词形还原器 + + Stanza - Hungarian word tokenizer + Stanza - 匈牙利语分词器 - - Stanza - Irish lemmatizer - Stanza - 爱尔兰语词形还原器 + + Stanza - Icelandic word tokenizer + Stanza - 冰岛语分词器 - - Stanza - Italian lemmatizer - Stanza - 意大利语词形还原器 + + Stanza - Indonesian word tokenizer + Stanza - 印度尼西亚语分词器 - - Stanza - Japanese lemmatizer - Stanza - 日语词形还原器 + + Stanza - Irish word tokenizer + Stanza - 爱尔兰语分词器 - - Stanza - Kazakh lemmatizer - Stanza - 哈萨克语词形还原器 + + Stanza - Italian word tokenizer + Stanza - 意大利语分词器 - - Stanza - Korean 
lemmatizer - Stanza - 韩语词形还原器 + + Stanza - Japanese word tokenizer + Stanza - 日语分词器 - - Stanza - Kurdish (Kurmanji) lemmatizer - Stanza - 库尔德语(库尔曼吉语)词形还原器 + + Stanza - Kazakh word tokenizer + Stanza - 哈萨克语分词器 - - Stanza - Kyrgyz lemmatizer - Stanza - 吉尔吉斯语词形还原器 + + Stanza - Korean word tokenizer + Stanza - 韩语分词器 - - Stanza - Latin lemmatizer - Stanza - 拉丁语词形还原器 + + Stanza - Kurdish (Kurmanji) word tokenizer + Stanza - 库尔德语(库尔曼吉语)分词器 - - Stanza - Latvian lemmatizer - Stanza - 拉脱维亚语词形还原器 + + Stanza - Kyrgyz word tokenizer + Stanza - 吉尔吉斯语分词器 - - Stanza - Ligurian lemmatizer - Stanza - 利古里亚语词形还原器 + + Stanza - Latin word tokenizer + Stanza - 拉丁语分词器 - - Stanza - Lithuanian lemmatizer - Stanza - 立陶宛语词形还原器 + + Stanza - Latvian word tokenizer + Stanza - 拉脱维亚语分词器 - - Stanza - Manx lemmatizer - Stanza - 马恩语词形还原器 + + Stanza - Ligurian word tokenizer + Stanza - 利古里亚语分词器 - - Stanza - Marathi lemmatizer - Stanza - 马拉地语词形还原器 + + Stanza - Lithuanian word tokenizer + Stanza - 立陶宛语分词器 - - Stanza - Nigerian Pidgin lemmatizer - Stanza - 尼日利亚皮钦语词形还原器 + + Stanza - Maltese word tokenizer + Stanza - 马耳他语分词器 - - Stanza - Norwegian Bokmål lemmatizer - Stanza - 书面挪威语词形还原器 + + Stanza - Manx word tokenizer + Stanza - 马恩语分词器 - - Stanza - Norwegian Nynorsk lemmatizer - Stanza - 新挪威语词形还原器 + + Stanza - Marathi word tokenizer + Stanza - 马拉地语分词器 - - Stanza - Persian lemmatizer - Stanza - 波斯语词形还原器 + + Stanza - Nigerian Pidgin word tokenizer + Stanza - 尼日利亚皮钦语分词器 - - Stanza - Polish lemmatizer - Stanza - 波兰语词形还原器 + + Stanza - Norwegian (Bokmål) word tokenizer + Stanza - 挪威语(书面)分词器 - - Stanza - Pomak lemmatizer - Stanza - 波马克语词形还原器 + + Stanza - Norwegian (Nynorsk) word tokenizer + Stanza - 挪威语(新)分词器 - - Stanza - Portuguese lemmatizer - Stanza - 葡萄牙语词形还原器 + + Stanza - Persian word tokenizer + Stanza - 波斯语分词器 - - Stanza - Romanian lemmatizer - Stanza - 罗马尼亚语词形还原器 + + Stanza - Polish word tokenizer + Stanza - 波兰语分词器 - - Stanza - Russian lemmatizer - Stanza - 俄语词形还原器 + + Stanza - Pomak word tokenizer + 
Stanza - 波马克语分词器 - - Stanza - Russian (Old) lemmatizer - Stanza - 俄语(古)词形还原器 + + Stanza - Portuguese word tokenizer + Stanza - 葡萄牙语分词器 - - Stanza - Sámi (Northern) lemmatizer - Stanza - 萨米语(北)词形还原器 + + Stanza - Romanian word tokenizer + Stanza - 罗马尼亚语分词器 - - Stanza - Sanskrit lemmatizer - Stanza - 梵语词形还原器 + + Stanza - Russian word tokenizer + Stanza - 俄语分词器 - - Stanza - Scottish Gaelic lemmatizer - Stanza - 苏格兰盖尔语词形还原器 + + Stanza - Russian (Old) word tokenizer + Stanza - 俄语(古)分词器 - - Stanza - Serbian (Latin) lemmatizer - Stanza - 塞尔维亚语(拉丁)词形还原器 + + Stanza - Sámi (Northern) word tokenizer + Stanza - 萨米语(北)分词器 - - Stanza - Slovak lemmatizer - Stanza - 斯洛伐克语词形还原器 + + Stanza - Sanskrit word tokenizer + Stanza - 梵语分词器 - - Stanza - Slovenian lemmatizer - Stanza - 斯洛文尼亚语词形还原器 + + Stanza - Scottish Gaelic word tokenizer + Stanza - 苏格兰盖尔语分词器 - - Stanza - Sorbian (Upper) lemmatizer - Stanza - 索布语(上)词形还原器 + + Stanza - Serbian (Latin script) word tokenizer + Stanza - 塞尔维亚语(拉丁文)分词器 - - Stanza - Spanish lemmatizer - Stanza - 西班牙语词形还原器 + + Stanza - Sindhi word tokenizer + Stanza - 信德语分词器 - - Stanza - Swedish lemmatizer - Stanza - 瑞典语词形还原器 + + Stanza - Slovak word tokenizer + Stanza - 斯洛伐克语分词器 - - Stanza - Tamil lemmatizer - Stanza - 泰米尔语词形还原器 + + Stanza - Slovene word tokenizer + Stanza - 斯洛文尼亚语分词器 - - Stanza - Turkish lemmatizer - Stanza - 土耳其语词形还原器 + + Stanza - Sorbian (Upper) word tokenizer + Stanza - 索布语(上)分词器 - - Stanza - Ukrainian lemmatizer - Stanza - 乌克兰语词形还原器 + + Stanza - Spanish word tokenizer + Stanza - 西班牙语分词器 - - Stanza - Urdu lemmatizer - Stanza - 乌尔都语词形还原器 + + Stanza - Swedish word tokenizer + Stanza - 瑞典语分词器 - - Stanza - Uyghur lemmatizer - Stanza - 维吾尔语词形还原器 + + Stanza - Tamil word tokenizer + Stanza - 泰米尔语分词器 - - Stanza - Welsh lemmatizer - Stanza - 威尔士语词形还原器 + + Stanza - Telugu word tokenizer + Stanza - 泰卢固语分词器 - - Stanza - Wolof lemmatizer - Stanza - 沃洛夫语词形还原器 + + Stanza - Thai word tokenizer + Stanza - 泰语分词器 - - LaoNLP - Lao stop word list - LaoNLP - 老挝语停用词表 
+ + Stanza - Turkish word tokenizer + Stanza - 土耳其语分词器 - - NLTK - Hebrew (Modern) stop word list - NLTK - 希伯来语(现代)停用词表 + + Stanza - Ukrainian word tokenizer + Stanza - 乌克兰语分词器 - - Stanza - Afrikaans dependency parser - Stanza - 南非语依存分析器 + + Stanza - Urdu word tokenizer + Stanza - 乌尔都语分词器 - - Stanza - Arabic dependency parser - Stanza - 阿拉伯语依存分析器 + + Stanza - Uyghur word tokenizer + Stanza - 维吾尔语分词器 - - Stanza - Armenian (Eastern) dependency parser - Stanza - 亚美尼亚语(东)依存分析器 + + Stanza - Vietnamese word tokenizer + Stanza - 越南语分词器 - - Stanza - Armenian (Western) dependency parser - Stanza - 亚美尼亚语(西)依存分析器 + + Stanza - Welsh word tokenizer + Stanza - 威尔士语分词器 - - Stanza - Basque dependency parser - Stanza - 巴斯克语依存分析器 + + Stanza - Wolof word tokenizer + Stanza - 沃洛夫语分词器 - - Stanza - Belarusian dependency parser - Stanza - 白俄罗斯语依存分析器 + + SudachiPy - Japanese word tokenizer (split mode A) + SudachiPy - 日语分词器(切分模式 A) - - Stanza - Bulgarian dependency parser - Stanza - 保加利亚语依存分析器 + + SudachiPy - Japanese word tokenizer (split mode B) + SudachiPy - 日语分词器(切分模式 B) - - Stanza - Buryat (Russia) dependency parser - Stanza - 布里亚特语(俄罗斯)依存分析器 + + SudachiPy - Japanese word tokenizer (split mode C) + SudachiPy - 日语分词器(切分模式 C) - - Stanza - Catalan dependency parser - Stanza - 加泰罗尼亚语依存分析器 + + Underthesea - Vietnamese word tokenizer + Underthesea - 越南语分词器 - - Stanza - Chinese (Classical) dependency parser - Stanza - 汉语(文言)依存分析器 + + Wordless - Chinese character tokenizer + Wordless - 汉语分字器 - - Stanza - Chinese (Simplified) dependency parser - Stanza - 汉语(简体)依存分析器 + + Wordless - Japanese kanji tokenizer + Wordless - 日语分字器 - - Stanza - Chinese (Traditional) dependency parser - Stanza - 汉语(繁体)依存分析器 + + NLTK - Legality syllable tokenizer + NLTK - 合法性分音节器 - - Stanza - Church Slavonic (Old) dependency parser - Stanza - 教会斯拉夫语(古)依存分析器 + + NLTK - Sonority sequencing syllable tokenizer + NLTK - 响度顺序分音节器 - - Stanza - Coptic dependency parser - Stanza - 科普特语依存分析器 + + Pyphen - Afrikaans syllable 
tokenizer + Pyphen - 南非语分音节器 - - Stanza - Croatian dependency parser - Stanza - 克罗地亚语依存分析器 + + Pyphen - Albanian syllable tokenizer + Pyphen - 阿尔巴尼亚语分音节器 - - Stanza - Czech dependency parser - Stanza - 捷克语依存分析器 + + Pyphen - Basque syllable tokenizer + Pyphen - 巴斯克语分音节器 - - Stanza - Danish dependency parser - Stanza - 丹麦语依存分析器 + + Pyphen - Belarusian syllable tokenizer + Pyphen - 白俄罗斯语分音节器 - - Stanza - Dutch dependency parser - Stanza - 荷兰语依存分析器 + + Pyphen - Bulgarian syllable tokenizer + Pyphen - 保加利亚语分音节器 - - Stanza - English dependency parser - Stanza - 英语依存分析器 + + Pyphen - Catalan syllable tokenizer + Pyphen - 加泰罗尼亚语分音节器 - - Stanza - Erzya dependency parser - Stanza - 埃尔齐亚语依存分析器 + + Pyphen - Croatian syllable tokenizer + Pyphen - 克罗地亚语分音节器 - - Stanza - Estonian dependency parser - Stanza - 爱沙尼亚语依存分析器 + + Pyphen - Czech syllable tokenizer + Pyphen - 捷克语分音节器 - - Stanza - Faroese dependency parser - Stanza - 法罗语依存分析器 + + Pyphen - Danish syllable tokenizer + Pyphen - 丹麦语分音节器 - - Stanza - Finnish dependency parser - Stanza - 芬兰语依存分析器 + + Pyphen - Dutch syllable tokenizer + Pyphen - 荷兰语分音节器 - - Stanza - French dependency parser - Stanza - 法语依存分析器 + + Pyphen - English (United Kingdom) syllable tokenizer + Pyphen - 英语(英国)分音节器 - - Stanza - French (Old) dependency parser - Stanza - 法语(古)依存分析器 + + Pyphen - English (United States) syllable tokenizer + Pyphen - 英语(美国)分音节器 - - Stanza - Galician dependency parser - Stanza - 加里西亚语依存分析器 + + Pyphen - Esperanto syllable tokenizer + Pyphen - 世界语分音节器 - - Stanza - German dependency parser - Stanza - 德语依存分析器 + + Pyphen - Estonian syllable tokenizer + Pyphen - 爱沙尼亚语分音节器 - - Stanza - Gothic dependency parser - Stanza - 哥特语依存分析器 + + Pyphen - French syllable tokenizer + Pyphen - 法语分音节器 - - Stanza - Greek (Ancient) dependency parser - Stanza - 希腊语(古)依存分析器 + + Pyphen - Galician syllable tokenizer + Pyphen - 加里西亚语分音节器 - - Stanza - Greek (Modern) dependency parser - Stanza - 希腊语(现代)依存分析器 + + Pyphen - German (Austria) syllable tokenizer + 
Pyphen - 德语(奥地利)分音节器 - - Stanza - Hebrew (Ancient) dependency parser - Stanza - 希伯来语(古)依存分析器 + + Pyphen - German (Germany) syllable tokenizer + Pyphen - 德语(德国)分音节器 - - Stanza - Hebrew (Modern) dependency parser - Stanza - 希伯来语(现代)依存分析器 + + Pyphen - German (Switzerland) syllable tokenizer + Pyphen - 德语(瑞士)分音节器 - - Stanza - Hindi dependency parser - Stanza - 印地语依存分析器 + + Pyphen - Greek (Modern) syllable tokenizer + Pyphen - 希腊语(现代)分音节器 - - Stanza - Hungarian dependency parser - Stanza - 匈牙利语依存分析器 + + Pyphen - Hungarian syllable tokenizer + Pyphen - 匈牙利语分音节器 - - Stanza - Icelandic dependency parser - Stanza - 冰岛语依存分析器 + + Pyphen - Icelandic syllable tokenizer + Pyphen - 冰岛语分音节器 - - Stanza - Indonesian dependency parser - Stanza - 印度尼西亚语依存分析器 + + Pyphen - Indonesian syllable tokenizer + Pyphen - 印度尼西亚语分音节器 - - Stanza - Irish dependency parser - Stanza - 爱尔兰语依存分析器 + + Pyphen - Italian syllable tokenizer + Pyphen - 意大利语分音节器 - - Stanza - Italian dependency parser - Stanza - 意大利语依存分析器 + + Pyphen - Lithuanian syllable tokenizer + Pyphen - 立陶宛语分音节器 - - Stanza - Japanese dependency parser - Stanza - 日语依存分析器 + + Pyphen - Latvian syllable tokenizer + Pyphen - 拉脱维亚语分音节器 - - Stanza - Kazakh dependency parser - Stanza - 哈萨克语依存分析器 + + Pyphen - Mongolian syllable tokenizer + Pyphen - 蒙古语分音节器 - - Stanza - Korean dependency parser - Stanza - 韩语依存分析器 + + Pyphen - Norwegian (Bokmål) syllable tokenizer + Pyphen - 挪威语(书面)分音节器 - - Stanza - Kurdish (Kurmanji) dependency parser - Stanza - 库尔德语(库尔曼吉语)依存分析器 + + Pyphen - Norwegian (Nynorsk) syllable tokenizer + Pyphen - 挪威语(新)分音节器 - - Stanza - Kyrgyz dependency parser - Stanza - 吉尔吉斯语依存分析器 + + Pyphen - Polish syllable tokenizer + Pyphen - 波兰语分音节器 - - Stanza - Latin dependency parser - Stanza - 拉丁语依存分析器 + + Pyphen - Portuguese (Brazil) syllable tokenizer + Pyphen - 葡萄牙语(巴西)分音节器 - - Stanza - Latvian dependency parser - Stanza - 拉脱维亚语依存分析器 - - - - Stanza - Ligurian dependency parser - Stanza - 利古里亚语依存分析器 + + Pyphen - Portuguese (Portugal) syllable 
tokenizer + Pyphen - 葡萄牙语(葡萄牙)分音节器 - - Stanza - Lithuanian dependency parser - Stanza - 立陶宛语依存分析器 + + Pyphen - Romanian syllable tokenizer + Pyphen - 罗马尼亚语分音节器 - - Stanza - Maltese dependency parser - Stanza - 马耳他语依存分析器 + + Pyphen - Russian syllable tokenizer + Pyphen - 俄语分音节器 - - Stanza - Manx dependency parser - Stanza - 马恩语依存分析器 + + Pyphen - Serbian (Cyrillic script) syllable tokenizer + Pyphen - 塞尔维亚语(西里尔文)分音节器 - - Stanza - Marathi dependency parser - Stanza - 马拉地语依存分析器 + + Pyphen - Serbian (Latin script) syllable tokenizer + Pyphen - 塞尔维亚语(拉丁文)分音节器 - - Stanza - Nigerian Pidgin dependency parser - Stanza - 尼日利亚皮钦语依存分析器 + + Pyphen - Slovak syllable tokenizer + Pyphen - 斯洛伐克语分音节器 - - Stanza - Norwegian Bokmål dependency parser - Stanza - 书面挪威语依存分析器 + + Pyphen - Slovene syllable tokenizer + Pyphen - 斯洛文尼亚语分音节器 - - Stanza - Norwegian Nynorsk dependency parser - Stanza - 新挪威语依存分析器 + + Pyphen - Spanish syllable tokenizer + Pyphen - 西班牙语分音节器 - - Stanza - Persian dependency parser - Stanza - 波斯语依存分析器 + + Pyphen - Swedish syllable tokenizer + Pyphen - 瑞典语分音节器 - - Stanza - Polish dependency parser - Stanza - 波兰语依存分析器 + + Pyphen - Telugu syllable tokenizer + Pyphen - 泰卢固语分音节器 - - Stanza - Pomak dependency parser - Stanza - 波马克语依存分析器 + + Pyphen - Thai syllable tokenizer + Pyphen - 泰语分音节器 - - Stanza - Portuguese dependency parser - Stanza - 葡萄牙语依存分析器 + + Pyphen - Ukrainian syllable tokenizer + Pyphen - 乌克兰语分音节器 - - Stanza - Romanian dependency parser - Stanza - 罗马尼亚语依存分析器 + + Pyphen - Zulu syllable tokenizer + Pyphen - 祖鲁语分音节器 - - Stanza - Russian dependency parser - Stanza - 俄语依存分析器 + + PyThaiNLP - Syllable dictionary + PyThaiNLP - 音节词典 - - Stanza - Russian (Old) dependency parser - Stanza - 俄语(古)依存分析器 + + botok - Tibetan part-of-speech tagger + botok - 藏语词性标注器 - - Stanza - Sámi (Northern) dependency parser - Stanza - 萨米语(北)依存分析器 + + khmer-nltk - Khmer part-of-speech tagger + khmer-nltk - 柬埔寨语词性标注器 - - Stanza - Sanskrit dependency parser - Stanza - 梵语依存分析器 + + LaoNLP - 
Yunshan Cup 2020 + LaoNLP - 2020 云山杯 - - Stanza - Scottish Gaelic dependency parser - Stanza - 苏格兰盖尔语依存分析器 + + NLTK - English perceptron part-of-speech tagger + NLTK - 英语感知机词性标注器 - - Stanza - Serbian (Latin) dependency parser - Stanza - 塞尔维亚语(拉丁)依存分析器 + + NLTK - Russian perceptron part-of-speech tagger + NLTK - 俄语感知机词性标注器 - - Stanza - Slovak dependency parser - Stanza - 斯洛伐克语依存分析器 + + pymorphy3 - Morphological analyzer + pymorphy3 - 形态分析器 - - Stanza - Slovenian dependency parser - Stanza - 斯洛文尼亚语依存分析器 + + PyThaiNLP - Perceptron part-of-speech tagger (Blackboard) + PyThaiNLP - 感知机词性标注器(Blackboard) - - Stanza - Sorbian (Upper) dependency parser - Stanza - 索布语(上)依存分析器 + + PyThaiNLP - Perceptron part-of-speech tagger (ORCHID) + PyThaiNLP - 感知机词性标注器(ORCHID) - - Stanza - Spanish dependency parser - Stanza - 西班牙语依存分析器 + + PyThaiNLP - Perceptron part-of-speech tagger (PUD) + PyThaiNLP - 感知机词性标注器(PUD) - - Stanza - Swedish dependency parser - Stanza - 瑞典语依存分析器 + + spaCy - Catalan part-of-speech tagger + spaCy - 加泰罗尼亚语词性标注器 - - Stanza - Tamil dependency parser - Stanza - 泰米尔语依存分析器 + + spaCy - Chinese part-of-speech tagger + spaCy - 汉语词性标注器 - - Stanza - Telugu dependency parser - Stanza - 泰卢固语依存分析器 + + spaCy - Croatian part-of-speech tagger + spaCy - 克罗地亚语词性标注器 - - Stanza - Turkish dependency parser - Stanza - 土耳其语依存分析器 + + spaCy - Danish part-of-speech tagger + spaCy - 丹麦语词性标注器 - - Stanza - Ukrainian dependency parser - Stanza - 乌克兰语依存分析器 + + spaCy - Dutch part-of-speech tagger + spaCy - 荷兰语词性标注器 - - Stanza - Urdu dependency parser - Stanza - 乌尔都语依存分析器 + + spaCy - English part-of-speech tagger + spaCy - 英语词性标注器 - - Stanza - Uyghur dependency parser - Stanza - 维吾尔语依存分析器 + + spaCy - Finnish part-of-speech tagger + spaCy - 芬兰语词性标注器 - - Stanza - Vietnamese dependency parser - Stanza - 越南语依存分析器 + + spaCy - French part-of-speech tagger + spaCy - 法语词性标注器 - - Stanza - Welsh dependency parser - Stanza - 威尔士语依存分析器 + + spaCy - German part-of-speech tagger + spaCy - 德语词性标注器 - - Stanza - 
Wolof dependency parser - Stanza - 沃洛夫语依存分析器 + + spaCy - Greek (Modern) part-of-speech tagger + spaCy - 希腊语(现代)词性标注器 - - Stanza - Chinese (Simplified) sentiment analyzer - Stanza - 汉语(简体)情感分析器 + + spaCy - Italian part-of-speech tagger + spaCy - 意大利语词性标注器 - - Stanza - German sentiment analyzer - Stanza - 德语情感分析器 + + spaCy - Japanese part-of-speech tagger + spaCy - 日语词性标注器 - - Stanza - English sentiment analyzer - Stanza - 英语情感分析器 + + spaCy - Korean part-of-speech tagger + spaCy - 韩语词性标注器 - - Stanza - Marathi sentiment analyzer - Stanza - 马拉地语情感分析器 + + spaCy - Lithuanian part-of-speech tagger + spaCy - 立陶宛语词性标注器 - - Stanza - Spanish sentiment analyzer - Stanza - 西班牙语情感分析器 + + spaCy - Macedonian part-of-speech tagger + spaCy - 马其顿语词性标注器 - - Stanza - Vietnamese sentiment analyzer - Stanza - 越南语情感分析器 + + spaCy - Norwegian (Bokmål) part-of-speech tagger + spaCy - 挪威语(书面)词性标注器 - - - wl_boxes - - Yes - + + spaCy - Polish part-of-speech tagger + spaCy - 波兰语词性标注器 - - No - + + spaCy - Portuguese part-of-speech tagger + spaCy - 葡萄牙语词性标注器 - - No limit - 无限制 + + spaCy - Romanian part-of-speech tagger + spaCy - 罗马尼亚语词性标注器 - - Sync - 同步 + + spaCy - Russian part-of-speech tagger + spaCy - 俄语词性标注器 - - From - + + spaCy - Slovene part-of-speech tagger + spaCy - 斯洛文尼亚语词性标注器 - - to - + + spaCy - Spanish part-of-speech tagger + spaCy - 西班牙语词性标注器 - - L - + + spaCy - Swedish part-of-speech tagger + spaCy - 瑞典语词性标注器 - - R - + + spaCy - Ukrainian part-of-speech tagger + spaCy - 乌克兰语词性标注器 - - - wl_buttons - - Browse... - 浏览... 
+ + Stanza - Afrikaans part-of-speech tagger + Stanza - 南非语词性标注器 - - Pick Color - 选择颜色 + + Stanza - Arabic part-of-speech tagger + Stanza - 阿拉伯语词性标注器 - - Transparent - 透明 + + Stanza - Armenian (Classical) part-of-speech tagger + Stanza - 亚美尼亚语(古)词性标注器 - - Restore defaults - 恢复默认值 + + Stanza - Armenian (Eastern) part-of-speech tagger + Stanza - 亚美尼亚语(东)词性标注器 - - - wl_checks_work_area - - Missing Search Terms - 缺少检索项 + + Stanza - Armenian (Western) part-of-speech tagger + Stanza - 亚美尼亚语(西)词性标注器 - - - <div> - You have not specified any search terms yet, please enter one in the input box under "<span style="color: #F00; font-weight: bold;">Search term</span>" first. - </div> - - - <div> - 你还未指定任何检索项,请先在“<span style="color: #F00; font-weight: bold;">检索项</span>”下的输入框中指定一项。 - </div> - + + Stanza - Basque part-of-speech tagger + Stanza - 巴斯克语词性标注器 - - No Results - 无结果 + + Stanza - Belarusian part-of-speech tagger + Stanza - 白俄罗斯语词性标注器 - - - <div>Data processing has completed successfully, but there are no results to display.</div> - <div>You can change your settings and try again.</div> - - - <div>数据处理操作已完成,但没有可显示的结果。</div> - <div>你可以更改你的设置后重试。</div> - + + Stanza - Bulgarian part-of-speech tagger + Stanza - 保加利亚语词性标注器 - - Language support unavailable! - 语种支持不可用! + + Stanza - Buryat (Russia) part-of-speech tagger + Stanza - 布里亚特语(俄罗斯)词性标注器 - - Missing search terms! - 缺少检索项! + + Stanza - Catalan part-of-speech tagger + Stanza - 加泰罗尼亚语词性标注器 - - Table generated successfully. - 已成功生成表格。 + + Stanza - Chinese (Classical) part-of-speech tagger + Stanza - 汉语(文言)词性标注器 - - Figure generated successfully. - 已成功生成图表。 + + Stanza - Chinese (Simplified) part-of-speech tagger + Stanza - 汉语(简体)词性标注器 - - No results to display. - 无结果可供显示。 + + Stanza - Chinese (Traditional) part-of-speech tagger + Stanza - 汉语(繁体)词性标注器 - - A fatal error has just occurred! - 刚才发生了一个致命错误! 
+ + Stanza - Church Slavonic (Old) part-of-speech tagger + Stanza - 教会斯拉夫语(古)词性标注器 - - Syllable tokenization - 分音节 + + Stanza - Coptic part-of-speech tagger + Stanza - 科普特语词性标注器 - - Part-of-speech tagging - 词性标注 + + Stanza - Croatian part-of-speech tagger + Stanza - 克罗地亚语词性标注器 - - Lemmatization - 词形还原 + + Stanza - Czech part-of-speech tagger + Stanza - 捷克语词性标注器 - - Dependency parsing - 依存分析 + + Stanza - Danish part-of-speech tagger + Stanza - 丹麦语词性标注器 - - No Language Support - 无语种支持 + + Stanza - Dutch part-of-speech tagger + Stanza - 荷兰语词性标注器 - - Type of Language Support - 语种支持类型 + + Stanza - English part-of-speech tagger + Stanza - 英语词性标注器 - - File Name - 文件名 + + Stanza - English (Old) part-of-speech tagger + Stanza - 英语(古)词性标注器 - - Language - 语种 + + Stanza - Erzya part-of-speech tagger + Stanza - 埃尔齐亚语词性标注器 - - - <div> - The process cannot be done because language support is unavailable for the following files. Please check your language settings or try again with files of different languages. - </div> - - - <div> - 由于下列文件缺少语种支持,因此操作无法完成。请检查你的语种设置或使用其他语种的文件重试。 - </div> - + + Stanza - Estonian part-of-speech tagger + Stanza - 爱沙尼亚语词性标注器 - - Model downloaded successfully. - 已成功下载模型。 + + Stanza - Faroese part-of-speech tagger + Stanza - 法罗语词性标注器 - - A network error occurred while downloading the model! - 下载模型时发生网络错误! 
+ + Stanza - Finnish part-of-speech tagger + Stanza - 芬兰语词性标注器 - - - wl_colligation_extractor - - None - + + Stanza - French part-of-speech tagger + Stanza - 法语词性标注器 - - Within sentence segments - 句段内 + + Stanza - French (Old) part-of-speech tagger + Stanza - 法语(古)词性标注器 - - Within sentences - 句子内 + + Stanza - Galician part-of-speech tagger + Stanza - 加里西亚语词性标注器 - - Within paragraphs - 段落内 + + Stanza - German part-of-speech tagger + Stanza - 德语词性标注器 - - - wl_collocation_extractor - - None - + + Stanza - Gothic part-of-speech tagger + Stanza - 哥特语词性标注器 - - Within sentence segments - 句段内 + + Stanza - Greek (Ancient) part-of-speech tagger + Stanza - 希腊语(古)词性标注器 - - Within sentences - 句子内 + + Stanza - Greek (Modern) part-of-speech tagger + Stanza - 希腊语(现代)词性标注器 - - Within paragraphs - 段落内 + + Stanza - Hebrew (Ancient) part-of-speech tagger + Stanza - 希伯来语(古)词性标注器 - - - wl_conversion - - Yes - + + Stanza - Hebrew (Modern) part-of-speech tagger + Stanza - 希伯来语(现代)词性标注器 - - No - + + Stanza - Hindi part-of-speech tagger + Stanza - 印地语词性标注器 - - - wl_dependency_parsing - - Dependency Graphs Generated Successfully - 成功生成依存图 + + Stanza - Hungarian part-of-speech tagger + Stanza - 匈牙利语词性标注器 - - - <div>Dependency graphs has been successfully generated and exported under folder: {}</div> - - <div>If the figures are not displayed automatically, you may try opening them manually using web browsers or image viewers installed on your computer, or save copies of them in other locations for later use.</div> - - - <div>已成功生成依存图并导出至该文件夹下:{}</div> - - <div>若图表未自动显示,你可使用电脑上已安装的浏览器或看图软件将其手动打开,或将图表副本保存至其他位置以待后用。</div> - + + Stanza - Icelandic part-of-speech tagger + Stanza - 冰岛语词性标注器 - - - wl_dialogs_errs - - Fatal Error - 致命错误 + + Stanza - Indonesian part-of-speech tagger + Stanza - 印度尼西亚语词性标注器 - - - <div>A fatal error has occurred, please <b>send the following error messages</b> to {} in order to <b>contact the author for support</b>!</div> - - - 
<div>刚才发生了一个致命错误,请<b>将下方错误信息</b>发送至{}来获取<b>作者的帮助</b>!</div> - + + Stanza - Irish part-of-speech tagger + Stanza - 爱尔兰语词性标注器 - - Network Error - 网络错误 - + + Stanza - Italian part-of-speech tagger + Stanza - 意大利语词性标注器 + - - - <div>A network error occurred while downloading the model, please check your internet connections and proxy settings in <b>Menu → Preferences → General → Proxy Settings</b> if you are using a proxy.</div> - <div>If the network issue persists, please <b>send the following error messages</b> to {} in order to <b>contact the author for support</b>.</div> - - - <div>下载模型时发生了网络错误,请检查你的网络连接,如果你使用了代理,那么请一并检查<b>菜单 → 偏好 → 全局 → 代理设置</b>中的代理设置。</div> - <div>如果网络问题仍然存在,请<b>将下方错误信息</b>发送至{}来获取<b>作者的帮助</b>。</div> - + + Stanza - Japanese part-of-speech tagger + Stanza - 日语词性标注器 + + + + Stanza - Kazakh part-of-speech tagger + Stanza - 哈萨克语词性标注器 + + + + Stanza - Korean part-of-speech tagger + Stanza - 韩语词性标注器 + + + + Stanza - Kurdish (Kurmanji) part-of-speech tagger + Stanza - 库尔德语(库尔曼吉语)词性标注器 + + + + Stanza - Kyrgyz part-of-speech tagger + Stanza - 吉尔吉斯语词性标注器 + + + + Stanza - Latin part-of-speech tagger + Stanza - 拉丁语词性标注器 + + + + Stanza - Latvian part-of-speech tagger + Stanza - 拉脱维亚语词性标注器 + + + + Stanza - Ligurian part-of-speech tagger + Stanza - 利古里亚语词性标注器 + + + + Stanza - Lithuanian part-of-speech tagger + Stanza - 立陶宛语词性标注器 + + + + Stanza - Maltese part-of-speech tagger + Stanza - 马耳他语词性标注器 + + + + Stanza - Manx part-of-speech tagger + Stanza - 马恩语词性标注器 + + + + Stanza - Marathi part-of-speech tagger + Stanza - 马拉地语词性标注器 + + + + Stanza - Nigerian Pidgin part-of-speech tagger + Stanza - 尼日利亚皮钦语词性标注器 + + + + Stanza - Norwegian (Bokmål) part-of-speech tagger + Stanza - 挪威语(书面)词性标注器 + + + + Stanza - Norwegian (Nynorsk) part-of-speech tagger + Stanza - 挪威语(新)词性标注器 + + + + Stanza - Persian part-of-speech tagger + Stanza - 波斯语词性标注器 + + + + Stanza - Polish part-of-speech tagger + Stanza - 波兰语词性标注器 + + + + Stanza - Pomak part-of-speech tagger + Stanza - 波马克语词性标注器 + + 
+ + Stanza - Portuguese part-of-speech tagger + Stanza - 葡萄牙语词性标注器 + + + + Stanza - Romanian part-of-speech tagger + Stanza - 罗马尼亚语词性标注器 + + + + Stanza - Russian part-of-speech tagger + Stanza - 俄语词性标注器 + + + + Stanza - Russian (Old) part-of-speech tagger + Stanza - 俄语(古)词性标注器 + + + + Stanza - Sámi (Northern) part-of-speech tagger + Stanza - 萨米语(北)词性标注器 + + + + Stanza - Sanskrit part-of-speech tagger + Stanza - 梵语词性标注器 + + + + Stanza - Scottish Gaelic part-of-speech tagger + Stanza - 苏格兰盖尔语词性标注器 + + + + Stanza - Serbian (Latin script) part-of-speech tagger + Stanza - 塞尔维亚语(拉丁文)词性标注器 + + + + Stanza - Sindhi part-of-speech tagger + Stanza - 信德语词性标注器 + + + + Stanza - Slovak part-of-speech tagger + Stanza - 斯洛伐克语词性标注器 + + + + Stanza - Slovene part-of-speech tagger + Stanza - 斯洛文尼亚语词性标注器 + + + + Stanza - Sorbian (Upper) part-of-speech tagger + Stanza - 索布语(上)词性标注器 + + + + Stanza - Spanish part-of-speech tagger + Stanza - 西班牙语词性标注器 + + + + Stanza - Swedish part-of-speech tagger + Stanza - 瑞典语词性标注器 + + + + Stanza - Tamil part-of-speech tagger + Stanza - 泰米尔语词性标注器 + + + + Stanza - Telugu part-of-speech tagger + Stanza - 泰卢固语词性标注器 + + + + Stanza - Turkish part-of-speech tagger + Stanza - 土耳其语词性标注器 + + + + Stanza - Ukrainian part-of-speech tagger + Stanza - 乌克兰语词性标注器 + + + + Stanza - Urdu part-of-speech tagger + Stanza - 乌尔都语词性标注器 + + + + Stanza - Uyghur part-of-speech tagger + Stanza - 维吾尔语词性标注器 + + + + Stanza - Vietnamese part-of-speech tagger + Stanza - 越南语词性标注器 + + + + Stanza - Welsh part-of-speech tagger + Stanza - 威尔士语词性标注器 + + + + Stanza - Wolof part-of-speech tagger + Stanza - 沃洛夫语词性标注器 + + + + SudachiPy - Japanese part-of-speech tagger + SudachiPy - 日语词性标注器 + + + + Underthesea - Vietnamese part-of-speech tagger + Underthesea - 越南语词性标注器 + + + + botok - Tibetan lemmatizer + botok - 藏语词形还原器 + + + + NLTK - WordNet lemmatizer + NLTK - WordNet 词形还原器 + + + + simplemma - Albanian lemmatizer + simplemma - 阿尔巴尼亚语词形还原器 + + + + simplemma - Armenian lemmatizer + simplemma - 
亚美尼亚语词形还原器 + + + + simplemma - Asturian lemmatizer + simplemma - 阿斯图里亚斯语词形还原器 + + + + simplemma - Bulgarian lemmatizer + simplemma - 保加利亚语词形还原器 + + + + simplemma - Catalan lemmatizer + simplemma - 加泰罗尼亚语词形还原器 + + + + simplemma - Czech lemmatizer + simplemma - 捷克语词形还原器 + + + + simplemma - Danish lemmatizer + simplemma - 丹麦语词形还原器 + + + + simplemma - Dutch lemmatizer + simplemma - 荷兰语词形还原器 + + + + simplemma - English lemmatizer + simplemma - 英语词形还原器 + + + + simplemma - English (Middle) lemmatizer + simplemma - 英语(中古)词形还原器 + + + + simplemma - Estonian lemmatizer + simplemma - 爱沙尼亚语词形还原器 + + + + simplemma - Finnish lemmatizer + simplemma - 芬兰语词形还原器 + + + + simplemma - French lemmatizer + simplemma - 法语词形还原器 + + + + simplemma - Galician lemmatizer + simplemma - 加里西亚语词形还原器 + + + + simplemma - Georgian lemmatizer + simplemma - 格鲁吉亚语词形还原器 + + + + simplemma - German lemmatizer + simplemma - 德语词形还原器 + + + + simplemma - Greek (Modern) lemmatizer + simplemma - 希腊语(现代)词形还原器 + + + + simplemma - Hindi lemmatizer + simplemma - 印地语词形还原器 + + + + simplemma - Hungarian lemmatizer + simplemma - 匈牙利语词形还原器 + + + + simplemma - Icelandic lemmatizer + simplemma - 冰岛语词形还原器 + + + + simplemma - Indonesian lemmatizer + simplemma - 印度尼西亚语词形还原器 + + + + simplemma - Irish lemmatizer + simplemma - 爱尔兰语词形还原器 + + + + simplemma - Italian lemmatizer + simplemma - 意大利语词形还原器 + + + + simplemma - Latin lemmatizer + simplemma - 拉丁语词形还原器 + + + + simplemma - Latvian lemmatizer + simplemma - 拉脱维亚语词形还原器 + + + + simplemma - Lithuanian lemmatizer + simplemma - 立陶宛语词形还原器 + + + + simplemma - Luxembourgish lemmatizer + simplemma - 卢森堡语词形还原器 + + + + simplemma - Macedonian lemmatizer + simplemma - 马其顿语词形还原器 + + + + simplemma - Malay lemmatizer + simplemma - 马来语词形还原器 + + + + simplemma - Manx lemmatizer + simplemma - 马恩语词形还原器 + + + + simplemma - Norwegian (Bokmål) lemmatizer + simplemma - 挪威语(书面)词形还原器 + + + + simplemma - Norwegian (Nynorsk) lemmatizer + simplemma - 挪威语(新)词形还原器 + + + + simplemma - Persian lemmatizer + 
simplemma - 波斯语词形还原器 + + + + simplemma - Polish lemmatizer + simplemma - 波兰语词形还原器 + + + + simplemma - Portuguese lemmatizer + simplemma - 葡萄牙语词形还原器 + + + + simplemma - Romanian lemmatizer + simplemma - 罗马尼亚语词形还原器 + + + + simplemma - Russian lemmatizer + simplemma - 俄语词形还原器 + + + + simplemma - Sámi (Northern) lemmatizer + simplemma - 萨米语(北)词形还原器 + + + + simplemma - Scottish Gaelic lemmatizer + simplemma - 苏格兰盖尔语词形还原器 + + + + simplemma - Serbo-Croatian lemmatizer + simplemma - 塞尔维亚-克罗地亚语词形还原器 + + + + simplemma - Slovak lemmatizer + simplemma - 斯洛伐克语词形还原器 + + + + simplemma - Slovene lemmatizer + simplemma - 斯洛文尼亚语词形还原器 + + + + simplemma - Spanish lemmatizer + simplemma - 西班牙语词形还原器 + + + + simplemma - Swahili lemmatizer + simplemma - 斯瓦西里语词形还原器 + + + + simplemma - Swedish lemmatizer + simplemma - 瑞典语词形还原器 + + + + simplemma - Tagalog lemmatizer + simplemma - 他加禄语词形还原器 + + + + simplemma - Turkish lemmatizer + simplemma - 土耳其语词形还原器 + + + + simplemma - Ukrainian lemmatizer + simplemma - 乌克兰语词形还原器 + + + + simplemma - Welsh lemmatizer + simplemma - 威尔士语词形还原器 + + + + spaCy - Bengali lemmatizer + spaCy - 孟加拉语词形还原器 + + + + spaCy - Catalan lemmatizer + spaCy - 加泰罗尼亚语词形还原器 + + + + spaCy - Croatian lemmatizer + spaCy - 克罗地亚语词形还原器 + + + + spaCy - Czech lemmatizer + spaCy - 捷克语词形还原器 + + + + spaCy - Danish lemmatizer + spaCy - 丹麦语词形还原器 + + + + spaCy - Dutch lemmatizer + spaCy - 荷兰语词形还原器 + + + + spaCy - English lemmatizer + spaCy - 英语词形还原器 - - - wl_fig_freqs - - Total - 合计 + + spaCy - Finnish lemmatizer + spaCy - 芬兰语词形还原器 - - Token - 形符 + + spaCy - French lemmatizer + spaCy - 法语词形还原器 - - N-gram - n 元组 + + spaCy - German lemmatizer + spaCy - 德语词形还原器 - - Collocate - 搭配词 + + spaCy - Greek (Ancient) lemmatizer + spaCy - 希腊语(古)词形还原器 - - Keyword - 关键词 + + spaCy - Greek (Modern) lemmatizer + spaCy - 希腊语(现代)词形还原器 - - Reference files - 参照文件 + + spaCy - Hungarian lemmatizer + spaCy - 匈牙利语词形还原器 - - Line chart - 折线图 + + spaCy - Indonesian lemmatizer + spaCy - 印度尼西亚语词形还原器 - - Word cloud - 词云图 + + 
spaCy - Irish lemmatizer + spaCy - 爱尔兰语词形还原器 - - Network graph - 网络图 + + spaCy - Italian lemmatizer + spaCy - 意大利语词形还原器 - - - wl_fig_stats - - Total - 合计 + + spaCy - Japanese lemmatizer + spaCy - 日语词形还原器 - - p-value - p 值 + + spaCy - Korean lemmatizer + spaCy - 韩语词形还原器 - - Line chart - 折线图 + + spaCy - Lithuanian lemmatizer + spaCy - 立陶宛语词形还原器 - - Token - 形符 + + spaCy - Luxembourgish lemmatizer + spaCy - 卢森堡语词形还原器 - - N-gram - n 元组 + + spaCy - Macedonian lemmatizer + spaCy - 马其顿语词形还原器 - - Collocate - 搭配词 + + spaCy - Norwegian (Bokmål) lemmatizer + spaCy - 挪威语(书面)词形还原器 - - Keyword - 关键词 + + spaCy - Persian lemmatizer + spaCy - 波斯语词形还原器 - - Word cloud - 词云图 + + spaCy - Polish lemmatizer + spaCy - 波兰语词形还原器 - - Network graph - 网络图 + + spaCy - Portuguese lemmatizer + spaCy - 葡萄牙语词形还原器 - - - wl_figs - - Frequency - 频数 + + spaCy - Romanian lemmatizer + spaCy - 罗马尼亚语词形还原器 - - ^[LR][1-9][0-9]*$ - ^[左右][1-9][0-9]*$ + + spaCy - Russian lemmatizer + spaCy - 俄语词形还原器 - - Cumulative Percentage Frequency - 累加百分比频数 + + spaCy - Serbian (Cyrillic script) lemmatizer + spaCy - 塞尔维亚语(西里尔文)词形还原器 - - Cumulative Frequency - 累加频数 + + spaCy - Slovene lemmatizer + spaCy - 斯洛文尼亚语词形还原器 - - Percentage Frequency - 百分比频数 + + spaCy - Spanish lemmatizer + spaCy - 西班牙语词形还原器 - - p-value - p 值 + + spaCy - Swedish lemmatizer + spaCy - 瑞典语词形还原器 - - Custom - 自定义 + + spaCy - Tagalog lemmatizer + spaCy - 他加禄语词形还原器 - - Monochrome - 单色 + + spaCy - Turkish lemmatizer + spaCy - 土耳其语词形还原器 - - Colormap - 色谱 + + spaCy - Ukrainian lemmatizer + spaCy - 乌克兰语词形还原器 - - - wl_lists - - New search term - 新检索项 + + spaCy - Urdu lemmatizer + spaCy - 乌尔都语词形还原器 - - New stop word - 新停用词 + + Stanza - Afrikaans lemmatizer + Stanza - 南非语词形还原器 - - New item - 新列表项 + + Stanza - Arabic lemmatizer + Stanza - 阿拉伯语词形还原器 - - Add - 添加 + + Stanza - Armenian (Classical) lemmatizer + Stanza - 亚美尼亚语(古)词形还原器 - - Insert - 插入 + + Stanza - Armenian (Eastern) lemmatizer + Stanza - 亚美尼亚语(东)词形还原器 - - Remove - 移除 + + Stanza - Armenian (Western) 
lemmatizer + Stanza - 亚美尼亚语(西)词形还原器 - - Clear - 清空 + + Stanza - Basque lemmatizer + Stanza - 巴斯克语词形还原器 - - Duplicates Found - 发现重复项 + + Stanza - Belarusian lemmatizer + Stanza - 白俄罗斯语词形还原器 - - - <div>The item that you have just edited already exists in the list, please specify another one!</div> - - - <div>你刚才编辑的列表项已存在于列表中,请另外指定一项!</div> - + + Stanza - Bulgarian lemmatizer + Stanza - 保加利亚语词形还原器 - - Import - 导入 + + Stanza - Buryat (Russia) lemmatizer + Stanza - 布里亚特语(俄罗斯)词形还原器 - - Export - 导出 + + Stanza - Catalan lemmatizer + Stanza - 加泰罗尼亚语词形还原器 - - Import from Files - 从文件导入 + + Stanza - Chinese (Classical) lemmatizer + Stanza - 汉语(文言)词形还原器 - - Text files (*.txt) - 文本文件 (*.txt) + + Stanza - Chinese (Simplified) lemmatizer + Stanza - 汉语(简体)词形还原器 - - Import Error - 导入时出错 + + Stanza - Chinese (Traditional) lemmatizer + Stanza - 汉语(繁体)词形还原器 - - - <div> - An error occurred during import, please check the following files and try again. - </div> - - - <div> - 导入时发生了一个错误,请检查下列文件后重试。 - </div> - + + Stanza - Church Slavonic (Old) lemmatizer + Stanza - 教会斯拉夫语(古)词形还原器 - - Empty file - 空文件 + + Stanza - Coptic lemmatizer + Stanza - 科普特语词形还原器 - - An error occurred during import! - 导入时发生了一个错误! + + Stanza - Croatian lemmatizer + Stanza - 克罗地亚语词形还原器 - - item - + + Stanza - Czech lemmatizer + Stanza - 捷克语词形还原器 + + + + Stanza - Danish lemmatizer + Stanza - 丹麦语词形还原器 - - items - + + Stanza - Dutch lemmatizer + Stanza - 荷兰语词形还原器 - - {} {} has been successfully imported into the list. 
- 已成功导入 {} {}至列表中。 + + Stanza - English lemmatizer + Stanza - 英语词形还原器 - - Export to File - 导出至文件 + + Stanza - English (Old) lemmatizer + Stanza - 英语(古)词形还原器 - - Export Completed - 导出完成 + + Stanza - Erzya lemmatizer + Stanza - 埃尔齐亚语词形还原器 - - - <div>The list has been successfully exported to "{}".</div> - - - <div>已成功导出列表至“{}”。</div> - + + Stanza - Estonian lemmatizer + Stanza - 爱沙尼亚语词形还原器 - - - wl_measure_utils - - Absolute frequency - 绝对频数 + + Stanza - Finnish lemmatizer + Stanza - 芬兰语词形还原器 - - Relative frequency - 相对频数 + + Stanza - French lemmatizer + Stanza - 法语词形还原器 - - - wl_measures_lexical_diversity - - Rank-frequency distribution - 频数排序分布 + + Stanza - French (Old) lemmatizer + Stanza - 法语(古)词形还原器 - - Frequency spectrum - 频数谱 + + Stanza - Galician lemmatizer + Stanza - 加里西亚语词形还原器 - - - wl_measures_readability - - Policy one - + + Stanza - German lemmatizer + Stanza - 德语词形还原器 - - Policy two - + + Stanza - Gothic lemmatizer + Stanza - 哥特语词形还原器 - - Original - 原版 + + Stanza - Greek (Ancient) lemmatizer + Stanza - 希腊语(古)词形还原器 - - New - 新版 + + Stanza - Greek (Modern) lemmatizer + Stanza - 希腊语(现代)词形还原器 - - Navy - 海军版 + + Stanza - Hebrew (Ancient) lemmatizer + Stanza - 希伯来语(古)词形还原器 - - - wl_measures_statistical_significance - - Two-tailed - 双尾 + + Stanza - Hebrew (Modern) lemmatizer + Stanza - 希伯来语(现代)词形还原器 - - Left-tailed - 左尾 + + Stanza - Hindi lemmatizer + Stanza - 印地语词形还原器 - - Right-tailed - 右尾 + + Stanza - Hungarian lemmatizer + Stanza - 匈牙利语词形还原器 - - - wl_misc - - minute - + + Stanza - Icelandic lemmatizer + Stanza - 冰岛语词形还原器 - - minutes - + + Stanza - Indonesian lemmatizer + Stanza - 印度尼西亚语词形还原器 - - (In {} {} {:.2f} seconds) - (耗时 {} {} {:.2f} 秒) + + Stanza - Irish lemmatizer + Stanza - 爱尔兰语词形还原器 - - (In - (耗时 + + Stanza - Italian lemmatizer + Stanza - 意大利语词形还原器 - - - wl_profiler - - Automated Arabic Readability Index - + + Stanza - Japanese lemmatizer + Stanza - 日语词形还原器 - - Automated Readability Index - + + Stanza - Kazakh lemmatizer + Stanza - 哈萨克语词形还原器 - - 
Coleman-Liau Index - + + Stanza - Korean lemmatizer + Stanza - 韩语词形还原器 - - Devereaux Readability Index - + + Stanza - Kurdish (Kurmanji) lemmatizer + Stanza - 库尔德语(库尔曼吉语)词形还原器 - - Flesch-Kincaid Grade Level - + + Stanza - Kyrgyz lemmatizer + Stanza - 吉尔吉斯语词形还原器 - - Flesch Reading Ease - + + Stanza - Latin lemmatizer + Stanza - 拉丁语词形还原器 - - FORCAST Grade Level - + + Stanza - Latvian lemmatizer + Stanza - 拉脱维亚语词形还原器 - - Fórmula de Crawford - + + Stanza - Ligurian lemmatizer + Stanza - 利古里亚语词形还原器 - - Gulpease Index - + + Stanza - Lithuanian lemmatizer + Stanza - 立陶宛语词形还原器 - - Gunning Fog Index - + + Stanza - Manx lemmatizer + Stanza - 马恩语词形还原器 - - Legibilidad μ - + + Stanza - Marathi lemmatizer + Stanza - 马拉地语词形还原器 - - Lensear Write - + + Stanza - Nigerian Pidgin lemmatizer + Stanza - 尼日利亚皮钦语词形还原器 - - Lix - + + Stanza - Norwegian (Bokmål) lemmatizer + Stanza - 挪威语(书面)词形还原器 - - McAlpine EFLAW Readability Score - + + Stanza - Norwegian (Nynorsk) lemmatizer + Stanza - 挪威语(新)词形还原器 - - OSMAN - + + Stanza - Persian lemmatizer + Stanza - 波斯语词形还原器 - - Rix - + + Stanza - Polish lemmatizer + Stanza - 波兰语词形还原器 - - SMOG Grade - + + Stanza - Pomak lemmatizer + Stanza - 波马克语词形还原器 - - Spache Grade Level - + + Stanza - Portuguese lemmatizer + Stanza - 葡萄牙语词形还原器 - - Count of Paragraphs - 段落数 + + Stanza - Romanian lemmatizer + Stanza - 罗马尼亚语词形还原器 - - Count of Paragraphs % - 段落数% + + Stanza - Russian lemmatizer + Stanza - 俄语词形还原器 - - Count of Sentences - 句子数 + + Stanza - Russian (Old) lemmatizer + Stanza - 俄语(古)词形还原器 - - Count of Sentences % - 句子数% + + Stanza - Sámi (Northern) lemmatizer + Stanza - 萨米语(北)词形还原器 - - Count of Sentence Segments - 句段数 + + Stanza - Sanskrit lemmatizer + Stanza - 梵语词形还原器 - - Count of Sentence Segments % - 句段数% + + Stanza - Scottish Gaelic lemmatizer + Stanza - 苏格兰盖尔语词形还原器 - - Count of Tokens - 形符数 + + Stanza - Serbian (Latin script) lemmatizer + Stanza - 塞尔维亚语(拉丁文)词形还原器 - - Count of Tokens % - 形符数% + + Stanza - Slovak lemmatizer + Stanza - 斯洛伐克语词形还原器 - - Count 
of Types - 类符数 + + Stanza - Slovene lemmatizer + Stanza - 斯洛文尼亚语词形还原器 - - Count of Types % - 类符数% + + Stanza - Sorbian (Upper) lemmatizer + Stanza - 索布语(上)词形还原器 - - Count of Syllables - 音节数 + + Stanza - Spanish lemmatizer + Stanza - 西班牙语词形还原器 - - Count of Syllables % - 音节数% + + Stanza - Swedish lemmatizer + Stanza - 瑞典语词形还原器 - - Count of Characters - 字符数 + + Stanza - Tamil lemmatizer + Stanza - 泰米尔语词形还原器 - - Count of Characters % - 字符数% + + Stanza - Turkish lemmatizer + Stanza - 土耳其语词形还原器 - - Type-token Ratio - 类符形符比 + + Stanza - Ukrainian lemmatizer + Stanza - 乌克兰语词形还原器 - - Paragraph Length in Sentences (Mean) - 段落长(单位:句子)(均值) + + Stanza - Urdu lemmatizer + Stanza - 乌尔都语词形还原器 - - Paragraph Length in Sentences (Standard Deviation) - 段落长(单位:句子)(标准差) + + Stanza - Uyghur lemmatizer + Stanza - 维吾尔语词形还原器 - - Paragraph Length in Sentences (Variance) - 段落长(单位:句子)(方差) + + Stanza - Welsh lemmatizer + Stanza - 威尔士语词形还原器 - - Paragraph Length in Sentences (Minimum) - 段落长(单位:句子)(最小值) + + Stanza - Wolof lemmatizer + Stanza - 沃洛夫语词形还原器 - - Paragraph Length in Sentences (25th Percentile) - 段落长(单位:句子)(25分位数) + + SudachiPy - Japanese lemmatizer + SudachiPy - 日语词形还原器 - - Paragraph Length in Sentences (Median) - 段落长(单位:句子)(中位数) + + LaoNLP - Lao stop word list + LaoNLP - 老挝语停用词表 - - Paragraph Length in Sentences (75th Percentile) - 段落长(单位:句子)(75分位数) + + NLTK - Arabic stop word list + NLTK - 阿拉伯语停用词表 - - Paragraph Length in Sentences (Maximum) - 段落长(单位:句子)(最大值) + + NLTK - Azerbaijani stop word list + NLTK - 阿塞拜疆语停用词表 - - Paragraph Length in Sentences (Range) - 段落长(单位:句子)(极差) + + NLTK - Basque stop word list + NLTK - 巴斯克语停用词表 - - Paragraph Length in Sentences (Interquartile Range) - 段落长(单位:句子)(四分位差) + + NLTK - Bengali stop word list + NLTK - 孟加拉语停用词表 - - Paragraph Length in Sentences (Modes) - 段落长(单位:句子)(众数) + + NLTK - Catalan stop word list + NLTK - 加泰罗尼亚语停用词表 - - Paragraph Length in Sentence Segments (Mean) - 段落长(单位:句段)(均值) + + NLTK - Chinese (Simplified) stop word list + NLTK - 
汉语(简体)停用词表 - - Paragraph Length in Sentence Segments (Standard Deviation) - 段落长(单位:句段)(标准差) + + NLTK - Chinese (Traditional) stop word list + NLTK - 汉语(繁体)停用词表 - - Paragraph Length in Sentence Segments (Variance) - 段落长(单位:句段)(方差) + + NLTK - Danish stop word list + NLTK - 丹麦语停用词表 - - Paragraph Length in Sentence Segments (Minimum) - 段落长(单位:句段)(最小值) + + NLTK - Dutch stop word list + NLTK - 荷兰语停用词表 - - Paragraph Length in Sentence Segments (25th Percentile) - 段落长(单位:句段)(25分位数) + + NLTK - English stop word list + NLTK - 英语停用词表 - - Paragraph Length in Sentence Segments (Median) - 段落长(单位:句段)(中位数) + + NLTK - Finnish stop word list + NLTK - 芬兰语停用词表 - - Paragraph Length in Sentence Segments (75th Percentile) - 段落长(单位:句段)(75分位数) + + NLTK - French stop word list + NLTK - 法语停用词表 - - Paragraph Length in Sentence Segments (Maximum) - 段落长(单位:句段)(最大值) + + NLTK - German stop word list + NLTK - 德语停用词表 - - Paragraph Length in Sentence Segments (Range) - 段落长(单位:句段)(极差) + + NLTK - Greek (Modern) stop word list + NLTK - 希腊语(现代)停用词表 - - Paragraph Length in Sentence Segments (Interquartile Range) - 段落长(单位:句段)(四分位差) + + NLTK - Hebrew (Modern) stop word list + NLTK - 希伯来语(现代)停用词表 - - Paragraph Length in Sentence Segments (Modes) - 段落长(单位:句段)(众数) + + NLTK - Hungarian stop word list + NLTK - 匈牙利语停用词表 - - Paragraph Length in Tokens (Mean) - 段落长(单位:形符)(均值) + + NLTK - Indonesian stop word list + NLTK - 印度尼西亚语停用词表 - - Paragraph Length in Tokens (Standard Deviation) - 段落长(单位:形符)(标准差) + + NLTK - Italian stop word list + NLTK - 意大利语停用词表 - - Paragraph Length in Tokens (Variance) - 段落长(单位:形符)(方差) + + NLTK - Kazakh stop word list + NLTK - 哈萨克语停用词表 - - Paragraph Length in Tokens (Minimum) - 段落长(单位:形符)(最小值) + + NLTK - Nepali stop word list + NLTK - 尼泊尔语停用词表 - - Paragraph Length in Tokens (25th Percentile) - 段落长(单位:形符)(25分位数) + + NLTK - Norwegian (Bokmål) stop word list + NLTK - 挪威语(书面)停用词表 - - Paragraph Length in Tokens (Median) - 段落长(单位:形符)(中位数) + + NLTK - Portuguese stop word list + NLTK - 葡萄牙语停用词表 - - 
Paragraph Length in Tokens (75th Percentile) - 段落长(单位:形符)(75分位数) + + NLTK - Romanian stop word list + NLTK - 罗马尼亚语停用词表 - - Paragraph Length in Tokens (Maximum) - 段落长(单位:形符)(最大值) + + NLTK - Russian stop word list + NLTK - 俄语停用词表 - - Paragraph Length in Tokens (Range) - 段落长(单位:形符)(极差) + + NLTK - Slovene stop word list + NLTK - 斯洛文尼亚语停用词表 - - Paragraph Length in Tokens (Interquartile Range) - 段落长(单位:形符)(四分位差) + + NLTK - Spanish stop word list + NLTK - 西班牙语停用词表 - - Paragraph Length in Tokens (Modes) - 段落长(单位:形符)(众数) + + NLTK - Swedish stop word list + NLTK - 瑞典语停用词表 - - Sentence Length in Tokens (Mean) - 句长(单位:形符)(均值) + + NLTK - Tajik stop word list + NLTK - 塔吉克语停用词表 - - Sentence Length in Tokens (Standard Deviation) - 句长(单位:形符)(标准差) + + NLTK - Turkish stop word list + NLTK - 土耳其语停用词表 - - Sentence Length in Tokens (Variance) - 句长(单位:形符)(方差) + + PyThaiNLP - Thai stop word list + PyThaiNLP - 泰语停用词表 - - Sentence Length in Tokens (Minimum) - 句长(单位:形符)(最小值) + + Custom stop word list + 自定义停用词表 - - Sentence Length in Tokens (25th Percentile) - 句长(单位:形符)(25分位数) + + Stanza - Afrikaans dependency parser + Stanza - 南非语依存分析器 + + + + Stanza - Arabic dependency parser + Stanza - 阿拉伯语依存分析器 - - Sentence Length in Tokens (Median) - 句长(单位:形符)(中位数) + + Stanza - Armenian (Classical) dependency parser + Stanza - 亚美尼亚语(古)依存分析器 - - Sentence Length in Tokens (75th Percentile) - 句长(单位:形符)(75分位数) + + Stanza - Armenian (Eastern) dependency parser + Stanza - 亚美尼亚语(东)依存分析器 - - Sentence Length in Tokens (Maximum) - 句长(单位:形符)(最大值) + + Stanza - Armenian (Western) dependency parser + Stanza - 亚美尼亚语(西)依存分析器 - - Sentence Length in Tokens (Range) - 句长(单位:形符)(极差) + + Stanza - Basque dependency parser + Stanza - 巴斯克语依存分析器 - - Sentence Length in Tokens (Interquartile Range) - 句长(单位:形符)(四分位差) + + Stanza - Belarusian dependency parser + Stanza - 白俄罗斯语依存分析器 - - Sentence Length in Tokens (Modes) - 句长(单位:形符)(众数) + + Stanza - Bulgarian dependency parser + Stanza - 保加利亚语依存分析器 - - Sentence Segment Length in Tokens 
(Mean) - 句段长(单位:形符)(均值) + + Stanza - Buryat (Russia) dependency parser + Stanza - 布里亚特语(俄罗斯)依存分析器 - - Sentence Segment Length in Tokens (Standard Deviation) - 句段长(单位:形符)(标准差) + + Stanza - Catalan dependency parser + Stanza - 加泰罗尼亚语依存分析器 - - Sentence Segment Length in Tokens (Variance) - 句段长(单位:形符)(方差) + + Stanza - Chinese (Classical) dependency parser + Stanza - 汉语(文言)依存分析器 - - Sentence Segment Length in Tokens (Minimum) - 句段长(单位:形符)(最小值) + + Stanza - Chinese (Simplified) dependency parser + Stanza - 汉语(简体)依存分析器 - - Sentence Segment Length in Tokens (25th Percentile) - 句段长(单位:形符)(25分位数) + + Stanza - Chinese (Traditional) dependency parser + Stanza - 汉语(繁体)依存分析器 - - Sentence Segment Length in Tokens (Median) - 句段长(单位:形符)(中位数) + + Stanza - Church Slavonic (Old) dependency parser + Stanza - 教会斯拉夫语(古)依存分析器 - - Sentence Segment Length in Tokens (75th Percentile) - 句段长(单位:形符)(75分位数) + + Stanza - Coptic dependency parser + Stanza - 科普特语依存分析器 - - Sentence Segment Length in Tokens (Maximum) - 句段长(单位:形符)(最大值) + + Stanza - Croatian dependency parser + Stanza - 克罗地亚语依存分析器 - - Sentence Segment Length in Tokens (Range) - 句段长(单位:形符)(极差) + + Stanza - Czech dependency parser + Stanza - 捷克语依存分析器 - - Sentence Segment Length in Tokens (Interquartile Range) - 句段长(单位:形符)(四分位数) + + Stanza - Danish dependency parser + Stanza - 丹麦语依存分析器 - - Sentence Segment Length in Tokens (Modes) - 句段长(单位:形符)(众数) + + Stanza - Dutch dependency parser + Stanza - 荷兰语依存分析器 - - Token Length in Syllables (Mean) - 形符长(单位:音节)(均值) + + Stanza - English dependency parser + Stanza - 英语依存分析器 - - Token Length in Syllables (Standard Deviation) - 形符长(单位:音节)(标准差) + + Stanza - English (Old) dependency parser + Stanza - 英语(古)依存分析器 - - Token Length in Syllables (Variance) - 形符长(单位:音节)(方差) + + Stanza - Erzya dependency parser + Stanza - 埃尔齐亚语依存分析器 - - Token Length in Syllables (Minimum) - 形符长(单位:音节)(最小值) + + Stanza - Estonian dependency parser + Stanza - 爱沙尼亚语依存分析器 - - Token Length in Syllables (25th Percentile) - 
形符长(单位:音节)(25分位数) + + Stanza - Faroese dependency parser + Stanza - 法罗语依存分析器 - - Token Length in Syllables (Median) - 形符长(单位:音节)(中位数) + + Stanza - Finnish dependency parser + Stanza - 芬兰语依存分析器 - - Token Length in Syllables (75th Percentile) - 形符长(单位:音节)(75分位数) + + Stanza - French dependency parser + Stanza - 法语依存分析器 - - Token Length in Syllables (Maximum) - 形符长(单位:音节)(最大值) + + Stanza - French (Old) dependency parser + Stanza - 法语(古)依存分析器 - - Token Length in Syllables (Range) - 形符长(单位:音节)(极差) + + Stanza - Galician dependency parser + Stanza - 加里西亚语依存分析器 - - Token Length in Syllables (Interquartile Range) - 形符长(单位:音节)(四分位差) + + Stanza - German dependency parser + Stanza - 德语依存分析器 - - Token Length in Syllables (Modes) - 形符长(单位:音节)(众数) + + Stanza - Gothic dependency parser + Stanza - 哥特语依存分析器 - - Token Length in Characters (Mean) - 形符长(单位:字符)(均值) + + Stanza - Greek (Ancient) dependency parser + Stanza - 希腊语(古)依存分析器 - - Token Length in Characters (Standard Deviation) - 形符长(单位:字符)(标准差) + + Stanza - Greek (Modern) dependency parser + Stanza - 希腊语(现代)依存分析器 - - Token Length in Characters (Variance) - 形符长(单位:字符)(方差) + + Stanza - Hebrew (Ancient) dependency parser + Stanza - 希伯来语(古)依存分析器 - - Token Length in Characters (Minimum) - 形符长(单位:字符)(最小值) + + Stanza - Hebrew (Modern) dependency parser + Stanza - 希伯来语(现代)依存分析器 - - Token Length in Characters (25th Percentile) - 形符长(单位:字符)(25分位值) + + Stanza - Hindi dependency parser + Stanza - 印地语依存分析器 - - Token Length in Characters (Median) - 形符长(单位:字符)(中位数) + + Stanza - Hungarian dependency parser + Stanza - 匈牙利语依存分析器 - - Token Length in Characters (75th Percentile) - 形符长(单位:字符)(75分位数) + + Stanza - Icelandic dependency parser + Stanza - 冰岛语依存分析器 - - Token Length in Characters (Maximum) - 形符长(单位:字符)(最大值) + + Stanza - Indonesian dependency parser + Stanza - 印度尼西亚语依存分析器 - - Token Length in Characters (Range) - 形符长(单位:字符)(极差) + + Stanza - Irish dependency parser + Stanza - 爱尔兰语依存分析器 - - Token Length in Characters (Interquartile Range) - 
形符长(单位:字符)(四分位差) + + Stanza - Italian dependency parser + Stanza - 意大利语依存分析器 - - Token Length in Characters (Modes) - 形符长(单位:字符)(众数) + + Stanza - Japanese dependency parser + Stanza - 日语依存分析器 - - Type Length in Syllables (Mean) - 类符长(单位:音节)(均值) + + Stanza - Kazakh dependency parser + Stanza - 哈萨克语依存分析器 - - Type Length in Syllables (Standard Deviation) - 类符长(单位:音节)(标准差) + + Stanza - Korean dependency parser + Stanza - 韩语依存分析器 - - Type Length in Syllables (Variance) - 类符长(单位:音节)(方差) + + Stanza - Kurdish (Kurmanji) dependency parser + Stanza - 库尔德语(库尔曼吉语)依存分析器 - - Type Length in Syllables (Minimum) - 类符长(单位:音节)(最小值) + + Stanza - Kyrgyz dependency parser + Stanza - 吉尔吉斯语依存分析器 - - Type Length in Syllables (25th Percentile) - 类符长(单位:音节)(25分位数) + + Stanza - Latin dependency parser + Stanza - 拉丁语依存分析器 - - Type Length in Syllables (Median) - 类符长(单位:音节)(中位数) + + Stanza - Latvian dependency parser + Stanza - 拉脱维亚语依存分析器 - - Type Length in Syllables (75th Percentile) - 类符长(单位:音节)(75分位数) + + Stanza - Ligurian dependency parser + Stanza - 利古里亚语依存分析器 - - Type Length in Syllables (Maximum) - 类符长(单位:音节)(最大值) + + Stanza - Lithuanian dependency parser + Stanza - 立陶宛语依存分析器 - - Type Length in Syllables (Range) - 类符长(单位:音节)(极差) + + Stanza - Maltese dependency parser + Stanza - 马耳他语依存分析器 - - Type Length in Syllables (Interquartile Range) - 类符长(单位:音节)(四分位差) + + Stanza - Manx dependency parser + Stanza - 马恩语依存分析器 - - Type Length in Syllables (Modes) - 类符长(单位:音节)(众数) + + Stanza - Marathi dependency parser + Stanza - 马拉地语依存分析器 - - Type Length in Characters (Mean) - 类符长(单位:字符)(均值) + + Stanza - Nigerian Pidgin dependency parser + Stanza - 尼日利亚皮钦语依存分析器 - - Type Length in Characters (Standard Deviation) - 类符长(单位:字符)(标准差) + + Stanza - Norwegian (Bokmål) dependency parser + Stanza - 挪威语(书面)依存分析器 - - Type Length in Characters (Variance) - 类符长(单位:字符)(方差) + + Stanza - Norwegian (Nynorsk) dependency parser + Stanza - 挪威语(新)依存分析器 - - Type Length in Characters (Minimum) - 类符长(单位:字符)(最小值) + + Stanza - 
Persian dependency parser + Stanza - 波斯语依存分析器 - - Type Length in Characters (25th Percentile) - 类符长(单位:字符)(25分位数) + + Stanza - Polish dependency parser + Stanza - 波兰语依存分析器 - - Type Length in Characters (Median) - 类符长(单位:字符)(中位数) + + Stanza - Pomak dependency parser + Stanza - 波马克语依存分析器 - - Type Length in Characters (75th Percentile) - 类符长(单位:字符)(75分位数) + + Stanza - Portuguese dependency parser + Stanza - 葡萄牙语依存分析器 - - Type Length in Characters (Maximum) - 类符长(单位:字符)(最大值) + + Stanza - Romanian dependency parser + Stanza - 罗马尼亚语依存分析器 - - Type Length in Characters (Range) - 类符长(单位:字符)(极差) + + Stanza - Russian dependency parser + Stanza - 俄语依存分析器 - - Type Length in Characters (Interquartile Range) - 类符长(单位:字符)(四分位差) + + Stanza - Russian (Old) dependency parser + Stanza - 俄语(古)依存分析器 - - Type Length in Characters (Modes) - 类符长(单位:字符)(众数) + + Stanza - Sámi (Northern) dependency parser + Stanza - 萨米语(北)依存分析器 - - Syllable Length in Characters (Mean) - 音节长(单位:字符)(均值) + + Stanza - Sanskrit dependency parser + Stanza - 梵语依存分析器 - - Syllable Length in Characters (Standard Deviation) - 音节长(单位:字符)(标准差) + + Stanza - Scottish Gaelic dependency parser + Stanza - 苏格兰盖尔语依存分析器 - - Syllable Length in Characters (Variance) - 音节长(单位:字符)(方差) + + Stanza - Serbian (Latin script) dependency parser + Stanza - 塞尔维亚语(拉丁文)依存分析器 - - Syllable Length in Characters (Minimum) - 音节长(单位:字符)(最小值) + + Stanza - Slovak dependency parser + Stanza - 斯洛伐克语依存分析器 - - Syllable Length in Characters (25th Percentile) - 音节长(单位:字符)(25分位数) + + Stanza - Slovene dependency parser + Stanza - 斯洛文尼亚语依存分析器 - - Syllable Length in Characters (Median) - 音节长(单位:字符)(中位数) + + Stanza - Sorbian (Upper) dependency parser + Stanza - 索布语(上)依存分析器 - - Syllable Length in Characters (75th Percentile) - 音节长(单位:字符)(75分位数) + + Stanza - Spanish dependency parser + Stanza - 西班牙语依存分析器 - - Syllable Length in Characters (Maximum) - 音节长(单位:字符)(最大值) + + Stanza - Swedish dependency parser + Stanza - 瑞典语依存分析器 - - Syllable Length in Characters (Range) 
- 音节长(单位:字符)(极差) + + Stanza - Tamil dependency parser + Stanza - 泰米尔语依存分析器 - - Syllable Length in Characters (Interquartile Range) - 音节长(单位:字符)(四分位差) + + Stanza - Telugu dependency parser + Stanza - 泰卢固语依存分析器 - - Syllable Length in Characters (Modes) - 音节长(单位:字符)(众数) + + Stanza - Turkish dependency parser + Stanza - 土耳其语依存分析器 - - Al-Heeti's Readability Prediction Formula - + + Stanza - Ukrainian dependency parser + Stanza - 乌克兰语依存分析器 - - Bormuth's Cloze Mean - + + Stanza - Urdu dependency parser + Stanza - 乌尔都语依存分析器 - - Bormuth's Grade Placement - + + Stanza - Uyghur dependency parser + Stanza - 维吾尔语依存分析器 - - Coleman's Readability Formula - + + Stanza - Vietnamese dependency parser + Stanza - 越南语依存分析器 - - Dale-Chall Readability Formula - + + Stanza - Welsh dependency parser + Stanza - 威尔士语依存分析器 - - Danielson-Bryan's Readability Formula - + + Stanza - Wolof dependency parser + Stanza - 沃洛夫语依存分析器 - - Dawood's Readability Formula - + + Stanza - Chinese (Simplified) sentiment analyzer + Stanza - 汉语(简体)情感分析器 - - Degrees of Reading Power - + + Stanza - German sentiment analyzer + Stanza - 德语情感分析器 - - Dickes-Steiwer Handformel - + + Stanza - English sentiment analyzer + Stanza - 英语情感分析器 - - Easy Listening Formula - + + Stanza - Marathi sentiment analyzer + Stanza - 马拉地语情感分析器 - - Flesch Reading Ease (Farr-Jenkins-Paterson) - + + Stanza - Spanish sentiment analyzer + Stanza - 西班牙语情感分析器 - - Fórmula de Comprensibilidad de Gutiérrez de Polini - + + Stanza - Vietnamese sentiment analyzer + Stanza - 越南语情感分析器 - - Fucks's Stilcharakteristik - + + Underthesea - Vietnamese sentiment analyzer + Underthesea - 越南语情感分析器 - - Lorge Readability Index - + + VADER - Afrikaans sentiment analyzer + VADER - 南非语情感分析器 - - Luong-Nguyen-Dinh's Readability Formula - + + VADER - Albanian sentiment analyzer + VADER - 阿尔巴尼亚语情感分析器 - - neue Wiener Literaturformeln - + + VADER - Amharic sentiment analyzer + VADER - 阿姆哈拉语情感分析器 - - neue Wiener Sachtextformel - + + VADER - Arabic sentiment analyzer + VADER - 
阿拉伯语情感分析器 - - Strain Index - + + VADER - Armenian sentiment analyzer + VADER - 亚美尼亚语情感分析器 - - Tränkle & Bailer's Readability Formula - + + VADER - Assamese sentiment analyzer + VADER - 阿萨姆语情感分析器 - - Tuldava's Text Difficulty - + + VADER - Azerbaijani sentiment analyzer + VADER - 阿塞拜疆语情感分析器 - - Wheeler & Smith's Readability Formula - + + VADER - Basque sentiment analyzer + VADER - 巴斯克语情感分析器 - - Corrected TTR - + + VADER - Belarusian sentiment analyzer + VADER - 白俄罗斯语情感分析器 - - Fisher's Index of Diversity - + + VADER - Bengali sentiment analyzer + VADER - 孟加拉语情感分析器 - - Herdan's Vₘ - + + VADER - Bulgarian sentiment analyzer + VADER - 保加利亚语情感分析器 - - HD-D - + + VADER - Burmese sentiment analyzer + VADER - 缅甸语情感分析器 - - LogTTR - + + VADER - Catalan sentiment analyzer + VADER - 加泰罗尼亚语情感分析器 - - Mean Segmental TTR - + + VADER - Chinese (Simplified) sentiment analyzer + VADER - 汉语(简体)情感分析器 - - Measure of Textual Lexical Diversity - + + VADER - Chinese (Traditional) sentiment analyzer + VADER - 汉语(繁体)情感分析器 - - Moving-average TTR - + + VADER - Croatian sentiment analyzer + VADER - 克罗地亚语情感分析器 - - Popescu-Mačutek-Altmann's B₁ - + + VADER - Czech sentiment analyzer + VADER - 捷克语情感分析器 - - Popescu-Mačutek-Altmann's B₂ - + + VADER - Danish sentiment analyzer + VADER - 丹麦语情感分析器 + + + + VADER - Dutch sentiment analyzer + VADER - 荷兰语情感分析器 - - Popescu-Mačutek-Altmann's B₃ - + + VADER - English sentiment analyzer + VADER - 英语情感分析器 - - Popescu-Mačutek-Altmann's B₄ - + + VADER - Esperanto sentiment analyzer + VADER - 世界语情感分析器 - - Popescu-Mačutek-Altmann's B₅ - + + VADER - Estonian sentiment analyzer + VADER - 爱沙尼亚语情感分析器 - - Popescu's R₁ - + + VADER - Finnish sentiment analyzer + VADER - 芬兰语情感分析器 - - Popescu's R₂ - + + VADER - French sentiment analyzer + VADER - 法语情感分析器 - - Popescu's R₃ - + + VADER - Galician sentiment analyzer + VADER - 加里西亚语情感分析器 - - Popescu's R₄ - + + VADER - Georgian sentiment analyzer + VADER - 格鲁吉亚语情感分析器 - - Repeat Rate - + + VADER - German sentiment analyzer + VADER - 
德语情感分析器 - - Root TTR - + + VADER - Greek (Modern) sentiment analyzer + VADER - 希腊语(现代)情感分析器 - - Shannon Entropy - 香农熵 + + VADER - Gujarati sentiment analyzer + VADER - 古吉拉特语情感分析器 - - Simpson's l - + + VADER - Hebrew (Modern) sentiment analyzer + VADER - 希伯来语(现代)情感分析器 - - vocd-D - + + VADER - Hindi sentiment analyzer + VADER - 印地语情感分析器 - - Yule's Characteristic K - + + VADER - Hungarian sentiment analyzer + VADER - 匈牙利语情感分析器 - - Yule's Index of Diversity - + + VADER - Icelandic sentiment analyzer + VADER - 冰岛语情感分析器 - - - wl_results_filter - - Filter Results - 筛选结果 + + VADER - Indonesian sentiment analyzer + VADER - 印度尼西亚语情感分析器 - - File to filter: - 待筛选文件: + + VADER - Irish sentiment analyzer + VADER - 爱尔兰语情感分析器 - - Filter - 筛选 + + VADER - Italian sentiment analyzer + VADER - 意大利语情感分析器 - - Close - 关闭 + + VADER - Japanese sentiment analyzer + VADER - 日语情感分析器 - - The results in the table has been successfully filtered. - 已成功筛选表格中的结果。 + + VADER - Kannada sentiment analyzer + VADER - 卡纳达语情感分析器 - - Filtering results... 
- 筛选结果中…… + + VADER - Kazakh sentiment analyzer + VADER - 哈萨克语情感分析器 - - - wl_settings - - Empty Path - 空路径 + + VADER - Khmer sentiment analyzer + VADER - 柬埔寨语情感分析器 - - - <div>The path should not be left empty!</div> - - - <div>路径不可为空!</div> - + + VADER - Korean sentiment analyzer + VADER - 韩语情感分析器 - - Path not Found - 未找到路径 + + VADER - Kurdish (Kurmanji) sentiment analyzer + VADER - 库尔德语(库尔曼吉语)情感分析器 - - - <div>The specified path "{}" could not be found!</div> - <div>Please check your settings and try again.</div> - - - <div>未找到指定的路径“{}”!</div> - <div>请检查你的设置后重试。</div> - + + VADER - Kyrgyz sentiment analyzer + VADER - 吉尔吉斯语情感分析器 - - Invalid File Path - 无效文件路径 + + VADER - Latin sentiment analyzer + VADER - 拉丁语情感分析器 - - - <div>The specified path "{}" should be a file, not a directory!</div> - <div>Please check your settings and try again.</div> - - - <div>指定的路径“{}”应是一个文件,而不是目录!</div> - <div>请检查你的设置后重试。</div> - + + VADER - Latvian sentiment analyzer + VADER - 拉脱维亚语情感分析器 - - Invalid Directory Path - 无效文件夹路径 + + VADER - Lithuanian sentiment analyzer + VADER - 立陶宛语情感分析器 - - - <div>The specified path "{}" should be a directory, not a file!</div> - <div>Please check your settings and try again.</div> - - - <div>指定的路径“{}”应是一个目录,而不是文件!</div> - <div>请检查你的设置后重试。</div> - + + VADER - Luganda sentiment analyzer + VADER - 卢干达语情感分析器 - - Path Not Exist - 路径不存在 + + VADER - Luxembourgish sentiment analyzer + VADER - 卢森堡语情感分析器 - - - {} - <body> - <div>The specified path "{}" does not exist.</div> - <div>Do you want to create the directory?</div> - </body> - - - {} - <body> - <div>指定的路径“{}”不存在。</div> - <div>你想要新建该文件夹吗?</div> - </body> - + + VADER - Macedonian sentiment analyzer + VADER - 马其顿语情感分析器 - - - wl_settings_default - - Observed Files - 观察文件 + + VADER - Malay sentiment analyzer + VADER - 马来语情感分析器 - - Profiler - 分析工具 + + VADER - Malayalam sentiment analyzer + VADER - 马拉雅拉姆语情感分析器 - - APA (7th edition) - APA(第七版) + + VADER - Maltese sentiment analyzer + VADER - 马耳他语情感分析器 - - Counts - 
计数 + + VADER - Marathi sentiment analyzer + VADER - 马拉地语情感分析器 - - Token - 形符 + + VADER - Meitei (Meitei script) sentiment analyzer + VADER - 曼尼普尔语(曼尼普尔文)情感分析器 - - File - 文件 + + VADER - Mongolian sentiment analyzer + VADER - 蒙古语情感分析器 - - Ascending - 升序 + + VADER - Nepali sentiment analyzer + VADER - 尼泊尔语情感分析器 - - Token no. - 形符序号 + + VADER - Norwegian (Bokmål) sentiment analyzer + VADER - 挪威语(书面)情感分析器 - - Line chart - 折线图 + + VADER - Odia sentiment analyzer + VADER - 奥里亚语情感分析器 - - Total - 合计 + + VADER - Persian sentiment analyzer + VADER - 波斯语情感分析器 - - Frequency - 频数 + + VADER - Polish sentiment analyzer + VADER - 波兰语情感分析器 - - None - + + VADER - Portuguese sentiment analyzer + VADER - 葡萄牙语情感分析器 - - p-value - p 值 + + VADER - Punjabi (Gurmukhi script) sentiment analyzer + VADER - 旁遮普语(古木基文)情感分析器 - - General - 全局 + + VADER - Romanian sentiment analyzer + VADER - 罗马尼亚语情感分析器 - - Excel workbooks (*.xlsx) - Excel 工作簿 (*.xlsx) + + VADER - Russian sentiment analyzer + VADER - 俄语情感分析器 - - Non-embedded - 非嵌入式 + + VADER - Sanskrit sentiment analyzer + VADER - 梵语情感分析器 - - Header - + + VADER - Scottish Gaelic sentiment analyzer + VADER - 苏格兰盖尔语情感分析器 - - Embedded - 嵌入式 + + VADER - Serbian (Cyrillic script) sentiment analyzer + VADER - 塞尔维亚语(西里尔文)情感分析器 - - Part of speech - 词性 + + VADER - Sindhi sentiment analyzer + VADER - 信德语情感分析器 - - Others - 其他 + + VADER - Sinhala sentiment analyzer + VADER - 僧伽罗语情感分析器 - - Paragraph - 段落 + + VADER - Slovak sentiment analyzer + VADER - 斯洛伐克语情感分析器 - - Sentence - 句子 + + VADER - Slovene sentiment analyzer + VADER - 斯洛文尼亚语情感分析器 - - Word - 单词 + + VADER - Spanish sentiment analyzer + VADER - 西班牙语情感分析器 - - Policy one - + + VADER - Swahili sentiment analyzer + VADER - 斯瓦西里语情感分析器 - - New - 新版 + + VADER - Swedish sentiment analyzer + VADER - 瑞典语情感分析器 - - Original - 原版 + + VADER - Tagalog sentiment analyzer + VADER - 他加禄语情感分析器 - - Rank-frequency distribution - 频数排序分布 + + VADER - Tajik sentiment analyzer + VADER - 塔吉克语情感分析器 - - Two-tailed - 双尾 + + VADER - 
Tamil sentiment analyzer + VADER - 泰米尔语情感分析器 - - Relative frequency - 相对频数 + + VADER - Tatar sentiment analyzer + VADER - 鞑靼语情感分析器 - - Colormap - 色谱 + + VADER - Telugu sentiment analyzer + VADER - 泰卢固语情感分析器 - - - wl_settings_figs - - Square - 方形 + + VADER - Thai sentiment analyzer + VADER - 泰语情感分析器 - - Circle - 圆形 + + VADER - Tigrinya sentiment analyzer + VADER - 提格雷尼亚语情感分析器 - - Triangle up - 朝上三角形 + + VADER - Turkish sentiment analyzer + VADER - 土耳其语情感分析器 - - Triangle right - 朝右三角形 + + VADER - Ukrainian sentiment analyzer + VADER - 乌克兰语情感分析器 - - Triangle down - 朝下三角形 + + VADER - Urdu sentiment analyzer + VADER - 乌尔都语情感分析器 - - Triangle left - 朝左三角形 + + VADER - Uyghur sentiment analyzer + VADER - 维吾尔语情感分析器 - - Thin diamond - 薄菱形 + + VADER - Welsh sentiment analyzer + VADER - 威尔士语情感分析器 - - Pentagon - 五角形 + + VADER - Yoruba sentiment analyzer + VADER - 约鲁巴语情感分析器 - - Hexagon - 六边形 + + VADER - Zulu sentiment analyzer + VADER - 祖鲁语情感分析器 - - Octagon - 八边形 + + None + - - Arc3 + + Average logarithmic distance - - Arc + + Average reduced frequency - - Angle3 + + Average waiting time - - Angle + + Carroll's D₂ - - Bar + + Gries's DP - - Solid - 实线 + + Juilland's D + - - Dashed - 虚线 + + Lyne's D₃ + - - Dash-dotted - 点画线 + + Rosengren's S + - - Dotted - 点线 + + Zhang's Distributional Consistency + - - Curve - 圆弧 + + Carroll's Uₘ + - - Curve A - 圆弧 A + + Engwall's FM + - - Curve B - 圆弧 B + + Juilland's U + - - Curve AB - 圆弧 AB + + Kromer's UR + + + + + Rosengren's KF + - - Curve filled A - 实心圆弧 A + + Fisher's exact test + 费希尔精确检验 - - Curve filled B - 实心圆弧 B + + Log-likelihood ratio test + 对数似然比检验 - - Curve filled AB - 实心圆弧 AB + + Mann-Whitney U Test + 曼惠特尼 U 检验 - - Bracket A - 方括号 A + + Pearson's chi-squared test + 皮尔森卡方检验 - - Bracket B - 方括号 B + + Student's t-test (1-sample) + 学生 t 检验(单样本) - - Bracket AB - 方括号 AB + + Student's t-test (2-sample) + 学生 t 检验(双样本) - - Bar AB - 横条 AB + + z-score + z 值 - - Bracket curve - 方括号加圆弧 + + z-score (Berry-Rogghe) + z 值(Berry-Rogghe) - - Simple 
- 朴素 + + Cubic association ratio + - - Fancy - 绚丽 + + Dice's coefficient + Dice 系数 - - Wedge - 楔形 + + Difference coefficient + - - Circular - 环形 + + Jaccard index + 雅卡尔指数 - - Kamada-Kawai + + Log-frequency biased MD - - Planar - 平面 + + Kilgarriff's ratio + - - Random - 随机 + + Log ratio + - - Shell - 同心 + + Minimum sensitivity + - - Spring - 弹簧 + + Mutual dependency + - - Spectral - 谱图 + + Mutual expectation + - - Spiral - 螺旋 + + Mutual information + 互信息 - - - wl_settings_files - - Type - 类型 + + Odds ratio + 比值比 - - Level - 层级 + + Pointwise mutual information + 点互信息 - - Opening Tag - 开始标签 + + Poisson collocation measure + - - Closing Tag - 结束标签 + + Squared phi coefficient + Phi 系数的平方 - - Preview - 预览 + + Zhang's DC + - - Embedded - 嵌入式 + + Log-likelihood Ratio + 对数似然比 - - Non-embedded - 非嵌入式 + + t-statistic + t 值 - - Reset - 重置 + + Dice's Coefficient + Dice 系数 - - - <div>Embedded tags must begin with a punctuation mark, e.g. an underscore or a slash!</div> - - - <div>嵌入式标签必须以一个标点符号,如下划线或斜杠,开头!</div> - + + Difference Coefficient + - - - <div>Non-embedded tags must begin and end with a punctuation mark, e.g. brackets!</div> - - - <div>非嵌入式标签必须以一个标点,如括号,开头和结尾!</div> - + + Jaccard Index + 雅卡尔指数 - - Invalid Opening Tag - 无效开始标签 + + Kilgarriff's Ratio + - - Duplicate Tags - 重复标签 + + Log Ratio + - - - <div>The tag that you have specified already exists in the table!</div> - - - <div>你指定的标签已存在于表格中!</div> - + + Minimum Sensitivity + - - token - 形符 + + Poisson Collocation Measure + - - TAG - 标签 + + VADER - Lao sentiment analyzer + VADER - 老挝语情感分析器 wl_tables - + Search in results 在结果中查找 - + Number of results: 结果数: - + Number of results: 0 结果数:0 - + Sort results 对结果排序 - + Filter results 筛选结果 - + Export Table 导出表格 - + Exporting table... 
导出表格中…… - - Export Completed - 导出完成 - - - - - <div>The table has been successfully exported to "{}".</div> - - - <div>已成功导出表格至“{}”。</div> - - - - - Export Error - 导出时出错 - - - - - <div>Access to "{}" is denied, please specify another location or close the file and try again.</div> - - - <div>访问“{}”时被拒绝,请指定其他位置或关闭文件后重试。</div> - - - - + Add 添加 - + Insert 插入 - + Remove 移除 - + Clear 清空 + + + Generate table + 生成表格 + + + + Generate figure + 生成图表 + + + + Export selected cells... + 导出选中单元格... + + + + Export all cells... + 导出所有单元格... + + + + Clear table + 清空表格 + + + + Rank + 序号 + + + + Clear Table + 清空表格 + + + + + <div> + The results in the table have yet been exported. Do you really want to clear the table? + </div> + + + <div> + 表格中的结果尚未导出。你确认要清空表格吗? + </div> + + wl_texts - + Paragraph 段落 - + Sentence 句子 - + Word 单词 @@ -12561,342 +13174,319 @@ Frequency wl_widgets - - Context Settings - 上下文设置 - - - + Words 单词 - + All lowercase 全小写 - + All uppercase 全大写 - + Title case 首字母大写 - + Numerals - 数词 + 数字 - + Punctuation marks 标点符号 - + Treat as all lowercase 视为全小写 - + Filter stop words 过滤停用词 - + Ignore tags 忽略标签 - + Use tags only 仅使用标签 - + Search terms: 检索项: - + Search term: 检索项: - + Multi-search mode 多重检索模式 - + * Use whitespace to delimit multiple tokens * 使用空白来分隔多个形符 - + Match case 匹配大小写 - + Match whole words 全字匹配 - + Match inflected forms 匹配屈折变化形式 - + Use regular expressions 使用正则表达式 - + Match without tags 匹配时忽略标签 - + Match tags only 仅匹配标签 - + * Only 1 token is allowed in each search term - * 每条搜索项中只允许输入1个形符 + * 每条检索项中只允许输入1个形符 - + Context settings: 上下文设置: - + Settings... 设置... 
- + Measure of dispersion: - 分布算法: + 分布计算方法: - + Measure of adjusted frequency: - 调整频数算法: + 调整频数计算方法: - + Test of statistical significance: 统计显著性检验: - + Measure of Bayes factor: - 贝叶斯因子算法: + 贝叶斯因子计算方法: - + Measure of effect size: - 效应量算法: + 效应量计算方法: - + Show percentage data 显示百分比数据 - + Show cumulative data 显示累加数据 - + Show breakdown by file 显示各文件明细 - + Show breakdown by span position 显示各距位明细 - + Line chart 折线图 - + Frequency 频数 - + L - + R - + p-value p 值 - + Bayes factor 贝叶斯因子 - + Graph type: 图表类型: - + Sort by file: 文件排序依据: - + Use data: 使用数据: - + Use percentage data 使用百分比数据 - + Use cumulative data 使用累加数据 - + Word cloud 词云图 - + Network graph 网络图 - + Show 显示 - + part-of-speech tags 词性标签 - + Show lemmas 显示词根 - + Collapse punctuation marks 合并标点符号 - + Compact mode 紧凑模式 - + Show each sentence in a separate tab 将句子分别显示在独立的标签页内 - + coarse-grained 粗分 - + fine-grained 细分 - - From - - - - - to - - - - + Divide each file into 将每个文件分为 - + sub-sections 等份 - + Absolute frequency 绝对频数 - + Relative frequency 相对频数 - + Direction: 方向: - + Two-tailed 双尾 - + Left-tailed 左尾 - + Right-tailed 右尾 - + Apply lemmatization 应用词形还原 - + Assign part-of-speech tags 赋词性标签 - - wl_wordlist_generator - - - No language support - 无语言支持 - - diff --git a/trs/zho_tw.ts b/trs/zho_tw.ts index 2e9eacff4..284aa20ce 100644 --- a/trs/zho_tw.ts +++ b/trs/zho_tw.ts @@ -1,141 +1,45 @@ -Dialog_Open_Files - - -Add files... -新增檔案... - - - -Add folder... -新增資料夾... - - - -Auto-detect encodings -自動檢測編碼 - - - -Auto-detect languages -自動檢測語種 - - - -Include files in subfolders -包含子資料夾下檔案 - - - -Open -開啟 - - - -Cancel -取消 - - - -Error Adding Files -新增檔案時出錯 - - - -Checking files... -檢查檔案中…… - - - -Open Files -開啟檔案 - - - -Open Folder -開啟資料夾 - - - -Remove files -移除檔案 - - - -Clear table -清空表格 - - - - - <div> - An error occurred while adding files, so the following files are not added to the table. 
- </div> - - - <div> - 新增檔案時發生了一個錯誤,因此下列檔案未被新增至表格中。 - </div> - - - - -Empty file -空檔案 - - - -Unsupported file type -檔案型別不支援 - - - -Duplicate file -重複檔案 - - - Table_Open_Files - + Language 語種 - + Path 路徑 - + Encoding 編碼 - + Tokenized 已分詞 - + Tagged 已標註 -WL_Dialog_Clear_Table +Wl_Button_Color - -Clear Table -清空表格 + +Pick Color +選擇顏色 Wl_Button_Restore_Defaults - + <div>Are you sure you want to reset all settings to their defaults?</div> @@ -144,15 +48,20 @@ - + Restore Defaults 恢復預設值 + + +Restore defaults +恢復預設值 + -Wl_Combo_Box_File_Figure_Settings +Wl_Combo_Box_File_Fig_Settings - + Total 合計 @@ -160,7 +69,7 @@ Wl_Combo_Box_File_To_Filter - + Total 合計 @@ -168,106 +77,99 @@ Wl_Dialog_About - + About Wordless 關於 Wordless - + - <div style="text-align: center;"> - <h2>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Wordless</h2> - <div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Version {}</div> - </div> - - <div style="text-align: center;"> - <h2>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Wordless</h2> - <div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;版本 {}</div> - </div> - + <div align="center"> + <h2>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Wordless</h2> + <div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Version {}</div> + </div> + + + <div align="center"> + <h2>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Wordless</h2> + <div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;版本 {}</div> + </div> + - + - <div style="text-align: center;"> - An Integrated Corpus Tool with Multilingual Support<br> - for the Study of Language, Literature, and Translation - </div> - - <hr> - - <div style="text-align: center;"> - Copyright (C) 2018-2023&nbsp;&nbsp;Ye Lei (叶磊)<br> - Licensed Under GNU GPLv3<br> - All Other Rights Reserved - </div> - + <div align="center"> + An Integrated Corpus Tool with Multilingual 
Support<br> + for the Study of Language, Literature, and Translation + </div> + <hr> + <div align="center"> + Copyright (C) 2018-{}&nbsp;&nbsp;Ye Lei (叶磊)<br> + Licensed Under GNU GPLv3<br> + All Other Rights Reserved + </div> + - <div style="text-align: center;"> - 一款擁有多語種支援的語料庫整合工具<br> - 可用於語言學、文學及翻譯研究 - </div> - - <hr> - - <div style="text-align: center;"> - 版權所有 (C) 2018-2023&nbsp;&nbsp;Ye Lei (葉磊)<br> - 基於 GNU GPLv3 協議<br> - 保留其他所有權利 - </div> - + <div align="center"> + 一款擁有多語種支援的語料庫整合工具<br> + 可用於語言學、文學及翻譯研究 + </div> + <hr> + <div align="center"> + 版權所有 (C) 2018-{}&nbsp;&nbsp;葉磊<br> + 基於 GNU GPLv3 協議<br> + 保留其他所有權利 + </div> + Wl_Dialog_Acks - + Name 名稱 - + Version 版本 - + Authors 作者 - + License 許可 - + Acknowledgments 致謝 - -ACKNOWLEDGMENTS.md -ACKNOWLEDGMENTS_zho_tw.md + +ACKS.md +doc/trs/zho_cn/ACKS.md - + - <div> - As Wordless stands on the shoulders of giants, I hereby extend my sincere gratitude to the following open-source projects without which this project would not have been possible: - </div> + <div>As Wordless stands on the shoulders of giants, I hereby extend my sincere gratitude to the following open-source projects without which this project would not have been possible:</div> - <div> - 鑑於 Wordless 立於巨人的肩膀之上,我謹在此向下列開源專案致以本人誠摯的感謝,若沒有它們,本專案將無法完成: - </div> + <div>鑑於 Wordless 立於巨人的肩膀之上,我謹在此向下列開源專案致以本人誠摯的感謝,若沒有它們,本專案將無法完成:</div> Wl_Dialog_Changelog - + Changelog 更新日誌 @@ -275,310 +177,235 @@ Wl_Dialog_Check_Updates - + Check for updates on startup 啟動時檢查更新 - + Cancel 取消 - - - <div> - Checking for updates... - </div> - - - <div> - 檢查更新中…… - </div> - + +OK +確認 - - - <div> - Hooray, you are using the latest version of Wordless! - </div> - - - <div> - 好耶,你使用的是 Wordless 的最新版! - </div> - + +Check for Updates +檢查更新 - - - <div> - A network error has occurred, please check your network settings and try again or <a href="https://github.com/BLKSerene/Wordless/releases">check for updates manually</a>. 
- </div> - - - <div> - 剛才發生了一個網路錯誤,請檢查你的網路設定並重試或<a href="https://github.com/BLKSerene/Wordless/releases">手動檢查更新</a>。 - </div> - + +Try again +重試 - -OK -確認 + +<div>Current version: </div> +當前版本: - -Check for Updates -檢查更新 + +<div>Checking for updates...</div> +<div>檢查更新中……</div> - + +<div>Latest version: Checking...</div> +<div>最新版本:查詢中……</div> + + + - <div> - Wordless {} is out, click <a href="https://github.com/BLKSerene/Wordless#download"><b>HERE</b></a> to download the latest version of Wordless. - </div> + <div>Wordless {} is out, click <a href="https://github.com/BLKSerene/Wordless#download"><b>HERE</b></a> to download the latest version of Wordless.</div> - <div> - Wordless {} 已釋出,點選<a href="https://github.com/BLKSerene/Wordless#download"><b>此處</b></a>下載 Wordless 的最新版。 - </div> + <div> Wordless {} 已釋出,點選<a href="https://github.com/BLKSerene/Wordless#download"><b>此處</b></a>下載最新版Wordless。</div> - -Current version: -當前版本: - - - -Try again -重試 + +<div>Latest version: </div> +<div>最新版本:</div> - -Latest version: Checking... -最新版本:查詢中…… + + + <div>Hooray, you are using the latest version of Wordless!</div> + + + <div>好耶,你使用的是 Wordless 的最新版!</div> + - -Latest version: -最新版本: + + + <div>A network error has occurred, please check your network settings and try again or <a href="https://github.com/BLKSerene/Wordless/releases">check for updates manually</a>.</div> + + + <div>剛才發生了一個網路錯誤,請檢查你的網路設定並重試或<a href="https://github.com/BLKSerene/Wordless/releases">手動檢查更新</a>。</div> + - -Latest version: Network error -最新版本:網路錯誤 + +<div>Latest version: Network error</div> +<div>最新版本:網路錯誤</div> Wl_Dialog_Citing - + Citing 引用 - + Select citation system: 選擇引用體系: - + APA (7th edition) APA(第七版) - + MLA (8th edition) MLA(第八版) - - - <div> - If you are going to publish a work that uses Wordless, please cite as follows. 
- </div> - - <div> - 如果你準備發表的成果中使用了 Wordless,請按如下格式進行引用。 - </div> - - - - -Wl_Dialog_Clr_All_Tables - - -Clear All Tables -清空所有表格 - - - - - <div> - The results in some of the tables have yet been exported. Do you really want to clear all tables? - </div> - - - <div> - 部分表格中的結果尚未儲存。你是否確認清空所有表格? - </div> - - - - -Wl_Dialog_Clr_Table - - + - <div> - The results in the table have yet been exported. Do you really want to clear the table? - </div> + <div>If you are going to publish a work that uses Wordless, please cite as follows.</div> - <div> - 表格中的結果尚未匯出。你確認要清空表格嗎? - </div> + <div>如果你準備發表的成果中使用了 Wordless,請按如下格式進行引用。</div> - - -Yes - - - - -No - - Wl_Dialog_Confirm_Exit - - - <div> - Are you sure you want to exit Wordless? - </div> - <div style="font-weight: bold;"> - Note: All unsaved data and figures will be lost. - </div> - - - <div> - 你確認你要退出 Wordless 嗎? - </div> - <div style="font-weight: bold;"> - 注:所有未儲存的資料和圖表都將丟失。 - </div> - - - - + Always confirm on exit 關閉時總是提示確認 - + Exit 退出 - + Cancel 取消 - + Exit Wordless 退出 Wordless + + + + <div>Are you sure you want to exit Wordless?</div> + <br> + <div><b>Note: All unsaved data and figures will be lost.</b></div> + + + <div>你確認你要退出 Wordless 嗎?</div> + <br> + <div><b> 注意:所有未儲存的資料和圖表都將丟失。</b></div> + + Wl_Dialog_Context_Settings - + Inclusion 包含 - + Exclusion 排除 - + L - + R - + Context window: -上下文檢索範圍: +上下文限制範圍: + + + +Context Settings +上下文設定 Wl_Dialog_Donating - - - <div> - If you would like to support the development of Wordless, you may donate via <a href="https://www.paypal.com/">PayPal</a>, <a href="https://global.alipay.com/">Alipay</a>, or <a href="https://pay.weixin.qq.com/index.php/public/wechatpay_en">WeChat Pay</a>. 
- </div> - - - <div> - 如果你願意支援 Wordless 的開發工作,你可以透過<a href="https://www.paypal.com/">PayPal</a>、<a href="https://global.alipay.com/">支付寶</a>或<a href="https://pay.weixin.qq.com/index.php/public/wechatpay_en">微信支付</a>進行贊助。 - </div> - - - - + Donating via: 贊助途徑: - + Alipay 支付寶 - + WeChat Pay 微信支付 - + Donating 贊助 + + + + <div>If you would like to support the development of Wordless, you may donate via <a href="https://www.paypal.com/">PayPal</a>, <a href="https://global.alipay.com/">Alipay</a>, or <a href="https://pay.weixin.qq.com/index.php/public/wechatpay_en">WeChat Pay</a>.</div> + + + <div>如果你願意支援 Wordless 的開發工作,你可以透過<a href="https://www.paypal.com/">PayPal</a>、<a href="https://global.alipay.com/">支付寶</a>或<a href="https://pay.weixin.qq.com/index.php/public/wechatpay_en">微信支付</a>進行贊助。</div> + + Wl_Dialog_Err_Files - + Error Type 錯誤型別 - + OK 確認 - + File Path 檔案路徑 - + Export table... 匯出表格... @@ -586,93 +413,211 @@ Wl_Dialog_Need_Help - + Need Help? 需要幫助? - - - <div> - If you have any questions, find software bugs, need to provide feedback, or want to submit feature requests, you may seek support from the open-source community or contact me directly via any of the support channels listed below. 
- </div> - - - <div> - 如果你有任何問題、發現了軟體錯誤、需要提供反饋資訊或想要提交功能需求,你可以透過下方所列的任一支援渠道來獲取開源社群的支援或直接與我聯絡。 - </div> - - - - + Support Channel 支援渠道 - + Information 資訊 - -<a href="https://github.com/BLKSerene/Wordless/blob/main/doc/doc_eng.md">Documentation</a> -<a href="https://github.com/BLKSerene/Wordless/blob/main/doc/doc_eng.md">文件</a> - - - -<a href="https://github.com/BLKSerene/Wordless/issues">Gihub Issues</a> - - - - -<a href="https://github.com/BLKSerene/Wordless/discussions">Gihub Discussions</a> - - - - + Official documentation 官方文件 - + Tutorial videos 影片教程 - + Bug reports Bug 提交 - + Usage questions 使用問題 - + Email support 郵件諮詢 - + <a href="https://www.wechat.com/en/">WeChat</a> official account <a href="https://www.wechat.com/en/">微信</a>公眾號 - + <a href="https://www.youtube.com/@BLKSerene">YouTube</a> | <a href="https://space.bilibili.com/34963752/video">bilibili</a> <a href="https://www.youtube.com/@BLKSerene">YouTube</a> | <a href="https://space.bilibili.com/34963752/video">B 站</a> + + + + <div>If you have any questions, find software bugs, need to provide feedback, or want to submit feature requests, you may seek support from the open-source community or contact me directly via any of the support channels listed below.</div> + + + <div>如果你有任何問題、發現了軟體錯誤、需要提供反饋資訊或想要提交功能需求,你可以透過下方所列的任一支援渠道來獲取開源社群的支援或直接與我聯絡。</div> + + + + +<a href="https://github.com/BLKSerene/Wordless/blob/{self.main.ver}/doc/doc.md">Stable Version</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/doc.md">Development Version</a> +<a href="https://github.com/BLKSerene/Wordless/blob/{self.main.ver}/doc/doc.md">穩定版</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/doc.md">開發版</a> + + + +Wl_Dialog_Open_Files + + +Open Files +開啟檔案 + + + +Add files... +新增檔案... + + + +Add folder... +新增資料夾... 
+ + + +Remove files +移除檔案 + + + +Clear table +清空表格 + + + +Auto-detect encodings +自動檢測編碼 + + + +Auto-detect languages +自動檢測語種 + + + +Include files in subfolders +包含子資料夾下檔案 + + + +Open +開啟 + + + +Cancel +取消 + + + +Checking files... +檢查檔案中…… + + + +Error Adding Files +新增檔案時出錯 + + + + + <div> + An error occurred while adding files, so the following files are not added to the table. + </div> + + + <div> + 新增檔案時發生了一個錯誤,因此下列檔案未被新增至表格中。 + </div> + + + + +Empty file +空檔案 + + + +Unsupported file type +檔案型別不支援 + + + +Duplicate file +重複檔案 + + + +Open Folder +開啟資料夾 + + + +Wl_Dialog_Opening_Nontext_Files + + +Opening Non-text Files +開啟非文字檔案 + + + + + <div>It is <b>not recommended to directly import non-text files into <i>Wordless</i></b> and the support for doing so is provided only for convenience, since accuracy of text extraction could never be guaranteed and unintended data loss might occur, for which reason users are encouraged to <b>convert their files using specialized tools and make their own choices</b> on which part of the data should be kept or discarded.</div> + <br> + <div>Do you want to proceed to open non-text files anyway?</div> + + + <div><b>不建議將非文字檔案直接匯入<i>Wordless</i></b>,提供該功能也只是為了方便使用者,因為文字提取的精度始終無法保證且資料可能會在無意中丟失。鑑於該原因,建議使用者<b>使用專用工具轉換檔案並自行決定</b>應該保留或丟第哪部分資料。</div> + <br> + <div>你是否無論如何仍要繼續開啟非文字檔案?</div> + + + + +Do not show this again +下次不再顯示該提示 + + + +Proceed +繼續 + + + +Abort +中止 + Wl_Dialog_Preview_Settings - + coarse-grained 粗分 - + fine-grained 細分 @@ -681,144 +626,197 @@ Wl_Dialog_Progress -Please wait. It may take a few seconds to several minutes for the operation to be completed. -請稍後。本操作可能需要數秒至數分鐘完成。 +<div>Elapsed time: 0:00:00</div> +<div>已用時間:0:00:00</div> - -Elapsed time: 0:00:00 -已用時間:0:00:00 + + + <div>Please wait. 
It may take a few seconds to several minutes for the operation to be completed.</div> + + + <div>請稍候。該操作可能會在幾秒至數分鐘內完成。</div> + - -Elapsed time: -已用時間: + +<div>Elapsed time: {}</div> +<div>已用時間:{}</div> -Wl_Dialog_Progress_Process_Data +Wl_Dialog_Restart_Required - -Processing data... -處理資料中…… + +Restart +重啟 - -Downloading model... -下載模型中…… + +Cancel +取消 + + + +Restart Wordless +重啟 Wordless - - -Wl_Dialog_Restart_Required - + <div> Restart is required for the settings to take effect. Do you want to restart Wordless now? </div> - - <div style="font-weight: bold;"> - Note: All unsaved data and figures will be lost. - </div> + <br> + <div><b>Note: All unsaved data and figures will be lost.</b></div> <div> - 需要重啟本程式使設定生效。你想要現在重啟 Wordless 嗎? - </div> - - <div style="font-weight: bold;"> - 注:所有未儲存的資料和圖表都將丟失。 + 需要重啟本程式才能使設定生效。你想要現在重啟 Wordless 嗎? </div> + <br> + <div><b>注意:所有未儲存的資料和圖表都將丟失。</b></div> + + +Wl_Dialog_Results_Filter - -Restart -重啟 + +Filter Results +篩選結果 - -Cancel -取消 + +File to filter: +待篩選檔案: - -Restart Wordless -重啟 Wordless + +Filter +篩選 + + + +Close +關閉 + + + +Filtering results... +篩選結果中…… + + + +The results in the data table has been successfully filtered. 
+已成功篩選資料表格中的結果。 Wl_Dialog_Results_Filter_Collocation_Extractor - + Frequency: 頻數: - -p-value: -p 值: - - - + Total 合計 - -L - - - - -R - - - - + Collocate length: 搭配詞長度: - + Keyword length: 關鍵詞長度: - + Bayes factor: 貝葉斯因子: - + Number of files found: 查詢到的檔案數 + + +Node length: +節點詞長度: + + + +Collocation length: +搭配長度: + + + +Colligation length: +類聯接長度: + + + +L{} +左 {} + + + +R{} +右 {} + + + +Wl_Dialog_Results_Filter_Dependency_Parser + + +Head length: +核心詞長度: + + + +Dependent length: +依存詞長度: + + + +Dependency length: +依存距離: + + + +Dependency length (absolute): +依存距離(絕對): + Wl_Dialog_Results_Filter_Wordlist_Generator - + Frequency: 頻數: - + Token length: 形符長度: - + N-gram length: n 元組長度: - + Number of files found: 查詢到的檔案數: - + Number of syllables: 音節數: @@ -826,7 +824,7 @@ Wl_Dialog_Results_Search - + Close 關閉 @@ -841,42 +839,42 @@ 無查詢結果 - + Search in Results 在結果中查詢 - + item - + items - + Found {} {}. 查詢到 {} {}。 - + Find next 查詢下一個 - + Find previous 查詢上一個 - + Find all 查詢所有 - + Clear highlights 清除高亮 @@ -892,7 +890,7 @@ - + Highlights cleared. 高亮已清除。 @@ -900,108 +898,80 @@ Wl_Dialog_Results_Sort_Concordancer - + Sort 排序 - + Close 關閉 - + Ascending 升序 - + Node 節點詞 - + Sentiment 情感 - + File 檔案 - + Sorting results... 結果排序中…… - + Sort Results 對結果排序 - -The results in the table has been successfully sorted. -已成功對錶格中的結果進行排序。 - - - + ^L[0-9]+$ -^左[0-9]+$ +^左 [0-9]+$ - + ^R[0-9]+$ -^右[0-9]+$ - - - -Token no. -形符序號 +^右 [0-9]+$ - + ^Ld+$ -^左d+$ +^左 d+$ - + ^Rd+$ -^右d+$ - - - -Wl_Dialog_Settings - - -Save -儲存 - - - -Cancel -取消 - - - -OK -確認 +^右 d+$ - -Copy -複製 + +The results in the data table has been successfully sorted. +已成功對資料表格中的結果進行排序。 - -Close -關閉 + +Token No. +形符序號 Wl_Loading - + Initializing Wordless... 初始化 Wordless…… @@ -1009,242 +979,222 @@ Wl_Main - + Loading settings... 載入設定…… - + Initializing main window... 初始化主窗體…… - + Ready! 就緒! - + Starting Wordless... 啟動 Wordless…… - + &File 檔案 - + &Preferences 偏好 - + &Help 幫助 - + &Open Files... 開啟檔案... 
- -Open files -開啟檔案 - - - -Open &Folder... -開啟資料夾... - - - -Open all files in the folder -開啟資料夾下的所有檔案 - - - + &Reopen Closed Files 重新開啟已關閉檔案 - + Reopen closed files 重新開啟已經關閉的檔案 - + S&elect All 全選 - + Select all files 勾選所有檔案 - + &Deselect All -取消選中所有 +取消勾選所有 - + Deselect all files 取消勾選所有檔案 - + &Invert Selection -反選 +反向勾選 - + Invert file selection 反向勾選檔案 - + &Close Selected 關閉選中 - + Close selected file(s) 關閉已選中的檔案 - + C&lose All 關閉所有 - + Close all files 關閉所有檔案 - + &Exit... 退出... - + Exit the program 退出程式 - + &Settings 設定 - + Change settings 更改設定 - + &Display Language 顯示語言 - + &Reset Layouts 重置佈局 - + Reset layouts 重置佈局 - + &Show Status Bar 顯示狀態列 - + Show/Hide the status bar 顯示/隱藏狀態列 - + &Citing 引用 - + Show information about citing 顯示引用資訊 - + &Acknowledgments 致謝 - -Show acknowldgments -顯示致謝 - - - + &Need Help? 需要幫助? - + Show help information 顯示幫助資訊 - + &Donating 贊助 - + Show information about donating 顯示贊助相關資訊 - + Check &for Updates 檢查更新 - + Check for updates of Wordless 檢查 Wordless 更新 - + C&hangelog 更新日誌 - + Show Changelog 顯示更新日誌 - + About &Wordless 關於 Wordless - + Show information about Wordless 顯示 Wordless 的相關資訊 - + Reset Layouts 重置佈局 - + <div>Do you want to reset all layouts to their default settings?</div> @@ -1253,289 +1203,306 @@ - + Profiler 分析工具 - + Concordancer 索引工具 - + Parallel Concordancer 平行索引工具 - + Wordlist Generator 詞表生成器 - + N-gram Generator n 元組生成器 - + Collocation Extractor 搭配抽取器 - + Colligation Extractor 類聯接抽取器 - + Keyword Extractor 關鍵詞抽取器 - + Observed Files 觀察檔案 - + Reference Files 參照檔案 - + Dependency Parser 依存分析器 + + +Open file(s) +開啟檔案 + + + +Show acknowledgments +顯示致謝 + Wl_Settings - + Settings 設定 - + General 全域性 - + Import 匯入 - + Export 匯出 - + Files 檔案 - + Tags 標籤 - + Sentence Tokenization 分句 - + Word Tokenization 分詞 - + Syllable Tokenization 分音節 - + Tagsets 標籤集 - + Lemmatization 詞形還原 - + Stop Word Lists 停用詞表 - + Measures 統計方法 - + Dispersion 分佈 - + Adjusted Frequency 調整頻數 - + Statistical Significance 統計顯著性 - + Effect Size 效應量 - + Figures 圖表 - + 
Reset all settings 重置所有設定 - + Save 儲存 - + Apply 應用 - + Cancel 取消 - + Reset All Settings 重置所有設定 - - - <div>Do you want to reset all settings to their defaults?</div> - <div><b>Warning: This will affect settings on all pages!</b></div> - - - <div>你想要將所有設定重置為預設值嗎?</div> - <div><b>警告:本操作將影響所有頁面上的設定選項!</b></div> - - - - + Part-of-speech Tagging 詞性標註 - + Bayes Factor 貝葉斯因子 - + Tables 表格 - + Dependency Parsing 依存分析 - + Readability 可讀性 - + Concordancer 索引工具 - + Parallel Concordancer 平行索引工具 - + Line Charts 折線圖 - + Word Clouds 詞雲圖 - + Network Graphs 網路圖 - + Sentiment Analysis 情感分析 - -Lexical Diversity -詞彙多樣性 + +Lexical Density/Diversity +詞彙密度/多樣性 + + + +Dependency Parser +依存分析器 + + + + + <div>Do you want to reset all settings to their defaults?</div> + <br> + <div><b>Warning: This will affect settings on all pages!</b></div> + + + <div>你想要將所有設定重置為預設值嗎?</div> + <br> + <div><b>警告:本操作將影響所有頁面上的設定選項!</b></div> + Wl_Settings_Dependency_Parsing - + Dependency Parser Settings 依存分析器設定 - + Language 語種 - + Dependency Parsers 依存分析器 - + Preview 預覽 - + Select language: 選擇語種: - + Preview settings 預覽設定 - + Show preview 顯示預覽 - + Processing... 
處理中…… @@ -1543,12 +1510,12 @@ Wl_Settings_Figs_Line_Charts - + General Settings 全域性設定 - + Font: 字型: @@ -1556,132 +1523,132 @@ Wl_Settings_Figs_Network_Graphs - + Node Settings 節點設定 - + Node shape: 節點形狀: - + Node size: 節點大小: - + Node color: 節點顏色: - + Node opacity: 節點不透明度: - + Node Label Settings 節點標籤設定 - + Label font: 標籤字型: - + Label font size: 標籤字型大小: - + Label font weight: 標籤字型粗細: - + Label font color: 標籤字型顏色: - + Label opacity: 標籤不透明度: - + Edge Settings 邊設定 - + Connection style: 連線樣式: - + Edge width: 邊的寬度: - + Minimum 最小 - + Maximum 最大 - + Edge style: 邊的樣式: - + Edge color: 邊的顏色: - + Edge opacity: 邊的不透明度: - + Arrow style: 箭頭樣式: - + Arrow size: 箭頭大小: - + Edge Label Settings 邊的標籤設定 - + Label position: 標籤位置: - + Rotate labels to lie parallel to edges 旋轉標籤使其與邊平行 - + Advanced Settings 高階設定 - + Layout: 佈局: @@ -1689,112 +1656,112 @@ Wl_Settings_Figs_Word_Clouds - + Font Settings 字型設定 - + Font: 字型: - + Select Font 選擇字型 - + Font size: 字型大小: - + Minimum 最小 - + Maximum 最大 - + Relative scaling: 相對縮放: - + Font color: 字型顏色: - + Custom 自定義 - + Monochrome 單色 - + Colormap 色譜 - + Auto 自動 - + Background Settings 背景設定 - + Background color: 背景顏色: - + Mask Settings 蒙版設定 - + Mask path: 蒙版路徑: - + Select Mask 選擇蒙版 - + Contour width: 輪廓寬度: - + Contour color: 輪廓顏色: - + Advanced Settings 高階設定 - + Prefer horizontal: 水平優先度: - + Allow repeated words 允許重複單詞 @@ -1802,80 +1769,85 @@ Wl_Settings_Files - + Default Settings 預設設定 - + Encoding: 編碼: - + Language: 語種: - + Tokenized: 已分詞: - + Tagged: 已標註: - + Auto-detection Settings 自動檢測設定 - + Number of lines to scan in each file: 檔案掃描行數: - + Read files in chunks of 讀取檔案時每 - + lines 行為一段 - + Miscellaneous Settings 其他設定 + + +Display warning when opening non-text files +開啟非文字檔案時顯示警告 + Wl_Settings_Files_Tags - + Header Tag Settings 頭標籤設定 - + Note: All contents surrounded by header tags will be discarded during text processing! 注意:所有被頭標籤括起來的文字在文字處理時都將被忽略! 
- + Body Tag Settings 正文標籤設定 - + * Use asterisk character (*) to indicate any number of characters * 使用星號(*)來表示任意數量的字元 - + XML Tag Settings XML 標籤設定 @@ -1883,72 +1855,72 @@ Wl_Settings_General - + Proxy Settings 代理設定 - + Address: 地址: - + Port: 埠: - + Username: 使用者名稱: - + Password: 密碼: - + Update Settings 更新設定 - + Check for updates on startup 啟動時檢查更新 - + Always confirm on exit 退出時總是提示確認 - + Miscellaneous Settings 其他設定 - + User Interface Settings 使用者介面設定 - + Interface scaling: 介面縮放: - + Font family: 字型集: - + Font size: 字型大小: - + Use proxy 使用代理 @@ -1956,42 +1928,42 @@ Wl_Settings_General_Exp - + Tables 表格 - + Browse... 瀏覽... - + Search Terms 檢索項 - + Stop Words 停用詞 - + Select Folder 選擇資料夾 - + Default path: 預設路徑: - + Default type: 預設型別: - + Default encoding: 預設編碼: @@ -1999,47 +1971,47 @@ Wl_Settings_General_Imp - + Files 檔案 - + Browse... 瀏覽... - + Search Terms 檢索項 - + Auto-detect encodings 自動檢測編碼 - + Stop Words 停用詞 - + Temporary Files 臨時檔案 - + Select Folder 選擇資料夾 - + Default path: 預設路徑: - + Default encoding: 預設編碼: @@ -2047,37 +2019,37 @@ Wl_Settings_Lemmatization - + Lemmatizer Settings 詞形還原器設定 - + Language 語種 - + Lemmatizers 詞形還原器 - + Preview 預覽 - + Select language: 選擇語種: - + Show preview 顯示預覽 - + Processing... 
處理中…… @@ -2093,17 +2065,17 @@ Wl_Settings_Measures_Bayes_Factor - + Log-likelihood Ratio Test 對數似然比檢驗 - + Apply Yates's correction for continuity 應用耶茨連續性校正 - + Student's t-test (2-sample) 學生 t 檢驗(雙樣本) @@ -2129,23 +2101,18 @@ Wl_Settings_Measures_Effect_Size - + Kilgarriff's Ratio - + Smoothing parameter: 平滑係數: -Wl_Settings_Measures_Lexical_Diversity - - -HD-D - - +Wl_Settings_Measures_Lexical_Density_Diversity Sample size: @@ -2225,16 +2192,6 @@ - -Policy one - - - - -Policy two - - - Automated Readability Index @@ -2359,6 +2316,16 @@ Tränkle & Bailer's Readability Formula + + +Policy One +策略一 + + + +Policy Two +策略二 + Wl_Settings_Measures_Statistical_Significance @@ -2404,59 +2371,141 @@ -Welch's t-test -Welch t 檢驗 - - - z-score z 值 - + z-score (Berry-Rogghe) z 值(Berry-Rogghe) +Wl_Settings_Node + + +Empty Path +空路徑 + + + + + <div>The path should not be left empty!</div> + + + <div>路徑不可為空!</div> + + + + +Path not Found +未找到路徑 + + + + + <div>The specified path "{}" could not be found!</div> + <div>Please check your settings and try again.</div> + + + <div>未找到指定的路徑“{}”!</div> + <div>請檢查你的設定後重試。</div> + + + + +Invalid File Path +無效檔案路徑 + + + + + <div>The specified path "{}" should be a file, not a directory!</div> + <div>Please check your settings and try again.</div> + + + <div>指定的路徑“{}”應是一個檔案,而不是目錄!</div> + <div>請檢查你的設定後重試。</div> + + + + +Invalid Directory Path +無效資料夾路徑 + + + + + <div>The specified path "{}" should be a directory, not a file!</div> + <div>Please check your settings and try again.</div> + + + <div>指定的路徑“{}”應是一個目錄,而不是檔案!</div> + <div>請檢查你的設定後重試。</div> + + + + +Path Not Exist +路徑不存在 + + + + + {} + <body> + <div>The specified path "{}" does not exist.</div> + <div>Do you want to create the directory?</div> + </body> + + + {} + <body> + <div>指定的路徑“{}”不存在。</div> + <div>你想要新建該資料夾嗎?</div> + </body> + + + + Wl_Settings_Pos_Tagging - + Language 語種 - + Preview 預覽 - + Select language: 選擇語種: - + Show preview 顯示預覽 - + Processing... 
處理中…… - + Part-of-speech Tagger Settings 詞性標註器設定 - + Part-of-speech Taggers 詞性標註器 - + Convert all part-of-speech tags to universal part-of-speech tags 將所有詞性標籤轉換為通用詞性標籤 @@ -2464,137 +2513,156 @@ Wl_Settings_Pos_Tagging_Tagsets - + Preview Settings: 預覽設定: - + Language: 語種: - + Mapping Settings 對映設定 - + Reset 重置 - + Part-of-speech Tag 詞性標籤 - + Universal Part-of-speech Tag 通用詞性標籤 - + Description 描述 - + Examples 示例 - + Fetching data... 獲取資料中…… - + Reset Mappings 重置對映 - + +Reset All Mappings +重置所有對映 + + + +* This part-of-speech tagger does not support custom mapping. +* 該詞性標註器不支援自定義對映。 + + + +Reset all +重置所有 + + + +Number of part-of-speech tags: +詞性標籤數: + + + +Part-of-speech tagger: +詞性標註器: + + + +Content/Function Words +實/虛詞 + + + <div>Do you want to reset all mappings to their default settings?</div> + <br> <div><b>Note: This will only affect the mapping settings in the currently shown table.</b></div> <div>你想要將所有對映都重置為預設設定嗎?</div> - <div><b>注:這隻會影響當前所顯示錶格中的對映設定。</b></div> + <br> + <div><b>注意:這隻會影響當前所顯示錶格中的對映設定。</b></div> - -Reset All Mappings -重置所有對映 - - - + <div>Do you want to reset all mappings to their default settings?</div> + <br> <div><b>Warning: This will affect the mapping settings in all tables!</b></div> <div>你想要將所有對映重置為預設值嗎?</div> + <br> <div><b>警告:本操作將影響所有表格中的對映設定!</b></div> - -* This part-of-speech tagger does not support custom mapping. -* 該詞性標註器不支援自定義對映。 - - - -Reset all -重置所有 - - - -Number of part-of-speech tags: -詞性標籤數: + +Content words +實詞 - -Part-of-speech tagger: -詞性標註器: + +Function words +虛詞 Wl_Settings_Sentence_Tokenization - + Sentence Tokenizer Settings 分句器設定 - + Language 語種 - + Sentence Tokenizers 分句器 - + Preview 預覽 - + Select language: 選擇語種: - + Show preview 顯示預覽 - + Processing... 處理中…… @@ -2602,42 +2670,42 @@ Wl_Settings_Sentiment_Analysis - + Sentiment Analyzer Settings 情感分析器設定 - + Language 語種 - + Sentiment Analyzer 情感分析器 - + Preview 預覽 - + Select language: 選擇語種: - + Show preview 顯示預覽 - + Sentiment score: 情感分數: - + Processing... 
處理中…… @@ -2645,70 +2713,75 @@ Wl_Settings_Stop_Word_Lists - + Language 語種 - + Stop Word List 停用詞表 - + Preview 預覽 - + Select language: 選擇語種: - + Stop Word List Settings 停用詞表設定 - + Number of stop words: 停用詞數: + + +Case-sensitive +大小寫敏感 + Wl_Settings_Syl_Tokenization - + Syllable Tokenizer Settings 分音節器設定 - + Language 語種 - + Syllable Tokenizers 分音節器 - + Preview 預覽 - + Select language: 選擇語種: - + Show preview 顯示預覽 - + Processing... 處理中…… @@ -2790,52 +2863,70 @@ -Wl_Settings_Tables_Parallel_Concordancer +Wl_Settings_Tables_Dependency_Parser - -Color Settings -顏色設定 + +Highlight Color Settings +高亮顏色設定 + + + +Head color: +核心詞顏色: + + + +Dependent color: +依存詞顏色: + + +Wl_Settings_Tables_Parallel_Concordancer Search term color: 檢索項顏色: + + +Highlight Color Settings +高亮顏色設定 + Wl_Settings_Word_Tokenization - + Word Tokenizer Settings 分詞器設定 - + Language 語種 - + Word Tokenizers 分詞器 - + Preview 預覽 - + Select language: 選擇語種: - + Show preview 顯示預覽 - + Processing... 處理中…… @@ -2843,12 +2934,12 @@ Wl_Spin_Box_Window - + L - + R @@ -2856,155 +2947,145 @@ Wl_Table_Colligation_Extractor - + Rank 序號 - + Node 節點詞 - + Collocate 搭配詞 - + Number of Files Found 查詢到的 檔案數 - + Number of Files Found % 查詢到的 檔案數% - + [{}] L{} [{}] -左{} +左 {} - + [{}] R{} [{}] -右{} +右 {} - + ^[LR][0-9]+$ -^[左右][0-9]+$ +^[左右] [0-9]+$ - + L - + [{}] L{} % [{}] -左{} % +左 {} % - + [{}] R{} % [{}] -右{} % +右 {} % - + [{}] Frequency [{}] 頻數 - + [{}] Frequency % [{}] 頻數% - + [{}] p-value [{}] p 值 - + [{}] Bayes Factor [{}] 貝葉斯因子 - + Frequency 頻數 - + Frequency % 頻數% - + p-value p 值 - + Bayes Factor 貝葉斯因子 - -Network Graph -網路圖 - - - + Frequency 頻數 - + p-value p 值 - + Total 合計 - -Network graph -網路圖 - - - + Bayes factor 貝葉斯因子 @@ -3012,150 +3093,145 @@ Bayes Factor Wl_Table_Collocation_Extractor - + Rank 序號 - + Node 節點詞 - + Collocate 搭配詞 - + Number of Files Found 查詢到的 檔案數 - + Number of Files Found % 查詢到的 檔案數% - + [{}] L{} [{}] -左{} +左 {} - + [{}] L{} % [{}] -左{} % +左 {} % - + [{}] R{} [{}] -右{} +右 {} - + [{}] R{} % [{}] -右{} % +右 {} % 
- + [{}] Frequency [{}] 頻數 - + [{}] Frequency % [{}] 頻數% - + [{}] p-value [{}] p 值 - + [{}] Bayes Factor [{}] 貝葉斯因子 - + Frequency 頻數 - + Frequency % 頻數% - + p-value p 值 - + Bayes Factor 貝葉斯因子 - + ^[LR][0-9]+$ -^[左右][0-9]+$ +^[左右] [0-9]+$ - + L - + Frequency 頻數 - + p-value p 值 - + Total 合計 - -Network graph -網路圖 - - - + Bayes factor 貝葉斯因子 @@ -3163,82 +3239,82 @@ Bayes Factor Wl_Table_Concordancer - + Left - + Node 節點詞 - + Right - + Sentiment 情感 - + Token No. 形符序號 - + Token No. % 形符序號% - + Sentence No. 句子序號 - + Sentence No. % 句子序號% - + Paragraph No. 段落序號 - + Paragraph No. % 段落序號% - + File 檔案 - + Sentence Segment No. 句段序號 - + Sentence Segment No. % 句段序號% - + Search Term 檢索項 - + Search term 檢索項 - + Dispersion Plot 分佈圖 @@ -3246,22 +3322,22 @@ Bayes Factor Wl_Table_Concordancer_Parallel - + Parallel Unit No. 平行單位序號 - + Parallel Unit No. % 平行單位序號% - + Missing Search Terms 缺少檢索項 - + <div>You have not specified any search terms. Do you want to search for additions and deletions?</div> @@ -3271,82 +3347,49 @@ Bayes Factor -Wl_Table_Data - - -Rank -序號 - - - -Generate table -生成表格 - - - -Generate figure -生成圖表 - - - -Export selected cells... -匯出選中單元格... - - - -Export all cells... -匯出所有單元格... - - - -Clear table -清空表格 - - - Wl_Table_Dependency_Parser - + Head 核心詞 - + Dependent 依存詞 - + Dependency Relation 依存關係 - + Dependency Length 依存距離 - + Dependency Length (Absolute) 依存距離(絕對) - + Sentence 句子 - + Sentence No. 句子序號 - + Sentence No. % 句子序號% - + File 檔案 @@ -3354,72 +3397,72 @@ Bayes Factor Wl_Table_Files - + Checking files... 檢查檔案中…… - + Path 路徑 - + Encoding 編碼 - + Language 語種 - + Tokenized 已分詞 - + Tagged 已標註 - + file 檔案 - + files 檔案 - + {} {} has been successfully opened. 
已成功開啟 {} 個{}。 - + Observed Files 觀察檔案 - + Reference Files 參照檔案 - + Name 名稱 - + Empty File Name 空檔名 - + <div>The file name should not be left empty!</div> @@ -3428,12 +3471,12 @@ Bayes Factor - + Duplicate File Names 重複檔名 - + <div>There is already a file with the same name in the file area.</div> <div>Please specify a different file name.</div> @@ -3447,116 +3490,116 @@ Bayes Factor Wl_Table_Keyword_Extractor - + Rank 序號 - + Keyword 關鍵詞 - + Number of Files Found 查詢到的 檔案數 - + Number of Files Found % 查詢到的 檔案數% - + [Reference Files] Frequency [參照檔案] 頻數 - + [Reference Files] Frequency % [參照檔案] 頻數% - + [{}] Frequency [{}] 頻數 - + [{}] Frequency % [{}] 頻數% - + [{}] p-value [{}] p 值 - + [{}] Bayes Factor [{}] 貝葉斯因子 - + Frequency 頻數 - + Frequency % 頻數% - + p-value p 值 - + Bayes Factor 貝葉斯因子 - + Frequency 頻數 - + p-value p 值 - + Missing Observed Files 缺少觀察檔案 - + <div>You have not specified any observed files yet.</div> @@ -3565,12 +3608,12 @@ Bayes Factor - + Missing Reference Files 缺少參照檔案 - + <div>You have not specified any reference files yet.</div> @@ -3579,22 +3622,22 @@ Bayes Factor - + Missing observed files! 缺少觀察檔案! - + Missing reference files! 缺少參照檔案! 
- + Total 合計 - + Bayes factor 貝葉斯因子 @@ -3602,64 +3645,64 @@ Bayes Factor Wl_Table_Ngram_Generator - + Rank 序號 - + N-gram n 元組 - + Number of Files Found 查詢到的 檔案數 - + Number of Files Found % 查詢到的 檔案數% - + Total 合計 - + [{}] Frequency [{}] 頻數 - + [{}] Frequency % [{}] 頻數% - + Frequency 頻數 - + Frequency % 頻數% - + Frequency 頻數 @@ -3667,7 +3710,7 @@ Frequency % Wl_Table_Profiler - + Total 合計 @@ -3675,60 +3718,130 @@ Frequency % Wl_Table_Profiler_Counts - + Total 合計 - + No language support 無語種支援 + + +Count of Paragraphs +段落數 + + + +Count of Paragraphs % +段落數% + + + +Count of Sentences +句子數 + + + +Count of Sentences % +句子數% + + + +Count of Sentence Segments +句段數 + + + +Count of Sentence Segments % +句段數% + + + +Count of Tokens +形符數 + + + +Count of Tokens % +形符數% + + + +Count of Types +類符數 + + + +Count of Types % +類符數% + + + +Count of Syllables +音節數 + + + +Count of Syllables % +音節數% + + + +Count of Characters +字元數 + + + +Count of Characters % +字元數% + Wl_Table_Profiler_Len_Breakdown - + Total 合計 - + Count of {}-token-long Sentences {}個形符長的句子數 - + Count of {}-token-long Sentences % {}個形符長的句子數% - + Count of {}-token-long Sentence Segment {}個形符長的句段數 - + Count of {}-token-long Sentence Segment % {}個形符長的句段數% - + Count of {}-character-long Tokens {}個字元長的形符數 - + Count of {}-character-long Tokens % {}個字元長的形符數% - + Count of {}-syllables-long Tokens {}個音節長的形符數 - + Count of {}-syllables-long Tokens % {}個音節長的形符數% @@ -3736,8823 +3849,9323 @@ Frequency % Wl_Table_Profiler_Lens - + Total 合計 - + No language support 無語種支援 - - -Wl_Table_Profiler_Lexical_Diversity - -Total -合計 + +Paragraph Length in Sentences (Mean) +段落長(單位:句子)(均值) - - -Wl_Table_Profiler_Readability - -Total -合計 + +Paragraph Length in Sentences (Standard Deviation) +段落長(單位:句子)(標準差) - -No language support -無語種支援 + +Paragraph Length in Sentences (Variance) +段落長(單位:句子)(方差) - -Text is too short -檔案過短 + +Paragraph Length in Sentences (Minimum) +段落長(單位:句子)(最小值) - - -Wl_Table_Results_Sort_Conordancer - -Node -節點詞 + +Paragraph Length in 
Sentences (25th Percentile) +段落長(單位:句子)(25分位數) - -Sentiment -情感 + +Paragraph Length in Sentences (Median) +段落長(單位:句子)(中位數) - -File -檔案 + +Paragraph Length in Sentences (75th Percentile) +段落長(單位:句子)(75分位數) - -Ascending -升序 + +Paragraph Length in Sentences (Maximum) +段落長(單位:句子)(最大值) - -Descending -降序 + +Paragraph Length in Sentences (Range) +段落長(單位:句子)(極差) - -Column Sorted More Than Once -同列排序超過一次 + +Paragraph Length in Sentences (Interquartile Range) +段落長(單位:句子)(四分位差) - -Token -形符 + +Paragraph Length in Sentences (Modes) +段落長(單位:句子)(眾數) - -Column - + +Paragraph Length in Sentence Segments (Mean) +段落長(單位:句段)(均值) - -Order -順序 + +Paragraph Length in Sentence Segments (Standard Deviation) +段落長(單位:句段)(標準差) - - - <body> - <div>Please refrain from sorting the same column more than once!</div> - </body> - - - <body> - <div>請勿對同一列排序一次以上!</div> - </body> - + +Paragraph Length in Sentence Segments (Variance) +段落長(單位:句段)(方差) - -R - + +Paragraph Length in Sentence Segments (Minimum) +段落長(單位:句段)(最小值) - -L - + +Paragraph Length in Sentence Segments (25th Percentile) +段落長(單位:句段)(25分位數) - -L1 -左1 + +Paragraph Length in Sentence Segments (Median) +段落長(單位:句段)(中位數) - -^L[0-9]+$ -^左[0-9]+$ + +Paragraph Length in Sentence Segments (75th Percentile) +段落長(單位:句段)(75分位數) - -R1 -右1 + +Paragraph Length in Sentence Segments (Maximum) +段落長(單位:句段)(最大值) - -^R[0-9]+$ -^右[0-9]+$ + +Paragraph Length in Sentence Segments (Range) +段落長(單位:句段)(極差) - -Token no. 
-形符序號 + +Paragraph Length in Sentence Segments (Interquartile Range) +段落長(單位:句段)(四分位差) - - -Wl_Table_Tags - -TAG -標籤 + +Paragraph Length in Sentence Segments (Modes) +段落長(單位:句段)(眾數) - - -Wl_Table_Tags_Body - -Others -其他 + +Paragraph Length in Tokens (Mean) +段落長(單位:形符)(均值) - -Non-embedded -非嵌入式 + +Paragraph Length in Tokens (Standard Deviation) +段落長(單位:形符)(標準差) - -<TAG> -<標籤> + +Paragraph Length in Tokens (Variance) +段落長(單位:形符)(方差) - -Part of speech -詞性 + +Paragraph Length in Tokens (Minimum) +段落長(單位:形符)(最小值) - - -Wl_Table_Tags_Header - -Header - + +Paragraph Length in Tokens (25th Percentile) +段落長(單位:形符)(25分位數) - -Non-embedded -非嵌入式 + +Paragraph Length in Tokens (Median) +段落長(單位:形符)(中位數) - -<TAG> -<標籤> + +Paragraph Length in Tokens (75th Percentile) +段落長(單位:形符)(75分位數) - - -Wl_Table_Tags_Xml - -Non-embedded -非嵌入式 + +Paragraph Length in Tokens (Maximum) +段落長(單位:形符)(最大值) - -Paragraph -段落 + +Paragraph Length in Tokens (Range) +段落長(單位:形符)(極差) - -Sentence -句子 + +Paragraph Length in Tokens (Interquartile Range) +段落長(單位:形符)(四分位差) - -Word -單詞 + +Paragraph Length in Tokens (Modes) +段落長(單位:形符)(眾數) - -Invalid XML Tag -無效 XML 標籤 + +Sentence Length in Tokens (Mean) +句長(單位:形符)(均值) - - - <div>The specified XML tag is invalid, please check and try again!</div> - - - <div>指定的 XML 標籤無效,請檢查後重試!</div> - + +Sentence Length in Tokens (Standard Deviation) +句長(單位:形符)(標準差) - -<TAG> -<標籤> + +Sentence Length in Tokens (Variance) +句長(單位:形符)(方差) - - -Wl_Table_Wordlist_Generator - -Rank -序號 + +Sentence Length in Tokens (Minimum) +句長(單位:形符)(最小值) - -Token -形符 + +Sentence Length in Tokens (25th Percentile) +句長(單位:形符)(25分位數) - -Number of -Files Found -查詢到的 -檔案數 + +Sentence Length in Tokens (Median) +句長(單位:形符)(中位數) - -Number of -Files Found % -查詢到的 -檔案數% + +Sentence Length in Tokens (75th Percentile) +句長(單位:形符)(75分位數) - -Total -合計 + +Sentence Length in Tokens (Maximum) +句長(單位:形符)(最大值) - -[{}] -Frequency -[{}] -頻數 + +Sentence Length in Tokens (Range) +句長(單位:形符)(極差) - -[{}] -Frequency % -[{}] -頻數% + 
+Sentence Length in Tokens (Interquartile Range) +句長(單位:形符)(四分位差) - - -Frequency - -頻數 + +Sentence Length in Tokens (Modes) +句長(單位:形符)(眾數) - - -Frequency % - -頻數% + +Sentence Segment Length in Tokens (Mean) +句段長(單位:形符)(均值) - -Frequency -頻數 + +Sentence Segment Length in Tokens (Standard Deviation) +句段長(單位:形符)(標準差) - -Syllabification -音節劃分 + +Sentence Segment Length in Tokens (Variance) +句段長(單位:形符)(方差) - - -Wl_Worker_Add_Files - -Updating table... -更新表格中…… + +Sentence Segment Length in Tokens (Minimum) +句段長(單位:形符)(最小值) - -Adding files... ({}/{}) -新增檔案中……({}/{}) + +Sentence Segment Length in Tokens (25th Percentile) +句段長(單位:形符)(25分位數) - - -Wl_Worker_Colligation_Extractor_Fig - -Rendering figure... -渲染圖表中…… + +Sentence Segment Length in Tokens (Median) +句段長(單位:形符)(中位數) - - -Wl_Worker_Colligation_Extractor_Table - -Rendering table... -渲染表格中…… + +Sentence Segment Length in Tokens (75th Percentile) +句段長(單位:形符)(75分位數) - - -Wl_Worker_Collocation_Extractor_Fig - -Rendering figure... -渲染圖表中…… + +Sentence Segment Length in Tokens (Maximum) +句段長(單位:形符)(最大值) - - -Wl_Worker_Collocation_Extractor_Table - -Rendering table... -渲染表格中…… + +Sentence Segment Length in Tokens (Range) +句段長(單位:形符)(極差) - - -Wl_Worker_Concordancer_Fig - -File -檔案 + +Sentence Segment Length in Tokens (Interquartile Range) +句段長(單位:形符)(四分位數) - -Total -合計 + +Sentence Segment Length in Tokens (Modes) +句段長(單位:形符)(眾數) - -Rendering figure... -渲染圖表中…… + +Token Length in Syllables (Mean) +形符長(單位:音節)(均值) - -Search term -檢索項 + +Token Length in Syllables (Standard Deviation) +形符長(單位:音節)(標準差) - - -Wl_Worker_Concordancer_Parallel_Table - -Rendering table... 
-渲染表格中…… + +Token Length in Syllables (Variance) +形符長(單位:音節)(方差) - - -Wl_Worker_Concordancer_Table - -Paragraph -段落 + +Token Length in Syllables (Minimum) +形符長(單位:音節)(最小值) - -Sentence -句子 + +Token Length in Syllables (25th Percentile) +形符長(單位:音節)(25分位數) - -Token -形符 + +Token Length in Syllables (Median) +形符長(單位:音節)(中位數) - -Character -字元 + +Token Length in Syllables (75th Percentile) +形符長(單位:音節)(75分位數) - -Rendering table... -渲染表格中…… + +Token Length in Syllables (Maximum) +形符長(單位:音節)(最大值) - -Sentence segment -句段 + +Token Length in Syllables (Range) +形符長(單位:音節)(極差) - -No language support -無語種支援 + +Token Length in Syllables (Interquartile Range) +形符長(單位:音節)(四分位差) - - -Wl_Worker_Dependency_Parser - -Rendering table... -渲染表格中…… + +Token Length in Syllables (Modes) +形符長(單位:音節)(眾數) - - -Wl_Worker_Download_Model_Spacy - -Fetching model information... -獲取模型資訊中…… + +Token Length in Characters (Mean) +形符長(單位:字元)(均值) - -Downloading model ({:.2f} MB)... -下載模型中({:.2f} MB)…… + +Token Length in Characters (Standard Deviation) +形符長(單位:字元)(標準差) - -Downloading model... -下載模型中…… + +Token Length in Characters (Variance) +形符長(單位:字元)(方差) - -Download completed successfully. -模型下載完畢。 + +Token Length in Characters (Minimum) +形符長(單位:字元)(最小值) - - -Wl_Worker_Download_Model_Stanza - -Downloading model... -下載模型中…… + +Token Length in Characters (25th Percentile) +形符長(單位:字元)(25分位值) - -Download completed successfully. -模型下載完畢。 + +Token Length in Characters (Median) +形符長(單位:字元)(中位數) - - -Wl_Worker_Exp_Table - -Saving file... -儲存檔案中…… + +Token Length in Characters (75th Percentile) +形符長(單位:字元)(75分位數) - -Exporting table... ({} / {}) -匯出表格中……({} / {}) + +Token Length in Characters (Maximum) +形符長(單位:字元)(最大值) - - -Wl_Worker_Fetch_Data_Tagsets - -Updating table... -更新表格中…… + +Token Length in Characters (Range) +形符長(單位:字元)(極差) - - -Wl_Worker_Keyword_Extractor_Fig - -Rendering figure... 
-渲染圖表中…… + +Token Length in Characters (Interquartile Range) +形符長(單位:字元)(四分位差) - - -Wl_Worker_Keyword_Extractor_Table - -Rendering table... -渲染表格中…… + +Token Length in Characters (Modes) +形符長(單位:字元)(眾數) - - -Wl_Worker_Ngram_Generator_Fig - -Rendering figure... -渲染圖表中…… + +Type Length in Syllables (Mean) +類符長(單位:音節)(均值) - - -Wl_Worker_Ngram_Generator_Table - -Rendering table... -渲染表格中…… + +Type Length in Syllables (Standard Deviation) +類符長(單位:音節)(標準差) - - -Wl_Worker_Open_Files - -Updating table... -更新表格中…… + +Type Length in Syllables (Variance) +類符長(單位:音節)(方差) - -Opening files... ({}/{}) -開啟檔案中……({}/{}) + +Type Length in Syllables (Minimum) +類符長(單位:音節)(最小值) - - -Wl_Worker_Profiler_Table - -Rendering table... -渲染表格中…… + +Type Length in Syllables (25th Percentile) +類符長(單位:音節)(25分位數) - - -Wl_Worker_Results_Filter_Collocation_Extractor - -Collocate -搭配詞 + +Type Length in Syllables (Median) +類符長(單位:音節)(中位數) - -Total -合計 + +Type Length in Syllables (75th Percentile) +類符長(單位:音節)(75分位數) - -Number of -Files Found -查詢到的 -檔案數 + +Type Length in Syllables (Maximum) +類符長(單位:音節)(最大值) - -Updating table... -更新表格中…… + +Type Length in Syllables (Range) +類符長(單位:音節)(極差) - -[{}] -Frequency -[{}] -頻數 + +Type Length in Syllables (Interquartile Range) +類符長(單位:音節)(四分位差) - -[{}] -p-value -[{}] -p 值 + +Type Length in Syllables (Modes) +類符長(單位:音節)(眾數) - -[{}] -Bayes Factor -[{}] -貝葉斯因子 + +Type Length in Characters (Mean) +類符長(單位:字元)(均值) - -Keyword -關鍵詞 + +Type Length in Characters (Standard Deviation) +類符長(單位:字元)(標準差) - - -Wl_Worker_Results_Filter_Wordlist_Generator - -Token -形符 + +Type Length in Characters (Variance) +類符長(單位:字元)(方差) - -N-gram -n 元組 + +Type Length in Characters (Minimum) +類符長(單位:字元)(最小值) - -Number of -Files Found -查詢到的 -檔案數 + +Type Length in Characters (25th Percentile) +類符長(單位:字元)(25分位數) - -Updating table... 
-更新表格中…… + +Type Length in Characters (Median) +類符長(單位:字元)(中位數) - -[{}] -Frequency -[{}] -頻數 + +Type Length in Characters (75th Percentile) +類符長(單位:字元)(75分位數) - -Syllabification -音節劃分 + +Type Length in Characters (Maximum) +類符長(單位:字元)(最大值) - - -Wl_Worker_Results_Search - -Highlighting found items... -高亮查得項中…… + +Type Length in Characters (Range) +類符長(單位:字元)(極差) - - -Wl_Worker_Results_Sort_Concordancer - -Updating table... -更新表格中…… + +Type Length in Characters (Interquartile Range) +類符長(單位:字元)(四分位差) - - -Wl_Worker_Wordlist_Generator_Fig - -Rendering figure... -渲染圖表中…… + +Type Length in Characters (Modes) +類符長(單位:字元)(眾數) - - -Wl_Worker_Wordlist_Generator_Table - -Rendering table... -渲染表格中…… + +Syllable Length in Characters (Mean) +音節長(單位:字元)(均值) - - -Wrapper_Colligation_Extractor - -Token Settings -形符設定 + +Syllable Length in Characters (Standard Deviation) +音節長(單位:字元)(標準差) - -Search Settings -搜尋設定 + +Syllable Length in Characters (Variance) +音節長(單位:字元)(方差) - -Generation Settings -生成設定 + +Syllable Length in Characters (Minimum) +音節長(單位:字元)(最小值) - -None - + +Syllable Length in Characters (25th Percentile) +音節長(單位:字元)(25分位數) - -Table Settings -表格設定 + +Syllable Length in Characters (Median) +音節長(單位:字元)(中位數) - -Figure Settings -圖表設定 + +Syllable Length in Characters (75th Percentile) +音節長(單位:字元)(75分位數) - -Rank: -序號: + +Syllable Length in Characters (Maximum) +音節長(單位:字元)(最大值) - -L - + +Syllable Length in Characters (Range) +音節長(單位:字元)(極差) - -R - + +Syllable Length in Characters (Interquartile Range) +音節長(單位:字元)(四分位差) - -Collocational window: -搭配檢索範圍: + +Syllable Length in Characters (Modes) +音節長(單位:字元)(眾數) + + +Wl_Table_Profiler_Lexical_Density_Diversity - -Limit searching: -限制檢索: + +Total +合計 - -Within sentence segments -句段內 + +No language support +無語種支援 - -Within sentences -句子內 + +Brunét's Index + - -Within paragraphs -段落內 + +Corrected TTR + - - -Wrapper_Collocation_Extractor - -Token Settings -形符設定 + +Fisher's Index of Diversity + - -Search Settings -搜尋設定 + +Herdan's Vₘ 
+ - -Generation Settings -生成設定 + +Honoré's Statistic + - -None - + +Lexical Density +詞彙密度 - -Table Settings -表格設定 + +Mean Segmental TTR + - -Figure Settings -圖表設定 + +Measure of Textual Lexical Diversity + - -Rank: -序號: + +Moving-average TTR + - -L - + +Popescu-Mačutek-Altmann's B₁ + - -R - + +Popescu-Mačutek-Altmann's B₂ + - -Collocational window: -搭配檢索範圍: + +Popescu-Mačutek-Altmann's B₃ + - -Limit searching: -限制檢索: + +Popescu-Mačutek-Altmann's B₄ + - -Within sentence segments -句段內 + +Popescu-Mačutek-Altmann's B₅ + - -Within sentences -句子內 + +Popescu's R₁ + - -Within paragraphs -段落內 + +Popescu's R₂ + - - -Wrapper_Concordancer - -Token Settings -形符設定 + +Popescu's R₃ + - -Search Settings -搜尋設定 + +Popescu's R₄ + - -Generation Settings -生成設定 + +Repeat Rate + - -Paragraph -段落 + +Root TTR + - -Sentence -句子 + +Shannon Entropy +夏農熵 - -Token -形符 + +Simpson's l + - -Character -字元 + +Type-token Ratio +類符形符比 - -Table Settings -表格設定 + +Yule's Characteristic K + - -Figure Settings -圖表設定 + +Yule's Index of Diversity + + + +Wl_Table_Profiler_Readability - -Sort results by: -結果排序依據: + +Total +合計 - -File -檔案 + +No language support +無語種支援 - -Zapping Settings -檢索詞遮蔽設定 + +Text is too short +檔案過短 - -Replace keywords with -將關鍵詞替換為 + +Al-Heeti's Readability Prediction Formula + - -Add line numbers -新增行號 + +Automated Arabic Readability Index + - -Randomize outputs -隨機化輸出 + +Automated Readability Index + - -Width (left): -長度(左): + +Bormuth's Cloze Mean + - -Width (right): -長度(右): + +Bormuth's Grade Placement + - -Width unit: -長度單位: + +Coleman-Liau Index + - -Sentence segment -句段 + +Coleman's Readability Formula + - -Search term -檢索項 + +Dale-Chall Readability Formula + - - -Wrapper_Concordancer_Parallel - -Token Settings -形符設定 + +Danielson-Bryan's Readability Formula + - -Search Settings -搜尋設定 + +Dawood's Readability Formula + - -Table Settings -表格設定 + +Degrees of Reading Power + - - -Wrapper_Dependency_Parser - -Token Settings -形符設定 + +Devereaux Readability Index + - -Search Settings -搜尋設定 
+ +Dickes-Steiwer Handformel + - -Table Settings -表格設定 + +Easy Listening Formula + - -Figure Settings -圖表設定 + +Flesch-Kincaid Grade Level + - -coarse-grained -粗分 + +Flesch Reading Ease + - -fine-grained -細分 + +Flesch Reading Ease (Farr-Jenkins-Paterson) + - - -Wrapper_Keyword_Extractor - -Token Settings -形符設定 + +FORCAST Grade Level + - -Generation Settings -生成設定 + +Fórmula de Comprensibilidad de Gutiérrez de Polini + - -Table Settings -表格設定 + +Fórmula de Crawford + - -Figure Settings -圖表設定 + +Fucks's Stilcharakteristik + - -Rank: -序號: + +Gulpease Index + - - -Wrapper_Ngram_Generator - -Token Settings -形符設定 - - - -Search Settings -搜尋設定 + +Gunning Fog Index + - -Generation Settings -生成設定 + +Legibilidad μ + - -Allow skipped tokens: -允許跳過形符數: + +Lensear Write + - -Table Settings -表格設定 + +Lix + - -Figure Settings -圖表設定 + +Lorge Readability Index + - -Rank: -序號: + +Luong-Nguyen-Dinh's Readability Formula + - -Search term position: -檢索項位置: + +McAlpine EFLAW Readability Score + - -N-gram size: -n 元組長度: + +neue Wiener Literaturformeln + - - -Wrapper_Profiler - -Token Settings -形符設定 + +neue Wiener Sachtextformel + - -Table Settings -表格設定 + +OSMAN + - -Generate all tables -生成所有表格 + +Rix + - -Clear all tables -清空所有表格 + +SMOG Grade + - -Readability -可讀性 + +Spache Grade Level + - -Counts -計數 + +Strain Index + - -Lengths -長度 + +Tränkle & Bailer's Readability Formula + - -Length Breakdown -細分長度 + +Tuldava's Text Difficulty + - -Lexical Diversity -詞彙多樣性 + +Wheeler & Smith's Readability Formula + -Wrapper_Wordlist_Generator +Wl_Table_Results_Sort_Conordancer - -Token Settings -形符設定 + +Column + - -Generation Settings -生成設定 + +Order +順序 - -Table Settings -表格設定 + +Node +節點詞 - -Figure Settings -圖表設定 + +Sentiment +情感 - -Rank: -序號: + +Token No. 
+形符序號 - -Syllabification -音節劃分 + +File +檔案 - - -get_re_tags - -Embedded -嵌入式 + +Ascending +升序 - -Non-embedded -非嵌入式 + +Descending +降序 - - -get_re_tags_with_tokens - -Embedded -嵌入式 + +Column Sorted More Than Once +同列排序超過一次 - -Non-embedded -非嵌入式 + + + <body> + <div>Please refrain from sorting the same column more than once!</div> + </body> + + + <body> + <div>請勿對同一列排序一次以上!</div> + </body> + - - -init_settings_global - -Afrikaans -南非語 + +Token +形符 - -Albanian -阿爾巴尼亞語 + +R{} +右 {} - -Amharic -阿姆哈拉語 + +L{} +左 {} - -Arabic -阿拉伯語 + +L1 +左 1 - -Assamese -阿薩姆語 + +^L[0-9]+$ +^左 [0-9]+$ - -Asturian -阿斯圖里亞斯語 + +R1 +右 1 - -Azerbaijani -亞塞拜然語 + +^R[0-9]+$ +^右 [0-9]+$ + + +Wl_Table_Tags - -Basque -巴斯克語 + +TAG +標籤 - -Belarusian -白俄羅斯語 + +Type +型別 - -Bengali -孟加拉語 + +Level +層級 - -Bulgarian -保加利亞語 + +Opening Tag +開始標籤 - -Catalan -加泰羅尼亞語 + +Closing Tag +結束標籤 - -Chinese (Simplified) -漢語(簡體) + +Preview +預覽 - -Chinese (Traditional) -漢語(繁體) + +Embedded +嵌入式 - -Croatian -克羅埃西亞語 + +Non-embedded +非嵌入式 - -Czech -捷克語 + +Reset +重置 - -Danish -丹麥語 + + + <div>Embedded tags must begin with a punctuation mark, e.g. an underscore or a slash!</div> + + + <div>嵌入式標籤必須以一個標點符號,如下劃線或斜槓,開頭!</div> + - -Dutch -荷蘭語 + + + <div>Non-embedded tags must begin and end with a punctuation mark, e.g. 
brackets!</div> + + + <div>非嵌入式標籤必須以一個標點,如括號,開頭和結尾!</div> + - -English (United Kingdom) -英語(英國) + +Invalid Opening Tag +無效開始標籤 - -English (United States) -英語(美國) + +Duplicate Tags +重複標籤 - -Esperanto -世界語 + + + <div>The tag that you have specified already exists in the table!</div> + + + <div>你指定的標籤已存在於表格中!</div> + - -Estonian -愛沙尼亞語 + +N/A +不適用 - -Finnish -芬蘭語 + +token +形符 + + +Wl_Table_Tags_Body - -French -法語 + +Others +其他 - -Galician -加里西亞語 + +Non-embedded +非嵌入式 - -German (Austria) -德語(奧地利) + +<TAG> +<標籤> - -German (Germany) -德語(德國) + +Part of speech +詞性 + + +Wl_Table_Tags_Header - -German (Switzerland) -德語(瑞士) + +Header + - -Greek (Ancient) -希臘語(古) + +Non-embedded +非嵌入式 - -Greek (Modern) -希臘語(現代) + +<TAG> +<標籤> + + +Wl_Table_Tags_Xml - -Gujarati -古吉拉特語 + +Non-embedded +非嵌入式 - -Hindi -印地語 + +Paragraph +段落 - -Hungarian -匈牙利語 + +Sentence +句子 - -Icelandic -冰島語 + +Word +單詞 - -Indonesian -印度尼西亞語 + +Invalid XML Tag +無效 XML 標籤 - -Irish -愛爾蘭語 + + + <div>The specified XML tag is invalid, please check and try again!</div> + + + <div>指定的 XML 標籤無效,請檢查後重試!</div> + - -Italian -義大利語 + +<TAG> +<標籤> + + +Wl_Table_Wordlist_Generator - -Japanese -日語 + +Rank +序號 - -Kannada -卡納達語 + +Token +形符 - -Kazakh -哈薩克語 + +Number of +Files Found +查詢到的 +檔案數 - -Korean -韓語 + +Number of +Files Found % +查詢到的 +檔案數% - -Kyrgyz -吉爾吉斯語 + +Total +合計 - -Latin -拉丁語 + +[{}] +Frequency +[{}] +頻數 - -Latvian -拉脫維亞語 + +[{}] +Frequency % +[{}] +頻數% - -Ligurian -利古里亞語 + + +Frequency + +頻數 - -Lithuanian -立陶宛語 + + +Frequency % + +頻數% - -Luxembourgish -盧森堡語 + +Frequency +頻數 - -Macedonian -馬其頓語 + +Syllabification +音節劃分 - -Malay -馬來語 + +N/A +不適用 - -Malayalam -馬拉雅拉姆語 + +No language support +無語種支援 + + +Wl_Worker_Add_Files - -Manx -馬恩語 + +Updating table... +更新表格中…… - -Marathi -馬拉地語 + +Adding files... 
({}/{}) +新增檔案中……({}/{}) + + +Wl_Worker_Colligation_Extractor - -Meitei -曼尼普爾語 + +None + - -Mongolian -蒙古語 + +Within sentence segments +句段內 - -Nepali -尼泊爾語 + +Within sentences +句子內 - -Norwegian Bokmål -書面挪威語 + +Within paragraphs +段落內 + + +Wl_Worker_Colligation_Extractor_Fig - -Norwegian Nynorsk -新挪威語 + +Rendering figure... +渲染圖表中…… + + +Wl_Worker_Colligation_Extractor_Table - -Oriya -奧里亞語 + +Rendering table... +渲染表格中…… + + +Wl_Worker_Collocation_Extractor - -Persian -波斯語 + +None + - -Polish -波蘭語 + +Within sentence segments +句段內 - -Portuguese (Brazil) -葡萄牙語(巴西) + +Within sentences +句子內 - -Portuguese (Portugal) -葡萄牙語(葡萄牙) + +Within paragraphs +段落內 + + +Wl_Worker_Collocation_Extractor_Fig - -Romanian -羅馬尼亞語 + +Rendering figure... +渲染圖表中…… + + +Wl_Worker_Collocation_Extractor_Table - -Russian -俄語 + +Rendering table... +渲染表格中…… + + +Wl_Worker_Concordancer_Fig - -Sanskrit -梵語 + +File +檔案 - -Scottish Gaelic -蘇格蘭蓋爾語 + +Total +合計 - -Serbian (Cyrillic) -塞爾維亞語(西里爾) + +Rendering figure... +渲染圖表中…… - -Serbian (Latin) -塞爾維亞語(拉丁) + +Search term +檢索項 + + +Wl_Worker_Concordancer_Parallel_Table - -Sinhala -僧伽羅語 + +Rendering table... +渲染表格中…… + + +Wl_Worker_Concordancer_Table - -Slovak -斯洛伐克語 + +Paragraph +段落 - -Slovenian -斯洛維尼亞語 + +Sentence +句子 - -Spanish -西班牙語 + +Token +形符 - -Swahili -斯瓦西里語 + +Character +字元 - -Swedish -瑞典語 + +Rendering table... +渲染表格中…… - -Tagalog -他加祿語 + +Sentence segment +句段 - -Tajik -塔吉克語 + +No language support +無語種支援 + + +Wl_Worker_Dependency_Parser - -Tamil -泰米爾語 + +Rendering table... +渲染表格中…… + + +Wl_Worker_Download_Model_Spacy - -Tatar -韃靼語 + +Fetching model information... +獲取模型資訊中…… - -Telugu -泰盧固語 + +Downloading model ({:.2f} MB)... +下載模型中({:.2f} MB)…… - -Tetun Dili -帝力德頓語 + +Downloading model... +下載模型中…… - -Thai -泰語 + +Download completed successfully. +模型下載完畢。 + + +Wl_Worker_Download_Model_Stanza - -Tibetan -藏語 + +Downloading model... +下載模型中…… - -Tigrinya -提格雷尼亞語 + +Download completed successfully. 
+模型下載完畢。 + + +Wl_Worker_Exp_Table - -Tswana -茨瓦納語 + +Saving file... +儲存檔案中…… - -Turkish -土耳其語 + +Exporting table... ({} / {}) +匯出表格中……({} / {}) + + +Wl_Worker_Fetch_Data_Tagsets - -Ukrainian -烏克蘭語 + +Updating table... +更新表格中…… + + +Wl_Worker_Keyword_Extractor_Fig - -Urdu -烏爾都語 + +Rendering figure... +渲染圖表中…… + + +Wl_Worker_Keyword_Extractor_Table - -Vietnamese -越南語 + +Rendering table... +渲染表格中…… + + +Wl_Worker_Ngram_Generator_Fig - -Welsh -威爾士語 + +Rendering figure... +渲染圖表中…… + + +Wl_Worker_Ngram_Generator_Table - -Yoruba -約魯巴語 + +Rendering table... +渲染表格中…… + + +Wl_Worker_Open_Files - -Zulu -祖魯語 + +Updating table... +更新表格中…… - -Arabic (CP720) -阿拉伯語(CP720) + +Opening files... ({}/{}) +開啟檔案中……({}/{}) + + +Wl_Worker_Profiler_Table - -Arabic (CP864) -阿拉伯語(CP864) + +Rendering table... +渲染表格中…… + + +Wl_Worker_Results_Filter_Collocation_Extractor - -Arabic (ISO-8859-6) -阿拉伯語(ISO-8859-6) + +Collocate +搭配詞 - -Arabic (Mac OS Arabic) -阿拉伯語(Mac OS Arabic) + +Total +合計 - -Arabic (Windows-1256) -阿拉伯語(Windows-1256) + +Number of +Files Found +查詢到的 +檔案數 - -Chinese (GB18030) -漢語(GB18030) + +Updating table... +更新表格中…… - -Chinese (GBK) -漢語(GBK) + +[{}] +Frequency +[{}] +頻數 - -Chinese (Simplified) (GB2312) -漢語(簡體)(GB2312) + +[{}] +p-value +[{}] +p 值 - -Chinese (Simplified) (HZ) -漢語(簡體)(HZ) + +[{}] +Bayes Factor +[{}] +貝葉斯因子 - -Chinese (Traditional) (Big-5) -漢語(繁體)(Big-5) + +Keyword +關鍵詞 - -Chinese (Traditional) (Big5-HKSCS) -漢語(繁體)(Big5-HKSCS) + +Node +節點詞 + + +Wl_Worker_Results_Filter_Dependency_Parser - -Chinese (Traditional) (CP950) -漢語(繁體)(CP950) + +Head +核心詞 - -Croatian (Mac OS Croatian) -克羅埃西亞語(Mac OS Croatian) + +Dependent +依存詞 - -Cyrillic (CP855) -西里爾(CP855) + +Dependency Length +依存距離 - -Cyrillic (CP866) -西里爾(CP866) + +Dependency Length (Absolute) +依存距離(絕對) - -Cyrillic (ISO-8859-5) -西里爾(ISO-8859-5) + +File +檔案 - -Cyrillic (Mac OS Cyrillic) -西里爾(Mac OS Cyrillic) + +Total +合計 - -Cyrillic (Windows-1251) -西里爾(Windows-1251) + +Updating table... 
+更新表格中…… + + +Wl_Worker_Results_Filter_Wordlist_Generator - -English (ASCII) -英語(ASCII) + +Token +形符 - -English (EBCDIC 037) -英語(EBCDIC 037) + +N-gram +n 元組 - -English (CP437) -英語(CP437) + +Number of +Files Found +查詢到的 +檔案數 - -European (HP Roman-8) -歐洲(HP Roman-8) + +Updating table... +更新表格中…… - -European (Central) (CP852) -歐洲(中部)(CP852) + +[{}] +Frequency +[{}] +頻數 - -European (Central) (ISO-8859-2) -歐洲(中部)(ISO-8859-2) + +Syllabification +音節劃分 + + +Wl_Worker_Results_Search - -European (Central) (Mac OS Central European) -歐洲(中部)(Mac OS Central European) + +Highlighting found items... +高亮查得項中…… + + +Wl_Worker_Results_Sort_Concordancer - -European (Central) (Windows-1250) -歐洲(中部)(Windows-1250) + +Updating table... +更新表格中…… + + +Wl_Worker_Wordlist_Generator - -European (Northern) (ISO-8859-4) -歐洲(北部)(ISO-8859-4) + +No language support +無語種支援 + + +Wl_Worker_Wordlist_Generator_Fig - -European (Southern) (ISO-8859-3) -歐洲(南部)(ISO-8859-3) + +Rendering figure... +渲染圖表中…… + + +Wl_Worker_Wordlist_Generator_Table - -European (Western) (EBCDIC 500) -歐洲(西部)(EBCDIC 500) + +Rendering table... 
+渲染表格中…… + + +Wrapper_Colligation_Extractor - -European (Western) (CP850) -歐洲(西部)(CP850) + +Token Settings +形符設定 - -European (Western) (CP858) -歐洲(西部)(CP858) + +Search Settings +搜尋設定 - -European (Western) (CP1140) -歐洲(西部)(CP1140) + +Generation Settings +生成設定 - -European (Western) (ISO-8859-1) -歐洲(西部)(ISO-8859-1) + +None + - -European (Western) (ISO-8859-15) -歐洲(西部)(ISO-8859-15) + +Table Settings +表格設定 - -European (Western) (Mac OS Roman) -歐洲(西部)(Mac OS Roman) + +Figure Settings +圖表設定 - -European (Western) (Windows-1252) -歐洲(西部)(Windows-1252) + +Rank: +序號: - -French (CP863) -法語(CP863) + +L + - -German (EBCDIC 273) -德語(EBCDIC 273) + +R + - -Greek (CP737) -希臘語(CP737) + +Collocational window: +搭配檢索範圍: - -Greek (CP869) -希臘語(CP869) + +Limit searching: +限制檢索: - -Greek (CP875) -希臘語(CP875) + +Within sentence segments +句段內 - -Greek (ISO-8859-7) -希臘語(ISO-8859-7) + +Within sentences +句子內 - -Greek (Mac OS Greek) -希臘語(Mac OS Greek) + +Within paragraphs +段落內 + + +Wrapper_Collocation_Extractor - -Greek (Windows-1253) -希臘語(Windows-1253) + +Token Settings +形符設定 - -Hebrew (CP856) -希伯來語(CP856) + +Search Settings +搜尋設定 - -Hebrew (CP862) -希伯來語(CP862) + +Generation Settings +生成設定 - -Hebrew (EBCDIC 424) -希伯來語(EBCDIC 424) + +None + - -Hebrew (ISO-8859-8) -希伯來語(ISO-8859-8) + +Table Settings +表格設定 - -Hebrew (Windows-1255) -希伯來語(Windows-1255) + +Figure Settings +圖表設定 - -Icelandic (CP861) -冰島語(CP861) + +Rank: +序號: - -Icelandic (Mac OS Icelandic) -冰島語(Mac OS Icelandic) + +L + - -Japanese (CP932) -日語(CP932) + +R + - -Japanese (EUC-JP) -日語(EUC-JP) + +Collocational window: +搭配檢索範圍: - -Japanese (EUC-JIS-2004) -日語(EUC-JIS-2004) + +Limit searching: +限制檢索: - -Japanese (EUC-JISx0213) -日語(EUC-JISx0213) + +Within sentence segments +句段內 - -Japanese (ISO-2022-JP) -日語(ISO-2022-JP) + +Within sentences +句子內 - -Japanese (ISO-2022-JP-1) -日語(ISO-2022-JP-1) + +Within paragraphs +段落內 + + +Wrapper_Concordancer - -Japanese (ISO-2022-JP-2) -日語(ISO-2022-JP-2) + +Token Settings +形符設定 - -Japanese (ISO-2022-JP-2004) 
-日語(ISO-2022-JP-2004) + +Search Settings +搜尋設定 - -Japanese (ISO-2022-JP-3) -日語(ISO-2022-JP-3) + +Generation Settings +生成設定 - -Japanese (ISO-2022-JP-EXT) -日語(ISO-2022-JP-EXT) + +Paragraph +段落 - -Japanese (Shift_JIS) -日語(Shift_JIS) + +Sentence +句子 - -Japanese (Shift_JIS-2004) -日語(Shift_JIS-2004) + +Token +形符 - -Japanese (Shift_JISx0213) -日語(Shift_JISx0213) + +Character +字元 - -Kazakh (KZ-1048) -哈薩克語(KZ-1048) + +Table Settings +表格設定 - -Kazakh (PTCP154) -哈薩克語(PTCP154) + +Figure Settings +圖表設定 - -Korean (EUC-KR) -韓語(EUC-KR) + +Sort results by: +結果排序依據: - -Korean (ISO-2022-KR) -韓語(ISO-2022-KR) + +File +檔案 - -Korean (JOHAB) -韓語(JOHAB) + +Zapping Settings +檢索項遮蔽設定 - -Korean (UHC) -韓語(UHC) + +Replace keywords with +將檢索項替換為 - -Persian/Urdu (Mac OS Farsi) -波斯語/烏爾都語(Mac OS Farsi) + +Add line numbers +新增行號 - -Portuguese (CP860) -葡萄牙語(CP860) + +Randomize outputs +隨機化輸出 - -Romanian (Mac OS Romanian) -羅馬尼亞語(Mac OS Romanian) + +Sentence segment +句段 - -Russian (KOI8-R) -俄語(KOI8-R) + +Search term +檢索項 - -Tajik (KOI8-T) -塔吉克語(KOI8-T) + +Context length (left): +上下文長度(左): - -Thai (CP874) -泰語(CP874) + +Context length (right): +上下文長度(右): - -Thai (ISO-8859-11) -泰語(ISO-8859-11) + +Unit of context length: +上下文長度單位: + + +Wrapper_Concordancer_Parallel - -Turkish (CP857) -土耳其語(CP857) + +Token Settings +形符設定 - -Turkish (EBCDIC 1026) -土耳其語(EBCDIC 1026) + +Search Settings +搜尋設定 - -Turkish (ISO-8859-9) -土耳其語(ISO-8859-9) + +Table Settings +表格設定 + + +Wrapper_Dependency_Parser - -Turkish (Mac OS Turkish) -土耳其語(Mac OS Turkish) + +Token Settings +形符設定 - -Turkish (Windows-1254) -土耳其語(Windows-1254) + +Search Settings +搜尋設定 - -Ukrainian (CP1125) -烏克蘭語(CP1125) + +Table Settings +表格設定 - -Ukrainian (KOI8-U) -烏克蘭語(KOI8-U) + +Figure Settings +圖表設定 - -Urdu (CP1006) -烏爾都語(CP1006) + +coarse-grained +粗分 - -Vietnamese (CP1258) -越南語(CP1258) + +fine-grained +細分 - -spaCy - Sentencizer -spaCy - 分句器 + +Match dependency relations +匹配依存關係 + + +Wrapper_Keyword_Extractor - -Student's t-test (1-sample) -學生 t 檢驗(單樣本) + +Token 
Settings +形符設定 - -z-score -z 值 + +Generation Settings +生成設定 - -Mann-Whitney U Test -曼惠特尼 U 檢驗 + +Table Settings +表格設定 - -Student's t-test (2-sample) -學生 t 檢驗(雙樣本) + +Figure Settings +圖表設定 - -Log-likelihood Ratio -對數似然比 + +Rank: +序號: + + +Wrapper_Ngram_Generator - -t-statistic -t 值 + +Token Settings +形符設定 - -Dice's Coefficient -Dice 係數 + +Search Settings +搜尋設定 - -Jaccard Index -雅卡爾指數 + +Generation Settings +生成設定 - -Sorbian (Lower) -索布語(下) + +Allow skipped tokens: +允許跳過形符數: - -Sorbian (Upper) -索布語(上) + +Table Settings +表格設定 - -None - + +Figure Settings +圖表設定 - -Welch's t-test -Welch t 檢驗 + +Rank: +序號: - -z-score (Berry-Rogghe) -z 值(Berry-Rogghe) + +Search term position: +檢索項位置: - -Carroll's D₂ - + +N-gram size: +n 元組長度: + + +Wrapper_Profiler - -Gries's DP - + +Token Settings +形符設定 - -Juilland's D - + +Table Settings +表格設定 - -Lyne's D₃ - + +Generate all tables +生成所有表格 - -Rosengren's S - + +Clear all tables +清空所有表格 - -Zhang's Distributional Consistency - + +Readability +可讀性 - -Zhang's DC - + +Counts +計數 - -Engwall's FM - + +Lengths +長度 - -Juilland's U - + +Length Breakdown +長度明細 - -Kromer's UR - + +Lexical Density/Diversity +詞彙密度/多樣性 - -Rosengren's KF - + +Clear All Tables +清空所有表格 - -Difference Coefficient - + + + <div> + The results in some of the tables have yet been exported. Do you really want to clear all tables? + </div> + + + <div> + 部分表格中的結果尚未儲存。你是否確認清空所有表格? 
+ </div> + + + +Wrapper_Wordlist_Generator - -Kilgarriff's Ratio - + +Token Settings +形符設定 - -Log Ratio - + +Generation Settings +生成設定 - -Minimum Sensitivity - + +Table Settings +表格設定 - -Poisson Collocation Measure - + +Figure Settings +圖表設定 - -Burmese -緬甸語 + +Rank: +序號: - -English (Middle) -英語(中古) + +Syllabification +音節劃分 + + +wl_boxes - -Ganda -幹達語 + +Yes + - -Georgian -喬治亞語 + +No + - -Punjabi (Gurmukhi) -旁遮普語(古木基) + +No limit +無限制 - -Sámi (Northern) -薩米語(北) + +From + - -Other languages -其他語種 + +to + - -All languages (UTF-8 without BOM) -所有語種(UTF-8 無簽名) + +Sync +同步 - -All languages (UTF-8 with BOM) -所有語種(UTF-8 帶簽名) + +L + - -All languages (UTF-16 with BOM) -所有語種(UTF-16 帶簽名) + +R + + + +wl_buttons - -All languages (UTF-16BE without BOM) -所有語種(UTF-16 大端無簽名) + +Browse... +瀏覽... - -All languages (UTF-16LE without BOM) -所有語種(UTF-16 小端無簽名) + +Transparent +透明 + + +wl_checks_work_area - -All languages (UTF-32 with BOM) -所有語種(UTF-32 帶簽名) + +Missing Search Terms +缺少檢索項 - -All languages (UTF-32BE without BOM) -所有語種(UTF-32 大端無簽名) + + + <div> + You have not specified any search terms yet, please enter one in the input box under "<span style="color: #F00; font-weight: bold;">Search term</span>" first. + </div> + + + <div> + 你還未指定任何檢索項,請先在“<span style="color: #F00; font-weight: bold;">檢索項</span>”下的輸入框中指定一項。 + </div> + - -All languages (UTF-32LE without BOM) -所有語種(UTF-32 小端無簽名) + +No Results +無結果 - -All languages (UTF-7) -所有語種(UTF-7) + + + <div>Data processing has completed successfully, but there are no results to display.</div> + <div>You can change your settings and try again.</div> + + + <div>資料處理操作已完成,但沒有可顯示的結果。</div> + <div>你可以更改你的設定後重試。</div> + - -Baltic languages (CP775) -波羅的海諸語(CP775) + +Language support unavailable! +語種支援不可用! - -Baltic languages (ISO-8859-13) -波羅的海諸語(ISO-8859-13) + +Missing search terms! +缺少檢索項! - -Baltic languages (Windows-1257) -波羅的海諸語(Windows-1257) + +Table generated successfully. 
+已成功生成表格。 - -Celtic languages (ISO-8859-14) -凱爾特語(ISO-8859-14) + +Figure generated successfully. +已成功生成圖表。 - -European (Southeastern) (ISO-8859-16) -歐洲(東南部)(ISO-8859-16) + +No results to display. +無結果可供顯示。 - -Nordic languages (CP865) -北歐諸語(CP865) + +A fatal error has just occurred! +剛才發生了一個致命錯誤! - -Nordic languages (ISO-8859-10) -北歐諸語(ISO-8859-10) + +Syllable tokenization +分音節 - -Thai (TIS-620) -泰語(TIS-620) + +Part-of-speech tagging +詞性標註 - -CSV files (*.csv) -CSV 檔案 (*.csv) + +Lemmatization +詞形還原 - -Excel workbooks (*.xlsx) -Excel 工作簿 (*.xlsx) + +Dependency parsing +依存分析 - -HTML pages (*.htm; *.html) -HTML 頁面 (*.htm; *.html) + +No Language Support +無語種支援 - -PDF files (*.pdf) -PDF 檔案 (*.pdf) + +Type of Language Support +語種支援型別 - -Text files (*.txt) -文字檔案 (*.txt) + +File Name +檔名 - -Translation memory files (*.tmx) -翻譯記憶庫檔案 (*.tmx) + +Language +語種 - -Word documents (*.docx) -Word 文件 (*.docx) + + + <div> + The process cannot be done because language support is unavailable for the following files. Please check your language settings or try again with files of different languages. + </div> + + + <div> + 由於下列檔案缺少語種支援,因此操作無法完成。請檢查你的語種設定或使用其他語種的檔案重試。 + </div> + - -XML files (*.xml) -XML 檔案 (*.xml) + +Model downloaded successfully. +已成功下載模型。 - -All files (*.*) -所有檔案 (*.*) + +A network error occurred while downloading the model! +下載模型時發生網路錯誤! - -OpenType fonts (*.otf) -OpenType 字型 (*.otf) + +Table exported successfully. +已成功匯出表格。 - -TrueType fonts (*.ttf) -TrueType 字型 (*.ttf) + +File access denied! +檔案請求被拒絕! 
- -Blizzard mipmap format (*.blp) - + +File Access Denied +檔案請求被拒絕 - -Windows bitmaps (*.bmp) -Windows 點陣圖 (*.bmp) + + + <div>Access to "{}" is denied, please specify another location or close the file and try again.</div> + + + <div>訪問“{}”時被拒絕,請指定其他位置或關閉檔案後重試。</div> + - -Window cursor files (*.cur) -Window 游標檔案 (*.cur) + +Export Completed +匯出已完成 - -Multi-page PCX files (*.dcx) -多頁 PCX 檔案 (*.dcx) + + + <div>The table has been successfully exported to "{}".</div> + + + <div>已成功匯出表格至“{}”。</div> + + + +wl_conversion - -DirectDraw surface (*.dds) - + +Yes + - -Device-independent bitmaps (*.dib) -裝置無關點陣圖 (*.dib) + +No + + + +wl_dependency_parsing - -Encapsulated PostScript (*.eps, *.ps) - + +Dependency Graphs Generated Successfully +成功生成依存圖 - -Flexible image transport system (*.fit, *.fits) - + + + <div>Dependency graphs has been successfully generated and exported under folder: {}</div> + + <div>If the figures are not displayed automatically, you may try opening them manually using web browsers or image viewers installed on your computer, or save copies of them in other locations for later use.</div> + + + <div>已成功生成依存圖並匯出至該資料夾下:{}</div> + + <div>若圖表未自動顯示,你可使用電腦上已安裝的瀏覽器或看圖軟體將其手動開啟,或將圖表副本儲存至其他位置以待後用。</div> + + + +wl_dialogs - -Autodesk animation files (*.flc, *.fli) -Autodesk 動畫檔案 (*.flc, *.fli) + +Copy +複製 - -Fox Engine textures (*.ftex) -Fox Engine 紋理 (*.ftex) + +Close +關閉 - -GIMP brush files (*.gbr) -GIMP 筆刷檔案 (*.gbr) + +Save +儲存 - -Graphics interchange format (*.gif) - + +Cancel +取消 - -Apple icon images (*.icns) -蘋果圖示圖片 (*.icns) + +OK +確認 + + +wl_dialogs_errs - -Windows icon files (*.ico) -Windows 圖示檔案 (*.ico) + +Fatal Error +致命錯誤 - -IPTC/NAA newsphoto files (*.iim) -IPTC/NAA newsphoto 檔案 (*.iim) + + + <div>A fatal error has occurred, please <b>send the following error messages</b> to {} in order to <b>contact the author for support</b>!</div> + + + <div>剛才發生了一個致命錯誤,請<b>將下方錯誤資訊</b>傳送至{}來獲取<b>作者的幫助</b>!</div> + - -IM files (*.im) -IM 檔案 (*.im) + +Network Error +網路錯誤 
- -Image Tools image files (*) -Image Tools 影象檔案 (*) + + + <div>A network error occurred while downloading the model, please check your internet connections and proxy settings in <b>Menu → Preferences → General → Proxy Settings</b> if you are using a proxy.</div> + <div>If the network issue persists, please <b>send the following error messages</b> to {} in order to <b>contact the author for support</b>.</div> + + + <div>下載模型時發生了網路錯誤,請檢查你的網路連線,如果你使用了代理,那麼請一併檢查<b>選單 → 偏好 → 全域性 → 代理設定</b>中的代理設定。</div> + <div>如果網路問題仍然存在,請<b>將下方錯誤資訊</b>傳送至{}來獲取<b>作者的幫助</b>。</div> + + + +wl_dialogs_misc - -JPEG files (*.jfif, *.jpe, *.jpeg, *.jpg) -JPEG 檔案 (*.jfif, *.jpe, *.jpeg, *.jpg) + +Processing data... +處理資料中…… - -JPEG 2000 files (*.j2c, *.j2k, *.jp2, *.jpc, *.jpf, *.jpx) -JPEG 2000 檔案 (*.j2c, *.j2k, *.jp2, *.jpc, *.jpf, *.jpx) + +Downloading model... +下載模型中…… + + +wl_figs - -McIDAS area files (*) -McIDAS area 檔案 (*) + +Frequency +頻數 - -PhotoCD files (*.pcd) -PhotoCD 檔案 (*.pcd) + +^[LR][1-9][0-9]*$ +^[左右] [1-9][0-9]*$ - -Picture exchange (*.pcx) - + +Cumulative Percentage Frequency +累加百分比頻數 - -PIXAR raster files (*.pxr) -PIXAR 柵格檔案 (*.pxr) + +Cumulative Frequency +累加頻數 - -Portable network graphics (*.apng, *.png) - + +Percentage Frequency +百分比頻數 - -Portable pixmap format (*.pbm, *.pgm, *.pnm, *.ppm) - + +p-value +p 值 - -Photoshop PSD files (*.psd) -Photoshop PSD 檔案 (*.psd) + +Custom +自定義 - -Sun raster files (*.ras) -Sun 柵格檔案 (*.ras) + +Monochrome +單色 - -Silicon graphics images (*.bw, *.rgb, *.rgba, *.sgi) - + +Colormap +色譜 + + +wl_figs_freqs - -SPIDER files (*) -SPIDER 檔案 (*) + +Network graph +網路圖 - -Truevision TGA (*.icb, *.tga, *.vda, *.vst) - + +Reference files +參照檔案 - -TIFF files (*.tif, *.tiff) -TIFF 檔案 (*.tif, *.tiff) + +Total +合計 - -WebP files (*.webp) -WebP 檔案 (*.webp) + +Line chart +折線圖 - -Windows metafiles (*.emf, *.wmf) -Windows 元檔案 (*.emf, *.wmf) + +Token +形符 - -X bitmaps (*.xbm) - + +N-gram +n 元組 - -X pixmaps (*.xpm) - + +Collocate +搭配詞 - -XV thumbnails (*) -XV 縮圖 (*) 
+ +Keyword +關鍵詞 - -botok - Tibetan sentence tokenizer -botok - 藏語分句器 + +Word cloud +詞雲圖 + + +wl_figs_stats - -NLTK - Czech Punkt sentence tokenizer -NLTK - 捷克語 Punkt 分句器 + +Total +合計 - -NLTK - Danish Punkt sentence tokenizer -NLTK - 丹麥語 Punkt 分句器 + +p-value +p 值 - -NLTK - Dutch Punkt sentence tokenizer -NLTK - 荷蘭語 Punkt 分句器 + +Line chart +折線圖 - -NLTK - English Punkt sentence tokenizer -NLTK - 英語 Punkt 分句器 + +Token +形符 - -NLTK - Estonian Punkt sentence tokenizer -NLTK - 愛沙尼亞語 Punkt 分句器 + +N-gram +n 元組 - -NLTK - Finnish Punkt sentence tokenizer -NLTK - 芬蘭語 Punkt 分句器 + +Collocate +搭配詞 - -NLTK - French Punkt sentence tokenizer -NLTK - 法語 Punkt 分句器 + +Keyword +關鍵詞 - -NLTK - German Punkt sentence tokenizer -NLTK - 德語 Punkt 分句器 + +Word cloud +詞雲圖 - -NLTK - Greek Punkt sentence tokenizer -NLTK - 希臘語 Punkt 分句器 + +Network graph +網路圖 + + +wl_lists - -NLTK - Italian Punkt sentence tokenizer -NLTK - 義大利語 Punkt 分句器 + +New search term +新檢索項 - -NLTK - Malayalam Punkt sentence tokenizer -NLTK - 馬拉雅拉姆語 Punkt 分句器 + +New stop word +新停用詞 - -NLTK - Norwegian Punkt sentence tokenizer -NLTK - 挪威語 Punkt 分句器 + +New item +新列表項 - -NLTK - Polish Punkt sentence tokenizer -NLTK - 波蘭語 Punkt 分句器 + +Add +新增 - -NLTK - Portuguese Punkt sentence tokenizer -NLTK - 葡萄牙語 Punkt 分句器 + +Insert +插入 - -NLTK - Russian Punkt sentence tokenizer -NLTK - 俄語 Punkt 分句器 + +Remove +移除 - -NLTK - Slovenian Punkt sentence tokenizer -NLTK - 斯洛維尼亞語 Punkt 分句器 + +Clear +清空 - -NLTK - Spanish Punkt sentence tokenizer -NLTK - 西班牙語 Punkt 分句器 + +Duplicates Found +發現重複項 - -NLTK - Swedish Punkt sentence tokenizer -NLTK - 瑞典語 Punkt 分句器 + + + <div>The item that you have just edited already exists in the list, please specify another one!</div> + + + <div>你剛才編輯的列表項已存在於列表中,請另外指定一項!</div> + - -NLTK - Turkish Punkt sentence tokenizer -NLTK - 土耳其語 Punkt 分句器 + +Import +匯入 - -spaCy - Croatian sentence recognizer -spaCy - 克羅埃西亞語句子識別器 + +Export +匯出 - -spaCy - Dutch sentence recognizer -spaCy - 荷蘭語句子識別器 + +Import from Files +從檔案匯入 - -spaCy - 
Finnish sentence recognizer -spaCy - 芬蘭語句子識別器 + +Text files (*.txt) +文字檔案 (*.txt) - -spaCy - Greek (Modern) sentence recognizer -spaCy - 希臘語(現代)句子識別器 + +Import Error +匯入時出錯 - -spaCy - Italian sentence recognizer -spaCy - 義大利語句子識別器 + + + <div> + An error occurred during import, please check the following files and try again. + </div> + + + <div> + 匯入時發生了一個錯誤,請檢查下列檔案後重試。 + </div> + - -spaCy - Lithuanian sentence recognizer -spaCy - 立陶宛語句子識別器 + +Empty file +空檔案 - -spaCy - Macedonian sentence recognizer -spaCy - 馬其頓語句子識別器 + +An error occurred during import! +匯入時發生了一個錯誤! - -spaCy - Norwegian Bokmål sentence recognizer -spaCy - 書面挪威語句子識別器 + +item + - -spaCy - Polish sentence recognizer -spaCy - 波蘭語句子識別器 + +items + - -spaCy - Portuguese sentence recognizer -spaCy - 葡萄牙語句子識別器 + +{} {} has been successfully imported into the list. +已成功匯入 {} {}至列表中。 - -spaCy - Romanian sentence recognizer -spaCy - 羅馬尼亞語句子識別器 + +Export to File +匯出至檔案 - -spaCy - Russian sentence recognizer -spaCy - 俄語句子識別器 + +Export Completed +匯出完成 - -spaCy - Swedish sentence recognizer -spaCy - 瑞典語句子識別器 + + + <div>The list has been successfully exported to "{}".</div> + + + <div>已成功匯出列表至“{}”。</div> + + + +wl_matching - -Underthesea - Vietnamese sentence tokenizer -Underthesea - 越南語分句器 + +Embedded +嵌入式 - -botok - Tibetan word tokenizer -botok - 藏語分詞器 + +Non-embedded +非嵌入式 + + +wl_measure_utils - -jieba - Chinese word tokenizer -jieba - 漢語分詞器 + +Absolute frequency +絕對頻數 - -NLTK - NIST tokenizer -NLTK - NIST 分詞器 + +Relative frequency +相對頻數 + + +wl_measures_lexical_density_diversity - -NLTK - NLTK tokenizer -NLTK - NLTK 分詞器 + +Content words +實詞 - -NLTK - Penn Treebank tokenizer -NLTK - 賓州樹庫分詞器 + +Rank-frequency distribution +頻數排序分佈 - -NLTK - Regular-expression tokenizer -NLTK - 正規表示式分詞器 + +Frequency spectrum +頻數譜 + + +wl_measures_readability - -NLTK - Tok-tok tokenizer -NLTK - Tok-tok 分詞器 + +Original +原版 - -NLTK - Twitter tokenizer -NLTK - 推特分詞器 + +New +新版 - -pkuseg - Chinese word tokenizer -pkuseg - 漢語分詞器 + 
+Navy +海軍版 - -PyThaiNLP - Longest matching -PyThaiNLP - 最長匹配 + +Policy One +策略一 - -PyThaiNLP - Maximum matching -PyThaiNLP - 最大匹配 + +Policy Two +策略二 + + +wl_measures_statistical_significance - -PyThaiNLP - Maximum matching + TCC -PyThaiNLP - 最大匹配 + TCC + +Two-tailed +雙尾 - -Sacremoses - Moses tokenizer -Sacremoses - Moses 分詞器 + +Left-tailed +左尾 - -spaCy - Afrikaans word tokenizer -spaCy - 南非語分詞器 + +Right-tailed +右尾 + + +wl_misc - -spaCy - Albanian word tokenizer -spaCy - 阿爾巴尼亞語分詞器 + +minute + - -spaCy - Amharic word tokenizer -spaCy - 阿姆哈拉語分詞器 + +minutes + - -spaCy - Arabic word tokenizer -spaCy - 阿拉伯語分詞器 + +(In {} {} {:.2f} seconds) +(耗時 {} {} {:.2f} 秒) - -spaCy - Armenian word tokenizer -spaCy - Armenian 分詞器 + +(In +(耗時 + + +wl_pos_tagging - -spaCy - Azerbaijani word tokenizer -spaCy - 亞塞拜然語分詞器 + +Content words +實詞 - -spaCy - Basque word tokenizer -spaCy - 巴斯克語分詞器 + +Function words +虛詞 + + +wl_results_filter - -spaCy - Bengali word tokenizer -spaCy - 孟加拉語分詞器 + +p-value: +p 值: + + +wl_settings_default - -spaCy - Bulgarian word tokenizer -spaCy - 保加利亞語分詞器 + +Observed Files +觀察檔案 - -spaCy - Catalan word tokenizer -spaCy - 加泰羅尼亞語分詞器 + +Profiler +分析工具 - -spaCy - Chinese word tokenizer -spaCy - 漢語分詞器 + +APA (7th edition) +APA(第七版) - -spaCy - Croatian word tokenizer -spaCy - 克羅埃西亞語分詞器 + +Counts +計數 - -spaCy - Czech word tokenizer -spaCy - 捷克語分詞器 + +Token +形符 - -spaCy - Danish word tokenizer -spaCy - 丹麥語分詞器 + +File +檔案 - -spaCy - Dutch word tokenizer -spaCy - 荷蘭語分詞器 + +Ascending +升序 - -spaCy - English word tokenizer -spaCy - 英語分詞器 + +Token no. 
+形符序號 - -spaCy - Estonian word tokenizer -spaCy - 愛沙尼亞語分詞器 + +Line chart +折線圖 - -spaCy - Finnish word tokenizer -spaCy - 芬蘭語分詞器 + +Total +合計 - -spaCy - French word tokenizer -spaCy - 法語分詞器 + +Frequency +頻數 - -spaCy - Ganda word tokenizer -spaCy - 幹達語分詞器 + +None + - -spaCy - German word tokenizer -spaCy - 德語分詞器 + +p-value +p 值 - -spaCy - Greek (Ancient) word tokenizer -spaCy - 希臘語(古)分詞器 + +General +全域性 - -spaCy - Greek (Modern) word tokenizer -spaCy - 希臘語(現代)分詞器 - - - -spaCy - Gujarati word tokenizer -spaCy - 古吉拉特語分詞器 + +Excel workbooks (*.xlsx) +Excel 工作簿 (*.xlsx) - -spaCy - Hindi word tokenizer -spaCy - 印地語分詞器 + +Non-embedded +非嵌入式 - -spaCy - Hungarian word tokenizer -spaCy - 匈牙利語分詞器 + +Header + - -spaCy - Icelandic word tokenizer -spaCy - 冰島語分詞器 + +Embedded +嵌入式 - -spaCy - Indonesian word tokenizer -spaCy - 印度尼西亞語分詞器 + +Part of speech +詞性 - -spaCy - Irish word tokenizer -spaCy - 愛爾蘭語分詞器 + +Others +其他 - -spaCy - Italian word tokenizer -spaCy - 義大利語分詞器 + +Paragraph +段落 - -spaCy - Japanese word tokenizer -spaCy - 日語分詞器 + +Sentence +句子 - -spaCy - Kannada word tokenizer -spaCy - 卡納達語分詞器 + +Word +單詞 - -spaCy - Kyrgyz word tokenizer -spaCy - 吉爾吉斯語分詞器 + +New +新版 - -spaCy - Latin word tokenizer -spaCy - 拉丁語分詞器 + +Original +原版 - -spaCy - Latvian word tokenizer -spaCy - 拉脫維亞語分詞器 + +Rank-frequency distribution +頻數排序分佈 - -spaCy - Ligurian word tokenizer -spaCy - 利古里亞語分詞器 + +Two-tailed +雙尾 - -spaCy - Lithuanian word tokenizer -spaCy - 立陶宛語分詞器 + +Relative frequency +相對頻數 - -spaCy - Luxembourgish word tokenizer -spaCy - 盧森堡語分詞器 + +Colormap +色譜 - -spaCy - Macedonian word tokenizer -spaCy - 馬其頓語分詞器 + +Policy One +策略一 + + +wl_settings_figs - -spaCy - Malayalam word tokenizer -spaCy - 馬拉雅拉姆語分詞器 + +Square +方形 - -spaCy - Marathi word tokenizer -spaCy - 馬拉地語分詞器 + +Circle +圓形 - -spaCy - Nepali word tokenizer -spaCy - 尼泊爾語分詞器 + +Triangle up +朝上三角形 - -spaCy - Persian word tokenizer -spaCy - 波斯語分詞器 + +Triangle right +朝右三角形 - -spaCy - Polish word tokenizer -spaCy - 波蘭語分詞器 + +Triangle down 
+朝下三角形 - -spaCy - Portuguese word tokenizer -spaCy - 葡萄牙語分詞器 + +Triangle left +朝左三角形 - -spaCy - Romanian word tokenizer -spaCy - 羅馬尼亞語分詞器 + +Thin diamond +薄菱形 - -spaCy - Russian word tokenizer -spaCy - 俄語分詞器 + +Pentagon +五角形 - -spaCy - Sanskrit word tokenizer -spaCy - 梵語分詞器 + +Hexagon +六邊形 - -spaCy - Serbian word tokenizer -spaCy - 塞爾維亞語分詞器 + +Octagon +八邊形 - -spaCy - Sinhala word tokenizer -spaCy - 僧伽羅語分詞器 + +Arc3 +弧3 - -spaCy - Slovak word tokenizer -spaCy - 斯洛伐克語分詞器 + +Arc + - -spaCy - Slovenian word tokenizer -spaCy - 斯洛維尼亞語分詞器 + +Angle3 +角3 - -spaCy - Sorbian (Lower) word tokenizer -spaCy - 索布語(下)分詞器 + +Angle + - -spaCy - Sorbian (Upper) word tokenizer -spaCy - 索布語(上)分詞器 + +Bar +橫條 - -spaCy - Spanish word tokenizer -spaCy - 西班牙語分詞器 + +Solid +實線 - -spaCy - Swedish word tokenizer -spaCy - 瑞典語分詞器 + +Dashed +虛線 - -spaCy - Tagalog word tokenizer -spaCy - 他加祿語分詞器 + +Dash-dotted +點畫線 - -spaCy - Tamil word tokenizer -spaCy - 泰米爾語分詞器 + +Dotted +點線 - -spaCy - Tatar word tokenizer -spaCy - 韃靼語分詞器 + +Curve +圓弧 - -spaCy - Telugu word tokenizer -spaCy - 泰盧固語分詞器 + +Curve A +圓弧 A - -spaCy - Tigrinya word tokenizer -spaCy - 提格雷尼亞語分詞器 + +Curve B +圓弧 B - -spaCy - Tswana word tokenizer -spaCy - 茨瓦納語分詞器 + +Curve AB +圓弧 AB - -spaCy - Turkish word tokenizer -spaCy - 土耳其語分詞器 + +Curve filled A +實心圓弧 A - -spaCy - Ukrainian word tokenizer -spaCy - 烏克蘭語分詞器 + +Curve filled B +實心圓弧 B - -spaCy - Urdu word tokenizer -spaCy - 烏爾都語分詞器 + +Curve filled AB +實心圓弧 AB - -spaCy - Yoruba word tokenizer -spaCy - 約魯巴語分詞器 + +Bracket A +方括號 A - -SudachiPy - Japanese word tokenizer (split mode A) -SudachiPy - 日語分詞器(切分模式 A) + +Bracket B +方括號 B - -SudachiPy - Japanese word tokenizer (split mode B) -SudachiPy - 日語分詞器(切分模式 B) + +Bracket AB +方括號 AB - -SudachiPy - Japanese word tokenizer (split mode C) -SudachiPy - 日語分詞器(切分模式 C) + +Bar AB +橫條 AB - -Underthesea - Vietnamese word tokenizer -Underthesea - 越南語分詞器 + +Bracket curve +方括號加圓弧 - -Wordless - Chinese character tokenizer -Wordless - 漢語分字器 + +Simple +樸素 - 
-Wordless - Japanese kanji tokenizer -Wordless - 日語分字器 + +Fancy +絢麗 - -NLTK - Legality syllable tokenizer -NLTK - 合法性分音節器 + +Wedge +楔形 - -NLTK - Sonority sequencing syllable tokenizer -NLTK - 響度順序分音節器 + +Circular +環形 - -Pyphen - Afrikaans syllable tokenizer -Pyphen - 南非語分音節器 + +Planar +平面 - -Pyphen - Albanian syllable tokenizer -Pyphen - 阿爾巴尼亞語分音節器 + +Random +隨機 - -Pyphen - Belarusian syllable tokenizer -Pyphen - 白俄羅斯語分音節器 + +Shell +同心 - -Pyphen - Bulgarian syllable tokenizer -Pyphen - 保加利亞語分音節器 + +Spring +彈簧 - -Pyphen - Catalan syllable tokenizer -Pyphen - 加泰羅尼亞語分音節器 + +Spectral +譜圖 - -Pyphen - Croatian syllable tokenizer -Pyphen - 克羅埃西亞語分音節器 + +Spiral +螺旋 + + +wl_settings_global - -Pyphen - Czech syllable tokenizer -Pyphen - 捷克語分音節器 + +Afrikaans +南非語 - -Pyphen - Danish syllable tokenizer -Pyphen - 丹麥語分音節器 + +Albanian +阿爾巴尼亞語 - -Pyphen - Dutch syllable tokenizer -Pyphen - 荷蘭語分音節器 + +Amharic +阿姆哈拉語 - -Pyphen - English (United Kingdom) syllable tokenizer -Pyphen - 英語(英國)分音節器 + +Arabic +阿拉伯語 - -Pyphen - English (United States) syllable tokenizer -Pyphen - 英語(美國)分音節器 + +Armenian (Classical) +亞美尼亞語(古) - -Pyphen - Esperanto syllable tokenizer -Pyphen - 世界語分音節器 + +Armenian (Eastern) +亞美尼亞語(東) - -Pyphen - Estonian syllable tokenizer -Pyphen - 愛沙尼亞語分音節器 + +Armenian (Western) +亞美尼亞語(西) - -Pyphen - French syllable tokenizer -Pyphen - 法語分音節器 + +Assamese +阿薩姆語 - -Pyphen - Galician syllable tokenizer -Pyphen - 加里西亞語分音節器 + +Asturian +阿斯圖里亞斯語 - -Pyphen - German (Austria) syllable tokenizer -Pyphen - 德語(奧地利)分音節器 + +Azerbaijani +亞塞拜然語 - -Pyphen - German (Germany) syllable tokenizer -Pyphen - 德語(德國)分音節器 + +Basque +巴斯克語 - -Pyphen - German (Switzerland) syllable tokenizer -Pyphen - 德語(瑞士)分音節器 + +Belarusian +白俄羅斯語 - -Pyphen - Greek (Modern) syllable tokenizer -Pyphen - 希臘語(現代)分音節器 + +Bengali +孟加拉語 - -Pyphen - Hungarian syllable tokenizer -Pyphen - 匈牙利語分音節器 + +Bulgarian +保加利亞語 - -Pyphen - Icelandic syllable tokenizer -Pyphen - 冰島語分音節器 + +Burmese +緬甸語 - -Pyphen - Indonesian syllable 
tokenizer -Pyphen - 印度尼西亞語分音節器 + +Buryat (Russia) +布里亞特語(俄羅斯) - -Pyphen - Italian syllable tokenizer -Pyphen - 義大利語分音節器 + +Catalan +加泰羅尼亞語 - -Pyphen - Lithuanian syllable tokenizer -Pyphen - 立陶宛語分音節器 + +Chinese (Classical) +漢語(文言) - -Pyphen - Latvian syllable tokenizer -Pyphen - 拉脫維亞語分音節器 + +Chinese (Simplified) +漢語(簡體) - -Pyphen - Mongolian syllable tokenizer -Pyphen - 蒙古語分音節器 + +Chinese (Traditional) +漢語(繁體) - -Pyphen - Norwegian Bokmål syllable tokenizer -Pyphen - 書面挪威語分音節器 + +Church Slavonic (Old) +教會斯拉夫語(古) - -Pyphen - Norwegian Nynorsk syllable tokenizer -Pyphen - 新挪威語分音節器 + +Coptic +科普特語 - -Pyphen - Polish syllable tokenizer -Pyphen - 波蘭語分音節器 + +Croatian +克羅埃西亞語 - -Pyphen - Portuguese (Brazil) syllable tokenizer -Pyphen - 葡萄牙語(巴西)分音節器 + +Czech +捷克語 - -Pyphen - Portuguese (Portugal) syllable tokenizer -Pyphen - 葡萄牙語(葡萄牙)分音節器 + +Danish +丹麥語 - -Pyphen - Romanian syllable tokenizer -Pyphen - 羅馬尼亞語分音節器 + +Dutch +荷蘭語 - -Pyphen - Russian syllable tokenizer -Pyphen - 俄語分音節器 + +English (Middle) +英語(中古) - -Pyphen - Serbian (Cyrillic) syllable tokenizer -Pyphen - 塞爾維亞語(西里爾)分音節器 + +English (Old) +英語(古) - -Pyphen - Serbian (Latin) syllable tokenizer -Pyphen - 塞爾維亞語(拉丁)分音節器 + +English (United Kingdom) +英語(英國) - -Pyphen - Slovak syllable tokenizer -Pyphen - 斯洛伐克語分音節器 + +English (United States) +英語(美國) - -Pyphen - Slovenian syllable tokenizer -Pyphen - 斯洛維尼亞語分音節器 + +Erzya +埃爾齊亞語 - -Pyphen - Spanish syllable tokenizer -Pyphen - 西班牙語分音節器 + +Esperanto +世界語 - -Pyphen - Swedish syllable tokenizer -Pyphen - 瑞典語分音節器 + +Estonian +愛沙尼亞語 - -Pyphen - Telugu syllable tokenizer -Pyphen - 泰盧固語分音節器 + +Faroese +法羅語 - -Pyphen - Thai syllable tokenizer -Pyphen - 泰語分音節器 + +Finnish +芬蘭語 - -Pyphen - Ukrainian syllable tokenizer -Pyphen - 烏克蘭語分音節器 + +French +法語 - -Pyphen - Zulu syllable tokenizer -Pyphen - 祖魯語分音節器 + +French (Old) +法語(古) - -PyThaiNLP - Thai syllable tokenizer -PyThaiNLP - 泰語分音節器 + +Galician +加里西亞語 - -botok - Tibetan part-of-speech tagger -botok - 藏語詞性標註器 + +Georgian +喬治亞語 - 
-jieba - Chinese part-of-speech tagger -jieba - 漢語詞性標註器 + +German (Austria) +德語(奧地利) - -NLTK - English perceptron part-of-speech tagger -NLTK - 英語感知機詞性標註器 + +German (Germany) +德語(德國) - -NLTK - Russian perceptron part-of-speech tagger -NLTK - 俄語感知機詞性標註器 + +German (Switzerland) +德語(瑞士) - -pymorphy3 - Morphological analyzer -pymorphy3 - 形態分析器 + +Gothic +哥特語 - -PyThaiNLP - Perceptron part-of-speech tagger (ORCHID) -PyThaiNLP - 感知機詞性標註器(ORCHID) + +Greek (Ancient) +希臘語(古) - -PyThaiNLP - Perceptron part-of-speech tagger (PUD) -PyThaiNLP - 感知機詞性標註器(PUD) + +Greek (Modern) +希臘語(現代) - -spaCy - Catalan part-of-speech tagger -spaCy - 加泰羅尼亞語詞性標註器 + +Gujarati +古吉拉特語 - -spaCy - Chinese part-of-speech tagger -spaCy - 漢語詞性標註器 + +Hebrew (Ancient) +希伯來語(古) - -spaCy - Croatian part-of-speech tagger -spaCy - 克羅埃西亞語詞性標註器 + +Hebrew (Modern) +希伯來語(現代) - -spaCy - Danish part-of-speech tagger -spaCy - 丹麥語詞性標註器 + +Hindi +印地語 - -spaCy - Dutch part-of-speech tagger -spaCy - 荷蘭語詞性標註器 + +Hungarian +匈牙利語 - -spaCy - English part-of-speech tagger -spaCy - 英語詞性標註器 + +Icelandic +冰島語 - -spaCy - Finnish part-of-speech tagger -spaCy - 芬蘭語詞性標註器 + +Indonesian +印度尼西亞語 - -spaCy - French part-of-speech tagger -spaCy - 法語詞性標註器 + +Irish +愛爾蘭語 - -spaCy - German part-of-speech tagger -spaCy - 德語詞性標註器 + +Italian +義大利語 - -spaCy - Greek (Modern) part-of-speech tagger -spaCy - 希臘語(現代)詞性標註器 + +Japanese +日語 - -spaCy - Italian part-of-speech tagger -spaCy - 義大利語詞性標註器 + +Kannada +卡納達語 - -spaCy - Japanese part-of-speech tagger -spaCy - 日語詞性標註器 + +Kazakh +哈薩克語 - -spaCy - Lithuanian part-of-speech tagger -spaCy - 立陶宛語詞性標註器 + +Khmer +柬埔寨語 - -spaCy - Macedonian part-of-speech tagger -spaCy - 馬其頓語詞性標註器 + +Korean +韓語 - -spaCy - Norwegian Bokmål part-of-speech tagger -spaCy - 書面挪威語詞性標註器 + +Kurdish (Kurmanji) +庫爾德語(庫爾曼吉語) - -spaCy - Polish part-of-speech tagger -spaCy - 波蘭語詞性標註器 + +Kyrgyz +吉爾吉斯語 - -spaCy - Portuguese part-of-speech tagger -spaCy - 葡萄牙語詞性標註器 + +Lao +寮國語 - -spaCy - Romanian part-of-speech tagger -spaCy - 羅馬尼亞語詞性標註器 
+ +Latin +拉丁語 - -spaCy - Russian part-of-speech tagger -spaCy - 俄語詞性標註器 + +Latvian +拉脫維亞語 - -spaCy - Spanish part-of-speech tagger -spaCy - 西班牙語詞性標註器 + +Ligurian +利古里亞語 - -spaCy - Swedish part-of-speech tagger -spaCy - 瑞典語詞性標註器 + +Lithuanian +立陶宛語 - -spaCy - Ukrainian part-of-speech tagger -spaCy - 烏克蘭語詞性標註器 + +Luganda +盧幹達語 - -SudachiPy - Japanese part-of-speech tagger -SudachiPy - 日語詞性標註器 + +Luxembourgish +盧森堡語 - -Underthesea - Vietnamese part-of-speech tagger -Underthesea - 越南語詞性標註器 + +Macedonian +馬其頓語 - -botok - Tibetan lemmatizer -botok - 藏語詞形還原器 + +Malay +馬來語 - -NLTK - WordNet lemmatizer -NLTK - WordNet 詞形還原器 + +Malayalam +馬拉雅拉姆語 - -simplemma - Albanian lemmatizer -simplemma - 阿爾巴尼亞語詞形還原器 + +Maltese +馬耳他語 - -simplemma - Armenian lemmatizer -simplemma - Armenian 詞形還原器 + +Manx +馬恩語 - -simplemma - Asturian lemmatizer -simplemma - 阿斯圖里亞斯語詞形還原器 + +Marathi +馬拉地語 - -simplemma - Bulgarian lemmatizer -simplemma - 保加利亞語詞形還原器 + +Meitei (Meitei script) +曼尼普爾語(曼尼普爾文) - -simplemma - Catalan lemmatizer -simplemma - 加泰羅尼亞語詞形還原器 + +Mongolian +蒙古語 - -simplemma - Czech lemmatizer -simplemma - 捷克語詞形還原器 + +Nepali +尼泊爾語 - -simplemma - Danish lemmatizer -simplemma - 丹麥語詞形還原器 + +Nigerian Pidgin +奈及利亞皮欽語 - -simplemma - Dutch lemmatizer -simplemma - 荷蘭語詞形還原器 + +Norwegian (Bokmål) +挪威語(書面) - -simplemma - English lemmatizer -simplemma - 英語詞形還原器 + +Norwegian (Nynorsk) +挪威語(新) - -simplemma - English (Middle) lemmatizer -simplemma - 英語(中古)詞形還原器 + +Odia +奧里亞語 - -simplemma - Estonian lemmatizer -simplemma - 愛沙尼亞語詞形還原器 + +Persian +波斯語 - -simplemma - Finnish lemmatizer -simplemma - 芬蘭語詞形還原器 + +Polish +波蘭語 - -simplemma - French lemmatizer -simplemma - 法語詞形還原器 + +Pomak +波馬克語 - -simplemma - Galician lemmatizer -simplemma - 加里西亞語詞形還原器 + +Portuguese (Brazil) +葡萄牙語(巴西) - -simplemma - Georgian lemmatizer -simplemma - 喬治亞語詞形還原器 + +Portuguese (Portugal) +葡萄牙語(葡萄牙) - -simplemma - German lemmatizer -simplemma - 德語詞形還原器 + +Punjabi (Gurmukhi script) +旁遮普語(古木基文) - -simplemma - Greek (Modern) lemmatizer 
-simplemma - 希臘語(現代)詞形還原器 + +Romanian +羅馬尼亞語 - -simplemma - Hindi lemmatizer -simplemma - 印地語詞形還原器 + +Russian +俄語 - -simplemma - Hungarian lemmatizer -simplemma - 匈牙利語詞形還原器 + +Russian (Old) +俄語(古) - -simplemma - Icelandic lemmatizer -simplemma - 冰島語詞形還原器 + +Sámi (Northern) +薩米語(北) - -simplemma - Indonesian lemmatizer -simplemma - 印度尼西亞語詞形還原器 + +Sanskrit +梵語 - -simplemma - Irish lemmatizer -simplemma - 愛爾蘭語詞形還原器 + +Scottish Gaelic +蘇格蘭蓋爾語 - -simplemma - Italian lemmatizer -simplemma - 義大利語詞形還原器 + +Serbian (Cyrillic script) +塞爾維亞語(西里爾文) - -simplemma - Latin lemmatizer -simplemma - 拉丁語詞形還原器 + +Serbian (Latin script) +塞爾維亞語(拉丁文) - -simplemma - Latvian lemmatizer -simplemma - 拉脫維亞語詞形還原器 + +Sindhi +信德語 - -simplemma - Lithuanian lemmatizer -simplemma - 立陶宛語詞形還原器 + +Sinhala +僧伽羅語 - -simplemma - Luxembourgish lemmatizer -simplemma - 盧森堡語詞形還原器 + +Slovak +斯洛伐克語 - -simplemma - Macedonian lemmatizer -simplemma - 馬其頓語詞形還原器 + +Slovene +斯洛維尼亞語 - -simplemma - Malay lemmatizer -simplemma - 馬來語詞形還原器 + +Sorbian (Lower) +索布語(下) - -simplemma - Manx lemmatizer -simplemma - 馬恩語詞形還原器 + +Sorbian (Upper) +索布語(上) - -simplemma - Norwegian Bokmål lemmatizer -simplemma - 書面挪威語詞形還原器 + +Spanish +西班牙語 - -simplemma - Norwegian Nynorsk lemmatizer -simplemma - 新挪威語詞形還原器 + +Swahili +斯瓦西里語 - -simplemma - Persian lemmatizer -simplemma - 波斯語詞形還原器 + +Swedish +瑞典語 - -simplemma - Polish lemmatizer -simplemma - 波蘭語詞形還原器 + +Tagalog +他加祿語 - -simplemma - Portuguese lemmatizer -simplemma - 葡萄牙語詞形還原器 + +Tajik +塔吉克語 - -simplemma - Romanian lemmatizer -simplemma - 羅馬尼亞語詞形還原器 + +Tamil +泰米爾語 - -simplemma - Russian lemmatizer -simplemma - 俄語詞形還原器 + +Tatar +韃靼語 - -simplemma - Sámi (Northern) lemmatizer -simplemma - 薩米語(北)詞形還原器 + +Telugu +泰盧固語 - -simplemma - Scottish Gaelic lemmatizer -simplemma - 蘇格蘭蓋爾語詞形還原器 + +Tetun (Dili) +德頓語(帝力) - -simplemma - Serbo-Croatian lemmatizer -simplemma - 塞爾維亞-克羅埃西亞語詞形還原器 + +Thai +泰語 - -simplemma - Slovak lemmatizer -simplemma - 斯洛伐克語詞形還原器 + +Tibetan +藏語 - -simplemma - Slovenian lemmatizer 
-simplemma - 斯洛維尼亞語詞形還原器 + +Tigrinya +提格雷尼亞語 - -simplemma - Spanish lemmatizer -simplemma - 西班牙語詞形還原器 + +Tswana +茨瓦納語 - -simplemma - Swahili lemmatizer -simplemma - 斯瓦西里語詞形還原器 + +Turkish +土耳其語 - -simplemma - Swedish lemmatizer -simplemma - 瑞典語詞形還原器 + +Ukrainian +烏克蘭語 - -simplemma - Tagalog lemmatizer -simplemma - 他加祿語詞形還原器 + +Urdu +烏爾都語 - -simplemma - Turkish lemmatizer -simplemma - 土耳其語詞形還原器 + +Uyghur +維吾爾語 - -simplemma - Ukrainian lemmatizer -simplemma - 烏克蘭語詞形還原器 + +Vietnamese +越南語 - -simplemma - Welsh lemmatizer -simplemma - 威爾士語詞形還原器 + +Welsh +威爾士語 - -spaCy - Bengali lemmatizer -spaCy - 孟加拉語詞形還原器 + +Wolof +沃洛夫語 - -spaCy - Catalan lemmatizer -spaCy - 加泰羅尼亞語詞形還原器 + +Yoruba +約魯巴語 - -spaCy - Croatian lemmatizer -spaCy - 克羅埃西亞語詞形還原器 + +Zulu +祖魯語 - -spaCy - Czech lemmatizer -spaCy - 捷克語詞形還原器 + +Other languages +其他語種 - -spaCy - Danish lemmatizer -spaCy - 丹麥語詞形還原器 + +All languages (UTF-8 without BOM) +所有語種(UTF-8 無簽名) - -spaCy - Dutch lemmatizer -spaCy - 荷蘭語詞形還原器 + +All languages (UTF-8 with BOM) +所有語種(UTF-8 帶簽名) - -spaCy - English lemmatizer -spaCy - 英語詞形還原器 + +All languages (UTF-16 with BOM) +所有語種(UTF-16 帶簽名) - -spaCy - Finnish lemmatizer -spaCy - 芬蘭語詞形還原器 + +All languages (UTF-16BE without BOM) +所有語種(UTF-16 大端無簽名) - -spaCy - French lemmatizer -spaCy - 法語詞形還原器 + +All languages (UTF-16LE without BOM) +所有語種(UTF-16 小端無簽名) - -spaCy - German lemmatizer -spaCy - 德語詞形還原器 - - - -spaCy - Greek (Ancient) lemmatizer -spaCy - 希臘語(古)詞形還原器 + +All languages (UTF-32 with BOM) +所有語種(UTF-32 帶簽名) - -spaCy - Greek (Modern) lemmatizer -spaCy - 希臘語(現代)詞形還原器 + +All languages (UTF-32BE without BOM) +所有語種(UTF-32 大端無簽名) - -spaCy - Hungarian lemmatizer -spaCy - 匈牙利語詞形還原器 + +All languages (UTF-32LE without BOM) +所有語種(UTF-32 小端無簽名) - -spaCy - Indonesian lemmatizer -spaCy - 印度尼西亞語詞形還原器 + +All languages (UTF-7) +所有語種(UTF-7) - -spaCy - Irish lemmatizer -spaCy - 愛爾蘭語詞形還原器 + +Arabic (CP720) +阿拉伯語(CP720) - -spaCy - Italian lemmatizer -spaCy - 義大利語詞形還原器 + +Arabic (CP864) +阿拉伯語(CP864) - -spaCy - 
Japanese lemmatizer -spaCy - 日語詞形還原器 + +Arabic (ISO-8859-6) +阿拉伯語(ISO-8859-6) - -spaCy - Lithuanian lemmatizer -spaCy - 立陶宛語詞形還原器 + +Arabic (Mac OS Arabic) +阿拉伯語(Mac OS Arabic) - -spaCy - Luxembourgish lemmatizer -spaCy - 盧森堡語詞形還原器 + +Arabic (Windows-1256) +阿拉伯語(Windows-1256) - -spaCy - Macedonian lemmatizer -spaCy - 馬其頓語詞形還原器 + +Baltic languages (CP775) +波羅的海諸語(CP775) - -spaCy - Norwegian Bokmål lemmatizer -spaCy - 書面挪威語詞形還原器 + +Baltic languages (ISO-8859-13) +波羅的海諸語(ISO-8859-13) - -spaCy - Persian lemmatizer -spaCy - 波斯語詞形還原器 + +Baltic languages (Windows-1257) +波羅的海諸語(Windows-1257) - -spaCy - Polish lemmatizer -spaCy - 波蘭語詞形還原器 + +Celtic languages (ISO-8859-14) +凱爾特語(ISO-8859-14) - -spaCy - Portuguese lemmatizer -spaCy - 葡萄牙語詞形還原器 + +Chinese (GB18030) +漢語(GB18030) - -spaCy - Romanian lemmatizer -spaCy - 羅馬尼亞語詞形還原器 + +Chinese (GBK) +漢語(GBK) - -spaCy - Russian lemmatizer -spaCy - 俄語詞形還原器 + +Chinese (Simplified) (GB2312) +漢語(簡體)(GB2312) - -spaCy - Serbian lemmatizer -spaCy - 塞爾維亞語詞形還原器 + +Chinese (Simplified) (HZ) +漢語(簡體)(HZ) - -spaCy - Spanish lemmatizer -spaCy - 西班牙語詞形還原器 + +Chinese (Traditional) (Big-5) +漢語(繁體)(Big-5) - -spaCy - Swedish lemmatizer -spaCy - 瑞典語詞形還原器 + +Chinese (Traditional) (Big5-HKSCS) +漢語(繁體)(Big5-HKSCS) - -spaCy - Tagalog lemmatizer -spaCy - 他加祿語詞形還原器 + +Chinese (Traditional) (CP950) +漢語(繁體)(CP950) - -spaCy - Turkish lemmatizer -spaCy - 土耳其語詞形還原器 + +Croatian (Mac OS Croatian) +克羅埃西亞語(Mac OS Croatian) - -spaCy - Ukrainian lemmatizer -spaCy - 烏克蘭語詞形還原器 + +Cyrillic (CP855) +西里爾(CP855) - -spaCy - Urdu lemmatizer -spaCy - 烏爾都語詞形還原器 + +Cyrillic (CP866) +西里爾(CP866) - -SudachiPy - Japanese lemmatizer -SudachiPy - 日語詞形還原器 + +Cyrillic (ISO-8859-5) +西里爾(ISO-8859-5) - -NLTK - Arabic stop word list -NLTK - 阿拉伯語停用詞表 + +Cyrillic (Mac OS Cyrillic) +西里爾(Mac OS Cyrillic) - -NLTK - Azerbaijani stop word list -NLTK - 亞塞拜然語停用詞表 + +Cyrillic (Windows-1251) +西里爾(Windows-1251) - -NLTK - Basque stop word list -NLTK - 巴斯克語停用詞表 + +English (ASCII) +英語(ASCII) - -NLTK - 
Bengali stop word list -NLTK - 孟加拉語停用詞表 + +English (EBCDIC 037) +英語(EBCDIC 037) - -NLTK - Catalan stop word list -NLTK - 加泰羅尼亞語停用詞表 + +English (CP437) +英語(CP437) - -NLTK - Chinese (Simplified) stop word list -NLTK - 漢語(簡體)停用詞表 + +European (HP Roman-8) +歐洲(HP Roman-8) - -NLTK - Chinese (Traditional) stop word list -NLTK - 漢語(繁體)停用詞表 + +European (Central) (CP852) +歐洲(中部)(CP852) - -NLTK - Danish stop word list -NLTK - 丹麥語停用詞表 + +European (Central) (ISO-8859-2) +歐洲(中部)(ISO-8859-2) - -NLTK - Dutch stop word list -NLTK - 荷蘭語停用詞表 + +European (Central) (Mac OS Central European) +歐洲(中部)(Mac OS Central European) - -NLTK - English stop word list -NLTK - 英語停用詞表 + +European (Central) (Windows-1250) +歐洲(中部)(Windows-1250) - -NLTK - Finnish stop word list -NLTK - 芬蘭語停用詞表 + +European (Northern) (ISO-8859-4) +歐洲(北部)(ISO-8859-4) - -NLTK - French stop word list -NLTK - 法語停用詞表 + +European (Southern) (ISO-8859-3) +歐洲(南部)(ISO-8859-3) - -NLTK - German stop word list -NLTK - 德語停用詞表 + +European (Southeastern) (ISO-8859-16) +歐洲(東南部)(ISO-8859-16) - -NLTK - Greek (Modern) stop word list -NLTK - 希臘語(現代)停用詞表 + +European (Western) (EBCDIC 500) +歐洲(西部)(EBCDIC 500) - -NLTK - Hungarian stop word list -NLTK - 匈牙利語停用詞表 + +European (Western) (CP850) +歐洲(西部)(CP850) - -NLTK - Indonesian stop word list -NLTK - 印度尼西亞語停用詞表 + +European (Western) (CP858) +歐洲(西部)(CP858) - -NLTK - Italian stop word list -NLTK - 義大利語停用詞表 + +European (Western) (CP1140) +歐洲(西部)(CP1140) - -NLTK - Kazakh stop word list -NLTK - 哈薩克語停用詞表 + +European (Western) (ISO-8859-1) +歐洲(西部)(ISO-8859-1) - -NLTK - Nepali stop word list -NLTK - 尼泊爾語停用詞表 + +European (Western) (ISO-8859-15) +歐洲(西部)(ISO-8859-15) - -NLTK - Norwegian stop word list -NLTK - 挪威語停用詞表 + +European (Western) (Mac OS Roman) +歐洲(西部)(Mac OS Roman) - -NLTK - Portuguese stop word list -NLTK - 葡萄牙語停用詞表 + +European (Western) (Windows-1252) +歐洲(西部)(Windows-1252) - -NLTK - Romanian stop word list -NLTK - 羅馬尼亞語停用詞表 + +French (CP863) +法語(CP863) - -NLTK - Russian stop word list -NLTK - 
俄語停用詞表 + +German (EBCDIC 273) +德語(EBCDIC 273) - -NLTK - Slovenian stop word list -NLTK - 斯洛維尼亞語停用詞表 + +Greek (CP737) +希臘語(CP737) - -NLTK - Spanish stop word list -NLTK - 西班牙語停用詞表 + +Greek (CP869) +希臘語(CP869) - -NLTK - Swedish stop word list -NLTK - 瑞典語停用詞表 + +Greek (CP875) +希臘語(CP875) - -NLTK - Tajik stop word list -NLTK - 塔吉克語停用詞表 + +Greek (ISO-8859-7) +希臘語(ISO-8859-7) - -NLTK - Turkish stop word list -NLTK - 土耳其語停用詞表 + +Greek (Mac OS Greek) +希臘語(Mac OS Greek) - -PyThaiNLP - Thai stop word list -PyThaiNLP - 泰語停用詞表 + +Greek (Windows-1253) +希臘語(Windows-1253) - -Custom stop word list -自定義停用詞表 + +Hebrew (CP856) +希伯來語(CP856) - -spaCy - Catalan dependency parser -spaCy - 加泰羅尼亞語依存分析器 + +Hebrew (CP862) +希伯來語(CP862) - -spaCy - Chinese dependency parser -spaCy - 漢語依存分析器 + +Hebrew (EBCDIC 424) +希伯來語(EBCDIC 424) - -spaCy - Croatian dependency parser -spaCy - 克羅埃西亞語依存分析器 + +Hebrew (ISO-8859-8) +希伯來語(ISO-8859-8) - -spaCy - Danish dependency parser -spaCy - 丹麥語依存分析器 + +Hebrew (Windows-1255) +希伯來語(Windows-1255) - -spaCy - Dutch dependency parser -spaCy - 荷蘭語依存分析器 + +Icelandic (CP861) +冰島語(CP861) - -spaCy - English dependency parser -spaCy - 英語依存分析器 + +Icelandic (Mac OS Icelandic) +冰島語(Mac OS Icelandic) - -spaCy - Finnish dependency parser -spaCy - 芬蘭語依存分析器 + +Japanese (CP932) +日語(CP932) - -spaCy - French dependency parser -spaCy - 法語依存分析器 + +Japanese (EUC-JP) +日語(EUC-JP) - -spaCy - German dependency parser -spaCy - 德語依存分析器 + +Japanese (EUC-JIS-2004) +日語(EUC-JIS-2004) - -spaCy - Greek (Modern) dependency parser -spaCy - 希臘語(現代)依存分析器 + +Japanese (EUC-JISx0213) +日語(EUC-JISx0213) - -spaCy - Italian dependency parser -spaCy - 義大利語依存分析器 + +Japanese (ISO-2022-JP) +日語(ISO-2022-JP) - -spaCy - Japanese dependency parser -spaCy - 日語依存分析器 + +Japanese (ISO-2022-JP-1) +日語(ISO-2022-JP-1) - -spaCy - Lithuanian dependency parser -spaCy - 立陶宛語依存分析器 + +Japanese (ISO-2022-JP-2) +日語(ISO-2022-JP-2) - -spaCy - Macedonian dependency parser -spaCy - 馬其頓語依存分析器 + +Japanese (ISO-2022-JP-2004) 
+日語(ISO-2022-JP-2004) - -spaCy - Norwegian Bokmål dependency parser -spaCy - 書面挪威語依存分析器 + +Japanese (ISO-2022-JP-3) +日語(ISO-2022-JP-3) - -spaCy - Polish dependency parser -spaCy - 波蘭語依存分析器 + +Japanese (ISO-2022-JP-EXT) +日語(ISO-2022-JP-EXT) - -spaCy - Portuguese dependency parser -spaCy - 葡萄牙語依存分析器 + +Japanese (Shift_JIS) +日語(Shift_JIS) - -spaCy - Romanian dependency parser -spaCy - 羅馬尼亞語依存分析器 + +Japanese (Shift_JIS-2004) +日語(Shift_JIS-2004) - -spaCy - Russian dependency parser -spaCy - 俄語依存分析器 + +Japanese (Shift_JISx0213) +日語(Shift_JISx0213) - -spaCy - Spanish dependency parser -spaCy - 西班牙語依存分析器 + +Kazakh (KZ-1048) +哈薩克語(KZ-1048) - -spaCy - Swedish dependency parser -spaCy - 瑞典語依存分析器 + +Kazakh (PTCP154) +哈薩克語(PTCP154) - -spaCy - Ukrainian dependency parser -spaCy - 烏克蘭語依存分析器 + +Korean (EUC-KR) +韓語(EUC-KR) - -Average logarithmic distance - + +Korean (ISO-2022-KR) +韓語(ISO-2022-KR) - -Average reduced frequency - + +Korean (JOHAB) +韓語(JOHAB) - -Average waiting time - + +Korean (UHC) +韓語(UHC) - -Carroll's Uₘ - + +Nordic languages (CP865) +北歐諸語(CP865) - -Fisher's exact test -費希爾精確檢驗 + +Nordic languages (ISO-8859-10) +北歐諸語(ISO-8859-10) - -Log-likelihood ratio test -對數似然比檢驗 + +Persian/Urdu (Mac OS Farsi) +波斯語/烏爾都語(Mac OS Farsi) - -Pearson's chi-squared test -皮爾森卡方檢驗 + +Portuguese (CP860) +葡萄牙語(CP860) - -Cubic association ratio - + +Romanian (Mac OS Romanian) +羅馬尼亞語(Mac OS Romanian) - -Dice's coefficient -Dice 係數 + +Russian (KOI8-R) +俄語(KOI8-R) - -Difference coefficient - + +Tajik (KOI8-T) +塔吉克語(KOI8-T) - -Jaccard index -雅卡爾指數 + +Thai (CP874) +泰語(CP874) - -Log-frequency biased MD - + +Thai (ISO-8859-11) +泰語(ISO-8859-11) - -Kilgarriff's ratio - + +Thai (TIS-620) +泰語(TIS-620) - -Log ratio - + +Turkish (CP857) +土耳其語(CP857) - -Minimum sensitivity - + +Turkish (EBCDIC 1026) +土耳其語(EBCDIC 1026) - -Mutual dependency - + +Turkish (ISO-8859-9) +土耳其語(ISO-8859-9) - -Mutual expectation - + +Turkish (Mac OS Turkish) +土耳其語(Mac OS Turkish) - -Mutual information -互資訊 + +Turkish 
(Windows-1254) +土耳其語(Windows-1254) - -Odds ratio -比值比 + +Ukrainian (CP1125) +烏克蘭語(CP1125) - -Pointwise mutual information -點互資訊 + +Ukrainian (KOI8-U) +烏克蘭語(KOI8-U) - -Poisson collocation measure - - + +Urdu (CP1006) +烏爾都語(CP1006) + - -Squared phi coefficient -Phi 係數的平方 + +Vietnamese (CP1258) +越南語(CP1258) - -Microsoft Paint files (*.msp) -Microsoft Paint 檔案 (*.msp) + +CSV files (*.csv) +CSV 檔案 (*.csv) - -Khmer -柬埔寨語 + +Excel workbooks (*.xlsx) +Excel 工作簿 (*.xlsx) - -khmer-nltk - Khmer sentence tokenizer -khmer-nltk - 柬埔寨語分句器 + +HTML pages (*.htm; *.html) +HTML 頁面 (*.htm; *.html) - -spaCy - Korean dependency parser -spaCy - 韓語依存分析器 + +Lyrics files (*.lrc) +歌詞檔案(*.lrc) - -spaCy - Slovenian dependency parser -spaCy - 斯洛維尼亞語依存分析器 + +PDF files (*.pdf) +PDF 檔案 (*.pdf) - -spaCy - Korean sentence recognizer -spaCy - 韓語句子識別器 + +PowerPoint presentations (*.pptx) +PowerPoint 簡報 (*.pptx) - -khmer-nltk - Khmer word tokenizer -khmer-nltk - 柬埔寨語分詞器 + +Text files (*.txt) +文字檔案 (*.txt) - -spaCy - Korean word tokenizer -spaCy - 韓語分詞器 + +Translation memory files (*.tmx) +翻譯記憶庫檔案 (*.tmx) - -spaCy - Malay word tokenizer -spaCy - 馬來語分詞器 + +Word documents (*.docx) +Word 文件 (*.docx) - -khmer-nltk - Khmer part-of-speech tagger -khmer-nltk - 柬埔寨語詞性標註器 + +XML files (*.xml) +XML 檔案 (*.xml) - -PyThaiNLP - Perceptron part-of-speech tagger (Blackboard) -PyThaiNLP - 感知機詞性標註器(Blackboard) + +All files (*.*) +所有檔案 (*.*) - -spaCy - Korean part-of-speech tagger -spaCy - 韓語詞性標註器 + +OpenType fonts (*.otf) +OpenType 字型 (*.otf) - -spaCy - Slovenian part-of-speech tagger -spaCy - 斯洛維尼亞語詞性標註器 + +TrueType fonts (*.ttf) +TrueType 字型 (*.ttf) - -spaCy - Korean lemmatizer -spaCy - 韓語詞形還原器 + +Blizzard mipmap format (*.blp) + - -spaCy - Slovenian lemmatizer -spaCy - 斯洛維尼亞語詞形還原器 + +Windows bitmaps (*.bmp) +Windows 點陣圖 (*.bmp) - -Dostoevsky - Russian sentiment analyzer -Dostoevsky - 俄語情感分析器 + +Window cursor files (*.cur) +Window 游標檔案 (*.cur) - -Underthesea - Vietnamese sentiment analyzer -Underthesea - 越南語情感分析器 + 
+Multi-page PCX files (*.dcx) +多頁 PCX 檔案 (*.dcx) - -Armenian (Eastern) -亞美尼亞語(東) + +DirectDraw surface (*.dds) + - -Armenian (Western) -亞美尼亞語(西) + +Device-independent bitmaps (*.dib) +裝置無關點陣圖 (*.dib) - -Buryat (Russia) -布里亞特語(俄羅斯) + +Encapsulated PostScript (*.eps, *.ps) + - -Chinese (Classical) -漢語(文言) + +Flexible image transport system (*.fit, *.fits) + - -Church Slavonic (Old) -教會斯拉夫語(古) + +Autodesk animation files (*.flc, *.fli) +Autodesk 動畫檔案 (*.flc, *.fli) - -Coptic -科普特語 + +Fox Engine textures (*.ftex) +Fox Engine 紋理 (*.ftex) - -Erzya -埃爾齊亞語 + +GIMP brush files (*.gbr) +GIMP 筆刷檔案 (*.gbr) - -Faroese -法羅語 + +Graphics interchange format (*.gif) + - -French (Old) -法語(古) + +Apple icon images (*.icns) +蘋果圖示圖片 (*.icns) - -Gothic -哥特語 + +Windows icon files (*.ico) +Windows 圖示檔案 (*.ico) - -Hebrew (Ancient) -希伯來語(古) + +IPTC/NAA newsphoto files (*.iim) +IPTC/NAA newsphoto 檔案 (*.iim) - -Hebrew (Modern) -希伯來語(現代) + +IM files (*.im) +IM 檔案 (*.im) - -Kurdish (Kurmanji) -庫爾德語(庫爾曼吉語) + +Image Tools image files (*) +Image Tools 影象檔案 (*) - -Lao -寮國語 + +JPEG files (*.jfif, *.jpe, *.jpeg, *.jpg) +JPEG 檔案 (*.jfif, *.jpe, *.jpeg, *.jpg) - -Maltese -馬耳他語 + +JPEG 2000 files (*.j2c, *.j2k, *.jp2, *.jpc, *.jpf, *.jpx) +JPEG 2000 檔案 (*.j2c, *.j2k, *.jp2, *.jpc, *.jpf, *.jpx) - -Nigerian Pidgin -奈及利亞皮欽語 + +McIDAS area files (*) +McIDAS area 檔案 (*) - -Pomak -波馬克語 + +Microsoft Paint files (*.msp) +Microsoft Paint 檔案 (*.msp) - -Russian (Old) -俄語(古) + +PhotoCD files (*.pcd) +PhotoCD 檔案 (*.pcd) - -Sindhi -信德語 + +Picture exchange (*.pcx) + - -Uyghur -維吾爾語 + +PIXAR raster files (*.pxr) +PIXAR 柵格檔案 (*.pxr) - -Wolof -沃洛夫語 + +Portable network graphics (*.apng, *.png) + - -LaoNLP - Lao sentence tokenizer -LaoNLP - 寮國語分句器 + +Portable pixmap format (*.pbm, *.pgm, *.pnm, *.ppm) + - -Stanza - Afrikaans sentence tokenizer -Stanza - 南非語分句器 + +Photoshop PSD files (*.psd) +Photoshop PSD 檔案 (*.psd) - -Stanza - Arabic sentence tokenizer -Stanza - 阿拉伯語分句器 + +Sun raster files (*.ras) +Sun 柵格檔案 (*.ras) - 
-Stanza - Armenian (Eastern) sentence tokenizer -Stanza - 亞美尼亞語(東)分句器 + +Silicon graphics images (*.bw, *.rgb, *.rgba, *.sgi) + - -Stanza - Armenian (Western) sentence tokenizer -Stanza - 亞美尼亞語(西)分句器 + +SPIDER files (*) +SPIDER 檔案 (*) - -Stanza - Basque sentence tokenizer -Stanza - 巴斯克語分句器 + +Truevision TGA (*.icb, *.tga, *.vda, *.vst) + - -Stanza - Belarusian sentence tokenizer -Stanza - 白俄羅斯語分句器 + +TIFF files (*.tif, *.tiff) +TIFF 檔案 (*.tif, *.tiff) - -Stanza - Bulgarian sentence tokenizer -Stanza - 保加利亞語分句器 + +WebP files (*.webp) +WebP 檔案 (*.webp) - -Stanza - Burmese sentence tokenizer -Stanza - 緬甸語分句器 + +Windows metafiles (*.emf, *.wmf) +Windows 元檔案 (*.emf, *.wmf) - -Stanza - Buryat (Russia) sentence tokenizer -Stanza - 布里亞特語(俄羅斯)分句器 + +X bitmaps (*.xbm) + - -Stanza - Catalan sentence tokenizer -Stanza - 加泰羅尼亞語分句器 + +X pixmaps (*.xpm) + - -Stanza - Chinese (Classical) sentence tokenizer -Stanza - 漢語(文言)分句器 + +XV thumbnails (*) +XV 縮圖 (*) - -Stanza - Chinese (Simplified) sentence tokenizer -Stanza - 漢語(簡體)分句器 + +botok - Tibetan sentence tokenizer +botok - 藏語分句器 - -Stanza - Chinese (Traditional) sentence tokenizer -Stanza - 漢語(繁體)分句器 + +khmer-nltk - Khmer sentence tokenizer +khmer-nltk - 柬埔寨語分句器 - -Stanza - Church Slavonic (Old) sentence tokenizer -Stanza - 教會斯拉夫語(古)分句器 + +LaoNLP - Lao sentence tokenizer +LaoNLP - 寮國語分句器 - -Stanza - Coptic sentence tokenizer -Stanza - 科普特語分句器 + +NLTK - Czech Punkt sentence tokenizer +NLTK - 捷克語 Punkt 分句器 - -Stanza - Croatian sentence tokenizer -Stanza - 克羅埃西亞語分句器 + +NLTK - Danish Punkt sentence tokenizer +NLTK - 丹麥語 Punkt 分句器 - -Stanza - Czech sentence tokenizer -Stanza - 捷克語分句器 + +NLTK - Dutch Punkt sentence tokenizer +NLTK - 荷蘭語 Punkt 分句器 - -Stanza - Danish sentence tokenizer -Stanza - 丹麥語分句器 + +NLTK - English Punkt sentence tokenizer +NLTK - 英語 Punkt 分句器 - -Stanza - Dutch sentence tokenizer -Stanza - 荷蘭語分句器 + +NLTK - Estonian Punkt sentence tokenizer +NLTK - 愛沙尼亞語 Punkt 分句器 - -Stanza - English sentence tokenizer -Stanza - 
英語分句器 + +NLTK - Finnish Punkt sentence tokenizer +NLTK - 芬蘭語 Punkt 分句器 - -Stanza - Erzya sentence tokenizer -Stanza - 埃爾齊亞語分句器 + +NLTK - French Punkt sentence tokenizer +NLTK - 法語 Punkt 分句器 - -Stanza - Estonian sentence tokenizer -Stanza - 愛沙尼亞語分句器 + +NLTK - German Punkt sentence tokenizer +NLTK - 德語 Punkt 分句器 - -Stanza - Faroese sentence tokenizer -Stanza - 法羅語分句器 + +NLTK - Greek Punkt sentence tokenizer +NLTK - 希臘語 Punkt 分句器 - -Stanza - Finnish sentence tokenizer -Stanza - 芬蘭語分句器 + +NLTK - Italian Punkt sentence tokenizer +NLTK - 義大利語 Punkt 分句器 - -Stanza - French sentence tokenizer -Stanza - 法語分句器 + +NLTK - Malayalam Punkt sentence tokenizer +NLTK - 馬拉雅拉姆語 Punkt 分句器 - -Stanza - French (Old) sentence tokenizer -Stanza - 法語(古)分句器 + +NLTK - Norwegian (Bokmål) Punkt sentence tokenizer +NLTK - 挪威語(書面) Punkt 分句器 - -Stanza - Galician sentence tokenizer -Stanza - 加里西亞語分句器 + +NLTK - Polish Punkt sentence tokenizer +NLTK - 波蘭語 Punkt 分句器 - -Stanza - German sentence tokenizer -Stanza - 德語分句器 + +NLTK - Portuguese Punkt sentence tokenizer +NLTK - 葡萄牙語 Punkt 分句器 - -Stanza - Gothic sentence tokenizer -Stanza - 哥特語分句器 + +NLTK - Russian Punkt sentence tokenizer +NLTK - 俄語 Punkt 分句器 - -Stanza - Greek (Ancient) sentence tokenizer -Stanza - 希臘語(古)分句器 + +NLTK - Slovene Punkt sentence tokenizer +NLTK - 斯洛維尼亞語 Punkt 分句器 - -Stanza - Greek (Modern) sentence tokenizer -Stanza - 希臘語(現代)分句器 + +NLTK - Spanish Punkt sentence tokenizer +NLTK - 西班牙語 Punkt 分句器 - -Stanza - Hebrew (Ancient) sentence tokenizer -Stanza - 希伯來語(古)分句器 + +NLTK - Swedish Punkt sentence tokenizer +NLTK - 瑞典語 Punkt 分句器 - -Stanza - Hebrew (Modern) sentence tokenizer -Stanza - 希伯來語(現代)分句器 + +NLTK - Turkish Punkt sentence tokenizer +NLTK - 土耳其語 Punkt 分句器 - -Stanza - Hindi sentence tokenizer -Stanza - 印地語分句器 + +spaCy - Catalan dependency parser +spaCy - 加泰羅尼亞語依存分析器 - -Stanza - Hungarian sentence tokenizer -Stanza - 匈牙利語分句器 + +spaCy - Chinese dependency parser +spaCy - 漢語依存分析器 - -Stanza - Icelandic sentence tokenizer -Stanza - 
冰島語分句器 + +spaCy - Croatian dependency parser +spaCy - 克羅埃西亞語依存分析器 - -Stanza - Indonesian sentence tokenizer -Stanza - 印度尼西亞語分句器 + +spaCy - Danish dependency parser +spaCy - 丹麥語依存分析器 - -Stanza - Irish sentence tokenizer -Stanza - 愛爾蘭語分句器 + +spaCy - Dutch dependency parser +spaCy - 荷蘭語依存分析器 - -Stanza - Italian sentence tokenizer -Stanza - 義大利語分句器 + +spaCy - English dependency parser +spaCy - 英語依存分析器 - -Stanza - Japanese sentence tokenizer -Stanza - 日語分句器 + +spaCy - Finnish dependency parser +spaCy - 芬蘭語依存分析器 - -Stanza - Kazakh sentence tokenizer -Stanza - 哈薩克語分句器 + +spaCy - French dependency parser +spaCy - 法語依存分析器 - -Stanza - Korean sentence tokenizer -Stanza - 韓語分句器 + +spaCy - German dependency parser +spaCy - 德語依存分析器 - -Stanza - Kurdish (Kurmanji) sentence tokenizer -Stanza - 庫爾德語(庫爾曼吉語)分句器 + +spaCy - Greek (Modern) dependency parser +spaCy - 希臘語(現代)依存分析器 - -Stanza - Kyrgyz sentence tokenizer -Stanza - 吉爾吉斯語分句器 + +spaCy - Italian dependency parser +spaCy - 義大利語依存分析器 - -Stanza - Latin sentence tokenizer -Stanza - 拉丁語分句器 + +spaCy - Japanese dependency parser +spaCy - 日語依存分析器 - -Stanza - Latvian sentence tokenizer -Stanza - 拉脫維亞語分句器 + +spaCy - Korean dependency parser +spaCy - 韓語依存分析器 - -Stanza - Ligurian sentence tokenizer -Stanza - 利古里亞語分句器 + +spaCy - Lithuanian dependency parser +spaCy - 立陶宛語依存分析器 - -Stanza - Lithuanian sentence tokenizer -Stanza - 立陶宛語分句器 + +spaCy - Macedonian dependency parser +spaCy - 馬其頓語依存分析器 - -Stanza - Maltese sentence tokenizer -Stanza - 馬耳他語分句器 + +spaCy - Norwegian (Bokmål) dependency parser +spaCy - 挪威語(書面)依存分析器 - -Stanza - Manx sentence tokenizer -Stanza - 馬恩語分句器 + +spaCy - Polish dependency parser +spaCy - 波蘭語依存分析器 - -Stanza - Marathi sentence tokenizer -Stanza - 馬拉地語分句器 + +spaCy - Portuguese dependency parser +spaCy - 葡萄牙語依存分析器 - -Stanza - Nigerian Pidgin sentence tokenizer -Stanza - 奈及利亞皮欽語分句器 + +spaCy - Romanian dependency parser +spaCy - 羅馬尼亞語依存分析器 - -Stanza - Norwegian Bokmål sentence tokenizer -Stanza - 書面挪威語分句器 + +spaCy - 
Russian dependency parser +spaCy - 俄語依存分析器 - -Stanza - Norwegian Nynorsk sentence tokenizer -Stanza - 新挪威語分句器 + +spaCy - Slovene dependency parser +spaCy - 斯洛維尼亞語依存分析器 - -Stanza - Persian sentence tokenizer -Stanza - 波斯語分句器 + +spaCy - Spanish dependency parser +spaCy - 西班牙語依存分析器 - -Stanza - Polish sentence tokenizer -Stanza - 波蘭語分句器 + +spaCy - Swedish dependency parser +spaCy - 瑞典語依存分析器 - -Stanza - Pomak sentence tokenizer -Stanza - 波馬克語分句器 + +spaCy - Ukrainian dependency parser +spaCy - 烏克蘭語依存分析器 - -Stanza - Portuguese sentence tokenizer -Stanza - 葡萄牙語分句器 + +spaCy - Croatian sentence recognizer +spaCy - 克羅埃西亞語句子識別器 - -Stanza - Romanian sentence tokenizer -Stanza - 羅馬尼亞語分句器 + +spaCy - Dutch sentence recognizer +spaCy - 荷蘭語句子識別器 - -Stanza - Russian sentence tokenizer -Stanza - 俄語分句器 + +spaCy - Finnish sentence recognizer +spaCy - 芬蘭語句子識別器 - -Stanza - Russian (Old) sentence tokenizer -Stanza - 俄語(古)分句器 + +spaCy - Greek (Modern) sentence recognizer +spaCy - 希臘語(現代)句子識別器 - -Stanza - Sámi (Northern) sentence tokenizer -Stanza - 薩米語(北)分句器 + +spaCy - Italian sentence recognizer +spaCy - 義大利語句子識別器 - -Stanza - Sanskrit sentence tokenizer -Stanza - 梵語分句器 + +spaCy - Korean sentence recognizer +spaCy - 韓語句子識別器 - -Stanza - Scottish Gaelic sentence tokenizer -Stanza - 蘇格蘭蓋爾語分句器 + +spaCy - Lithuanian sentence recognizer +spaCy - 立陶宛語句子識別器 - -Stanza - Serbian (Latin) sentence tokenizer -Stanza - 塞爾維亞語(拉丁)分句器 + +spaCy - Macedonian sentence recognizer +spaCy - 馬其頓語句子識別器 - -Stanza - Sindhi sentence tokenizer -Stanza - 信德語分句器 + +spaCy - Norwegian (Bokmål) sentence recognizer +spaCy - 挪威語(書面)句子識別器 - -Stanza - Slovak sentence tokenizer -Stanza - 斯洛伐克語分句器 + +spaCy - Polish sentence recognizer +spaCy - 波蘭語句子識別器 - -Stanza - Slovenian sentence tokenizer -Stanza - 斯洛維尼亞語分句器 + +spaCy - Portuguese sentence recognizer +spaCy - 葡萄牙語句子識別器 - -Stanza - Sorbian (Upper) sentence tokenizer -Stanza - 索布語(上)分句器 + +spaCy - Romanian sentence recognizer +spaCy - 羅馬尼亞語句子識別器 - -Stanza - Spanish sentence 
tokenizer -Stanza - 西班牙語分句器 + +spaCy - Russian sentence recognizer +spaCy - 俄語句子識別器 - -Stanza - Swedish sentence tokenizer -Stanza - 瑞典語分句器 + +spaCy - Swedish sentence recognizer +spaCy - 瑞典語句子識別器 - -Stanza - Tamil sentence tokenizer -Stanza - 泰米爾語分句器 + +spaCy - Sentencizer +spaCy - 分句器 - -Stanza - Telugu sentence tokenizer -Stanza - 泰盧固語分句器 + +Stanza - Afrikaans sentence tokenizer +Stanza - 南非語分句器 - -Stanza - Thai sentence tokenizer -Stanza - 泰語分句器 + +Stanza - Arabic sentence tokenizer +Stanza - 阿拉伯語分句器 - -Stanza - Turkish sentence tokenizer -Stanza - 土耳其語分句器 + +Stanza - Armenian (Classical) sentence tokenizer +Stanza - 亞美尼亞語(古)分句器 - -Stanza - Ukrainian sentence tokenizer -Stanza - 烏克蘭語分句器 + +Stanza - Armenian (Eastern) sentence tokenizer +Stanza - 亞美尼亞語(東)分句器 - -Stanza - Urdu sentence tokenizer -Stanza - 烏爾都語分句器 + +Stanza - Armenian (Western) sentence tokenizer +Stanza - 亞美尼亞語(西)分句器 - -Stanza - Uyghur sentence tokenizer -Stanza - 維吾爾語分句器 + +Stanza - Basque sentence tokenizer +Stanza - 巴斯克語分句器 - -Stanza - Vietnamese sentence tokenizer -Stanza - 越南語分句器 + +Stanza - Belarusian sentence tokenizer +Stanza - 白俄羅斯語分句器 - -Stanza - Welsh sentence tokenizer -Stanza - 威爾士語分句器 + +Stanza - Bulgarian sentence tokenizer +Stanza - 保加利亞語分句器 - -Stanza - Wolof sentence tokenizer -Stanza - 沃洛夫語分句器 + +Stanza - Burmese sentence tokenizer +Stanza - 緬甸語分句器 - -LaoNLP - Lao word tokenizer -LaoNLP - 寮國語分詞器 + +Stanza - Buryat (Russia) sentence tokenizer +Stanza - 布里亞特語(俄羅斯)分句器 - -spaCy - Hebrew (Modern) word tokenizer -spaCy - 希伯來語(現代)分詞器 + +Stanza - Catalan sentence tokenizer +Stanza - 加泰羅尼亞語分句器 - -spaCy - Norwegian Bokmål word tokenizer -spaCy - 書面挪威語分詞器 + +Stanza - Chinese (Classical) sentence tokenizer +Stanza - 漢語(文言)分句器 - -Stanza - Afrikaans word tokenizer -Stanza - 南非語分詞器 + +Stanza - Chinese (Simplified) sentence tokenizer +Stanza - 漢語(簡體)分句器 - -Stanza - Arabic word tokenizer -Stanza - 阿拉伯語分詞器 + +Stanza - Chinese (Traditional) sentence tokenizer +Stanza - 漢語(繁體)分句器 - -Stanza - 
Armenian (Eastern) word tokenizer -Stanza - 亞美尼亞語(東)分詞器 + +Stanza - Church Slavonic (Old) sentence tokenizer +Stanza - 教會斯拉夫語(古)分句器 - -Stanza - Armenian (Western) word tokenizer -Stanza - 亞美尼亞語(西)分詞器 + +Stanza - Coptic sentence tokenizer +Stanza - 科普特語分句器 - -Stanza - Basque word tokenizer -Stanza - 巴斯克語分詞器 + +Stanza - Croatian sentence tokenizer +Stanza - 克羅埃西亞語分句器 - -Stanza - Belarusian word tokenizer -Stanza - 白俄羅斯語分詞器 + +Stanza - Czech sentence tokenizer +Stanza - 捷克語分句器 - -Stanza - Bulgarian word tokenizer -Stanza - 保加利亞語分詞器 + +Stanza - Danish sentence tokenizer +Stanza - 丹麥語分句器 - -Stanza - Burmese word tokenizer -Stanza - 緬甸語分詞器 + +Stanza - Dutch sentence tokenizer +Stanza - 荷蘭語分句器 - -Stanza - Buryat (Russia) word tokenizer -Stanza - 布里亞特語(俄羅斯)分詞器 + +Stanza - English sentence tokenizer +Stanza - 英語分句器 - -Stanza - Catalan word tokenizer -Stanza - 加泰羅尼亞語分詞器 + +Stanza - English (Old) sentence tokenizer +Stanza - 英語(古)分句器 - -Stanza - Chinese (Classical) word tokenizer -Stanza - 漢語(文言)分詞器 + +Stanza - Erzya sentence tokenizer +Stanza - 埃爾齊亞語分句器 - -Stanza - Chinese (Simplified) word tokenizer -Stanza - 漢語(簡體)分詞器 + +Stanza - Estonian sentence tokenizer +Stanza - 愛沙尼亞語分句器 - -Stanza - Chinese (Traditional) word tokenizer -Stanza - 漢語(繁體)分詞器 + +Stanza - Faroese sentence tokenizer +Stanza - 法羅語分句器 - -Stanza - Church Slavonic (Old) word tokenizer -Stanza - 教會斯拉夫語(古)分詞器 + +Stanza - Finnish sentence tokenizer +Stanza - 芬蘭語分句器 - -Stanza - Coptic word tokenizer -Stanza - 科普特語分詞器 + +Stanza - French sentence tokenizer +Stanza - 法語分句器 - -Stanza - Croatian word tokenizer -Stanza - 克羅埃西亞語分詞器 + +Stanza - French (Old) sentence tokenizer +Stanza - 法語(古)分句器 - -Stanza - Czech word tokenizer -Stanza - 捷克語分詞器 + +Stanza - Galician sentence tokenizer +Stanza - 加里西亞語分句器 - -Stanza - Danish word tokenizer -Stanza - 丹麥語分詞器 + +Stanza - German sentence tokenizer +Stanza - 德語分句器 - -Stanza - Dutch word tokenizer -Stanza - 荷蘭語分詞器 + +Stanza - Gothic sentence tokenizer +Stanza - 哥特語分句器 - -Stanza - 
English word tokenizer -Stanza - 英語分詞器 + +Stanza - Greek (Ancient) sentence tokenizer +Stanza - 希臘語(古)分句器 - -Stanza - Erzya word tokenizer -Stanza - 埃爾齊亞語分詞器 + +Stanza - Greek (Modern) sentence tokenizer +Stanza - 希臘語(現代)分句器 - -Stanza - Estonian word tokenizer -Stanza - 愛沙尼亞語分詞器 + +Stanza - Hebrew (Ancient) sentence tokenizer +Stanza - 希伯來語(古)分句器 - -Stanza - Faroese word tokenizer -Stanza - 法羅語分詞器 + +Stanza - Hebrew (Modern) sentence tokenizer +Stanza - 希伯來語(現代)分句器 - -Stanza - Finnish word tokenizer -Stanza - 芬蘭語分詞器 + +Stanza - Hindi sentence tokenizer +Stanza - 印地語分句器 - -Stanza - French word tokenizer -Stanza - 法語分詞器 + +Stanza - Hungarian sentence tokenizer +Stanza - 匈牙利語分句器 - -Stanza - French (Old) word tokenizer -Stanza - 法語(古)分詞器 + +Stanza - Icelandic sentence tokenizer +Stanza - 冰島語分句器 - -Stanza - Galician word tokenizer -Stanza - 加里西亞語分詞器 + +Stanza - Indonesian sentence tokenizer +Stanza - 印度尼西亞語分句器 - -Stanza - German word tokenizer -Stanza - 德語分詞器 + +Stanza - Irish sentence tokenizer +Stanza - 愛爾蘭語分句器 - -Stanza - Gothic word tokenizer -Stanza - 哥特語分詞器 + +Stanza - Italian sentence tokenizer +Stanza - 義大利語分句器 - -Stanza - Greek (Ancient) word tokenizer -Stanza - 希臘語(古)分詞器 + +Stanza - Japanese sentence tokenizer +Stanza - 日語分句器 - -Stanza - Greek (Modern) word tokenizer -Stanza - 希臘語(現代)分詞器 + +Stanza - Kazakh sentence tokenizer +Stanza - 哈薩克語分句器 - -Stanza - Hebrew (Ancient) word tokenizer -Stanza - 希伯來語(古)分詞器 + +Stanza - Korean sentence tokenizer +Stanza - 韓語分句器 - -Stanza - Hebrew (Modern) word tokenizer -Stanza - 希伯來語(現代)分詞器 + +Stanza - Kurdish (Kurmanji) sentence tokenizer +Stanza - 庫爾德語(庫爾曼吉語)分句器 - -Stanza - Hindi word tokenizer -Stanza - 印地語分詞器 + +Stanza - Kyrgyz sentence tokenizer +Stanza - 吉爾吉斯語分句器 - -Stanza - Hungarian word tokenizer -Stanza - 匈牙利語分詞器 + +Stanza - Latin sentence tokenizer +Stanza - 拉丁語分句器 - -Stanza - Icelandic word tokenizer -Stanza - 冰島語分詞器 + +Stanza - Latvian sentence tokenizer +Stanza - 拉脫維亞語分句器 - -Stanza - Indonesian word tokenizer 
-Stanza - 印度尼西亞語分詞器 + +Stanza - Ligurian sentence tokenizer +Stanza - 利古里亞語分句器 - -Stanza - Irish word tokenizer -Stanza - 愛爾蘭語分詞器 + +Stanza - Lithuanian sentence tokenizer +Stanza - 立陶宛語分句器 - -Stanza - Italian word tokenizer -Stanza - 義大利語分詞器 + +Stanza - Maltese sentence tokenizer +Stanza - 馬耳他語分句器 - -Stanza - Japanese word tokenizer -Stanza - 日語分詞器 + +Stanza - Manx sentence tokenizer +Stanza - 馬恩語分句器 - -Stanza - Kazakh word tokenizer -Stanza - 哈薩克語分詞器 + +Stanza - Marathi sentence tokenizer +Stanza - 馬拉地語分句器 - -Stanza - Korean word tokenizer -Stanza - 韓語分詞器 + +Stanza - Nigerian Pidgin sentence tokenizer +Stanza - 奈及利亞皮欽語分句器 - -Stanza - Kurdish (Kurmanji) word tokenizer -Stanza - 庫爾德語(庫爾曼吉語)分詞器 + +Stanza - Norwegian (Bokmål) sentence tokenizer +Stanza - 挪威語(書面)分句器 - -Stanza - Kyrgyz word tokenizer -Stanza - 吉爾吉斯語分詞器 + +Stanza - Norwegian (Nynorsk) sentence tokenizer +Stanza - 挪威語(新)分句器 - -Stanza - Latin word tokenizer -Stanza - 拉丁語分詞器 + +Stanza - Persian sentence tokenizer +Stanza - 波斯語分句器 - -Stanza - Latvian word tokenizer -Stanza - 拉脫維亞語分詞器 + +Stanza - Polish sentence tokenizer +Stanza - 波蘭語分句器 - -Stanza - Ligurian word tokenizer -Stanza - 利古里亞語分詞器 + +Stanza - Pomak sentence tokenizer +Stanza - 波馬克語分句器 - -Stanza - Lithuanian word tokenizer -Stanza - 立陶宛語分詞器 + +Stanza - Portuguese sentence tokenizer +Stanza - 葡萄牙語分句器 - -Stanza - Maltese word tokenizer -Stanza - 馬耳他語分詞器 + +Stanza - Romanian sentence tokenizer +Stanza - 羅馬尼亞語分句器 - -Stanza - Manx word tokenizer -Stanza - 馬恩語分詞器 + +Stanza - Russian sentence tokenizer +Stanza - 俄語分句器 - -Stanza - Marathi word tokenizer -Stanza - 馬拉地語分詞器 + +Stanza - Russian (Old) sentence tokenizer +Stanza - 俄語(古)分句器 - -Stanza - Nigerian Pidgin word tokenizer -Stanza - 奈及利亞皮欽語分詞器 + +Stanza - Sámi (Northern) sentence tokenizer +Stanza - 薩米語(北)分句器 - -Stanza - Norwegian Bokmål word tokenizer -Stanza - 書面挪威語分詞器 + +Stanza - Sanskrit sentence tokenizer +Stanza - 梵語分句器 - -Stanza - Norwegian Nynorsk word tokenizer -Stanza - 新挪威語分詞器 + +Stanza - 
Scottish Gaelic sentence tokenizer +Stanza - 蘇格蘭蓋爾語分句器 - -Stanza - Persian word tokenizer -Stanza - 波斯語分詞器 + +Stanza - Serbian (Latin script) sentence tokenizer +Stanza - 塞爾維亞語(拉丁文)分句器 - -Stanza - Polish word tokenizer -Stanza - 波蘭語分詞器 + +Stanza - Sindhi sentence tokenizer +Stanza - 信德語分句器 - -Stanza - Pomak word tokenizer -Stanza - 波馬克語分詞器 + +Stanza - Slovak sentence tokenizer +Stanza - 斯洛伐克語分句器 - -Stanza - Portuguese word tokenizer -Stanza - 葡萄牙語分詞器 - - - -Stanza - Romanian word tokenizer -Stanza - 羅馬尼亞語分詞器 + +Stanza - Slovene sentence tokenizer +Stanza - 斯洛維尼亞語分句器 - -Stanza - Russian word tokenizer -Stanza - 俄語分詞器 + +Stanza - Sorbian (Upper) sentence tokenizer +Stanza - 索布語(上)分句器 - -Stanza - Russian (Old) word tokenizer -Stanza - 俄語(古)分詞器 + +Stanza - Spanish sentence tokenizer +Stanza - 西班牙語分句器 - -Stanza - Sámi (Northern) word tokenizer -Stanza - 薩米語(北)分詞器 + +Stanza - Swedish sentence tokenizer +Stanza - 瑞典語分句器 - -Stanza - Sanskrit word tokenizer -Stanza - 梵語分詞器 + +Stanza - Tamil sentence tokenizer +Stanza - 泰米爾語分句器 - -Stanza - Scottish Gaelic word tokenizer -Stanza - 蘇格蘭蓋爾語分詞器 + +Stanza - Telugu sentence tokenizer +Stanza - 泰盧固語分句器 - -Stanza - Serbian (Latin) word tokenizer -Stanza - 塞爾維亞語(拉丁)分詞器 + +Stanza - Thai sentence tokenizer +Stanza - 泰語分句器 - -Stanza - Sindhi word tokenizer -Stanza - 信德語分詞器 + +Stanza - Turkish sentence tokenizer +Stanza - 土耳其語分句器 - -Stanza - Slovak word tokenizer -Stanza - 斯洛伐克語分詞器 + +Stanza - Ukrainian sentence tokenizer +Stanza - 烏克蘭語分句器 - -Stanza - Slovenian word tokenizer -Stanza - 斯洛維尼亞語分詞器 + +Stanza - Urdu sentence tokenizer +Stanza - 烏爾都語分句器 - -Stanza - Sorbian (Upper) word tokenizer -Stanza - 索布語(上)分詞器 + +Stanza - Uyghur sentence tokenizer +Stanza - 維吾爾語分句器 - -Stanza - Spanish word tokenizer -Stanza - 西班牙語分詞器 + +Stanza - Vietnamese sentence tokenizer +Stanza - 越南語分句器 - -Stanza - Swedish word tokenizer -Stanza - 瑞典語分詞器 + +Stanza - Welsh sentence tokenizer +Stanza - 威爾士語分句器 - -Stanza - Tamil word tokenizer -Stanza - 泰米爾語分詞器 + 
+Stanza - Wolof sentence tokenizer +Stanza - 沃洛夫語分句器 - -Stanza - Telugu word tokenizer -Stanza - 泰盧固語分詞器 + +Underthesea - Vietnamese sentence tokenizer +Underthesea - 越南語分句器 - -Stanza - Thai word tokenizer -Stanza - 泰語分詞器 + +botok - Tibetan word tokenizer +botok - 藏語分詞器 - -Stanza - Turkish word tokenizer -Stanza - 土耳其語分詞器 + +khmer-nltk - Khmer word tokenizer +khmer-nltk - 柬埔寨語分詞器 - -Stanza - Ukrainian word tokenizer -Stanza - 烏克蘭語分詞器 + +LaoNLP - Lao word tokenizer +LaoNLP - 寮國語分詞器 - -Stanza - Urdu word tokenizer -Stanza - 烏爾都語分詞器 + +NLTK - NIST tokenizer +NLTK - NIST 分詞器 - -Stanza - Uyghur word tokenizer -Stanza - 維吾爾語分詞器 + +NLTK - NLTK tokenizer +NLTK - NLTK 分詞器 - -Stanza - Vietnamese word tokenizer -Stanza - 越南語分詞器 + +NLTK - Penn Treebank tokenizer +NLTK - 賓州樹庫分詞器 - -Stanza - Welsh word tokenizer -Stanza - 威爾士語分詞器 + +NLTK - Regular-expression tokenizer +NLTK - 正規表示式分詞器 - -Stanza - Wolof word tokenizer -Stanza - 沃洛夫語分詞器 + +NLTK - Tok-tok tokenizer +NLTK - Tok-tok 分詞器 - -LaoNLP - SeqLabeling - + +NLTK - Twitter tokenizer +NLTK - 推特分詞器 - -LaoNLP - Yunshan Cup 2020 - + +pkuseg - Chinese word tokenizer +pkuseg - 漢語分詞器 - -Stanza - Afrikaans part-of-speech tagger -Stanza - 南非語詞性標註器 + +PyThaiNLP - Longest matching +PyThaiNLP - 最長匹配 - -Stanza - Arabic part-of-speech tagger -Stanza - 阿拉伯語詞性標註器 + +PyThaiNLP - Maximum matching +PyThaiNLP - 最大匹配 - -Stanza - Armenian (Eastern) part-of-speech tagger -Stanza - 亞美尼亞語(東)詞性標註器 + +PyThaiNLP - Maximum matching + TCC +PyThaiNLP - 最大匹配 + TCC - -Stanza - Armenian (Western) part-of-speech tagger -Stanza - 亞美尼亞語(西)詞性標註器 + +Sacremoses - Moses tokenizer +Sacremoses - Moses 分詞器 - -Stanza - Basque part-of-speech tagger -Stanza - 巴斯克語詞性標註器 + +spaCy - Afrikaans word tokenizer +spaCy - 南非語分詞器 - -Stanza - Belarusian part-of-speech tagger -Stanza - 白俄羅斯語詞性標註器 + +spaCy - Albanian word tokenizer +spaCy - 阿爾巴尼亞語分詞器 - -Stanza - Bulgarian part-of-speech tagger -Stanza - 保加利亞語詞性標註器 + +spaCy - Amharic word tokenizer +spaCy - 阿姆哈拉語分詞器 - -Stanza - Buryat 
(Russia) part-of-speech tagger -Stanza - 布里亞特語(俄羅斯)詞性標註器 + +spaCy - Arabic word tokenizer +spaCy - 阿拉伯語分詞器 - -Stanza - Catalan part-of-speech tagger -Stanza - 加泰羅尼亞語詞性標註器 + +spaCy - Armenian word tokenizer +spaCy - 亞美尼亞語分詞器 - -Stanza - Chinese (Classical) part-of-speech tagger -Stanza - 漢語(文言)詞性標註器 + +spaCy - Azerbaijani word tokenizer +spaCy - 亞塞拜然語分詞器 - -Stanza - Chinese (Simplified) part-of-speech tagger -Stanza - 漢語(簡體)詞性標註器 + +spaCy - Basque word tokenizer +spaCy - 巴斯克語分詞器 - -Stanza - Chinese (Traditional) part-of-speech tagger -Stanza - 漢語(繁體)詞性標註器 + +spaCy - Bengali word tokenizer +spaCy - 孟加拉語分詞器 - -Stanza - Church Slavonic (Old) part-of-speech tagger -Stanza - 教會斯拉夫語(古)詞性標註器 + +spaCy - Bulgarian word tokenizer +spaCy - 保加利亞語分詞器 - -Stanza - Coptic part-of-speech tagger -Stanza - 科普特語詞性標註器 + +spaCy - Catalan word tokenizer +spaCy - 加泰羅尼亞語分詞器 - -Stanza - Croatian part-of-speech tagger -Stanza - 克羅埃西亞語詞性標註器 + +spaCy - Chinese word tokenizer +spaCy - 漢語分詞器 - -Stanza - Czech part-of-speech tagger -Stanza - 捷克語詞性標註器 + +spaCy - Croatian word tokenizer +spaCy - 克羅埃西亞語分詞器 - -Stanza - Danish part-of-speech tagger -Stanza - 丹麥語詞性標註器 + +spaCy - Czech word tokenizer +spaCy - 捷克語分詞器 - -Stanza - Dutch part-of-speech tagger -Stanza - 荷蘭語詞性標註器 + +spaCy - Danish word tokenizer +spaCy - 丹麥語分詞器 - -Stanza - English part-of-speech tagger -Stanza - 英語詞性標註器 + +spaCy - Dutch word tokenizer +spaCy - 荷蘭語分詞器 - -Stanza - Erzya part-of-speech tagger -Stanza - 埃爾齊亞語詞性標註器 + +spaCy - English word tokenizer +spaCy - 英語分詞器 - -Stanza - Estonian part-of-speech tagger -Stanza - 愛沙尼亞語詞性標註器 + +spaCy - Estonian word tokenizer +spaCy - 愛沙尼亞語分詞器 - -Stanza - Faroese part-of-speech tagger -Stanza - 法羅語詞性標註器 + +spaCy - Faroese word tokenizer +spaCy - 法羅語分詞器 - -Stanza - Finnish part-of-speech tagger -Stanza - 芬蘭語詞性標註器 + +spaCy - Finnish word tokenizer +spaCy - 芬蘭語分詞器 - -Stanza - French part-of-speech tagger -Stanza - 法語詞性標註器 + +spaCy - French word tokenizer +spaCy - 法語分詞器 - -Stanza - French (Old) 
part-of-speech tagger -Stanza - 法語(古)詞性標註器 + +spaCy - German word tokenizer +spaCy - 德語分詞器 - -Stanza - Galician part-of-speech tagger -Stanza - 加里西亞語詞性標註器 + +spaCy - Greek (Ancient) word tokenizer +spaCy - 希臘語(古)分詞器 - -Stanza - German part-of-speech tagger -Stanza - 德語詞性標註器 + +spaCy - Greek (Modern) word tokenizer +spaCy - 希臘語(現代)分詞器 - -Stanza - Gothic part-of-speech tagger -Stanza - 哥特語詞性標註器 + +spaCy - Gujarati word tokenizer +spaCy - 古吉拉特語分詞器 - -Stanza - Greek (Ancient) part-of-speech tagger -Stanza - 希臘語(古)詞性標註器 + +spaCy - Hebrew (Modern) word tokenizer +spaCy - 希伯來語(現代)分詞器 - -Stanza - Greek (Modern) part-of-speech tagger -Stanza - 希臘語(現代)詞性標註器 + +spaCy - Hindi word tokenizer +spaCy - 印地語分詞器 - -Stanza - Hebrew (Ancient) part-of-speech tagger -Stanza - 希伯來語(古)詞性標註器 + +spaCy - Hungarian word tokenizer +spaCy - 匈牙利語分詞器 - -Stanza - Hebrew (Modern) part-of-speech tagger -Stanza - 希伯來語(現代)詞性標註器 + +spaCy - Icelandic word tokenizer +spaCy - 冰島語分詞器 - -Stanza - Hindi part-of-speech tagger -Stanza - 印地語詞性標註器 + +spaCy - Indonesian word tokenizer +spaCy - 印度尼西亞語分詞器 - -Stanza - Hungarian part-of-speech tagger -Stanza - 匈牙利語詞性標註器 + +spaCy - Irish word tokenizer +spaCy - 愛爾蘭語分詞器 - -Stanza - Icelandic part-of-speech tagger -Stanza - 冰島語詞性標註器 + +spaCy - Italian word tokenizer +spaCy - 義大利語分詞器 - -Stanza - Indonesian part-of-speech tagger -Stanza - 印度尼西亞語詞性標註器 + +spaCy - Japanese word tokenizer +spaCy - 日語分詞器 - -Stanza - Irish part-of-speech tagger -Stanza - 愛爾蘭語詞性標註器 + +spaCy - Kannada word tokenizer +spaCy - 卡納達語分詞器 - -Stanza - Italian part-of-speech tagger -Stanza - 義大利語詞性標註器 + +spaCy - Korean word tokenizer +spaCy - 韓語分詞器 - -Stanza - Japanese part-of-speech tagger -Stanza - 日語詞性標註器 + +spaCy - Kyrgyz word tokenizer +spaCy - 吉爾吉斯語分詞器 - -Stanza - Kazakh part-of-speech tagger -Stanza - 哈薩克語詞性標註器 + +spaCy - Latin word tokenizer +spaCy - 拉丁語分詞器 - -Stanza - Korean part-of-speech tagger -Stanza - 韓語詞性標註器 + +spaCy - Latvian word tokenizer +spaCy - 拉脫維亞語分詞器 - -Stanza - Kurdish (Kurmanji) 
part-of-speech tagger -Stanza - 庫爾德語(庫爾曼吉語)詞性標註器 + +spaCy - Ligurian word tokenizer +spaCy - 利古里亞語分詞器 - -Stanza - Kyrgyz part-of-speech tagger -Stanza - 吉爾吉斯語詞性標註器 + +spaCy - Lithuanian word tokenizer +spaCy - 立陶宛語分詞器 - -Stanza - Latin part-of-speech tagger -Stanza - 拉丁語詞性標註器 + +spaCy - Luganda word tokenizer +spaCy - 盧幹達語分詞器 - -Stanza - Latvian part-of-speech tagger -Stanza - 拉脫維亞語詞性標註器 + +spaCy - Luxembourgish word tokenizer +spaCy - 盧森堡語分詞器 - -Stanza - Ligurian part-of-speech tagger -Stanza - 利古里亞語詞性標註器 + +spaCy - Macedonian word tokenizer +spaCy - 馬其頓語分詞器 - -Stanza - Lithuanian part-of-speech tagger -Stanza - 立陶宛語詞性標註器 + +spaCy - Malay word tokenizer +spaCy - 馬來語分詞器 - -Stanza - Maltese part-of-speech tagger -Stanza - 馬耳他語詞性標註器 + +spaCy - Malayalam word tokenizer +spaCy - 馬拉雅拉姆語分詞器 - -Stanza - Manx part-of-speech tagger -Stanza - 馬恩語詞性標註器 + +spaCy - Marathi word tokenizer +spaCy - 馬拉地語分詞器 - -Stanza - Marathi part-of-speech tagger -Stanza - 馬拉地語詞性標註器 + +spaCy - Nepali word tokenizer +spaCy - 尼泊爾語分詞器 - -Stanza - Nigerian Pidgin part-of-speech tagger -Stanza - 奈及利亞皮欽語詞性標註器 + +spaCy - Norwegian (Bokmål) word tokenizer +spaCy - 挪威語(書面)分詞器 - -Stanza - Norwegian Bokmål part-of-speech tagger -Stanza - 書面挪威語詞性標註器 + +spaCy - Norwegian (Nynorsk) word tokenizer +spaCy - 挪威語(新)分詞器 - -Stanza - Norwegian Nynorsk part-of-speech tagger -Stanza - 新挪威語詞性標註器 + +spaCy - Persian word tokenizer +spaCy - 波斯語分詞器 - -Stanza - Persian part-of-speech tagger -Stanza - 波斯語詞性標註器 + +spaCy - Polish word tokenizer +spaCy - 波蘭語分詞器 - -Stanza - Polish part-of-speech tagger -Stanza - 波蘭語詞性標註器 + +spaCy - Portuguese word tokenizer +spaCy - 葡萄牙語分詞器 - -Stanza - Pomak part-of-speech tagger -Stanza - 波馬克語詞性標註器 + +spaCy - Romanian word tokenizer +spaCy - 羅馬尼亞語分詞器 - -Stanza - Portuguese part-of-speech tagger -Stanza - 葡萄牙語詞性標註器 + +spaCy - Russian word tokenizer +spaCy - 俄語分詞器 - -Stanza - Romanian part-of-speech tagger -Stanza - 羅馬尼亞語詞性標註器 + +spaCy - Sanskrit word tokenizer +spaCy - 梵語分詞器 - -Stanza - Russian 
part-of-speech tagger -Stanza - 俄語詞性標註器 + +spaCy - Serbian (Cyrillic script) word tokenizer +spaCy - 塞爾維亞語(西里爾文)分詞器 - -Stanza - Russian (Old) part-of-speech tagger -Stanza - 俄語(古)詞性標註器 + +spaCy - Sinhala word tokenizer +spaCy - 僧伽羅語分詞器 - -Stanza - Sámi (Northern) part-of-speech tagger -Stanza - 薩米語(北)詞性標註器 + +spaCy - Slovak word tokenizer +spaCy - 斯洛伐克語分詞器 - -Stanza - Sanskrit part-of-speech tagger -Stanza - 梵語詞性標註器 + +spaCy - Slovene word tokenizer +spaCy - 斯洛維尼亞語分詞器 - -Stanza - Scottish Gaelic part-of-speech tagger -Stanza - 蘇格蘭蓋爾語詞性標註器 + +spaCy - Sorbian (Lower) word tokenizer +spaCy - 索布語(下)分詞器 - -Stanza - Serbian (Latin) part-of-speech tagger -Stanza - 塞爾維亞語(拉丁)詞性標註器 + +spaCy - Sorbian (Upper) word tokenizer +spaCy - 索布語(上)分詞器 - -Stanza - Slovak part-of-speech tagger -Stanza - 斯洛伐克語詞性標註器 + +spaCy - Spanish word tokenizer +spaCy - 西班牙語分詞器 - -Stanza - Slovenian part-of-speech tagger -Stanza - 斯洛維尼亞語詞性標註器 - + +spaCy - Swedish word tokenizer +spaCy - 瑞典語分詞器 + - -Stanza - Sorbian (Upper) part-of-speech tagger -Stanza - 索布語(上)詞性標註器 + +spaCy - Tagalog word tokenizer +spaCy - 他加祿語分詞器 - -Stanza - Spanish part-of-speech tagger -Stanza - 西班牙語詞性標註器 + +spaCy - Tamil word tokenizer +spaCy - 泰米爾語分詞器 - -Stanza - Swedish part-of-speech tagger -Stanza - 瑞典語詞性標註器 + +spaCy - Tatar word tokenizer +spaCy - 韃靼語分詞器 - -Stanza - Tamil part-of-speech tagger -Stanza - 泰米爾語詞性標註器 + +spaCy - Telugu word tokenizer +spaCy - 泰盧固語分詞器 - -Stanza - Telugu part-of-speech tagger -Stanza - 泰盧固語詞性標註器 + +spaCy - Tigrinya word tokenizer +spaCy - 提格雷尼亞語分詞器 - -Stanza - Turkish part-of-speech tagger -Stanza - 土耳其語詞性標註器 + +spaCy - Tswana word tokenizer +spaCy - 茨瓦納語分詞器 - -Stanza - Ukrainian part-of-speech tagger -Stanza - 烏克蘭語詞性標註器 + +spaCy - Turkish word tokenizer +spaCy - 土耳其語分詞器 - -Stanza - Urdu part-of-speech tagger -Stanza - 烏爾都語詞性標註器 + +spaCy - Ukrainian word tokenizer +spaCy - 烏克蘭語分詞器 - -Stanza - Uyghur part-of-speech tagger -Stanza - 維吾爾語詞性標註器 + +spaCy - Urdu word tokenizer +spaCy - 烏爾都語分詞器 - 
-Stanza - Vietnamese part-of-speech tagger -Stanza - 越南語詞性標註器 + +spaCy - Yoruba word tokenizer +spaCy - 約魯巴語分詞器 - -Stanza - Welsh part-of-speech tagger -Stanza - 威爾士語詞性標註器 + +Stanza - Afrikaans word tokenizer +Stanza - 南非語分詞器 - -Stanza - Wolof part-of-speech tagger -Stanza - 沃洛夫語詞性標註器 + +Stanza - Arabic word tokenizer +Stanza - 阿拉伯語分詞器 - -Stanza - Afrikaans lemmatizer -Stanza - 南非語詞形還原器 + +Stanza - Armenian (Classical) word tokenizer +Stanza - 亞美尼亞語(古)分詞器 - -Stanza - Arabic lemmatizer -Stanza - 阿拉伯語詞形還原器 + +Stanza - Armenian (Eastern) word tokenizer +Stanza - 亞美尼亞語(東)分詞器 - -Stanza - Armenian (Eastern) lemmatizer -Stanza - 亞美尼亞語(東)詞形還原器 + +Stanza - Armenian (Western) word tokenizer +Stanza - 亞美尼亞語(西)分詞器 - -Stanza - Armenian (Western) lemmatizer -Stanza - 亞美尼亞語(西)詞形還原器 + +Stanza - Basque word tokenizer +Stanza - 巴斯克語分詞器 - -Stanza - Basque lemmatizer -Stanza - 巴斯克語詞形還原器 + +Stanza - Belarusian word tokenizer +Stanza - 白俄羅斯語分詞器 - -Stanza - Belarusian lemmatizer -Stanza - 白俄羅斯語詞形還原器 + +Stanza - Bulgarian word tokenizer +Stanza - 保加利亞語分詞器 - -Stanza - Bulgarian lemmatizer -Stanza - 保加利亞語詞形還原器 + +Stanza - Burmese word tokenizer +Stanza - 緬甸語分詞器 - -Stanza - Buryat (Russia) lemmatizer -Stanza - 布里亞特語(俄羅斯)詞形還原器 + +Stanza - Buryat (Russia) word tokenizer +Stanza - 布里亞特語(俄羅斯)分詞器 - -Stanza - Catalan lemmatizer -Stanza - 加泰羅尼亞語詞形還原器 + +Stanza - Catalan word tokenizer +Stanza - 加泰羅尼亞語分詞器 - -Stanza - Chinese (Classical) lemmatizer -Stanza - 漢語(文言)詞形還原器 + +Stanza - Chinese (Classical) word tokenizer +Stanza - 漢語(文言)分詞器 - -Stanza - Chinese (Simplified) lemmatizer -Stanza - 漢語(簡體)詞形還原器 + +Stanza - Chinese (Simplified) word tokenizer +Stanza - 漢語(簡體)分詞器 - -Stanza - Chinese (Traditional) lemmatizer -Stanza - 漢語(繁體)詞形還原器 + +Stanza - Chinese (Traditional) word tokenizer +Stanza - 漢語(繁體)分詞器 - -Stanza - Church Slavonic (Old) lemmatizer -Stanza - 教會斯拉夫語(古)詞形還原器 + +Stanza - Church Slavonic (Old) word tokenizer +Stanza - 教會斯拉夫語(古)分詞器 - -Stanza - Coptic lemmatizer -Stanza - 科普特語詞形還原器 + +Stanza - 
Coptic word tokenizer +Stanza - 科普特語分詞器 - -Stanza - Croatian lemmatizer -Stanza - 克羅埃西亞語詞形還原器 + +Stanza - Croatian word tokenizer +Stanza - 克羅埃西亞語分詞器 - -Stanza - Czech lemmatizer -Stanza - 捷克語詞形還原器 + +Stanza - Czech word tokenizer +Stanza - 捷克語分詞器 - -Stanza - Danish lemmatizer -Stanza - 丹麥語詞形還原器 + +Stanza - Danish word tokenizer +Stanza - 丹麥語分詞器 - -Stanza - Dutch lemmatizer -Stanza - 荷蘭語詞形還原器 + +Stanza - Dutch word tokenizer +Stanza - 荷蘭語分詞器 - -Stanza - English lemmatizer -Stanza - 英語詞形還原器 + +Stanza - English word tokenizer +Stanza - 英語分詞器 - -Stanza - Erzya lemmatizer -Stanza - 埃爾齊亞語詞形還原器 + +Stanza - English (Old) word tokenizer +Stanza - 英語(古)分詞器 - -Stanza - Estonian lemmatizer -Stanza - 愛沙尼亞語詞形還原器 + +Stanza - Erzya word tokenizer +Stanza - 埃爾齊亞語分詞器 - -Stanza - Finnish lemmatizer -Stanza - 芬蘭語詞形還原器 + +Stanza - Estonian word tokenizer +Stanza - 愛沙尼亞語分詞器 - -Stanza - French lemmatizer -Stanza - 法語詞形還原器 + +Stanza - Faroese word tokenizer +Stanza - 法羅語分詞器 - -Stanza - French (Old) lemmatizer -Stanza - 法語(古)詞形還原器 + +Stanza - Finnish word tokenizer +Stanza - 芬蘭語分詞器 - -Stanza - Galician lemmatizer -Stanza - 加里西亞語詞形還原器 + +Stanza - French word tokenizer +Stanza - 法語分詞器 - -Stanza - German lemmatizer -Stanza - 德語詞形還原器 + +Stanza - French (Old) word tokenizer +Stanza - 法語(古)分詞器 - -Stanza - Gothic lemmatizer -Stanza - 哥特語詞形還原器 + +Stanza - Galician word tokenizer +Stanza - 加里西亞語分詞器 - -Stanza - Greek (Ancient) lemmatizer -Stanza - 希臘語(古)詞形還原器 + +Stanza - German word tokenizer +Stanza - 德語分詞器 - -Stanza - Greek (Modern) lemmatizer -Stanza - 希臘語(現代)詞形還原器 + +Stanza - Gothic word tokenizer +Stanza - 哥特語分詞器 - -Stanza - Hebrew (Ancient) lemmatizer -Stanza - 希伯來語(古)詞形還原器 + +Stanza - Greek (Ancient) word tokenizer +Stanza - 希臘語(古)分詞器 - -Stanza - Hebrew (Modern) lemmatizer -Stanza - 希伯來語(現代)詞形還原器 + +Stanza - Greek (Modern) word tokenizer +Stanza - 希臘語(現代)分詞器 - -Stanza - Hindi lemmatizer -Stanza - 印地語詞形還原器 + +Stanza - Hebrew (Ancient) word tokenizer +Stanza - 希伯來語(古)分詞器 - -Stanza - Hungarian 
lemmatizer -Stanza - 匈牙利語詞形還原器 + +Stanza - Hebrew (Modern) word tokenizer +Stanza - 希伯來語(現代)分詞器 - -Stanza - Icelandic lemmatizer -Stanza - 冰島語詞形還原器 + +Stanza - Hindi word tokenizer +Stanza - 印地語分詞器 - -Stanza - Indonesian lemmatizer -Stanza - 印度尼西亞語詞形還原器 + +Stanza - Hungarian word tokenizer +Stanza - 匈牙利語分詞器 - -Stanza - Irish lemmatizer -Stanza - 愛爾蘭語詞形還原器 + +Stanza - Icelandic word tokenizer +Stanza - 冰島語分詞器 - -Stanza - Italian lemmatizer -Stanza - 義大利語詞形還原器 + +Stanza - Indonesian word tokenizer +Stanza - 印度尼西亞語分詞器 - -Stanza - Japanese lemmatizer -Stanza - 日語詞形還原器 + +Stanza - Irish word tokenizer +Stanza - 愛爾蘭語分詞器 - -Stanza - Kazakh lemmatizer -Stanza - 哈薩克語詞形還原器 + +Stanza - Italian word tokenizer +Stanza - 義大利語分詞器 - -Stanza - Korean lemmatizer -Stanza - 韓語詞形還原器 + +Stanza - Japanese word tokenizer +Stanza - 日語分詞器 - -Stanza - Kurdish (Kurmanji) lemmatizer -Stanza - 庫爾德語(庫爾曼吉語)詞形還原器 + +Stanza - Kazakh word tokenizer +Stanza - 哈薩克語分詞器 - -Stanza - Kyrgyz lemmatizer -Stanza - 吉爾吉斯語詞形還原器 + +Stanza - Korean word tokenizer +Stanza - 韓語分詞器 - -Stanza - Latin lemmatizer -Stanza - 拉丁語詞形還原器 + +Stanza - Kurdish (Kurmanji) word tokenizer +Stanza - 庫爾德語(庫爾曼吉語)分詞器 - -Stanza - Latvian lemmatizer -Stanza - 拉脫維亞語詞形還原器 + +Stanza - Kyrgyz word tokenizer +Stanza - 吉爾吉斯語分詞器 - -Stanza - Ligurian lemmatizer -Stanza - 利古里亞語詞形還原器 + +Stanza - Latin word tokenizer +Stanza - 拉丁語分詞器 - -Stanza - Lithuanian lemmatizer -Stanza - 立陶宛語詞形還原器 + +Stanza - Latvian word tokenizer +Stanza - 拉脫維亞語分詞器 - -Stanza - Manx lemmatizer -Stanza - 馬恩語詞形還原器 + +Stanza - Ligurian word tokenizer +Stanza - 利古里亞語分詞器 - -Stanza - Marathi lemmatizer -Stanza - 馬拉地語詞形還原器 + +Stanza - Lithuanian word tokenizer +Stanza - 立陶宛語分詞器 - -Stanza - Nigerian Pidgin lemmatizer -Stanza - 奈及利亞皮欽語詞形還原器 + +Stanza - Maltese word tokenizer +Stanza - 馬耳他語分詞器 - -Stanza - Norwegian Bokmål lemmatizer -Stanza - 書面挪威語詞形還原器 + +Stanza - Manx word tokenizer +Stanza - 馬恩語分詞器 - -Stanza - Norwegian Nynorsk lemmatizer -Stanza - 新挪威語詞形還原器 + +Stanza - Marathi 
word tokenizer +Stanza - 馬拉地語分詞器 - -Stanza - Persian lemmatizer -Stanza - 波斯語詞形還原器 + +Stanza - Nigerian Pidgin word tokenizer +Stanza - 奈及利亞皮欽語分詞器 - -Stanza - Polish lemmatizer -Stanza - 波蘭語詞形還原器 + +Stanza - Norwegian (Bokmål) word tokenizer +Stanza - 挪威語(書面)分詞器 - -Stanza - Pomak lemmatizer -Stanza - 波馬克語詞形還原器 + +Stanza - Norwegian (Nynorsk) word tokenizer +Stanza - 挪威語(新)分詞器 - -Stanza - Portuguese lemmatizer -Stanza - 葡萄牙語詞形還原器 + +Stanza - Persian word tokenizer +Stanza - 波斯語分詞器 - -Stanza - Romanian lemmatizer -Stanza - 羅馬尼亞語詞形還原器 + +Stanza - Polish word tokenizer +Stanza - 波蘭語分詞器 - -Stanza - Russian lemmatizer -Stanza - 俄語詞形還原器 + +Stanza - Pomak word tokenizer +Stanza - 波馬克語分詞器 - -Stanza - Russian (Old) lemmatizer -Stanza - 俄語(古)詞形還原器 + +Stanza - Portuguese word tokenizer +Stanza - 葡萄牙語分詞器 - -Stanza - Sámi (Northern) lemmatizer -Stanza - 薩米語(北)詞形還原器 + +Stanza - Romanian word tokenizer +Stanza - 羅馬尼亞語分詞器 - -Stanza - Sanskrit lemmatizer -Stanza - 梵語詞形還原器 + +Stanza - Russian word tokenizer +Stanza - 俄語分詞器 - -Stanza - Scottish Gaelic lemmatizer -Stanza - 蘇格蘭蓋爾語詞形還原器 + +Stanza - Russian (Old) word tokenizer +Stanza - 俄語(古)分詞器 - -Stanza - Serbian (Latin) lemmatizer -Stanza - 塞爾維亞語(拉丁)詞形還原器 + +Stanza - Sámi (Northern) word tokenizer +Stanza - 薩米語(北)分詞器 - -Stanza - Slovak lemmatizer -Stanza - 斯洛伐克語詞形還原器 + +Stanza - Sanskrit word tokenizer +Stanza - 梵語分詞器 - -Stanza - Slovenian lemmatizer -Stanza - 斯洛維尼亞語詞形還原器 + +Stanza - Scottish Gaelic word tokenizer +Stanza - 蘇格蘭蓋爾語分詞器 - -Stanza - Sorbian (Upper) lemmatizer -Stanza - 索布語(上)詞形還原器 + +Stanza - Serbian (Latin script) word tokenizer +Stanza - 塞爾維亞語(拉丁文)分詞器 - -Stanza - Spanish lemmatizer -Stanza - 西班牙語詞形還原器 + +Stanza - Sindhi word tokenizer +Stanza - 信德語分詞器 - -Stanza - Swedish lemmatizer -Stanza - 瑞典語詞形還原器 + +Stanza - Slovak word tokenizer +Stanza - 斯洛伐克語分詞器 - -Stanza - Tamil lemmatizer -Stanza - 泰米爾語詞形還原器 + +Stanza - Slovene word tokenizer +Stanza - 斯洛維尼亞語分詞器 - -Stanza - Turkish lemmatizer -Stanza - 土耳其語詞形還原器 + +Stanza - 
Sorbian (Upper) word tokenizer +Stanza - 索布語(上)分詞器 - -Stanza - Ukrainian lemmatizer -Stanza - 烏克蘭語詞形還原器 + +Stanza - Spanish word tokenizer +Stanza - 西班牙語分詞器 - -Stanza - Urdu lemmatizer -Stanza - 烏爾都語詞形還原器 + +Stanza - Swedish word tokenizer +Stanza - 瑞典語分詞器 - -Stanza - Uyghur lemmatizer -Stanza - 維吾爾語詞形還原器 + +Stanza - Tamil word tokenizer +Stanza - 泰米爾語分詞器 - -Stanza - Welsh lemmatizer -Stanza - 威爾士語詞形還原器 + +Stanza - Telugu word tokenizer +Stanza - 泰盧固語分詞器 - -Stanza - Wolof lemmatizer -Stanza - 沃洛夫語詞形還原器 + +Stanza - Thai word tokenizer +Stanza - 泰語分詞器 - -LaoNLP - Lao stop word list -LaoNLP - 寮國語停用詞表 + +Stanza - Turkish word tokenizer +Stanza - 土耳其語分詞器 - -NLTK - Hebrew (Modern) stop word list -NLTK - 希伯來語(現代)停用詞表 + +Stanza - Ukrainian word tokenizer +Stanza - 烏克蘭語分詞器 - -Stanza - Afrikaans dependency parser -Stanza - 南非語依存分析器 + +Stanza - Urdu word tokenizer +Stanza - 烏爾都語分詞器 - -Stanza - Arabic dependency parser -Stanza - 阿拉伯語依存分析器 + +Stanza - Uyghur word tokenizer +Stanza - 維吾爾語分詞器 - -Stanza - Armenian (Eastern) dependency parser -Stanza - 亞美尼亞語(東)依存分析器 + +Stanza - Vietnamese word tokenizer +Stanza - 越南語分詞器 - -Stanza - Armenian (Western) dependency parser -Stanza - 亞美尼亞語(西)依存分析器 + +Stanza - Welsh word tokenizer +Stanza - 威爾士語分詞器 - -Stanza - Basque dependency parser -Stanza - 巴斯克語依存分析器 + +Stanza - Wolof word tokenizer +Stanza - 沃洛夫語分詞器 - -Stanza - Belarusian dependency parser -Stanza - 白俄羅斯語依存分析器 + +SudachiPy - Japanese word tokenizer (split mode A) +SudachiPy - 日語分詞器(切分模式 A) - -Stanza - Bulgarian dependency parser -Stanza - 保加利亞語依存分析器 + +SudachiPy - Japanese word tokenizer (split mode B) +SudachiPy - 日語分詞器(切分模式 B) - -Stanza - Buryat (Russia) dependency parser -Stanza - 布里亞特語(俄羅斯)依存分析器 + +SudachiPy - Japanese word tokenizer (split mode C) +SudachiPy - 日語分詞器(切分模式 C) - -Stanza - Catalan dependency parser -Stanza - 加泰羅尼亞語依存分析器 + +Underthesea - Vietnamese word tokenizer +Underthesea - 越南語分詞器 - -Stanza - Chinese (Classical) dependency parser -Stanza - 漢語(文言)依存分析器 + +Wordless 
- Chinese character tokenizer +Wordless - 漢語分字器 - -Stanza - Chinese (Simplified) dependency parser -Stanza - 漢語(簡體)依存分析器 + +Wordless - Japanese kanji tokenizer +Wordless - 日語分字器 - -Stanza - Chinese (Traditional) dependency parser -Stanza - 漢語(繁體)依存分析器 + +NLTK - Legality syllable tokenizer +NLTK - 合法性分音節器 - -Stanza - Church Slavonic (Old) dependency parser -Stanza - 教會斯拉夫語(古)依存分析器 + +NLTK - Sonority sequencing syllable tokenizer +NLTK - 響度順序分音節器 - -Stanza - Coptic dependency parser -Stanza - 科普特語依存分析器 + +Pyphen - Afrikaans syllable tokenizer +Pyphen - 南非語分音節器 - -Stanza - Croatian dependency parser -Stanza - 克羅埃西亞語依存分析器 + +Pyphen - Albanian syllable tokenizer +Pyphen - 阿爾巴尼亞語分音節器 - -Stanza - Czech dependency parser -Stanza - 捷克語依存分析器 + +Pyphen - Basque syllable tokenizer +Pyphen - 巴斯克語分音節器 - -Stanza - Danish dependency parser -Stanza - 丹麥語依存分析器 + +Pyphen - Belarusian syllable tokenizer +Pyphen - 白俄羅斯語分音節器 - -Stanza - Dutch dependency parser -Stanza - 荷蘭語依存分析器 + +Pyphen - Bulgarian syllable tokenizer +Pyphen - 保加利亞語分音節器 - -Stanza - English dependency parser -Stanza - 英語依存分析器 + +Pyphen - Catalan syllable tokenizer +Pyphen - 加泰羅尼亞語分音節器 - -Stanza - Erzya dependency parser -Stanza - 埃爾齊亞語依存分析器 + +Pyphen - Croatian syllable tokenizer +Pyphen - 克羅埃西亞語分音節器 - -Stanza - Estonian dependency parser -Stanza - 愛沙尼亞語依存分析器 + +Pyphen - Czech syllable tokenizer +Pyphen - 捷克語分音節器 - -Stanza - Faroese dependency parser -Stanza - 法羅語依存分析器 + +Pyphen - Danish syllable tokenizer +Pyphen - 丹麥語分音節器 - -Stanza - Finnish dependency parser -Stanza - 芬蘭語依存分析器 + +Pyphen - Dutch syllable tokenizer +Pyphen - 荷蘭語分音節器 - -Stanza - French dependency parser -Stanza - 法語依存分析器 + +Pyphen - English (United Kingdom) syllable tokenizer +Pyphen - 英語(英國)分音節器 - -Stanza - French (Old) dependency parser -Stanza - 法語(古)依存分析器 + +Pyphen - English (United States) syllable tokenizer +Pyphen - 英語(美國)分音節器 - -Stanza - Galician dependency parser -Stanza - 加里西亞語依存分析器 + +Pyphen - Esperanto syllable tokenizer +Pyphen - 世界語分音節器 - 
-Stanza - German dependency parser -Stanza - 德語依存分析器 + +Pyphen - Estonian syllable tokenizer +Pyphen - 愛沙尼亞語分音節器 - -Stanza - Gothic dependency parser -Stanza - 哥特語依存分析器 + +Pyphen - French syllable tokenizer +Pyphen - 法語分音節器 - -Stanza - Greek (Ancient) dependency parser -Stanza - 希臘語(古)依存分析器 + +Pyphen - Galician syllable tokenizer +Pyphen - 加里西亞語分音節器 - -Stanza - Greek (Modern) dependency parser -Stanza - 希臘語(現代)依存分析器 + +Pyphen - German (Austria) syllable tokenizer +Pyphen - 德語(奧地利)分音節器 - -Stanza - Hebrew (Ancient) dependency parser -Stanza - 希伯來語(古)依存分析器 + +Pyphen - German (Germany) syllable tokenizer +Pyphen - 德語(德國)分音節器 - -Stanza - Hebrew (Modern) dependency parser -Stanza - 希伯來語(現代)依存分析器 + +Pyphen - German (Switzerland) syllable tokenizer +Pyphen - 德語(瑞士)分音節器 - -Stanza - Hindi dependency parser -Stanza - 印地語依存分析器 + +Pyphen - Greek (Modern) syllable tokenizer +Pyphen - 希臘語(現代)分音節器 - -Stanza - Hungarian dependency parser -Stanza - 匈牙利語依存分析器 + +Pyphen - Hungarian syllable tokenizer +Pyphen - 匈牙利語分音節器 - -Stanza - Icelandic dependency parser -Stanza - 冰島語依存分析器 + +Pyphen - Icelandic syllable tokenizer +Pyphen - 冰島語分音節器 - -Stanza - Indonesian dependency parser -Stanza - 印度尼西亞語依存分析器 + +Pyphen - Indonesian syllable tokenizer +Pyphen - 印度尼西亞語分音節器 - -Stanza - Irish dependency parser -Stanza - 愛爾蘭語依存分析器 + +Pyphen - Italian syllable tokenizer +Pyphen - 義大利語分音節器 - -Stanza - Italian dependency parser -Stanza - 義大利語依存分析器 + +Pyphen - Lithuanian syllable tokenizer +Pyphen - 立陶宛語分音節器 - -Stanza - Japanese dependency parser -Stanza - 日語依存分析器 + +Pyphen - Latvian syllable tokenizer +Pyphen - 拉脫維亞語分音節器 - -Stanza - Kazakh dependency parser -Stanza - 哈薩克語依存分析器 + +Pyphen - Mongolian syllable tokenizer +Pyphen - 蒙古語分音節器 - -Stanza - Korean dependency parser -Stanza - 韓語依存分析器 + +Pyphen - Norwegian (Bokmål) syllable tokenizer +Pyphen - 挪威語(書面)分音節器 - -Stanza - Kurdish (Kurmanji) dependency parser -Stanza - 庫爾德語(庫爾曼吉語)依存分析器 + +Pyphen - Norwegian (Nynorsk) syllable tokenizer +Pyphen - 挪威語(新)分音節器 
- -Stanza - Kyrgyz dependency parser -Stanza - 吉爾吉斯語依存分析器 + +Pyphen - Polish syllable tokenizer +Pyphen - 波蘭語分音節器 - -Stanza - Latin dependency parser -Stanza - 拉丁語依存分析器 + +Pyphen - Portuguese (Brazil) syllable tokenizer +Pyphen - 葡萄牙語(巴西)分音節器 - -Stanza - Latvian dependency parser -Stanza - 拉脫維亞語依存分析器 - - - -Stanza - Ligurian dependency parser -Stanza - 利古里亞語依存分析器 + +Pyphen - Portuguese (Portugal) syllable tokenizer +Pyphen - 葡萄牙語(葡萄牙)分音節器 - -Stanza - Lithuanian dependency parser -Stanza - 立陶宛語依存分析器 + +Pyphen - Romanian syllable tokenizer +Pyphen - 羅馬尼亞語分音節器 - -Stanza - Maltese dependency parser -Stanza - 馬耳他語依存分析器 + +Pyphen - Russian syllable tokenizer +Pyphen - 俄語分音節器 - -Stanza - Manx dependency parser -Stanza - 馬恩語依存分析器 + +Pyphen - Serbian (Cyrillic script) syllable tokenizer +Pyphen - 塞爾維亞語(西里爾文)分音節器 - -Stanza - Marathi dependency parser -Stanza - 馬拉地語依存分析器 + +Pyphen - Serbian (Latin script) syllable tokenizer +Pyphen - 塞爾維亞語(拉丁文)分音節器 - -Stanza - Nigerian Pidgin dependency parser -Stanza - 奈及利亞皮欽語依存分析器 + +Pyphen - Slovak syllable tokenizer +Pyphen - 斯洛伐克語分音節器 - -Stanza - Norwegian Bokmål dependency parser -Stanza - 書面挪威語依存分析器 + +Pyphen - Slovene syllable tokenizer +Pyphen - 斯洛維尼亞語分音節器 - -Stanza - Norwegian Nynorsk dependency parser -Stanza - 新挪威語依存分析器 + +Pyphen - Spanish syllable tokenizer +Pyphen - 西班牙語分音節器 - -Stanza - Persian dependency parser -Stanza - 波斯語依存分析器 + +Pyphen - Swedish syllable tokenizer +Pyphen - 瑞典語分音節器 - -Stanza - Polish dependency parser -Stanza - 波蘭語依存分析器 + +Pyphen - Telugu syllable tokenizer +Pyphen - 泰盧固語分音節器 - -Stanza - Pomak dependency parser -Stanza - 波馬克語依存分析器 + +Pyphen - Thai syllable tokenizer +Pyphen - 泰語分音節器 - -Stanza - Portuguese dependency parser -Stanza - 葡萄牙語依存分析器 + +Pyphen - Ukrainian syllable tokenizer +Pyphen - 烏克蘭語分音節器 - -Stanza - Romanian dependency parser -Stanza - 羅馬尼亞語依存分析器 + +Pyphen - Zulu syllable tokenizer +Pyphen - 祖魯語分音節器 - -Stanza - Russian dependency parser -Stanza - 俄語依存分析器 + +PyThaiNLP - Syllable dictionary 
+PyThaiNLP - 音節詞典 - -Stanza - Russian (Old) dependency parser -Stanza - 俄語(古)依存分析器 + +botok - Tibetan part-of-speech tagger +botok - 藏語詞性標註器 - -Stanza - Sámi (Northern) dependency parser -Stanza - 薩米語(北)依存分析器 + +khmer-nltk - Khmer part-of-speech tagger +khmer-nltk - 柬埔寨語詞性標註器 - -Stanza - Sanskrit dependency parser -Stanza - 梵語依存分析器 + +LaoNLP - Yunshan Cup 2020 +LaoNLP - 2020 雲山杯 - -Stanza - Scottish Gaelic dependency parser -Stanza - 蘇格蘭蓋爾語依存分析器 + +NLTK - English perceptron part-of-speech tagger +NLTK - 英語感知機詞性標註器 - -Stanza - Serbian (Latin) dependency parser -Stanza - 塞爾維亞語(拉丁)依存分析器 + +NLTK - Russian perceptron part-of-speech tagger +NLTK - 俄語感知機詞性標註器 - -Stanza - Slovak dependency parser -Stanza - 斯洛伐克語依存分析器 + +pymorphy3 - Morphological analyzer +pymorphy3 - 形態分析器 - -Stanza - Slovenian dependency parser -Stanza - 斯洛維尼亞語依存分析器 + +PyThaiNLP - Perceptron part-of-speech tagger (Blackboard) +PyThaiNLP - 感知機詞性標註器(Blackboard) - -Stanza - Sorbian (Upper) dependency parser -Stanza - 索布語(上)依存分析器 + +PyThaiNLP - Perceptron part-of-speech tagger (ORCHID) +PyThaiNLP - 感知機詞性標註器(ORCHID) - -Stanza - Spanish dependency parser -Stanza - 西班牙語依存分析器 + +PyThaiNLP - Perceptron part-of-speech tagger (PUD) +PyThaiNLP - 感知機詞性標註器(PUD) - -Stanza - Swedish dependency parser -Stanza - 瑞典語依存分析器 + +spaCy - Catalan part-of-speech tagger +spaCy - 加泰羅尼亞語詞性標註器 - -Stanza - Tamil dependency parser -Stanza - 泰米爾語依存分析器 + +spaCy - Chinese part-of-speech tagger +spaCy - 漢語詞性標註器 - -Stanza - Telugu dependency parser -Stanza - 泰盧固語依存分析器 + +spaCy - Croatian part-of-speech tagger +spaCy - 克羅埃西亞語詞性標註器 - -Stanza - Turkish dependency parser -Stanza - 土耳其語依存分析器 + +spaCy - Danish part-of-speech tagger +spaCy - 丹麥語詞性標註器 - -Stanza - Ukrainian dependency parser -Stanza - 烏克蘭語依存分析器 + +spaCy - Dutch part-of-speech tagger +spaCy - 荷蘭語詞性標註器 - -Stanza - Urdu dependency parser -Stanza - 烏爾都語依存分析器 + +spaCy - English part-of-speech tagger +spaCy - 英語詞性標註器 - -Stanza - Uyghur dependency parser -Stanza - 維吾爾語依存分析器 + +spaCy - 
Finnish part-of-speech tagger +spaCy - 芬蘭語詞性標註器 - -Stanza - Vietnamese dependency parser -Stanza - 越南語依存分析器 + +spaCy - French part-of-speech tagger +spaCy - 法語詞性標註器 - -Stanza - Welsh dependency parser -Stanza - 威爾士語依存分析器 + +spaCy - German part-of-speech tagger +spaCy - 德語詞性標註器 - -Stanza - Wolof dependency parser -Stanza - 沃洛夫語依存分析器 + +spaCy - Greek (Modern) part-of-speech tagger +spaCy - 希臘語(現代)詞性標註器 - -Stanza - Chinese (Simplified) sentiment analyzer -Stanza - 漢語(簡體)情感分析器 + +spaCy - Italian part-of-speech tagger +spaCy - 義大利語詞性標註器 - -Stanza - German sentiment analyzer -Stanza - 德語情感分析器 + +spaCy - Japanese part-of-speech tagger +spaCy - 日語詞性標註器 - -Stanza - English sentiment analyzer -Stanza - 英語情感分析器 + +spaCy - Korean part-of-speech tagger +spaCy - 韓語詞性標註器 - -Stanza - Marathi sentiment analyzer -Stanza - 馬拉地語情感分析器 + +spaCy - Lithuanian part-of-speech tagger +spaCy - 立陶宛語詞性標註器 - -Stanza - Spanish sentiment analyzer -Stanza - 西班牙語情感分析器 + +spaCy - Macedonian part-of-speech tagger +spaCy - 馬其頓語詞性標註器 - -Stanza - Vietnamese sentiment analyzer -Stanza - 越南語情感分析器 + +spaCy - Norwegian (Bokmål) part-of-speech tagger +spaCy - 挪威語(書面)詞性標註器 - - -wl_boxes - -Yes - + +spaCy - Polish part-of-speech tagger +spaCy - 波蘭語詞性標註器 - -No - + +spaCy - Portuguese part-of-speech tagger +spaCy - 葡萄牙語詞性標註器 - -No limit -無限制 + +spaCy - Romanian part-of-speech tagger +spaCy - 羅馬尼亞語詞性標註器 - -Sync -同步 + +spaCy - Russian part-of-speech tagger +spaCy - 俄語詞性標註器 - -From - + +spaCy - Slovene part-of-speech tagger +spaCy - 斯洛維尼亞語詞性標註器 - -to - + +spaCy - Spanish part-of-speech tagger +spaCy - 西班牙語詞性標註器 - -L - + +spaCy - Swedish part-of-speech tagger +spaCy - 瑞典語詞性標註器 - -R - + +spaCy - Ukrainian part-of-speech tagger +spaCy - 烏克蘭語詞性標註器 - - -wl_buttons - -Browse... -瀏覽... 
+ +Stanza - Afrikaans part-of-speech tagger +Stanza - 南非語詞性標註器 - -Pick Color -選擇顏色 + +Stanza - Arabic part-of-speech tagger +Stanza - 阿拉伯語詞性標註器 - -Transparent -透明 + +Stanza - Armenian (Classical) part-of-speech tagger +Stanza - 亞美尼亞語(古)詞性標註器 - -Restore defaults -恢復預設值 + +Stanza - Armenian (Eastern) part-of-speech tagger +Stanza - 亞美尼亞語(東)詞性標註器 - - -wl_checks_work_area - -Missing Search Terms -缺少檢索項 + +Stanza - Armenian (Western) part-of-speech tagger +Stanza - 亞美尼亞語(西)詞性標註器 - - - <div> - You have not specified any search terms yet, please enter one in the input box under "<span style="color: #F00; font-weight: bold;">Search term</span>" first. - </div> - - - <div> - 你還未指定任何檢索項,請先在“<span style="color: #F00; font-weight: bold;">檢索項</span>”下的輸入框中指定一項。 - </div> - + +Stanza - Basque part-of-speech tagger +Stanza - 巴斯克語詞性標註器 - -No Results -無結果 + +Stanza - Belarusian part-of-speech tagger +Stanza - 白俄羅斯語詞性標註器 - - - <div>Data processing has completed successfully, but there are no results to display.</div> - <div>You can change your settings and try again.</div> - - - <div>資料處理操作已完成,但沒有可顯示的結果。</div> - <div>你可以更改你的設定後重試。</div> - + +Stanza - Bulgarian part-of-speech tagger +Stanza - 保加利亞語詞性標註器 - -Language support unavailable! -語種支援不可用! + +Stanza - Buryat (Russia) part-of-speech tagger +Stanza - 布里亞特語(俄羅斯)詞性標註器 - -Missing search terms! -缺少檢索項! + +Stanza - Catalan part-of-speech tagger +Stanza - 加泰羅尼亞語詞性標註器 - -Table generated successfully. -已成功生成表格。 + +Stanza - Chinese (Classical) part-of-speech tagger +Stanza - 漢語(文言)詞性標註器 - -Figure generated successfully. -已成功生成圖表。 + +Stanza - Chinese (Simplified) part-of-speech tagger +Stanza - 漢語(簡體)詞性標註器 - -No results to display. -無結果可供顯示。 + +Stanza - Chinese (Traditional) part-of-speech tagger +Stanza - 漢語(繁體)詞性標註器 - -A fatal error has just occurred! -剛才發生了一個致命錯誤! 
+ +Stanza - Church Slavonic (Old) part-of-speech tagger +Stanza - 教會斯拉夫語(古)詞性標註器 - -Syllable tokenization -分音節 + +Stanza - Coptic part-of-speech tagger +Stanza - 科普特語詞性標註器 - -Part-of-speech tagging -詞性標註 + +Stanza - Croatian part-of-speech tagger +Stanza - 克羅埃西亞語詞性標註器 - -Lemmatization -詞形還原 + +Stanza - Czech part-of-speech tagger +Stanza - 捷克語詞性標註器 - -Dependency parsing -依存分析 + +Stanza - Danish part-of-speech tagger +Stanza - 丹麥語詞性標註器 - -No Language Support -無語種支援 + +Stanza - Dutch part-of-speech tagger +Stanza - 荷蘭語詞性標註器 - -Type of Language Support -語種支援型別 + +Stanza - English part-of-speech tagger +Stanza - 英語詞性標註器 - -File Name -檔名 + +Stanza - English (Old) part-of-speech tagger +Stanza - 英語(古)詞性標註器 - -Language -語種 + +Stanza - Erzya part-of-speech tagger +Stanza - 埃爾齊亞語詞性標註器 - - - <div> - The process cannot be done because language support is unavailable for the following files. Please check your language settings or try again with files of different languages. - </div> - - - <div> - 由於下列檔案缺少語種支援,因此操作無法完成。請檢查你的語種設定或使用其他語種的檔案重試。 - </div> - + +Stanza - Estonian part-of-speech tagger +Stanza - 愛沙尼亞語詞性標註器 - -Model downloaded successfully. -已成功下載模型。 + +Stanza - Faroese part-of-speech tagger +Stanza - 法羅語詞性標註器 - -A network error occurred while downloading the model! -下載模型時發生網路錯誤! 
+ +Stanza - Finnish part-of-speech tagger +Stanza - 芬蘭語詞性標註器 - - -wl_colligation_extractor - -None - + +Stanza - French part-of-speech tagger +Stanza - 法語詞性標註器 - -Within sentence segments -句段內 + +Stanza - French (Old) part-of-speech tagger +Stanza - 法語(古)詞性標註器 - -Within sentences -句子內 + +Stanza - Galician part-of-speech tagger +Stanza - 加里西亞語詞性標註器 - -Within paragraphs -段落內 + +Stanza - German part-of-speech tagger +Stanza - 德語詞性標註器 - - -wl_collocation_extractor - -None - + +Stanza - Gothic part-of-speech tagger +Stanza - 哥特語詞性標註器 - -Within sentence segments -句段內 + +Stanza - Greek (Ancient) part-of-speech tagger +Stanza - 希臘語(古)詞性標註器 - -Within sentences -句子內 + +Stanza - Greek (Modern) part-of-speech tagger +Stanza - 希臘語(現代)詞性標註器 - -Within paragraphs -段落內 + +Stanza - Hebrew (Ancient) part-of-speech tagger +Stanza - 希伯來語(古)詞性標註器 - - -wl_conversion - -Yes - + +Stanza - Hebrew (Modern) part-of-speech tagger +Stanza - 希伯來語(現代)詞性標註器 - -No - + +Stanza - Hindi part-of-speech tagger +Stanza - 印地語詞性標註器 - - -wl_dependency_parsing - -Dependency Graphs Generated Successfully -成功生成依存圖 + +Stanza - Hungarian part-of-speech tagger +Stanza - 匈牙利語詞性標註器 - - - <div>Dependency graphs has been successfully generated and exported under folder: {}</div> - - <div>If the figures are not displayed automatically, you may try opening them manually using web browsers or image viewers installed on your computer, or save copies of them in other locations for later use.</div> - - - <div>已成功生成依存圖並匯出至該資料夾下:{}</div> - - <div>若圖表未自動顯示,你可使用電腦上已安裝的瀏覽器或看圖軟體將其手動開啟,或將圖表副本儲存至其他位置以待後用。</div> - + +Stanza - Icelandic part-of-speech tagger +Stanza - 冰島語詞性標註器 - - -wl_dialogs_errs - -Fatal Error -致命錯誤 + +Stanza - Indonesian part-of-speech tagger +Stanza - 印度尼西亞語詞性標註器 - - - <div>A fatal error has occurred, please <b>send the following error messages</b> to {} in order to <b>contact the author for support</b>!</div> - - - <div>剛才發生了一個致命錯誤,請<b>將下方錯誤資訊</b>傳送至{}來獲取<b>作者的幫助</b>!</div> - + +Stanza - Irish part-of-speech 
tagger +Stanza - 愛爾蘭語詞性標註器 - -Network Error -網路錯誤 - + +Stanza - Italian part-of-speech tagger +Stanza - 義大利語詞性標註器 + - - - <div>A network error occurred while downloading the model, please check your internet connections and proxy settings in <b>Menu → Preferences → General → Proxy Settings</b> if you are using a proxy.</div> - <div>If the network issue persists, please <b>send the following error messages</b> to {} in order to <b>contact the author for support</b>.</div> - - - <div>下載模型時發生了網路錯誤,請檢查你的網路連線,如果你使用了代理,那麼請一併檢查<b>選單 → 偏好 → 全域性 → 代理設定</b>中的代理設定。</div> - <div>如果網路問題仍然存在,請<b>將下方錯誤資訊</b>傳送至{}來獲取<b>作者的幫助</b>。</div> - + +Stanza - Japanese part-of-speech tagger +Stanza - 日語詞性標註器 + + + +Stanza - Kazakh part-of-speech tagger +Stanza - 哈薩克語詞性標註器 + + + +Stanza - Korean part-of-speech tagger +Stanza - 韓語詞性標註器 + + + +Stanza - Kurdish (Kurmanji) part-of-speech tagger +Stanza - 庫爾德語(庫爾曼吉語)詞性標註器 + + + +Stanza - Kyrgyz part-of-speech tagger +Stanza - 吉爾吉斯語詞性標註器 + + + +Stanza - Latin part-of-speech tagger +Stanza - 拉丁語詞性標註器 + + + +Stanza - Latvian part-of-speech tagger +Stanza - 拉脫維亞語詞性標註器 + + + +Stanza - Ligurian part-of-speech tagger +Stanza - 利古里亞語詞性標註器 + + + +Stanza - Lithuanian part-of-speech tagger +Stanza - 立陶宛語詞性標註器 + + + +Stanza - Maltese part-of-speech tagger +Stanza - 馬耳他語詞性標註器 + + + +Stanza - Manx part-of-speech tagger +Stanza - 馬恩語詞性標註器 + + + +Stanza - Marathi part-of-speech tagger +Stanza - 馬拉地語詞性標註器 + + + +Stanza - Nigerian Pidgin part-of-speech tagger +Stanza - 奈及利亞皮欽語詞性標註器 + + + +Stanza - Norwegian (Bokmål) part-of-speech tagger +Stanza - 挪威語(書面)詞性標註器 + + + +Stanza - Norwegian (Nynorsk) part-of-speech tagger +Stanza - 挪威語(新)詞性標註器 + + + +Stanza - Persian part-of-speech tagger +Stanza - 波斯語詞性標註器 + + + +Stanza - Polish part-of-speech tagger +Stanza - 波蘭語詞性標註器 + + + +Stanza - Pomak part-of-speech tagger +Stanza - 波馬克語詞性標註器 + + + +Stanza - Portuguese part-of-speech tagger +Stanza - 葡萄牙語詞性標註器 + + + +Stanza - Romanian part-of-speech tagger +Stanza - 羅馬尼亞語詞性標註器 + 
+ + +Stanza - Russian part-of-speech tagger +Stanza - 俄語詞性標註器 + + + +Stanza - Russian (Old) part-of-speech tagger +Stanza - 俄語(古)詞性標註器 + + + +Stanza - Sámi (Northern) part-of-speech tagger +Stanza - 薩米語(北)詞性標註器 + + + +Stanza - Sanskrit part-of-speech tagger +Stanza - 梵語詞性標註器 + + + +Stanza - Scottish Gaelic part-of-speech tagger +Stanza - 蘇格蘭蓋爾語詞性標註器 + + + +Stanza - Serbian (Latin script) part-of-speech tagger +Stanza - 塞爾維亞語(拉丁文)詞性標註器 + + + +Stanza - Sindhi part-of-speech tagger +Stanza - 信德語詞性標註器 + + + +Stanza - Slovak part-of-speech tagger +Stanza - 斯洛伐克語詞性標註器 + + + +Stanza - Slovene part-of-speech tagger +Stanza - 斯洛維尼亞語詞性標註器 + + + +Stanza - Sorbian (Upper) part-of-speech tagger +Stanza - 索布語(上)詞性標註器 + + + +Stanza - Spanish part-of-speech tagger +Stanza - 西班牙語詞性標註器 + + + +Stanza - Swedish part-of-speech tagger +Stanza - 瑞典語詞性標註器 + + + +Stanza - Tamil part-of-speech tagger +Stanza - 泰米爾語詞性標註器 + + + +Stanza - Telugu part-of-speech tagger +Stanza - 泰盧固語詞性標註器 + + + +Stanza - Turkish part-of-speech tagger +Stanza - 土耳其語詞性標註器 + + + +Stanza - Ukrainian part-of-speech tagger +Stanza - 烏克蘭語詞性標註器 + + + +Stanza - Urdu part-of-speech tagger +Stanza - 烏爾都語詞性標註器 + + + +Stanza - Uyghur part-of-speech tagger +Stanza - 維吾爾語詞性標註器 + + + +Stanza - Vietnamese part-of-speech tagger +Stanza - 越南語詞性標註器 + + + +Stanza - Welsh part-of-speech tagger +Stanza - 威爾士語詞性標註器 + + + +Stanza - Wolof part-of-speech tagger +Stanza - 沃洛夫語詞性標註器 + + + +SudachiPy - Japanese part-of-speech tagger +SudachiPy - 日語詞性標註器 + + + +Underthesea - Vietnamese part-of-speech tagger +Underthesea - 越南語詞性標註器 + + + +botok - Tibetan lemmatizer +botok - 藏語詞形還原器 + + + +NLTK - WordNet lemmatizer +NLTK - WordNet 詞形還原器 + + + +simplemma - Albanian lemmatizer +simplemma - 阿爾巴尼亞語詞形還原器 + + + +simplemma - Armenian lemmatizer +simplemma - 亞美尼亞語詞形還原器 + + + +simplemma - Asturian lemmatizer +simplemma - 阿斯圖里亞斯語詞形還原器 + + + +simplemma - Bulgarian lemmatizer +simplemma - 保加利亞語詞形還原器 + + + +simplemma - Catalan lemmatizer +simplemma - 
加泰羅尼亞語詞形還原器 + + + +simplemma - Czech lemmatizer +simplemma - 捷克語詞形還原器 + + + +simplemma - Danish lemmatizer +simplemma - 丹麥語詞形還原器 + + + +simplemma - Dutch lemmatizer +simplemma - 荷蘭語詞形還原器 + + + +simplemma - English lemmatizer +simplemma - 英語詞形還原器 + + + +simplemma - English (Middle) lemmatizer +simplemma - 英語(中古)詞形還原器 + + + +simplemma - Estonian lemmatizer +simplemma - 愛沙尼亞語詞形還原器 + + + +simplemma - Finnish lemmatizer +simplemma - 芬蘭語詞形還原器 + + + +simplemma - French lemmatizer +simplemma - 法語詞形還原器 + + + +simplemma - Galician lemmatizer +simplemma - 加里西亞語詞形還原器 + + + +simplemma - Georgian lemmatizer +simplemma - 喬治亞語詞形還原器 + + + +simplemma - German lemmatizer +simplemma - 德語詞形還原器 + + + +simplemma - Greek (Modern) lemmatizer +simplemma - 希臘語(現代)詞形還原器 + + + +simplemma - Hindi lemmatizer +simplemma - 印地語詞形還原器 + + + +simplemma - Hungarian lemmatizer +simplemma - 匈牙利語詞形還原器 + + + +simplemma - Icelandic lemmatizer +simplemma - 冰島語詞形還原器 + + + +simplemma - Indonesian lemmatizer +simplemma - 印度尼西亞語詞形還原器 + + + +simplemma - Irish lemmatizer +simplemma - 愛爾蘭語詞形還原器 + + + +simplemma - Italian lemmatizer +simplemma - 義大利語詞形還原器 + + + +simplemma - Latin lemmatizer +simplemma - 拉丁語詞形還原器 + + + +simplemma - Latvian lemmatizer +simplemma - 拉脫維亞語詞形還原器 + + + +simplemma - Lithuanian lemmatizer +simplemma - 立陶宛語詞形還原器 + + + +simplemma - Luxembourgish lemmatizer +simplemma - 盧森堡語詞形還原器 + + + +simplemma - Macedonian lemmatizer +simplemma - 馬其頓語詞形還原器 + + + +simplemma - Malay lemmatizer +simplemma - 馬來語詞形還原器 + + + +simplemma - Manx lemmatizer +simplemma - 馬恩語詞形還原器 + + + +simplemma - Norwegian (Bokmål) lemmatizer +simplemma - 挪威語(書面)詞形還原器 + + + +simplemma - Norwegian (Nynorsk) lemmatizer +simplemma - 挪威語(新)詞形還原器 + + + +simplemma - Persian lemmatizer +simplemma - 波斯語詞形還原器 + + + +simplemma - Polish lemmatizer +simplemma - 波蘭語詞形還原器 + + + +simplemma - Portuguese lemmatizer +simplemma - 葡萄牙語詞形還原器 + + + +simplemma - Romanian lemmatizer +simplemma - 羅馬尼亞語詞形還原器 + + + +simplemma - Russian lemmatizer +simplemma - 
俄語詞形還原器 + + + +simplemma - Sámi (Northern) lemmatizer +simplemma - 薩米語(北)詞形還原器 + + + +simplemma - Scottish Gaelic lemmatizer +simplemma - 蘇格蘭蓋爾語詞形還原器 + + + +simplemma - Serbo-Croatian lemmatizer +simplemma - 塞爾維亞-克羅埃西亞語詞形還原器 + + + +simplemma - Slovak lemmatizer +simplemma - 斯洛伐克語詞形還原器 + + + +simplemma - Slovene lemmatizer +simplemma - 斯洛維尼亞語詞形還原器 + + + +simplemma - Spanish lemmatizer +simplemma - 西班牙語詞形還原器 + + + +simplemma - Swahili lemmatizer +simplemma - 斯瓦西里語詞形還原器 + + + +simplemma - Swedish lemmatizer +simplemma - 瑞典語詞形還原器 + + + +simplemma - Tagalog lemmatizer +simplemma - 他加祿語詞形還原器 + + + +simplemma - Turkish lemmatizer +simplemma - 土耳其語詞形還原器 + + + +simplemma - Ukrainian lemmatizer +simplemma - 烏克蘭語詞形還原器 + + + +simplemma - Welsh lemmatizer +simplemma - 威爾士語詞形還原器 + + + +spaCy - Bengali lemmatizer +spaCy - 孟加拉語詞形還原器 + + + +spaCy - Catalan lemmatizer +spaCy - 加泰羅尼亞語詞形還原器 + + + +spaCy - Croatian lemmatizer +spaCy - 克羅埃西亞語詞形還原器 + + + +spaCy - Czech lemmatizer +spaCy - 捷克語詞形還原器 + + + +spaCy - Danish lemmatizer +spaCy - 丹麥語詞形還原器 + + + +spaCy - Dutch lemmatizer +spaCy - 荷蘭語詞形還原器 + + + +spaCy - English lemmatizer +spaCy - 英語詞形還原器 - - -wl_fig_freqs - -Total -合計 + +spaCy - Finnish lemmatizer +spaCy - 芬蘭語詞形還原器 - -Token -形符 + +spaCy - French lemmatizer +spaCy - 法語詞形還原器 - -N-gram -n 元組 + +spaCy - German lemmatizer +spaCy - 德語詞形還原器 - -Collocate -搭配詞 + +spaCy - Greek (Ancient) lemmatizer +spaCy - 希臘語(古)詞形還原器 - -Keyword -關鍵詞 + +spaCy - Greek (Modern) lemmatizer +spaCy - 希臘語(現代)詞形還原器 - -Reference files -參照檔案 + +spaCy - Hungarian lemmatizer +spaCy - 匈牙利語詞形還原器 - -Line chart -折線圖 + +spaCy - Indonesian lemmatizer +spaCy - 印度尼西亞語詞形還原器 - -Word cloud -詞雲圖 + +spaCy - Irish lemmatizer +spaCy - 愛爾蘭語詞形還原器 - -Network graph -網路圖 + +spaCy - Italian lemmatizer +spaCy - 義大利語詞形還原器 - - -wl_fig_stats - -Total -合計 + +spaCy - Japanese lemmatizer +spaCy - 日語詞形還原器 - -p-value -p 值 + +spaCy - Korean lemmatizer +spaCy - 韓語詞形還原器 - -Line chart -折線圖 + +spaCy - Lithuanian lemmatizer +spaCy - 立陶宛語詞形還原器 - 
-Token -形符 + +spaCy - Luxembourgish lemmatizer +spaCy - 盧森堡語詞形還原器 - -N-gram -n 元組 + +spaCy - Macedonian lemmatizer +spaCy - 馬其頓語詞形還原器 - -Collocate -搭配詞 + +spaCy - Norwegian (Bokmål) lemmatizer +spaCy - 挪威語(書面)詞形還原器 - -Keyword -關鍵詞 + +spaCy - Persian lemmatizer +spaCy - 波斯語詞形還原器 - -Word cloud -詞雲圖 + +spaCy - Polish lemmatizer +spaCy - 波蘭語詞形還原器 - -Network graph -網路圖 + +spaCy - Portuguese lemmatizer +spaCy - 葡萄牙語詞形還原器 - - -wl_figs - -Frequency -頻數 + +spaCy - Romanian lemmatizer +spaCy - 羅馬尼亞語詞形還原器 - -^[LR][1-9][0-9]*$ -^[左右][1-9][0-9]*$ + +spaCy - Russian lemmatizer +spaCy - 俄語詞形還原器 - -Cumulative Percentage Frequency -累加百分比頻數 + +spaCy - Serbian (Cyrillic script) lemmatizer +spaCy - 塞爾維亞語(西里爾文)詞形還原器 - -Cumulative Frequency -累加頻數 + +spaCy - Slovene lemmatizer +spaCy - 斯洛維尼亞語詞形還原器 - -Percentage Frequency -百分比頻數 + +spaCy - Spanish lemmatizer +spaCy - 西班牙語詞形還原器 - -p-value -p 值 + +spaCy - Swedish lemmatizer +spaCy - 瑞典語詞形還原器 - -Custom -自定義 + +spaCy - Tagalog lemmatizer +spaCy - 他加祿語詞形還原器 - -Monochrome -單色 + +spaCy - Turkish lemmatizer +spaCy - 土耳其語詞形還原器 - -Colormap -色譜 + +spaCy - Ukrainian lemmatizer +spaCy - 烏克蘭語詞形還原器 - - -wl_lists - -New search term -新檢索項 + +spaCy - Urdu lemmatizer +spaCy - 烏爾都語詞形還原器 - -New stop word -新停用詞 + +Stanza - Afrikaans lemmatizer +Stanza - 南非語詞形還原器 - -New item -新列表項 + +Stanza - Arabic lemmatizer +Stanza - 阿拉伯語詞形還原器 - -Add -新增 + +Stanza - Armenian (Classical) lemmatizer +Stanza - 亞美尼亞語(古)詞形還原器 - -Insert -插入 + +Stanza - Armenian (Eastern) lemmatizer +Stanza - 亞美尼亞語(東)詞形還原器 - -Remove -移除 + +Stanza - Armenian (Western) lemmatizer +Stanza - 亞美尼亞語(西)詞形還原器 - -Clear -清空 + +Stanza - Basque lemmatizer +Stanza - 巴斯克語詞形還原器 - -Duplicates Found -發現重複項 + +Stanza - Belarusian lemmatizer +Stanza - 白俄羅斯語詞形還原器 - - - <div>The item that you have just edited already exists in the list, please specify another one!</div> - - - <div>你剛才編輯的列表項已存在於列表中,請另外指定一項!</div> - + +Stanza - Bulgarian lemmatizer +Stanza - 保加利亞語詞形還原器 - -Import -匯入 + +Stanza - Buryat (Russia) lemmatizer 
+Stanza - 布里亞特語(俄羅斯)詞形還原器 - -Export -匯出 + +Stanza - Catalan lemmatizer +Stanza - 加泰羅尼亞語詞形還原器 - -Import from Files -從檔案匯入 + +Stanza - Chinese (Classical) lemmatizer +Stanza - 漢語(文言)詞形還原器 - -Text files (*.txt) -文字檔案 (*.txt) + +Stanza - Chinese (Simplified) lemmatizer +Stanza - 漢語(簡體)詞形還原器 - -Import Error -匯入時出錯 + +Stanza - Chinese (Traditional) lemmatizer +Stanza - 漢語(繁體)詞形還原器 - - - <div> - An error occurred during import, please check the following files and try again. - </div> - - - <div> - 匯入時發生了一個錯誤,請檢查下列檔案後重試。 - </div> - + +Stanza - Church Slavonic (Old) lemmatizer +Stanza - 教會斯拉夫語(古)詞形還原器 - -Empty file -空檔案 + +Stanza - Coptic lemmatizer +Stanza - 科普特語詞形還原器 - -An error occurred during import! -匯入時發生了一個錯誤! + +Stanza - Croatian lemmatizer +Stanza - 克羅埃西亞語詞形還原器 - -item - + +Stanza - Czech lemmatizer +Stanza - 捷克語詞形還原器 + + + +Stanza - Danish lemmatizer +Stanza - 丹麥語詞形還原器 - -items - + +Stanza - Dutch lemmatizer +Stanza - 荷蘭語詞形還原器 - -{} {} has been successfully imported into the list. -已成功匯入 {} {}至列表中。 + +Stanza - English lemmatizer +Stanza - 英語詞形還原器 - -Export to File -匯出至檔案 + +Stanza - English (Old) lemmatizer +Stanza - 英語(古)詞形還原器 - -Export Completed -匯出完成 + +Stanza - Erzya lemmatizer +Stanza - 埃爾齊亞語詞形還原器 - - - <div>The list has been successfully exported to "{}".</div> - - - <div>已成功匯出列表至“{}”。</div> - + +Stanza - Estonian lemmatizer +Stanza - 愛沙尼亞語詞形還原器 - - -wl_measure_utils - -Absolute frequency -絕對頻數 + +Stanza - Finnish lemmatizer +Stanza - 芬蘭語詞形還原器 - -Relative frequency -相對頻數 + +Stanza - French lemmatizer +Stanza - 法語詞形還原器 - - -wl_measures_lexical_diversity - -Rank-frequency distribution -頻數排序分佈 + +Stanza - French (Old) lemmatizer +Stanza - 法語(古)詞形還原器 - -Frequency spectrum -頻數譜 + +Stanza - Galician lemmatizer +Stanza - 加里西亞語詞形還原器 - - -wl_measures_readability - -Policy one - + +Stanza - German lemmatizer +Stanza - 德語詞形還原器 - -Policy two - + +Stanza - Gothic lemmatizer +Stanza - 哥特語詞形還原器 - -Original -原版 + +Stanza - Greek (Ancient) lemmatizer +Stanza - 希臘語(古)詞形還原器 - 
-New -新版 + +Stanza - Greek (Modern) lemmatizer +Stanza - 希臘語(現代)詞形還原器 - -Navy -海軍版 + +Stanza - Hebrew (Ancient) lemmatizer +Stanza - 希伯來語(古)詞形還原器 - - -wl_measures_statistical_significance - -Two-tailed -雙尾 + +Stanza - Hebrew (Modern) lemmatizer +Stanza - 希伯來語(現代)詞形還原器 - -Left-tailed -左尾 + +Stanza - Hindi lemmatizer +Stanza - 印地語詞形還原器 - -Right-tailed -右尾 + +Stanza - Hungarian lemmatizer +Stanza - 匈牙利語詞形還原器 - - -wl_misc - -minute - + +Stanza - Icelandic lemmatizer +Stanza - 冰島語詞形還原器 - -minutes - + +Stanza - Indonesian lemmatizer +Stanza - 印度尼西亞語詞形還原器 - -(In {} {} {:.2f} seconds) -(耗時 {} {} {:.2f} 秒) + +Stanza - Irish lemmatizer +Stanza - 愛爾蘭語詞形還原器 - -(In -(耗時 + +Stanza - Italian lemmatizer +Stanza - 義大利語詞形還原器 - - -wl_profiler - -Automated Arabic Readability Index - + +Stanza - Japanese lemmatizer +Stanza - 日語詞形還原器 - -Automated Readability Index - + +Stanza - Kazakh lemmatizer +Stanza - 哈薩克語詞形還原器 - -Coleman-Liau Index - + +Stanza - Korean lemmatizer +Stanza - 韓語詞形還原器 - -Devereaux Readability Index - + +Stanza - Kurdish (Kurmanji) lemmatizer +Stanza - 庫爾德語(庫爾曼吉語)詞形還原器 - -Flesch-Kincaid Grade Level - + +Stanza - Kyrgyz lemmatizer +Stanza - 吉爾吉斯語詞形還原器 - -Flesch Reading Ease - + +Stanza - Latin lemmatizer +Stanza - 拉丁語詞形還原器 - -FORCAST Grade Level - + +Stanza - Latvian lemmatizer +Stanza - 拉脫維亞語詞形還原器 - -Fórmula de Crawford - + +Stanza - Ligurian lemmatizer +Stanza - 利古里亞語詞形還原器 - -Gulpease Index - + +Stanza - Lithuanian lemmatizer +Stanza - 立陶宛語詞形還原器 - -Gunning Fog Index - + +Stanza - Manx lemmatizer +Stanza - 馬恩語詞形還原器 - -Legibilidad μ - + +Stanza - Marathi lemmatizer +Stanza - 馬拉地語詞形還原器 - -Lensear Write - + +Stanza - Nigerian Pidgin lemmatizer +Stanza - 奈及利亞皮欽語詞形還原器 - -Lix - + +Stanza - Norwegian (Bokmål) lemmatizer +Stanza - 挪威語(書面)詞形還原器 - -McAlpine EFLAW Readability Score - + +Stanza - Norwegian (Nynorsk) lemmatizer +Stanza - 挪威語(新)詞形還原器 - -OSMAN - + +Stanza - Persian lemmatizer +Stanza - 波斯語詞形還原器 - -Rix - + +Stanza - Polish lemmatizer +Stanza - 波蘭語詞形還原器 - -SMOG Grade - 
+ +Stanza - Pomak lemmatizer +Stanza - 波馬克語詞形還原器 - -Spache Grade Level - + +Stanza - Portuguese lemmatizer +Stanza - 葡萄牙語詞形還原器 - -Count of Paragraphs -段落數 + +Stanza - Romanian lemmatizer +Stanza - 羅馬尼亞語詞形還原器 - -Count of Paragraphs % -段落數% + +Stanza - Russian lemmatizer +Stanza - 俄語詞形還原器 - -Count of Sentences -句子數 + +Stanza - Russian (Old) lemmatizer +Stanza - 俄語(古)詞形還原器 - -Count of Sentences % -句子數% + +Stanza - Sámi (Northern) lemmatizer +Stanza - 薩米語(北)詞形還原器 - -Count of Sentence Segments -句段數 + +Stanza - Sanskrit lemmatizer +Stanza - 梵語詞形還原器 - -Count of Sentence Segments % -句段數% + +Stanza - Scottish Gaelic lemmatizer +Stanza - 蘇格蘭蓋爾語詞形還原器 - -Count of Tokens -形符數 + +Stanza - Serbian (Latin script) lemmatizer +Stanza - 塞爾維亞語(拉丁文)詞形還原器 - -Count of Tokens % -形符數% + +Stanza - Slovak lemmatizer +Stanza - 斯洛伐克語詞形還原器 - -Count of Types -類符數 + +Stanza - Slovene lemmatizer +Stanza - 斯洛維尼亞語詞形還原器 - -Count of Types % -類符數% + +Stanza - Sorbian (Upper) lemmatizer +Stanza - 索布語(上)詞形還原器 - -Count of Syllables -音節數 + +Stanza - Spanish lemmatizer +Stanza - 西班牙語詞形還原器 - -Count of Syllables % -音節數% + +Stanza - Swedish lemmatizer +Stanza - 瑞典語詞形還原器 - -Count of Characters -字元數 + +Stanza - Tamil lemmatizer +Stanza - 泰米爾語詞形還原器 - -Count of Characters % -字元數% + +Stanza - Turkish lemmatizer +Stanza - 土耳其語詞形還原器 - -Type-token Ratio -類符形符比 + +Stanza - Ukrainian lemmatizer +Stanza - 烏克蘭語詞形還原器 - -Paragraph Length in Sentences (Mean) -段落長(單位:句子)(均值) + +Stanza - Urdu lemmatizer +Stanza - 烏爾都語詞形還原器 - -Paragraph Length in Sentences (Standard Deviation) -段落長(單位:句子)(標準差) + +Stanza - Uyghur lemmatizer +Stanza - 維吾爾語詞形還原器 - -Paragraph Length in Sentences (Variance) -段落長(單位:句子)(方差) + +Stanza - Welsh lemmatizer +Stanza - 威爾士語詞形還原器 - -Paragraph Length in Sentences (Minimum) -段落長(單位:句子)(最小值) + +Stanza - Wolof lemmatizer +Stanza - 沃洛夫語詞形還原器 - -Paragraph Length in Sentences (25th Percentile) -段落長(單位:句子)(25分位數) + +SudachiPy - Japanese lemmatizer +SudachiPy - 日語詞形還原器 - -Paragraph Length in Sentences (Median) 
-段落長(單位:句子)(中位數) + +LaoNLP - Lao stop word list +LaoNLP - 寮國語停用詞表 - -Paragraph Length in Sentences (75th Percentile) -段落長(單位:句子)(75分位數) + +NLTK - Arabic stop word list +NLTK - 阿拉伯語停用詞表 - -Paragraph Length in Sentences (Maximum) -段落長(單位:句子)(最大值) + +NLTK - Azerbaijani stop word list +NLTK - 亞塞拜然語停用詞表 - -Paragraph Length in Sentences (Range) -段落長(單位:句子)(極差) + +NLTK - Basque stop word list +NLTK - 巴斯克語停用詞表 - -Paragraph Length in Sentences (Interquartile Range) -段落長(單位:句子)(四分位差) + +NLTK - Bengali stop word list +NLTK - 孟加拉語停用詞表 - -Paragraph Length in Sentences (Modes) -段落長(單位:句子)(眾數) + +NLTK - Catalan stop word list +NLTK - 加泰羅尼亞語停用詞表 - -Paragraph Length in Sentence Segments (Mean) -段落長(單位:句段)(均值) + +NLTK - Chinese (Simplified) stop word list +NLTK - 漢語(簡體)停用詞表 - -Paragraph Length in Sentence Segments (Standard Deviation) -段落長(單位:句段)(標準差) + +NLTK - Chinese (Traditional) stop word list +NLTK - 漢語(繁體)停用詞表 - -Paragraph Length in Sentence Segments (Variance) -段落長(單位:句段)(方差) + +NLTK - Danish stop word list +NLTK - 丹麥語停用詞表 - -Paragraph Length in Sentence Segments (Minimum) -段落長(單位:句段)(最小值) + +NLTK - Dutch stop word list +NLTK - 荷蘭語停用詞表 - -Paragraph Length in Sentence Segments (25th Percentile) -段落長(單位:句段)(25分位數) + +NLTK - English stop word list +NLTK - 英語停用詞表 - -Paragraph Length in Sentence Segments (Median) -段落長(單位:句段)(中位數) + +NLTK - Finnish stop word list +NLTK - 芬蘭語停用詞表 - -Paragraph Length in Sentence Segments (75th Percentile) -段落長(單位:句段)(75分位數) + +NLTK - French stop word list +NLTK - 法語停用詞表 - -Paragraph Length in Sentence Segments (Maximum) -段落長(單位:句段)(最大值) + +NLTK - German stop word list +NLTK - 德語停用詞表 - -Paragraph Length in Sentence Segments (Range) -段落長(單位:句段)(極差) + +NLTK - Greek (Modern) stop word list +NLTK - 希臘語(現代)停用詞表 - -Paragraph Length in Sentence Segments (Interquartile Range) -段落長(單位:句段)(四分位差) + +NLTK - Hebrew (Modern) stop word list +NLTK - 希伯來語(現代)停用詞表 - -Paragraph Length in Sentence Segments (Modes) -段落長(單位:句段)(眾數) + +NLTK - Hungarian stop word list +NLTK 
- 匈牙利語停用詞表 - -Paragraph Length in Tokens (Mean) -段落長(單位:形符)(均值) + +NLTK - Indonesian stop word list +NLTK - 印度尼西亞語停用詞表 - -Paragraph Length in Tokens (Standard Deviation) -段落長(單位:形符)(標準差) + +NLTK - Italian stop word list +NLTK - 義大利語停用詞表 - -Paragraph Length in Tokens (Variance) -段落長(單位:形符)(方差) + +NLTK - Kazakh stop word list +NLTK - 哈薩克語停用詞表 - -Paragraph Length in Tokens (Minimum) -段落長(單位:形符)(最小值) + +NLTK - Nepali stop word list +NLTK - 尼泊爾語停用詞表 - -Paragraph Length in Tokens (25th Percentile) -段落長(單位:形符)(25分位數) + +NLTK - Norwegian (Bokmål) stop word list +NLTK - 挪威語(書面)停用詞表 - -Paragraph Length in Tokens (Median) -段落長(單位:形符)(中位數) + +NLTK - Portuguese stop word list +NLTK - 葡萄牙語停用詞表 - -Paragraph Length in Tokens (75th Percentile) -段落長(單位:形符)(75分位數) + +NLTK - Romanian stop word list +NLTK - 羅馬尼亞語停用詞表 - -Paragraph Length in Tokens (Maximum) -段落長(單位:形符)(最大值) + +NLTK - Russian stop word list +NLTK - 俄語停用詞表 - -Paragraph Length in Tokens (Range) -段落長(單位:形符)(極差) + +NLTK - Slovene stop word list +NLTK - 斯洛維尼亞語停用詞表 - -Paragraph Length in Tokens (Interquartile Range) -段落長(單位:形符)(四分位差) + +NLTK - Spanish stop word list +NLTK - 西班牙語停用詞表 - -Paragraph Length in Tokens (Modes) -段落長(單位:形符)(眾數) + +NLTK - Swedish stop word list +NLTK - 瑞典語停用詞表 - -Sentence Length in Tokens (Mean) -句長(單位:形符)(均值) + +NLTK - Tajik stop word list +NLTK - 塔吉克語停用詞表 - -Sentence Length in Tokens (Standard Deviation) -句長(單位:形符)(標準差) + +NLTK - Turkish stop word list +NLTK - 土耳其語停用詞表 - -Sentence Length in Tokens (Variance) -句長(單位:形符)(方差) + +PyThaiNLP - Thai stop word list +PyThaiNLP - 泰語停用詞表 - -Sentence Length in Tokens (Minimum) -句長(單位:形符)(最小值) + +Custom stop word list +自定義停用詞表 - -Sentence Length in Tokens (25th Percentile) -句長(單位:形符)(25分位數) + +Stanza - Afrikaans dependency parser +Stanza - 南非語依存分析器 + + + +Stanza - Arabic dependency parser +Stanza - 阿拉伯語依存分析器 - -Sentence Length in Tokens (Median) -句長(單位:形符)(中位數) + +Stanza - Armenian (Classical) dependency parser +Stanza - 亞美尼亞語(古)依存分析器 - -Sentence Length in Tokens 
(75th Percentile) -句長(單位:形符)(75分位數) + +Stanza - Armenian (Eastern) dependency parser +Stanza - 亞美尼亞語(東)依存分析器 - -Sentence Length in Tokens (Maximum) -句長(單位:形符)(最大值) + +Stanza - Armenian (Western) dependency parser +Stanza - 亞美尼亞語(西)依存分析器 - -Sentence Length in Tokens (Range) -句長(單位:形符)(極差) + +Stanza - Basque dependency parser +Stanza - 巴斯克語依存分析器 - -Sentence Length in Tokens (Interquartile Range) -句長(單位:形符)(四分位差) + +Stanza - Belarusian dependency parser +Stanza - 白俄羅斯語依存分析器 - -Sentence Length in Tokens (Modes) -句長(單位:形符)(眾數) + +Stanza - Bulgarian dependency parser +Stanza - 保加利亞語依存分析器 - -Sentence Segment Length in Tokens (Mean) -句段長(單位:形符)(均值) + +Stanza - Buryat (Russia) dependency parser +Stanza - 布里亞特語(俄羅斯)依存分析器 - -Sentence Segment Length in Tokens (Standard Deviation) -句段長(單位:形符)(標準差) + +Stanza - Catalan dependency parser +Stanza - 加泰羅尼亞語依存分析器 - -Sentence Segment Length in Tokens (Variance) -句段長(單位:形符)(方差) + +Stanza - Chinese (Classical) dependency parser +Stanza - 漢語(文言)依存分析器 - -Sentence Segment Length in Tokens (Minimum) -句段長(單位:形符)(最小值) + +Stanza - Chinese (Simplified) dependency parser +Stanza - 漢語(簡體)依存分析器 - -Sentence Segment Length in Tokens (25th Percentile) -句段長(單位:形符)(25分位數) + +Stanza - Chinese (Traditional) dependency parser +Stanza - 漢語(繁體)依存分析器 - -Sentence Segment Length in Tokens (Median) -句段長(單位:形符)(中位數) + +Stanza - Church Slavonic (Old) dependency parser +Stanza - 教會斯拉夫語(古)依存分析器 - -Sentence Segment Length in Tokens (75th Percentile) -句段長(單位:形符)(75分位數) + +Stanza - Coptic dependency parser +Stanza - 科普特語依存分析器 - -Sentence Segment Length in Tokens (Maximum) -句段長(單位:形符)(最大值) + +Stanza - Croatian dependency parser +Stanza - 克羅埃西亞語依存分析器 - -Sentence Segment Length in Tokens (Range) -句段長(單位:形符)(極差) + +Stanza - Czech dependency parser +Stanza - 捷克語依存分析器 - -Sentence Segment Length in Tokens (Interquartile Range) -句段長(單位:形符)(四分位數) + +Stanza - Danish dependency parser +Stanza - 丹麥語依存分析器 - -Sentence Segment Length in Tokens (Modes) -句段長(單位:形符)(眾數) + +Stanza - 
Dutch dependency parser +Stanza - 荷蘭語依存分析器 - -Token Length in Syllables (Mean) -形符長(單位:音節)(均值) + +Stanza - English dependency parser +Stanza - 英語依存分析器 - -Token Length in Syllables (Standard Deviation) -形符長(單位:音節)(標準差) + +Stanza - English (Old) dependency parser +Stanza - 英語(古)依存分析器 - -Token Length in Syllables (Variance) -形符長(單位:音節)(方差) + +Stanza - Erzya dependency parser +Stanza - 埃爾齊亞語依存分析器 - -Token Length in Syllables (Minimum) -形符長(單位:音節)(最小值) + +Stanza - Estonian dependency parser +Stanza - 愛沙尼亞語依存分析器 - -Token Length in Syllables (25th Percentile) -形符長(單位:音節)(25分位數) + +Stanza - Faroese dependency parser +Stanza - 法羅語依存分析器 - -Token Length in Syllables (Median) -形符長(單位:音節)(中位數) + +Stanza - Finnish dependency parser +Stanza - 芬蘭語依存分析器 - -Token Length in Syllables (75th Percentile) -形符長(單位:音節)(75分位數) + +Stanza - French dependency parser +Stanza - 法語依存分析器 - -Token Length in Syllables (Maximum) -形符長(單位:音節)(最大值) + +Stanza - French (Old) dependency parser +Stanza - 法語(古)依存分析器 - -Token Length in Syllables (Range) -形符長(單位:音節)(極差) + +Stanza - Galician dependency parser +Stanza - 加里西亞語依存分析器 - -Token Length in Syllables (Interquartile Range) -形符長(單位:音節)(四分位差) + +Stanza - German dependency parser +Stanza - 德語依存分析器 - -Token Length in Syllables (Modes) -形符長(單位:音節)(眾數) + +Stanza - Gothic dependency parser +Stanza - 哥特語依存分析器 - -Token Length in Characters (Mean) -形符長(單位:字元)(均值) + +Stanza - Greek (Ancient) dependency parser +Stanza - 希臘語(古)依存分析器 - -Token Length in Characters (Standard Deviation) -形符長(單位:字元)(標準差) + +Stanza - Greek (Modern) dependency parser +Stanza - 希臘語(現代)依存分析器 - -Token Length in Characters (Variance) -形符長(單位:字元)(方差) + +Stanza - Hebrew (Ancient) dependency parser +Stanza - 希伯來語(古)依存分析器 - -Token Length in Characters (Minimum) -形符長(單位:字元)(最小值) + +Stanza - Hebrew (Modern) dependency parser +Stanza - 希伯來語(現代)依存分析器 - -Token Length in Characters (25th Percentile) -形符長(單位:字元)(25分位值) + +Stanza - Hindi dependency parser +Stanza - 印地語依存分析器 - -Token Length in Characters 
(Median) -形符長(單位:字元)(中位數) + +Stanza - Hungarian dependency parser +Stanza - 匈牙利語依存分析器 - -Token Length in Characters (75th Percentile) -形符長(單位:字元)(75分位數) + +Stanza - Icelandic dependency parser +Stanza - 冰島語依存分析器 - -Token Length in Characters (Maximum) -形符長(單位:字元)(最大值) + +Stanza - Indonesian dependency parser +Stanza - 印度尼西亞語依存分析器 - -Token Length in Characters (Range) -形符長(單位:字元)(極差) + +Stanza - Irish dependency parser +Stanza - 愛爾蘭語依存分析器 - -Token Length in Characters (Interquartile Range) -形符長(單位:字元)(四分位差) + +Stanza - Italian dependency parser +Stanza - 義大利語依存分析器 - -Token Length in Characters (Modes) -形符長(單位:字元)(眾數) + +Stanza - Japanese dependency parser +Stanza - 日語依存分析器 - -Type Length in Syllables (Mean) -類符長(單位:音節)(均值) + +Stanza - Kazakh dependency parser +Stanza - 哈薩克語依存分析器 - -Type Length in Syllables (Standard Deviation) -類符長(單位:音節)(標準差) + +Stanza - Korean dependency parser +Stanza - 韓語依存分析器 - -Type Length in Syllables (Variance) -類符長(單位:音節)(方差) + +Stanza - Kurdish (Kurmanji) dependency parser +Stanza - 庫爾德語(庫爾曼吉語)依存分析器 - -Type Length in Syllables (Minimum) -類符長(單位:音節)(最小值) + +Stanza - Kyrgyz dependency parser +Stanza - 吉爾吉斯語依存分析器 - -Type Length in Syllables (25th Percentile) -類符長(單位:音節)(25分位數) + +Stanza - Latin dependency parser +Stanza - 拉丁語依存分析器 - -Type Length in Syllables (Median) -類符長(單位:音節)(中位數) + +Stanza - Latvian dependency parser +Stanza - 拉脫維亞語依存分析器 - -Type Length in Syllables (75th Percentile) -類符長(單位:音節)(75分位數) + +Stanza - Ligurian dependency parser +Stanza - 利古里亞語依存分析器 - -Type Length in Syllables (Maximum) -類符長(單位:音節)(最大值) + +Stanza - Lithuanian dependency parser +Stanza - 立陶宛語依存分析器 - -Type Length in Syllables (Range) -類符長(單位:音節)(極差) + +Stanza - Maltese dependency parser +Stanza - 馬耳他語依存分析器 - -Type Length in Syllables (Interquartile Range) -類符長(單位:音節)(四分位差) + +Stanza - Manx dependency parser +Stanza - 馬恩語依存分析器 - -Type Length in Syllables (Modes) -類符長(單位:音節)(眾數) + +Stanza - Marathi dependency parser +Stanza - 馬拉地語依存分析器 - -Type Length in Characters 
(Mean) -類符長(單位:字元)(均值) + +Stanza - Nigerian Pidgin dependency parser +Stanza - 奈及利亞皮欽語依存分析器 - -Type Length in Characters (Standard Deviation) -類符長(單位:字元)(標準差) + +Stanza - Norwegian (Bokmål) dependency parser +Stanza - 挪威語(書面)依存分析器 - -Type Length in Characters (Variance) -類符長(單位:字元)(方差) + +Stanza - Norwegian (Nynorsk) dependency parser +Stanza - 挪威語(新)依存分析器 - -Type Length in Characters (Minimum) -類符長(單位:字元)(最小值) + +Stanza - Persian dependency parser +Stanza - 波斯語依存分析器 - -Type Length in Characters (25th Percentile) -類符長(單位:字元)(25分位數) + +Stanza - Polish dependency parser +Stanza - 波蘭語依存分析器 - -Type Length in Characters (Median) -類符長(單位:字元)(中位數) + +Stanza - Pomak dependency parser +Stanza - 波馬克語依存分析器 - -Type Length in Characters (75th Percentile) -類符長(單位:字元)(75分位數) + +Stanza - Portuguese dependency parser +Stanza - 葡萄牙語依存分析器 - -Type Length in Characters (Maximum) -類符長(單位:字元)(最大值) + +Stanza - Romanian dependency parser +Stanza - 羅馬尼亞語依存分析器 - -Type Length in Characters (Range) -類符長(單位:字元)(極差) + +Stanza - Russian dependency parser +Stanza - 俄語依存分析器 - -Type Length in Characters (Interquartile Range) -類符長(單位:字元)(四分位差) + +Stanza - Russian (Old) dependency parser +Stanza - 俄語(古)依存分析器 - -Type Length in Characters (Modes) -類符長(單位:字元)(眾數) + +Stanza - Sámi (Northern) dependency parser +Stanza - 薩米語(北)依存分析器 - -Syllable Length in Characters (Mean) -音節長(單位:字元)(均值) + +Stanza - Sanskrit dependency parser +Stanza - 梵語依存分析器 - -Syllable Length in Characters (Standard Deviation) -音節長(單位:字元)(標準差) + +Stanza - Scottish Gaelic dependency parser +Stanza - 蘇格蘭蓋爾語依存分析器 - -Syllable Length in Characters (Variance) -音節長(單位:字元)(方差) + +Stanza - Serbian (Latin script) dependency parser +Stanza - 塞爾維亞語(拉丁文)依存分析器 - -Syllable Length in Characters (Minimum) -音節長(單位:字元)(最小值) + +Stanza - Slovak dependency parser +Stanza - 斯洛伐克語依存分析器 - -Syllable Length in Characters (25th Percentile) -音節長(單位:字元)(25分位數) + +Stanza - Slovene dependency parser +Stanza - 斯洛維尼亞語依存分析器 - -Syllable Length in Characters (Median) 
-音節長(單位:字元)(中位數) + +Stanza - Sorbian (Upper) dependency parser +Stanza - 索布語(上)依存分析器 - -Syllable Length in Characters (75th Percentile) -音節長(單位:字元)(75分位數) + +Stanza - Spanish dependency parser +Stanza - 西班牙語依存分析器 - -Syllable Length in Characters (Maximum) -音節長(單位:字元)(最大值) + +Stanza - Swedish dependency parser +Stanza - 瑞典語依存分析器 - -Syllable Length in Characters (Range) -音節長(單位:字元)(極差) + +Stanza - Tamil dependency parser +Stanza - 泰米爾語依存分析器 - -Syllable Length in Characters (Interquartile Range) -音節長(單位:字元)(四分位差) + +Stanza - Telugu dependency parser +Stanza - 泰盧固語依存分析器 - -Syllable Length in Characters (Modes) -音節長(單位:字元)(眾數) + +Stanza - Turkish dependency parser +Stanza - 土耳其語依存分析器 - -Al-Heeti's Readability Prediction Formula - + +Stanza - Ukrainian dependency parser +Stanza - 烏克蘭語依存分析器 - -Bormuth's Cloze Mean - + +Stanza - Urdu dependency parser +Stanza - 烏爾都語依存分析器 - -Bormuth's Grade Placement - + +Stanza - Uyghur dependency parser +Stanza - 維吾爾語依存分析器 - -Coleman's Readability Formula - + +Stanza - Vietnamese dependency parser +Stanza - 越南語依存分析器 - -Dale-Chall Readability Formula - + +Stanza - Welsh dependency parser +Stanza - 威爾士語依存分析器 - -Danielson-Bryan's Readability Formula - + +Stanza - Wolof dependency parser +Stanza - 沃洛夫語依存分析器 - -Dawood's Readability Formula - + +Stanza - Chinese (Simplified) sentiment analyzer +Stanza - 漢語(簡體)情感分析器 - -Degrees of Reading Power - + +Stanza - German sentiment analyzer +Stanza - 德語情感分析器 - -Dickes-Steiwer Handformel - + +Stanza - English sentiment analyzer +Stanza - 英語情感分析器 - -Easy Listening Formula - + +Stanza - Marathi sentiment analyzer +Stanza - 馬拉地語情感分析器 - -Flesch Reading Ease (Farr-Jenkins-Paterson) - + +Stanza - Spanish sentiment analyzer +Stanza - 西班牙語情感分析器 - -Fórmula de Comprensibilidad de Gutiérrez de Polini - + +Stanza - Vietnamese sentiment analyzer +Stanza - 越南語情感分析器 - -Fucks's Stilcharakteristik - + +Underthesea - Vietnamese sentiment analyzer +Underthesea - 越南語情感分析器 - -Lorge Readability Index - + +VADER - Afrikaans 
sentiment analyzer +VADER - 南非語情感分析器 - -Luong-Nguyen-Dinh's Readability Formula - + +VADER - Albanian sentiment analyzer +VADER - 阿爾巴尼亞語情感分析器 - -neue Wiener Literaturformeln - + +VADER - Amharic sentiment analyzer +VADER - 阿姆哈拉語情感分析器 - -neue Wiener Sachtextformel - + +VADER - Arabic sentiment analyzer +VADER - 阿拉伯語情感分析器 - -Strain Index - + +VADER - Armenian sentiment analyzer +VADER - 亞美尼亞語情感分析器 - -Tränkle & Bailer's Readability Formula - + +VADER - Assamese sentiment analyzer +VADER - 阿薩姆語情感分析器 - -Tuldava's Text Difficulty - + +VADER - Azerbaijani sentiment analyzer +VADER - 亞塞拜然語情感分析器 - -Wheeler & Smith's Readability Formula - + +VADER - Basque sentiment analyzer +VADER - 巴斯克語情感分析器 - -Corrected TTR - + +VADER - Belarusian sentiment analyzer +VADER - 白俄羅斯語情感分析器 - -Fisher's Index of Diversity - + +VADER - Bengali sentiment analyzer +VADER - 孟加拉語情感分析器 - -Herdan's Vₘ - + +VADER - Bulgarian sentiment analyzer +VADER - 保加利亞語情感分析器 - -HD-D - + +VADER - Burmese sentiment analyzer +VADER - 緬甸語情感分析器 - -LogTTR - + +VADER - Catalan sentiment analyzer +VADER - 加泰羅尼亞語情感分析器 - -Mean Segmental TTR - + +VADER - Chinese (Simplified) sentiment analyzer +VADER - 漢語(簡體)情感分析器 - -Measure of Textual Lexical Diversity - + +VADER - Chinese (Traditional) sentiment analyzer +VADER - 漢語(繁體)情感分析器 - -Moving-average TTR - + +VADER - Croatian sentiment analyzer +VADER - 克羅埃西亞語情感分析器 - -Popescu-Mačutek-Altmann's B₁ - + +VADER - Czech sentiment analyzer +VADER - 捷克語情感分析器 - -Popescu-Mačutek-Altmann's B₂ - + +VADER - Danish sentiment analyzer +VADER - 丹麥語情感分析器 + + + +VADER - Dutch sentiment analyzer +VADER - 荷蘭語情感分析器 - -Popescu-Mačutek-Altmann's B₃ - + +VADER - English sentiment analyzer +VADER - 英語情感分析器 - -Popescu-Mačutek-Altmann's B₄ - + +VADER - Esperanto sentiment analyzer +VADER - 世界語情感分析器 - -Popescu-Mačutek-Altmann's B₅ - + +VADER - Estonian sentiment analyzer +VADER - 愛沙尼亞語情感分析器 - -Popescu's R₁ - + +VADER - Finnish sentiment analyzer +VADER - 芬蘭語情感分析器 - -Popescu's R₂ - + +VADER - French 
sentiment analyzer +VADER - 法語情感分析器 - -Popescu's R₃ - + +VADER - Galician sentiment analyzer +VADER - 加里西亞語情感分析器 - -Popescu's R₄ - + +VADER - Georgian sentiment analyzer +VADER - 喬治亞語情感分析器 - -Repeat Rate - + +VADER - German sentiment analyzer +VADER - 德語情感分析器 - -Root TTR - + +VADER - Greek (Modern) sentiment analyzer +VADER - 希臘語(現代)情感分析器 - -Shannon Entropy -夏農熵 + +VADER - Gujarati sentiment analyzer +VADER - 古吉拉特語情感分析器 - -Simpson's l - + +VADER - Hebrew (Modern) sentiment analyzer +VADER - 希伯來語(現代)情感分析器 - -vocd-D - + +VADER - Hindi sentiment analyzer +VADER - 印地語情感分析器 - -Yule's Characteristic K - + +VADER - Hungarian sentiment analyzer +VADER - 匈牙利語情感分析器 - -Yule's Index of Diversity - + +VADER - Icelandic sentiment analyzer +VADER - 冰島語情感分析器 - - -wl_results_filter - -Filter Results -篩選結果 + +VADER - Indonesian sentiment analyzer +VADER - 印度尼西亞語情感分析器 - -File to filter: -待篩選檔案: + +VADER - Irish sentiment analyzer +VADER - 愛爾蘭語情感分析器 - -Filter -篩選 + +VADER - Italian sentiment analyzer +VADER - 義大利語情感分析器 - -Close -關閉 + +VADER - Japanese sentiment analyzer +VADER - 日語情感分析器 - -The results in the table has been successfully filtered. -已成功篩選表格中的結果。 + +VADER - Kannada sentiment analyzer +VADER - 卡納達語情感分析器 - -Filtering results... 
-篩選結果中…… + +VADER - Kazakh sentiment analyzer +VADER - 哈薩克語情感分析器 - - -wl_settings - -Empty Path -空路徑 + +VADER - Khmer sentiment analyzer +VADER - 柬埔寨語情感分析器 - - - <div>The path should not be left empty!</div> - - - <div>路徑不可為空!</div> - + +VADER - Korean sentiment analyzer +VADER - 韓語情感分析器 - -Path not Found -未找到路徑 + +VADER - Kurdish (Kurmanji) sentiment analyzer +VADER - 庫爾德語(庫爾曼吉語)情感分析器 - - - <div>The specified path "{}" could not be found!</div> - <div>Please check your settings and try again.</div> - - - <div>未找到指定的路徑“{}”!</div> - <div>請檢查你的設定後重試。</div> - + +VADER - Kyrgyz sentiment analyzer +VADER - 吉爾吉斯語情感分析器 - -Invalid File Path -無效檔案路徑 + +VADER - Latin sentiment analyzer +VADER - 拉丁語情感分析器 - - - <div>The specified path "{}" should be a file, not a directory!</div> - <div>Please check your settings and try again.</div> - - - <div>指定的路徑“{}”應是一個檔案,而不是目錄!</div> - <div>請檢查你的設定後重試。</div> - + +VADER - Latvian sentiment analyzer +VADER - 拉脫維亞語情感分析器 - -Invalid Directory Path -無效資料夾路徑 + +VADER - Lithuanian sentiment analyzer +VADER - 立陶宛語情感分析器 - - - <div>The specified path "{}" should be a directory, not a file!</div> - <div>Please check your settings and try again.</div> - - - <div>指定的路徑“{}”應是一個目錄,而不是檔案!</div> - <div>請檢查你的設定後重試。</div> - + +VADER - Luganda sentiment analyzer +VADER - 盧幹達語情感分析器 - -Path Not Exist -路徑不存在 + +VADER - Luxembourgish sentiment analyzer +VADER - 盧森堡語情感分析器 - - - {} - <body> - <div>The specified path "{}" does not exist.</div> - <div>Do you want to create the directory?</div> - </body> - - - {} - <body> - <div>指定的路徑“{}”不存在。</div> - <div>你想要新建該資料夾嗎?</div> - </body> - + +VADER - Macedonian sentiment analyzer +VADER - 馬其頓語情感分析器 - - -wl_settings_default - -Observed Files -觀察檔案 + +VADER - Malay sentiment analyzer +VADER - 馬來語情感分析器 - -Profiler -分析工具 + +VADER - Malayalam sentiment analyzer +VADER - 馬拉雅拉姆語情感分析器 - -APA (7th edition) -APA(第七版) + +VADER - Maltese sentiment analyzer +VADER - 馬耳他語情感分析器 - -Counts -計數 + +VADER - Marathi sentiment analyzer +VADER 
- 馬拉地語情感分析器 - -Token -形符 + +VADER - Meitei (Meitei script) sentiment analyzer +VADER - 曼尼普爾語(曼尼普爾文)情感分析器 - -File -檔案 + +VADER - Mongolian sentiment analyzer +VADER - 蒙古語情感分析器 - -Ascending -升序 + +VADER - Nepali sentiment analyzer +VADER - 尼泊爾語情感分析器 - -Token no. -形符序號 + +VADER - Norwegian (Bokmål) sentiment analyzer +VADER - 挪威語(書面)情感分析器 - -Line chart -折線圖 + +VADER - Odia sentiment analyzer +VADER - 奧里亞語情感分析器 - -Total -合計 + +VADER - Persian sentiment analyzer +VADER - 波斯語情感分析器 - -Frequency -頻數 + +VADER - Polish sentiment analyzer +VADER - 波蘭語情感分析器 - -None - + +VADER - Portuguese sentiment analyzer +VADER - 葡萄牙語情感分析器 - -p-value -p 值 + +VADER - Punjabi (Gurmukhi script) sentiment analyzer +VADER - 旁遮普語(古木基文)情感分析器 - -General -全域性 + +VADER - Romanian sentiment analyzer +VADER - 羅馬尼亞語情感分析器 - -Excel workbooks (*.xlsx) -Excel 工作簿 (*.xlsx) + +VADER - Russian sentiment analyzer +VADER - 俄語情感分析器 - -Non-embedded -非嵌入式 + +VADER - Sanskrit sentiment analyzer +VADER - 梵語情感分析器 - -Header - + +VADER - Scottish Gaelic sentiment analyzer +VADER - 蘇格蘭蓋爾語情感分析器 - -Embedded -嵌入式 + +VADER - Serbian (Cyrillic script) sentiment analyzer +VADER - 塞爾維亞語(西里爾文)情感分析器 - -Part of speech -詞性 + +VADER - Sindhi sentiment analyzer +VADER - 信德語情感分析器 - -Others -其他 + +VADER - Sinhala sentiment analyzer +VADER - 僧伽羅語情感分析器 - -Paragraph -段落 + +VADER - Slovak sentiment analyzer +VADER - 斯洛伐克語情感分析器 - -Sentence -句子 + +VADER - Slovene sentiment analyzer +VADER - 斯洛維尼亞語情感分析器 - -Word -單詞 + +VADER - Spanish sentiment analyzer +VADER - 西班牙語情感分析器 - -Policy one - + +VADER - Swahili sentiment analyzer +VADER - 斯瓦西里語情感分析器 - -New -新版 + +VADER - Swedish sentiment analyzer +VADER - 瑞典語情感分析器 - -Original -原版 + +VADER - Tagalog sentiment analyzer +VADER - 他加祿語情感分析器 - -Rank-frequency distribution -頻數排序分佈 + +VADER - Tajik sentiment analyzer +VADER - 塔吉克語情感分析器 - -Two-tailed -雙尾 + +VADER - Tamil sentiment analyzer +VADER - 泰米爾語情感分析器 - -Relative frequency -相對頻數 + +VADER - Tatar sentiment analyzer +VADER - 韃靼語情感分析器 - -Colormap -色譜 + 
+VADER - Telugu sentiment analyzer +VADER - 泰盧固語情感分析器 - - -wl_settings_figs - -Square -方形 + +VADER - Thai sentiment analyzer +VADER - 泰語情感分析器 - -Circle -圓形 + +VADER - Tigrinya sentiment analyzer +VADER - 提格雷尼亞語情感分析器 - -Triangle up -朝上三角形 + +VADER - Turkish sentiment analyzer +VADER - 土耳其語情感分析器 - -Triangle right -朝右三角形 + +VADER - Ukrainian sentiment analyzer +VADER - 烏克蘭語情感分析器 - -Triangle down -朝下三角形 + +VADER - Urdu sentiment analyzer +VADER - 烏爾都語情感分析器 - -Triangle left -朝左三角形 + +VADER - Uyghur sentiment analyzer +VADER - 維吾爾語情感分析器 - -Thin diamond -薄菱形 + +VADER - Welsh sentiment analyzer +VADER - 威爾士語情感分析器 - -Pentagon -五角形 + +VADER - Yoruba sentiment analyzer +VADER - 約魯巴語情感分析器 - -Hexagon -六邊形 + +VADER - Zulu sentiment analyzer +VADER - 祖魯語情感分析器 - -Octagon -八邊形 + +None + - -Arc3 + +Average logarithmic distance - -Arc + +Average reduced frequency - -Angle3 + +Average waiting time - -Angle + +Carroll's D₂ - -Bar + +Gries's DP - -Solid -實線 + +Juilland's D + - -Dashed -虛線 + +Lyne's D₃ + - -Dash-dotted -點畫線 + +Rosengren's S + - -Dotted -點線 + +Zhang's Distributional Consistency + - -Curve -圓弧 + +Carroll's Uₘ + - -Curve A -圓弧 A + +Engwall's FM + - -Curve B -圓弧 B + +Juilland's U + - -Curve AB -圓弧 AB + +Kromer's UR + + + + +Rosengren's KF + - -Curve filled A -實心圓弧 A + +Fisher's exact test +費希爾精確檢驗 - -Curve filled B -實心圓弧 B + +Log-likelihood ratio test +對數似然比檢驗 - -Curve filled AB -實心圓弧 AB + +Mann-Whitney U Test +曼惠特尼 U 檢驗 - -Bracket A -方括號 A + +Pearson's chi-squared test +皮爾森卡方檢驗 - -Bracket B -方括號 B + +Student's t-test (1-sample) +學生 t 檢驗(單樣本) - -Bracket AB -方括號 AB + +Student's t-test (2-sample) +學生 t 檢驗(雙樣本) - -Bar AB -橫條 AB + +z-score +z 值 - -Bracket curve -方括號加圓弧 + +z-score (Berry-Rogghe) +z 值(Berry-Rogghe) - -Simple -樸素 + +Cubic association ratio + - -Fancy -絢麗 + +Dice's coefficient +Dice 係數 - -Wedge -楔形 + +Difference coefficient + - -Circular -環形 + +Jaccard index +雅卡爾指數 - -Kamada-Kawai + +Log-frequency biased MD - -Planar -平面 + +Kilgarriff's ratio + - -Random -隨機 + +Log 
ratio + - -Shell -同心 + +Minimum sensitivity + - -Spring -彈簧 + +Mutual dependency + - -Spectral -譜圖 + +Mutual expectation + - -Spiral -螺旋 + +Mutual information +互資訊 - - -wl_settings_files - -Type -型別 + +Odds ratio +比值比 - -Level -層級 + +Pointwise mutual information +點互資訊 - -Opening Tag -開始標籤 + +Poisson collocation measure + - -Closing Tag -結束標籤 + +Squared phi coefficient +Phi 係數的平方 - -Preview -預覽 + +Zhang's DC + - -Embedded -嵌入式 + +Log-likelihood Ratio +對數似然比 - -Non-embedded -非嵌入式 + +t-statistic +t 值 - -Reset -重置 + +Dice's Coefficient +Dice 係數 - - - <div>Embedded tags must begin with a punctuation mark, e.g. an underscore or a slash!</div> - - - <div>嵌入式標籤必須以一個標點符號,如下劃線或斜槓,開頭!</div> - + +Difference Coefficient + - - - <div>Non-embedded tags must begin and end with a punctuation mark, e.g. brackets!</div> - - - <div>非嵌入式標籤必須以一個標點,如括號,開頭和結尾!</div> - + +Jaccard Index +雅卡爾指數 - -Invalid Opening Tag -無效開始標籤 + +Kilgarriff's Ratio + - -Duplicate Tags -重複標籤 + +Log Ratio + - - - <div>The tag that you have specified already exists in the table!</div> - - - <div>你指定的標籤已存在於表格中!</div> - + +Minimum Sensitivity + - -token -形符 + +Poisson Collocation Measure + - -TAG -標籤 + +VADER - Lao sentiment analyzer +VADER - 寮國語情感分析器 wl_tables - + Search in results 在結果中查詢 - + Number of results: 結果數: - + Number of results: 0 結果數:0 - + Sort results 對結果排序 - + Filter results 篩選結果 - + Export Table 匯出表格 - + Exporting table... 匯出表格中…… - -Export Completed -匯出完成 - - - - - <div>The table has been successfully exported to "{}".</div> - - - <div>已成功匯出表格至“{}”。</div> - - - - -Export Error -匯出時出錯 - - - - - <div>Access to "{}" is denied, please specify another location or close the file and try again.</div> - - - <div>訪問“{}”時被拒絕,請指定其他位置或關閉檔案後重試。</div> - - - - + Add 新增 - + Insert 插入 - + Remove 移除 - + Clear 清空 + + +Generate table +生成表格 + + + +Generate figure +生成圖表 + + + +Export selected cells... +匯出選中單元格... + + + +Export all cells... +匯出所有單元格... 
+ + + +Clear table +清空表格 + + + +Rank +序號 + + + +Clear Table +清空表格 + + + + + <div> + The results in the table have yet been exported. Do you really want to clear the table? + </div> + + + <div> + 表格中的結果尚未匯出。你確認要清空表格嗎? + </div> + + wl_texts - + Paragraph 段落 - + Sentence 句子 - + Word 單詞 @@ -12560,342 +13173,319 @@ Frequency wl_widgets - -Context Settings -上下文設定 - - - + Words 單詞 - + All lowercase 全小寫 - + All uppercase 全大寫 - + Title case 首字母大寫 - + Numerals -數詞 +數字 - + Punctuation marks 標點符號 - + Treat as all lowercase 視為全小寫 - + Filter stop words 過濾停用詞 - + Ignore tags 忽略標籤 - + Use tags only 僅使用標籤 - + Search terms: 檢索項: - + Search term: 檢索項: - + Multi-search mode 多重檢索模式 - + * Use whitespace to delimit multiple tokens * 使用空白來分隔多個形符 - + Match case 匹配大小寫 - + Match whole words 全字匹配 - + Match inflected forms 匹配屈折變化形式 - + Use regular expressions 使用正規表示式 - + Match without tags 匹配時忽略標籤 - + Match tags only 僅匹配標籤 - + * Only 1 token is allowed in each search term -* 每條搜尋項中只允許輸入1個形符 +* 每條檢索項中只允許輸入1個形符 - + Context settings: 上下文設定: - + Settings... 設定... 
- + Measure of dispersion: -分佈演算法: +分佈計算方法: - + Measure of adjusted frequency: -調整頻數演算法: +調整頻數計算方法: - + Test of statistical significance: 統計顯著性檢驗: - + Measure of Bayes factor: -貝葉斯因子演算法: +貝葉斯因子計算方法: - + Measure of effect size: -效應量演算法: +效應量計算方法: - + Show percentage data 顯示百分比資料 - + Show cumulative data 顯示累加資料 - + Show breakdown by file 顯示各檔案明細 - + Show breakdown by span position 顯示各距位明細 - + Line chart 折線圖 - + Frequency 頻數 - + L - + R - + p-value p 值 - + Bayes factor 貝葉斯因子 - + Graph type: 圖表型別: - + Sort by file: 檔案排序依據: - + Use data: 使用資料: - + Use percentage data 使用百分比資料 - + Use cumulative data 使用累加資料 - + Word cloud 詞雲圖 - + Network graph 網路圖 - + Show 顯示 - + part-of-speech tags 詞性標籤 - + Show lemmas 顯示詞根 - + Collapse punctuation marks 合併標點符號 - + Compact mode 緊湊模式 - + Show each sentence in a separate tab 將句子分別顯示在獨立的標籤頁內 - + coarse-grained 粗分 - + fine-grained 細分 - -From - - - - -to - - - - + Divide each file into 將每個檔案分為 - + sub-sections 等份 - + Absolute frequency 絕對頻數 - + Relative frequency 相對頻數 - + Direction: 方向: - + Two-tailed 雙尾 - + Left-tailed 左尾 - + Right-tailed 右尾 - + Apply lemmatization 應用詞形還原 - + Assign part-of-speech tags 賦詞性標籤 - -wl_wordlist_generator - - -No language support -無語言支援 - - diff --git a/utils/wl_trs_generate_ts_files.py b/utils/wl_trs_generate_ts_files.py index 3bdfe0bb3..ff11ddee1 100644 --- a/utils/wl_trs_generate_ts_files.py +++ b/utils/wl_trs_generate_ts_files.py @@ -31,10 +31,10 @@ files.append(str(file)) # Use "_tr" as a shortcut of QCoreApplication.translate -subprocess.run(['pylupdate5' ,'-verbose' ,'-translate-function', '_tr', *files, '-ts', '../trs/zho_cn.ts'], check = True) +subprocess.run(['pylupdate5' ,'-verbose' ,'-translate-function', '_tr', *files, '-ts', 'trs/zho_cn.ts'], check = True) # Fix HTML entities -with open(r'../trs/zho_cn.ts', 'r', encoding = 'utf_8') as f: +with open(r'trs/zho_cn.ts', 'r', encoding = 'utf_8') as f: contents = f.read() # Replace "&xxxx;" with "&xxxx;" @@ -42,5 +42,5 @@ # Escape non-breaking spaces 
contents = contents.replace(r' ', r'&nbsp') -with open(r'../trs/zho_cn.ts', 'w', encoding = 'utf_8') as f: +with open(r'trs/zho_cn.ts', 'w', encoding = 'utf_8') as f: f.write(contents) diff --git a/utils/wl_trs_translate.py b/utils/wl_trs_translate.py index 3142889c3..d55f98eeb 100644 --- a/utils/wl_trs_translate.py +++ b/utils/wl_trs_translate.py @@ -24,10 +24,10 @@ # eng_us: [zho_cn] TRS_LANGS = { - ' (Cyrillic script)': ['(西里尔文)'], - ' (Gurmukhi script)': ['(古木基文)'], - ' (Latin script)': ['(拉丁文)'], - ' (Meitei script)': ['(曼尼普尔文)'], + ' (Cyrillic script)': [' (西里尔文)'], + ' (Gurmukhi script)': [' (古木基文)'], + ' (Latin script)': [' (拉丁文)'], + ' (Meitei script)': [' (曼尼普尔文)'], 'Afrikaans': ['南非语'], 'Albanian': ['阿尔巴尼亚语'], @@ -233,6 +233,7 @@ 'Legality syllable tokenizer': ['合法性分音节器'], 'Sonority sequencing syllable tokenizer': ['响度顺序分音节器'], 'syllable tokenizer': ['分音节器'], + 'Syllable dictionary': ['音节词典'], 'word tokenizer (split mode A)': ['分词器(切分模式 A)'], 'word tokenizer (split mode B)': ['分词器(切分模式 B)'], @@ -249,6 +250,7 @@ 'perceptron part-of-speech tagger': ['感知机词性标注器'], 'part-of-speech tagger': ['词性标注器'], 'Morphological analyzer': ['形态分析器'], + 'Yunshan Cup 2020': ['2020 云山杯'], 'lemmatizer': ['词形还原器'], @@ -340,14 +342,15 @@ # Language names if tr == lang: tr = trs[0] - # Encoding names - elif tr.startswith(f'{lang} ('): + elif f'{lang} (' in tr: tr = tr.replace(f'{lang} (', f'{trs[0]} (', 1) + # Script names + elif 'script)' in lang and lang in tr: + tr = tr.replace(lang, trs[0]) + # Encoding names elif tr.startswith(f'{lang}/'): tr = tr.replace(f'{lang}/', f'{trs[0]}/', 1) - elif f'/{lang} (' in tr: - tr = tr.replace(f'/{lang} (', f'/{trs[0]} (', 1) - # Names of language utils + # Language utility names elif f' - {lang} ' in tr: tr = tr.replace(f' - {lang} ', f' - {trs[0]} ', 1) @@ -367,7 +370,7 @@ # NLP utils for util, trs in TRS_NLP_UTILS.items(): - # Only replace language util names after language names or at the end of text + # Only replace language utility 
names after language names or at the end of text if f' - {util}' in tr or tr.endswith(util): if f' - {util}' in tr: tr = tr.replace(f' - {util}', f' - {trs[0]}', 1) diff --git a/utils/wl_trs_zho_tw.py b/utils/wl_trs_zho_tw.py index 488b9f0dd..08efb93f4 100644 --- a/utils/wl_trs_zho_tw.py +++ b/utils/wl_trs_zho_tw.py @@ -22,7 +22,12 @@ from utils import wl_trs_utils with open('trs/zho_cn.ts', 'r', encoding = 'utf_8') as f: - soup = bs4.BeautifulSoup(f.read(), features = 'lxml') + trs_zho_cn = f.read() + soup = bs4.BeautifulSoup(trs_zho_cn, features = 'lxml') + +# Convert Unix line endings to Windows ones +with open('trs/zho_cn.ts', 'w', encoding = 'utf_8') as f: + f.write(trs_zho_cn) cc = opencc.OpenCC('s2twp') diff --git a/wordless/wl_checks/wl_checks_files.py b/wordless/wl_checks/wl_checks_files.py index 5454a143a..259625d8e 100644 --- a/wordless/wl_checks/wl_checks_files.py +++ b/wordless/wl_checks/wl_checks_files.py @@ -25,8 +25,6 @@ from wordless.wl_dialogs import wl_dialogs_errs from wordless.wl_utils import wl_paths -_tr = QCoreApplication.translate - def check_file_paths_unsupported(main, file_paths): file_paths_ok = [] file_paths_unsupported = [] diff --git a/wordless/wl_colligation_extractor.py b/wordless/wl_colligation_extractor.py index fb446386a..7fedc7986 100644 --- a/wordless/wl_colligation_extractor.py +++ b/wordless/wl_colligation_extractor.py @@ -824,7 +824,7 @@ def update_gui_fig(self, err_msg, colligations_freqs_file, colligations_stats_fi col_text_test_stat = self.main.settings_global['tests_statistical_significance'][test_statistical_significance]['col_text'] col_text_effect_size = self.main.settings_global['measures_effect_size'][measure_effect_size]['col_text'] - if re.search(_tr('Wl_Table_Colligation_Extractor', r'^[LR][0-9]+$'), settings['fig_settings']['use_data']): + if re.search(self.tr(r'^[LR][0-9]+$'), settings['fig_settings']['use_data']): span_positions = ( list(range(settings['generation_settings']['window_left'], 0)) + 
list(range(1, settings['generation_settings']['window_right'] + 1)) @@ -889,6 +889,7 @@ def update_gui_fig(self, err_msg, colligations_freqs_file, colligations_stats_fi finally: wl_checks_work_area.check_err_fig(self.main, err_msg) +# self.tr() does not work in inherited classes class Wl_Worker_Colligation_Extractor(wl_threading.Wl_Worker): worker_done = pyqtSignal(str, dict, dict) @@ -968,14 +969,14 @@ def run(self): for i, ngram in enumerate(wl_nlp_utils.ngrams(tokens, ngram_size)): # Limit Searching - if settings_limit_searching != _tr('wl_colligation_extractor', 'None'): - if settings_limit_searching == _tr('wl_colligation_extractor', 'Within sentence segments'): + if settings_limit_searching != _tr('Wl_Worker_Colligation_Extractor', 'None'): + if settings_limit_searching == _tr('Wl_Worker_Colligation_Extractor', 'Within sentence segments'): offsets_unit = offsets_sentence_segs len_unit = len_sentence_segs - elif settings_limit_searching == _tr('wl_colligation_extractor', 'Within sentences'): + elif settings_limit_searching == _tr('Wl_Worker_Colligation_Extractor', 'Within sentences'): offsets_unit = offsets_sentences len_unit = len_sentences - elif settings_limit_searching == _tr('wl_colligation_extractor', 'Within paragraphs'): + elif settings_limit_searching == _tr('Wl_Worker_Colligation_Extractor', 'Within paragraphs'): offsets_unit = offsets_paras len_unit = len_paras @@ -990,7 +991,7 @@ def run(self): if window_left < 0 < window_right: # Limit Searching - if settings_limit_searching == _tr('wl_colligation_extractor', 'None'): + if settings_limit_searching == _tr('Wl_Worker_Colligation_Extractor', 'None'): tags_left = text.tags[max(0, i + window_left) : i] tags_right = text.tags[i + ngram_size : i + ngram_size + window_right] else: @@ -1033,7 +1034,7 @@ def run(self): colligations_freqs_file_all[ngram_size][(ngram, collocate)] += 1 elif window_left < 0 and window_right < 0: # Limit Searching - if settings_limit_searching == _tr('wl_colligation_extractor', 
'None'): + if settings_limit_searching == _tr('Wl_Worker_Colligation_Extractor', 'None'): tags_left = text.tags[max(0, i + window_left) : max(0, i + window_right + 1)] else: # Span positions (Left) @@ -1056,7 +1057,7 @@ def run(self): colligations_freqs_file_all[ngram_size][(ngram, collocate)] += 1 elif window_left > 0 and window_right > 0: # Limit Searching - if settings_limit_searching == _tr('wl_colligation_extractor', 'None'): + if settings_limit_searching == _tr('Wl_Worker_Colligation_Extractor', 'None'): tags_right = text.tags[i + ngram_size + window_left - 1 : i + ngram_size + window_right] else: # Span positions (Right) diff --git a/wordless/wl_collocation_extractor.py b/wordless/wl_collocation_extractor.py index bf17b9cb9..fa4859be0 100644 --- a/wordless/wl_collocation_extractor.py +++ b/wordless/wl_collocation_extractor.py @@ -821,7 +821,7 @@ def update_gui_fig(self, err_msg, collocations_freqs_files, collocations_stats_f col_text_test_stat = self.main.settings_global['tests_statistical_significance'][test_statistical_significance]['col_text'] col_text_effect_size = self.main.settings_global['measures_effect_size'][measure_effect_size]['col_text'] - if re.search(_tr('Wl_Table_Collocation_Extractor', r'^[LR][0-9]+$'), settings['fig_settings']['use_data']): + if re.search(self.tr(r'^[LR][0-9]+$'), settings['fig_settings']['use_data']): span_positions = ( list(range(settings['generation_settings']['window_left'], 0)) + list(range(1, settings['generation_settings']['window_right'] + 1)) @@ -886,6 +886,7 @@ def update_gui_fig(self, err_msg, collocations_freqs_files, collocations_stats_f finally: wl_checks_work_area.check_err_fig(self.main, err_msg) +# self.tr() does not work in inherited classes class Wl_Worker_Collocation_Extractor(wl_threading.Wl_Worker): worker_done = pyqtSignal(str, dict, dict) @@ -965,14 +966,14 @@ def run(self): for i, ngram in enumerate(wl_nlp_utils.ngrams(tokens, ngram_size)): # Limit Searching - if settings_limit_searching != 
_tr('wl_collocation_extractor', 'None'): - if settings_limit_searching == _tr('wl_collocation_extractor', 'Within sentence segments'): + if settings_limit_searching != _tr('Wl_Worker_Collocation_Extractor', 'None'): + if settings_limit_searching == _tr('Wl_Worker_Collocation_Extractor', 'Within sentence segments'): offsets_unit = offsets_sentence_segs len_unit = len_sentence_segs - elif settings_limit_searching == _tr('wl_collocation_extractor', 'Within sentences'): + elif settings_limit_searching == _tr('Wl_Worker_Collocation_Extractor', 'Within sentences'): offsets_unit = offsets_sentences len_unit = len_sentences - elif settings_limit_searching == _tr('wl_collocation_extractor', 'Within paragraphs'): + elif settings_limit_searching == _tr('Wl_Worker_Collocation_Extractor', 'Within paragraphs'): offsets_unit = offsets_paras len_unit = len_paras @@ -987,7 +988,7 @@ def run(self): if window_left < 0 < window_right: # Limit Searching - if settings_limit_searching == _tr('wl_collocation_extractor', 'None'): + if settings_limit_searching == _tr('Wl_Worker_Collocation_Extractor', 'None'): tokens_left = tokens[max(0, i + window_left) : i] tokens_right = tokens[i + ngram_size : i + ngram_size + window_right] else: @@ -1030,7 +1031,7 @@ def run(self): collocations_freqs_file_all[ngram_size][(ngram, collocate)] += 1 elif window_left < 0 and window_right < 0: # Limit Searching - if settings_limit_searching == _tr('wl_collocation_extractor', 'None'): + if settings_limit_searching == _tr('Wl_Worker_Collocation_Extractor', 'None'): tokens_left = tokens[max(0, i + window_left) : max(0, i + window_right + 1)] else: # Span positions (Left) @@ -1053,7 +1054,7 @@ def run(self): collocations_freqs_file_all[ngram_size][(ngram, collocate)] += 1 elif window_left > 0 and window_right > 0: # Limit Searching - if settings_limit_searching == _tr('wl_collocation_extractor', 'None'): + if settings_limit_searching == _tr('Wl_Worker_Collocation_Extractor', 'None'): tokens_right = tokens[i + 
ngram_size + window_left - 1 : i + ngram_size + window_right] else: # Span positions (Right) diff --git a/wordless/wl_concordancer_parallel.py b/wordless/wl_concordancer_parallel.py index 7575f4ad8..a35d7bb15 100644 --- a/wordless/wl_concordancer_parallel.py +++ b/wordless/wl_concordancer_parallel.py @@ -364,8 +364,7 @@ def run(self): text = wl_token_processing.wl_process_tokens_concordancer( self.main, file['text'], token_settings = settings['token_settings'], - search_settings = settings['search_settings'], - preserve_blank_lines = True + search_settings = settings['search_settings'] ) offsets_paras_files.append(text.get_offsets()[0]) diff --git a/wordless/wl_dialogs/wl_dialogs.py b/wordless/wl_dialogs/wl_dialogs.py index 6a4419f95..ca872f72e 100644 --- a/wordless/wl_dialogs/wl_dialogs.py +++ b/wordless/wl_dialogs/wl_dialogs.py @@ -118,6 +118,7 @@ def __init__(self, main, width = 0, height = 0): } ''') +# self.tr() does not work in inherited classes class Wl_Dialog_Info(Wl_Dialog): def __init__(self, main, title, width = 0, height = 0, resizable = True, icon = True, no_buttons = False): # Avoid circular imports @@ -156,7 +157,7 @@ def __init__(self, main, title, width = 0, height = 0, resizable = True, icon = self.wrapper_buttons.layout().setContentsMargins(13, 1, 13, 13) if not no_buttons: - self.button_ok = QPushButton(_tr('Wl_Dialog_Settings', 'OK'), self) + self.button_ok = QPushButton(_tr('wl_dialogs', 'OK'), self) self.button_ok.clicked.connect(self.accept) @@ -194,8 +195,8 @@ def __init__( else: self.text_edit_info = QTextEdit(self) - self.button_copy = QPushButton(_tr('Wl_Dialog_Settings', 'Copy'), self) - self.button_close = QPushButton(_tr('Wl_Dialog_Settings', 'Close'), self) + self.button_copy = QPushButton(_tr('wl_dialogs', 'Copy'), self) + self.button_close = QPushButton(_tr('wl_dialogs', 'Close'), self) self.text_edit_info.setReadOnly(True) @@ -232,8 +233,9 @@ def __init__(self, main, title, width = 0, height = 0, resizable = True): 
self.wrapper_settings = self.wrapper_info self.button_restore_defaults = wl_buttons.Wl_Button_Restore_Defaults(self, load_settings = self.load_settings) - self.button_save = QPushButton(_tr('Wl_Dialog_Settings', 'Save'), self) - self.button_cancel = QPushButton(_tr('Wl_Dialog_Settings', 'Cancel'), self) + + self.button_save = QPushButton(_tr('wl_dialogs', 'Save'), self) + self.button_cancel = QPushButton(_tr('wl_dialogs', 'Cancel'), self) self.button_save.clicked.connect(self.save_settings) self.button_save.clicked.connect(self.accept) diff --git a/wordless/wl_dialogs/wl_dialogs_misc.py b/wordless/wl_dialogs/wl_dialogs_misc.py index cad8b715c..292053151 100644 --- a/wordless/wl_dialogs/wl_dialogs_misc.py +++ b/wordless/wl_dialogs/wl_dialogs_misc.py @@ -27,6 +27,7 @@ _tr = QCoreApplication.translate +# self.tr() does not work in inherited classes class Wl_Dialog_Progress(wl_dialogs.Wl_Dialog_Frameless): def __init__(self, main, text): super().__init__(main, width = 500) @@ -36,8 +37,8 @@ def __init__(self, main, text): self.timer_time_elapsed = QTimer(self) self.label_progress = wl_labels.Wl_Label_Dialog(text, self, word_wrap = False) - self.label_time_elapsed = wl_labels.Wl_Label_Dialog(self.tr('
Elapsed time: 0:00:00
'), self, word_wrap = False) - self.label_processing = wl_labels.Wl_Label_Dialog(self.tr(''' + self.label_time_elapsed = wl_labels.Wl_Label_Dialog(_tr('Wl_Dialog_Progress', '
Elapsed time: 0:00:00
'), self, word_wrap = False) + self.label_processing = wl_labels.Wl_Label_Dialog(_tr('Wl_Dialog_Progress', '''
Please wait. It may take a few seconds to several minutes for the operation to be completed.
'''), self @@ -56,7 +57,7 @@ def __init__(self, main, text): def update_elapsed_time(self): elapsed_time = datetime.timedelta(seconds = round(time.time() - self.time_start)) - self.label_time_elapsed.set_text(self.tr('
Elapsed time: {}
').format(elapsed_time)) + self.label_time_elapsed.set_text(_tr('Wl_Dialog_Progress', '
Elapsed time: {}
').format(elapsed_time)) def update_progress(self, text): self.label_progress.set_text(text) diff --git a/wordless/wl_figs/wl_figs.py b/wordless/wl_figs/wl_figs.py index 132c68046..83c0c1c95 100644 --- a/wordless/wl_figs/wl_figs.py +++ b/wordless/wl_figs/wl_figs.py @@ -135,7 +135,7 @@ def generate_word_cloud(main, data_file_items, fig_settings): if settings['font_settings']['font'] == 'Droid Sans Mono': font_path = wordcloud.wordcloud.FONT_PATH elif settings['font_settings']['font'] == 'GNU Unifont': - font_path = wl_paths.get_path_data('unifont-15.1.02.otf') + font_path = wl_paths.get_path_data('unifont-15.1.05.otf') elif settings['font_settings']['font'] == _tr('wl_figs', 'Custom'): font_path = settings['font_settings']['font_path'] diff --git a/wordless/wl_figs/wl_figs_stats.py b/wordless/wl_figs/wl_figs_stats.py index 8c528aa04..95a2c336f 100644 --- a/wordless/wl_figs/wl_figs_stats.py +++ b/wordless/wl_figs/wl_figs_stats.py @@ -41,7 +41,7 @@ def wl_fig_stats(main, stat_files_items, tab): } # Collocations / Colligations else: - if fig_settings['graph_type'] == _tr('wl_figs_freqs', 'Network graph'): + if fig_settings['graph_type'] == _tr('wl_figs_stats', 'Network graph'): stat_files_items = { (' '.join(wl_texts.to_display_texts(node)), collocate.display_text()): stat_files for (node, collocate), stat_files in stat_files_items.items() diff --git a/wordless/wl_main.py b/wordless/wl_main.py index 119945d96..23ac6634f 100644 --- a/wordless/wl_main.py +++ b/wordless/wl_main.py @@ -231,7 +231,8 @@ def __init__(self, loading_window): self.loading_window.show_message(self.tr('Loading settings...')) - # Default settings + # Global and default settings + self.settings_global = wl_settings_global.init_settings_global() self.settings_default = wl_settings_default.init_settings_default(self) # Custom settings @@ -250,9 +251,6 @@ def __init__(self, loading_window): with open(file_settings_display_lang, 'rb') as f: self.settings_custom['menu']['prefs']['display_lang'] = 
pickle.load(f) - # Global settings - self.settings_global = wl_settings_global.SETTINGS_GLOBAL - self.loading_window.show_message(self.tr('Initializing main window...')) # Font diff --git a/wordless/wl_measures/wl_measures_lexical_density_diversity.py b/wordless/wl_measures/wl_measures_lexical_density_diversity.py index 099a59b65..3a615933e 100644 --- a/wordless/wl_measures/wl_measures_lexical_density_diversity.py +++ b/wordless/wl_measures/wl_measures_lexical_density_diversity.py @@ -120,7 +120,11 @@ def lexical_density(main, text): if text.lang in main.settings_global['pos_taggers']: wl_pos_tagging.wl_pos_tag_universal(main, text.get_tokens_flat(), lang = text.lang, tagged = text.tagged) - num_content_words = sum((1 for token in text.get_tokens_flat() if token.content_function == _tr('wl_measures_lexical_density_diversity', 'Content words'))) + num_content_words = sum(( + 1 + for token in text.get_tokens_flat() + if token.content_function == _tr('wl_measures_lexical_density_diversity', 'Content words') + )) num_tokens = text.num_tokens lexical_density = num_content_words / num_tokens if num_tokens else 0 diff --git a/wordless/wl_measures/wl_measures_readability.py b/wordless/wl_measures/wl_measures_readability.py index 1fce24838..3763acd64 100644 --- a/wordless/wl_measures/wl_measures_readability.py +++ b/wordless/wl_measures/wl_measures_readability.py @@ -191,13 +191,13 @@ def rd(main, text): if text.num_words and text.num_sentences and text.num_word_types: variant = main.settings_custom['measures']['readability']['rd']['variant'] - if variant == _tr('wl_measures_readability', 'Policy one'): + if variant == _tr('wl_measures_readability', 'Policy One'): rd = ( 4.41434307 * (text.num_chars_alpha / text.num_words) - 13.46873475 ) - elif variant == _tr('wl_measures_readability', 'Policy two'): + elif variant == _tr('wl_measures_readability', 'Policy Two'): rd = ( 0.97569509 * (text.num_chars_alpha / text.num_words) + 0.37237998 * (text.num_words / 
text.num_sentences) diff --git a/wordless/wl_nlp/wl_matching.py b/wordless/wl_nlp/wl_matching.py index a344f0088..56f491211 100644 --- a/wordless/wl_nlp/wl_matching.py +++ b/wordless/wl_nlp/wl_matching.py @@ -57,7 +57,7 @@ def get_re_tags(main, tag_type): tags_non_embedded = [] for type_, _, opening_tag, _ in main.settings_custom['files']['tags'][f'{tag_type}_tag_settings']: - if type_ == _tr('get_re_tags', 'Embedded'): + if type_ == _tr('wl_matching', 'Embedded'): tag_start, tag_name = split_tag_embedded(opening_tag) tag_start = re.escape(tag_start) @@ -66,7 +66,7 @@ def get_re_tags(main, tag_type): tags_embedded.append(fr'{tag_start}\S*(?=\s|$)') else: tags_embedded.append(fr'{tag_start}{re.escape(tag_name)}(?=\s|$)') - elif type_ == _tr('get_re_tags', 'Non-embedded'): + elif type_ == _tr('wl_matching', 'Non-embedded'): tag_start, tag_name, tag_end = split_tag_non_embedded(opening_tag) tag_start = re.escape(tag_start) tag_end = re.escape(tag_end) @@ -84,7 +84,7 @@ def get_re_tags_with_tokens(main, tag_type): tags_non_embedded = [] for type_, _, opening_tag, closing_tag in main.settings_custom['files']['tags'][f'{tag_type}_tag_settings']: - if type_ == _tr('get_re_tags_with_tokens', 'Embedded'): + if type_ == _tr('wl_matching', 'Embedded'): tag_start, tag_name = split_tag_embedded(opening_tag) tag_start = re.escape(tag_start) @@ -93,7 +93,7 @@ def get_re_tags_with_tokens(main, tag_type): tags_embedded.append(fr'\S*{tag_start}\S*(?=\s|$)') else: tags_embedded.append(fr'\S*{tag_start}{re.escape(tag_name)}(?=\s|$)') - elif type_ == _tr('get_re_tags_with_tokens', 'Non-embedded'): + elif type_ == _tr('wl_matching', 'Non-embedded'): tag_start, tag_name, tag_end = split_tag_non_embedded(opening_tag) tag_start = re.escape(tag_start) tag_end = re.escape(tag_end) diff --git a/wordless/wl_nlp/wl_sentiment_analysis.py b/wordless/wl_nlp/wl_sentiment_analysis.py index 1daaf3075..03fbb7b3f 100644 --- a/wordless/wl_nlp/wl_sentiment_analysis.py +++ 
b/wordless/wl_nlp/wl_sentiment_analysis.py @@ -117,7 +117,7 @@ def wl_sentiment_analyze_text(main, inputs, lang, sentiment_analyzer): # Vietnamese elif sentiment_analyzer == 'underthesea_vie': for sentence in inputs: - sentiment = underthesea.sentiment(sentence) + sentiment = underthesea.sentiment(sentence) # pylint: disable=no-member if sentiment == 'positive': sentiment_scores.append(1) diff --git a/wordless/wl_nlp/wl_texts.py b/wordless/wl_nlp/wl_texts.py index 782edaea6..f28c21e3c 100644 --- a/wordless/wl_nlp/wl_texts.py +++ b/wordless/wl_nlp/wl_texts.py @@ -30,6 +30,12 @@ RE_VIE_TOKENIZED = re.compile(r'(? 0: - self.combo_box_freq_position.addItem(self.tr('R') + str(i)) + self.combo_box_freq_position.addItem(self.tr('R{}').format(i)) self.combo_box_freq_position.addItem(self.tr('Total')) diff --git a/wordless/wl_results/wl_results_search.py b/wordless/wl_results/wl_results_search.py index 4bd09f931..f6e484c16 100644 --- a/wordless/wl_results/wl_results_search.py +++ b/wordless/wl_results/wl_results_search.py @@ -153,7 +153,6 @@ def load_settings(self, defaults = False): self.checkbox_match_tags.setChecked(settings['match_tags']) self.search_settings_changed() - self.clr_highlights() def search_settings_changed(self): self.settings['multi_search_mode'] = self.checkbox_multi_search_mode.isChecked() diff --git a/wordless/wl_results/wl_results_sort.py b/wordless/wl_results/wl_results_sort.py index 128b1b847..a8c8be15a 100644 --- a/wordless/wl_results/wl_results_sort.py +++ b/wordless/wl_results/wl_results_sort.py @@ -55,7 +55,7 @@ def __init__(self, main, table): self.main.wl_work_area.currentChanged.connect(self.reject) - self.table_sort = Table_Results_Sort_Conordancer(self, table = self.table) + self.table_sort = Wl_Table_Results_Sort_Conordancer(self, table = self.table) self.button_restore_defaults = wl_buttons.Wl_Button_Restore_Defaults(self, load_settings = self.load_settings) self.button_sort = QPushButton(self.tr('Sort'), self) @@ -141,7 +141,7 @@ def 
update_gui(self, results): # Sort first by type (strings after floats), then sort numerically or alphabetically elif sorting_col == self.tr('Sentiment'): results.sort(key = lambda item: (str(type(item[3])), item[3]), reverse = reverse) - elif sorting_col == self.tr('Token no.'): + elif sorting_col == self.tr('Token No.'): results.sort(key = lambda item: item[4], reverse = reverse) elif sorting_col == self.tr('File'): results.sort(key = lambda item: item[12], reverse = reverse) @@ -243,13 +243,13 @@ def update_gui(self, results): self.table.enable_updates(emit_signals = False) -class Table_Results_Sort_Conordancer(wl_tables.Wl_Table_Add_Ins_Del_Clr): +class Wl_Table_Results_Sort_Conordancer(wl_tables.Wl_Table_Add_Ins_Del_Clr): def __init__(self, parent, table): super().__init__( parent = parent, headers = [ - _tr('Table_Results_Sort_Conordancer', 'Column'), - _tr('Table_Results_Sort_Conordancer', 'Order') + _tr('Wl_Table_Results_Sort_Conordancer', 'Column'), + _tr('Wl_Table_Results_Sort_Conordancer', 'Order') ], col_edit = 0 ) @@ -260,7 +260,7 @@ def __init__(self, parent, table): self.cols_to_sort_default = [ self.tr('Node'), self.tr('Sentiment'), - self.tr('Token no.'), + self.tr('Token No.'), self.tr('File') ] @@ -354,8 +354,8 @@ def table_item_changed(self): )) # List right context before left context - self.cols_to_sort.extend([self.tr('R') + str(i + 1) for i in range(context_len_right)]) - self.cols_to_sort.extend([self.tr('L') + str(i + 1) for i in range(context_len_left)]) + self.cols_to_sort.extend([self.tr('R{}').format(i + 1) for i in range(context_len_right)]) + self.cols_to_sort.extend([self.tr('L{}').format(i + 1) for i in range(context_len_left)]) self.setItemDelegateForColumn(0, wl_item_delegates.Wl_Item_Delegate_Combo_Box( parent = self, @@ -429,9 +429,9 @@ def _add_row(self, row = None, texts = None): item_sorting_col.setText(sorting_col) else: if cols_left and max(cols_left) < max_left: - item_sorting_col.setText(self.tr('L') + str(cols_left[-1] + 
1)) + item_sorting_col.setText(self.tr('L{}').format(cols_left[-1] + 1)) elif cols_right and max(cols_right) < max_right: - item_sorting_col.setText(self.tr('R') + str(cols_right[-1] + 1)) + item_sorting_col.setText(self.tr('R{}').format(cols_right[-1] + 1)) elif cols_right and max(cols_right) == max_right and not cols_left: item_sorting_col.setText(self.tr('L1')) else: diff --git a/wordless/wl_settings/wl_settings.py b/wordless/wl_settings/wl_settings.py index 9ea671b4e..daaf40396 100644 --- a/wordless/wl_settings/wl_settings.py +++ b/wordless/wl_settings/wl_settings.py @@ -286,6 +286,7 @@ def reset_all_settings(self): title = self.tr('Reset All Settings'), text = self.tr('''
Do you want to reset all settings to their defaults?
+
Warning: This will affect settings on all pages!
''') ): @@ -332,6 +333,7 @@ def load(self, node = None): except Exception: # pylint: disable=broad-exception-caught wl_checks_work_area.check_err(self.main, traceback.format_exc()) +# self.tr() does not work in inherited classes class Wl_Settings_Node(QWidget): def __init__(self, main): super().__init__() @@ -341,8 +343,8 @@ def __init__(self, main): def wl_msg_box_path_empty(self): wl_msg_boxes.Wl_Msg_Box_Warning( self.main, - title = _tr('wl_settings', 'Empty Path'), - text = _tr('wl_settings', ''' + title = _tr('Wl_Settings_Node', 'Empty Path'), + text = _tr('Wl_Settings_Node', '''
The path should not be left empty!
'''), ).open() @@ -350,8 +352,8 @@ def wl_msg_box_path_empty(self): def wl_msg_box_path_not_found(self, path): wl_msg_boxes.Wl_Msg_Box_Warning( self.main, - title = _tr('wl_settings', 'Path not Found'), - text = _tr('wl_settings', ''' + title = _tr('Wl_Settings_Node', 'Path not Found'), + text = _tr('Wl_Settings_Node', '''
The specified path "{}" could not be found!
Please check your settings and try again.
''').format(path), @@ -360,8 +362,8 @@ def wl_msg_box_path_not_found(self, path): def wl_msg_box_path_is_dir(self, path): wl_msg_boxes.Wl_Msg_Box_Warning( self.main, - title = _tr('wl_settings', 'Invalid File Path'), - text = _tr('wl_settings', ''' + title = _tr('Wl_Settings_Node', 'Invalid File Path'), + text = _tr('Wl_Settings_Node', '''
The specified path "{}" should be a file, not a directory!
Please check your settings and try again.
''').format(path), @@ -370,8 +372,8 @@ def wl_msg_box_path_is_dir(self, path): def wl_msg_box_path_not_dir(self, path): wl_msg_boxes.Wl_Msg_Box_Warning( self.main, - title = _tr('wl_settings', 'Invalid Directory Path'), - text = _tr('wl_settings', ''' + title = _tr('Wl_Settings_Node', 'Invalid Directory Path'), + text = _tr('Wl_Settings_Node', '''
The specified path "{}" should be a directory, not a file!
Please check your settings and try again.
''').format(path), @@ -433,8 +435,8 @@ def confirm_path(self, line_edit): if not os.path.exists(path): reply = QMessageBox.question( self.main, - _tr('wl_settings', 'Path Not Exist'), - _tr('wl_settings', ''' + _tr('Wl_Settings_Node', 'Path Not Exist'), + _tr('Wl_Settings_Node', ''' {}
The specified path "{}" does not exist.
diff --git a/wordless/wl_settings/wl_settings_default.py b/wordless/wl_settings/wl_settings_default.py index 18641ef8e..a8a0a2c3f 100644 --- a/wordless/wl_settings/wl_settings_default.py +++ b/wordless/wl_settings/wl_settings_default.py @@ -2268,7 +2268,7 @@ def init_settings_default(main): # Settings - Measures - Readability 'readability': { 'rd': { - 'variant': _tr('wl_settings_default', 'Policy one') + 'variant': _tr('wl_settings_default', 'Policy One') }, 'ari': { @@ -2536,7 +2536,7 @@ def init_settings_default(main): settings_default['pos_tagging']['tagsets']['preview_settings']['preview_pos_tagger'] = settings_default['pos_tagging']['pos_tagger_settings']['pos_taggers'].copy() # Custom stop word lists - for lang in wl_settings_global.SETTINGS_GLOBAL['langs'].values(): + for lang in main.settings_global['langs'].values(): lang_code = lang[0] if lang_code not in settings_default['stop_word_lists']['stop_word_list_settings']['stop_word_lists']: diff --git a/wordless/wl_settings/wl_settings_figs.py b/wordless/wl_settings/wl_settings_figs.py index 9be848fe9..62dc9540c 100644 --- a/wordless/wl_settings/wl_settings_figs.py +++ b/wordless/wl_settings/wl_settings_figs.py @@ -83,7 +83,7 @@ } NETWORKX_LAYOUTS = { _tr('wl_settings_figs', 'Circular'): networkx.circular_layout, - _tr('wl_settings_figs', 'Kamada-Kawai'): networkx.kamada_kawai_layout, + 'Kamada-Kawai': networkx.kamada_kawai_layout, _tr('wl_settings_figs', 'Planar'): networkx.planar_layout, _tr('wl_settings_figs', 'Random'): networkx.random_layout, _tr('wl_settings_figs', 'Shell'): networkx.shell_layout, diff --git a/wordless/wl_settings/wl_settings_files.py b/wordless/wl_settings/wl_settings_files.py index 1b8b724f5..8bb980e57 100644 --- a/wordless/wl_settings/wl_settings_files.py +++ b/wordless/wl_settings/wl_settings_files.py @@ -242,16 +242,17 @@ def apply_settings(self): return True +# self.tr() does not work in inherited classes class Wl_Table_Tags(wl_tables.Wl_Table_Add_Ins_Del_Clr): def __init__(self, 
parent, settings_tags, defaults_row): super().__init__( parent = parent, headers = [ - _tr('wl_settings_files', 'Type'), - _tr('wl_settings_files', 'Level'), - _tr('wl_settings_files', 'Opening Tag'), - _tr('wl_settings_files', 'Closing Tag'), - _tr('wl_settings_files', 'Preview') + _tr('Wl_Table_Tags', 'Type'), + _tr('Wl_Table_Tags', 'Level'), + _tr('Wl_Table_Tags', 'Opening Tag'), + _tr('Wl_Table_Tags', 'Closing Tag'), + _tr('Wl_Table_Tags', 'Preview') ], col_edit = 2 ) @@ -262,14 +263,14 @@ def __init__(self, parent, settings_tags, defaults_row): self.setItemDelegateForColumn(0, wl_item_delegates.Wl_Item_Delegate_Combo_Box( parent = self, items = [ - _tr('wl_settings_files', 'Embedded'), - _tr('wl_settings_files', 'Non-embedded') + _tr('Wl_Table_Tags', 'Embedded'), + _tr('Wl_Table_Tags', 'Non-embedded') ] )) self.setItemDelegateForColumn(3, wl_item_delegates.Wl_Item_Delegate_Uneditable(self)) self.setItemDelegateForColumn(4, wl_item_delegates.Wl_Item_Delegate_Uneditable(self)) - self.button_reset = QPushButton(_tr('wl_settings_files', 'Reset'), self) + self.button_reset = QPushButton(_tr('Wl_Table_Tags', 'Reset'), self) self.button_reset.clicked.connect(lambda: self.reset_table()) # pylint: disable=unnecessary-lambda @@ -281,21 +282,21 @@ def item_changed(self, item): # pylint: disable=arguments-differ item_opening_tag = self.model().item(row, 2) # Opening Tag - if self.model().item(row, 0).text() == _tr('wl_settings_files', 'Embedded'): + if self.model().item(row, 0).text() == _tr('Wl_Table_Tags', 'Embedded'): re_validation = re.search(r'^([^\w\s]|_)+\S*$', item_opening_tag.text()) - warning_text = _tr('wl_settings_files', ''' + warning_text = _tr('Wl_Table_Tags', '''
Embedded tags must begin with a punctuation mark, e.g. an underscore or a slash!
''') else: re_validation = re.search(r'^([^\w\s]|_)+\S*([^\w\s]|_)+$', item_opening_tag.text()) - warning_text = _tr('wl_settings_files', ''' + warning_text = _tr('Wl_Table_Tags', '''
Non-embedded tags must begin and end with a punctuation mark, e.g. brackets!
''') if re_validation is None: wl_msg_boxes.Wl_Msg_Box_Warning( self.main, - title = _tr('wl_settings_files', 'Invalid Opening Tag'), + title = _tr('Wl_Table_Tags', 'Invalid Opening Tag'), text = warning_text ).exec_() @@ -312,8 +313,8 @@ def item_changed(self, item): # pylint: disable=arguments-differ if row != item.row() and self.model().item(row, 2).text() == item.text(): wl_msg_boxes.Wl_Msg_Box_Warning( self.main, - title = _tr('wl_settings_files', 'Duplicate Tags'), - text = _tr('wl_settings_files', ''' + title = _tr('Wl_Table_Tags', 'Duplicate Tags'), + text = _tr('Wl_Table_Tags', '''
The tag that you have specified already exists in the table!
''') ).exec_() @@ -337,24 +338,24 @@ def item_changed(self, item): # pylint: disable=arguments-differ preview = self.model().item(row, 4) # Closing Tag & Preview - if type_text == _tr('wl_settings_files', 'Embedded'): + if type_text == _tr('Wl_Table_Tags', 'Embedded'): if wl_matching.split_tag_embedded(opening_tag_text)[1] == '*': opening_tag_text = opening_tag_text.replace('*', self.tr('TAG')) - closing_tag.setText('N/A') - preview.setText(_tr('wl_settings_files', 'token') + opening_tag_text) - elif type_text == _tr('wl_settings_files', 'Non-embedded'): + closing_tag.setText(_tr('Wl_Table_Tags', 'N/A')) + preview.setText(_tr('Wl_Table_Tags', 'token') + opening_tag_text) + elif type_text == _tr('Wl_Table_Tags', 'Non-embedded'): # Add a "/" before the first non-punctuation character tag_start, tag_name, tag_end = wl_matching.split_tag_non_embedded(opening_tag_text) closing_tag.setText(f'{tag_start}/{tag_name}{tag_end}') if self.settings_tags == 'body_tag_settings' and tag_name == '*': - opening_tag_text = opening_tag_text.replace('*', _tr('wl_settings_files', 'TAG')) - closing_tag_text = self.model().item(row, 3).text().replace('*', _tr('wl_settings_files', 'TAG')) - preview.setText(opening_tag_text + _tr('wl_settings_files', 'token') + closing_tag_text) + opening_tag_text = opening_tag_text.replace('*', _tr('Wl_Table_Tags', 'TAG')) + closing_tag_text = self.model().item(row, 3).text().replace('*', _tr('Wl_Table_Tags', 'TAG')) + preview.setText(opening_tag_text + _tr('Wl_Table_Tags', 'token') + closing_tag_text) else: - preview.setText(opening_tag_text + _tr('wl_settings_files', 'token') + self.model().item(row, 3).text()) + preview.setText(opening_tag_text + _tr('Wl_Table_Tags', 'token') + self.model().item(row, 3).text()) self.enable_updates() diff --git a/wordless/wl_settings/wl_settings_global.py b/wordless/wl_settings/wl_settings_global.py index 32400b7b6..48f1c343a 100644 --- a/wordless/wl_settings/wl_settings_global.py +++ 
b/wordless/wl_settings/wl_settings_global.py @@ -28,3920 +28,3924 @@ _tr = QCoreApplication.translate -SETTINGS_GLOBAL = { - # Language names should be always capitalized - 'langs': { - _tr('wl_settings_global', 'Afrikaans'): ['afr', 'af'], - _tr('wl_settings_global', 'Albanian'): ['sqi', 'sq'], - _tr('wl_settings_global', 'Amharic'): ['amh', 'am'], - _tr('wl_settings_global', 'Arabic'): ['ara', 'ar'], - _tr('wl_settings_global', 'Armenian (Classical)'): ['xcl', 'xcl'], - _tr('wl_settings_global', 'Armenian (Eastern)'): ['hye', 'hy'], - _tr('wl_settings_global', 'Armenian (Western)'): ['hyw', 'hyw'], - _tr('wl_settings_global', 'Assamese'): ['asm', 'as'], - _tr('wl_settings_global', 'Asturian'): ['ast', 'ast'], - _tr('wl_settings_global', 'Azerbaijani'): ['aze', 'az'], - _tr('wl_settings_global', 'Basque'): ['eus', 'eu'], - _tr('wl_settings_global', 'Belarusian'): ['bel', 'be'], - _tr('wl_settings_global', 'Bengali'): ['ben', 'bn'], - _tr('wl_settings_global', 'Bulgarian'): ['bul', 'bg'], - _tr('wl_settings_global', 'Burmese'): ['mya', 'my'], - _tr('wl_settings_global', 'Buryat (Russia)'): ['bxr', 'bxr'], - _tr('wl_settings_global', 'Catalan'): ['cat', 'ca'], - _tr('wl_settings_global', 'Chinese (Classical)'): ['lzh', 'lzh'], - _tr('wl_settings_global', 'Chinese (Simplified)'): ['zho_cn', 'zh_cn'], - _tr('wl_settings_global', 'Chinese (Traditional)'): ['zho_tw', 'zh_tw'], - _tr('wl_settings_global', 'Church Slavonic (Old)'): ['chu', 'cu'], - _tr('wl_settings_global', 'Coptic'): ['cop', 'cop'], - _tr('wl_settings_global', 'Croatian'): ['hrv', 'hr'], - _tr('wl_settings_global', 'Czech'): ['ces', 'cs'], - _tr('wl_settings_global', 'Danish'): ['dan', 'da'], - _tr('wl_settings_global', 'Dutch'): ['nld', 'nl'], - _tr('wl_settings_global', 'English (Middle)'): ['enm', 'enm'], - _tr('wl_settings_global', 'English (Old)'): ['ang', 'ang'], - _tr('wl_settings_global', 'English (United Kingdom)'): ['eng_gb', 'en_gb'], - _tr('wl_settings_global', 'English (United States)'): 
['eng_us', 'en_us'], - _tr('wl_settings_global', 'Erzya'): ['myv', 'myv'], - _tr('wl_settings_global', 'Esperanto'): ['epo', 'eo'], - _tr('wl_settings_global', 'Estonian'): ['est', 'et'], - _tr('wl_settings_global', 'Faroese'): ['fao', 'fo'], - _tr('wl_settings_global', 'Finnish'): ['fin', 'fi'], - _tr('wl_settings_global', 'French'): ['fra', 'fr'], - _tr('wl_settings_global', 'French (Old)'): ['fro', 'fro'], - _tr('wl_settings_global', 'Galician'): ['glg', 'gl'], - _tr('wl_settings_global', 'Georgian'): ['kat', 'ka'], - _tr('wl_settings_global', 'German (Austria)'): ['deu_at', 'de_at'], - _tr('wl_settings_global', 'German (Germany)'): ['deu_de', 'de_de'], - _tr('wl_settings_global', 'German (Switzerland)'): ['deu_ch', 'de_ch'], - _tr('wl_settings_global', 'Gothic'): ['got', 'got'], - _tr('wl_settings_global', 'Greek (Ancient)'): ['grc', 'grc'], - _tr('wl_settings_global', 'Greek (Modern)'): ['ell', 'el'], - _tr('wl_settings_global', 'Gujarati'): ['guj', 'gu'], - _tr('wl_settings_global', 'Hebrew (Ancient)'): ['hbo', 'hbo'], - _tr('wl_settings_global', 'Hebrew (Modern)'): ['heb', 'he'], - _tr('wl_settings_global', 'Hindi'): ['hin', 'hi'], - _tr('wl_settings_global', 'Hungarian'): ['hun', 'hu'], - _tr('wl_settings_global', 'Icelandic'): ['isl', 'is'], - _tr('wl_settings_global', 'Indonesian'): ['ind', 'id'], - _tr('wl_settings_global', 'Irish'): ['gle', 'ga'], - _tr('wl_settings_global', 'Italian'): ['ita', 'it'], - _tr('wl_settings_global', 'Japanese'): ['jpn', 'ja'], - _tr('wl_settings_global', 'Kannada'): ['kan', 'kn'], - _tr('wl_settings_global', 'Kazakh'): ['kaz', 'kk'], - _tr('wl_settings_global', 'Khmer'): ['khm', 'km'], - _tr('wl_settings_global', 'Korean'): ['kor', 'ko'], - _tr('wl_settings_global', 'Kurdish (Kurmanji)'): ['kmr', 'kmr'], - _tr('wl_settings_global', 'Kyrgyz'): ['kir', 'ky'], - _tr('wl_settings_global', 'Lao'): ['lao', 'lo'], - _tr('wl_settings_global', 'Latin'): ['lat', 'la'], - _tr('wl_settings_global', 'Latvian'): ['lav', 'lv'], - 
_tr('wl_settings_global', 'Ligurian'): ['lij', 'lij'], - _tr('wl_settings_global', 'Lithuanian'): ['lit', 'lt'], - _tr('wl_settings_global', 'Luganda'): ['lug', 'lg'], - _tr('wl_settings_global', 'Luxembourgish'): ['ltz', 'lb'], - _tr('wl_settings_global', 'Macedonian'): ['mkd', 'mk'], - _tr('wl_settings_global', 'Malay'): ['msa', 'ms'], - _tr('wl_settings_global', 'Malayalam'): ['mal', 'ml'], - _tr('wl_settings_global', 'Maltese'): ['mlt', 'mt'], - _tr('wl_settings_global', 'Manx'): ['glv', 'gv'], - _tr('wl_settings_global', 'Marathi'): ['mar', 'mr'], - _tr('wl_settings_global', 'Meitei (Meitei script)'): ['mni_mtei', 'mni_mtei'], - _tr('wl_settings_global', 'Mongolian'): ['mon', 'mn'], - _tr('wl_settings_global', 'Nepali'): ['nep', 'ne'], - _tr('wl_settings_global', 'Nigerian Pidgin'): ['pcm', 'pcm'], - _tr('wl_settings_global', 'Norwegian (Bokmål)'): ['nob', 'nb'], - _tr('wl_settings_global', 'Norwegian (Nynorsk)'): ['nno', 'nn'], - _tr('wl_settings_global', 'Odia'): ['ori', 'or'], - _tr('wl_settings_global', 'Persian'): ['fas', 'fa'], - _tr('wl_settings_global', 'Polish'): ['pol', 'pl'], - _tr('wl_settings_global', 'Pomak'): ['qpm', 'qpm'], - _tr('wl_settings_global', 'Portuguese (Brazil)'): ['por_br', 'pt_br'], - _tr('wl_settings_global', 'Portuguese (Portugal)'): ['por_pt', 'pt_pt'], - _tr('wl_settings_global', 'Punjabi (Gurmukhi script)'): ['pan_guru', 'pa_guru'], - _tr('wl_settings_global', 'Romanian'): ['ron', 'ro'], - _tr('wl_settings_global', 'Russian'): ['rus', 'ru'], - _tr('wl_settings_global', 'Russian (Old)'): ['orv', 'orv'], - _tr('wl_settings_global', 'Sámi (Northern)'): ['sme', 'se'], - _tr('wl_settings_global', 'Sanskrit'): ['san', 'sa'], - _tr('wl_settings_global', 'Scottish Gaelic'): ['gla', 'gd'], - _tr('wl_settings_global', 'Serbian (Cyrillic script)'): ['srp_cyrl', 'sr_cyrl'], - _tr('wl_settings_global', 'Serbian (Latin script)'): ['srp_latn', 'sr_latn'], - _tr('wl_settings_global', 'Sindhi'): ['snd', 'sd'], - _tr('wl_settings_global', 
'Sinhala'): ['sin', 'si'], - _tr('wl_settings_global', 'Slovak'): ['slk', 'sk'], - _tr('wl_settings_global', 'Slovene'): ['slv', 'sl'], - _tr('wl_settings_global', 'Sorbian (Lower)'): ['dsb', 'dsb'], - _tr('wl_settings_global', 'Sorbian (Upper)'): ['hsb', 'hsb'], - _tr('wl_settings_global', 'Spanish'): ['spa', 'es'], - _tr('wl_settings_global', 'Swahili'): ['swa', 'sw'], - _tr('wl_settings_global', 'Swedish'): ['swe', 'sv'], - _tr('wl_settings_global', 'Tagalog'): ['tgl', 'tl'], - _tr('wl_settings_global', 'Tajik'): ['tgk', 'tg'], - _tr('wl_settings_global', 'Tamil'): ['tam', 'ta'], - _tr('wl_settings_global', 'Tatar'): ['tat', 'tt'], - _tr('wl_settings_global', 'Telugu'): ['tel', 'te'], - _tr('wl_settings_global', 'Tetun (Dili)'): ['tdt', 'tdt'], - _tr('wl_settings_global', 'Thai'): ['tha', 'th'], - _tr('wl_settings_global', 'Tibetan'): ['bod', 'bo'], - _tr('wl_settings_global', 'Tigrinya'): ['tir', 'ti'], - _tr('wl_settings_global', 'Tswana'): ['tsn', 'tn'], - _tr('wl_settings_global', 'Turkish'): ['tur', 'tr'], - _tr('wl_settings_global', 'Ukrainian'): ['ukr', 'uk'], - _tr('wl_settings_global', 'Urdu'): ['urd', 'ur'], - _tr('wl_settings_global', 'Uyghur'): ['uig', 'ug'], - _tr('wl_settings_global', 'Vietnamese'): ['vie', 'vi'], - _tr('wl_settings_global', 'Welsh'): ['cym', 'cy'], - _tr('wl_settings_global', 'Wolof'): ['wol', 'wo'], - _tr('wl_settings_global', 'Yoruba'): ['yor', 'yo'], - _tr('wl_settings_global', 'Zulu'): ['zul', 'zu'], - - _tr('wl_settings_global', 'Other languages'): ['other', 'other'] - }, - - # Language and geographical names should be always capitalized - # Case of encoding names are preserved - 'encodings': { - _tr('wl_settings_global', 'All languages (UTF-8 without BOM)'): 'utf_8', - _tr('wl_settings_global', 'All languages (UTF-8 with BOM)'): 'utf_8_sig', - _tr('wl_settings_global', 'All languages (UTF-16 with BOM)'): 'utf_16', - _tr('wl_settings_global', 'All languages (UTF-16BE without BOM)'): 'utf_16_be', - _tr('wl_settings_global', 
'All languages (UTF-16LE without BOM)'): 'utf_16_le', - _tr('wl_settings_global', 'All languages (UTF-32 with BOM)'): 'utf_32', - _tr('wl_settings_global', 'All languages (UTF-32BE without BOM)'): 'utf_32_be', - _tr('wl_settings_global', 'All languages (UTF-32LE without BOM)'): 'utf_32_le', - _tr('wl_settings_global', 'All languages (UTF-7)'): 'utf_7', - - _tr('wl_settings_global', 'Arabic (CP720)'): 'cp720', - _tr('wl_settings_global', 'Arabic (CP864)'): 'cp864', - _tr('wl_settings_global', 'Arabic (ISO-8859-6)'): 'iso8859_6', - _tr('wl_settings_global', 'Arabic (Mac OS Arabic)'): 'mac_arabic', - _tr('wl_settings_global', 'Arabic (Windows-1256)'): 'cp1256', - - _tr('wl_settings_global', 'Baltic languages (CP775)'): 'cp775', - _tr('wl_settings_global', 'Baltic languages (ISO-8859-13)'): 'iso8859_13', - _tr('wl_settings_global', 'Baltic languages (Windows-1257)'): 'cp1257', - - _tr('wl_settings_global', 'Celtic languages (ISO-8859-14)'): 'iso8859_14', - - _tr('wl_settings_global', 'Chinese (GB18030)'): 'gb18030', - _tr('wl_settings_global', 'Chinese (GBK)'): 'gbk', - - _tr('wl_settings_global', 'Chinese (Simplified) (GB2312)'): 'gb2312', - _tr('wl_settings_global', 'Chinese (Simplified) (HZ)'): 'hz', - - _tr('wl_settings_global', 'Chinese (Traditional) (Big-5)'): 'big5', - _tr('wl_settings_global', 'Chinese (Traditional) (Big5-HKSCS)'): 'big5hkscs', - _tr('wl_settings_global', 'Chinese (Traditional) (CP950)'): 'cp950', - - _tr('wl_settings_global', 'Croatian (Mac OS Croatian)'): 'mac_croatian', - - _tr('wl_settings_global', 'Cyrillic (CP855)'): 'cp855', - _tr('wl_settings_global', 'Cyrillic (CP866)'): 'cp866', - _tr('wl_settings_global', 'Cyrillic (ISO-8859-5)'): 'iso8859_5', - _tr('wl_settings_global', 'Cyrillic (Mac OS Cyrillic)'): 'mac_cyrillic', - _tr('wl_settings_global', 'Cyrillic (Windows-1251)'): 'cp1251', - - _tr('wl_settings_global', 'English (ASCII)'): 'ascii', - _tr('wl_settings_global', 'English (EBCDIC 037)'): 'cp037', - _tr('wl_settings_global', 
'English (CP437)'): 'cp437', - - _tr('wl_settings_global', 'European (HP Roman-8)'): 'hp_roman8', - - _tr('wl_settings_global', 'European (Central) (CP852)'): 'cp852', - _tr('wl_settings_global', 'European (Central) (ISO-8859-2)'): 'iso8859_2', - _tr('wl_settings_global', 'European (Central) (Mac OS Central European)'): 'mac_latin2', - _tr('wl_settings_global', 'European (Central) (Windows-1250)'): 'cp1250', - - _tr('wl_settings_global', 'European (Northern) (ISO-8859-4)'): 'iso8859_4', - - _tr('wl_settings_global', 'European (Southern) (ISO-8859-3)'): 'iso8859_3', - - _tr('wl_settings_global', 'European (Southeastern) (ISO-8859-16)'): 'iso8859_16', - - _tr('wl_settings_global', 'European (Western) (EBCDIC 500)'): 'cp500', - _tr('wl_settings_global', 'European (Western) (CP850)'): 'cp850', - _tr('wl_settings_global', 'European (Western) (CP858)'): 'cp858', - _tr('wl_settings_global', 'European (Western) (CP1140)'): 'cp1140', - _tr('wl_settings_global', 'European (Western) (ISO-8859-1)'): 'latin_1', - _tr('wl_settings_global', 'European (Western) (ISO-8859-15)'): 'iso8859_15', - _tr('wl_settings_global', 'European (Western) (Mac OS Roman)'): 'mac_roman', - _tr('wl_settings_global', 'European (Western) (Windows-1252)'): 'cp1252', - - _tr('wl_settings_global', 'French (CP863)'): 'cp863', - - _tr('wl_settings_global', 'German (EBCDIC 273)'): 'cp273', - - _tr('wl_settings_global', 'Greek (CP737)'): 'cp737', - _tr('wl_settings_global', 'Greek (CP869)'): 'cp869', - _tr('wl_settings_global', 'Greek (CP875)'): 'cp875', - _tr('wl_settings_global', 'Greek (ISO-8859-7)'): 'iso8859_7', - _tr('wl_settings_global', 'Greek (Mac OS Greek)'): 'mac_greek', - _tr('wl_settings_global', 'Greek (Windows-1253)'): 'cp1253', - - _tr('wl_settings_global', 'Hebrew (CP856)'): 'cp856', - _tr('wl_settings_global', 'Hebrew (CP862)'): 'cp862', - _tr('wl_settings_global', 'Hebrew (EBCDIC 424)'): 'cp424', - _tr('wl_settings_global', 'Hebrew (ISO-8859-8)'): 'iso8859_8', - _tr('wl_settings_global', 
'Hebrew (Windows-1255)'): 'cp1255', - - _tr('wl_settings_global', 'Icelandic (CP861)'): 'cp861', - _tr('wl_settings_global', 'Icelandic (Mac OS Icelandic)'): 'mac_iceland', - - _tr('wl_settings_global', 'Japanese (CP932)'): 'cp932', - _tr('wl_settings_global', 'Japanese (EUC-JP)'): 'euc_jp', - _tr('wl_settings_global', 'Japanese (EUC-JIS-2004)'): 'euc_jis_2004', - _tr('wl_settings_global', 'Japanese (EUC-JISx0213)'): 'euc_jisx0213', - _tr('wl_settings_global', 'Japanese (ISO-2022-JP)'): 'iso2022_jp', - _tr('wl_settings_global', 'Japanese (ISO-2022-JP-1)'): 'iso2022_jp_1', - _tr('wl_settings_global', 'Japanese (ISO-2022-JP-2)'): 'iso2022_jp_2', - _tr('wl_settings_global', 'Japanese (ISO-2022-JP-2004)'): 'iso2022_jp_2004', - _tr('wl_settings_global', 'Japanese (ISO-2022-JP-3)'): 'iso2022_jp_3', - _tr('wl_settings_global', 'Japanese (ISO-2022-JP-EXT)'): 'iso2022_jp_ext', - _tr('wl_settings_global', 'Japanese (Shift_JIS)'): 'shift_jis', - _tr('wl_settings_global', 'Japanese (Shift_JIS-2004)'): 'shift_jis_2004', - _tr('wl_settings_global', 'Japanese (Shift_JISx0213)'): 'shift_jisx0213', - - _tr('wl_settings_global', 'Kazakh (KZ-1048)'): 'kz1048', - _tr('wl_settings_global', 'Kazakh (PTCP154)'): 'ptcp154', - - _tr('wl_settings_global', 'Korean (EUC-KR)'): 'euc_kr', - _tr('wl_settings_global', 'Korean (ISO-2022-KR)'): 'iso2022_kr', - _tr('wl_settings_global', 'Korean (JOHAB)'): 'johab', - _tr('wl_settings_global', 'Korean (UHC)'): 'cp949', - - _tr('wl_settings_global', 'Nordic languages (CP865)'): 'cp865', - _tr('wl_settings_global', 'Nordic languages (ISO-8859-10)'): 'iso8859_10', - - _tr('wl_settings_global', 'Persian/Urdu (Mac OS Farsi)'): 'mac_farsi', - - _tr('wl_settings_global', 'Portuguese (CP860)'): 'cp860', - - _tr('wl_settings_global', 'Romanian (Mac OS Romanian)'): 'mac_romanian', - - _tr('wl_settings_global', 'Russian (KOI8-R)'): 'koi8_r', - - _tr('wl_settings_global', 'Tajik (KOI8-T)'): 'koi8_t', - - _tr('wl_settings_global', 'Thai (CP874)'): 'cp874', - 
_tr('wl_settings_global', 'Thai (ISO-8859-11)'): 'iso8859_11', - _tr('wl_settings_global', 'Thai (TIS-620)'): 'tis_620', - - _tr('wl_settings_global', 'Turkish (CP857)'): 'cp857', - _tr('wl_settings_global', 'Turkish (EBCDIC 1026)'): 'cp1026', - _tr('wl_settings_global', 'Turkish (ISO-8859-9)'): 'iso8859_9', - _tr('wl_settings_global', 'Turkish (Mac OS Turkish)'): 'mac_turkish', - _tr('wl_settings_global', 'Turkish (Windows-1254)'): 'cp1254', - - _tr('wl_settings_global', 'Ukrainian (CP1125)'): 'cp1125', - _tr('wl_settings_global', 'Ukrainian (KOI8-U)'): 'koi8_u', - - _tr('wl_settings_global', 'Urdu (CP1006)'): 'cp1006', - - _tr('wl_settings_global', 'Vietnamese (CP1258)'): 'cp1258', - }, - - # Names of file types are always pluralized but not capitalized - 'file_types': { - 'files': [ - _tr('wl_settings_global', 'CSV files (*.csv)'), - _tr('wl_settings_global', 'Excel workbooks (*.xlsx)'), - _tr('wl_settings_global', 'HTML pages (*.htm; *.html)'), - _tr('wl_settings_global', 'Lyrics files (*.lrc)'), - _tr('wl_settings_global', 'PDF files (*.pdf)'), - _tr('wl_settings_global', 'PowerPoint presentations (*.pptx)'), - _tr('wl_settings_global', 'Text files (*.txt)'), - _tr('wl_settings_global', 'Translation memory files (*.tmx)'), - _tr('wl_settings_global', 'Word documents (*.docx)'), - _tr('wl_settings_global', 'XML files (*.xml)'), - _tr('wl_settings_global', 'All files (*.*)') - ], - - 'exp_tables': [ - _tr('wl_settings_global', 'CSV files (*.csv)'), - _tr('wl_settings_global', 'Excel workbooks (*.xlsx)') - ], - 'exp_tables_concordancer': [ - _tr('wl_settings_global', 'CSV files (*.csv)'), - _tr('wl_settings_global', 'Excel workbooks (*.xlsx)'), - _tr('wl_settings_global', 'Word documents (*.docx)') - ], - 'exp_tables_concordancer_zapping': [ - _tr('wl_settings_global', 'Word documents (*.docx)') - ], - - 'fonts': [ - _tr('wl_settings_global', 'OpenType fonts (*.otf)'), - _tr('wl_settings_global', 'TrueType fonts (*.ttf)'), - _tr('wl_settings_global', 'All files 
(*.*)') - ], - - # All image formats supported by Pillow - # Reference: https://stackoverflow.com/questions/71112986/retrieve-a-list-of-supported-read-file-extensions-formats - 'masks': [ - _tr('wl_settings_global', 'Blizzard mipmap format (*.blp)'), - _tr('wl_settings_global', 'Windows bitmaps (*.bmp)'), - _tr('wl_settings_global', 'Window cursor files (*.cur)'), - _tr('wl_settings_global', 'Multi-page PCX files (*.dcx)'), - _tr('wl_settings_global', 'DirectDraw surface (*.dds)'), - _tr('wl_settings_global', 'Device-independent bitmaps (*.dib)'), - _tr('wl_settings_global', 'Encapsulated PostScript (*.eps, *.ps)'), - _tr('wl_settings_global', 'Flexible image transport system (*.fit, *.fits)'), - _tr('wl_settings_global', 'Autodesk animation files (*.flc, *.fli)'), - _tr('wl_settings_global', 'Fox Engine textures (*.ftex)'), - _tr('wl_settings_global', 'GIMP brush files (*.gbr)'), - _tr('wl_settings_global', 'Graphics interchange format (*.gif)'), - _tr('wl_settings_global', 'Apple icon images (*.icns)'), - _tr('wl_settings_global', 'Windows icon files (*.ico)'), - _tr('wl_settings_global', 'IPTC/NAA newsphoto files (*.iim)'), - _tr('wl_settings_global', 'IM files (*.im)'), - _tr('wl_settings_global', 'Image Tools image files (*)'), - _tr('wl_settings_global', 'JPEG files (*.jfif, *.jpe, *.jpeg, *.jpg)'), - _tr('wl_settings_global', 'JPEG 2000 files (*.j2c, *.j2k, *.jp2, *.jpc, *.jpf, *.jpx)'), - _tr('wl_settings_global', 'McIDAS area files (*)'), - _tr('wl_settings_global', 'Microsoft Paint files (*.msp)'), - _tr('wl_settings_global', 'PhotoCD files (*.pcd)'), - _tr('wl_settings_global', 'Picture exchange (*.pcx)'), - _tr('wl_settings_global', 'PIXAR raster files (*.pxr)'), - _tr('wl_settings_global', 'Portable network graphics (*.apng, *.png)'), - _tr('wl_settings_global', 'Portable pixmap format (*.pbm, *.pgm, *.pnm, *.ppm)'), - _tr('wl_settings_global', 'Photoshop PSD files (*.psd)'), - _tr('wl_settings_global', 'Sun raster files (*.ras)'), - 
_tr('wl_settings_global', 'Silicon graphics images (*.bw, *.rgb, *.rgba, *.sgi)'), - _tr('wl_settings_global', 'SPIDER files (*)'), - _tr('wl_settings_global', 'Truevision TGA (*.icb, *.tga, *.vda, *.vst)'), - _tr('wl_settings_global', 'TIFF files (*.tif, *.tiff)'), - _tr('wl_settings_global', 'WebP files (*.webp)'), - _tr('wl_settings_global', 'Windows metafiles (*.emf, *.wmf)'), - _tr('wl_settings_global', 'X bitmaps (*.xbm)'), - _tr('wl_settings_global', 'X pixmaps (*.xpm)'), - _tr('wl_settings_global', 'XV thumbnails (*)'), - _tr('wl_settings_global', 'All files (*.*)') - ], - }, - - # Only language names and proper nouns are capitalized in names of language utilities - 'mapping_lang_utils': { - 'sentence_tokenizers': { - _tr('wl_settings_global', 'botok - Tibetan sentence tokenizer'): 'botok_bod', - _tr('wl_settings_global', 'khmer-nltk - Khmer sentence tokenizer'): 'khmer_nltk_khm', - _tr('wl_settings_global', 'LaoNLP - Lao sentence tokenizer'): 'laonlp_lao', - - _tr('wl_settings_global', 'NLTK - Czech Punkt sentence tokenizer'): 'nltk_punkt_ces', - _tr('wl_settings_global', 'NLTK - Danish Punkt sentence tokenizer'): 'nltk_punkt_dan', - _tr('wl_settings_global', 'NLTK - Dutch Punkt sentence tokenizer'): 'nltk_punkt_nld', - _tr('wl_settings_global', 'NLTK - English Punkt sentence tokenizer'): 'nltk_punkt_eng', - _tr('wl_settings_global', 'NLTK - Estonian Punkt sentence tokenizer'): 'nltk_punkt_est', - _tr('wl_settings_global', 'NLTK - Finnish Punkt sentence tokenizer'): 'nltk_punkt_fin', - _tr('wl_settings_global', 'NLTK - French Punkt sentence tokenizer'): 'nltk_punkt_fra', - _tr('wl_settings_global', 'NLTK - German Punkt sentence tokenizer'): 'nltk_punkt_deu', - _tr('wl_settings_global', 'NLTK - Greek Punkt sentence tokenizer'): 'nltk_punkt_ell', - _tr('wl_settings_global', 'NLTK - Italian Punkt sentence tokenizer'): 'nltk_punkt_ita', - _tr('wl_settings_global', 'NLTK - Malayalam Punkt sentence tokenizer'): 'nltk_punkt_mal', - _tr('wl_settings_global', 'NLTK 
- Norwegian (Bokmål) Punkt sentence tokenizer'): 'nltk_punkt_nob', - _tr('wl_settings_global', 'NLTK - Polish Punkt sentence tokenizer'): 'nltk_punkt_pol', - _tr('wl_settings_global', 'NLTK - Portuguese Punkt sentence tokenizer'): 'nltk_punkt_por', - _tr('wl_settings_global', 'NLTK - Russian Punkt sentence tokenizer'): 'nltk_punkt_rus', - _tr('wl_settings_global', 'NLTK - Slovene Punkt sentence tokenizer'): 'nltk_punkt_slv', - _tr('wl_settings_global', 'NLTK - Spanish Punkt sentence tokenizer'): 'nltk_punkt_spa', - _tr('wl_settings_global', 'NLTK - Swedish Punkt sentence tokenizer'): 'nltk_punkt_swe', - _tr('wl_settings_global', 'NLTK - Turkish Punkt sentence tokenizer'): 'nltk_punkt_tur', - - 'PyThaiNLP - CRFCut': 'pythainlp_crfcut', - 'PyThaiNLP - ThaiSumCut': 'pythainlp_thaisumcut', - - _tr('wl_settings_global', 'spaCy - Catalan dependency parser'): 'spacy_dependency_parser_cat', - _tr('wl_settings_global', 'spaCy - Chinese dependency parser'): 'spacy_dependency_parser_zho', - _tr('wl_settings_global', 'spaCy - Croatian dependency parser'): 'spacy_dependency_parser_hrv', - _tr('wl_settings_global', 'spaCy - Danish dependency parser'): 'spacy_dependency_parser_dan', - _tr('wl_settings_global', 'spaCy - Dutch dependency parser'): 'spacy_dependency_parser_nld', - _tr('wl_settings_global', 'spaCy - English dependency parser'): 'spacy_dependency_parser_eng', - _tr('wl_settings_global', 'spaCy - Finnish dependency parser'): 'spacy_dependency_parser_fin', - _tr('wl_settings_global', 'spaCy - French dependency parser'): 'spacy_dependency_parser_fra', - _tr('wl_settings_global', 'spaCy - German dependency parser'): 'spacy_dependency_parser_deu', - _tr('wl_settings_global', 'spaCy - Greek (Modern) dependency parser'): 'spacy_dependency_parser_ell', - _tr('wl_settings_global', 'spaCy - Italian dependency parser'): 'spacy_dependency_parser_ita', - _tr('wl_settings_global', 'spaCy - Japanese dependency parser'): 'spacy_dependency_parser_jpn', - _tr('wl_settings_global', 
'spaCy - Korean dependency parser'): 'spacy_dependency_parser_kor', - _tr('wl_settings_global', 'spaCy - Lithuanian dependency parser'): 'spacy_dependency_parser_lit', - _tr('wl_settings_global', 'spaCy - Macedonian dependency parser'): 'spacy_dependency_parser_mkd', - _tr('wl_settings_global', 'spaCy - Norwegian (Bokmål) dependency parser'): 'spacy_dependency_parser_nob', - _tr('wl_settings_global', 'spaCy - Polish dependency parser'): 'spacy_dependency_parser_pol', - _tr('wl_settings_global', 'spaCy - Portuguese dependency parser'): 'spacy_dependency_parser_por', - _tr('wl_settings_global', 'spaCy - Romanian dependency parser'): 'spacy_dependency_parser_ron', - _tr('wl_settings_global', 'spaCy - Russian dependency parser'): 'spacy_dependency_parser_rus', - _tr('wl_settings_global', 'spaCy - Slovene dependency parser'): 'spacy_dependency_parser_slv', - _tr('wl_settings_global', 'spaCy - Spanish dependency parser'): 'spacy_dependency_parser_spa', - _tr('wl_settings_global', 'spaCy - Swedish dependency parser'): 'spacy_dependency_parser_swe', - _tr('wl_settings_global', 'spaCy - Ukrainian dependency parser'): 'spacy_dependency_parser_ukr', - - _tr('wl_settings_global', 'spaCy - Croatian sentence recognizer'): 'spacy_sentence_recognizer_hrv', - _tr('wl_settings_global', 'spaCy - Dutch sentence recognizer'): 'spacy_sentence_recognizer_nld', - _tr('wl_settings_global', 'spaCy - Finnish sentence recognizer'): 'spacy_sentence_recognizer_fin', - _tr('wl_settings_global', 'spaCy - Greek (Modern) sentence recognizer'): 'spacy_sentence_recognizer_ell', - _tr('wl_settings_global', 'spaCy - Italian sentence recognizer'): 'spacy_sentence_recognizer_ita', - _tr('wl_settings_global', 'spaCy - Korean sentence recognizer'): 'spacy_sentence_recognizer_kor', - _tr('wl_settings_global', 'spaCy - Lithuanian sentence recognizer'): 'spacy_sentence_recognizer_lit', - _tr('wl_settings_global', 'spaCy - Macedonian sentence recognizer'): 'spacy_sentence_recognizer_mkd', - 
_tr('wl_settings_global', 'spaCy - Norwegian (Bokmål) sentence recognizer'): 'spacy_sentence_recognizer_nob', - _tr('wl_settings_global', 'spaCy - Polish sentence recognizer'): 'spacy_sentence_recognizer_pol', - _tr('wl_settings_global', 'spaCy - Portuguese sentence recognizer'): 'spacy_sentence_recognizer_por', - _tr('wl_settings_global', 'spaCy - Romanian sentence recognizer'): 'spacy_sentence_recognizer_ron', - _tr('wl_settings_global', 'spaCy - Russian sentence recognizer'): 'spacy_sentence_recognizer_rus', - _tr('wl_settings_global', 'spaCy - Swedish sentence recognizer'): 'spacy_sentence_recognizer_swe', - - _tr('wl_settings_global', 'spaCy - Sentencizer'): 'spacy_sentencizer', - - _tr('wl_settings_global', 'Stanza - Afrikaans sentence tokenizer'): 'stanza_afr', - _tr('wl_settings_global', 'Stanza - Arabic sentence tokenizer'): 'stanza_ara', - _tr('wl_settings_global', 'Stanza - Armenian (Classical) sentence tokenizer'): 'stanza_xcl', - _tr('wl_settings_global', 'Stanza - Armenian (Eastern) sentence tokenizer'): 'stanza_hye', - _tr('wl_settings_global', 'Stanza - Armenian (Western) sentence tokenizer'): 'stanza_hyw', - _tr('wl_settings_global', 'Stanza - Basque sentence tokenizer'): 'stanza_eus', - _tr('wl_settings_global', 'Stanza - Belarusian sentence tokenizer'): 'stanza_bel', - _tr('wl_settings_global', 'Stanza - Bulgarian sentence tokenizer'): 'stanza_bul', - _tr('wl_settings_global', 'Stanza - Burmese sentence tokenizer'): 'stanza_mya', - _tr('wl_settings_global', 'Stanza - Buryat (Russia) sentence tokenizer'): 'stanza_bxr', - _tr('wl_settings_global', 'Stanza - Catalan sentence tokenizer'): 'stanza_cat', - _tr('wl_settings_global', 'Stanza - Chinese (Classical) sentence tokenizer'): 'stanza_lzh', - _tr('wl_settings_global', 'Stanza - Chinese (Simplified) sentence tokenizer'): 'stanza_zho_cn', - _tr('wl_settings_global', 'Stanza - Chinese (Traditional) sentence tokenizer'): 'stanza_zho_tw', - _tr('wl_settings_global', 'Stanza - Church Slavonic (Old) 
sentence tokenizer'): 'stanza_chu', - _tr('wl_settings_global', 'Stanza - Coptic sentence tokenizer'): 'stanza_cop', - _tr('wl_settings_global', 'Stanza - Croatian sentence tokenizer'): 'stanza_hrv', - _tr('wl_settings_global', 'Stanza - Czech sentence tokenizer'): 'stanza_ces', - _tr('wl_settings_global', 'Stanza - Danish sentence tokenizer'): 'stanza_dan', - _tr('wl_settings_global', 'Stanza - Dutch sentence tokenizer'): 'stanza_nld', - _tr('wl_settings_global', 'Stanza - English sentence tokenizer'): 'stanza_eng', - _tr('wl_settings_global', 'Stanza - English (Old) sentence tokenizer'): 'stanza_ang', - _tr('wl_settings_global', 'Stanza - Erzya sentence tokenizer'): 'stanza_myv', - _tr('wl_settings_global', 'Stanza - Estonian sentence tokenizer'): 'stanza_est', - _tr('wl_settings_global', 'Stanza - Faroese sentence tokenizer'): 'stanza_fao', - _tr('wl_settings_global', 'Stanza - Finnish sentence tokenizer'): 'stanza_fin', - _tr('wl_settings_global', 'Stanza - French sentence tokenizer'): 'stanza_fra', - _tr('wl_settings_global', 'Stanza - French (Old) sentence tokenizer'): 'stanza_fro', - _tr('wl_settings_global', 'Stanza - Galician sentence tokenizer'): 'stanza_glg', - _tr('wl_settings_global', 'Stanza - German sentence tokenizer'): 'stanza_deu', - _tr('wl_settings_global', 'Stanza - Gothic sentence tokenizer'): 'stanza_got', - _tr('wl_settings_global', 'Stanza - Greek (Ancient) sentence tokenizer'): 'stanza_grc', - _tr('wl_settings_global', 'Stanza - Greek (Modern) sentence tokenizer'): 'stanza_ell', - _tr('wl_settings_global', 'Stanza - Hebrew (Ancient) sentence tokenizer'): 'stanza_hbo', - _tr('wl_settings_global', 'Stanza - Hebrew (Modern) sentence tokenizer'): 'stanza_heb', - _tr('wl_settings_global', 'Stanza - Hindi sentence tokenizer'): 'stanza_hin', - _tr('wl_settings_global', 'Stanza - Hungarian sentence tokenizer'): 'stanza_hun', - _tr('wl_settings_global', 'Stanza - Icelandic sentence tokenizer'): 'stanza_isl', - _tr('wl_settings_global', 'Stanza - 
Indonesian sentence tokenizer'): 'stanza_ind', - _tr('wl_settings_global', 'Stanza - Irish sentence tokenizer'): 'stanza_gle', - _tr('wl_settings_global', 'Stanza - Italian sentence tokenizer'): 'stanza_ita', - _tr('wl_settings_global', 'Stanza - Japanese sentence tokenizer'): 'stanza_jpn', - _tr('wl_settings_global', 'Stanza - Kazakh sentence tokenizer'): 'stanza_kaz', - _tr('wl_settings_global', 'Stanza - Korean sentence tokenizer'): 'stanza_kor', - _tr('wl_settings_global', 'Stanza - Kurdish (Kurmanji) sentence tokenizer'): 'stanza_kmr', - _tr('wl_settings_global', 'Stanza - Kyrgyz sentence tokenizer'): 'stanza_kir', - _tr('wl_settings_global', 'Stanza - Latin sentence tokenizer'): 'stanza_lat', - _tr('wl_settings_global', 'Stanza - Latvian sentence tokenizer'): 'stanza_lav', - _tr('wl_settings_global', 'Stanza - Ligurian sentence tokenizer'): 'stanza_lij', - _tr('wl_settings_global', 'Stanza - Lithuanian sentence tokenizer'): 'stanza_lit', - _tr('wl_settings_global', 'Stanza - Maltese sentence tokenizer'): 'stanza_mlt', - _tr('wl_settings_global', 'Stanza - Manx sentence tokenizer'): 'stanza_glv', - _tr('wl_settings_global', 'Stanza - Marathi sentence tokenizer'): 'stanza_mar', - _tr('wl_settings_global', 'Stanza - Nigerian Pidgin sentence tokenizer'): 'stanza_pcm', - _tr('wl_settings_global', 'Stanza - Norwegian (Bokmål) sentence tokenizer'): 'stanza_nob', - _tr('wl_settings_global', 'Stanza - Norwegian (Nynorsk) sentence tokenizer'): 'stanza_nno', - _tr('wl_settings_global', 'Stanza - Persian sentence tokenizer'): 'stanza_fas', - _tr('wl_settings_global', 'Stanza - Polish sentence tokenizer'): 'stanza_pol', - _tr('wl_settings_global', 'Stanza - Pomak sentence tokenizer'): 'stanza_qpm', - _tr('wl_settings_global', 'Stanza - Portuguese sentence tokenizer'): 'stanza_por', - _tr('wl_settings_global', 'Stanza - Romanian sentence tokenizer'): 'stanza_ron', - _tr('wl_settings_global', 'Stanza - Russian sentence tokenizer'): 'stanza_rus', - _tr('wl_settings_global', 
'Stanza - Russian (Old) sentence tokenizer'): 'stanza_orv', - _tr('wl_settings_global', 'Stanza - Sámi (Northern) sentence tokenizer'): 'stanza_sme', - _tr('wl_settings_global', 'Stanza - Sanskrit sentence tokenizer'): 'stanza_san', - _tr('wl_settings_global', 'Stanza - Scottish Gaelic sentence tokenizer'): 'stanza_gla', - _tr('wl_settings_global', 'Stanza - Serbian (Latin script) sentence tokenizer'): 'stanza_srp_latn', - _tr('wl_settings_global', 'Stanza - Sindhi sentence tokenizer'): 'stanza_snd', - _tr('wl_settings_global', 'Stanza - Slovak sentence tokenizer'): 'stanza_slk', - _tr('wl_settings_global', 'Stanza - Slovene sentence tokenizer'): 'stanza_slv', - _tr('wl_settings_global', 'Stanza - Sorbian (Upper) sentence tokenizer'): 'stanza_hsb', - _tr('wl_settings_global', 'Stanza - Spanish sentence tokenizer'): 'stanza_spa', - _tr('wl_settings_global', 'Stanza - Swedish sentence tokenizer'): 'stanza_swe', - _tr('wl_settings_global', 'Stanza - Tamil sentence tokenizer'): 'stanza_tam', - _tr('wl_settings_global', 'Stanza - Telugu sentence tokenizer'): 'stanza_tel', - _tr('wl_settings_global', 'Stanza - Thai sentence tokenizer'): 'stanza_tha', - _tr('wl_settings_global', 'Stanza - Turkish sentence tokenizer'): 'stanza_tur', - _tr('wl_settings_global', 'Stanza - Ukrainian sentence tokenizer'): 'stanza_ukr', - _tr('wl_settings_global', 'Stanza - Urdu sentence tokenizer'): 'stanza_urd', - _tr('wl_settings_global', 'Stanza - Uyghur sentence tokenizer'): 'stanza_uig', - _tr('wl_settings_global', 'Stanza - Vietnamese sentence tokenizer'): 'stanza_vie', - _tr('wl_settings_global', 'Stanza - Welsh sentence tokenizer'): 'stanza_cym', - _tr('wl_settings_global', 'Stanza - Wolof sentence tokenizer'): 'stanza_wol', - - _tr('wl_settings_global', 'Underthesea - Vietnamese sentence tokenizer'): 'underthesea_vie' +# Use function instead of constant to defer translation until QTranslator is initialized +def init_settings_global(): + SETTINGS_GLOBAL = { + # Language names should be 
always capitalized + 'langs': { + _tr('wl_settings_global', 'Afrikaans'): ['afr', 'af'], + _tr('wl_settings_global', 'Albanian'): ['sqi', 'sq'], + _tr('wl_settings_global', 'Amharic'): ['amh', 'am'], + _tr('wl_settings_global', 'Arabic'): ['ara', 'ar'], + _tr('wl_settings_global', 'Armenian (Classical)'): ['xcl', 'xcl'], + _tr('wl_settings_global', 'Armenian (Eastern)'): ['hye', 'hy'], + _tr('wl_settings_global', 'Armenian (Western)'): ['hyw', 'hyw'], + _tr('wl_settings_global', 'Assamese'): ['asm', 'as'], + _tr('wl_settings_global', 'Asturian'): ['ast', 'ast'], + _tr('wl_settings_global', 'Azerbaijani'): ['aze', 'az'], + _tr('wl_settings_global', 'Basque'): ['eus', 'eu'], + _tr('wl_settings_global', 'Belarusian'): ['bel', 'be'], + _tr('wl_settings_global', 'Bengali'): ['ben', 'bn'], + _tr('wl_settings_global', 'Bulgarian'): ['bul', 'bg'], + _tr('wl_settings_global', 'Burmese'): ['mya', 'my'], + _tr('wl_settings_global', 'Buryat (Russia)'): ['bxr', 'bxr'], + _tr('wl_settings_global', 'Catalan'): ['cat', 'ca'], + _tr('wl_settings_global', 'Chinese (Classical)'): ['lzh', 'lzh'], + _tr('wl_settings_global', 'Chinese (Simplified)'): ['zho_cn', 'zh_cn'], + _tr('wl_settings_global', 'Chinese (Traditional)'): ['zho_tw', 'zh_tw'], + _tr('wl_settings_global', 'Church Slavonic (Old)'): ['chu', 'cu'], + _tr('wl_settings_global', 'Coptic'): ['cop', 'cop'], + _tr('wl_settings_global', 'Croatian'): ['hrv', 'hr'], + _tr('wl_settings_global', 'Czech'): ['ces', 'cs'], + _tr('wl_settings_global', 'Danish'): ['dan', 'da'], + _tr('wl_settings_global', 'Dutch'): ['nld', 'nl'], + _tr('wl_settings_global', 'English (Middle)'): ['enm', 'enm'], + _tr('wl_settings_global', 'English (Old)'): ['ang', 'ang'], + _tr('wl_settings_global', 'English (United Kingdom)'): ['eng_gb', 'en_gb'], + _tr('wl_settings_global', 'English (United States)'): ['eng_us', 'en_us'], + _tr('wl_settings_global', 'Erzya'): ['myv', 'myv'], + _tr('wl_settings_global', 'Esperanto'): ['epo', 'eo'], + 
_tr('wl_settings_global', 'Estonian'): ['est', 'et'], + _tr('wl_settings_global', 'Faroese'): ['fao', 'fo'], + _tr('wl_settings_global', 'Finnish'): ['fin', 'fi'], + _tr('wl_settings_global', 'French'): ['fra', 'fr'], + _tr('wl_settings_global', 'French (Old)'): ['fro', 'fro'], + _tr('wl_settings_global', 'Galician'): ['glg', 'gl'], + _tr('wl_settings_global', 'Georgian'): ['kat', 'ka'], + _tr('wl_settings_global', 'German (Austria)'): ['deu_at', 'de_at'], + _tr('wl_settings_global', 'German (Germany)'): ['deu_de', 'de_de'], + _tr('wl_settings_global', 'German (Switzerland)'): ['deu_ch', 'de_ch'], + _tr('wl_settings_global', 'Gothic'): ['got', 'got'], + _tr('wl_settings_global', 'Greek (Ancient)'): ['grc', 'grc'], + _tr('wl_settings_global', 'Greek (Modern)'): ['ell', 'el'], + _tr('wl_settings_global', 'Gujarati'): ['guj', 'gu'], + _tr('wl_settings_global', 'Hebrew (Ancient)'): ['hbo', 'hbo'], + _tr('wl_settings_global', 'Hebrew (Modern)'): ['heb', 'he'], + _tr('wl_settings_global', 'Hindi'): ['hin', 'hi'], + _tr('wl_settings_global', 'Hungarian'): ['hun', 'hu'], + _tr('wl_settings_global', 'Icelandic'): ['isl', 'is'], + _tr('wl_settings_global', 'Indonesian'): ['ind', 'id'], + _tr('wl_settings_global', 'Irish'): ['gle', 'ga'], + _tr('wl_settings_global', 'Italian'): ['ita', 'it'], + _tr('wl_settings_global', 'Japanese'): ['jpn', 'ja'], + _tr('wl_settings_global', 'Kannada'): ['kan', 'kn'], + _tr('wl_settings_global', 'Kazakh'): ['kaz', 'kk'], + _tr('wl_settings_global', 'Khmer'): ['khm', 'km'], + _tr('wl_settings_global', 'Korean'): ['kor', 'ko'], + _tr('wl_settings_global', 'Kurdish (Kurmanji)'): ['kmr', 'kmr'], + _tr('wl_settings_global', 'Kyrgyz'): ['kir', 'ky'], + _tr('wl_settings_global', 'Lao'): ['lao', 'lo'], + _tr('wl_settings_global', 'Latin'): ['lat', 'la'], + _tr('wl_settings_global', 'Latvian'): ['lav', 'lv'], + _tr('wl_settings_global', 'Ligurian'): ['lij', 'lij'], + _tr('wl_settings_global', 'Lithuanian'): ['lit', 'lt'], + _tr('wl_settings_global', 
'Luganda'): ['lug', 'lg'], + _tr('wl_settings_global', 'Luxembourgish'): ['ltz', 'lb'], + _tr('wl_settings_global', 'Macedonian'): ['mkd', 'mk'], + _tr('wl_settings_global', 'Malay'): ['msa', 'ms'], + _tr('wl_settings_global', 'Malayalam'): ['mal', 'ml'], + _tr('wl_settings_global', 'Maltese'): ['mlt', 'mt'], + _tr('wl_settings_global', 'Manx'): ['glv', 'gv'], + _tr('wl_settings_global', 'Marathi'): ['mar', 'mr'], + _tr('wl_settings_global', 'Meitei (Meitei script)'): ['mni_mtei', 'mni_mtei'], + _tr('wl_settings_global', 'Mongolian'): ['mon', 'mn'], + _tr('wl_settings_global', 'Nepali'): ['nep', 'ne'], + _tr('wl_settings_global', 'Nigerian Pidgin'): ['pcm', 'pcm'], + _tr('wl_settings_global', 'Norwegian (Bokmål)'): ['nob', 'nb'], + _tr('wl_settings_global', 'Norwegian (Nynorsk)'): ['nno', 'nn'], + _tr('wl_settings_global', 'Odia'): ['ori', 'or'], + _tr('wl_settings_global', 'Persian'): ['fas', 'fa'], + _tr('wl_settings_global', 'Polish'): ['pol', 'pl'], + _tr('wl_settings_global', 'Pomak'): ['qpm', 'qpm'], + _tr('wl_settings_global', 'Portuguese (Brazil)'): ['por_br', 'pt_br'], + _tr('wl_settings_global', 'Portuguese (Portugal)'): ['por_pt', 'pt_pt'], + _tr('wl_settings_global', 'Punjabi (Gurmukhi script)'): ['pan_guru', 'pa_guru'], + _tr('wl_settings_global', 'Romanian'): ['ron', 'ro'], + _tr('wl_settings_global', 'Russian'): ['rus', 'ru'], + _tr('wl_settings_global', 'Russian (Old)'): ['orv', 'orv'], + _tr('wl_settings_global', 'Sámi (Northern)'): ['sme', 'se'], + _tr('wl_settings_global', 'Sanskrit'): ['san', 'sa'], + _tr('wl_settings_global', 'Scottish Gaelic'): ['gla', 'gd'], + _tr('wl_settings_global', 'Serbian (Cyrillic script)'): ['srp_cyrl', 'sr_cyrl'], + _tr('wl_settings_global', 'Serbian (Latin script)'): ['srp_latn', 'sr_latn'], + _tr('wl_settings_global', 'Sindhi'): ['snd', 'sd'], + _tr('wl_settings_global', 'Sinhala'): ['sin', 'si'], + _tr('wl_settings_global', 'Slovak'): ['slk', 'sk'], + _tr('wl_settings_global', 'Slovene'): ['slv', 'sl'], + 
_tr('wl_settings_global', 'Sorbian (Lower)'): ['dsb', 'dsb'], + _tr('wl_settings_global', 'Sorbian (Upper)'): ['hsb', 'hsb'], + _tr('wl_settings_global', 'Spanish'): ['spa', 'es'], + _tr('wl_settings_global', 'Swahili'): ['swa', 'sw'], + _tr('wl_settings_global', 'Swedish'): ['swe', 'sv'], + _tr('wl_settings_global', 'Tagalog'): ['tgl', 'tl'], + _tr('wl_settings_global', 'Tajik'): ['tgk', 'tg'], + _tr('wl_settings_global', 'Tamil'): ['tam', 'ta'], + _tr('wl_settings_global', 'Tatar'): ['tat', 'tt'], + _tr('wl_settings_global', 'Telugu'): ['tel', 'te'], + _tr('wl_settings_global', 'Tetun (Dili)'): ['tdt', 'tdt'], + _tr('wl_settings_global', 'Thai'): ['tha', 'th'], + _tr('wl_settings_global', 'Tibetan'): ['bod', 'bo'], + _tr('wl_settings_global', 'Tigrinya'): ['tir', 'ti'], + _tr('wl_settings_global', 'Tswana'): ['tsn', 'tn'], + _tr('wl_settings_global', 'Turkish'): ['tur', 'tr'], + _tr('wl_settings_global', 'Ukrainian'): ['ukr', 'uk'], + _tr('wl_settings_global', 'Urdu'): ['urd', 'ur'], + _tr('wl_settings_global', 'Uyghur'): ['uig', 'ug'], + _tr('wl_settings_global', 'Vietnamese'): ['vie', 'vi'], + _tr('wl_settings_global', 'Welsh'): ['cym', 'cy'], + _tr('wl_settings_global', 'Wolof'): ['wol', 'wo'], + _tr('wl_settings_global', 'Yoruba'): ['yor', 'yo'], + _tr('wl_settings_global', 'Zulu'): ['zul', 'zu'], + + _tr('wl_settings_global', 'Other languages'): ['other', 'other'] }, - 'word_tokenizers': { - _tr('wl_settings_global', 'botok - Tibetan word tokenizer'): 'botok_bod', - _tr('wl_settings_global', 'khmer-nltk - Khmer word tokenizer'): 'khmer_nltk_khm', - _tr('wl_settings_global', 'LaoNLP - Lao word tokenizer'): 'laonlp_lao', - - _tr('wl_settings_global', 'NLTK - NIST tokenizer'): 'nltk_nist', - _tr('wl_settings_global', 'NLTK - NLTK tokenizer'): 'nltk_nltk', - _tr('wl_settings_global', 'NLTK - Penn Treebank tokenizer'): 'nltk_penn_treebank', - _tr('wl_settings_global', 'NLTK - Regular-expression tokenizer'): 'nltk_regex', - _tr('wl_settings_global', 'NLTK - 
Tok-tok tokenizer'): 'nltk_tok_tok', - _tr('wl_settings_global', 'NLTK - Twitter tokenizer'): 'nltk_twitter', - - _tr('wl_settings_global', 'pkuseg - Chinese word tokenizer'): 'pkuseg_zho', - - _tr('wl_settings_global', 'PyThaiNLP - Longest matching'): 'pythainlp_longest_matching', - _tr('wl_settings_global', 'PyThaiNLP - Maximum matching'): 'pythainlp_max_matching', - _tr('wl_settings_global', 'PyThaiNLP - Maximum matching + TCC'): 'pythainlp_max_matching_tcc', - 'PyThaiNLP - NERCut': 'pythainlp_nercut', - - 'python-mecab-ko - MeCab': 'python_mecab_ko_mecab', - _tr('wl_settings_global', 'Sacremoses - Moses tokenizer'): 'sacremoses_moses', - - _tr('wl_settings_global', 'spaCy - Afrikaans word tokenizer'): 'spacy_afr', - _tr('wl_settings_global', 'spaCy - Albanian word tokenizer'): 'spacy_sqi', - _tr('wl_settings_global', 'spaCy - Amharic word tokenizer'): 'spacy_amh', - _tr('wl_settings_global', 'spaCy - Arabic word tokenizer'): 'spacy_ara', - _tr('wl_settings_global', 'spaCy - Armenian word tokenizer'): 'spacy_hye', - _tr('wl_settings_global', 'spaCy - Azerbaijani word tokenizer'): 'spacy_aze', - _tr('wl_settings_global', 'spaCy - Basque word tokenizer'): 'spacy_eus', - _tr('wl_settings_global', 'spaCy - Bengali word tokenizer'): 'spacy_ben', - _tr('wl_settings_global', 'spaCy - Bulgarian word tokenizer'): 'spacy_bul', - _tr('wl_settings_global', 'spaCy - Catalan word tokenizer'): 'spacy_cat', - _tr('wl_settings_global', 'spaCy - Chinese word tokenizer'): 'spacy_zho', - _tr('wl_settings_global', 'spaCy - Croatian word tokenizer'): 'spacy_hrv', - _tr('wl_settings_global', 'spaCy - Czech word tokenizer'): 'spacy_ces', - _tr('wl_settings_global', 'spaCy - Danish word tokenizer'): 'spacy_dan', - _tr('wl_settings_global', 'spaCy - Dutch word tokenizer'): 'spacy_nld', - _tr('wl_settings_global', 'spaCy - English word tokenizer'): 'spacy_eng', - _tr('wl_settings_global', 'spaCy - Estonian word tokenizer'): 'spacy_est', - _tr('wl_settings_global', 'spaCy - Faroese word 
tokenizer'): 'spacy_fao', - _tr('wl_settings_global', 'spaCy - Finnish word tokenizer'): 'spacy_fin', - _tr('wl_settings_global', 'spaCy - French word tokenizer'): 'spacy_fra', - _tr('wl_settings_global', 'spaCy - German word tokenizer'): 'spacy_deu', - _tr('wl_settings_global', 'spaCy - Greek (Ancient) word tokenizer'): 'spacy_grc', - _tr('wl_settings_global', 'spaCy - Greek (Modern) word tokenizer'): 'spacy_ell', - _tr('wl_settings_global', 'spaCy - Gujarati word tokenizer'): 'spacy_guj', - _tr('wl_settings_global', 'spaCy - Hebrew (Modern) word tokenizer'): 'spacy_heb', - _tr('wl_settings_global', 'spaCy - Hindi word tokenizer'): 'spacy_hin', - _tr('wl_settings_global', 'spaCy - Hungarian word tokenizer'): 'spacy_hun', - _tr('wl_settings_global', 'spaCy - Icelandic word tokenizer'): 'spacy_isl', - _tr('wl_settings_global', 'spaCy - Indonesian word tokenizer'): 'spacy_ind', - _tr('wl_settings_global', 'spaCy - Irish word tokenizer'): 'spacy_gle', - _tr('wl_settings_global', 'spaCy - Italian word tokenizer'): 'spacy_ita', - _tr('wl_settings_global', 'spaCy - Japanese word tokenizer'): 'spacy_jpn', - _tr('wl_settings_global', 'spaCy - Kannada word tokenizer'): 'spacy_kan', - _tr('wl_settings_global', 'spaCy - Korean word tokenizer'): 'spacy_kor', - _tr('wl_settings_global', 'spaCy - Kyrgyz word tokenizer'): 'spacy_kir', - _tr('wl_settings_global', 'spaCy - Latin word tokenizer'): 'spacy_lat', - _tr('wl_settings_global', 'spaCy - Latvian word tokenizer'): 'spacy_lav', - _tr('wl_settings_global', 'spaCy - Ligurian word tokenizer'): 'spacy_lij', - _tr('wl_settings_global', 'spaCy - Lithuanian word tokenizer'): 'spacy_lit', - _tr('wl_settings_global', 'spaCy - Luganda word tokenizer'): 'spacy_lug', - _tr('wl_settings_global', 'spaCy - Luxembourgish word tokenizer'): 'spacy_ltz', - _tr('wl_settings_global', 'spaCy - Macedonian word tokenizer'): 'spacy_mkd', - _tr('wl_settings_global', 'spaCy - Malay word tokenizer'): 'spacy_msa', - _tr('wl_settings_global', 'spaCy - 
Malayalam word tokenizer'): 'spacy_mal', - _tr('wl_settings_global', 'spaCy - Marathi word tokenizer'): 'spacy_mar', - _tr('wl_settings_global', 'spaCy - Nepali word tokenizer'): 'spacy_nep', - _tr('wl_settings_global', 'spaCy - Norwegian (Bokmål) word tokenizer'): 'spacy_nob', - _tr('wl_settings_global', 'spaCy - Norwegian (Nynorsk) word tokenizer'): 'spacy_nno', - _tr('wl_settings_global', 'spaCy - Persian word tokenizer'): 'spacy_fas', - _tr('wl_settings_global', 'spaCy - Polish word tokenizer'): 'spacy_pol', - _tr('wl_settings_global', 'spaCy - Portuguese word tokenizer'): 'spacy_por', - _tr('wl_settings_global', 'spaCy - Romanian word tokenizer'): 'spacy_ron', - _tr('wl_settings_global', 'spaCy - Russian word tokenizer'): 'spacy_rus', - _tr('wl_settings_global', 'spaCy - Sanskrit word tokenizer'): 'spacy_san', - _tr('wl_settings_global', 'spaCy - Serbian (Cyrillic script) word tokenizer'): 'spacy_srp', - _tr('wl_settings_global', 'spaCy - Sinhala word tokenizer'): 'spacy_sin', - _tr('wl_settings_global', 'spaCy - Slovak word tokenizer'): 'spacy_slk', - _tr('wl_settings_global', 'spaCy - Slovene word tokenizer'): 'spacy_slv', - _tr('wl_settings_global', 'spaCy - Sorbian (Lower) word tokenizer'): 'spacy_dsb', - _tr('wl_settings_global', 'spaCy - Sorbian (Upper) word tokenizer'): 'spacy_hsb', - _tr('wl_settings_global', 'spaCy - Spanish word tokenizer'): 'spacy_spa', - _tr('wl_settings_global', 'spaCy - Swedish word tokenizer'): 'spacy_swe', - _tr('wl_settings_global', 'spaCy - Tagalog word tokenizer'): 'spacy_tgl', - _tr('wl_settings_global', 'spaCy - Tamil word tokenizer'): 'spacy_tam', - _tr('wl_settings_global', 'spaCy - Tatar word tokenizer'): 'spacy_tat', - _tr('wl_settings_global', 'spaCy - Telugu word tokenizer'): 'spacy_tel', - _tr('wl_settings_global', 'spaCy - Tigrinya word tokenizer'): 'spacy_tir', - _tr('wl_settings_global', 'spaCy - Tswana word tokenizer'): 'spacy_tsn', - _tr('wl_settings_global', 'spaCy - Turkish word tokenizer'): 'spacy_tur', - 
_tr('wl_settings_global', 'spaCy - Ukrainian word tokenizer'): 'spacy_ukr', - _tr('wl_settings_global', 'spaCy - Urdu word tokenizer'): 'spacy_urd', - _tr('wl_settings_global', 'spaCy - Yoruba word tokenizer'): 'spacy_yor', - - _tr('wl_settings_global', 'Stanza - Afrikaans word tokenizer'): 'stanza_afr', - _tr('wl_settings_global', 'Stanza - Arabic word tokenizer'): 'stanza_ara', - _tr('wl_settings_global', 'Stanza - Armenian (Classical) word tokenizer'): 'stanza_xcl', - _tr('wl_settings_global', 'Stanza - Armenian (Eastern) word tokenizer'): 'stanza_hye', - _tr('wl_settings_global', 'Stanza - Armenian (Western) word tokenizer'): 'stanza_hyw', - _tr('wl_settings_global', 'Stanza - Basque word tokenizer'): 'stanza_eus', - _tr('wl_settings_global', 'Stanza - Belarusian word tokenizer'): 'stanza_bel', - _tr('wl_settings_global', 'Stanza - Bulgarian word tokenizer'): 'stanza_bul', - _tr('wl_settings_global', 'Stanza - Burmese word tokenizer'): 'stanza_mya', - _tr('wl_settings_global', 'Stanza - Buryat (Russia) word tokenizer'): 'stanza_bxr', - _tr('wl_settings_global', 'Stanza - Catalan word tokenizer'): 'stanza_cat', - _tr('wl_settings_global', 'Stanza - Chinese (Classical) word tokenizer'): 'stanza_lzh', - _tr('wl_settings_global', 'Stanza - Chinese (Simplified) word tokenizer'): 'stanza_zho_cn', - _tr('wl_settings_global', 'Stanza - Chinese (Traditional) word tokenizer'): 'stanza_zho_tw', - _tr('wl_settings_global', 'Stanza - Church Slavonic (Old) word tokenizer'): 'stanza_chu', - _tr('wl_settings_global', 'Stanza - Coptic word tokenizer'): 'stanza_cop', - _tr('wl_settings_global', 'Stanza - Croatian word tokenizer'): 'stanza_hrv', - _tr('wl_settings_global', 'Stanza - Czech word tokenizer'): 'stanza_ces', - _tr('wl_settings_global', 'Stanza - Danish word tokenizer'): 'stanza_dan', - _tr('wl_settings_global', 'Stanza - Dutch word tokenizer'): 'stanza_nld', - _tr('wl_settings_global', 'Stanza - English word tokenizer'): 'stanza_eng', - _tr('wl_settings_global', 
'Stanza - English (Old) word tokenizer'): 'stanza_ang', - _tr('wl_settings_global', 'Stanza - Erzya word tokenizer'): 'stanza_myv', - _tr('wl_settings_global', 'Stanza - Estonian word tokenizer'): 'stanza_est', - _tr('wl_settings_global', 'Stanza - Faroese word tokenizer'): 'stanza_fao', - _tr('wl_settings_global', 'Stanza - Finnish word tokenizer'): 'stanza_fin', - _tr('wl_settings_global', 'Stanza - French word tokenizer'): 'stanza_fra', - _tr('wl_settings_global', 'Stanza - French (Old) word tokenizer'): 'stanza_fro', - _tr('wl_settings_global', 'Stanza - Galician word tokenizer'): 'stanza_glg', - _tr('wl_settings_global', 'Stanza - German word tokenizer'): 'stanza_deu', - _tr('wl_settings_global', 'Stanza - Gothic word tokenizer'): 'stanza_got', - _tr('wl_settings_global', 'Stanza - Greek (Ancient) word tokenizer'): 'stanza_grc', - _tr('wl_settings_global', 'Stanza - Greek (Modern) word tokenizer'): 'stanza_ell', - _tr('wl_settings_global', 'Stanza - Hebrew (Ancient) word tokenizer'): 'stanza_hbo', - _tr('wl_settings_global', 'Stanza - Hebrew (Modern) word tokenizer'): 'stanza_heb', - _tr('wl_settings_global', 'Stanza - Hindi word tokenizer'): 'stanza_hin', - _tr('wl_settings_global', 'Stanza - Hungarian word tokenizer'): 'stanza_hun', - _tr('wl_settings_global', 'Stanza - Icelandic word tokenizer'): 'stanza_isl', - _tr('wl_settings_global', 'Stanza - Indonesian word tokenizer'): 'stanza_ind', - _tr('wl_settings_global', 'Stanza - Irish word tokenizer'): 'stanza_gle', - _tr('wl_settings_global', 'Stanza - Italian word tokenizer'): 'stanza_ita', - _tr('wl_settings_global', 'Stanza - Japanese word tokenizer'): 'stanza_jpn', - _tr('wl_settings_global', 'Stanza - Kazakh word tokenizer'): 'stanza_kaz', - _tr('wl_settings_global', 'Stanza - Korean word tokenizer'): 'stanza_kor', - _tr('wl_settings_global', 'Stanza - Kurdish (Kurmanji) word tokenizer'): 'stanza_kmr', - _tr('wl_settings_global', 'Stanza - Kyrgyz word tokenizer'): 'stanza_kir', - 
_tr('wl_settings_global', 'Stanza - Latin word tokenizer'): 'stanza_lat', - _tr('wl_settings_global', 'Stanza - Latvian word tokenizer'): 'stanza_lav', - _tr('wl_settings_global', 'Stanza - Ligurian word tokenizer'): 'stanza_lij', - _tr('wl_settings_global', 'Stanza - Lithuanian word tokenizer'): 'stanza_lit', - _tr('wl_settings_global', 'Stanza - Maltese word tokenizer'): 'stanza_mlt', - _tr('wl_settings_global', 'Stanza - Manx word tokenizer'): 'stanza_glv', - _tr('wl_settings_global', 'Stanza - Marathi word tokenizer'): 'stanza_mar', - _tr('wl_settings_global', 'Stanza - Nigerian Pidgin word tokenizer'): 'stanza_pcm', - _tr('wl_settings_global', 'Stanza - Norwegian (Bokmål) word tokenizer'): 'stanza_nob', - _tr('wl_settings_global', 'Stanza - Norwegian (Nynorsk) word tokenizer'): 'stanza_nno', - _tr('wl_settings_global', 'Stanza - Persian word tokenizer'): 'stanza_fas', - _tr('wl_settings_global', 'Stanza - Polish word tokenizer'): 'stanza_pol', - _tr('wl_settings_global', 'Stanza - Pomak word tokenizer'): 'stanza_qpm', - _tr('wl_settings_global', 'Stanza - Portuguese word tokenizer'): 'stanza_por', - _tr('wl_settings_global', 'Stanza - Romanian word tokenizer'): 'stanza_ron', - _tr('wl_settings_global', 'Stanza - Russian word tokenizer'): 'stanza_rus', - _tr('wl_settings_global', 'Stanza - Russian (Old) word tokenizer'): 'stanza_orv', - _tr('wl_settings_global', 'Stanza - Sámi (Northern) word tokenizer'): 'stanza_sme', - _tr('wl_settings_global', 'Stanza - Sanskrit word tokenizer'): 'stanza_san', - _tr('wl_settings_global', 'Stanza - Scottish Gaelic word tokenizer'): 'stanza_gla', - _tr('wl_settings_global', 'Stanza - Serbian (Latin script) word tokenizer'): 'stanza_srp_latn', - _tr('wl_settings_global', 'Stanza - Sindhi word tokenizer'): 'stanza_snd', - _tr('wl_settings_global', 'Stanza - Slovak word tokenizer'): 'stanza_slk', - _tr('wl_settings_global', 'Stanza - Slovene word tokenizer'): 'stanza_slv', - _tr('wl_settings_global', 'Stanza - Sorbian (Upper) 
word tokenizer'): 'stanza_hsb', - _tr('wl_settings_global', 'Stanza - Spanish word tokenizer'): 'stanza_spa', - _tr('wl_settings_global', 'Stanza - Swedish word tokenizer'): 'stanza_swe', - _tr('wl_settings_global', 'Stanza - Tamil word tokenizer'): 'stanza_tam', - _tr('wl_settings_global', 'Stanza - Telugu word tokenizer'): 'stanza_tel', - _tr('wl_settings_global', 'Stanza - Thai word tokenizer'): 'stanza_tha', - _tr('wl_settings_global', 'Stanza - Turkish word tokenizer'): 'stanza_tur', - _tr('wl_settings_global', 'Stanza - Ukrainian word tokenizer'): 'stanza_ukr', - _tr('wl_settings_global', 'Stanza - Urdu word tokenizer'): 'stanza_urd', - _tr('wl_settings_global', 'Stanza - Uyghur word tokenizer'): 'stanza_uig', - _tr('wl_settings_global', 'Stanza - Vietnamese word tokenizer'): 'stanza_vie', - _tr('wl_settings_global', 'Stanza - Welsh word tokenizer'): 'stanza_cym', - _tr('wl_settings_global', 'Stanza - Wolof word tokenizer'): 'stanza_wol', - - _tr('wl_settings_global', 'SudachiPy - Japanese word tokenizer (split mode A)'): 'sudachipy_jpn_split_mode_a', - _tr('wl_settings_global', 'SudachiPy - Japanese word tokenizer (split mode B)'): 'sudachipy_jpn_split_mode_b', - _tr('wl_settings_global', 'SudachiPy - Japanese word tokenizer (split mode C)'): 'sudachipy_jpn_split_mode_c', - - _tr('wl_settings_global', 'Underthesea - Vietnamese word tokenizer'): 'underthesea_vie', - - _tr('wl_settings_global', 'Wordless - Chinese character tokenizer'): 'wordless_zho_char', - _tr('wl_settings_global', 'Wordless - Japanese kanji tokenizer'): 'wordless_jpn_kanji' - }, + # Language and geographical names should always be capitalized + # Case of encoding names is preserved + 'encodings': { + _tr('wl_settings_global', 'All languages (UTF-8 without BOM)'): 'utf_8', + _tr('wl_settings_global', 'All languages (UTF-8 with BOM)'): 'utf_8_sig', + _tr('wl_settings_global', 'All languages (UTF-16 with BOM)'): 'utf_16', + _tr('wl_settings_global', 'All languages (UTF-16BE without BOM)'): 
'utf_16_be', + _tr('wl_settings_global', 'All languages (UTF-16LE without BOM)'): 'utf_16_le', + _tr('wl_settings_global', 'All languages (UTF-32 with BOM)'): 'utf_32', + _tr('wl_settings_global', 'All languages (UTF-32BE without BOM)'): 'utf_32_be', + _tr('wl_settings_global', 'All languages (UTF-32LE without BOM)'): 'utf_32_le', + _tr('wl_settings_global', 'All languages (UTF-7)'): 'utf_7', + + _tr('wl_settings_global', 'Arabic (CP720)'): 'cp720', + _tr('wl_settings_global', 'Arabic (CP864)'): 'cp864', + _tr('wl_settings_global', 'Arabic (ISO-8859-6)'): 'iso8859_6', + _tr('wl_settings_global', 'Arabic (Mac OS Arabic)'): 'mac_arabic', + _tr('wl_settings_global', 'Arabic (Windows-1256)'): 'cp1256', + + _tr('wl_settings_global', 'Baltic languages (CP775)'): 'cp775', + _tr('wl_settings_global', 'Baltic languages (ISO-8859-13)'): 'iso8859_13', + _tr('wl_settings_global', 'Baltic languages (Windows-1257)'): 'cp1257', + + _tr('wl_settings_global', 'Celtic languages (ISO-8859-14)'): 'iso8859_14', - 'syl_tokenizers': { - _tr('wl_settings_global', 'NLTK - Legality syllable tokenizer'): 'nltk_legality', - _tr('wl_settings_global', 'NLTK - Sonority sequencing syllable tokenizer'): 'nltk_sonority_sequencing', - - _tr('wl_settings_global', 'Pyphen - Afrikaans syllable tokenizer'): 'pyphen_afr', - _tr('wl_settings_global', 'Pyphen - Albanian syllable tokenizer'): 'pyphen_sqi', - _tr('wl_settings_global', 'Pyphen - Basque syllable tokenizer'): 'pyphen_eus', - _tr('wl_settings_global', 'Pyphen - Belarusian syllable tokenizer'): 'pyphen_bel', - _tr('wl_settings_global', 'Pyphen - Bulgarian syllable tokenizer'): 'pyphen_bul', - _tr('wl_settings_global', 'Pyphen - Catalan syllable tokenizer'): 'pyphen_cat', - _tr('wl_settings_global', 'Pyphen - Croatian syllable tokenizer'): 'pyphen_hrv', - _tr('wl_settings_global', 'Pyphen - Czech syllable tokenizer'): 'pyphen_ces', - _tr('wl_settings_global', 'Pyphen - Danish syllable tokenizer'): 'pyphen_dan', - _tr('wl_settings_global', 'Pyphen 
- Dutch syllable tokenizer'): 'pyphen_nld', - _tr('wl_settings_global', 'Pyphen - English (United Kingdom) syllable tokenizer'): 'pyphen_eng_gb', - _tr('wl_settings_global', 'Pyphen - English (United States) syllable tokenizer'): 'pyphen_eng_us', - _tr('wl_settings_global', 'Pyphen - Esperanto syllable tokenizer'): 'pyphen_epo', - _tr('wl_settings_global', 'Pyphen - Estonian syllable tokenizer'): 'pyphen_est', - _tr('wl_settings_global', 'Pyphen - French syllable tokenizer'): 'pyphen_fra', - _tr('wl_settings_global', 'Pyphen - Galician syllable tokenizer'): 'pyphen_glg', - _tr('wl_settings_global', 'Pyphen - German (Austria) syllable tokenizer'): 'pyphen_deu_at', - _tr('wl_settings_global', 'Pyphen - German (Germany) syllable tokenizer'): 'pyphen_deu_de', - _tr('wl_settings_global', 'Pyphen - German (Switzerland) syllable tokenizer'): 'pyphen_deu_ch', - _tr('wl_settings_global', 'Pyphen - Greek (Modern) syllable tokenizer'): 'pyphen_ell', - _tr('wl_settings_global', 'Pyphen - Hungarian syllable tokenizer'): 'pyphen_hun', - _tr('wl_settings_global', 'Pyphen - Icelandic syllable tokenizer'): 'pyphen_isl', - _tr('wl_settings_global', 'Pyphen - Indonesian syllable tokenizer'): 'pyphen_ind', - _tr('wl_settings_global', 'Pyphen - Italian syllable tokenizer'): 'pyphen_ita', - _tr('wl_settings_global', 'Pyphen - Lithuanian syllable tokenizer'): 'pyphen_lit', - _tr('wl_settings_global', 'Pyphen - Latvian syllable tokenizer'): 'pyphen_lav', - _tr('wl_settings_global', 'Pyphen - Mongolian syllable tokenizer'): 'pyphen_mon', - _tr('wl_settings_global', 'Pyphen - Norwegian (Bokmål) syllable tokenizer'): 'pyphen_nob', - _tr('wl_settings_global', 'Pyphen - Norwegian (Nynorsk) syllable tokenizer'): 'pyphen_nno', - _tr('wl_settings_global', 'Pyphen - Polish syllable tokenizer'): 'pyphen_pol', - _tr('wl_settings_global', 'Pyphen - Portuguese (Brazil) syllable tokenizer'): 'pyphen_por_br', - _tr('wl_settings_global', 'Pyphen - Portuguese (Portugal) syllable tokenizer'): 
'pyphen_por_pt', - _tr('wl_settings_global', 'Pyphen - Romanian syllable tokenizer'): 'pyphen_ron', - _tr('wl_settings_global', 'Pyphen - Russian syllable tokenizer'): 'pyphen_rus', - _tr('wl_settings_global', 'Pyphen - Serbian (Cyrillic script) syllable tokenizer'): 'pyphen_srp_cyrl', - _tr('wl_settings_global', 'Pyphen - Serbian (Latin script) syllable tokenizer'): 'pyphen_srp_latn', - _tr('wl_settings_global', 'Pyphen - Slovak syllable tokenizer'): 'pyphen_slk', - _tr('wl_settings_global', 'Pyphen - Slovene syllable tokenizer'): 'pyphen_slv', - _tr('wl_settings_global', 'Pyphen - Spanish syllable tokenizer'): 'pyphen_spa', - _tr('wl_settings_global', 'Pyphen - Swedish syllable tokenizer'): 'pyphen_swe', - _tr('wl_settings_global', 'Pyphen - Telugu syllable tokenizer'): 'pyphen_tel', - _tr('wl_settings_global', 'Pyphen - Thai syllable tokenizer'): 'pyphen_tha', - _tr('wl_settings_global', 'Pyphen - Ukrainian syllable tokenizer'): 'pyphen_ukr', - _tr('wl_settings_global', 'Pyphen - Zulu syllable tokenizer'): 'pyphen_zul', - - _tr('wl_settings_global', 'PyThaiNLP - Han-solo'): 'pythainlp_han_solo', - _tr('wl_settings_global', 'PyThaiNLP - Syllable dictionary'): 'pythainlp_syl_dict' - }, + _tr('wl_settings_global', 'Chinese (GB18030)'): 'gb18030', + _tr('wl_settings_global', 'Chinese (GBK)'): 'gbk', - 'pos_taggers': { - _tr('wl_settings_global', 'botok - Tibetan part-of-speech tagger'): 'botok_bod', - _tr('wl_settings_global', 'khmer-nltk - Khmer part-of-speech tagger'): 'khmer_nltk_khm', - - _tr('wl_settings_global', 'LaoNLP - SeqLabeling'): 'laonlp_seqlabeling', - _tr('wl_settings_global', 'LaoNLP - Yunshan Cup 2020'): 'laonlp_yunshan_cup_2020', - - _tr('wl_settings_global', 'NLTK - English perceptron part-of-speech tagger'): 'nltk_perceptron_eng', - _tr('wl_settings_global', 'NLTK - Russian perceptron part-of-speech tagger'): 'nltk_perceptron_rus', - - _tr('wl_settings_global', 'pymorphy3 - Morphological analyzer'): 'pymorphy3_morphological_analyzer', - 
'python-mecab-ko - MeCab': 'python_mecab_ko_mecab', - - _tr('wl_settings_global', 'PyThaiNLP - Perceptron part-of-speech tagger (Blackboard)'): 'pythainlp_perceptron_blackboard', - _tr('wl_settings_global', 'PyThaiNLP - Perceptron part-of-speech tagger (ORCHID)'): 'pythainlp_perceptron_orchid', - _tr('wl_settings_global', 'PyThaiNLP - Perceptron part-of-speech tagger (PUD)'): 'pythainlp_perceptron_pud', - - _tr('wl_settings_global', 'spaCy - Catalan part-of-speech tagger'): 'spacy_cat', - _tr('wl_settings_global', 'spaCy - Chinese part-of-speech tagger'): 'spacy_zho', - _tr('wl_settings_global', 'spaCy - Croatian part-of-speech tagger'): 'spacy_hrv', - _tr('wl_settings_global', 'spaCy - Danish part-of-speech tagger'): 'spacy_dan', - _tr('wl_settings_global', 'spaCy - Dutch part-of-speech tagger'): 'spacy_nld', - _tr('wl_settings_global', 'spaCy - English part-of-speech tagger'): 'spacy_eng', - _tr('wl_settings_global', 'spaCy - Finnish part-of-speech tagger'): 'spacy_fin', - _tr('wl_settings_global', 'spaCy - French part-of-speech tagger'): 'spacy_fra', - _tr('wl_settings_global', 'spaCy - German part-of-speech tagger'): 'spacy_deu', - _tr('wl_settings_global', 'spaCy - Greek (Modern) part-of-speech tagger'): 'spacy_ell', - _tr('wl_settings_global', 'spaCy - Italian part-of-speech tagger'): 'spacy_ita', - _tr('wl_settings_global', 'spaCy - Japanese part-of-speech tagger'): 'spacy_jpn', - _tr('wl_settings_global', 'spaCy - Korean part-of-speech tagger'): 'spacy_kor', - _tr('wl_settings_global', 'spaCy - Lithuanian part-of-speech tagger'): 'spacy_lit', - _tr('wl_settings_global', 'spaCy - Macedonian part-of-speech tagger'): 'spacy_mkd', - _tr('wl_settings_global', 'spaCy - Norwegian (Bokmål) part-of-speech tagger'): 'spacy_nob', - _tr('wl_settings_global', 'spaCy - Polish part-of-speech tagger'): 'spacy_pol', - _tr('wl_settings_global', 'spaCy - Portuguese part-of-speech tagger'): 'spacy_por', - _tr('wl_settings_global', 'spaCy - Romanian part-of-speech tagger'): 
'spacy_ron', - _tr('wl_settings_global', 'spaCy - Russian part-of-speech tagger'): 'spacy_rus', - _tr('wl_settings_global', 'spaCy - Slovene part-of-speech tagger'): 'spacy_slv', - _tr('wl_settings_global', 'spaCy - Spanish part-of-speech tagger'): 'spacy_spa', - _tr('wl_settings_global', 'spaCy - Swedish part-of-speech tagger'): 'spacy_swe', - _tr('wl_settings_global', 'spaCy - Ukrainian part-of-speech tagger'): 'spacy_ukr', - - _tr('wl_settings_global', 'Stanza - Afrikaans part-of-speech tagger'): 'stanza_afr', - _tr('wl_settings_global', 'Stanza - Arabic part-of-speech tagger'): 'stanza_ara', - _tr('wl_settings_global', 'Stanza - Armenian (Classical) part-of-speech tagger'): 'stanza_xcl', - _tr('wl_settings_global', 'Stanza - Armenian (Eastern) part-of-speech tagger'): 'stanza_hye', - _tr('wl_settings_global', 'Stanza - Armenian (Western) part-of-speech tagger'): 'stanza_hyw', - _tr('wl_settings_global', 'Stanza - Basque part-of-speech tagger'): 'stanza_eus', - _tr('wl_settings_global', 'Stanza - Belarusian part-of-speech tagger'): 'stanza_bel', - _tr('wl_settings_global', 'Stanza - Bulgarian part-of-speech tagger'): 'stanza_bul', - _tr('wl_settings_global', 'Stanza - Buryat (Russia) part-of-speech tagger'): 'stanza_bxr', - _tr('wl_settings_global', 'Stanza - Catalan part-of-speech tagger'): 'stanza_cat', - _tr('wl_settings_global', 'Stanza - Chinese (Classical) part-of-speech tagger'): 'stanza_lzh', - _tr('wl_settings_global', 'Stanza - Chinese (Simplified) part-of-speech tagger'): 'stanza_zho_cn', - _tr('wl_settings_global', 'Stanza - Chinese (Traditional) part-of-speech tagger'): 'stanza_zho_tw', - _tr('wl_settings_global', 'Stanza - Church Slavonic (Old) part-of-speech tagger'): 'stanza_chu', - _tr('wl_settings_global', 'Stanza - Coptic part-of-speech tagger'): 'stanza_cop', - _tr('wl_settings_global', 'Stanza - Croatian part-of-speech tagger'): 'stanza_hrv', - _tr('wl_settings_global', 'Stanza - Czech part-of-speech tagger'): 'stanza_ces', - 
_tr('wl_settings_global', 'Stanza - Danish part-of-speech tagger'): 'stanza_dan', - _tr('wl_settings_global', 'Stanza - Dutch part-of-speech tagger'): 'stanza_nld', - _tr('wl_settings_global', 'Stanza - English part-of-speech tagger'): 'stanza_eng', - _tr('wl_settings_global', 'Stanza - English (Old) part-of-speech tagger'): 'stanza_ang', - _tr('wl_settings_global', 'Stanza - Erzya part-of-speech tagger'): 'stanza_myv', - _tr('wl_settings_global', 'Stanza - Estonian part-of-speech tagger'): 'stanza_est', - _tr('wl_settings_global', 'Stanza - Faroese part-of-speech tagger'): 'stanza_fao', - _tr('wl_settings_global', 'Stanza - Finnish part-of-speech tagger'): 'stanza_fin', - _tr('wl_settings_global', 'Stanza - French part-of-speech tagger'): 'stanza_fra', - _tr('wl_settings_global', 'Stanza - French (Old) part-of-speech tagger'): 'stanza_fro', - _tr('wl_settings_global', 'Stanza - Galician part-of-speech tagger'): 'stanza_glg', - _tr('wl_settings_global', 'Stanza - German part-of-speech tagger'): 'stanza_deu', - _tr('wl_settings_global', 'Stanza - Gothic part-of-speech tagger'): 'stanza_got', - _tr('wl_settings_global', 'Stanza - Greek (Ancient) part-of-speech tagger'): 'stanza_grc', - _tr('wl_settings_global', 'Stanza - Greek (Modern) part-of-speech tagger'): 'stanza_ell', - _tr('wl_settings_global', 'Stanza - Hebrew (Ancient) part-of-speech tagger'): 'stanza_hbo', - _tr('wl_settings_global', 'Stanza - Hebrew (Modern) part-of-speech tagger'): 'stanza_heb', - _tr('wl_settings_global', 'Stanza - Hindi part-of-speech tagger'): 'stanza_hin', - _tr('wl_settings_global', 'Stanza - Hungarian part-of-speech tagger'): 'stanza_hun', - _tr('wl_settings_global', 'Stanza - Icelandic part-of-speech tagger'): 'stanza_isl', - _tr('wl_settings_global', 'Stanza - Indonesian part-of-speech tagger'): 'stanza_ind', - _tr('wl_settings_global', 'Stanza - Irish part-of-speech tagger'): 'stanza_gle', - _tr('wl_settings_global', 'Stanza - Italian part-of-speech tagger'): 'stanza_ita', - 
_tr('wl_settings_global', 'Stanza - Japanese part-of-speech tagger'): 'stanza_jpn', - _tr('wl_settings_global', 'Stanza - Kazakh part-of-speech tagger'): 'stanza_kaz', - _tr('wl_settings_global', 'Stanza - Korean part-of-speech tagger'): 'stanza_kor', - _tr('wl_settings_global', 'Stanza - Kurdish (Kurmanji) part-of-speech tagger'): 'stanza_kmr', - _tr('wl_settings_global', 'Stanza - Kyrgyz part-of-speech tagger'): 'stanza_kir', - _tr('wl_settings_global', 'Stanza - Latin part-of-speech tagger'): 'stanza_lat', - _tr('wl_settings_global', 'Stanza - Latvian part-of-speech tagger'): 'stanza_lav', - _tr('wl_settings_global', 'Stanza - Ligurian part-of-speech tagger'): 'stanza_lij', - _tr('wl_settings_global', 'Stanza - Lithuanian part-of-speech tagger'): 'stanza_lit', - _tr('wl_settings_global', 'Stanza - Maltese part-of-speech tagger'): 'stanza_mlt', - _tr('wl_settings_global', 'Stanza - Manx part-of-speech tagger'): 'stanza_glv', - _tr('wl_settings_global', 'Stanza - Marathi part-of-speech tagger'): 'stanza_mar', - _tr('wl_settings_global', 'Stanza - Nigerian Pidgin part-of-speech tagger'): 'stanza_pcm', - _tr('wl_settings_global', 'Stanza - Norwegian (Bokmål) part-of-speech tagger'): 'stanza_nob', - _tr('wl_settings_global', 'Stanza - Norwegian (Nynorsk) part-of-speech tagger'): 'stanza_nno', - _tr('wl_settings_global', 'Stanza - Persian part-of-speech tagger'): 'stanza_fas', - _tr('wl_settings_global', 'Stanza - Polish part-of-speech tagger'): 'stanza_pol', - _tr('wl_settings_global', 'Stanza - Pomak part-of-speech tagger'): 'stanza_qpm', - _tr('wl_settings_global', 'Stanza - Portuguese part-of-speech tagger'): 'stanza_por', - _tr('wl_settings_global', 'Stanza - Romanian part-of-speech tagger'): 'stanza_ron', - _tr('wl_settings_global', 'Stanza - Russian part-of-speech tagger'): 'stanza_rus', - _tr('wl_settings_global', 'Stanza - Russian (Old) part-of-speech tagger'): 'stanza_orv', - _tr('wl_settings_global', 'Stanza - Sámi (Northern) part-of-speech tagger'): 
'stanza_sme', - _tr('wl_settings_global', 'Stanza - Sanskrit part-of-speech tagger'): 'stanza_san', - _tr('wl_settings_global', 'Stanza - Scottish Gaelic part-of-speech tagger'): 'stanza_gla', - _tr('wl_settings_global', 'Stanza - Serbian (Latin script) part-of-speech tagger'): 'stanza_srp_latn', - _tr('wl_settings_global', 'Stanza - Sindhi part-of-speech tagger'): 'stanza_snd', - _tr('wl_settings_global', 'Stanza - Slovak part-of-speech tagger'): 'stanza_slk', - _tr('wl_settings_global', 'Stanza - Slovene part-of-speech tagger'): 'stanza_slv', - _tr('wl_settings_global', 'Stanza - Sorbian (Upper) part-of-speech tagger'): 'stanza_hsb', - _tr('wl_settings_global', 'Stanza - Spanish part-of-speech tagger'): 'stanza_spa', - _tr('wl_settings_global', 'Stanza - Swedish part-of-speech tagger'): 'stanza_swe', - _tr('wl_settings_global', 'Stanza - Tamil part-of-speech tagger'): 'stanza_tam', - _tr('wl_settings_global', 'Stanza - Telugu part-of-speech tagger'): 'stanza_tel', - _tr('wl_settings_global', 'Stanza - Turkish part-of-speech tagger'): 'stanza_tur', - _tr('wl_settings_global', 'Stanza - Ukrainian part-of-speech tagger'): 'stanza_ukr', - _tr('wl_settings_global', 'Stanza - Urdu part-of-speech tagger'): 'stanza_urd', - _tr('wl_settings_global', 'Stanza - Uyghur part-of-speech tagger'): 'stanza_uig', - _tr('wl_settings_global', 'Stanza - Vietnamese part-of-speech tagger'): 'stanza_vie', - _tr('wl_settings_global', 'Stanza - Welsh part-of-speech tagger'): 'stanza_cym', - _tr('wl_settings_global', 'Stanza - Wolof part-of-speech tagger'): 'stanza_wol', - - _tr('wl_settings_global', 'SudachiPy - Japanese part-of-speech tagger'): 'sudachipy_jpn', - _tr('wl_settings_global', 'Underthesea - Vietnamese part-of-speech tagger'): 'underthesea_vie' - }, + _tr('wl_settings_global', 'Chinese (Simplified) (GB2312)'): 'gb2312', + _tr('wl_settings_global', 'Chinese (Simplified) (HZ)'): 'hz', + + _tr('wl_settings_global', 'Chinese (Traditional) (Big-5)'): 'big5', + 
_tr('wl_settings_global', 'Chinese (Traditional) (Big5-HKSCS)'): 'big5hkscs', + _tr('wl_settings_global', 'Chinese (Traditional) (CP950)'): 'cp950', + + _tr('wl_settings_global', 'Croatian (Mac OS Croatian)'): 'mac_croatian', - 'lemmatizers': { - _tr('wl_settings_global', 'botok - Tibetan lemmatizer'): 'botok_bod', - _tr('wl_settings_global', 'NLTK - WordNet lemmatizer'): 'nltk_wordnet', - _tr('wl_settings_global', 'pymorphy3 - Morphological analyzer'): 'pymorphy3_morphological_analyzer', - - _tr('wl_settings_global', 'simplemma - Albanian lemmatizer'): 'simplemma_sqi', - _tr('wl_settings_global', 'simplemma - Armenian lemmatizer'): 'simplemma_hye', - _tr('wl_settings_global', 'simplemma - Asturian lemmatizer'): 'simplemma_ast', - _tr('wl_settings_global', 'simplemma - Bulgarian lemmatizer'): 'simplemma_bul', - _tr('wl_settings_global', 'simplemma - Catalan lemmatizer'): 'simplemma_cat', - _tr('wl_settings_global', 'simplemma - Czech lemmatizer'): 'simplemma_ces', - _tr('wl_settings_global', 'simplemma - Danish lemmatizer'): 'simplemma_dan', - _tr('wl_settings_global', 'simplemma - Dutch lemmatizer'): 'simplemma_nld', - _tr('wl_settings_global', 'simplemma - English lemmatizer'): 'simplemma_eng', - _tr('wl_settings_global', 'simplemma - English (Middle) lemmatizer'): 'simplemma_enm', - _tr('wl_settings_global', 'simplemma - Estonian lemmatizer'): 'simplemma_est', - _tr('wl_settings_global', 'simplemma - Finnish lemmatizer'): 'simplemma_fin', - _tr('wl_settings_global', 'simplemma - French lemmatizer'): 'simplemma_fra', - _tr('wl_settings_global', 'simplemma - Galician lemmatizer'): 'simplemma_glg', - _tr('wl_settings_global', 'simplemma - Georgian lemmatizer'): 'simplemma_kat', - _tr('wl_settings_global', 'simplemma - German lemmatizer'): 'simplemma_deu', - _tr('wl_settings_global', 'simplemma - Greek (Modern) lemmatizer'): 'simplemma_ell', - _tr('wl_settings_global', 'simplemma - Hindi lemmatizer'): 'simplemma_hin', - _tr('wl_settings_global', 'simplemma - 
Hungarian lemmatizer'): 'simplemma_hun', - _tr('wl_settings_global', 'simplemma - Icelandic lemmatizer'): 'simplemma_isl', - _tr('wl_settings_global', 'simplemma - Indonesian lemmatizer'): 'simplemma_ind', - _tr('wl_settings_global', 'simplemma - Irish lemmatizer'): 'simplemma_gle', - _tr('wl_settings_global', 'simplemma - Italian lemmatizer'): 'simplemma_ita', - _tr('wl_settings_global', 'simplemma - Latin lemmatizer'): 'simplemma_lat', - _tr('wl_settings_global', 'simplemma - Latvian lemmatizer'): 'simplemma_lav', - _tr('wl_settings_global', 'simplemma - Lithuanian lemmatizer'): 'simplemma_lit', - _tr('wl_settings_global', 'simplemma - Luxembourgish lemmatizer'): 'simplemma_ltz', - _tr('wl_settings_global', 'simplemma - Macedonian lemmatizer'): 'simplemma_mkd', - _tr('wl_settings_global', 'simplemma - Malay lemmatizer'): 'simplemma_msa', - _tr('wl_settings_global', 'simplemma - Manx lemmatizer'): 'simplemma_glv', - _tr('wl_settings_global', 'simplemma - Norwegian (Bokmål) lemmatizer'): 'simplemma_nob', - _tr('wl_settings_global', 'simplemma - Norwegian (Nynorsk) lemmatizer'): 'simplemma_nno', - _tr('wl_settings_global', 'simplemma - Persian lemmatizer'): 'simplemma_fas', - _tr('wl_settings_global', 'simplemma - Polish lemmatizer'): 'simplemma_pol', - _tr('wl_settings_global', 'simplemma - Portuguese lemmatizer'): 'simplemma_por', - _tr('wl_settings_global', 'simplemma - Romanian lemmatizer'): 'simplemma_ron', - _tr('wl_settings_global', 'simplemma - Russian lemmatizer'): 'simplemma_rus', - _tr('wl_settings_global', 'simplemma - Sámi (Northern) lemmatizer'): 'simplemma_sme', - _tr('wl_settings_global', 'simplemma - Scottish Gaelic lemmatizer'): 'simplemma_gla', - _tr('wl_settings_global', 'simplemma - Serbo-Croatian lemmatizer'): 'simplemma_hbs', - _tr('wl_settings_global', 'simplemma - Slovak lemmatizer'): 'simplemma_slk', - _tr('wl_settings_global', 'simplemma - Slovene lemmatizer'): 'simplemma_slv', - _tr('wl_settings_global', 'simplemma - Spanish lemmatizer'): 
'simplemma_spa', - _tr('wl_settings_global', 'simplemma - Swahili lemmatizer'): 'simplemma_swa', - _tr('wl_settings_global', 'simplemma - Swedish lemmatizer'): 'simplemma_swe', - _tr('wl_settings_global', 'simplemma - Tagalog lemmatizer'): 'simplemma_tgl', - _tr('wl_settings_global', 'simplemma - Turkish lemmatizer'): 'simplemma_tur', - _tr('wl_settings_global', 'simplemma - Ukrainian lemmatizer'): 'simplemma_ukr', - _tr('wl_settings_global', 'simplemma - Welsh lemmatizer'): 'simplemma_cym', - - _tr('wl_settings_global', 'spaCy - Bengali lemmatizer'): 'spacy_ben', - _tr('wl_settings_global', 'spaCy - Catalan lemmatizer'): 'spacy_cat', - _tr('wl_settings_global', 'spaCy - Croatian lemmatizer'): 'spacy_hrv', - _tr('wl_settings_global', 'spaCy - Czech lemmatizer'): 'spacy_ces', - _tr('wl_settings_global', 'spaCy - Danish lemmatizer'): 'spacy_dan', - _tr('wl_settings_global', 'spaCy - Dutch lemmatizer'): 'spacy_nld', - _tr('wl_settings_global', 'spaCy - English lemmatizer'): 'spacy_eng', - _tr('wl_settings_global', 'spaCy - Finnish lemmatizer'): 'spacy_fin', - _tr('wl_settings_global', 'spaCy - French lemmatizer'): 'spacy_fra', - _tr('wl_settings_global', 'spaCy - German lemmatizer'): 'spacy_deu', - _tr('wl_settings_global', 'spaCy - Greek (Ancient) lemmatizer'): 'spacy_grc', - _tr('wl_settings_global', 'spaCy - Greek (Modern) lemmatizer'): 'spacy_ell', - _tr('wl_settings_global', 'spaCy - Hungarian lemmatizer'): 'spacy_hun', - _tr('wl_settings_global', 'spaCy - Indonesian lemmatizer'): 'spacy_ind', - _tr('wl_settings_global', 'spaCy - Irish lemmatizer'): 'spacy_gle', - _tr('wl_settings_global', 'spaCy - Italian lemmatizer'): 'spacy_ita', - _tr('wl_settings_global', 'spaCy - Japanese lemmatizer'): 'spacy_jpn', - _tr('wl_settings_global', 'spaCy - Korean lemmatizer'): 'spacy_kor', - _tr('wl_settings_global', 'spaCy - Lithuanian lemmatizer'): 'spacy_lit', - _tr('wl_settings_global', 'spaCy - Luxembourgish lemmatizer'): 'spacy_ltz', - _tr('wl_settings_global', 'spaCy - 
Macedonian lemmatizer'): 'spacy_mkd', - _tr('wl_settings_global', 'spaCy - Norwegian (Bokmål) lemmatizer'): 'spacy_nob', - _tr('wl_settings_global', 'spaCy - Persian lemmatizer'): 'spacy_fas', - _tr('wl_settings_global', 'spaCy - Polish lemmatizer'): 'spacy_pol', - _tr('wl_settings_global', 'spaCy - Portuguese lemmatizer'): 'spacy_por', - _tr('wl_settings_global', 'spaCy - Romanian lemmatizer'): 'spacy_ron', - _tr('wl_settings_global', 'spaCy - Russian lemmatizer'): 'spacy_rus', - _tr('wl_settings_global', 'spaCy - Serbian (Cyrillic script) lemmatizer'): 'spacy_srp', - _tr('wl_settings_global', 'spaCy - Slovene lemmatizer'): 'spacy_slv', - _tr('wl_settings_global', 'spaCy - Spanish lemmatizer'): 'spacy_spa', - _tr('wl_settings_global', 'spaCy - Swedish lemmatizer'): 'spacy_swe', - _tr('wl_settings_global', 'spaCy - Tagalog lemmatizer'): 'spacy_tgl', - _tr('wl_settings_global', 'spaCy - Turkish lemmatizer'): 'spacy_tur', - _tr('wl_settings_global', 'spaCy - Ukrainian lemmatizer'): 'spacy_ukr', - _tr('wl_settings_global', 'spaCy - Urdu lemmatizer'): 'spacy_urd', - - _tr('wl_settings_global', 'Stanza - Afrikaans lemmatizer'): 'stanza_afr', - _tr('wl_settings_global', 'Stanza - Arabic lemmatizer'): 'stanza_ara', - _tr('wl_settings_global', 'Stanza - Armenian (Classical) lemmatizer'): 'stanza_xcl', - _tr('wl_settings_global', 'Stanza - Armenian (Eastern) lemmatizer'): 'stanza_hye', - _tr('wl_settings_global', 'Stanza - Armenian (Western) lemmatizer'): 'stanza_hyw', - _tr('wl_settings_global', 'Stanza - Basque lemmatizer'): 'stanza_eus', - _tr('wl_settings_global', 'Stanza - Belarusian lemmatizer'): 'stanza_bel', - _tr('wl_settings_global', 'Stanza - Bulgarian lemmatizer'): 'stanza_bul', - _tr('wl_settings_global', 'Stanza - Buryat (Russia) lemmatizer'): 'stanza_bxr', - _tr('wl_settings_global', 'Stanza - Catalan lemmatizer'): 'stanza_cat', - _tr('wl_settings_global', 'Stanza - Chinese (Classical) lemmatizer'): 'stanza_lzh', - _tr('wl_settings_global', 'Stanza - Chinese 
(Simplified) lemmatizer'): 'stanza_zho_cn', - _tr('wl_settings_global', 'Stanza - Chinese (Traditional) lemmatizer'): 'stanza_zho_tw', - _tr('wl_settings_global', 'Stanza - Church Slavonic (Old) lemmatizer'): 'stanza_chu', - _tr('wl_settings_global', 'Stanza - Coptic lemmatizer'): 'stanza_cop', - _tr('wl_settings_global', 'Stanza - Croatian lemmatizer'): 'stanza_hrv', - _tr('wl_settings_global', 'Stanza - Czech lemmatizer'): 'stanza_ces', - _tr('wl_settings_global', 'Stanza - Danish lemmatizer'): 'stanza_dan', - _tr('wl_settings_global', 'Stanza - Dutch lemmatizer'): 'stanza_nld', - _tr('wl_settings_global', 'Stanza - English lemmatizer'): 'stanza_eng', - _tr('wl_settings_global', 'Stanza - English (Old) lemmatizer'): 'stanza_ang', - _tr('wl_settings_global', 'Stanza - Erzya lemmatizer'): 'stanza_myv', - _tr('wl_settings_global', 'Stanza - Estonian lemmatizer'): 'stanza_est', - _tr('wl_settings_global', 'Stanza - Finnish lemmatizer'): 'stanza_fin', - _tr('wl_settings_global', 'Stanza - French lemmatizer'): 'stanza_fra', - _tr('wl_settings_global', 'Stanza - French (Old) lemmatizer'): 'stanza_fro', - _tr('wl_settings_global', 'Stanza - Galician lemmatizer'): 'stanza_glg', - _tr('wl_settings_global', 'Stanza - German lemmatizer'): 'stanza_deu', - _tr('wl_settings_global', 'Stanza - Gothic lemmatizer'): 'stanza_got', - _tr('wl_settings_global', 'Stanza - Greek (Ancient) lemmatizer'): 'stanza_grc', - _tr('wl_settings_global', 'Stanza - Greek (Modern) lemmatizer'): 'stanza_ell', - _tr('wl_settings_global', 'Stanza - Hebrew (Ancient) lemmatizer'): 'stanza_hbo', - _tr('wl_settings_global', 'Stanza - Hebrew (Modern) lemmatizer'): 'stanza_heb', - _tr('wl_settings_global', 'Stanza - Hindi lemmatizer'): 'stanza_hin', - _tr('wl_settings_global', 'Stanza - Hungarian lemmatizer'): 'stanza_hun', - _tr('wl_settings_global', 'Stanza - Icelandic lemmatizer'): 'stanza_isl', - _tr('wl_settings_global', 'Stanza - Indonesian lemmatizer'): 'stanza_ind', - _tr('wl_settings_global', 
'Stanza - Irish lemmatizer'): 'stanza_gle', - _tr('wl_settings_global', 'Stanza - Italian lemmatizer'): 'stanza_ita', - _tr('wl_settings_global', 'Stanza - Japanese lemmatizer'): 'stanza_jpn', - _tr('wl_settings_global', 'Stanza - Kazakh lemmatizer'): 'stanza_kaz', - _tr('wl_settings_global', 'Stanza - Korean lemmatizer'): 'stanza_kor', - _tr('wl_settings_global', 'Stanza - Kurdish (Kurmanji) lemmatizer'): 'stanza_kmr', - _tr('wl_settings_global', 'Stanza - Kyrgyz lemmatizer'): 'stanza_kir', - _tr('wl_settings_global', 'Stanza - Latin lemmatizer'): 'stanza_lat', - _tr('wl_settings_global', 'Stanza - Latvian lemmatizer'): 'stanza_lav', - _tr('wl_settings_global', 'Stanza - Ligurian lemmatizer'): 'stanza_lij', - _tr('wl_settings_global', 'Stanza - Lithuanian lemmatizer'): 'stanza_lit', - _tr('wl_settings_global', 'Stanza - Manx lemmatizer'): 'stanza_glv', - _tr('wl_settings_global', 'Stanza - Marathi lemmatizer'): 'stanza_mar', - _tr('wl_settings_global', 'Stanza - Nigerian Pidgin lemmatizer'): 'stanza_pcm', - _tr('wl_settings_global', 'Stanza - Norwegian (Bokmål) lemmatizer'): 'stanza_nob', - _tr('wl_settings_global', 'Stanza - Norwegian (Nynorsk) lemmatizer'): 'stanza_nno', - _tr('wl_settings_global', 'Stanza - Persian lemmatizer'): 'stanza_fas', - _tr('wl_settings_global', 'Stanza - Polish lemmatizer'): 'stanza_pol', - _tr('wl_settings_global', 'Stanza - Pomak lemmatizer'): 'stanza_qpm', - _tr('wl_settings_global', 'Stanza - Portuguese lemmatizer'): 'stanza_por', - _tr('wl_settings_global', 'Stanza - Romanian lemmatizer'): 'stanza_ron', - _tr('wl_settings_global', 'Stanza - Russian lemmatizer'): 'stanza_rus', - _tr('wl_settings_global', 'Stanza - Russian (Old) lemmatizer'): 'stanza_orv', - _tr('wl_settings_global', 'Stanza - Sámi (Northern) lemmatizer'): 'stanza_sme', - _tr('wl_settings_global', 'Stanza - Sanskrit lemmatizer'): 'stanza_san', - _tr('wl_settings_global', 'Stanza - Scottish Gaelic lemmatizer'): 'stanza_gla', - _tr('wl_settings_global', 'Stanza - 
Serbian (Latin script) lemmatizer'): 'stanza_srp_latn', - _tr('wl_settings_global', 'Stanza - Slovak lemmatizer'): 'stanza_slk', - _tr('wl_settings_global', 'Stanza - Slovene lemmatizer'): 'stanza_slv', - _tr('wl_settings_global', 'Stanza - Sorbian (Upper) lemmatizer'): 'stanza_hsb', - _tr('wl_settings_global', 'Stanza - Spanish lemmatizer'): 'stanza_spa', - _tr('wl_settings_global', 'Stanza - Swedish lemmatizer'): 'stanza_swe', - _tr('wl_settings_global', 'Stanza - Tamil lemmatizer'): 'stanza_tam', - _tr('wl_settings_global', 'Stanza - Turkish lemmatizer'): 'stanza_tur', - _tr('wl_settings_global', 'Stanza - Ukrainian lemmatizer'): 'stanza_ukr', - _tr('wl_settings_global', 'Stanza - Urdu lemmatizer'): 'stanza_urd', - _tr('wl_settings_global', 'Stanza - Uyghur lemmatizer'): 'stanza_uig', - _tr('wl_settings_global', 'Stanza - Welsh lemmatizer'): 'stanza_cym', - _tr('wl_settings_global', 'Stanza - Wolof lemmatizer'): 'stanza_wol', - - _tr('wl_settings_global', 'SudachiPy - Japanese lemmatizer'): 'sudachipy_jpn' - }, + _tr('wl_settings_global', 'Cyrillic (CP855)'): 'cp855', + _tr('wl_settings_global', 'Cyrillic (CP866)'): 'cp866', + _tr('wl_settings_global', 'Cyrillic (ISO-8859-5)'): 'iso8859_5', + _tr('wl_settings_global', 'Cyrillic (Mac OS Cyrillic)'): 'mac_cyrillic', + _tr('wl_settings_global', 'Cyrillic (Windows-1251)'): 'cp1251', - 'stop_word_lists': { - _tr('wl_settings_global', 'LaoNLP - Lao stop word list'): 'laonlp_lao', - - _tr('wl_settings_global', 'NLTK - Arabic stop word list'): 'nltk_ara', - _tr('wl_settings_global', 'NLTK - Azerbaijani stop word list'): 'nltk_aze', - _tr('wl_settings_global', 'NLTK - Basque stop word list'): 'nltk_eus', - _tr('wl_settings_global', 'NLTK - Bengali stop word list'): 'nltk_ben', - _tr('wl_settings_global', 'NLTK - Catalan stop word list'): 'nltk_cat', - _tr('wl_settings_global', 'NLTK - Chinese (Simplified) stop word list'): 'nltk_zho_cn', - _tr('wl_settings_global', 'NLTK - Chinese (Traditional) stop word list'): 
'nltk_zho_tw', - _tr('wl_settings_global', 'NLTK - Danish stop word list'): 'nltk_dan', - _tr('wl_settings_global', 'NLTK - Dutch stop word list'): 'nltk_nld', - _tr('wl_settings_global', 'NLTK - English stop word list'): 'nltk_eng', - _tr('wl_settings_global', 'NLTK - Finnish stop word list'): 'nltk_fin', - _tr('wl_settings_global', 'NLTK - French stop word list'): 'nltk_fra', - _tr('wl_settings_global', 'NLTK - German stop word list'): 'nltk_deu', - _tr('wl_settings_global', 'NLTK - Greek (Modern) stop word list'): 'nltk_ell', - _tr('wl_settings_global', 'NLTK - Hebrew (Modern) stop word list'): 'nltk_heb', - _tr('wl_settings_global', 'NLTK - Hungarian stop word list'): 'nltk_hun', - _tr('wl_settings_global', 'NLTK - Indonesian stop word list'): 'nltk_ind', - _tr('wl_settings_global', 'NLTK - Italian stop word list'): 'nltk_ita', - _tr('wl_settings_global', 'NLTK - Kazakh stop word list'): 'nltk_kaz', - _tr('wl_settings_global', 'NLTK - Nepali stop word list'): 'nltk_nep', - _tr('wl_settings_global', 'NLTK - Norwegian (Bokmål) stop word list'): 'nltk_nob', - _tr('wl_settings_global', 'NLTK - Portuguese stop word list'): 'nltk_por', - _tr('wl_settings_global', 'NLTK - Romanian stop word list'): 'nltk_ron', - _tr('wl_settings_global', 'NLTK - Russian stop word list'): 'nltk_rus', - _tr('wl_settings_global', 'NLTK - Slovene stop word list'): 'nltk_slv', - _tr('wl_settings_global', 'NLTK - Spanish stop word list'): 'nltk_spa', - _tr('wl_settings_global', 'NLTK - Swedish stop word list'): 'nltk_swe', - _tr('wl_settings_global', 'NLTK - Tajik stop word list'): 'nltk_tgk', - _tr('wl_settings_global', 'NLTK - Turkish stop word list'): 'nltk_tur', - - _tr('wl_settings_global', 'PyThaiNLP - Thai stop word list'): 'pythainlp_tha', - - _tr('wl_settings_global', 'Custom stop word list'): 'custom', - }, + _tr('wl_settings_global', 'English (ASCII)'): 'ascii', + _tr('wl_settings_global', 'English (EBCDIC 037)'): 'cp037', + _tr('wl_settings_global', 'English (CP437)'): 'cp437', 
- 'dependency_parsers':{ - _tr('wl_settings_global', 'spaCy - Catalan dependency parser'): 'spacy_cat', - _tr('wl_settings_global', 'spaCy - Chinese dependency parser'): 'spacy_zho', - _tr('wl_settings_global', 'spaCy - Croatian dependency parser'): 'spacy_hrv', - _tr('wl_settings_global', 'spaCy - Danish dependency parser'): 'spacy_dan', - _tr('wl_settings_global', 'spaCy - Dutch dependency parser'): 'spacy_nld', - _tr('wl_settings_global', 'spaCy - English dependency parser'): 'spacy_eng', - _tr('wl_settings_global', 'spaCy - Finnish dependency parser'): 'spacy_fin', - _tr('wl_settings_global', 'spaCy - French dependency parser'): 'spacy_fra', - _tr('wl_settings_global', 'spaCy - German dependency parser'): 'spacy_deu', - _tr('wl_settings_global', 'spaCy - Greek (Modern) dependency parser'): 'spacy_ell', - _tr('wl_settings_global', 'spaCy - Italian dependency parser'): 'spacy_ita', - _tr('wl_settings_global', 'spaCy - Japanese dependency parser'): 'spacy_jpn', - _tr('wl_settings_global', 'spaCy - Korean dependency parser'): 'spacy_kor', - _tr('wl_settings_global', 'spaCy - Lithuanian dependency parser'): 'spacy_lit', - _tr('wl_settings_global', 'spaCy - Macedonian dependency parser'): 'spacy_mkd', - _tr('wl_settings_global', 'spaCy - Norwegian (Bokmål) dependency parser'): 'spacy_nob', - _tr('wl_settings_global', 'spaCy - Polish dependency parser'): 'spacy_pol', - _tr('wl_settings_global', 'spaCy - Portuguese dependency parser'): 'spacy_por', - _tr('wl_settings_global', 'spaCy - Romanian dependency parser'): 'spacy_ron', - _tr('wl_settings_global', 'spaCy - Russian dependency parser'): 'spacy_rus', - _tr('wl_settings_global', 'spaCy - Slovene dependency parser'): 'spacy_slv', - _tr('wl_settings_global', 'spaCy - Spanish dependency parser'): 'spacy_spa', - _tr('wl_settings_global', 'spaCy - Swedish dependency parser'): 'spacy_swe', - _tr('wl_settings_global', 'spaCy - Ukrainian dependency parser'): 'spacy_ukr', - - _tr('wl_settings_global', 'Stanza - Afrikaans 
dependency parser'): 'stanza_afr', - _tr('wl_settings_global', 'Stanza - Arabic dependency parser'): 'stanza_ara', - _tr('wl_settings_global', 'Stanza - Armenian (Classical) dependency parser'): 'stanza_xcl', - _tr('wl_settings_global', 'Stanza - Armenian (Eastern) dependency parser'): 'stanza_hye', - _tr('wl_settings_global', 'Stanza - Armenian (Western) dependency parser'): 'stanza_hyw', - _tr('wl_settings_global', 'Stanza - Basque dependency parser'): 'stanza_eus', - _tr('wl_settings_global', 'Stanza - Belarusian dependency parser'): 'stanza_bel', - _tr('wl_settings_global', 'Stanza - Bulgarian dependency parser'): 'stanza_bul', - _tr('wl_settings_global', 'Stanza - Buryat (Russia) dependency parser'): 'stanza_bxr', - _tr('wl_settings_global', 'Stanza - Catalan dependency parser'): 'stanza_cat', - _tr('wl_settings_global', 'Stanza - Chinese (Classical) dependency parser'): 'stanza_lzh', - _tr('wl_settings_global', 'Stanza - Chinese (Simplified) dependency parser'): 'stanza_zho_cn', - _tr('wl_settings_global', 'Stanza - Chinese (Traditional) dependency parser'): 'stanza_zho_tw', - _tr('wl_settings_global', 'Stanza - Church Slavonic (Old) dependency parser'): 'stanza_chu', - _tr('wl_settings_global', 'Stanza - Coptic dependency parser'): 'stanza_cop', - _tr('wl_settings_global', 'Stanza - Croatian dependency parser'): 'stanza_hrv', - _tr('wl_settings_global', 'Stanza - Czech dependency parser'): 'stanza_ces', - _tr('wl_settings_global', 'Stanza - Danish dependency parser'): 'stanza_dan', - _tr('wl_settings_global', 'Stanza - Dutch dependency parser'): 'stanza_nld', - _tr('wl_settings_global', 'Stanza - English dependency parser'): 'stanza_eng', - _tr('wl_settings_global', 'Stanza - English (Old) dependency parser'): 'stanza_ang', - _tr('wl_settings_global', 'Stanza - Erzya dependency parser'): 'stanza_myv', - _tr('wl_settings_global', 'Stanza - Estonian dependency parser'): 'stanza_est', - _tr('wl_settings_global', 'Stanza - Faroese dependency parser'): 
'stanza_fao', - _tr('wl_settings_global', 'Stanza - Finnish dependency parser'): 'stanza_fin', - _tr('wl_settings_global', 'Stanza - French dependency parser'): 'stanza_fra', - _tr('wl_settings_global', 'Stanza - French (Old) dependency parser'): 'stanza_fro', - _tr('wl_settings_global', 'Stanza - Galician dependency parser'): 'stanza_glg', - _tr('wl_settings_global', 'Stanza - German dependency parser'): 'stanza_deu', - _tr('wl_settings_global', 'Stanza - Gothic dependency parser'): 'stanza_got', - _tr('wl_settings_global', 'Stanza - Greek (Ancient) dependency parser'): 'stanza_grc', - _tr('wl_settings_global', 'Stanza - Greek (Modern) dependency parser'): 'stanza_ell', - _tr('wl_settings_global', 'Stanza - Hebrew (Ancient) dependency parser'): 'stanza_hbo', - _tr('wl_settings_global', 'Stanza - Hebrew (Modern) dependency parser'): 'stanza_heb', - _tr('wl_settings_global', 'Stanza - Hindi dependency parser'): 'stanza_hin', - _tr('wl_settings_global', 'Stanza - Hungarian dependency parser'): 'stanza_hun', - _tr('wl_settings_global', 'Stanza - Icelandic dependency parser'): 'stanza_isl', - _tr('wl_settings_global', 'Stanza - Indonesian dependency parser'): 'stanza_ind', - _tr('wl_settings_global', 'Stanza - Irish dependency parser'): 'stanza_gle', - _tr('wl_settings_global', 'Stanza - Italian dependency parser'): 'stanza_ita', - _tr('wl_settings_global', 'Stanza - Japanese dependency parser'): 'stanza_jpn', - _tr('wl_settings_global', 'Stanza - Kazakh dependency parser'): 'stanza_kaz', - _tr('wl_settings_global', 'Stanza - Korean dependency parser'): 'stanza_kor', - _tr('wl_settings_global', 'Stanza - Kurdish (Kurmanji) dependency parser'): 'stanza_kmr', - _tr('wl_settings_global', 'Stanza - Kyrgyz dependency parser'): 'stanza_kir', - _tr('wl_settings_global', 'Stanza - Latin dependency parser'): 'stanza_lat', - _tr('wl_settings_global', 'Stanza - Latvian dependency parser'): 'stanza_lav', - _tr('wl_settings_global', 'Stanza - Ligurian dependency parser'): 
'stanza_lij', - _tr('wl_settings_global', 'Stanza - Lithuanian dependency parser'): 'stanza_lit', - _tr('wl_settings_global', 'Stanza - Maltese dependency parser'): 'stanza_mlt', - _tr('wl_settings_global', 'Stanza - Manx dependency parser'): 'stanza_glv', - _tr('wl_settings_global', 'Stanza - Marathi dependency parser'): 'stanza_mar', - _tr('wl_settings_global', 'Stanza - Nigerian Pidgin dependency parser'): 'stanza_pcm', - _tr('wl_settings_global', 'Stanza - Norwegian (Bokmål) dependency parser'): 'stanza_nob', - _tr('wl_settings_global', 'Stanza - Norwegian (Nynorsk) dependency parser'): 'stanza_nno', - _tr('wl_settings_global', 'Stanza - Persian dependency parser'): 'stanza_fas', - _tr('wl_settings_global', 'Stanza - Polish dependency parser'): 'stanza_pol', - _tr('wl_settings_global', 'Stanza - Pomak dependency parser'): 'stanza_qpm', - _tr('wl_settings_global', 'Stanza - Portuguese dependency parser'): 'stanza_por', - _tr('wl_settings_global', 'Stanza - Romanian dependency parser'): 'stanza_ron', - _tr('wl_settings_global', 'Stanza - Russian dependency parser'): 'stanza_rus', - _tr('wl_settings_global', 'Stanza - Russian (Old) dependency parser'): 'stanza_orv', - _tr('wl_settings_global', 'Stanza - Sámi (Northern) dependency parser'): 'stanza_sme', - _tr('wl_settings_global', 'Stanza - Sanskrit dependency parser'): 'stanza_san', - _tr('wl_settings_global', 'Stanza - Scottish Gaelic dependency parser'): 'stanza_gla', - _tr('wl_settings_global', 'Stanza - Serbian (Latin script) dependency parser'): 'stanza_srp_latn', - _tr('wl_settings_global', 'Stanza - Slovak dependency parser'): 'stanza_slk', - _tr('wl_settings_global', 'Stanza - Slovene dependency parser'): 'stanza_slv', - _tr('wl_settings_global', 'Stanza - Sorbian (Upper) dependency parser'): 'stanza_hsb', - _tr('wl_settings_global', 'Stanza - Spanish dependency parser'): 'stanza_spa', - _tr('wl_settings_global', 'Stanza - Swedish dependency parser'): 'stanza_swe', - _tr('wl_settings_global', 'Stanza - 
Tamil dependency parser'): 'stanza_tam', - _tr('wl_settings_global', 'Stanza - Telugu dependency parser'): 'stanza_tel', - _tr('wl_settings_global', 'Stanza - Turkish dependency parser'): 'stanza_tur', - _tr('wl_settings_global', 'Stanza - Ukrainian dependency parser'): 'stanza_ukr', - _tr('wl_settings_global', 'Stanza - Urdu dependency parser'): 'stanza_urd', - _tr('wl_settings_global', 'Stanza - Uyghur dependency parser'): 'stanza_uig', - _tr('wl_settings_global', 'Stanza - Vietnamese dependency parser'): 'stanza_vie', - _tr('wl_settings_global', 'Stanza - Welsh dependency parser'): 'stanza_cym', - _tr('wl_settings_global', 'Stanza - Wolof dependency parser'): 'stanza_wol' - }, + _tr('wl_settings_global', 'European (HP Roman-8)'): 'hp_roman8', - 'sentiment_analyzers': { - _tr('wl_settings_global', 'Stanza - Chinese (Simplified) sentiment analyzer'): 'stanza_zho_cn', - _tr('wl_settings_global', 'Stanza - German sentiment analyzer'): 'stanza_deu', - _tr('wl_settings_global', 'Stanza - English sentiment analyzer'): 'stanza_eng', - _tr('wl_settings_global', 'Stanza - Marathi sentiment analyzer'): 'stanza_mar', - _tr('wl_settings_global', 'Stanza - Spanish sentiment analyzer'): 'stanza_spa', - _tr('wl_settings_global', 'Stanza - Vietnamese sentiment analyzer'): 'stanza_vie', - - _tr('wl_settings_global', 'Underthesea - Vietnamese sentiment analyzer'): 'underthesea_vie', - - _tr('wl_settings_global', 'VADER - Afrikaans sentiment analyzer'): 'vader_afr', - _tr('wl_settings_global', 'VADER - Albanian sentiment analyzer'): 'vader_sqi', - _tr('wl_settings_global', 'VADER - Amharic sentiment analyzer'): 'vader_amh', - _tr('wl_settings_global', 'VADER - Arabic sentiment analyzer'): 'vader_ara', - _tr('wl_settings_global', 'VADER - Armenian sentiment analyzer'): 'vader_hye', - _tr('wl_settings_global', 'VADER - Assamese sentiment analyzer'): 'vader_asm', - _tr('wl_settings_global', 'VADER - Azerbaijani sentiment analyzer'): 'vader_aze', - _tr('wl_settings_global', 'VADER - 
Basque sentiment analyzer'): 'vader_eus', - _tr('wl_settings_global', 'VADER - Belarusian sentiment analyzer'): 'vader_bel', - _tr('wl_settings_global', 'VADER - Bengali sentiment analyzer'): 'vader_ben', - _tr('wl_settings_global', 'VADER - Bulgarian sentiment analyzer'): 'vader_bul', - _tr('wl_settings_global', 'VADER - Burmese sentiment analyzer'): 'vader_mya', - _tr('wl_settings_global', 'VADER - Catalan sentiment analyzer'): 'vader_cat', - _tr('wl_settings_global', 'VADER - Chinese (Simplified) sentiment analyzer'): 'vader_zho_cn', - _tr('wl_settings_global', 'VADER - Chinese (Traditional) sentiment analyzer'): 'vader_zho_tw', - _tr('wl_settings_global', 'VADER - Croatian sentiment analyzer'): 'vader_hrv', - _tr('wl_settings_global', 'VADER - Czech sentiment analyzer'): 'vader_ces', - _tr('wl_settings_global', 'VADER - Danish sentiment analyzer'): 'vader_dan', - _tr('wl_settings_global', 'VADER - Dutch sentiment analyzer'): 'vader_nld', - _tr('wl_settings_global', 'VADER - English sentiment analyzer'): 'vader_eng', - _tr('wl_settings_global', 'VADER - Esperanto sentiment analyzer'): 'vader_epo', - _tr('wl_settings_global', 'VADER - Estonian sentiment analyzer'): 'vader_est', - _tr('wl_settings_global', 'VADER - Finnish sentiment analyzer'): 'vader_fin', - _tr('wl_settings_global', 'VADER - French sentiment analyzer'): 'vader_fra', - _tr('wl_settings_global', 'VADER - Galician sentiment analyzer'): 'vader_glg', - _tr('wl_settings_global', 'VADER - Georgian sentiment analyzer'): 'vader_kat', - _tr('wl_settings_global', 'VADER - German sentiment analyzer'): 'vader_deu', - _tr('wl_settings_global', 'VADER - Greek (Modern) sentiment analyzer'): 'vader_ell', - _tr('wl_settings_global', 'VADER - Gujarati sentiment analyzer'): 'vader_guj', - _tr('wl_settings_global', 'VADER - Hebrew (Modern) sentiment analyzer'): 'vader_heb', - _tr('wl_settings_global', 'VADER - Hindi sentiment analyzer'): 'vader_hin', - _tr('wl_settings_global', 'VADER - Hungarian sentiment 
analyzer'): 'vader_hun', - _tr('wl_settings_global', 'VADER - Icelandic sentiment analyzer'): 'vader_isl', - _tr('wl_settings_global', 'VADER - Indonesian sentiment analyzer'): 'vader_ind', - _tr('wl_settings_global', 'VADER - Irish sentiment analyzer'): 'vader_gle', - _tr('wl_settings_global', 'VADER - Italian sentiment analyzer'): 'vader_ita', - _tr('wl_settings_global', 'VADER - Japanese sentiment analyzer'): 'vader_jpn', - _tr('wl_settings_global', 'VADER - Kannada sentiment analyzer'): 'vader_kan', - _tr('wl_settings_global', 'VADER - Kazakh sentiment analyzer'): 'vader_kaz', - _tr('wl_settings_global', 'VADER - Khmer sentiment analyzer'): 'vader_khm', - _tr('wl_settings_global', 'VADER - Korean sentiment analyzer'): 'vader_kor', - _tr('wl_settings_global', 'VADER - Kurdish (Kurmanji) sentiment analyzer'): 'vader_kmr', - _tr('wl_settings_global', 'VADER - Kyrgyz sentiment analyzer'): 'vader_kir', - _tr('wl_settings_global', 'VADER - Laos sentiment analyzer'): 'vader_lao', - _tr('wl_settings_global', 'VADER - Latin sentiment analyzer'): 'vader_lat', - _tr('wl_settings_global', 'VADER - Latvian sentiment analyzer'): 'vader_lav', - _tr('wl_settings_global', 'VADER - Lithuanian sentiment analyzer'): 'vader_lit', - _tr('wl_settings_global', 'VADER - Luganda sentiment analyzer'): 'vader_lug', - _tr('wl_settings_global', 'VADER - Luxembourgish sentiment analyzer'): 'vader_ltz', - _tr('wl_settings_global', 'VADER - Macedonian sentiment analyzer'): 'vader_mkd', - _tr('wl_settings_global', 'VADER - Malay sentiment analyzer'): 'vader_msa', - _tr('wl_settings_global', 'VADER - Malayalam sentiment analyzer'): 'vader_mal', - _tr('wl_settings_global', 'VADER - Maltese sentiment analyzer'): 'vader_mlt', - _tr('wl_settings_global', 'VADER - Marathi sentiment analyzer'): 'vader_mar', - _tr('wl_settings_global', 'VADER - Meitei (Meitei script) sentiment analyzer'): 'vader_mni_mtei', - _tr('wl_settings_global', 'VADER - Mongolian sentiment analyzer'): 'vader_mon', - 
_tr('wl_settings_global', 'VADER - Nepali sentiment analyzer'): 'vader_nep', - # References: - # https://support.google.com/translate/thread/1818911/norwegin-translate-is-in-bokm%C3%A5l-or-nynorsk?hl=en - # https://www.quora.com/How-does-Google-Translate-do-with-Norwegian-language-as-Norwegian-has-two-official-forms - _tr('wl_settings_global', 'VADER - Norwegian (Bokmål) sentiment analyzer'): 'vader_nob', - _tr('wl_settings_global', 'VADER - Odia sentiment analyzer'): 'vader_ori', - _tr('wl_settings_global', 'VADER - Persian sentiment analyzer'): 'vader_fas', - _tr('wl_settings_global', 'VADER - Polish sentiment analyzer'): 'vader_pol', - _tr('wl_settings_global', 'VADER - Portuguese sentiment analyzer'): 'vader_por', - _tr('wl_settings_global', 'VADER - Punjabi (Gurmukhi script) sentiment analyzer'): 'vader_pan_guru', - _tr('wl_settings_global', 'VADER - Romanian sentiment analyzer'): 'vader_ron', - _tr('wl_settings_global', 'VADER - Russian sentiment analyzer'): 'vader_rus', - _tr('wl_settings_global', 'VADER - Sanskrit sentiment analyzer'): 'vader_san', - _tr('wl_settings_global', 'VADER - Scottish Gaelic sentiment analyzer'): 'vader_gla', - _tr('wl_settings_global', 'VADER - Serbian (Cyrillic script) sentiment analyzer'): 'vader_srp_cyrl', - _tr('wl_settings_global', 'VADER - Sindhi sentiment analyzer'): 'vader_snd', - _tr('wl_settings_global', 'VADER - Sinhala sentiment analyzer'): 'vader_sin', - _tr('wl_settings_global', 'VADER - Slovak sentiment analyzer'): 'vader_slk', - _tr('wl_settings_global', 'VADER - Slovene sentiment analyzer'): 'vader_slv', - _tr('wl_settings_global', 'VADER - Spanish sentiment analyzer'): 'vader_spa', - _tr('wl_settings_global', 'VADER - Swahili sentiment analyzer'): 'vader_swa', - _tr('wl_settings_global', 'VADER - Swedish sentiment analyzer'): 'vader_swe', - _tr('wl_settings_global', 'VADER - Tagalog sentiment analyzer'): 'vader_tgl', - _tr('wl_settings_global', 'VADER - Tajik sentiment analyzer'): 'vader_tgk', - 
_tr('wl_settings_global', 'VADER - Tamil sentiment analyzer'): 'vader_tam', - _tr('wl_settings_global', 'VADER - Tatar sentiment analyzer'): 'vader_tat', - _tr('wl_settings_global', 'VADER - Telugu sentiment analyzer'): 'vader_tel', - _tr('wl_settings_global', 'VADER - Thai sentiment analyzer'): 'vader_tha', - _tr('wl_settings_global', 'VADER - Tigrinya sentiment analyzer'): 'vader_tir', - _tr('wl_settings_global', 'VADER - Turkish sentiment analyzer'): 'vader_tur', - _tr('wl_settings_global', 'VADER - Ukrainian sentiment analyzer'): 'vader_ukr', - _tr('wl_settings_global', 'VADER - Urdu sentiment analyzer'): 'vader_urd', - _tr('wl_settings_global', 'VADER - Uyghur sentiment analyzer'): 'vader_uig', - _tr('wl_settings_global', 'VADER - Welsh sentiment analyzer'): 'vader_cym', - _tr('wl_settings_global', 'VADER - Yoruba sentiment analyzer'): 'vader_yor', - _tr('wl_settings_global', 'VADER - Zulu sentiment analyzer'): 'vader_zul' - } - }, - - 'sentence_tokenizers': { - 'afr': [ - 'spacy_sentencizer', - 'stanza_afr' - ], - - 'ara': [ - 'spacy_sentencizer', - 'stanza_ara' - ], - - 'xcl': [ - 'spacy_sentencizer', - 'stanza_xcl' - ], - 'hye': [ - 'spacy_sentencizer', - 'stanza_hye' - ], - 'hyw': [ - 'spacy_sentencizer', - 'stanza_hyw' - ], - - 'eus': [ - 'spacy_sentencizer', - 'stanza_eus' - ], - - 'bel': [ - 'spacy_sentencizer', - 'stanza_bel' - ], - - 'bul': [ - 'spacy_sentencizer', - 'stanza_bul' - ], - - 'mya': [ - 'spacy_sentencizer', - 'stanza_mya' - ], - - 'bxr': [ - 'spacy_sentencizer', - 'stanza_bxr' - ], - - 'cat': [ - 'spacy_dependency_parser_cat', - 'spacy_sentencizer', - 'stanza_cat' - ], - - 'lzh': [ - 'spacy_sentencizer', - 'stanza_lzh' - ], - 'zho_cn': [ - 'spacy_dependency_parser_zho', - 'spacy_sentencizer', - 'stanza_zho_cn' - ], - 'zho_tw': [ - 'spacy_dependency_parser_zho', - 'spacy_sentencizer', - 'stanza_zho_tw' - ], - - 'chu': [ - 'spacy_sentencizer', - 'stanza_chu' - ], - - 'cop': [ - 'spacy_sentencizer', - 'stanza_cop' - ], - - 'hrv': [ - 
'spacy_dependency_parser_hrv', - 'spacy_sentence_recognizer_hrv', - 'spacy_sentencizer', - 'stanza_hrv' - ], - - 'ces': [ - 'nltk_punkt_ces', - 'spacy_sentencizer', - 'stanza_ces' - ], - - 'dan': [ - 'nltk_punkt_dan', - 'spacy_dependency_parser_dan', - 'spacy_sentencizer', - 'stanza_dan' - ], - - 'nld': [ - 'nltk_punkt_nld', - 'spacy_dependency_parser_nld', - 'spacy_sentence_recognizer_nld', - 'spacy_sentencizer', - 'stanza_nld' - ], - - 'ang': [ - 'spacy_sentencizer', - 'stanza_ang' - ], - 'eng_gb': [ - 'nltk_punkt_eng', - 'spacy_dependency_parser_eng', - 'spacy_sentencizer', - 'stanza_eng' - ], - 'eng_us': [ - 'nltk_punkt_eng', - 'spacy_dependency_parser_eng', - 'spacy_sentencizer', - 'stanza_eng' - ], - - 'myv': [ - 'spacy_sentencizer', - 'stanza_myv' - ], - - 'est': [ - 'nltk_punkt_est', - 'spacy_sentencizer', - 'stanza_est' - ], - - 'fao': [ - 'spacy_sentencizer', - 'stanza_fao' - ], - - 'fin': [ - 'nltk_punkt_fin', - 'spacy_dependency_parser_fin', - 'spacy_sentence_recognizer_fin', - 'spacy_sentencizer', - 'stanza_fin' - ], - - 'fra': [ - 'nltk_punkt_fra', - 'spacy_dependency_parser_fra', - 'spacy_sentencizer', - 'stanza_fra' - ], - 'fro': [ - 'spacy_sentencizer', - 'stanza_fro' - ], - - 'glg': [ - 'spacy_sentencizer', - 'stanza_glg' - ], - - 'deu_at': [ - 'nltk_punkt_deu', - 'spacy_dependency_parser_deu', - 'spacy_sentencizer', - 'stanza_deu' - ], - 'deu_de': [ - 'nltk_punkt_deu', - 'spacy_dependency_parser_deu', - 'spacy_sentencizer', - 'stanza_deu' - ], - 'deu_ch': [ - 'nltk_punkt_deu', - 'spacy_dependency_parser_deu', - 'spacy_sentencizer', - 'stanza_deu' - ], - - 'got': [ - 'spacy_sentencizer', - 'stanza_got' - ], - - 'grc': [ - 'spacy_sentencizer', - 'stanza_grc' - ], - 'ell': [ - 'nltk_punkt_ell', - 'spacy_dependency_parser_ell', - 'spacy_sentence_recognizer_ell', - 'spacy_sentencizer', - 'stanza_ell' - ], - - 'hbo': [ - 'spacy_sentencizer', - 'stanza_hbo' - ], - 'heb': [ - 'spacy_sentencizer', - 'stanza_heb' - ], - - 'hin': [ - 'spacy_sentencizer', - 
'stanza_hin' - ], - - 'hun': [ - 'spacy_sentencizer', - 'stanza_hun' - ], - - 'isl': [ - 'spacy_sentencizer', - 'stanza_isl' - ], - - 'ind': [ - 'spacy_sentencizer', - 'stanza_ind' - ], - - 'gle': [ - 'spacy_sentencizer', - 'stanza_gle' - ], - - 'ita': [ - 'nltk_punkt_ita', - 'spacy_dependency_parser_ita', - 'spacy_sentence_recognizer_ita', - 'spacy_sentencizer', - 'stanza_ita' - ], - - 'jpn': [ - 'spacy_dependency_parser_jpn', - 'spacy_sentencizer', - 'stanza_jpn' - ], - - 'khm': ['khmer_nltk_khm'], - - 'kaz': [ - 'spacy_sentencizer', - 'stanza_kaz' - ], - - 'kor': [ - 'spacy_dependency_parser_kor', - 'spacy_sentence_recognizer_kor', - 'spacy_sentencizer', - 'stanza_kor' - ], - - 'kmr': [ - 'spacy_sentencizer', - 'stanza_kmr' - ], - - 'kir': [ - 'spacy_sentencizer', - 'stanza_kir' - ], - - 'lao': [ - 'laonlp_lao', - 'spacy_sentencizer' - ], - - 'lat': [ - 'spacy_sentencizer', - 'stanza_lat' - ], - - 'lav': [ - 'spacy_sentencizer', - 'stanza_lav' - ], - - 'lij': [ - 'spacy_sentencizer', - 'stanza_lij' - ], - - 'lit': [ - 'spacy_dependency_parser_lit', - 'spacy_sentence_recognizer_lit', - 'spacy_sentencizer', - 'stanza_lit' - ], - - 'mkd': [ - 'spacy_dependency_parser_mkd', - 'spacy_sentence_recognizer_mkd', - 'spacy_sentencizer' - ], - - 'mal': [ - 'nltk_punkt_mal', - 'spacy_sentencizer' - ], - - 'mlt': [ - 'spacy_sentencizer', - 'stanza_mlt' - ], - - 'glv': [ - 'spacy_sentencizer', - 'stanza_glv' - ], - - 'mar': [ - 'spacy_sentencizer', - 'stanza_mar' - ], - - 'pcm': [ - 'spacy_sentencizer', - 'stanza_pcm' - ], - - 'nob': [ - 'nltk_punkt_nob', - 'spacy_dependency_parser_nob', - 'spacy_sentence_recognizer_nob', - 'spacy_sentencizer', - 'stanza_nob' - ], - - 'nno': [ - 'spacy_sentencizer', - 'stanza_nno' - ], - - 'fas': [ - 'spacy_sentencizer', - 'stanza_fas' - ], - - 'pol': [ - 'nltk_punkt_pol', - 'spacy_dependency_parser_pol', - 'spacy_sentence_recognizer_pol', - 'spacy_sentencizer', - 'stanza_pol' - ], - - 'qpm': [ - 'spacy_sentencizer', - 'stanza_qpm' - ], - - 
'por_br': [ - 'nltk_punkt_por', - 'spacy_dependency_parser_por', - 'spacy_sentence_recognizer_por', - 'spacy_sentencizer', - 'stanza_por' - ], - 'por_pt': [ - 'nltk_punkt_por', - 'spacy_dependency_parser_por', - 'spacy_sentence_recognizer_por', - 'spacy_sentencizer', - 'stanza_por' - ], - - 'ron': [ - 'spacy_dependency_parser_ron', - 'spacy_sentence_recognizer_ron', - 'spacy_sentencizer', - 'stanza_ron' - ], - - 'rus': [ - 'nltk_punkt_rus', - 'spacy_dependency_parser_rus', - 'spacy_sentence_recognizer_rus', - 'spacy_sentencizer', - 'stanza_rus' - ], - 'orv': [ - 'spacy_sentencizer', - 'stanza_orv' - ], - - 'sme': [ - 'spacy_sentencizer', - 'stanza_sme' - ], - - 'san': [ - 'spacy_sentencizer', - 'stanza_san' - ], - - 'gla': [ - 'spacy_sentencizer', - 'stanza_gla' - ], - - 'srp_latn': [ - 'spacy_sentencizer', - 'stanza_srp_latn' - ], - - 'snd': [ - 'spacy_sentencizer', - 'stanza_snd' - ], - - 'slk': [ - 'spacy_sentencizer', - 'stanza_slk' - ], - - 'slv': [ - 'nltk_punkt_slv', - 'spacy_dependency_parser_slv', - 'spacy_sentencizer', - 'stanza_slv' - ], - - 'hsb': [ - 'spacy_sentencizer', - 'stanza_hsb' - ], - - 'spa': [ - 'nltk_punkt_spa', - 'spacy_dependency_parser_spa', - 'spacy_sentencizer', - 'stanza_spa' - ], - - 'swe': [ - 'nltk_punkt_swe', - 'spacy_dependency_parser_swe', - 'spacy_sentence_recognizer_swe', - 'spacy_sentencizer', - 'stanza_swe' - ], - - 'tam': [ - 'spacy_sentencizer', - 'stanza_tam' - ], - - 'tel': [ - 'spacy_sentencizer', - 'stanza_tel' - ], - - 'tha': [ - 'pythainlp_crfcut', - 'pythainlp_thaisumcut', - 'stanza_tha' - ], - - 'bod': ['botok_bod'], - - 'tur': [ - 'nltk_punkt_tur', - 'spacy_sentencizer', - 'stanza_tur' - ], - - 'ukr': [ - 'spacy_dependency_parser_ukr', - 'spacy_sentencizer', - 'stanza_ukr' - ], - - 'urd': [ - 'spacy_sentencizer', - 'stanza_urd' - ], - - 'uig': [ - 'spacy_sentencizer', - 'stanza_uig' - ], - - 'vie': [ - 'underthesea_vie', - 'stanza_vie' - ], - - 'cym': [ - 'spacy_sentencizer', - 'stanza_cym' - ], - - 'wol': [ - 
'spacy_sentencizer', - 'stanza_wol' - ], - - 'other': [ - 'nltk_punkt_eng', - 'spacy_sentencizer', - 'stanza_eng' - ] - }, - - 'word_tokenizers': { - 'afr': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_afr', - 'stanza_afr' - ], - - 'sqi': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_sqi' - ], - - 'amh': ['spacy_amh'], - - 'ara': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_ara', - 'stanza_ara' - ], - - 'xcl': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_xcl' - ], - 'hye': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_hye', - 'stanza_hye' - ], - 'hyw': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_hyw' - ], - - 'asm': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses' - ], - - 'aze': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_aze' - ], - - 'eus': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_eus', - 'stanza_eus' - ], - - 'ben': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_ben' - ], - - 'bel': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_bel' - ], - - 'bul': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_bul', - 'stanza_bul' - ], - - 'mya': ['stanza_mya'], - - 'bxr': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_bxr' - ], - - 'cat': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_cat', - 'stanza_cat' - ], - - 'lzh': ['stanza_lzh'], - 'zho_cn': [ - 'pkuseg_zho', - 'spacy_zho', - 'stanza_zho_cn', - 'wordless_zho_char' - ], - 'zho_tw': [ - 'pkuseg_zho', - 'spacy_zho', - 'stanza_zho_tw', - 'wordless_zho_char' - ], - - 'chu': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_chu' - ], - - 'cop': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_cop' - ], - 
- 'hrv': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_hrv', - 'stanza_hrv' - ], - - 'ces': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_ces', - 'stanza_ces' - ], - - 'dan': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_dan', - 'stanza_dan' - ], - - 'nld': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_nld', - 'stanza_nld' - ], - - 'ang': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_ang' - ], - 'eng_gb': [ - 'nltk_nist', 'nltk_nltk', 'nltk_penn_treebank', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_eng', - 'stanza_eng' - ], - 'eng_us': [ - 'nltk_nist', 'nltk_nltk', 'nltk_penn_treebank', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_eng', - 'stanza_eng' - ], - - 'myv': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_myv' - ], - - 'est': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_est', - 'stanza_est' - ], - - 'fao': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_fao', - 'stanza_fao' - ], - - 'fin': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_fin', - 'stanza_fin' - ], - - 'fra': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_fra', - 'stanza_fra' - ], - 'fro': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_fro' - ], - - 'glg': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_glg' - ], - - 'lug': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_lug' - ], - - 'deu_at': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_deu', - 'stanza_deu' - ], - 'deu_de': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 
'nltk_twitter', - 'sacremoses_moses', - 'spacy_deu', - 'stanza_deu' - ], - 'deu_ch': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_deu', - 'stanza_deu' - ], - - 'got': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_got' - ], - - 'grc': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_grc', - 'stanza_grc' - ], - 'ell': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_ell', - 'stanza_ell' - ], - - 'guj': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_guj' - ], - - 'hbo': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_hbo' - ], - 'heb': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_heb', - 'stanza_heb' - ], - - 'hin': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_hin', - 'stanza_hin' - ], - - 'hun': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_hun', - 'stanza_hun' - ], - - 'isl': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_isl', - 'stanza_isl' - ], - - 'ind': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_ind', - 'stanza_ind' - ], - - 'gle': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_gle', - 'stanza_gle' - ], - - 'ita': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_ita', - 'stanza_ita' - ], - - 'jpn': [ - 'spacy_jpn', - 'stanza_jpn', - 'sudachipy_jpn_split_mode_a', 'sudachipy_jpn_split_mode_b', 'sudachipy_jpn_split_mode_c', - 'wordless_jpn_kanji' - ], - - 'kan': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_kan' - ], - - 'kaz': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_kaz' - ], - - 'khm': ['khmer_nltk_khm'], - - 
'kor': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'python_mecab_ko_mecab', - 'spacy_kor', - 'stanza_kor' - ], - - 'kmr': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_kmr' - ], - - 'kir': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_kir', - 'stanza_kir' - ], - - 'lao': ['laonlp_lao'], - - 'lat': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_lat', - 'stanza_lat' - ], - - 'lav': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_lav', - 'stanza_lav' - ], - - 'lij': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_lij', - 'stanza_lij' - ], - - 'lit': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_lit', - 'stanza_lit' - ], - - 'ltz': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_ltz' - ], - - 'mkd': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_mkd' - ], - - 'msa': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_msa' - ], - - 'mal': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_mal' - ], - - 'mlt': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_mlt' - ], - - 'glv': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_glv' - ], - - 'mar': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_mar', - 'stanza_mar' - ], - - 'pcm': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_pcm' - ], - - 'mni_mtei': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses' - ], - - 'nep': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_nep' - ], - - 'nob': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_nob', - 'stanza_nob' - ], - 'nno': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_nno', - 
'stanza_nno' - ], - - 'ori': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses' - ], - - 'fas': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', - 'spacy_fas', - 'stanza_fas' - ], - - 'pol': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_pol', - 'stanza_pol' - ], - - 'qpm': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_qpm' - ], - - 'por_br': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_por', - 'stanza_por' - ], - 'por_pt': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_por', - 'stanza_por' - ], - - 'pan_guru': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses' - ], - - 'ron': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_ron', - 'stanza_ron' - ], - - 'rus': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_rus', - 'stanza_rus' - ], - 'orv': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_orv' - ], - - 'sme': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_sme' - ], - - 'san': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_san', - 'stanza_san' - ], - - 'gla': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_gla' - ], - - 'srp_cyrl': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_srp' - ], - - 'srp_latn': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_srp', - 'stanza_srp_latn' - ], - - 'snd': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_snd' - ], - - 'sin': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_sin' - ], - - 'slk': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_slk', - 'stanza_slk' - ], - - 'slv': [ 
- 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_slv', - 'stanza_slv' - ], - - 'dsb': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_dsb' - ], - - 'hsb': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_hsb', - 'stanza_hsb' - ], - - 'spa': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_spa', - 'stanza_spa' - ], - - 'swe': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_swe', - 'stanza_swe' - ], - - 'tgl': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_tgl' - ], - - 'tgk': ['nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter'], - - 'tam': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_tam', - 'stanza_tam' - ], - - 'tat': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_tat' - ], - - 'tel': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_tel', - 'stanza_tel' - ], - - 'tdt': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'sacremoses_moses' - ], - - 'tha': [ - 'pythainlp_longest_matching', - 'pythainlp_max_matching', - 'pythainlp_max_matching_tcc', - 'pythainlp_nercut', - 'stanza_tha' - ], - - 'bod': ['botok_bod'], - - 'tir': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_tir' - ], - - 'tsn': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_tsn' - ], - - 'tur': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_tur', - 'stanza_tur' - ], - - 'ukr': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_ukr', - 'stanza_ukr' - ], - - 'urd': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_urd', - 'stanza_urd' - ], - - 'uig': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_uig' - ], - - 'vie': [ - 'nltk_tok_tok', - 'underthesea_vie', 
- 'stanza_vie' - ], - - 'cym': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_cym' - ], - - 'wol': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'stanza_wol' - ], - - 'yor': [ - 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', - 'spacy_yor' - ], - - 'other': [ - 'nltk_nist', 'nltk_nltk', 'nltk_penn_treebank', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', - 'sacremoses_moses', - 'spacy_eng', - 'stanza_eng' - ] - }, - - 'syl_tokenizers': { - 'afr': ['pyphen_afr'], - 'sqi': ['pyphen_sqi'], - 'eus': ['pyphen_eus'], - 'bel': ['pyphen_bel'], - 'bul': ['pyphen_bul'], - 'cat': ['pyphen_cat'], - 'hrv': ['pyphen_hrv'], - 'ces': ['pyphen_ces'], - 'dan': ['pyphen_dan'], - 'nld': ['pyphen_nld'], - - 'eng_gb': [ - 'nltk_legality', - 'nltk_sonority_sequencing', - 'pyphen_eng_gb' - ], - - 'eng_us': [ - 'nltk_legality', - 'nltk_sonority_sequencing', - 'pyphen_eng_us' - ], - - 'epo': ['pyphen_epo'], - 'est': ['pyphen_est'], - 'fra': ['pyphen_fra'], - 'glg': ['pyphen_glg'], - 'deu_at': ['pyphen_deu_at'], - 'deu_de': ['pyphen_deu_de'], - 'deu_ch': ['pyphen_deu_ch'], - 'ell': ['pyphen_ell'], - 'hun': ['pyphen_hun'], - 'isl': ['pyphen_isl'], - 'ind': ['pyphen_ind'], - 'ita': ['pyphen_ita'], - 'lit': ['pyphen_lit'], - 'lav': ['pyphen_lav'], - 'mon': ['pyphen_mon'], - 'nob': ['pyphen_nob'], - 'nno': ['pyphen_nno'], - 'pol': ['pyphen_pol'], - 'por_br': ['pyphen_por_br'], - 'por_pt': ['pyphen_por_pt'], - 'ron': ['pyphen_ron'], - 'rus': ['pyphen_rus'], - 'srp_cyrl': ['pyphen_srp_cyrl'], - 'srp_latn': ['pyphen_srp_latn'], - 'slk': ['pyphen_slk'], - 'slv': ['pyphen_slv'], - 'spa': ['pyphen_spa'], - 'swe': ['pyphen_swe'], - 'tel': ['pyphen_tel'], - - 'tha': [ - 'pyphen_tha', - 'pythainlp_han_solo', - 'pythainlp_syl_dict' - ], - - 'ukr': ['pyphen_ukr'], - 'zul': ['pyphen_zul'] - }, - - 'pos_taggers': { - 'afr': ['stanza_afr'], - 'ara': ['stanza_ara'], - 'xcl': ['stanza_xcl'], - 'hye': ['stanza_hye'], - 'hyw': ['stanza_hyw'], - 'eus': ['stanza_eus'], 
- 'bel': ['stanza_bel'], - 'bul': ['stanza_bul'], - 'bxr': ['stanza_bxr'], - - 'cat': [ - 'spacy_cat', - 'stanza_cat' - ], - - 'lzh': ['stanza_lzh'], - 'zho_cn': [ - 'spacy_zho', - 'stanza_zho_cn' - ], - 'zho_tw': [ - 'spacy_zho', - 'stanza_zho_tw' - ], - - 'chu': ['stanza_chu'], - 'cop': ['stanza_cop'], - - 'hrv': [ - 'spacy_hrv', - 'stanza_hrv' - ], - - 'ces': ['stanza_ces'], - - 'dan': [ - 'spacy_dan', - 'stanza_dan' - ], - - 'nld': [ - 'spacy_nld', - 'stanza_nld' - ], - - 'ang': ['stanza_ang'], - 'eng_gb': [ - 'nltk_perceptron_eng', - 'spacy_eng', - 'stanza_eng' - ], - 'eng_us': [ - 'nltk_perceptron_eng', - 'spacy_eng', - 'stanza_eng' - ], - - 'myv': ['stanza_myv'], - 'est': ['stanza_est'], - 'fao': ['stanza_fao'], - - 'fin': [ - 'spacy_fin', - 'stanza_fin' - ], - - 'fra': [ - 'spacy_fra', - 'stanza_fra' - ], - 'fro': ['stanza_fro'], - - 'glg': ['stanza_glg'], - - 'deu_at': [ - 'spacy_deu', - 'stanza_deu' - ], - 'deu_de': [ - 'spacy_deu', - 'stanza_deu' - ], - 'deu_ch': [ - 'spacy_deu', - 'stanza_deu' - ], - - 'got': ['stanza_got'], - - 'grc': ['stanza_grc'], - 'ell': [ - 'spacy_ell', - 'stanza_ell' - ], - - 'hbo': ['stanza_hbo'], - 'heb': ['stanza_heb'], - 'hin': ['stanza_hin'], - 'hun': ['stanza_hun'], - 'isl': ['stanza_isl'], - 'ind': ['stanza_ind'], - 'gle': ['stanza_gle'], - - 'ita': [ - 'spacy_ita', - 'stanza_ita' - ], - - 'jpn': [ - 'spacy_jpn', - 'stanza_jpn', - 'sudachipy_jpn' - ], - - 'kaz': ['stanza_kaz'], - 'khm': ['khmer_nltk_khm'], - - 'kor': [ - 'python_mecab_ko_mecab', - 'spacy_kor', - 'stanza_kor' - ], - - 'kmr': ['stanza_kmr'], - 'kir': ['stanza_kir'], - - 'lao': [ - 'laonlp_seqlabeling', - 'laonlp_yunshan_cup_2020' - ], - - 'lat': ['stanza_lat'], - 'lav': ['stanza_lav'], - 'lij': ['stanza_lij'], - - 'lit': [ - 'spacy_lit', - 'stanza_lit' - ], - - 'mkd': ['spacy_mkd'], - 'mlt': ['stanza_mlt'], - 'glv': ['stanza_glv'], - 'mar': ['stanza_mar'], - 'pcm': ['stanza_pcm'], - - 'nob': [ - 'spacy_nob', - 'stanza_nob' - ], - - 'nno': ['stanza_nno'], - 
'fas': ['stanza_fas'], - - 'pol': [ - 'spacy_pol', - 'stanza_pol' - ], - - 'qpm': ['stanza_qpm'], - - 'por_br': [ - 'spacy_por', - 'stanza_por' - ], - 'por_pt': [ - 'spacy_por', - 'stanza_por' - ], - - 'ron': [ - 'spacy_ron', - 'stanza_ron' - ], - - - 'rus': [ - 'nltk_perceptron_rus', - 'pymorphy3_morphological_analyzer', - 'spacy_rus', - 'stanza_rus' - ], - 'orv': ['stanza_orv'], - - 'sme': ['stanza_sme'], - 'san': ['stanza_san'], - 'gla': ['stanza_gla'], - 'srp_latn': ['stanza_srp_latn'], - 'snd': ['stanza_snd'], - 'slk': ['stanza_slk'], - - 'slv': [ - 'spacy_slv', - 'stanza_slv'] - , - - 'hsb': ['stanza_hsb'], - - 'spa': [ - 'spacy_spa', - 'stanza_spa' - ], - - 'swe': [ - 'spacy_swe', - 'stanza_swe' - ], - - 'tam': ['stanza_tam'], - 'tel': ['stanza_tel'], - - 'tha': [ - 'pythainlp_perceptron_blackboard', - 'pythainlp_perceptron_orchid', - 'pythainlp_perceptron_pud' - ], - - 'bod': ['botok_bod'], - 'tur': ['stanza_tur'], - - 'ukr': [ - 'pymorphy3_morphological_analyzer', - 'spacy_ukr', - 'stanza_ukr' - ], - - 'urd': ['stanza_urd'], - 'uig': ['stanza_uig'], - - 'vie': [ - 'stanza_vie', - 'underthesea_vie' - ], - - 'cym': ['stanza_cym'], - 'wol': ['stanza_wol'] - }, - - 'lemmatizers': { - 'afr': ['stanza_afr'], - 'sqi': ['simplemma_sqi'], - 'ara': ['stanza_ara'], - - 'xcl': ['stanza_xcl'], - 'hye': [ - 'simplemma_hye', - 'stanza_hye' - ], - 'hyw': ['stanza_hyw'], - - 'ast': ['simplemma_ast'], - 'eus': ['stanza_eus'], - 'bel': ['stanza_bel'], - 'ben': ['spacy_ben'], - - 'bul': [ - 'simplemma_bul', - 'stanza_bul' - ], - - 'bxr': ['stanza_bxr'], - - 'cat': [ - 'simplemma_cat', - 'spacy_cat', - 'stanza_cat' - ], - - 'lzh': ['stanza_lzh'], - 'zho_cn': ['stanza_zho_cn'], - 'zho_tw': ['stanza_zho_tw'], - 'chu': ['stanza_chu'], - 'cop': ['stanza_cop'], - - 'hrv': [ - 'simplemma_hbs', - 'spacy_hrv', - 'stanza_hrv' - ], - - 'ces': [ - 'simplemma_ces', - 'spacy_ces', - 'stanza_ces' - ], - - 'dan': [ - 'simplemma_dan', - 'spacy_dan', - 'stanza_dan' - ], - - 'nld': [ - 
'simplemma_nld', - 'spacy_nld', - 'stanza_nld' - ], - - 'enm': ['simplemma_enm'], - 'ang': ['stanza_ang'], - 'eng_gb': [ - 'nltk_wordnet', - 'simplemma_eng', - 'spacy_eng', - 'stanza_eng' - ], - 'eng_us': [ - 'nltk_wordnet', - 'simplemma_eng', - 'spacy_eng', - 'stanza_eng' - ], - - 'myv': ['stanza_myv'], - - 'est': [ - 'simplemma_est', - 'stanza_est' - ], - - 'fin': [ - 'simplemma_fin', - 'spacy_fin', - 'stanza_fin' - ], - - 'fra': [ - 'simplemma_fra', - 'spacy_fra', - 'stanza_fra' - ], - 'fro': ['stanza_fro'], - - 'glg': [ - 'simplemma_glg', - 'stanza_glg' - ], - - 'kat': ['simplemma_kat'], - - 'deu_at': [ - 'simplemma_deu', - 'spacy_deu', - 'stanza_deu' - ], - 'deu_de': [ - 'simplemma_deu', - 'spacy_deu', - 'stanza_deu' - ], - 'deu_ch': [ - 'simplemma_deu', - 'spacy_deu', - 'stanza_deu' - ], - - 'got': ['stanza_got'], - - 'grc': [ - 'spacy_grc', - 'stanza_grc' - ], - 'ell': [ - 'simplemma_ell', - 'spacy_ell', - 'stanza_ell' - ], - - 'hbo': ['stanza_hbo'], - 'heb': ['stanza_heb'], - - 'hin': [ - 'simplemma_hin', - 'stanza_hin' - ], - - 'hun': [ - 'simplemma_hun', - 'spacy_hun', - 'stanza_hun' - ], - - 'isl': [ - 'simplemma_isl', - 'stanza_isl' - ], - - 'ind': [ - 'simplemma_ind', - 'spacy_ind', - 'stanza_ind' - ], - - 'gle': [ - 'simplemma_gle', - 'spacy_gle', - 'stanza_gle' - ], - - 'ita': [ - 'simplemma_ita', - 'spacy_ita', - 'stanza_ita' - ], - - 'jpn': [ - 'spacy_jpn', - 'stanza_jpn', - 'sudachipy_jpn' - ], - - 'kaz': ['stanza_kaz'], - - 'kor': [ - 'spacy_kor', - 'stanza_kor' - ], - - 'kmr': ['stanza_kmr'], - 'kir': ['stanza_kir'], - - 'lat': [ - 'simplemma_lat', - 'stanza_lat' - ], - - 'lav': [ - 'simplemma_lav', - 'stanza_lav' - ], - - 'lij': ['stanza_lij'], - - 'lit': [ - 'simplemma_lit', - 'spacy_lit', - 'stanza_lit' - ], - - 'ltz': [ - 'simplemma_ltz', - 'spacy_ltz' - ], - - 'mkd': [ - 'simplemma_mkd', - 'spacy_mkd' - ], - - 'msa': ['simplemma_msa'], - - 'glv': [ - 'simplemma_glv', - 'stanza_glv' - ], - - 'mar': ['stanza_mar'], - 'pcm': ['stanza_pcm'], - 
- 'nob': [ - 'simplemma_nob', - 'spacy_nob', - 'stanza_nob' - ], - 'nno': [ - 'simplemma_nno', - 'stanza_nno' - ], - - 'fas': [ - 'simplemma_fas', - 'spacy_fas', - 'stanza_fas' - ], - - 'pol': [ - 'simplemma_pol', - 'spacy_pol', - 'stanza_pol' - ], - - 'qpm': ['stanza_qpm'], - - 'por_br': [ - 'simplemma_por', - 'spacy_por', - 'stanza_por' - ], - 'por_pt': [ - 'simplemma_por', - 'spacy_por', - 'stanza_por' - ], - - 'ron': [ - 'simplemma_ron', - 'spacy_ron', - 'stanza_ron' - ], - - 'rus': [ - 'simplemma_rus', - 'pymorphy3_morphological_analyzer', - 'spacy_rus', - 'stanza_rus' - ], - 'orv': ['stanza_orv'], - - 'sme': [ - 'simplemma_sme', - 'stanza_sme' - ], - - 'san': ['stanza_san'], - - 'gla': [ - 'simplemma_gla', - 'stanza_gla' - ], - - 'srp_cyrl': ['spacy_srp'], - 'srp_latn': [ - 'simplemma_hbs', - 'stanza_srp_latn' - ], - - 'slk': [ - 'simplemma_slk', - 'stanza_slk' - ], - - 'slv': [ - 'simplemma_slv', - 'spacy_slv', - 'stanza_slv' - ], - - 'hsb': ['stanza_hsb'], - - 'spa': [ - 'simplemma_spa', - 'spacy_spa', - 'stanza_spa' - ], - - 'swa': ['simplemma_swa'], - - 'swe': [ - 'simplemma_swe', - 'spacy_swe', - 'stanza_swe' - ], - - 'tgl': [ - 'simplemma_tgl', - 'spacy_tgl' - ], - - 'tam': ['stanza_tam'], - 'bod': ['botok_bod'], - - 'tur': [ - 'simplemma_tur', - 'spacy_tur', - 'stanza_tur' - ], - - 'ukr': [ - 'pymorphy3_morphological_analyzer', - 'simplemma_ukr', - 'spacy_ukr', - 'stanza_ukr' - ], - - 'urd': [ - 'spacy_urd', - 'stanza_urd' - ], - - 'uig': ['stanza_uig'], - - 'cym': [ - 'simplemma_cym', - 'stanza_cym' - ], - - 'wol': ['stanza_wol'] - }, - - 'stop_word_lists': { - 'ara': ['nltk_ara'], - 'aze': ['nltk_aze'], - 'eus': ['nltk_eus'], - 'ben': ['nltk_ben'], - 'cat': ['nltk_cat'], - 'zho_cn': ['nltk_zho_cn'], - 'zho_tw': ['nltk_zho_tw'], - 'dan': ['nltk_dan'], - 'nld': ['nltk_nld'], - 'eng_gb': ['nltk_eng'], - 'eng_us': ['nltk_eng'], - 'fin': ['nltk_fin'], - 'fra': ['nltk_fra'], - 'deu_at': ['nltk_deu'], - 'deu_de': ['nltk_deu'], - 'deu_ch': ['nltk_deu'], - 
'ell': ['nltk_ell'], - 'heb': ['nltk_heb'], - 'hun': ['nltk_hun'], - 'ind': ['nltk_ind'], - 'ita': ['nltk_ita'], - 'kaz': ['nltk_kaz'], - 'lao': ['laonlp_lao'], - 'nep': ['nltk_nep'], - 'nob': ['nltk_nob'], - 'por_br': ['nltk_por'], - 'por_pt': ['nltk_por'], - 'ron': ['nltk_ron'], - 'rus': ['nltk_rus'], - 'slv': ['nltk_slv'], - 'spa': ['nltk_spa'], - 'swe': ['nltk_swe'], - 'tgk': ['nltk_tgk'], - 'tha': ['pythainlp_tha'], - 'tur': ['nltk_tur'], - - 'other': [] - }, - - 'dependency_parsers': { - 'afr': ['stanza_afr'], - 'ara': ['stanza_ara'], - 'xcl': ['stanza_xcl'], - 'hye': ['stanza_hye'], - 'hyw': ['stanza_hyw'], - 'eus': ['stanza_eus'], - 'bel': ['stanza_bel'], - 'bul': ['stanza_bul'], - 'bxr': ['stanza_bxr'], - - 'cat': [ - 'spacy_cat', - 'stanza_cat' - ], - - 'lzh': ['stanza_lzh'], - 'zho_cn': [ - 'spacy_zho', - 'stanza_zho_cn' - ], - 'zho_tw': [ - 'spacy_zho', - 'stanza_zho_tw' - ], - - 'chu': ['stanza_chu'], - 'cop': ['stanza_cop'], - - 'hrv': [ - 'spacy_hrv', - 'stanza_hrv' - ], - - 'ces': ['stanza_ces'], - - 'dan': [ - 'spacy_dan', - 'stanza_dan' - ], - - 'nld': [ - 'spacy_nld', - 'stanza_nld' - ], - - 'ang': ['stanza_ang'], - 'eng_gb': [ - 'spacy_eng', - 'stanza_eng' - ], - 'eng_us': [ - 'spacy_eng', - 'stanza_eng' - ], - - 'myv': ['stanza_myv'], - 'est': ['stanza_est'], - 'fao': ['stanza_fao'], - - 'fin': [ - 'spacy_fin', - 'stanza_fin' - ], - - 'fra': [ - 'spacy_fra', - 'stanza_fra' - ], - 'fro': ['stanza_fro'], - - 'glg': ['stanza_glg'], - - 'deu_at': [ - 'spacy_deu', - 'stanza_deu' - ], - 'deu_de': [ - 'spacy_deu', - 'stanza_deu' - ], - 'deu_ch': [ - 'spacy_deu', - 'stanza_deu' - ], - - 'got': ['stanza_got'], - - 'grc': ['stanza_grc'], - 'ell': [ - 'spacy_ell', - 'stanza_ell' - ], - - 'hbo': ['stanza_hbo'], - 'heb': ['stanza_heb'], - 'hin': ['stanza_hin'], - 'hun': ['stanza_hun'], - 'isl': ['stanza_isl'], - 'ind': ['stanza_ind'], - 'gle': ['stanza_gle'], - - 'ita': [ - 'spacy_ita', - 'stanza_ita' - ], - - 'jpn': [ - 'spacy_jpn', - 'stanza_jpn' - ], - - 
'kaz': ['stanza_kaz'], - - 'kor': [ - 'spacy_kor', - 'stanza_kor' - ], - - 'kmr': ['stanza_kmr'], - 'kir': ['stanza_kir'], - 'lat': ['stanza_lat'], - 'lav': ['stanza_lav'], - 'lij': ['stanza_lij'], - - 'lit': [ - 'spacy_lit', - 'stanza_lit' - ], - - 'mkd': ['spacy_mkd'], - 'mlt': ['stanza_mlt'], - 'glv': ['stanza_glv'], - 'mar': ['stanza_mar'], - 'pcm': ['stanza_pcm'], - - 'nob': [ - 'spacy_nob', - 'stanza_nob' - ], - - 'nno': ['stanza_nno'], - 'fas': ['stanza_fas'], - - 'pol': [ - 'spacy_pol', - 'stanza_pol' - ], - - 'qpm': ['stanza_qpm'], - - 'por_br': [ - 'spacy_por', - 'stanza_por' - ], - 'por_pt': [ - 'spacy_por', - 'stanza_por' - ], - - 'ron': [ - 'spacy_ron', - 'stanza_ron' - ], - - 'rus': [ - 'spacy_rus', - 'stanza_rus' - ], - 'orv': ['stanza_orv'], - - 'sme': ['stanza_sme'], - 'san': ['stanza_san'], - 'gla': ['stanza_gla'], - 'srp_latn': ['stanza_srp_latn'], - 'slk': ['stanza_slk'], - - 'slv': [ - 'spacy_slv', - 'stanza_slv' - ], - - 'hsb': ['stanza_hsb'], - - 'spa': [ - 'spacy_spa', - 'stanza_spa' - ], - - 'swe': [ - 'spacy_swe', - 'stanza_swe' - ], - - 'tam': ['stanza_tam'], - 'tel': ['stanza_tel'], - 'tur': ['stanza_tur'], - - 'ukr': [ - 'spacy_ukr', - 'stanza_ukr' - ], - - 'urd': ['stanza_urd'], - 'uig': ['stanza_uig'], - 'vie': ['stanza_vie'], - 'cym': ['stanza_cym'], - 'wol': ['stanza_wol'] - }, - - 'sentiment_analyzers': { - 'afr': ['vader_afr'], - 'sqi': ['vader_sqi'], - 'amh': ['vader_amh'], - 'ara': ['vader_ara'], - 'hye': ['vader_hye'], - 'hyw': ['vader_hye'], - 'asm': ['vader_asm'], - 'aze': ['vader_aze'], - 'eus': ['vader_eus'], - 'bel': ['vader_bel'], - 'ben': ['vader_ben'], - 'bul': ['vader_bul'], - 'mya': ['vader_mya'], - 'cat': ['vader_cat'], - - 'zho_cn': [ - 'stanza_zho_cn', - 'vader_zho_cn' - ], - - 'zho_tw': ['vader_zho_tw'], - 'hrv': ['vader_hrv'], - 'ces': ['vader_ces'], - 'dan': ['vader_dan'], - 'nld': ['vader_nld'], - - 'eng_gb': [ - 'stanza_eng', - 'vader_eng' - ], - 'eng_us': [ - 'stanza_eng', - 'vader_eng' - ], - - 'epo': 
['vader_epo'], - 'est': ['vader_est'], - 'fin': ['vader_fin'], - 'fra': ['vader_fra'], - 'glg': ['vader_glg'], - 'kat': ['vader_kat'], - - 'deu_at': [ - 'stanza_deu', - 'vader_deu' - ], - 'deu_de': [ - 'stanza_deu', - 'vader_deu' - ], - 'deu_ch': [ - 'stanza_deu', - 'vader_deu' - ], - - 'ell': ['vader_ell'], - 'guj': ['vader_guj'], - 'heb': ['vader_heb'], - 'hin': ['vader_hin'], - 'hun': ['vader_hun'], - 'isl': ['vader_isl'], - 'ind': ['vader_ind'], - 'gle': ['vader_gle'], - 'ita': ['vader_ita'], - 'jpn': ['vader_jpn'], - 'kan': ['vader_kan'], - 'kaz': ['vader_kaz'], - 'khm': ['vader_khm'], - 'kor': ['vader_kor'], - 'kmr': ['vader_kmr'], - 'kir': ['vader_kir'], - 'lao': ['vader_lao'], - 'lat': ['vader_lat'], - 'lav': ['vader_lav'], - 'lit': ['vader_lit'], - 'lug': ['vader_lug'], - 'ltz': ['vader_ltz'], - 'mkd': ['vader_mkd'], - 'msa': ['vader_msa'], - 'mal': ['vader_mal'], - 'mlt': ['vader_mlt'], - - 'mar': [ - 'stanza_mar', - 'vader_mar' - ], - - 'mni_mtei': ['vader_mni_mtei'], - 'mon': ['vader_mon'], - 'nep': ['vader_nep'], - 'nob': ['vader_nob'], - 'ori': ['vader_ori'], - 'fas': ['vader_fas'], - 'pol': ['vader_pol'], - 'por_pt': ['vader_por'], - 'por_br': ['vader_por'], - 'pan_guru': ['vader_pan_guru'], - 'ron': ['vader_ron'], - 'rus': ['vader_rus'], - 'san': ['vader_san'], - 'gla': ['vader_gla'], - 'srp_cyrl': ['vader_srp_cyrl'], - 'srp_latn': ['vader_srp_cyrl'], - 'snd': ['vader_snd'], - 'sin': ['vader_sin'], - 'slk': ['vader_slk'], - 'slv': ['vader_slv'], - - 'spa': [ - 'stanza_spa', - 'vader_spa' - ], - - 'swa': ['vader_swa'], - 'swe': ['vader_swe'], - 'tgl': ['vader_tgl'], - 'tgk': ['vader_tgk'], - 'tam': ['vader_tam'], - 'tat': ['vader_tat'], - 'tel': ['vader_tel'], - 'tha': ['vader_tha'], - 'tir': ['vader_tir'], - 'tur': ['vader_tur'], - 'ukr': ['vader_ukr'], - 'urd': ['vader_urd'], - 'uig': ['vader_uig'], - - 'vie': [ - 'stanza_vie', - 'underthesea_vie' - ], - - 'cym': ['vader_cym'], - 'yor': ['vader_yor'], - 'zul': ['vader_zul'] - }, - - # Only people's 
names are capitalized - # Case of measure names are preserved - 'mapping_measures': { - 'dispersion': { - _tr('wl_settings_global', 'None'): 'none', - _tr('wl_settings_global', 'Average logarithmic distance'): 'ald', - _tr('wl_settings_global', 'Average reduced frequency'): 'arf', - _tr('wl_settings_global', 'Average waiting time'): 'awt', - _tr('wl_settings_global', "Carroll's D₂"): 'carrolls_d2', - _tr('wl_settings_global', "Gries's DP"): 'griess_dp', - _tr('wl_settings_global', "Juilland's D"): 'juillands_d', - _tr('wl_settings_global', "Lyne's D₃"): 'lynes_d3', - _tr('wl_settings_global', "Rosengren's S"): 'rosengrens_s', - _tr('wl_settings_global', "Zhang's Distributional Consistency"): 'zhangs_dc' - }, + _tr('wl_settings_global', 'European (Central) (CP852)'): 'cp852', + _tr('wl_settings_global', 'European (Central) (ISO-8859-2)'): 'iso8859_2', + _tr('wl_settings_global', 'European (Central) (Mac OS Central European)'): 'mac_latin2', + _tr('wl_settings_global', 'European (Central) (Windows-1250)'): 'cp1250', - 'adjusted_freq': { - _tr('wl_settings_global', 'None'): 'none', - _tr('wl_settings_global', 'Average logarithmic distance'): 'fald', - _tr('wl_settings_global', 'Average reduced frequency'): 'farf', - _tr('wl_settings_global', 'Average waiting time'): 'fawt', - _tr('wl_settings_global', "Carroll's Uₘ"): 'carrolls_um', - _tr('wl_settings_global', "Engwall's FM"): 'engwalls_fm', - _tr('wl_settings_global', "Juilland's U"): 'juillands_u', - _tr('wl_settings_global', "Kromer's UR"): 'kromers_ur', - _tr('wl_settings_global', "Rosengren's KF"): 'rosengrens_kf' - }, + _tr('wl_settings_global', 'European (Northern) (ISO-8859-4)'): 'iso8859_4', - 'statistical_significance': { - _tr('wl_settings_global', 'None'): 'none', - _tr('wl_settings_global', "Fisher's exact test"): 'fishers_exact_test', - _tr('wl_settings_global', 'Log-likelihood ratio test'): 'log_likelihood_ratio_test', - _tr('wl_settings_global', 'Mann-Whitney U Test'): 'mann_whitney_u_test', - 
_tr('wl_settings_global', "Pearson's chi-squared test"): 'pearsons_chi_squared_test', - _tr('wl_settings_global', "Student's t-test (1-sample)"): 'students_t_test_1_sample', - _tr('wl_settings_global', "Student's t-test (2-sample)"): 'students_t_test_2_sample', - _tr('wl_settings_global', 'z-score'): 'z_score', - _tr('wl_settings_global', 'z-score (Berry-Rogghe)'): 'z_score_berry_rogghe' - }, + _tr('wl_settings_global', 'European (Southern) (ISO-8859-3)'): 'iso8859_3', - 'bayes_factor': { - _tr('wl_settings_global', 'None'): 'none', - _tr('wl_settings_global', 'Log-likelihood ratio test'): 'log_likelihood_ratio_test', - _tr('wl_settings_global', "Student's t-test (2-sample)"): 'students_t_test_2_sample' - }, + _tr('wl_settings_global', 'European (Southeastern) (ISO-8859-16)'): 'iso8859_16', - 'effect_size': { - _tr('wl_settings_global', 'None'): 'none', - '%DIFF': 'pct_diff', - _tr('wl_settings_global', 'Cubic association ratio'): 'im3', - _tr('wl_settings_global', "Dice's coefficient"): 'dices_coeff', - _tr('wl_settings_global', 'Difference coefficient'): 'diff_coeff', - _tr('wl_settings_global', 'Jaccard index'): 'jaccard_index', - _tr('wl_settings_global', 'Log-frequency biased MD'): 'lfmd', - _tr('wl_settings_global', "Kilgarriff's ratio"): 'kilgarriffs_ratio', - 'logDice': 'log_dice', - _tr('wl_settings_global', 'Log ratio'): 'log_ratio', - 'MI.log-f': 'mi_log_f', - _tr('wl_settings_global', 'Minimum sensitivity'): 'min_sensitivity', - _tr('wl_settings_global', 'Mutual dependency'): 'md', - _tr('wl_settings_global', 'Mutual expectation'): 'me', - _tr('wl_settings_global', 'Mutual information'): 'mi', - _tr('wl_settings_global', 'Odds ratio'): 'or', - _tr('wl_settings_global', 'Pointwise mutual information'): 'pmi', - _tr('wl_settings_global', 'Poisson collocation measure'): 'poisson_collocation_measure', - _tr('wl_settings_global', 'Squared phi coefficient'): 'squared_phi_coeff' - } - }, + _tr('wl_settings_global', 'European (Western) (EBCDIC 500)'): 'cp500', 
+ _tr('wl_settings_global', 'European (Western) (CP850)'): 'cp850', + _tr('wl_settings_global', 'European (Western) (CP858)'): 'cp858', + _tr('wl_settings_global', 'European (Western) (CP1140)'): 'cp1140', + _tr('wl_settings_global', 'European (Western) (ISO-8859-1)'): 'latin_1', + _tr('wl_settings_global', 'European (Western) (ISO-8859-15)'): 'iso8859_15', + _tr('wl_settings_global', 'European (Western) (Mac OS Roman)'): 'mac_roman', + _tr('wl_settings_global', 'European (Western) (Windows-1252)'): 'cp1252', - 'measures_dispersion': { - 'none': { - 'col_text': None, - 'func': None, - 'type': '' - }, + _tr('wl_settings_global', 'French (CP863)'): 'cp863', - 'ald': { - 'col_text': 'ALD', - 'func': wl_measures_dispersion.ald, - 'type': 'dist_based' - }, + _tr('wl_settings_global', 'German (EBCDIC 273)'): 'cp273', - 'arf': { - 'col_text': 'ARF', - 'func': wl_measures_dispersion.arf, - 'type': 'dist_based' - }, + _tr('wl_settings_global', 'Greek (CP737)'): 'cp737', + _tr('wl_settings_global', 'Greek (CP869)'): 'cp869', + _tr('wl_settings_global', 'Greek (CP875)'): 'cp875', + _tr('wl_settings_global', 'Greek (ISO-8859-7)'): 'iso8859_7', + _tr('wl_settings_global', 'Greek (Mac OS Greek)'): 'mac_greek', + _tr('wl_settings_global', 'Greek (Windows-1253)'): 'cp1253', - 'awt': { - 'col_text': 'AWT', - 'func': wl_measures_dispersion.awt, - 'type': 'dist_based' - }, + _tr('wl_settings_global', 'Hebrew (CP856)'): 'cp856', + _tr('wl_settings_global', 'Hebrew (CP862)'): 'cp862', + _tr('wl_settings_global', 'Hebrew (EBCDIC 424)'): 'cp424', + _tr('wl_settings_global', 'Hebrew (ISO-8859-8)'): 'iso8859_8', + _tr('wl_settings_global', 'Hebrew (Windows-1255)'): 'cp1255', - 'carrolls_d2': { - 'col_text': _tr('wl_settings_global', "Carroll's D₂"), - 'func': wl_measures_dispersion.carrolls_d2, - 'type': 'parts_based' - }, + _tr('wl_settings_global', 'Icelandic (CP861)'): 'cp861', + _tr('wl_settings_global', 'Icelandic (Mac OS Icelandic)'): 'mac_iceland', - 'griess_dp': { - 'col_text': 
_tr('wl_settings_global', "Gries's DP"), - 'func': wl_measures_dispersion.griess_dp, - 'type': 'parts_based' - }, + _tr('wl_settings_global', 'Japanese (CP932)'): 'cp932', + _tr('wl_settings_global', 'Japanese (EUC-JP)'): 'euc_jp', + _tr('wl_settings_global', 'Japanese (EUC-JIS-2004)'): 'euc_jis_2004', + _tr('wl_settings_global', 'Japanese (EUC-JISx0213)'): 'euc_jisx0213', + _tr('wl_settings_global', 'Japanese (ISO-2022-JP)'): 'iso2022_jp', + _tr('wl_settings_global', 'Japanese (ISO-2022-JP-1)'): 'iso2022_jp_1', + _tr('wl_settings_global', 'Japanese (ISO-2022-JP-2)'): 'iso2022_jp_2', + _tr('wl_settings_global', 'Japanese (ISO-2022-JP-2004)'): 'iso2022_jp_2004', + _tr('wl_settings_global', 'Japanese (ISO-2022-JP-3)'): 'iso2022_jp_3', + _tr('wl_settings_global', 'Japanese (ISO-2022-JP-EXT)'): 'iso2022_jp_ext', + _tr('wl_settings_global', 'Japanese (Shift_JIS)'): 'shift_jis', + _tr('wl_settings_global', 'Japanese (Shift_JIS-2004)'): 'shift_jis_2004', + _tr('wl_settings_global', 'Japanese (Shift_JISx0213)'): 'shift_jisx0213', - 'juillands_d': { - 'col_text': _tr('wl_settings_global', "Juilland's D"), - 'func': wl_measures_dispersion.juillands_d, - 'type': 'parts_based' - }, + _tr('wl_settings_global', 'Kazakh (KZ-1048)'): 'kz1048', + _tr('wl_settings_global', 'Kazakh (PTCP154)'): 'ptcp154', - 'lynes_d3': { - 'col_text': _tr('wl_settings_global', "Lyne's D₃"), - 'func': wl_measures_dispersion.lynes_d3, - 'type': 'parts_based' - }, + _tr('wl_settings_global', 'Korean (EUC-KR)'): 'euc_kr', + _tr('wl_settings_global', 'Korean (ISO-2022-KR)'): 'iso2022_kr', + _tr('wl_settings_global', 'Korean (JOHAB)'): 'johab', + _tr('wl_settings_global', 'Korean (UHC)'): 'cp949', - 'rosengrens_s': { - 'col_text': _tr('wl_settings_global', "Rosengren's S"), - 'func': wl_measures_dispersion.rosengrens_s, - 'type': 'parts_based' - }, + _tr('wl_settings_global', 'Nordic languages (CP865)'): 'cp865', + _tr('wl_settings_global', 'Nordic languages (ISO-8859-10)'): 'iso8859_10', - 'zhangs_dc': { 
- 'col_text': _tr('wl_settings_global', "Zhang's DC"), - 'func': wl_measures_dispersion.zhangs_distributional_consistency, - 'type': 'parts_based' - } - }, + _tr('wl_settings_global', 'Persian/Urdu (Mac OS Farsi)'): 'mac_farsi', - 'measures_adjusted_freq': { - 'none': { - 'col_text': None, - 'func': None, - 'type': '' - }, + _tr('wl_settings_global', 'Portuguese (CP860)'): 'cp860', - 'fald': { - 'col_text': 'f-ALD', - 'func': wl_measures_adjusted_freq.fald, - 'type': 'dist_based' - }, + _tr('wl_settings_global', 'Romanian (Mac OS Romanian)'): 'mac_romanian', - 'farf': { - 'col_text': 'f-ARF', - 'func': wl_measures_adjusted_freq.farf, - 'type': 'dist_based' - }, + _tr('wl_settings_global', 'Russian (KOI8-R)'): 'koi8_r', - 'fawt': { - 'col_text': 'f-AWT', - 'func': wl_measures_adjusted_freq.fawt, - 'type': 'dist_based' - }, + _tr('wl_settings_global', 'Tajik (KOI8-T)'): 'koi8_t', - 'carrolls_um': { - 'col_text': _tr('wl_settings_global', "Carroll's Uₘ"), - 'func': wl_measures_adjusted_freq.carrolls_um, - 'type': 'parts_based' - }, + _tr('wl_settings_global', 'Thai (CP874)'): 'cp874', + _tr('wl_settings_global', 'Thai (ISO-8859-11)'): 'iso8859_11', + _tr('wl_settings_global', 'Thai (TIS-620)'): 'tis_620', - 'engwalls_fm': { - 'col_text': _tr('wl_settings_global', "Engwall's FM"), - 'func': wl_measures_adjusted_freq.engwalls_fm, - 'type': 'parts_based' - }, + _tr('wl_settings_global', 'Turkish (CP857)'): 'cp857', + _tr('wl_settings_global', 'Turkish (EBCDIC 1026)'): 'cp1026', + _tr('wl_settings_global', 'Turkish (ISO-8859-9)'): 'iso8859_9', + _tr('wl_settings_global', 'Turkish (Mac OS Turkish)'): 'mac_turkish', + _tr('wl_settings_global', 'Turkish (Windows-1254)'): 'cp1254', - 'juillands_u': { - 'col_text': _tr('wl_settings_global', "Juilland's U"), - 'func': wl_measures_adjusted_freq.juillands_u, - 'type': 'parts_based' - }, + _tr('wl_settings_global', 'Ukrainian (CP1125)'): 'cp1125', + _tr('wl_settings_global', 'Ukrainian (KOI8-U)'): 'koi8_u', - 'kromers_ur': { - 
'col_text': _tr('wl_settings_global', "Kromer's UR"), - 'func': wl_measures_adjusted_freq.kromers_ur, - 'type': 'parts_based' - }, - - 'rosengrens_kf': { - 'col_text': _tr('wl_settings_global', "Rosengren's KF"), - 'func': wl_measures_adjusted_freq.rosengrens_kf, - 'type': 'parts_based' - } - }, - - 'tests_statistical_significance': { - 'none': { - 'col_text': None, - 'func': None, - 'to_sections': False, - 'collocation_extractor': True, - 'keyword_extractor': True - }, - - 'fishers_exact_test': { - # There is no test statistic for Fisher's exact test - 'col_text': None, - 'func': wl_measures_statistical_significance.fishers_exact_test, - 'to_sections': False, - 'collocation_extractor': True, - 'keyword_extractor': True - }, - - 'log_likelihood_ratio_test': { - 'col_text': _tr('wl_settings_global', 'Log-likelihood Ratio'), - 'func': wl_measures_statistical_significance.log_likelihood_ratio_test, - 'to_sections': False, - 'collocation_extractor': True, - 'keyword_extractor': True - }, - - 'mann_whitney_u_test': { - 'col_text': 'U1', - 'func': wl_measures_statistical_significance.mann_whitney_u_test, - 'to_sections': True, - 'collocation_extractor': False, - 'keyword_extractor': True - }, - - 'pearsons_chi_squared_test': { - 'col_text': 'χ2', - 'func': wl_measures_statistical_significance.pearsons_chi_squared_test, - 'to_sections': False, - 'collocation_extractor': True, - 'keyword_extractor': True - }, + _tr('wl_settings_global', 'Urdu (CP1006)'): 'cp1006', - 'students_t_test_1_sample': { - 'col_text': _tr('wl_settings_global', 't-statistic'), - 'func': wl_measures_statistical_significance.students_t_test_1_sample, - 'to_sections': False, - 'collocation_extractor': True, - 'keyword_extractor': True + _tr('wl_settings_global', 'Vietnamese (CP1258)'): 'cp1258', }, - 'students_t_test_2_sample': { - 'col_text': _tr('wl_settings_global', 't-statistic'), - 'func': wl_measures_statistical_significance.students_t_test_2_sample, - 'to_sections': True, - 
'collocation_extractor': False, - 'keyword_extractor': True + # Names of file types are always pluralized but not capitalized + 'file_types': { + 'files': [ + _tr('wl_settings_global', 'CSV files (*.csv)'), + _tr('wl_settings_global', 'Excel workbooks (*.xlsx)'), + _tr('wl_settings_global', 'HTML pages (*.htm; *.html)'), + _tr('wl_settings_global', 'Lyrics files (*.lrc)'), + _tr('wl_settings_global', 'PDF files (*.pdf)'), + _tr('wl_settings_global', 'PowerPoint presentations (*.pptx)'), + _tr('wl_settings_global', 'Text files (*.txt)'), + _tr('wl_settings_global', 'Translation memory files (*.tmx)'), + _tr('wl_settings_global', 'Word documents (*.docx)'), + _tr('wl_settings_global', 'XML files (*.xml)'), + _tr('wl_settings_global', 'All files (*.*)') + ], + + 'exp_tables': [ + _tr('wl_settings_global', 'CSV files (*.csv)'), + _tr('wl_settings_global', 'Excel workbooks (*.xlsx)') + ], + 'exp_tables_concordancer': [ + _tr('wl_settings_global', 'CSV files (*.csv)'), + _tr('wl_settings_global', 'Excel workbooks (*.xlsx)'), + _tr('wl_settings_global', 'Word documents (*.docx)') + ], + 'exp_tables_concordancer_zapping': [ + _tr('wl_settings_global', 'Word documents (*.docx)') + ], + + 'fonts': [ + _tr('wl_settings_global', 'OpenType fonts (*.otf)'), + _tr('wl_settings_global', 'TrueType fonts (*.ttf)'), + _tr('wl_settings_global', 'All files (*.*)') + ], + + # All image formats supported by Pillow + # Reference: https://stackoverflow.com/questions/71112986/retrieve-a-list-of-supported-read-file-extensions-formats + 'masks': [ + _tr('wl_settings_global', 'Blizzard mipmap format (*.blp)'), + _tr('wl_settings_global', 'Windows bitmaps (*.bmp)'), + _tr('wl_settings_global', 'Window cursor files (*.cur)'), + _tr('wl_settings_global', 'Multi-page PCX files (*.dcx)'), + _tr('wl_settings_global', 'DirectDraw surface (*.dds)'), + _tr('wl_settings_global', 'Device-independent bitmaps (*.dib)'), + _tr('wl_settings_global', 'Encapsulated PostScript (*.eps, *.ps)'), + 
_tr('wl_settings_global', 'Flexible image transport system (*.fit, *.fits)'), + _tr('wl_settings_global', 'Autodesk animation files (*.flc, *.fli)'), + _tr('wl_settings_global', 'Fox Engine textures (*.ftex)'), + _tr('wl_settings_global', 'GIMP brush files (*.gbr)'), + _tr('wl_settings_global', 'Graphics interchange format (*.gif)'), + _tr('wl_settings_global', 'Apple icon images (*.icns)'), + _tr('wl_settings_global', 'Windows icon files (*.ico)'), + _tr('wl_settings_global', 'IPTC/NAA newsphoto files (*.iim)'), + _tr('wl_settings_global', 'IM files (*.im)'), + _tr('wl_settings_global', 'Image Tools image files (*)'), + _tr('wl_settings_global', 'JPEG files (*.jfif, *.jpe, *.jpeg, *.jpg)'), + _tr('wl_settings_global', 'JPEG 2000 files (*.j2c, *.j2k, *.jp2, *.jpc, *.jpf, *.jpx)'), + _tr('wl_settings_global', 'McIDAS area files (*)'), + _tr('wl_settings_global', 'Microsoft Paint files (*.msp)'), + _tr('wl_settings_global', 'PhotoCD files (*.pcd)'), + _tr('wl_settings_global', 'Picture exchange (*.pcx)'), + _tr('wl_settings_global', 'PIXAR raster files (*.pxr)'), + _tr('wl_settings_global', 'Portable network graphics (*.apng, *.png)'), + _tr('wl_settings_global', 'Portable pixmap format (*.pbm, *.pgm, *.pnm, *.ppm)'), + _tr('wl_settings_global', 'Photoshop PSD files (*.psd)'), + _tr('wl_settings_global', 'Sun raster files (*.ras)'), + _tr('wl_settings_global', 'Silicon graphics images (*.bw, *.rgb, *.rgba, *.sgi)'), + _tr('wl_settings_global', 'SPIDER files (*)'), + _tr('wl_settings_global', 'Truevision TGA (*.icb, *.tga, *.vda, *.vst)'), + _tr('wl_settings_global', 'TIFF files (*.tif, *.tiff)'), + _tr('wl_settings_global', 'WebP files (*.webp)'), + _tr('wl_settings_global', 'Windows metafiles (*.emf, *.wmf)'), + _tr('wl_settings_global', 'X bitmaps (*.xbm)'), + _tr('wl_settings_global', 'X pixmaps (*.xpm)'), + _tr('wl_settings_global', 'XV thumbnails (*)'), + _tr('wl_settings_global', 'All files (*.*)') + ], }, - 'z_score': { - 'col_text': _tr('wl_settings_global', 
'z-score'), - 'func': wl_measures_statistical_significance.z_score, - 'to_sections': False, - 'collocation_extractor': True, - 'keyword_extractor': True + # Only language names and proper nouns are capitalized in names of language utilities + 'mapping_lang_utils': { + 'sentence_tokenizers': { + _tr('wl_settings_global', 'botok - Tibetan sentence tokenizer'): 'botok_bod', + _tr('wl_settings_global', 'khmer-nltk - Khmer sentence tokenizer'): 'khmer_nltk_khm', + _tr('wl_settings_global', 'LaoNLP - Lao sentence tokenizer'): 'laonlp_lao', + + _tr('wl_settings_global', 'NLTK - Czech Punkt sentence tokenizer'): 'nltk_punkt_ces', + _tr('wl_settings_global', 'NLTK - Danish Punkt sentence tokenizer'): 'nltk_punkt_dan', + _tr('wl_settings_global', 'NLTK - Dutch Punkt sentence tokenizer'): 'nltk_punkt_nld', + _tr('wl_settings_global', 'NLTK - English Punkt sentence tokenizer'): 'nltk_punkt_eng', + _tr('wl_settings_global', 'NLTK - Estonian Punkt sentence tokenizer'): 'nltk_punkt_est', + _tr('wl_settings_global', 'NLTK - Finnish Punkt sentence tokenizer'): 'nltk_punkt_fin', + _tr('wl_settings_global', 'NLTK - French Punkt sentence tokenizer'): 'nltk_punkt_fra', + _tr('wl_settings_global', 'NLTK - German Punkt sentence tokenizer'): 'nltk_punkt_deu', + _tr('wl_settings_global', 'NLTK - Greek Punkt sentence tokenizer'): 'nltk_punkt_ell', + _tr('wl_settings_global', 'NLTK - Italian Punkt sentence tokenizer'): 'nltk_punkt_ita', + _tr('wl_settings_global', 'NLTK - Malayalam Punkt sentence tokenizer'): 'nltk_punkt_mal', + _tr('wl_settings_global', 'NLTK - Norwegian (Bokmål) Punkt sentence tokenizer'): 'nltk_punkt_nob', + _tr('wl_settings_global', 'NLTK - Polish Punkt sentence tokenizer'): 'nltk_punkt_pol', + _tr('wl_settings_global', 'NLTK - Portuguese Punkt sentence tokenizer'): 'nltk_punkt_por', + _tr('wl_settings_global', 'NLTK - Russian Punkt sentence tokenizer'): 'nltk_punkt_rus', + _tr('wl_settings_global', 'NLTK - Slovene Punkt sentence tokenizer'): 'nltk_punkt_slv', + 
_tr('wl_settings_global', 'NLTK - Spanish Punkt sentence tokenizer'): 'nltk_punkt_spa', + _tr('wl_settings_global', 'NLTK - Swedish Punkt sentence tokenizer'): 'nltk_punkt_swe', + _tr('wl_settings_global', 'NLTK - Turkish Punkt sentence tokenizer'): 'nltk_punkt_tur', + + 'PyThaiNLP - CRFCut': 'pythainlp_crfcut', + 'PyThaiNLP - ThaiSumCut': 'pythainlp_thaisumcut', + + _tr('wl_settings_global', 'spaCy - Catalan dependency parser'): 'spacy_dependency_parser_cat', + _tr('wl_settings_global', 'spaCy - Chinese dependency parser'): 'spacy_dependency_parser_zho', + _tr('wl_settings_global', 'spaCy - Croatian dependency parser'): 'spacy_dependency_parser_hrv', + _tr('wl_settings_global', 'spaCy - Danish dependency parser'): 'spacy_dependency_parser_dan', + _tr('wl_settings_global', 'spaCy - Dutch dependency parser'): 'spacy_dependency_parser_nld', + _tr('wl_settings_global', 'spaCy - English dependency parser'): 'spacy_dependency_parser_eng', + _tr('wl_settings_global', 'spaCy - Finnish dependency parser'): 'spacy_dependency_parser_fin', + _tr('wl_settings_global', 'spaCy - French dependency parser'): 'spacy_dependency_parser_fra', + _tr('wl_settings_global', 'spaCy - German dependency parser'): 'spacy_dependency_parser_deu', + _tr('wl_settings_global', 'spaCy - Greek (Modern) dependency parser'): 'spacy_dependency_parser_ell', + _tr('wl_settings_global', 'spaCy - Italian dependency parser'): 'spacy_dependency_parser_ita', + _tr('wl_settings_global', 'spaCy - Japanese dependency parser'): 'spacy_dependency_parser_jpn', + _tr('wl_settings_global', 'spaCy - Korean dependency parser'): 'spacy_dependency_parser_kor', + _tr('wl_settings_global', 'spaCy - Lithuanian dependency parser'): 'spacy_dependency_parser_lit', + _tr('wl_settings_global', 'spaCy - Macedonian dependency parser'): 'spacy_dependency_parser_mkd', + _tr('wl_settings_global', 'spaCy - Norwegian (Bokmål) dependency parser'): 'spacy_dependency_parser_nob', + _tr('wl_settings_global', 'spaCy - Polish dependency 
parser'): 'spacy_dependency_parser_pol', + _tr('wl_settings_global', 'spaCy - Portuguese dependency parser'): 'spacy_dependency_parser_por', + _tr('wl_settings_global', 'spaCy - Romanian dependency parser'): 'spacy_dependency_parser_ron', + _tr('wl_settings_global', 'spaCy - Russian dependency parser'): 'spacy_dependency_parser_rus', + _tr('wl_settings_global', 'spaCy - Slovene dependency parser'): 'spacy_dependency_parser_slv', + _tr('wl_settings_global', 'spaCy - Spanish dependency parser'): 'spacy_dependency_parser_spa', + _tr('wl_settings_global', 'spaCy - Swedish dependency parser'): 'spacy_dependency_parser_swe', + _tr('wl_settings_global', 'spaCy - Ukrainian dependency parser'): 'spacy_dependency_parser_ukr', + + _tr('wl_settings_global', 'spaCy - Croatian sentence recognizer'): 'spacy_sentence_recognizer_hrv', + _tr('wl_settings_global', 'spaCy - Dutch sentence recognizer'): 'spacy_sentence_recognizer_nld', + _tr('wl_settings_global', 'spaCy - Finnish sentence recognizer'): 'spacy_sentence_recognizer_fin', + _tr('wl_settings_global', 'spaCy - Greek (Modern) sentence recognizer'): 'spacy_sentence_recognizer_ell', + _tr('wl_settings_global', 'spaCy - Italian sentence recognizer'): 'spacy_sentence_recognizer_ita', + _tr('wl_settings_global', 'spaCy - Korean sentence recognizer'): 'spacy_sentence_recognizer_kor', + _tr('wl_settings_global', 'spaCy - Lithuanian sentence recognizer'): 'spacy_sentence_recognizer_lit', + _tr('wl_settings_global', 'spaCy - Macedonian sentence recognizer'): 'spacy_sentence_recognizer_mkd', + _tr('wl_settings_global', 'spaCy - Norwegian (Bokmål) sentence recognizer'): 'spacy_sentence_recognizer_nob', + _tr('wl_settings_global', 'spaCy - Polish sentence recognizer'): 'spacy_sentence_recognizer_pol', + _tr('wl_settings_global', 'spaCy - Portuguese sentence recognizer'): 'spacy_sentence_recognizer_por', + _tr('wl_settings_global', 'spaCy - Romanian sentence recognizer'): 'spacy_sentence_recognizer_ron', + _tr('wl_settings_global', 'spaCy 
- Russian sentence recognizer'): 'spacy_sentence_recognizer_rus', + _tr('wl_settings_global', 'spaCy - Swedish sentence recognizer'): 'spacy_sentence_recognizer_swe', + + _tr('wl_settings_global', 'spaCy - Sentencizer'): 'spacy_sentencizer', + + _tr('wl_settings_global', 'Stanza - Afrikaans sentence tokenizer'): 'stanza_afr', + _tr('wl_settings_global', 'Stanza - Arabic sentence tokenizer'): 'stanza_ara', + _tr('wl_settings_global', 'Stanza - Armenian (Classical) sentence tokenizer'): 'stanza_xcl', + _tr('wl_settings_global', 'Stanza - Armenian (Eastern) sentence tokenizer'): 'stanza_hye', + _tr('wl_settings_global', 'Stanza - Armenian (Western) sentence tokenizer'): 'stanza_hyw', + _tr('wl_settings_global', 'Stanza - Basque sentence tokenizer'): 'stanza_eus', + _tr('wl_settings_global', 'Stanza - Belarusian sentence tokenizer'): 'stanza_bel', + _tr('wl_settings_global', 'Stanza - Bulgarian sentence tokenizer'): 'stanza_bul', + _tr('wl_settings_global', 'Stanza - Burmese sentence tokenizer'): 'stanza_mya', + _tr('wl_settings_global', 'Stanza - Buryat (Russia) sentence tokenizer'): 'stanza_bxr', + _tr('wl_settings_global', 'Stanza - Catalan sentence tokenizer'): 'stanza_cat', + _tr('wl_settings_global', 'Stanza - Chinese (Classical) sentence tokenizer'): 'stanza_lzh', + _tr('wl_settings_global', 'Stanza - Chinese (Simplified) sentence tokenizer'): 'stanza_zho_cn', + _tr('wl_settings_global', 'Stanza - Chinese (Traditional) sentence tokenizer'): 'stanza_zho_tw', + _tr('wl_settings_global', 'Stanza - Church Slavonic (Old) sentence tokenizer'): 'stanza_chu', + _tr('wl_settings_global', 'Stanza - Coptic sentence tokenizer'): 'stanza_cop', + _tr('wl_settings_global', 'Stanza - Croatian sentence tokenizer'): 'stanza_hrv', + _tr('wl_settings_global', 'Stanza - Czech sentence tokenizer'): 'stanza_ces', + _tr('wl_settings_global', 'Stanza - Danish sentence tokenizer'): 'stanza_dan', + _tr('wl_settings_global', 'Stanza - Dutch sentence tokenizer'): 'stanza_nld', + 
_tr('wl_settings_global', 'Stanza - English sentence tokenizer'): 'stanza_eng', + _tr('wl_settings_global', 'Stanza - English (Old) sentence tokenizer'): 'stanza_ang', + _tr('wl_settings_global', 'Stanza - Erzya sentence tokenizer'): 'stanza_myv', + _tr('wl_settings_global', 'Stanza - Estonian sentence tokenizer'): 'stanza_est', + _tr('wl_settings_global', 'Stanza - Faroese sentence tokenizer'): 'stanza_fao', + _tr('wl_settings_global', 'Stanza - Finnish sentence tokenizer'): 'stanza_fin', + _tr('wl_settings_global', 'Stanza - French sentence tokenizer'): 'stanza_fra', + _tr('wl_settings_global', 'Stanza - French (Old) sentence tokenizer'): 'stanza_fro', + _tr('wl_settings_global', 'Stanza - Galician sentence tokenizer'): 'stanza_glg', + _tr('wl_settings_global', 'Stanza - German sentence tokenizer'): 'stanza_deu', + _tr('wl_settings_global', 'Stanza - Gothic sentence tokenizer'): 'stanza_got', + _tr('wl_settings_global', 'Stanza - Greek (Ancient) sentence tokenizer'): 'stanza_grc', + _tr('wl_settings_global', 'Stanza - Greek (Modern) sentence tokenizer'): 'stanza_ell', + _tr('wl_settings_global', 'Stanza - Hebrew (Ancient) sentence tokenizer'): 'stanza_hbo', + _tr('wl_settings_global', 'Stanza - Hebrew (Modern) sentence tokenizer'): 'stanza_heb', + _tr('wl_settings_global', 'Stanza - Hindi sentence tokenizer'): 'stanza_hin', + _tr('wl_settings_global', 'Stanza - Hungarian sentence tokenizer'): 'stanza_hun', + _tr('wl_settings_global', 'Stanza - Icelandic sentence tokenizer'): 'stanza_isl', + _tr('wl_settings_global', 'Stanza - Indonesian sentence tokenizer'): 'stanza_ind', + _tr('wl_settings_global', 'Stanza - Irish sentence tokenizer'): 'stanza_gle', + _tr('wl_settings_global', 'Stanza - Italian sentence tokenizer'): 'stanza_ita', + _tr('wl_settings_global', 'Stanza - Japanese sentence tokenizer'): 'stanza_jpn', + _tr('wl_settings_global', 'Stanza - Kazakh sentence tokenizer'): 'stanza_kaz', + _tr('wl_settings_global', 'Stanza - Korean sentence tokenizer'): 
'stanza_kor', + _tr('wl_settings_global', 'Stanza - Kurdish (Kurmanji) sentence tokenizer'): 'stanza_kmr', + _tr('wl_settings_global', 'Stanza - Kyrgyz sentence tokenizer'): 'stanza_kir', + _tr('wl_settings_global', 'Stanza - Latin sentence tokenizer'): 'stanza_lat', + _tr('wl_settings_global', 'Stanza - Latvian sentence tokenizer'): 'stanza_lav', + _tr('wl_settings_global', 'Stanza - Ligurian sentence tokenizer'): 'stanza_lij', + _tr('wl_settings_global', 'Stanza - Lithuanian sentence tokenizer'): 'stanza_lit', + _tr('wl_settings_global', 'Stanza - Maltese sentence tokenizer'): 'stanza_mlt', + _tr('wl_settings_global', 'Stanza - Manx sentence tokenizer'): 'stanza_glv', + _tr('wl_settings_global', 'Stanza - Marathi sentence tokenizer'): 'stanza_mar', + _tr('wl_settings_global', 'Stanza - Nigerian Pidgin sentence tokenizer'): 'stanza_pcm', + _tr('wl_settings_global', 'Stanza - Norwegian (Bokmål) sentence tokenizer'): 'stanza_nob', + _tr('wl_settings_global', 'Stanza - Norwegian (Nynorsk) sentence tokenizer'): 'stanza_nno', + _tr('wl_settings_global', 'Stanza - Persian sentence tokenizer'): 'stanza_fas', + _tr('wl_settings_global', 'Stanza - Polish sentence tokenizer'): 'stanza_pol', + _tr('wl_settings_global', 'Stanza - Pomak sentence tokenizer'): 'stanza_qpm', + _tr('wl_settings_global', 'Stanza - Portuguese sentence tokenizer'): 'stanza_por', + _tr('wl_settings_global', 'Stanza - Romanian sentence tokenizer'): 'stanza_ron', + _tr('wl_settings_global', 'Stanza - Russian sentence tokenizer'): 'stanza_rus', + _tr('wl_settings_global', 'Stanza - Russian (Old) sentence tokenizer'): 'stanza_orv', + _tr('wl_settings_global', 'Stanza - Sámi (Northern) sentence tokenizer'): 'stanza_sme', + _tr('wl_settings_global', 'Stanza - Sanskrit sentence tokenizer'): 'stanza_san', + _tr('wl_settings_global', 'Stanza - Scottish Gaelic sentence tokenizer'): 'stanza_gla', + _tr('wl_settings_global', 'Stanza - Serbian (Latin script) sentence tokenizer'): 'stanza_srp_latn', + 
_tr('wl_settings_global', 'Stanza - Sindhi sentence tokenizer'): 'stanza_snd', + _tr('wl_settings_global', 'Stanza - Slovak sentence tokenizer'): 'stanza_slk', + _tr('wl_settings_global', 'Stanza - Slovene sentence tokenizer'): 'stanza_slv', + _tr('wl_settings_global', 'Stanza - Sorbian (Upper) sentence tokenizer'): 'stanza_hsb', + _tr('wl_settings_global', 'Stanza - Spanish sentence tokenizer'): 'stanza_spa', + _tr('wl_settings_global', 'Stanza - Swedish sentence tokenizer'): 'stanza_swe', + _tr('wl_settings_global', 'Stanza - Tamil sentence tokenizer'): 'stanza_tam', + _tr('wl_settings_global', 'Stanza - Telugu sentence tokenizer'): 'stanza_tel', + _tr('wl_settings_global', 'Stanza - Thai sentence tokenizer'): 'stanza_tha', + _tr('wl_settings_global', 'Stanza - Turkish sentence tokenizer'): 'stanza_tur', + _tr('wl_settings_global', 'Stanza - Ukrainian sentence tokenizer'): 'stanza_ukr', + _tr('wl_settings_global', 'Stanza - Urdu sentence tokenizer'): 'stanza_urd', + _tr('wl_settings_global', 'Stanza - Uyghur sentence tokenizer'): 'stanza_uig', + _tr('wl_settings_global', 'Stanza - Vietnamese sentence tokenizer'): 'stanza_vie', + _tr('wl_settings_global', 'Stanza - Welsh sentence tokenizer'): 'stanza_cym', + _tr('wl_settings_global', 'Stanza - Wolof sentence tokenizer'): 'stanza_wol', + + _tr('wl_settings_global', 'Underthesea - Vietnamese sentence tokenizer'): 'underthesea_vie' + }, + + 'word_tokenizers': { + _tr('wl_settings_global', 'botok - Tibetan word tokenizer'): 'botok_bod', + _tr('wl_settings_global', 'khmer-nltk - Khmer word tokenizer'): 'khmer_nltk_khm', + _tr('wl_settings_global', 'LaoNLP - Lao word tokenizer'): 'laonlp_lao', + + _tr('wl_settings_global', 'NLTK - NIST tokenizer'): 'nltk_nist', + _tr('wl_settings_global', 'NLTK - NLTK tokenizer'): 'nltk_nltk', + _tr('wl_settings_global', 'NLTK - Penn Treebank tokenizer'): 'nltk_penn_treebank', + _tr('wl_settings_global', 'NLTK - Regular-expression tokenizer'): 'nltk_regex', + _tr('wl_settings_global', 
'NLTK - Tok-tok tokenizer'): 'nltk_tok_tok', + _tr('wl_settings_global', 'NLTK - Twitter tokenizer'): 'nltk_twitter', + + _tr('wl_settings_global', 'pkuseg - Chinese word tokenizer'): 'pkuseg_zho', + + _tr('wl_settings_global', 'PyThaiNLP - Longest matching'): 'pythainlp_longest_matching', + _tr('wl_settings_global', 'PyThaiNLP - Maximum matching'): 'pythainlp_max_matching', + _tr('wl_settings_global', 'PyThaiNLP - Maximum matching + TCC'): 'pythainlp_max_matching_tcc', + 'PyThaiNLP - NERCut': 'pythainlp_nercut', + + 'python-mecab-ko - MeCab': 'python_mecab_ko_mecab', + _tr('wl_settings_global', 'Sacremoses - Moses tokenizer'): 'sacremoses_moses', + + _tr('wl_settings_global', 'spaCy - Afrikaans word tokenizer'): 'spacy_afr', + _tr('wl_settings_global', 'spaCy - Albanian word tokenizer'): 'spacy_sqi', + _tr('wl_settings_global', 'spaCy - Amharic word tokenizer'): 'spacy_amh', + _tr('wl_settings_global', 'spaCy - Arabic word tokenizer'): 'spacy_ara', + _tr('wl_settings_global', 'spaCy - Armenian word tokenizer'): 'spacy_hye', + _tr('wl_settings_global', 'spaCy - Azerbaijani word tokenizer'): 'spacy_aze', + _tr('wl_settings_global', 'spaCy - Basque word tokenizer'): 'spacy_eus', + _tr('wl_settings_global', 'spaCy - Bengali word tokenizer'): 'spacy_ben', + _tr('wl_settings_global', 'spaCy - Bulgarian word tokenizer'): 'spacy_bul', + _tr('wl_settings_global', 'spaCy - Catalan word tokenizer'): 'spacy_cat', + _tr('wl_settings_global', 'spaCy - Chinese word tokenizer'): 'spacy_zho', + _tr('wl_settings_global', 'spaCy - Croatian word tokenizer'): 'spacy_hrv', + _tr('wl_settings_global', 'spaCy - Czech word tokenizer'): 'spacy_ces', + _tr('wl_settings_global', 'spaCy - Danish word tokenizer'): 'spacy_dan', + _tr('wl_settings_global', 'spaCy - Dutch word tokenizer'): 'spacy_nld', + _tr('wl_settings_global', 'spaCy - English word tokenizer'): 'spacy_eng', + _tr('wl_settings_global', 'spaCy - Estonian word tokenizer'): 'spacy_est', + _tr('wl_settings_global', 'spaCy - Faroese 
word tokenizer'): 'spacy_fao', + _tr('wl_settings_global', 'spaCy - Finnish word tokenizer'): 'spacy_fin', + _tr('wl_settings_global', 'spaCy - French word tokenizer'): 'spacy_fra', + _tr('wl_settings_global', 'spaCy - German word tokenizer'): 'spacy_deu', + _tr('wl_settings_global', 'spaCy - Greek (Ancient) word tokenizer'): 'spacy_grc', + _tr('wl_settings_global', 'spaCy - Greek (Modern) word tokenizer'): 'spacy_ell', + _tr('wl_settings_global', 'spaCy - Gujarati word tokenizer'): 'spacy_guj', + _tr('wl_settings_global', 'spaCy - Hebrew (Modern) word tokenizer'): 'spacy_heb', + _tr('wl_settings_global', 'spaCy - Hindi word tokenizer'): 'spacy_hin', + _tr('wl_settings_global', 'spaCy - Hungarian word tokenizer'): 'spacy_hun', + _tr('wl_settings_global', 'spaCy - Icelandic word tokenizer'): 'spacy_isl', + _tr('wl_settings_global', 'spaCy - Indonesian word tokenizer'): 'spacy_ind', + _tr('wl_settings_global', 'spaCy - Irish word tokenizer'): 'spacy_gle', + _tr('wl_settings_global', 'spaCy - Italian word tokenizer'): 'spacy_ita', + _tr('wl_settings_global', 'spaCy - Japanese word tokenizer'): 'spacy_jpn', + _tr('wl_settings_global', 'spaCy - Kannada word tokenizer'): 'spacy_kan', + _tr('wl_settings_global', 'spaCy - Korean word tokenizer'): 'spacy_kor', + _tr('wl_settings_global', 'spaCy - Kyrgyz word tokenizer'): 'spacy_kir', + _tr('wl_settings_global', 'spaCy - Latin word tokenizer'): 'spacy_lat', + _tr('wl_settings_global', 'spaCy - Latvian word tokenizer'): 'spacy_lav', + _tr('wl_settings_global', 'spaCy - Ligurian word tokenizer'): 'spacy_lij', + _tr('wl_settings_global', 'spaCy - Lithuanian word tokenizer'): 'spacy_lit', + _tr('wl_settings_global', 'spaCy - Luganda word tokenizer'): 'spacy_lug', + _tr('wl_settings_global', 'spaCy - Luxembourgish word tokenizer'): 'spacy_ltz', + _tr('wl_settings_global', 'spaCy - Macedonian word tokenizer'): 'spacy_mkd', + _tr('wl_settings_global', 'spaCy - Malay word tokenizer'): 'spacy_msa', + _tr('wl_settings_global', 'spaCy 
- Malayalam word tokenizer'): 'spacy_mal', + _tr('wl_settings_global', 'spaCy - Marathi word tokenizer'): 'spacy_mar', + _tr('wl_settings_global', 'spaCy - Nepali word tokenizer'): 'spacy_nep', + _tr('wl_settings_global', 'spaCy - Norwegian (Bokmål) word tokenizer'): 'spacy_nob', + _tr('wl_settings_global', 'spaCy - Norwegian (Nynorsk) word tokenizer'): 'spacy_nno', + _tr('wl_settings_global', 'spaCy - Persian word tokenizer'): 'spacy_fas', + _tr('wl_settings_global', 'spaCy - Polish word tokenizer'): 'spacy_pol', + _tr('wl_settings_global', 'spaCy - Portuguese word tokenizer'): 'spacy_por', + _tr('wl_settings_global', 'spaCy - Romanian word tokenizer'): 'spacy_ron', + _tr('wl_settings_global', 'spaCy - Russian word tokenizer'): 'spacy_rus', + _tr('wl_settings_global', 'spaCy - Sanskrit word tokenizer'): 'spacy_san', + _tr('wl_settings_global', 'spaCy - Serbian (Cyrillic script) word tokenizer'): 'spacy_srp', + _tr('wl_settings_global', 'spaCy - Sinhala word tokenizer'): 'spacy_sin', + _tr('wl_settings_global', 'spaCy - Slovak word tokenizer'): 'spacy_slk', + _tr('wl_settings_global', 'spaCy - Slovene word tokenizer'): 'spacy_slv', + _tr('wl_settings_global', 'spaCy - Sorbian (Lower) word tokenizer'): 'spacy_dsb', + _tr('wl_settings_global', 'spaCy - Sorbian (Upper) word tokenizer'): 'spacy_hsb', + _tr('wl_settings_global', 'spaCy - Spanish word tokenizer'): 'spacy_spa', + _tr('wl_settings_global', 'spaCy - Swedish word tokenizer'): 'spacy_swe', + _tr('wl_settings_global', 'spaCy - Tagalog word tokenizer'): 'spacy_tgl', + _tr('wl_settings_global', 'spaCy - Tamil word tokenizer'): 'spacy_tam', + _tr('wl_settings_global', 'spaCy - Tatar word tokenizer'): 'spacy_tat', + _tr('wl_settings_global', 'spaCy - Telugu word tokenizer'): 'spacy_tel', + _tr('wl_settings_global', 'spaCy - Tigrinya word tokenizer'): 'spacy_tir', + _tr('wl_settings_global', 'spaCy - Tswana word tokenizer'): 'spacy_tsn', + _tr('wl_settings_global', 'spaCy - Turkish word tokenizer'): 'spacy_tur', + 
_tr('wl_settings_global', 'spaCy - Ukrainian word tokenizer'): 'spacy_ukr', + _tr('wl_settings_global', 'spaCy - Urdu word tokenizer'): 'spacy_urd', + _tr('wl_settings_global', 'spaCy - Yoruba word tokenizer'): 'spacy_yor', + + _tr('wl_settings_global', 'Stanza - Afrikaans word tokenizer'): 'stanza_afr', + _tr('wl_settings_global', 'Stanza - Arabic word tokenizer'): 'stanza_ara', + _tr('wl_settings_global', 'Stanza - Armenian (Classical) word tokenizer'): 'stanza_xcl', + _tr('wl_settings_global', 'Stanza - Armenian (Eastern) word tokenizer'): 'stanza_hye', + _tr('wl_settings_global', 'Stanza - Armenian (Western) word tokenizer'): 'stanza_hyw', + _tr('wl_settings_global', 'Stanza - Basque word tokenizer'): 'stanza_eus', + _tr('wl_settings_global', 'Stanza - Belarusian word tokenizer'): 'stanza_bel', + _tr('wl_settings_global', 'Stanza - Bulgarian word tokenizer'): 'stanza_bul', + _tr('wl_settings_global', 'Stanza - Burmese word tokenizer'): 'stanza_mya', + _tr('wl_settings_global', 'Stanza - Buryat (Russia) word tokenizer'): 'stanza_bxr', + _tr('wl_settings_global', 'Stanza - Catalan word tokenizer'): 'stanza_cat', + _tr('wl_settings_global', 'Stanza - Chinese (Classical) word tokenizer'): 'stanza_lzh', + _tr('wl_settings_global', 'Stanza - Chinese (Simplified) word tokenizer'): 'stanza_zho_cn', + _tr('wl_settings_global', 'Stanza - Chinese (Traditional) word tokenizer'): 'stanza_zho_tw', + _tr('wl_settings_global', 'Stanza - Church Slavonic (Old) word tokenizer'): 'stanza_chu', + _tr('wl_settings_global', 'Stanza - Coptic word tokenizer'): 'stanza_cop', + _tr('wl_settings_global', 'Stanza - Croatian word tokenizer'): 'stanza_hrv', + _tr('wl_settings_global', 'Stanza - Czech word tokenizer'): 'stanza_ces', + _tr('wl_settings_global', 'Stanza - Danish word tokenizer'): 'stanza_dan', + _tr('wl_settings_global', 'Stanza - Dutch word tokenizer'): 'stanza_nld', + _tr('wl_settings_global', 'Stanza - English word tokenizer'): 'stanza_eng', + _tr('wl_settings_global', 
'Stanza - English (Old) word tokenizer'): 'stanza_ang', + _tr('wl_settings_global', 'Stanza - Erzya word tokenizer'): 'stanza_myv', + _tr('wl_settings_global', 'Stanza - Estonian word tokenizer'): 'stanza_est', + _tr('wl_settings_global', 'Stanza - Faroese word tokenizer'): 'stanza_fao', + _tr('wl_settings_global', 'Stanza - Finnish word tokenizer'): 'stanza_fin', + _tr('wl_settings_global', 'Stanza - French word tokenizer'): 'stanza_fra', + _tr('wl_settings_global', 'Stanza - French (Old) word tokenizer'): 'stanza_fro', + _tr('wl_settings_global', 'Stanza - Galician word tokenizer'): 'stanza_glg', + _tr('wl_settings_global', 'Stanza - German word tokenizer'): 'stanza_deu', + _tr('wl_settings_global', 'Stanza - Gothic word tokenizer'): 'stanza_got', + _tr('wl_settings_global', 'Stanza - Greek (Ancient) word tokenizer'): 'stanza_grc', + _tr('wl_settings_global', 'Stanza - Greek (Modern) word tokenizer'): 'stanza_ell', + _tr('wl_settings_global', 'Stanza - Hebrew (Ancient) word tokenizer'): 'stanza_hbo', + _tr('wl_settings_global', 'Stanza - Hebrew (Modern) word tokenizer'): 'stanza_heb', + _tr('wl_settings_global', 'Stanza - Hindi word tokenizer'): 'stanza_hin', + _tr('wl_settings_global', 'Stanza - Hungarian word tokenizer'): 'stanza_hun', + _tr('wl_settings_global', 'Stanza - Icelandic word tokenizer'): 'stanza_isl', + _tr('wl_settings_global', 'Stanza - Indonesian word tokenizer'): 'stanza_ind', + _tr('wl_settings_global', 'Stanza - Irish word tokenizer'): 'stanza_gle', + _tr('wl_settings_global', 'Stanza - Italian word tokenizer'): 'stanza_ita', + _tr('wl_settings_global', 'Stanza - Japanese word tokenizer'): 'stanza_jpn', + _tr('wl_settings_global', 'Stanza - Kazakh word tokenizer'): 'stanza_kaz', + _tr('wl_settings_global', 'Stanza - Korean word tokenizer'): 'stanza_kor', + _tr('wl_settings_global', 'Stanza - Kurdish (Kurmanji) word tokenizer'): 'stanza_kmr', + _tr('wl_settings_global', 'Stanza - Kyrgyz word tokenizer'): 'stanza_kir', + 
_tr('wl_settings_global', 'Stanza - Latin word tokenizer'): 'stanza_lat', + _tr('wl_settings_global', 'Stanza - Latvian word tokenizer'): 'stanza_lav', + _tr('wl_settings_global', 'Stanza - Ligurian word tokenizer'): 'stanza_lij', + _tr('wl_settings_global', 'Stanza - Lithuanian word tokenizer'): 'stanza_lit', + _tr('wl_settings_global', 'Stanza - Maltese word tokenizer'): 'stanza_mlt', + _tr('wl_settings_global', 'Stanza - Manx word tokenizer'): 'stanza_glv', + _tr('wl_settings_global', 'Stanza - Marathi word tokenizer'): 'stanza_mar', + _tr('wl_settings_global', 'Stanza - Nigerian Pidgin word tokenizer'): 'stanza_pcm', + _tr('wl_settings_global', 'Stanza - Norwegian (Bokmål) word tokenizer'): 'stanza_nob', + _tr('wl_settings_global', 'Stanza - Norwegian (Nynorsk) word tokenizer'): 'stanza_nno', + _tr('wl_settings_global', 'Stanza - Persian word tokenizer'): 'stanza_fas', + _tr('wl_settings_global', 'Stanza - Polish word tokenizer'): 'stanza_pol', + _tr('wl_settings_global', 'Stanza - Pomak word tokenizer'): 'stanza_qpm', + _tr('wl_settings_global', 'Stanza - Portuguese word tokenizer'): 'stanza_por', + _tr('wl_settings_global', 'Stanza - Romanian word tokenizer'): 'stanza_ron', + _tr('wl_settings_global', 'Stanza - Russian word tokenizer'): 'stanza_rus', + _tr('wl_settings_global', 'Stanza - Russian (Old) word tokenizer'): 'stanza_orv', + _tr('wl_settings_global', 'Stanza - Sámi (Northern) word tokenizer'): 'stanza_sme', + _tr('wl_settings_global', 'Stanza - Sanskrit word tokenizer'): 'stanza_san', + _tr('wl_settings_global', 'Stanza - Scottish Gaelic word tokenizer'): 'stanza_gla', + _tr('wl_settings_global', 'Stanza - Serbian (Latin script) word tokenizer'): 'stanza_srp_latn', + _tr('wl_settings_global', 'Stanza - Sindhi word tokenizer'): 'stanza_snd', + _tr('wl_settings_global', 'Stanza - Slovak word tokenizer'): 'stanza_slk', + _tr('wl_settings_global', 'Stanza - Slovene word tokenizer'): 'stanza_slv', + _tr('wl_settings_global', 'Stanza - Sorbian (Upper) 
word tokenizer'): 'stanza_hsb', + _tr('wl_settings_global', 'Stanza - Spanish word tokenizer'): 'stanza_spa', + _tr('wl_settings_global', 'Stanza - Swedish word tokenizer'): 'stanza_swe', + _tr('wl_settings_global', 'Stanza - Tamil word tokenizer'): 'stanza_tam', + _tr('wl_settings_global', 'Stanza - Telugu word tokenizer'): 'stanza_tel', + _tr('wl_settings_global', 'Stanza - Thai word tokenizer'): 'stanza_tha', + _tr('wl_settings_global', 'Stanza - Turkish word tokenizer'): 'stanza_tur', + _tr('wl_settings_global', 'Stanza - Ukrainian word tokenizer'): 'stanza_ukr', + _tr('wl_settings_global', 'Stanza - Urdu word tokenizer'): 'stanza_urd', + _tr('wl_settings_global', 'Stanza - Uyghur word tokenizer'): 'stanza_uig', + _tr('wl_settings_global', 'Stanza - Vietnamese word tokenizer'): 'stanza_vie', + _tr('wl_settings_global', 'Stanza - Welsh word tokenizer'): 'stanza_cym', + _tr('wl_settings_global', 'Stanza - Wolof word tokenizer'): 'stanza_wol', + + _tr('wl_settings_global', 'SudachiPy - Japanese word tokenizer (split mode A)'): 'sudachipy_jpn_split_mode_a', + _tr('wl_settings_global', 'SudachiPy - Japanese word tokenizer (split mode B)'): 'sudachipy_jpn_split_mode_b', + _tr('wl_settings_global', 'SudachiPy - Japanese word tokenizer (split mode C)'): 'sudachipy_jpn_split_mode_c', + + _tr('wl_settings_global', 'Underthesea - Vietnamese word tokenizer'): 'underthesea_vie', + + _tr('wl_settings_global', 'Wordless - Chinese character tokenizer'): 'wordless_zho_char', + _tr('wl_settings_global', 'Wordless - Japanese kanji tokenizer'): 'wordless_jpn_kanji' + }, + + 'syl_tokenizers': { + _tr('wl_settings_global', 'NLTK - Legality syllable tokenizer'): 'nltk_legality', + _tr('wl_settings_global', 'NLTK - Sonority sequencing syllable tokenizer'): 'nltk_sonority_sequencing', + + _tr('wl_settings_global', 'Pyphen - Afrikaans syllable tokenizer'): 'pyphen_afr', + _tr('wl_settings_global', 'Pyphen - Albanian syllable tokenizer'): 'pyphen_sqi', + _tr('wl_settings_global', 'Pyphen 
- Basque syllable tokenizer'): 'pyphen_eus', + _tr('wl_settings_global', 'Pyphen - Belarusian syllable tokenizer'): 'pyphen_bel', + _tr('wl_settings_global', 'Pyphen - Bulgarian syllable tokenizer'): 'pyphen_bul', + _tr('wl_settings_global', 'Pyphen - Catalan syllable tokenizer'): 'pyphen_cat', + _tr('wl_settings_global', 'Pyphen - Croatian syllable tokenizer'): 'pyphen_hrv', + _tr('wl_settings_global', 'Pyphen - Czech syllable tokenizer'): 'pyphen_ces', + _tr('wl_settings_global', 'Pyphen - Danish syllable tokenizer'): 'pyphen_dan', + _tr('wl_settings_global', 'Pyphen - Dutch syllable tokenizer'): 'pyphen_nld', + _tr('wl_settings_global', 'Pyphen - English (United Kingdom) syllable tokenizer'): 'pyphen_eng_gb', + _tr('wl_settings_global', 'Pyphen - English (United States) syllable tokenizer'): 'pyphen_eng_us', + _tr('wl_settings_global', 'Pyphen - Esperanto syllable tokenizer'): 'pyphen_epo', + _tr('wl_settings_global', 'Pyphen - Estonian syllable tokenizer'): 'pyphen_est', + _tr('wl_settings_global', 'Pyphen - French syllable tokenizer'): 'pyphen_fra', + _tr('wl_settings_global', 'Pyphen - Galician syllable tokenizer'): 'pyphen_glg', + _tr('wl_settings_global', 'Pyphen - German (Austria) syllable tokenizer'): 'pyphen_deu_at', + _tr('wl_settings_global', 'Pyphen - German (Germany) syllable tokenizer'): 'pyphen_deu_de', + _tr('wl_settings_global', 'Pyphen - German (Switzerland) syllable tokenizer'): 'pyphen_deu_ch', + _tr('wl_settings_global', 'Pyphen - Greek (Modern) syllable tokenizer'): 'pyphen_ell', + _tr('wl_settings_global', 'Pyphen - Hungarian syllable tokenizer'): 'pyphen_hun', + _tr('wl_settings_global', 'Pyphen - Icelandic syllable tokenizer'): 'pyphen_isl', + _tr('wl_settings_global', 'Pyphen - Indonesian syllable tokenizer'): 'pyphen_ind', + _tr('wl_settings_global', 'Pyphen - Italian syllable tokenizer'): 'pyphen_ita', + _tr('wl_settings_global', 'Pyphen - Lithuanian syllable tokenizer'): 'pyphen_lit', + _tr('wl_settings_global', 'Pyphen - Latvian 
syllable tokenizer'): 'pyphen_lav', + _tr('wl_settings_global', 'Pyphen - Mongolian syllable tokenizer'): 'pyphen_mon', + _tr('wl_settings_global', 'Pyphen - Norwegian (Bokmål) syllable tokenizer'): 'pyphen_nob', + _tr('wl_settings_global', 'Pyphen - Norwegian (Nynorsk) syllable tokenizer'): 'pyphen_nno', + _tr('wl_settings_global', 'Pyphen - Polish syllable tokenizer'): 'pyphen_pol', + _tr('wl_settings_global', 'Pyphen - Portuguese (Brazil) syllable tokenizer'): 'pyphen_por_br', + _tr('wl_settings_global', 'Pyphen - Portuguese (Portugal) syllable tokenizer'): 'pyphen_por_pt', + _tr('wl_settings_global', 'Pyphen - Romanian syllable tokenizer'): 'pyphen_ron', + _tr('wl_settings_global', 'Pyphen - Russian syllable tokenizer'): 'pyphen_rus', + _tr('wl_settings_global', 'Pyphen - Serbian (Cyrillic script) syllable tokenizer'): 'pyphen_srp_cyrl', + _tr('wl_settings_global', 'Pyphen - Serbian (Latin script) syllable tokenizer'): 'pyphen_srp_latn', + _tr('wl_settings_global', 'Pyphen - Slovak syllable tokenizer'): 'pyphen_slk', + _tr('wl_settings_global', 'Pyphen - Slovene syllable tokenizer'): 'pyphen_slv', + _tr('wl_settings_global', 'Pyphen - Spanish syllable tokenizer'): 'pyphen_spa', + _tr('wl_settings_global', 'Pyphen - Swedish syllable tokenizer'): 'pyphen_swe', + _tr('wl_settings_global', 'Pyphen - Telugu syllable tokenizer'): 'pyphen_tel', + _tr('wl_settings_global', 'Pyphen - Thai syllable tokenizer'): 'pyphen_tha', + _tr('wl_settings_global', 'Pyphen - Ukrainian syllable tokenizer'): 'pyphen_ukr', + _tr('wl_settings_global', 'Pyphen - Zulu syllable tokenizer'): 'pyphen_zul', + + 'PyThaiNLP - Han-solo': 'pythainlp_han_solo', + _tr('wl_settings_global', 'PyThaiNLP - Syllable dictionary'): 'pythainlp_syl_dict' + }, + + 'pos_taggers': { + _tr('wl_settings_global', 'botok - Tibetan part-of-speech tagger'): 'botok_bod', + _tr('wl_settings_global', 'khmer-nltk - Khmer part-of-speech tagger'): 'khmer_nltk_khm', + + 'LaoNLP - SeqLabeling': 'laonlp_seqlabeling', + 
_tr('wl_settings_global', 'LaoNLP - Yunshan Cup 2020'): 'laonlp_yunshan_cup_2020', + + _tr('wl_settings_global', 'NLTK - English perceptron part-of-speech tagger'): 'nltk_perceptron_eng', + _tr('wl_settings_global', 'NLTK - Russian perceptron part-of-speech tagger'): 'nltk_perceptron_rus', + + _tr('wl_settings_global', 'pymorphy3 - Morphological analyzer'): 'pymorphy3_morphological_analyzer', + 'python-mecab-ko - MeCab': 'python_mecab_ko_mecab', + + _tr('wl_settings_global', 'PyThaiNLP - Perceptron part-of-speech tagger (Blackboard)'): 'pythainlp_perceptron_blackboard', + _tr('wl_settings_global', 'PyThaiNLP - Perceptron part-of-speech tagger (ORCHID)'): 'pythainlp_perceptron_orchid', + _tr('wl_settings_global', 'PyThaiNLP - Perceptron part-of-speech tagger (PUD)'): 'pythainlp_perceptron_pud', + + _tr('wl_settings_global', 'spaCy - Catalan part-of-speech tagger'): 'spacy_cat', + _tr('wl_settings_global', 'spaCy - Chinese part-of-speech tagger'): 'spacy_zho', + _tr('wl_settings_global', 'spaCy - Croatian part-of-speech tagger'): 'spacy_hrv', + _tr('wl_settings_global', 'spaCy - Danish part-of-speech tagger'): 'spacy_dan', + _tr('wl_settings_global', 'spaCy - Dutch part-of-speech tagger'): 'spacy_nld', + _tr('wl_settings_global', 'spaCy - English part-of-speech tagger'): 'spacy_eng', + _tr('wl_settings_global', 'spaCy - Finnish part-of-speech tagger'): 'spacy_fin', + _tr('wl_settings_global', 'spaCy - French part-of-speech tagger'): 'spacy_fra', + _tr('wl_settings_global', 'spaCy - German part-of-speech tagger'): 'spacy_deu', + _tr('wl_settings_global', 'spaCy - Greek (Modern) part-of-speech tagger'): 'spacy_ell', + _tr('wl_settings_global', 'spaCy - Italian part-of-speech tagger'): 'spacy_ita', + _tr('wl_settings_global', 'spaCy - Japanese part-of-speech tagger'): 'spacy_jpn', + _tr('wl_settings_global', 'spaCy - Korean part-of-speech tagger'): 'spacy_kor', + _tr('wl_settings_global', 'spaCy - Lithuanian part-of-speech tagger'): 'spacy_lit', + 
_tr('wl_settings_global', 'spaCy - Macedonian part-of-speech tagger'): 'spacy_mkd', + _tr('wl_settings_global', 'spaCy - Norwegian (Bokmål) part-of-speech tagger'): 'spacy_nob', + _tr('wl_settings_global', 'spaCy - Polish part-of-speech tagger'): 'spacy_pol', + _tr('wl_settings_global', 'spaCy - Portuguese part-of-speech tagger'): 'spacy_por', + _tr('wl_settings_global', 'spaCy - Romanian part-of-speech tagger'): 'spacy_ron', + _tr('wl_settings_global', 'spaCy - Russian part-of-speech tagger'): 'spacy_rus', + _tr('wl_settings_global', 'spaCy - Slovene part-of-speech tagger'): 'spacy_slv', + _tr('wl_settings_global', 'spaCy - Spanish part-of-speech tagger'): 'spacy_spa', + _tr('wl_settings_global', 'spaCy - Swedish part-of-speech tagger'): 'spacy_swe', + _tr('wl_settings_global', 'spaCy - Ukrainian part-of-speech tagger'): 'spacy_ukr', + + _tr('wl_settings_global', 'Stanza - Afrikaans part-of-speech tagger'): 'stanza_afr', + _tr('wl_settings_global', 'Stanza - Arabic part-of-speech tagger'): 'stanza_ara', + _tr('wl_settings_global', 'Stanza - Armenian (Classical) part-of-speech tagger'): 'stanza_xcl', + _tr('wl_settings_global', 'Stanza - Armenian (Eastern) part-of-speech tagger'): 'stanza_hye', + _tr('wl_settings_global', 'Stanza - Armenian (Western) part-of-speech tagger'): 'stanza_hyw', + _tr('wl_settings_global', 'Stanza - Basque part-of-speech tagger'): 'stanza_eus', + _tr('wl_settings_global', 'Stanza - Belarusian part-of-speech tagger'): 'stanza_bel', + _tr('wl_settings_global', 'Stanza - Bulgarian part-of-speech tagger'): 'stanza_bul', + _tr('wl_settings_global', 'Stanza - Buryat (Russia) part-of-speech tagger'): 'stanza_bxr', + _tr('wl_settings_global', 'Stanza - Catalan part-of-speech tagger'): 'stanza_cat', + _tr('wl_settings_global', 'Stanza - Chinese (Classical) part-of-speech tagger'): 'stanza_lzh', + _tr('wl_settings_global', 'Stanza - Chinese (Simplified) part-of-speech tagger'): 'stanza_zho_cn', + _tr('wl_settings_global', 'Stanza - Chinese 
(Traditional) part-of-speech tagger'): 'stanza_zho_tw', + _tr('wl_settings_global', 'Stanza - Church Slavonic (Old) part-of-speech tagger'): 'stanza_chu', + _tr('wl_settings_global', 'Stanza - Coptic part-of-speech tagger'): 'stanza_cop', + _tr('wl_settings_global', 'Stanza - Croatian part-of-speech tagger'): 'stanza_hrv', + _tr('wl_settings_global', 'Stanza - Czech part-of-speech tagger'): 'stanza_ces', + _tr('wl_settings_global', 'Stanza - Danish part-of-speech tagger'): 'stanza_dan', + _tr('wl_settings_global', 'Stanza - Dutch part-of-speech tagger'): 'stanza_nld', + _tr('wl_settings_global', 'Stanza - English part-of-speech tagger'): 'stanza_eng', + _tr('wl_settings_global', 'Stanza - English (Old) part-of-speech tagger'): 'stanza_ang', + _tr('wl_settings_global', 'Stanza - Erzya part-of-speech tagger'): 'stanza_myv', + _tr('wl_settings_global', 'Stanza - Estonian part-of-speech tagger'): 'stanza_est', + _tr('wl_settings_global', 'Stanza - Faroese part-of-speech tagger'): 'stanza_fao', + _tr('wl_settings_global', 'Stanza - Finnish part-of-speech tagger'): 'stanza_fin', + _tr('wl_settings_global', 'Stanza - French part-of-speech tagger'): 'stanza_fra', + _tr('wl_settings_global', 'Stanza - French (Old) part-of-speech tagger'): 'stanza_fro', + _tr('wl_settings_global', 'Stanza - Galician part-of-speech tagger'): 'stanza_glg', + _tr('wl_settings_global', 'Stanza - German part-of-speech tagger'): 'stanza_deu', + _tr('wl_settings_global', 'Stanza - Gothic part-of-speech tagger'): 'stanza_got', + _tr('wl_settings_global', 'Stanza - Greek (Ancient) part-of-speech tagger'): 'stanza_grc', + _tr('wl_settings_global', 'Stanza - Greek (Modern) part-of-speech tagger'): 'stanza_ell', + _tr('wl_settings_global', 'Stanza - Hebrew (Ancient) part-of-speech tagger'): 'stanza_hbo', + _tr('wl_settings_global', 'Stanza - Hebrew (Modern) part-of-speech tagger'): 'stanza_heb', + _tr('wl_settings_global', 'Stanza - Hindi part-of-speech tagger'): 'stanza_hin', + 
_tr('wl_settings_global', 'Stanza - Hungarian part-of-speech tagger'): 'stanza_hun', + _tr('wl_settings_global', 'Stanza - Icelandic part-of-speech tagger'): 'stanza_isl', + _tr('wl_settings_global', 'Stanza - Indonesian part-of-speech tagger'): 'stanza_ind', + _tr('wl_settings_global', 'Stanza - Irish part-of-speech tagger'): 'stanza_gle', + _tr('wl_settings_global', 'Stanza - Italian part-of-speech tagger'): 'stanza_ita', + _tr('wl_settings_global', 'Stanza - Japanese part-of-speech tagger'): 'stanza_jpn', + _tr('wl_settings_global', 'Stanza - Kazakh part-of-speech tagger'): 'stanza_kaz', + _tr('wl_settings_global', 'Stanza - Korean part-of-speech tagger'): 'stanza_kor', + _tr('wl_settings_global', 'Stanza - Kurdish (Kurmanji) part-of-speech tagger'): 'stanza_kmr', + _tr('wl_settings_global', 'Stanza - Kyrgyz part-of-speech tagger'): 'stanza_kir', + _tr('wl_settings_global', 'Stanza - Latin part-of-speech tagger'): 'stanza_lat', + _tr('wl_settings_global', 'Stanza - Latvian part-of-speech tagger'): 'stanza_lav', + _tr('wl_settings_global', 'Stanza - Ligurian part-of-speech tagger'): 'stanza_lij', + _tr('wl_settings_global', 'Stanza - Lithuanian part-of-speech tagger'): 'stanza_lit', + _tr('wl_settings_global', 'Stanza - Maltese part-of-speech tagger'): 'stanza_mlt', + _tr('wl_settings_global', 'Stanza - Manx part-of-speech tagger'): 'stanza_glv', + _tr('wl_settings_global', 'Stanza - Marathi part-of-speech tagger'): 'stanza_mar', + _tr('wl_settings_global', 'Stanza - Nigerian Pidgin part-of-speech tagger'): 'stanza_pcm', + _tr('wl_settings_global', 'Stanza - Norwegian (Bokmål) part-of-speech tagger'): 'stanza_nob', + _tr('wl_settings_global', 'Stanza - Norwegian (Nynorsk) part-of-speech tagger'): 'stanza_nno', + _tr('wl_settings_global', 'Stanza - Persian part-of-speech tagger'): 'stanza_fas', + _tr('wl_settings_global', 'Stanza - Polish part-of-speech tagger'): 'stanza_pol', + _tr('wl_settings_global', 'Stanza - Pomak part-of-speech tagger'): 'stanza_qpm', + 
_tr('wl_settings_global', 'Stanza - Portuguese part-of-speech tagger'): 'stanza_por', + _tr('wl_settings_global', 'Stanza - Romanian part-of-speech tagger'): 'stanza_ron', + _tr('wl_settings_global', 'Stanza - Russian part-of-speech tagger'): 'stanza_rus', + _tr('wl_settings_global', 'Stanza - Russian (Old) part-of-speech tagger'): 'stanza_orv', + _tr('wl_settings_global', 'Stanza - Sámi (Northern) part-of-speech tagger'): 'stanza_sme', + _tr('wl_settings_global', 'Stanza - Sanskrit part-of-speech tagger'): 'stanza_san', + _tr('wl_settings_global', 'Stanza - Scottish Gaelic part-of-speech tagger'): 'stanza_gla', + _tr('wl_settings_global', 'Stanza - Serbian (Latin script) part-of-speech tagger'): 'stanza_srp_latn', + _tr('wl_settings_global', 'Stanza - Sindhi part-of-speech tagger'): 'stanza_snd', + _tr('wl_settings_global', 'Stanza - Slovak part-of-speech tagger'): 'stanza_slk', + _tr('wl_settings_global', 'Stanza - Slovene part-of-speech tagger'): 'stanza_slv', + _tr('wl_settings_global', 'Stanza - Sorbian (Upper) part-of-speech tagger'): 'stanza_hsb', + _tr('wl_settings_global', 'Stanza - Spanish part-of-speech tagger'): 'stanza_spa', + _tr('wl_settings_global', 'Stanza - Swedish part-of-speech tagger'): 'stanza_swe', + _tr('wl_settings_global', 'Stanza - Tamil part-of-speech tagger'): 'stanza_tam', + _tr('wl_settings_global', 'Stanza - Telugu part-of-speech tagger'): 'stanza_tel', + _tr('wl_settings_global', 'Stanza - Turkish part-of-speech tagger'): 'stanza_tur', + _tr('wl_settings_global', 'Stanza - Ukrainian part-of-speech tagger'): 'stanza_ukr', + _tr('wl_settings_global', 'Stanza - Urdu part-of-speech tagger'): 'stanza_urd', + _tr('wl_settings_global', 'Stanza - Uyghur part-of-speech tagger'): 'stanza_uig', + _tr('wl_settings_global', 'Stanza - Vietnamese part-of-speech tagger'): 'stanza_vie', + _tr('wl_settings_global', 'Stanza - Welsh part-of-speech tagger'): 'stanza_cym', + _tr('wl_settings_global', 'Stanza - Wolof part-of-speech tagger'): 'stanza_wol', 
+ + _tr('wl_settings_global', 'SudachiPy - Japanese part-of-speech tagger'): 'sudachipy_jpn', + _tr('wl_settings_global', 'Underthesea - Vietnamese part-of-speech tagger'): 'underthesea_vie' + }, + + 'lemmatizers': { + _tr('wl_settings_global', 'botok - Tibetan lemmatizer'): 'botok_bod', + _tr('wl_settings_global', 'NLTK - WordNet lemmatizer'): 'nltk_wordnet', + _tr('wl_settings_global', 'pymorphy3 - Morphological analyzer'): 'pymorphy3_morphological_analyzer', + + _tr('wl_settings_global', 'simplemma - Albanian lemmatizer'): 'simplemma_sqi', + _tr('wl_settings_global', 'simplemma - Armenian lemmatizer'): 'simplemma_hye', + _tr('wl_settings_global', 'simplemma - Asturian lemmatizer'): 'simplemma_ast', + _tr('wl_settings_global', 'simplemma - Bulgarian lemmatizer'): 'simplemma_bul', + _tr('wl_settings_global', 'simplemma - Catalan lemmatizer'): 'simplemma_cat', + _tr('wl_settings_global', 'simplemma - Czech lemmatizer'): 'simplemma_ces', + _tr('wl_settings_global', 'simplemma - Danish lemmatizer'): 'simplemma_dan', + _tr('wl_settings_global', 'simplemma - Dutch lemmatizer'): 'simplemma_nld', + _tr('wl_settings_global', 'simplemma - English lemmatizer'): 'simplemma_eng', + _tr('wl_settings_global', 'simplemma - English (Middle) lemmatizer'): 'simplemma_enm', + _tr('wl_settings_global', 'simplemma - Estonian lemmatizer'): 'simplemma_est', + _tr('wl_settings_global', 'simplemma - Finnish lemmatizer'): 'simplemma_fin', + _tr('wl_settings_global', 'simplemma - French lemmatizer'): 'simplemma_fra', + _tr('wl_settings_global', 'simplemma - Galician lemmatizer'): 'simplemma_glg', + _tr('wl_settings_global', 'simplemma - Georgian lemmatizer'): 'simplemma_kat', + _tr('wl_settings_global', 'simplemma - German lemmatizer'): 'simplemma_deu', + _tr('wl_settings_global', 'simplemma - Greek (Modern) lemmatizer'): 'simplemma_ell', + _tr('wl_settings_global', 'simplemma - Hindi lemmatizer'): 'simplemma_hin', + _tr('wl_settings_global', 'simplemma - Hungarian lemmatizer'): 
'simplemma_hun', + _tr('wl_settings_global', 'simplemma - Icelandic lemmatizer'): 'simplemma_isl', + _tr('wl_settings_global', 'simplemma - Indonesian lemmatizer'): 'simplemma_ind', + _tr('wl_settings_global', 'simplemma - Irish lemmatizer'): 'simplemma_gle', + _tr('wl_settings_global', 'simplemma - Italian lemmatizer'): 'simplemma_ita', + _tr('wl_settings_global', 'simplemma - Latin lemmatizer'): 'simplemma_lat', + _tr('wl_settings_global', 'simplemma - Latvian lemmatizer'): 'simplemma_lav', + _tr('wl_settings_global', 'simplemma - Lithuanian lemmatizer'): 'simplemma_lit', + _tr('wl_settings_global', 'simplemma - Luxembourgish lemmatizer'): 'simplemma_ltz', + _tr('wl_settings_global', 'simplemma - Macedonian lemmatizer'): 'simplemma_mkd', + _tr('wl_settings_global', 'simplemma - Malay lemmatizer'): 'simplemma_msa', + _tr('wl_settings_global', 'simplemma - Manx lemmatizer'): 'simplemma_glv', + _tr('wl_settings_global', 'simplemma - Norwegian (Bokmål) lemmatizer'): 'simplemma_nob', + _tr('wl_settings_global', 'simplemma - Norwegian (Nynorsk) lemmatizer'): 'simplemma_nno', + _tr('wl_settings_global', 'simplemma - Persian lemmatizer'): 'simplemma_fas', + _tr('wl_settings_global', 'simplemma - Polish lemmatizer'): 'simplemma_pol', + _tr('wl_settings_global', 'simplemma - Portuguese lemmatizer'): 'simplemma_por', + _tr('wl_settings_global', 'simplemma - Romanian lemmatizer'): 'simplemma_ron', + _tr('wl_settings_global', 'simplemma - Russian lemmatizer'): 'simplemma_rus', + _tr('wl_settings_global', 'simplemma - Sámi (Northern) lemmatizer'): 'simplemma_sme', + _tr('wl_settings_global', 'simplemma - Scottish Gaelic lemmatizer'): 'simplemma_gla', + _tr('wl_settings_global', 'simplemma - Serbo-Croatian lemmatizer'): 'simplemma_hbs', + _tr('wl_settings_global', 'simplemma - Slovak lemmatizer'): 'simplemma_slk', + _tr('wl_settings_global', 'simplemma - Slovene lemmatizer'): 'simplemma_slv', + _tr('wl_settings_global', 'simplemma - Spanish lemmatizer'): 'simplemma_spa', + 
_tr('wl_settings_global', 'simplemma - Swahili lemmatizer'): 'simplemma_swa', + _tr('wl_settings_global', 'simplemma - Swedish lemmatizer'): 'simplemma_swe', + _tr('wl_settings_global', 'simplemma - Tagalog lemmatizer'): 'simplemma_tgl', + _tr('wl_settings_global', 'simplemma - Turkish lemmatizer'): 'simplemma_tur', + _tr('wl_settings_global', 'simplemma - Ukrainian lemmatizer'): 'simplemma_ukr', + _tr('wl_settings_global', 'simplemma - Welsh lemmatizer'): 'simplemma_cym', + + _tr('wl_settings_global', 'spaCy - Bengali lemmatizer'): 'spacy_ben', + _tr('wl_settings_global', 'spaCy - Catalan lemmatizer'): 'spacy_cat', + _tr('wl_settings_global', 'spaCy - Croatian lemmatizer'): 'spacy_hrv', + _tr('wl_settings_global', 'spaCy - Czech lemmatizer'): 'spacy_ces', + _tr('wl_settings_global', 'spaCy - Danish lemmatizer'): 'spacy_dan', + _tr('wl_settings_global', 'spaCy - Dutch lemmatizer'): 'spacy_nld', + _tr('wl_settings_global', 'spaCy - English lemmatizer'): 'spacy_eng', + _tr('wl_settings_global', 'spaCy - Finnish lemmatizer'): 'spacy_fin', + _tr('wl_settings_global', 'spaCy - French lemmatizer'): 'spacy_fra', + _tr('wl_settings_global', 'spaCy - German lemmatizer'): 'spacy_deu', + _tr('wl_settings_global', 'spaCy - Greek (Ancient) lemmatizer'): 'spacy_grc', + _tr('wl_settings_global', 'spaCy - Greek (Modern) lemmatizer'): 'spacy_ell', + _tr('wl_settings_global', 'spaCy - Hungarian lemmatizer'): 'spacy_hun', + _tr('wl_settings_global', 'spaCy - Indonesian lemmatizer'): 'spacy_ind', + _tr('wl_settings_global', 'spaCy - Irish lemmatizer'): 'spacy_gle', + _tr('wl_settings_global', 'spaCy - Italian lemmatizer'): 'spacy_ita', + _tr('wl_settings_global', 'spaCy - Japanese lemmatizer'): 'spacy_jpn', + _tr('wl_settings_global', 'spaCy - Korean lemmatizer'): 'spacy_kor', + _tr('wl_settings_global', 'spaCy - Lithuanian lemmatizer'): 'spacy_lit', + _tr('wl_settings_global', 'spaCy - Luxembourgish lemmatizer'): 'spacy_ltz', + _tr('wl_settings_global', 'spaCy - Macedonian 
lemmatizer'): 'spacy_mkd', + _tr('wl_settings_global', 'spaCy - Norwegian (Bokmål) lemmatizer'): 'spacy_nob', + _tr('wl_settings_global', 'spaCy - Persian lemmatizer'): 'spacy_fas', + _tr('wl_settings_global', 'spaCy - Polish lemmatizer'): 'spacy_pol', + _tr('wl_settings_global', 'spaCy - Portuguese lemmatizer'): 'spacy_por', + _tr('wl_settings_global', 'spaCy - Romanian lemmatizer'): 'spacy_ron', + _tr('wl_settings_global', 'spaCy - Russian lemmatizer'): 'spacy_rus', + _tr('wl_settings_global', 'spaCy - Serbian (Cyrillic script) lemmatizer'): 'spacy_srp', + _tr('wl_settings_global', 'spaCy - Slovene lemmatizer'): 'spacy_slv', + _tr('wl_settings_global', 'spaCy - Spanish lemmatizer'): 'spacy_spa', + _tr('wl_settings_global', 'spaCy - Swedish lemmatizer'): 'spacy_swe', + _tr('wl_settings_global', 'spaCy - Tagalog lemmatizer'): 'spacy_tgl', + _tr('wl_settings_global', 'spaCy - Turkish lemmatizer'): 'spacy_tur', + _tr('wl_settings_global', 'spaCy - Ukrainian lemmatizer'): 'spacy_ukr', + _tr('wl_settings_global', 'spaCy - Urdu lemmatizer'): 'spacy_urd', + + _tr('wl_settings_global', 'Stanza - Afrikaans lemmatizer'): 'stanza_afr', + _tr('wl_settings_global', 'Stanza - Arabic lemmatizer'): 'stanza_ara', + _tr('wl_settings_global', 'Stanza - Armenian (Classical) lemmatizer'): 'stanza_xcl', + _tr('wl_settings_global', 'Stanza - Armenian (Eastern) lemmatizer'): 'stanza_hye', + _tr('wl_settings_global', 'Stanza - Armenian (Western) lemmatizer'): 'stanza_hyw', + _tr('wl_settings_global', 'Stanza - Basque lemmatizer'): 'stanza_eus', + _tr('wl_settings_global', 'Stanza - Belarusian lemmatizer'): 'stanza_bel', + _tr('wl_settings_global', 'Stanza - Bulgarian lemmatizer'): 'stanza_bul', + _tr('wl_settings_global', 'Stanza - Buryat (Russia) lemmatizer'): 'stanza_bxr', + _tr('wl_settings_global', 'Stanza - Catalan lemmatizer'): 'stanza_cat', + _tr('wl_settings_global', 'Stanza - Chinese (Classical) lemmatizer'): 'stanza_lzh', + _tr('wl_settings_global', 'Stanza - Chinese 
(Simplified) lemmatizer'): 'stanza_zho_cn', + _tr('wl_settings_global', 'Stanza - Chinese (Traditional) lemmatizer'): 'stanza_zho_tw', + _tr('wl_settings_global', 'Stanza - Church Slavonic (Old) lemmatizer'): 'stanza_chu', + _tr('wl_settings_global', 'Stanza - Coptic lemmatizer'): 'stanza_cop', + _tr('wl_settings_global', 'Stanza - Croatian lemmatizer'): 'stanza_hrv', + _tr('wl_settings_global', 'Stanza - Czech lemmatizer'): 'stanza_ces', + _tr('wl_settings_global', 'Stanza - Danish lemmatizer'): 'stanza_dan', + _tr('wl_settings_global', 'Stanza - Dutch lemmatizer'): 'stanza_nld', + _tr('wl_settings_global', 'Stanza - English lemmatizer'): 'stanza_eng', + _tr('wl_settings_global', 'Stanza - English (Old) lemmatizer'): 'stanza_ang', + _tr('wl_settings_global', 'Stanza - Erzya lemmatizer'): 'stanza_myv', + _tr('wl_settings_global', 'Stanza - Estonian lemmatizer'): 'stanza_est', + _tr('wl_settings_global', 'Stanza - Finnish lemmatizer'): 'stanza_fin', + _tr('wl_settings_global', 'Stanza - French lemmatizer'): 'stanza_fra', + _tr('wl_settings_global', 'Stanza - French (Old) lemmatizer'): 'stanza_fro', + _tr('wl_settings_global', 'Stanza - Galician lemmatizer'): 'stanza_glg', + _tr('wl_settings_global', 'Stanza - German lemmatizer'): 'stanza_deu', + _tr('wl_settings_global', 'Stanza - Gothic lemmatizer'): 'stanza_got', + _tr('wl_settings_global', 'Stanza - Greek (Ancient) lemmatizer'): 'stanza_grc', + _tr('wl_settings_global', 'Stanza - Greek (Modern) lemmatizer'): 'stanza_ell', + _tr('wl_settings_global', 'Stanza - Hebrew (Ancient) lemmatizer'): 'stanza_hbo', + _tr('wl_settings_global', 'Stanza - Hebrew (Modern) lemmatizer'): 'stanza_heb', + _tr('wl_settings_global', 'Stanza - Hindi lemmatizer'): 'stanza_hin', + _tr('wl_settings_global', 'Stanza - Hungarian lemmatizer'): 'stanza_hun', + _tr('wl_settings_global', 'Stanza - Icelandic lemmatizer'): 'stanza_isl', + _tr('wl_settings_global', 'Stanza - Indonesian lemmatizer'): 'stanza_ind', + _tr('wl_settings_global', 
'Stanza - Irish lemmatizer'): 'stanza_gle', + _tr('wl_settings_global', 'Stanza - Italian lemmatizer'): 'stanza_ita', + _tr('wl_settings_global', 'Stanza - Japanese lemmatizer'): 'stanza_jpn', + _tr('wl_settings_global', 'Stanza - Kazakh lemmatizer'): 'stanza_kaz', + _tr('wl_settings_global', 'Stanza - Korean lemmatizer'): 'stanza_kor', + _tr('wl_settings_global', 'Stanza - Kurdish (Kurmanji) lemmatizer'): 'stanza_kmr', + _tr('wl_settings_global', 'Stanza - Kyrgyz lemmatizer'): 'stanza_kir', + _tr('wl_settings_global', 'Stanza - Latin lemmatizer'): 'stanza_lat', + _tr('wl_settings_global', 'Stanza - Latvian lemmatizer'): 'stanza_lav', + _tr('wl_settings_global', 'Stanza - Ligurian lemmatizer'): 'stanza_lij', + _tr('wl_settings_global', 'Stanza - Lithuanian lemmatizer'): 'stanza_lit', + _tr('wl_settings_global', 'Stanza - Manx lemmatizer'): 'stanza_glv', + _tr('wl_settings_global', 'Stanza - Marathi lemmatizer'): 'stanza_mar', + _tr('wl_settings_global', 'Stanza - Nigerian Pidgin lemmatizer'): 'stanza_pcm', + _tr('wl_settings_global', 'Stanza - Norwegian (Bokmål) lemmatizer'): 'stanza_nob', + _tr('wl_settings_global', 'Stanza - Norwegian (Nynorsk) lemmatizer'): 'stanza_nno', + _tr('wl_settings_global', 'Stanza - Persian lemmatizer'): 'stanza_fas', + _tr('wl_settings_global', 'Stanza - Polish lemmatizer'): 'stanza_pol', + _tr('wl_settings_global', 'Stanza - Pomak lemmatizer'): 'stanza_qpm', + _tr('wl_settings_global', 'Stanza - Portuguese lemmatizer'): 'stanza_por', + _tr('wl_settings_global', 'Stanza - Romanian lemmatizer'): 'stanza_ron', + _tr('wl_settings_global', 'Stanza - Russian lemmatizer'): 'stanza_rus', + _tr('wl_settings_global', 'Stanza - Russian (Old) lemmatizer'): 'stanza_orv', + _tr('wl_settings_global', 'Stanza - Sámi (Northern) lemmatizer'): 'stanza_sme', + _tr('wl_settings_global', 'Stanza - Sanskrit lemmatizer'): 'stanza_san', + _tr('wl_settings_global', 'Stanza - Scottish Gaelic lemmatizer'): 'stanza_gla', + _tr('wl_settings_global', 'Stanza - 
Serbian (Latin script) lemmatizer'): 'stanza_srp_latn', + _tr('wl_settings_global', 'Stanza - Slovak lemmatizer'): 'stanza_slk', + _tr('wl_settings_global', 'Stanza - Slovene lemmatizer'): 'stanza_slv', + _tr('wl_settings_global', 'Stanza - Sorbian (Upper) lemmatizer'): 'stanza_hsb', + _tr('wl_settings_global', 'Stanza - Spanish lemmatizer'): 'stanza_spa', + _tr('wl_settings_global', 'Stanza - Swedish lemmatizer'): 'stanza_swe', + _tr('wl_settings_global', 'Stanza - Tamil lemmatizer'): 'stanza_tam', + _tr('wl_settings_global', 'Stanza - Turkish lemmatizer'): 'stanza_tur', + _tr('wl_settings_global', 'Stanza - Ukrainian lemmatizer'): 'stanza_ukr', + _tr('wl_settings_global', 'Stanza - Urdu lemmatizer'): 'stanza_urd', + _tr('wl_settings_global', 'Stanza - Uyghur lemmatizer'): 'stanza_uig', + _tr('wl_settings_global', 'Stanza - Welsh lemmatizer'): 'stanza_cym', + _tr('wl_settings_global', 'Stanza - Wolof lemmatizer'): 'stanza_wol', + + _tr('wl_settings_global', 'SudachiPy - Japanese lemmatizer'): 'sudachipy_jpn' + }, + + 'stop_word_lists': { + _tr('wl_settings_global', 'LaoNLP - Lao stop word list'): 'laonlp_lao', + + _tr('wl_settings_global', 'NLTK - Arabic stop word list'): 'nltk_ara', + _tr('wl_settings_global', 'NLTK - Azerbaijani stop word list'): 'nltk_aze', + _tr('wl_settings_global', 'NLTK - Basque stop word list'): 'nltk_eus', + _tr('wl_settings_global', 'NLTK - Bengali stop word list'): 'nltk_ben', + _tr('wl_settings_global', 'NLTK - Catalan stop word list'): 'nltk_cat', + _tr('wl_settings_global', 'NLTK - Chinese (Simplified) stop word list'): 'nltk_zho_cn', + _tr('wl_settings_global', 'NLTK - Chinese (Traditional) stop word list'): 'nltk_zho_tw', + _tr('wl_settings_global', 'NLTK - Danish stop word list'): 'nltk_dan', + _tr('wl_settings_global', 'NLTK - Dutch stop word list'): 'nltk_nld', + _tr('wl_settings_global', 'NLTK - English stop word list'): 'nltk_eng', + _tr('wl_settings_global', 'NLTK - Finnish stop word list'): 'nltk_fin', + 
_tr('wl_settings_global', 'NLTK - French stop word list'): 'nltk_fra', + _tr('wl_settings_global', 'NLTK - German stop word list'): 'nltk_deu', + _tr('wl_settings_global', 'NLTK - Greek (Modern) stop word list'): 'nltk_ell', + _tr('wl_settings_global', 'NLTK - Hebrew (Modern) stop word list'): 'nltk_heb', + _tr('wl_settings_global', 'NLTK - Hungarian stop word list'): 'nltk_hun', + _tr('wl_settings_global', 'NLTK - Indonesian stop word list'): 'nltk_ind', + _tr('wl_settings_global', 'NLTK - Italian stop word list'): 'nltk_ita', + _tr('wl_settings_global', 'NLTK - Kazakh stop word list'): 'nltk_kaz', + _tr('wl_settings_global', 'NLTK - Nepali stop word list'): 'nltk_nep', + _tr('wl_settings_global', 'NLTK - Norwegian (Bokmål) stop word list'): 'nltk_nob', + _tr('wl_settings_global', 'NLTK - Portuguese stop word list'): 'nltk_por', + _tr('wl_settings_global', 'NLTK - Romanian stop word list'): 'nltk_ron', + _tr('wl_settings_global', 'NLTK - Russian stop word list'): 'nltk_rus', + _tr('wl_settings_global', 'NLTK - Slovene stop word list'): 'nltk_slv', + _tr('wl_settings_global', 'NLTK - Spanish stop word list'): 'nltk_spa', + _tr('wl_settings_global', 'NLTK - Swedish stop word list'): 'nltk_swe', + _tr('wl_settings_global', 'NLTK - Tajik stop word list'): 'nltk_tgk', + _tr('wl_settings_global', 'NLTK - Turkish stop word list'): 'nltk_tur', + + _tr('wl_settings_global', 'PyThaiNLP - Thai stop word list'): 'pythainlp_tha', + + _tr('wl_settings_global', 'Custom stop word list'): 'custom', + }, + + 'dependency_parsers':{ + _tr('wl_settings_global', 'spaCy - Catalan dependency parser'): 'spacy_cat', + _tr('wl_settings_global', 'spaCy - Chinese dependency parser'): 'spacy_zho', + _tr('wl_settings_global', 'spaCy - Croatian dependency parser'): 'spacy_hrv', + _tr('wl_settings_global', 'spaCy - Danish dependency parser'): 'spacy_dan', + _tr('wl_settings_global', 'spaCy - Dutch dependency parser'): 'spacy_nld', + _tr('wl_settings_global', 'spaCy - English dependency parser'): 
'spacy_eng', + _tr('wl_settings_global', 'spaCy - Finnish dependency parser'): 'spacy_fin', + _tr('wl_settings_global', 'spaCy - French dependency parser'): 'spacy_fra', + _tr('wl_settings_global', 'spaCy - German dependency parser'): 'spacy_deu', + _tr('wl_settings_global', 'spaCy - Greek (Modern) dependency parser'): 'spacy_ell', + _tr('wl_settings_global', 'spaCy - Italian dependency parser'): 'spacy_ita', + _tr('wl_settings_global', 'spaCy - Japanese dependency parser'): 'spacy_jpn', + _tr('wl_settings_global', 'spaCy - Korean dependency parser'): 'spacy_kor', + _tr('wl_settings_global', 'spaCy - Lithuanian dependency parser'): 'spacy_lit', + _tr('wl_settings_global', 'spaCy - Macedonian dependency parser'): 'spacy_mkd', + _tr('wl_settings_global', 'spaCy - Norwegian (Bokmål) dependency parser'): 'spacy_nob', + _tr('wl_settings_global', 'spaCy - Polish dependency parser'): 'spacy_pol', + _tr('wl_settings_global', 'spaCy - Portuguese dependency parser'): 'spacy_por', + _tr('wl_settings_global', 'spaCy - Romanian dependency parser'): 'spacy_ron', + _tr('wl_settings_global', 'spaCy - Russian dependency parser'): 'spacy_rus', + _tr('wl_settings_global', 'spaCy - Slovene dependency parser'): 'spacy_slv', + _tr('wl_settings_global', 'spaCy - Spanish dependency parser'): 'spacy_spa', + _tr('wl_settings_global', 'spaCy - Swedish dependency parser'): 'spacy_swe', + _tr('wl_settings_global', 'spaCy - Ukrainian dependency parser'): 'spacy_ukr', + + _tr('wl_settings_global', 'Stanza - Afrikaans dependency parser'): 'stanza_afr', + _tr('wl_settings_global', 'Stanza - Arabic dependency parser'): 'stanza_ara', + _tr('wl_settings_global', 'Stanza - Armenian (Classical) dependency parser'): 'stanza_xcl', + _tr('wl_settings_global', 'Stanza - Armenian (Eastern) dependency parser'): 'stanza_hye', + _tr('wl_settings_global', 'Stanza - Armenian (Western) dependency parser'): 'stanza_hyw', + _tr('wl_settings_global', 'Stanza - Basque dependency parser'): 'stanza_eus', + 
_tr('wl_settings_global', 'Stanza - Belarusian dependency parser'): 'stanza_bel', + _tr('wl_settings_global', 'Stanza - Bulgarian dependency parser'): 'stanza_bul', + _tr('wl_settings_global', 'Stanza - Buryat (Russia) dependency parser'): 'stanza_bxr', + _tr('wl_settings_global', 'Stanza - Catalan dependency parser'): 'stanza_cat', + _tr('wl_settings_global', 'Stanza - Chinese (Classical) dependency parser'): 'stanza_lzh', + _tr('wl_settings_global', 'Stanza - Chinese (Simplified) dependency parser'): 'stanza_zho_cn', + _tr('wl_settings_global', 'Stanza - Chinese (Traditional) dependency parser'): 'stanza_zho_tw', + _tr('wl_settings_global', 'Stanza - Church Slavonic (Old) dependency parser'): 'stanza_chu', + _tr('wl_settings_global', 'Stanza - Coptic dependency parser'): 'stanza_cop', + _tr('wl_settings_global', 'Stanza - Croatian dependency parser'): 'stanza_hrv', + _tr('wl_settings_global', 'Stanza - Czech dependency parser'): 'stanza_ces', + _tr('wl_settings_global', 'Stanza - Danish dependency parser'): 'stanza_dan', + _tr('wl_settings_global', 'Stanza - Dutch dependency parser'): 'stanza_nld', + _tr('wl_settings_global', 'Stanza - English dependency parser'): 'stanza_eng', + _tr('wl_settings_global', 'Stanza - English (Old) dependency parser'): 'stanza_ang', + _tr('wl_settings_global', 'Stanza - Erzya dependency parser'): 'stanza_myv', + _tr('wl_settings_global', 'Stanza - Estonian dependency parser'): 'stanza_est', + _tr('wl_settings_global', 'Stanza - Faroese dependency parser'): 'stanza_fao', + _tr('wl_settings_global', 'Stanza - Finnish dependency parser'): 'stanza_fin', + _tr('wl_settings_global', 'Stanza - French dependency parser'): 'stanza_fra', + _tr('wl_settings_global', 'Stanza - French (Old) dependency parser'): 'stanza_fro', + _tr('wl_settings_global', 'Stanza - Galician dependency parser'): 'stanza_glg', + _tr('wl_settings_global', 'Stanza - German dependency parser'): 'stanza_deu', + _tr('wl_settings_global', 'Stanza - Gothic dependency 
parser'): 'stanza_got', + _tr('wl_settings_global', 'Stanza - Greek (Ancient) dependency parser'): 'stanza_grc', + _tr('wl_settings_global', 'Stanza - Greek (Modern) dependency parser'): 'stanza_ell', + _tr('wl_settings_global', 'Stanza - Hebrew (Ancient) dependency parser'): 'stanza_hbo', + _tr('wl_settings_global', 'Stanza - Hebrew (Modern) dependency parser'): 'stanza_heb', + _tr('wl_settings_global', 'Stanza - Hindi dependency parser'): 'stanza_hin', + _tr('wl_settings_global', 'Stanza - Hungarian dependency parser'): 'stanza_hun', + _tr('wl_settings_global', 'Stanza - Icelandic dependency parser'): 'stanza_isl', + _tr('wl_settings_global', 'Stanza - Indonesian dependency parser'): 'stanza_ind', + _tr('wl_settings_global', 'Stanza - Irish dependency parser'): 'stanza_gle', + _tr('wl_settings_global', 'Stanza - Italian dependency parser'): 'stanza_ita', + _tr('wl_settings_global', 'Stanza - Japanese dependency parser'): 'stanza_jpn', + _tr('wl_settings_global', 'Stanza - Kazakh dependency parser'): 'stanza_kaz', + _tr('wl_settings_global', 'Stanza - Korean dependency parser'): 'stanza_kor', + _tr('wl_settings_global', 'Stanza - Kurdish (Kurmanji) dependency parser'): 'stanza_kmr', + _tr('wl_settings_global', 'Stanza - Kyrgyz dependency parser'): 'stanza_kir', + _tr('wl_settings_global', 'Stanza - Latin dependency parser'): 'stanza_lat', + _tr('wl_settings_global', 'Stanza - Latvian dependency parser'): 'stanza_lav', + _tr('wl_settings_global', 'Stanza - Ligurian dependency parser'): 'stanza_lij', + _tr('wl_settings_global', 'Stanza - Lithuanian dependency parser'): 'stanza_lit', + _tr('wl_settings_global', 'Stanza - Maltese dependency parser'): 'stanza_mlt', + _tr('wl_settings_global', 'Stanza - Manx dependency parser'): 'stanza_glv', + _tr('wl_settings_global', 'Stanza - Marathi dependency parser'): 'stanza_mar', + _tr('wl_settings_global', 'Stanza - Nigerian Pidgin dependency parser'): 'stanza_pcm', + _tr('wl_settings_global', 'Stanza - Norwegian (Bokmål) 
dependency parser'): 'stanza_nob', + _tr('wl_settings_global', 'Stanza - Norwegian (Nynorsk) dependency parser'): 'stanza_nno', + _tr('wl_settings_global', 'Stanza - Persian dependency parser'): 'stanza_fas', + _tr('wl_settings_global', 'Stanza - Polish dependency parser'): 'stanza_pol', + _tr('wl_settings_global', 'Stanza - Pomak dependency parser'): 'stanza_qpm', + _tr('wl_settings_global', 'Stanza - Portuguese dependency parser'): 'stanza_por', + _tr('wl_settings_global', 'Stanza - Romanian dependency parser'): 'stanza_ron', + _tr('wl_settings_global', 'Stanza - Russian dependency parser'): 'stanza_rus', + _tr('wl_settings_global', 'Stanza - Russian (Old) dependency parser'): 'stanza_orv', + _tr('wl_settings_global', 'Stanza - Sámi (Northern) dependency parser'): 'stanza_sme', + _tr('wl_settings_global', 'Stanza - Sanskrit dependency parser'): 'stanza_san', + _tr('wl_settings_global', 'Stanza - Scottish Gaelic dependency parser'): 'stanza_gla', + _tr('wl_settings_global', 'Stanza - Serbian (Latin script) dependency parser'): 'stanza_srp_latn', + _tr('wl_settings_global', 'Stanza - Slovak dependency parser'): 'stanza_slk', + _tr('wl_settings_global', 'Stanza - Slovene dependency parser'): 'stanza_slv', + _tr('wl_settings_global', 'Stanza - Sorbian (Upper) dependency parser'): 'stanza_hsb', + _tr('wl_settings_global', 'Stanza - Spanish dependency parser'): 'stanza_spa', + _tr('wl_settings_global', 'Stanza - Swedish dependency parser'): 'stanza_swe', + _tr('wl_settings_global', 'Stanza - Tamil dependency parser'): 'stanza_tam', + _tr('wl_settings_global', 'Stanza - Telugu dependency parser'): 'stanza_tel', + _tr('wl_settings_global', 'Stanza - Turkish dependency parser'): 'stanza_tur', + _tr('wl_settings_global', 'Stanza - Ukrainian dependency parser'): 'stanza_ukr', + _tr('wl_settings_global', 'Stanza - Urdu dependency parser'): 'stanza_urd', + _tr('wl_settings_global', 'Stanza - Uyghur dependency parser'): 'stanza_uig', + _tr('wl_settings_global', 'Stanza - 
Vietnamese dependency parser'): 'stanza_vie', + _tr('wl_settings_global', 'Stanza - Welsh dependency parser'): 'stanza_cym', + _tr('wl_settings_global', 'Stanza - Wolof dependency parser'): 'stanza_wol' + }, + + 'sentiment_analyzers': { + _tr('wl_settings_global', 'Stanza - Chinese (Simplified) sentiment analyzer'): 'stanza_zho_cn', + _tr('wl_settings_global', 'Stanza - German sentiment analyzer'): 'stanza_deu', + _tr('wl_settings_global', 'Stanza - English sentiment analyzer'): 'stanza_eng', + _tr('wl_settings_global', 'Stanza - Marathi sentiment analyzer'): 'stanza_mar', + _tr('wl_settings_global', 'Stanza - Spanish sentiment analyzer'): 'stanza_spa', + _tr('wl_settings_global', 'Stanza - Vietnamese sentiment analyzer'): 'stanza_vie', + + _tr('wl_settings_global', 'Underthesea - Vietnamese sentiment analyzer'): 'underthesea_vie', + + _tr('wl_settings_global', 'VADER - Afrikaans sentiment analyzer'): 'vader_afr', + _tr('wl_settings_global', 'VADER - Albanian sentiment analyzer'): 'vader_sqi', + _tr('wl_settings_global', 'VADER - Amharic sentiment analyzer'): 'vader_amh', + _tr('wl_settings_global', 'VADER - Arabic sentiment analyzer'): 'vader_ara', + _tr('wl_settings_global', 'VADER - Armenian sentiment analyzer'): 'vader_hye', + _tr('wl_settings_global', 'VADER - Assamese sentiment analyzer'): 'vader_asm', + _tr('wl_settings_global', 'VADER - Azerbaijani sentiment analyzer'): 'vader_aze', + _tr('wl_settings_global', 'VADER - Basque sentiment analyzer'): 'vader_eus', + _tr('wl_settings_global', 'VADER - Belarusian sentiment analyzer'): 'vader_bel', + _tr('wl_settings_global', 'VADER - Bengali sentiment analyzer'): 'vader_ben', + _tr('wl_settings_global', 'VADER - Bulgarian sentiment analyzer'): 'vader_bul', + _tr('wl_settings_global', 'VADER - Burmese sentiment analyzer'): 'vader_mya', + _tr('wl_settings_global', 'VADER - Catalan sentiment analyzer'): 'vader_cat', + _tr('wl_settings_global', 'VADER - Chinese (Simplified) sentiment analyzer'): 'vader_zho_cn', + 
_tr('wl_settings_global', 'VADER - Chinese (Traditional) sentiment analyzer'): 'vader_zho_tw', + _tr('wl_settings_global', 'VADER - Croatian sentiment analyzer'): 'vader_hrv', + _tr('wl_settings_global', 'VADER - Czech sentiment analyzer'): 'vader_ces', + _tr('wl_settings_global', 'VADER - Danish sentiment analyzer'): 'vader_dan', + _tr('wl_settings_global', 'VADER - Dutch sentiment analyzer'): 'vader_nld', + _tr('wl_settings_global', 'VADER - English sentiment analyzer'): 'vader_eng', + _tr('wl_settings_global', 'VADER - Esperanto sentiment analyzer'): 'vader_epo', + _tr('wl_settings_global', 'VADER - Estonian sentiment analyzer'): 'vader_est', + _tr('wl_settings_global', 'VADER - Finnish sentiment analyzer'): 'vader_fin', + _tr('wl_settings_global', 'VADER - French sentiment analyzer'): 'vader_fra', + _tr('wl_settings_global', 'VADER - Galician sentiment analyzer'): 'vader_glg', + _tr('wl_settings_global', 'VADER - Georgian sentiment analyzer'): 'vader_kat', + _tr('wl_settings_global', 'VADER - German sentiment analyzer'): 'vader_deu', + _tr('wl_settings_global', 'VADER - Greek (Modern) sentiment analyzer'): 'vader_ell', + _tr('wl_settings_global', 'VADER - Gujarati sentiment analyzer'): 'vader_guj', + _tr('wl_settings_global', 'VADER - Hebrew (Modern) sentiment analyzer'): 'vader_heb', + _tr('wl_settings_global', 'VADER - Hindi sentiment analyzer'): 'vader_hin', + _tr('wl_settings_global', 'VADER - Hungarian sentiment analyzer'): 'vader_hun', + _tr('wl_settings_global', 'VADER - Icelandic sentiment analyzer'): 'vader_isl', + _tr('wl_settings_global', 'VADER - Indonesian sentiment analyzer'): 'vader_ind', + _tr('wl_settings_global', 'VADER - Irish sentiment analyzer'): 'vader_gle', + _tr('wl_settings_global', 'VADER - Italian sentiment analyzer'): 'vader_ita', + _tr('wl_settings_global', 'VADER - Japanese sentiment analyzer'): 'vader_jpn', + _tr('wl_settings_global', 'VADER - Kannada sentiment analyzer'): 'vader_kan', + _tr('wl_settings_global', 'VADER - Kazakh 
sentiment analyzer'): 'vader_kaz', + _tr('wl_settings_global', 'VADER - Khmer sentiment analyzer'): 'vader_khm', + _tr('wl_settings_global', 'VADER - Korean sentiment analyzer'): 'vader_kor', + _tr('wl_settings_global', 'VADER - Kurdish (Kurmanji) sentiment analyzer'): 'vader_kmr', + _tr('wl_settings_global', 'VADER - Kyrgyz sentiment analyzer'): 'vader_kir', + _tr('wl_settings_global', 'VADER - Lao sentiment analyzer'): 'vader_lao', + _tr('wl_settings_global', 'VADER - Latin sentiment analyzer'): 'vader_lat', + _tr('wl_settings_global', 'VADER - Latvian sentiment analyzer'): 'vader_lav', + _tr('wl_settings_global', 'VADER - Lithuanian sentiment analyzer'): 'vader_lit', + _tr('wl_settings_global', 'VADER - Luganda sentiment analyzer'): 'vader_lug', + _tr('wl_settings_global', 'VADER - Luxembourgish sentiment analyzer'): 'vader_ltz', + _tr('wl_settings_global', 'VADER - Macedonian sentiment analyzer'): 'vader_mkd', + _tr('wl_settings_global', 'VADER - Malay sentiment analyzer'): 'vader_msa', + _tr('wl_settings_global', 'VADER - Malayalam sentiment analyzer'): 'vader_mal', + _tr('wl_settings_global', 'VADER - Maltese sentiment analyzer'): 'vader_mlt', + _tr('wl_settings_global', 'VADER - Marathi sentiment analyzer'): 'vader_mar', + _tr('wl_settings_global', 'VADER - Meitei (Meitei script) sentiment analyzer'): 'vader_mni_mtei', + _tr('wl_settings_global', 'VADER - Mongolian sentiment analyzer'): 'vader_mon', + _tr('wl_settings_global', 'VADER - Nepali sentiment analyzer'): 'vader_nep', + # References: + # https://support.google.com/translate/thread/1818911/norwegin-translate-is-in-bokm%C3%A5l-or-nynorsk?hl=en + # https://www.quora.com/How-does-Google-Translate-do-with-Norwegian-language-as-Norwegian-has-two-official-forms + _tr('wl_settings_global', 'VADER - Norwegian (Bokmål) sentiment analyzer'): 'vader_nob', + _tr('wl_settings_global', 'VADER - Odia sentiment analyzer'): 'vader_ori', + _tr('wl_settings_global', 'VADER - Persian sentiment analyzer'): 'vader_fas', + 
_tr('wl_settings_global', 'VADER - Polish sentiment analyzer'): 'vader_pol', + _tr('wl_settings_global', 'VADER - Portuguese sentiment analyzer'): 'vader_por', + _tr('wl_settings_global', 'VADER - Punjabi (Gurmukhi script) sentiment analyzer'): 'vader_pan_guru', + _tr('wl_settings_global', 'VADER - Romanian sentiment analyzer'): 'vader_ron', + _tr('wl_settings_global', 'VADER - Russian sentiment analyzer'): 'vader_rus', + _tr('wl_settings_global', 'VADER - Sanskrit sentiment analyzer'): 'vader_san', + _tr('wl_settings_global', 'VADER - Scottish Gaelic sentiment analyzer'): 'vader_gla', + _tr('wl_settings_global', 'VADER - Serbian (Cyrillic script) sentiment analyzer'): 'vader_srp_cyrl', + _tr('wl_settings_global', 'VADER - Sindhi sentiment analyzer'): 'vader_snd', + _tr('wl_settings_global', 'VADER - Sinhala sentiment analyzer'): 'vader_sin', + _tr('wl_settings_global', 'VADER - Slovak sentiment analyzer'): 'vader_slk', + _tr('wl_settings_global', 'VADER - Slovene sentiment analyzer'): 'vader_slv', + _tr('wl_settings_global', 'VADER - Spanish sentiment analyzer'): 'vader_spa', + _tr('wl_settings_global', 'VADER - Swahili sentiment analyzer'): 'vader_swa', + _tr('wl_settings_global', 'VADER - Swedish sentiment analyzer'): 'vader_swe', + _tr('wl_settings_global', 'VADER - Tagalog sentiment analyzer'): 'vader_tgl', + _tr('wl_settings_global', 'VADER - Tajik sentiment analyzer'): 'vader_tgk', + _tr('wl_settings_global', 'VADER - Tamil sentiment analyzer'): 'vader_tam', + _tr('wl_settings_global', 'VADER - Tatar sentiment analyzer'): 'vader_tat', + _tr('wl_settings_global', 'VADER - Telugu sentiment analyzer'): 'vader_tel', + _tr('wl_settings_global', 'VADER - Thai sentiment analyzer'): 'vader_tha', + _tr('wl_settings_global', 'VADER - Tigrinya sentiment analyzer'): 'vader_tir', + _tr('wl_settings_global', 'VADER - Turkish sentiment analyzer'): 'vader_tur', + _tr('wl_settings_global', 'VADER - Ukrainian sentiment analyzer'): 'vader_ukr', + _tr('wl_settings_global', 
'VADER - Urdu sentiment analyzer'): 'vader_urd', + _tr('wl_settings_global', 'VADER - Uyghur sentiment analyzer'): 'vader_uig', + _tr('wl_settings_global', 'VADER - Welsh sentiment analyzer'): 'vader_cym', + _tr('wl_settings_global', 'VADER - Yoruba sentiment analyzer'): 'vader_yor', + _tr('wl_settings_global', 'VADER - Zulu sentiment analyzer'): 'vader_zul' + } }, - 'z_score_berry_rogghe': { - 'col_text': _tr('wl_settings_global', 'z-score'), - 'func': wl_measures_statistical_significance.z_score_berry_rogghe, - 'to_sections': False, - 'collocation_extractor': True, - 'keyword_extractor': False - } - }, - - 'measures_bayes_factor': { - 'none': { - 'func': None, - 'to_sections': None, - 'collocation_extractor': True, - 'keyword_extractor': True - }, - - 'log_likelihood_ratio_test': { - 'func': wl_measures_bayes_factor.bayes_factor_log_likelihood_ratio_test, - 'to_sections': False, - 'collocation_extractor': True, - 'keyword_extractor': True - }, - - 'students_t_test_2_sample': { - 'func': wl_measures_bayes_factor.bayes_factor_students_t_test_2_sample, - 'to_sections': True, - 'collocation_extractor': False, - 'keyword_extractor': True - }, - }, - - 'measures_effect_size': { - 'none': { - 'col_text': None, - 'func': None - }, - - 'pct_diff': { - 'col_text': '%DIFF', - 'func': wl_measures_effect_size.pct_diff - }, - - 'im3': { - 'col_text': 'IM³', - 'func': wl_measures_effect_size.im3 - }, - - 'dices_coeff': { - 'col_text': _tr('wl_settings_global', "Dice's Coefficient"), - 'func': wl_measures_effect_size.dices_coeff - }, - - 'diff_coeff': { - 'col_text': _tr('wl_settings_global', 'Difference Coefficient'), - 'func': wl_measures_effect_size.diff_coeff + 'sentence_tokenizers': { + 'afr': [ + 'spacy_sentencizer', + 'stanza_afr' + ], + + 'ara': [ + 'spacy_sentencizer', + 'stanza_ara' + ], + + 'xcl': [ + 'spacy_sentencizer', + 'stanza_xcl' + ], + 'hye': [ + 'spacy_sentencizer', + 'stanza_hye' + ], + 'hyw': [ + 'spacy_sentencizer', + 'stanza_hyw' + ], + + 'eus': [ + 
'spacy_sentencizer', + 'stanza_eus' + ], + + 'bel': [ + 'spacy_sentencizer', + 'stanza_bel' + ], + + 'bul': [ + 'spacy_sentencizer', + 'stanza_bul' + ], + + 'mya': [ + 'spacy_sentencizer', + 'stanza_mya' + ], + + 'bxr': [ + 'spacy_sentencizer', + 'stanza_bxr' + ], + + 'cat': [ + 'spacy_dependency_parser_cat', + 'spacy_sentencizer', + 'stanza_cat' + ], + + 'lzh': [ + 'spacy_sentencizer', + 'stanza_lzh' + ], + 'zho_cn': [ + 'spacy_dependency_parser_zho', + 'spacy_sentencizer', + 'stanza_zho_cn' + ], + 'zho_tw': [ + 'spacy_dependency_parser_zho', + 'spacy_sentencizer', + 'stanza_zho_tw' + ], + + 'chu': [ + 'spacy_sentencizer', + 'stanza_chu' + ], + + 'cop': [ + 'spacy_sentencizer', + 'stanza_cop' + ], + + 'hrv': [ + 'spacy_dependency_parser_hrv', + 'spacy_sentence_recognizer_hrv', + 'spacy_sentencizer', + 'stanza_hrv' + ], + + 'ces': [ + 'nltk_punkt_ces', + 'spacy_sentencizer', + 'stanza_ces' + ], + + 'dan': [ + 'nltk_punkt_dan', + 'spacy_dependency_parser_dan', + 'spacy_sentencizer', + 'stanza_dan' + ], + + 'nld': [ + 'nltk_punkt_nld', + 'spacy_dependency_parser_nld', + 'spacy_sentence_recognizer_nld', + 'spacy_sentencizer', + 'stanza_nld' + ], + + 'ang': [ + 'spacy_sentencizer', + 'stanza_ang' + ], + 'eng_gb': [ + 'nltk_punkt_eng', + 'spacy_dependency_parser_eng', + 'spacy_sentencizer', + 'stanza_eng' + ], + 'eng_us': [ + 'nltk_punkt_eng', + 'spacy_dependency_parser_eng', + 'spacy_sentencizer', + 'stanza_eng' + ], + + 'myv': [ + 'spacy_sentencizer', + 'stanza_myv' + ], + + 'est': [ + 'nltk_punkt_est', + 'spacy_sentencizer', + 'stanza_est' + ], + + 'fao': [ + 'spacy_sentencizer', + 'stanza_fao' + ], + + 'fin': [ + 'nltk_punkt_fin', + 'spacy_dependency_parser_fin', + 'spacy_sentence_recognizer_fin', + 'spacy_sentencizer', + 'stanza_fin' + ], + + 'fra': [ + 'nltk_punkt_fra', + 'spacy_dependency_parser_fra', + 'spacy_sentencizer', + 'stanza_fra' + ], + 'fro': [ + 'spacy_sentencizer', + 'stanza_fro' + ], + + 'glg': [ + 'spacy_sentencizer', + 'stanza_glg' + ], + + 
'deu_at': [ + 'nltk_punkt_deu', + 'spacy_dependency_parser_deu', + 'spacy_sentencizer', + 'stanza_deu' + ], + 'deu_de': [ + 'nltk_punkt_deu', + 'spacy_dependency_parser_deu', + 'spacy_sentencizer', + 'stanza_deu' + ], + 'deu_ch': [ + 'nltk_punkt_deu', + 'spacy_dependency_parser_deu', + 'spacy_sentencizer', + 'stanza_deu' + ], + + 'got': [ + 'spacy_sentencizer', + 'stanza_got' + ], + + 'grc': [ + 'spacy_sentencizer', + 'stanza_grc' + ], + 'ell': [ + 'nltk_punkt_ell', + 'spacy_dependency_parser_ell', + 'spacy_sentence_recognizer_ell', + 'spacy_sentencizer', + 'stanza_ell' + ], + + 'hbo': [ + 'spacy_sentencizer', + 'stanza_hbo' + ], + 'heb': [ + 'spacy_sentencizer', + 'stanza_heb' + ], + + 'hin': [ + 'spacy_sentencizer', + 'stanza_hin' + ], + + 'hun': [ + 'spacy_sentencizer', + 'stanza_hun' + ], + + 'isl': [ + 'spacy_sentencizer', + 'stanza_isl' + ], + + 'ind': [ + 'spacy_sentencizer', + 'stanza_ind' + ], + + 'gle': [ + 'spacy_sentencizer', + 'stanza_gle' + ], + + 'ita': [ + 'nltk_punkt_ita', + 'spacy_dependency_parser_ita', + 'spacy_sentence_recognizer_ita', + 'spacy_sentencizer', + 'stanza_ita' + ], + + 'jpn': [ + 'spacy_dependency_parser_jpn', + 'spacy_sentencizer', + 'stanza_jpn' + ], + + 'khm': ['khmer_nltk_khm'], + + 'kaz': [ + 'spacy_sentencizer', + 'stanza_kaz' + ], + + 'kor': [ + 'spacy_dependency_parser_kor', + 'spacy_sentence_recognizer_kor', + 'spacy_sentencizer', + 'stanza_kor' + ], + + 'kmr': [ + 'spacy_sentencizer', + 'stanza_kmr' + ], + + 'kir': [ + 'spacy_sentencizer', + 'stanza_kir' + ], + + 'lao': [ + 'laonlp_lao', + 'spacy_sentencizer' + ], + + 'lat': [ + 'spacy_sentencizer', + 'stanza_lat' + ], + + 'lav': [ + 'spacy_sentencizer', + 'stanza_lav' + ], + + 'lij': [ + 'spacy_sentencizer', + 'stanza_lij' + ], + + 'lit': [ + 'spacy_dependency_parser_lit', + 'spacy_sentence_recognizer_lit', + 'spacy_sentencizer', + 'stanza_lit' + ], + + 'mkd': [ + 'spacy_dependency_parser_mkd', + 'spacy_sentence_recognizer_mkd', + 'spacy_sentencizer' + ], + + 'mal': [ + 
'nltk_punkt_mal', + 'spacy_sentencizer' + ], + + 'mlt': [ + 'spacy_sentencizer', + 'stanza_mlt' + ], + + 'glv': [ + 'spacy_sentencizer', + 'stanza_glv' + ], + + 'mar': [ + 'spacy_sentencizer', + 'stanza_mar' + ], + + 'pcm': [ + 'spacy_sentencizer', + 'stanza_pcm' + ], + + 'nob': [ + 'nltk_punkt_nob', + 'spacy_dependency_parser_nob', + 'spacy_sentence_recognizer_nob', + 'spacy_sentencizer', + 'stanza_nob' + ], + + 'nno': [ + 'spacy_sentencizer', + 'stanza_nno' + ], + + 'fas': [ + 'spacy_sentencizer', + 'stanza_fas' + ], + + 'pol': [ + 'nltk_punkt_pol', + 'spacy_dependency_parser_pol', + 'spacy_sentence_recognizer_pol', + 'spacy_sentencizer', + 'stanza_pol' + ], + + 'qpm': [ + 'spacy_sentencizer', + 'stanza_qpm' + ], + + 'por_br': [ + 'nltk_punkt_por', + 'spacy_dependency_parser_por', + 'spacy_sentence_recognizer_por', + 'spacy_sentencizer', + 'stanza_por' + ], + 'por_pt': [ + 'nltk_punkt_por', + 'spacy_dependency_parser_por', + 'spacy_sentence_recognizer_por', + 'spacy_sentencizer', + 'stanza_por' + ], + + 'ron': [ + 'spacy_dependency_parser_ron', + 'spacy_sentence_recognizer_ron', + 'spacy_sentencizer', + 'stanza_ron' + ], + + 'rus': [ + 'nltk_punkt_rus', + 'spacy_dependency_parser_rus', + 'spacy_sentence_recognizer_rus', + 'spacy_sentencizer', + 'stanza_rus' + ], + 'orv': [ + 'spacy_sentencizer', + 'stanza_orv' + ], + + 'sme': [ + 'spacy_sentencizer', + 'stanza_sme' + ], + + 'san': [ + 'spacy_sentencizer', + 'stanza_san' + ], + + 'gla': [ + 'spacy_sentencizer', + 'stanza_gla' + ], + + 'srp_latn': [ + 'spacy_sentencizer', + 'stanza_srp_latn' + ], + + 'snd': [ + 'spacy_sentencizer', + 'stanza_snd' + ], + + 'slk': [ + 'spacy_sentencizer', + 'stanza_slk' + ], + + 'slv': [ + 'nltk_punkt_slv', + 'spacy_dependency_parser_slv', + 'spacy_sentencizer', + 'stanza_slv' + ], + + 'hsb': [ + 'spacy_sentencizer', + 'stanza_hsb' + ], + + 'spa': [ + 'nltk_punkt_spa', + 'spacy_dependency_parser_spa', + 'spacy_sentencizer', + 'stanza_spa' + ], + + 'swe': [ + 'nltk_punkt_swe', + 
'spacy_dependency_parser_swe', + 'spacy_sentence_recognizer_swe', + 'spacy_sentencizer', + 'stanza_swe' + ], + + 'tam': [ + 'spacy_sentencizer', + 'stanza_tam' + ], + + 'tel': [ + 'spacy_sentencizer', + 'stanza_tel' + ], + + 'tha': [ + 'pythainlp_crfcut', + 'pythainlp_thaisumcut', + 'stanza_tha' + ], + + 'bod': ['botok_bod'], + + 'tur': [ + 'nltk_punkt_tur', + 'spacy_sentencizer', + 'stanza_tur' + ], + + 'ukr': [ + 'spacy_dependency_parser_ukr', + 'spacy_sentencizer', + 'stanza_ukr' + ], + + 'urd': [ + 'spacy_sentencizer', + 'stanza_urd' + ], + + 'uig': [ + 'spacy_sentencizer', + 'stanza_uig' + ], + + 'vie': [ + 'underthesea_vie', + 'stanza_vie' + ], + + 'cym': [ + 'spacy_sentencizer', + 'stanza_cym' + ], + + 'wol': [ + 'spacy_sentencizer', + 'stanza_wol' + ], + + 'other': [ + 'nltk_punkt_eng', + 'spacy_sentencizer', + 'stanza_eng' + ] }, - 'jaccard_index': { - 'col_text': _tr('wl_settings_global', 'Jaccard Index'), - 'func': wl_measures_effect_size.jaccard_index + 'word_tokenizers': { + 'afr': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_afr', + 'stanza_afr' + ], + + 'sqi': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_sqi' + ], + + 'amh': ['spacy_amh'], + + 'ara': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_ara', + 'stanza_ara' + ], + + 'xcl': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_xcl' + ], + 'hye': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_hye', + 'stanza_hye' + ], + 'hyw': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_hyw' + ], + + 'asm': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses' + ], + + 'aze': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_aze' + ], + + 'eus': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_eus', + 'stanza_eus' + ], + + 'ben': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 
'spacy_ben' + ], + + 'bel': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_bel' + ], + + 'bul': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_bul', + 'stanza_bul' + ], + + 'mya': ['stanza_mya'], + + 'bxr': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_bxr' + ], + + 'cat': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_cat', + 'stanza_cat' + ], + + 'lzh': ['stanza_lzh'], + 'zho_cn': [ + 'pkuseg_zho', + 'spacy_zho', + 'stanza_zho_cn', + 'wordless_zho_char' + ], + 'zho_tw': [ + 'pkuseg_zho', + 'spacy_zho', + 'stanza_zho_tw', + 'wordless_zho_char' + ], + + 'chu': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_chu' + ], + + 'cop': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_cop' + ], + + 'hrv': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_hrv', + 'stanza_hrv' + ], + + 'ces': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_ces', + 'stanza_ces' + ], + + 'dan': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_dan', + 'stanza_dan' + ], + + 'nld': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_nld', + 'stanza_nld' + ], + + 'ang': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_ang' + ], + 'eng_gb': [ + 'nltk_nist', 'nltk_nltk', 'nltk_penn_treebank', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_eng', + 'stanza_eng' + ], + 'eng_us': [ + 'nltk_nist', 'nltk_nltk', 'nltk_penn_treebank', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_eng', + 'stanza_eng' + ], + + 'myv': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_myv' + ], + + 'est': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_est', + 'stanza_est' + ], + + 'fao': [ + 
'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_fao', + 'stanza_fao' + ], + + 'fin': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_fin', + 'stanza_fin' + ], + + 'fra': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_fra', + 'stanza_fra' + ], + 'fro': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_fro' + ], + + 'glg': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_glg' + ], + + 'lug': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_lug' + ], + + 'deu_at': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_deu', + 'stanza_deu' + ], + 'deu_de': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_deu', + 'stanza_deu' + ], + 'deu_ch': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_deu', + 'stanza_deu' + ], + + 'got': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_got' + ], + + 'grc': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_grc', + 'stanza_grc' + ], + 'ell': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_ell', + 'stanza_ell' + ], + + 'guj': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_guj' + ], + + 'hbo': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_hbo' + ], + 'heb': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_heb', + 'stanza_heb' + ], + + 'hin': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_hin', + 'stanza_hin' + ], + + 'hun': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_hun', + 'stanza_hun' + ], + + 'isl': [ + 'nltk_nist', 'nltk_nltk', 
'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_isl', + 'stanza_isl' + ], + + 'ind': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_ind', + 'stanza_ind' + ], + + 'gle': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_gle', + 'stanza_gle' + ], + + 'ita': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_ita', + 'stanza_ita' + ], + + 'jpn': [ + 'spacy_jpn', + 'stanza_jpn', + 'sudachipy_jpn_split_mode_a', 'sudachipy_jpn_split_mode_b', 'sudachipy_jpn_split_mode_c', + 'wordless_jpn_kanji' + ], + + 'kan': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_kan' + ], + + 'kaz': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_kaz' + ], + + 'khm': ['khmer_nltk_khm'], + + 'kor': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'python_mecab_ko_mecab', + 'spacy_kor', + 'stanza_kor' + ], + + 'kmr': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_kmr' + ], + + 'kir': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_kir', + 'stanza_kir' + ], + + 'lao': ['laonlp_lao'], + + 'lat': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_lat', + 'stanza_lat' + ], + + 'lav': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_lav', + 'stanza_lav' + ], + + 'lij': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_lij', + 'stanza_lij' + ], + + 'lit': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_lit', + 'stanza_lit' + ], + + 'ltz': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_ltz' + ], + + 'mkd': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_mkd' + ], + + 'msa': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_msa' + ], + + 'mal': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 
'nltk_twitter', + 'sacremoses_moses', + 'spacy_mal' + ], + + 'mlt': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_mlt' + ], + + 'glv': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_glv' + ], + + 'mar': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_mar', + 'stanza_mar' + ], + + 'pcm': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_pcm' + ], + + 'mni_mtei': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses' + ], + + 'nep': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_nep' + ], + + 'nob': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_nob', + 'stanza_nob' + ], + 'nno': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_nno', + 'stanza_nno' + ], + + 'ori': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses' + ], + + 'fas': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', + 'spacy_fas', + 'stanza_fas' + ], + + 'pol': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_pol', + 'stanza_pol' + ], + + 'qpm': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_qpm' + ], + + 'por_br': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_por', + 'stanza_por' + ], + 'por_pt': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_por', + 'stanza_por' + ], + + 'pan_guru': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses' + ], + + 'ron': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_ron', + 'stanza_ron' + ], + + 'rus': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_rus', + 'stanza_rus' + ], + 'orv': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 
'nltk_twitter', + 'stanza_orv' + ], + + 'sme': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_sme' + ], + + 'san': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_san', + 'stanza_san' + ], + + 'gla': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_gla' + ], + + 'srp_cyrl': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_srp' + ], + + 'srp_latn': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_srp', + 'stanza_srp_latn' + ], + + 'snd': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_snd' + ], + + 'sin': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_sin' + ], + + 'slk': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_slk', + 'stanza_slk' + ], + + 'slv': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_slv', + 'stanza_slv' + ], + + 'dsb': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_dsb' + ], + + 'hsb': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_hsb', + 'stanza_hsb' + ], + + 'spa': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_spa', + 'stanza_spa' + ], + + 'swe': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_swe', + 'stanza_swe' + ], + + 'tgl': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_tgl' + ], + + 'tgk': ['nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter'], + + 'tam': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_tam', + 'stanza_tam' + ], + + 'tat': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_tat' + ], + + 'tel': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_tel', + 'stanza_tel' + ], + + 'tdt': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 
'nltk_twitter', + 'sacremoses_moses' + ], + + 'tha': [ + 'pythainlp_longest_matching', + 'pythainlp_max_matching', + 'pythainlp_max_matching_tcc', + 'pythainlp_nercut', + 'stanza_tha' + ], + + 'bod': ['botok_bod'], + + 'tir': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_tir' + ], + + 'tsn': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_tsn' + ], + + 'tur': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_tur', + 'stanza_tur' + ], + + 'ukr': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_ukr', + 'stanza_ukr' + ], + + 'urd': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_urd', + 'stanza_urd' + ], + + 'uig': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_uig' + ], + + 'vie': [ + 'nltk_tok_tok', + 'underthesea_vie', + 'stanza_vie' + ], + + 'cym': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_cym' + ], + + 'wol': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'stanza_wol' + ], + + 'yor': [ + 'nltk_nist', 'nltk_nltk', 'nltk_regex', 'nltk_twitter', + 'spacy_yor' + ], + + 'other': [ + 'nltk_nist', 'nltk_nltk', 'nltk_penn_treebank', 'nltk_regex', 'nltk_tok_tok', 'nltk_twitter', + 'sacremoses_moses', + 'spacy_eng', + 'stanza_eng' + ] }, - 'lfmd': { - 'col_text': 'LFMD', - 'func': wl_measures_effect_size.lfmd + 'syl_tokenizers': { + 'afr': ['pyphen_afr'], + 'sqi': ['pyphen_sqi'], + 'eus': ['pyphen_eus'], + 'bel': ['pyphen_bel'], + 'bul': ['pyphen_bul'], + 'cat': ['pyphen_cat'], + 'hrv': ['pyphen_hrv'], + 'ces': ['pyphen_ces'], + 'dan': ['pyphen_dan'], + 'nld': ['pyphen_nld'], + + 'eng_gb': [ + 'nltk_legality', + 'nltk_sonority_sequencing', + 'pyphen_eng_gb' + ], + + 'eng_us': [ + 'nltk_legality', + 'nltk_sonority_sequencing', + 'pyphen_eng_us' + ], + + 'epo': ['pyphen_epo'], + 'est': ['pyphen_est'], + 'fra': ['pyphen_fra'], + 'glg': ['pyphen_glg'], + 'deu_at': ['pyphen_deu_at'], + 'deu_de': 
['pyphen_deu_de'], + 'deu_ch': ['pyphen_deu_ch'], + 'ell': ['pyphen_ell'], + 'hun': ['pyphen_hun'], + 'isl': ['pyphen_isl'], + 'ind': ['pyphen_ind'], + 'ita': ['pyphen_ita'], + 'lit': ['pyphen_lit'], + 'lav': ['pyphen_lav'], + 'mon': ['pyphen_mon'], + 'nob': ['pyphen_nob'], + 'nno': ['pyphen_nno'], + 'pol': ['pyphen_pol'], + 'por_br': ['pyphen_por_br'], + 'por_pt': ['pyphen_por_pt'], + 'ron': ['pyphen_ron'], + 'rus': ['pyphen_rus'], + 'srp_cyrl': ['pyphen_srp_cyrl'], + 'srp_latn': ['pyphen_srp_latn'], + 'slk': ['pyphen_slk'], + 'slv': ['pyphen_slv'], + 'spa': ['pyphen_spa'], + 'swe': ['pyphen_swe'], + 'tel': ['pyphen_tel'], + + 'tha': [ + 'pyphen_tha', + 'pythainlp_han_solo', + 'pythainlp_syl_dict' + ], + + 'ukr': ['pyphen_ukr'], + 'zul': ['pyphen_zul'] }, - 'kilgarriffs_ratio': { - 'col_text': _tr('wl_settings_global', "Kilgarriff's Ratio"), - 'func': wl_measures_effect_size.kilgarriffs_ratio + 'pos_taggers': { + 'afr': ['stanza_afr'], + 'ara': ['stanza_ara'], + 'xcl': ['stanza_xcl'], + 'hye': ['stanza_hye'], + 'hyw': ['stanza_hyw'], + 'eus': ['stanza_eus'], + 'bel': ['stanza_bel'], + 'bul': ['stanza_bul'], + 'bxr': ['stanza_bxr'], + + 'cat': [ + 'spacy_cat', + 'stanza_cat' + ], + + 'lzh': ['stanza_lzh'], + 'zho_cn': [ + 'spacy_zho', + 'stanza_zho_cn' + ], + 'zho_tw': [ + 'spacy_zho', + 'stanza_zho_tw' + ], + + 'chu': ['stanza_chu'], + 'cop': ['stanza_cop'], + + 'hrv': [ + 'spacy_hrv', + 'stanza_hrv' + ], + + 'ces': ['stanza_ces'], + + 'dan': [ + 'spacy_dan', + 'stanza_dan' + ], + + 'nld': [ + 'spacy_nld', + 'stanza_nld' + ], + + 'ang': ['stanza_ang'], + 'eng_gb': [ + 'nltk_perceptron_eng', + 'spacy_eng', + 'stanza_eng' + ], + 'eng_us': [ + 'nltk_perceptron_eng', + 'spacy_eng', + 'stanza_eng' + ], + + 'myv': ['stanza_myv'], + 'est': ['stanza_est'], + 'fao': ['stanza_fao'], + + 'fin': [ + 'spacy_fin', + 'stanza_fin' + ], + + 'fra': [ + 'spacy_fra', + 'stanza_fra' + ], + 'fro': ['stanza_fro'], + + 'glg': ['stanza_glg'], + + 'deu_at': [ + 'spacy_deu', + 'stanza_deu' 
+ ], + 'deu_de': [ + 'spacy_deu', + 'stanza_deu' + ], + 'deu_ch': [ + 'spacy_deu', + 'stanza_deu' + ], + + 'got': ['stanza_got'], + + 'grc': ['stanza_grc'], + 'ell': [ + 'spacy_ell', + 'stanza_ell' + ], + + 'hbo': ['stanza_hbo'], + 'heb': ['stanza_heb'], + 'hin': ['stanza_hin'], + 'hun': ['stanza_hun'], + 'isl': ['stanza_isl'], + 'ind': ['stanza_ind'], + 'gle': ['stanza_gle'], + + 'ita': [ + 'spacy_ita', + 'stanza_ita' + ], + + 'jpn': [ + 'spacy_jpn', + 'stanza_jpn', + 'sudachipy_jpn' + ], + + 'kaz': ['stanza_kaz'], + 'khm': ['khmer_nltk_khm'], + + 'kor': [ + 'python_mecab_ko_mecab', + 'spacy_kor', + 'stanza_kor' + ], + + 'kmr': ['stanza_kmr'], + 'kir': ['stanza_kir'], + + 'lao': [ + 'laonlp_seqlabeling', + 'laonlp_yunshan_cup_2020' + ], + + 'lat': ['stanza_lat'], + 'lav': ['stanza_lav'], + 'lij': ['stanza_lij'], + + 'lit': [ + 'spacy_lit', + 'stanza_lit' + ], + + 'mkd': ['spacy_mkd'], + 'mlt': ['stanza_mlt'], + 'glv': ['stanza_glv'], + 'mar': ['stanza_mar'], + 'pcm': ['stanza_pcm'], + + 'nob': [ + 'spacy_nob', + 'stanza_nob' + ], + + 'nno': ['stanza_nno'], + 'fas': ['stanza_fas'], + + 'pol': [ + 'spacy_pol', + 'stanza_pol' + ], + + 'qpm': ['stanza_qpm'], + + 'por_br': [ + 'spacy_por', + 'stanza_por' + ], + 'por_pt': [ + 'spacy_por', + 'stanza_por' + ], + + 'ron': [ + 'spacy_ron', + 'stanza_ron' + ], + + + 'rus': [ + 'nltk_perceptron_rus', + 'pymorphy3_morphological_analyzer', + 'spacy_rus', + 'stanza_rus' + ], + 'orv': ['stanza_orv'], + + 'sme': ['stanza_sme'], + 'san': ['stanza_san'], + 'gla': ['stanza_gla'], + 'srp_latn': ['stanza_srp_latn'], + 'snd': ['stanza_snd'], + 'slk': ['stanza_slk'], + + 'slv': [ + 'spacy_slv', + 'stanza_slv'] + , + + 'hsb': ['stanza_hsb'], + + 'spa': [ + 'spacy_spa', + 'stanza_spa' + ], + + 'swe': [ + 'spacy_swe', + 'stanza_swe' + ], + + 'tam': ['stanza_tam'], + 'tel': ['stanza_tel'], + + 'tha': [ + 'pythainlp_perceptron_blackboard', + 'pythainlp_perceptron_orchid', + 'pythainlp_perceptron_pud' + ], + + 'bod': ['botok_bod'], + 'tur': 
['stanza_tur'], + + 'ukr': [ + 'pymorphy3_morphological_analyzer', + 'spacy_ukr', + 'stanza_ukr' + ], + + 'urd': ['stanza_urd'], + 'uig': ['stanza_uig'], + + 'vie': [ + 'stanza_vie', + 'underthesea_vie' + ], + + 'cym': ['stanza_cym'], + 'wol': ['stanza_wol'] }, - 'log_dice': { - 'col_text': 'logDice', - 'func': wl_measures_effect_size.log_dice + 'lemmatizers': { + 'afr': ['stanza_afr'], + 'sqi': ['simplemma_sqi'], + 'ara': ['stanza_ara'], + + 'xcl': ['stanza_xcl'], + 'hye': [ + 'simplemma_hye', + 'stanza_hye' + ], + 'hyw': ['stanza_hyw'], + + 'ast': ['simplemma_ast'], + 'eus': ['stanza_eus'], + 'bel': ['stanza_bel'], + 'ben': ['spacy_ben'], + + 'bul': [ + 'simplemma_bul', + 'stanza_bul' + ], + + 'bxr': ['stanza_bxr'], + + 'cat': [ + 'simplemma_cat', + 'spacy_cat', + 'stanza_cat' + ], + + 'lzh': ['stanza_lzh'], + 'zho_cn': ['stanza_zho_cn'], + 'zho_tw': ['stanza_zho_tw'], + 'chu': ['stanza_chu'], + 'cop': ['stanza_cop'], + + 'hrv': [ + 'simplemma_hbs', + 'spacy_hrv', + 'stanza_hrv' + ], + + 'ces': [ + 'simplemma_ces', + 'spacy_ces', + 'stanza_ces' + ], + + 'dan': [ + 'simplemma_dan', + 'spacy_dan', + 'stanza_dan' + ], + + 'nld': [ + 'simplemma_nld', + 'spacy_nld', + 'stanza_nld' + ], + + 'enm': ['simplemma_enm'], + 'ang': ['stanza_ang'], + 'eng_gb': [ + 'nltk_wordnet', + 'simplemma_eng', + 'spacy_eng', + 'stanza_eng' + ], + 'eng_us': [ + 'nltk_wordnet', + 'simplemma_eng', + 'spacy_eng', + 'stanza_eng' + ], + + 'myv': ['stanza_myv'], + + 'est': [ + 'simplemma_est', + 'stanza_est' + ], + + 'fin': [ + 'simplemma_fin', + 'spacy_fin', + 'stanza_fin' + ], + + 'fra': [ + 'simplemma_fra', + 'spacy_fra', + 'stanza_fra' + ], + 'fro': ['stanza_fro'], + + 'glg': [ + 'simplemma_glg', + 'stanza_glg' + ], + + 'kat': ['simplemma_kat'], + + 'deu_at': [ + 'simplemma_deu', + 'spacy_deu', + 'stanza_deu' + ], + 'deu_de': [ + 'simplemma_deu', + 'spacy_deu', + 'stanza_deu' + ], + 'deu_ch': [ + 'simplemma_deu', + 'spacy_deu', + 'stanza_deu' + ], + + 'got': ['stanza_got'], + + 'grc': [ + 
'spacy_grc', + 'stanza_grc' + ], + 'ell': [ + 'simplemma_ell', + 'spacy_ell', + 'stanza_ell' + ], + + 'hbo': ['stanza_hbo'], + 'heb': ['stanza_heb'], + + 'hin': [ + 'simplemma_hin', + 'stanza_hin' + ], + + 'hun': [ + 'simplemma_hun', + 'spacy_hun', + 'stanza_hun' + ], + + 'isl': [ + 'simplemma_isl', + 'stanza_isl' + ], + + 'ind': [ + 'simplemma_ind', + 'spacy_ind', + 'stanza_ind' + ], + + 'gle': [ + 'simplemma_gle', + 'spacy_gle', + 'stanza_gle' + ], + + 'ita': [ + 'simplemma_ita', + 'spacy_ita', + 'stanza_ita' + ], + + 'jpn': [ + 'spacy_jpn', + 'stanza_jpn', + 'sudachipy_jpn' + ], + + 'kaz': ['stanza_kaz'], + + 'kor': [ + 'spacy_kor', + 'stanza_kor' + ], + + 'kmr': ['stanza_kmr'], + 'kir': ['stanza_kir'], + + 'lat': [ + 'simplemma_lat', + 'stanza_lat' + ], + + 'lav': [ + 'simplemma_lav', + 'stanza_lav' + ], + + 'lij': ['stanza_lij'], + + 'lit': [ + 'simplemma_lit', + 'spacy_lit', + 'stanza_lit' + ], + + 'ltz': [ + 'simplemma_ltz', + 'spacy_ltz' + ], + + 'mkd': [ + 'simplemma_mkd', + 'spacy_mkd' + ], + + 'msa': ['simplemma_msa'], + + 'glv': [ + 'simplemma_glv', + 'stanza_glv' + ], + + 'mar': ['stanza_mar'], + 'pcm': ['stanza_pcm'], + + 'nob': [ + 'simplemma_nob', + 'spacy_nob', + 'stanza_nob' + ], + 'nno': [ + 'simplemma_nno', + 'stanza_nno' + ], + + 'fas': [ + 'simplemma_fas', + 'spacy_fas', + 'stanza_fas' + ], + + 'pol': [ + 'simplemma_pol', + 'spacy_pol', + 'stanza_pol' + ], + + 'qpm': ['stanza_qpm'], + + 'por_br': [ + 'simplemma_por', + 'spacy_por', + 'stanza_por' + ], + 'por_pt': [ + 'simplemma_por', + 'spacy_por', + 'stanza_por' + ], + + 'ron': [ + 'simplemma_ron', + 'spacy_ron', + 'stanza_ron' + ], + + 'rus': [ + 'simplemma_rus', + 'pymorphy3_morphological_analyzer', + 'spacy_rus', + 'stanza_rus' + ], + 'orv': ['stanza_orv'], + + 'sme': [ + 'simplemma_sme', + 'stanza_sme' + ], + + 'san': ['stanza_san'], + + 'gla': [ + 'simplemma_gla', + 'stanza_gla' + ], + + 'srp_cyrl': ['spacy_srp'], + 'srp_latn': [ + 'simplemma_hbs', + 'stanza_srp_latn' + ], + + 'slk': [ + 
'simplemma_slk', + 'stanza_slk' + ], + + 'slv': [ + 'simplemma_slv', + 'spacy_slv', + 'stanza_slv' + ], + + 'hsb': ['stanza_hsb'], + + 'spa': [ + 'simplemma_spa', + 'spacy_spa', + 'stanza_spa' + ], + + 'swa': ['simplemma_swa'], + + 'swe': [ + 'simplemma_swe', + 'spacy_swe', + 'stanza_swe' + ], + + 'tgl': [ + 'simplemma_tgl', + 'spacy_tgl' + ], + + 'tam': ['stanza_tam'], + 'bod': ['botok_bod'], + + 'tur': [ + 'simplemma_tur', + 'spacy_tur', + 'stanza_tur' + ], + + 'ukr': [ + 'pymorphy3_morphological_analyzer', + 'simplemma_ukr', + 'spacy_ukr', + 'stanza_ukr' + ], + + 'urd': [ + 'spacy_urd', + 'stanza_urd' + ], + + 'uig': ['stanza_uig'], + + 'cym': [ + 'simplemma_cym', + 'stanza_cym' + ], + + 'wol': ['stanza_wol'] }, - 'log_ratio': { - 'col_text': _tr('wl_settings_global', 'Log Ratio'), - 'func': wl_measures_effect_size.log_ratio + 'stop_word_lists': { + 'ara': ['nltk_ara'], + 'aze': ['nltk_aze'], + 'eus': ['nltk_eus'], + 'ben': ['nltk_ben'], + 'cat': ['nltk_cat'], + 'zho_cn': ['nltk_zho_cn'], + 'zho_tw': ['nltk_zho_tw'], + 'dan': ['nltk_dan'], + 'nld': ['nltk_nld'], + 'eng_gb': ['nltk_eng'], + 'eng_us': ['nltk_eng'], + 'fin': ['nltk_fin'], + 'fra': ['nltk_fra'], + 'deu_at': ['nltk_deu'], + 'deu_de': ['nltk_deu'], + 'deu_ch': ['nltk_deu'], + 'ell': ['nltk_ell'], + 'heb': ['nltk_heb'], + 'hun': ['nltk_hun'], + 'ind': ['nltk_ind'], + 'ita': ['nltk_ita'], + 'kaz': ['nltk_kaz'], + 'lao': ['laonlp_lao'], + 'nep': ['nltk_nep'], + 'nob': ['nltk_nob'], + 'por_br': ['nltk_por'], + 'por_pt': ['nltk_por'], + 'ron': ['nltk_ron'], + 'rus': ['nltk_rus'], + 'slv': ['nltk_slv'], + 'spa': ['nltk_spa'], + 'swe': ['nltk_swe'], + 'tgk': ['nltk_tgk'], + 'tha': ['pythainlp_tha'], + 'tur': ['nltk_tur'], + + 'other': [] }, - 'mi_log_f': { - 'col_text': 'MI.log-f', - 'func': wl_measures_effect_size.mi_log_f + 'dependency_parsers': { + 'afr': ['stanza_afr'], + 'ara': ['stanza_ara'], + 'xcl': ['stanza_xcl'], + 'hye': ['stanza_hye'], + 'hyw': ['stanza_hyw'], + 'eus': ['stanza_eus'], + 'bel': 
['stanza_bel'], + 'bul': ['stanza_bul'], + 'bxr': ['stanza_bxr'], + + 'cat': [ + 'spacy_cat', + 'stanza_cat' + ], + + 'lzh': ['stanza_lzh'], + 'zho_cn': [ + 'spacy_zho', + 'stanza_zho_cn' + ], + 'zho_tw': [ + 'spacy_zho', + 'stanza_zho_tw' + ], + + 'chu': ['stanza_chu'], + 'cop': ['stanza_cop'], + + 'hrv': [ + 'spacy_hrv', + 'stanza_hrv' + ], + + 'ces': ['stanza_ces'], + + 'dan': [ + 'spacy_dan', + 'stanza_dan' + ], + + 'nld': [ + 'spacy_nld', + 'stanza_nld' + ], + + 'ang': ['stanza_ang'], + 'eng_gb': [ + 'spacy_eng', + 'stanza_eng' + ], + 'eng_us': [ + 'spacy_eng', + 'stanza_eng' + ], + + 'myv': ['stanza_myv'], + 'est': ['stanza_est'], + 'fao': ['stanza_fao'], + + 'fin': [ + 'spacy_fin', + 'stanza_fin' + ], + + 'fra': [ + 'spacy_fra', + 'stanza_fra' + ], + 'fro': ['stanza_fro'], + + 'glg': ['stanza_glg'], + + 'deu_at': [ + 'spacy_deu', + 'stanza_deu' + ], + 'deu_de': [ + 'spacy_deu', + 'stanza_deu' + ], + 'deu_ch': [ + 'spacy_deu', + 'stanza_deu' + ], + + 'got': ['stanza_got'], + + 'grc': ['stanza_grc'], + 'ell': [ + 'spacy_ell', + 'stanza_ell' + ], + + 'hbo': ['stanza_hbo'], + 'heb': ['stanza_heb'], + 'hin': ['stanza_hin'], + 'hun': ['stanza_hun'], + 'isl': ['stanza_isl'], + 'ind': ['stanza_ind'], + 'gle': ['stanza_gle'], + + 'ita': [ + 'spacy_ita', + 'stanza_ita' + ], + + 'jpn': [ + 'spacy_jpn', + 'stanza_jpn' + ], + + 'kaz': ['stanza_kaz'], + + 'kor': [ + 'spacy_kor', + 'stanza_kor' + ], + + 'kmr': ['stanza_kmr'], + 'kir': ['stanza_kir'], + 'lat': ['stanza_lat'], + 'lav': ['stanza_lav'], + 'lij': ['stanza_lij'], + + 'lit': [ + 'spacy_lit', + 'stanza_lit' + ], + + 'mkd': ['spacy_mkd'], + 'mlt': ['stanza_mlt'], + 'glv': ['stanza_glv'], + 'mar': ['stanza_mar'], + 'pcm': ['stanza_pcm'], + + 'nob': [ + 'spacy_nob', + 'stanza_nob' + ], + + 'nno': ['stanza_nno'], + 'fas': ['stanza_fas'], + + 'pol': [ + 'spacy_pol', + 'stanza_pol' + ], + + 'qpm': ['stanza_qpm'], + + 'por_br': [ + 'spacy_por', + 'stanza_por' + ], + 'por_pt': [ + 'spacy_por', + 'stanza_por' + ], + + 
'ron': [ + 'spacy_ron', + 'stanza_ron' + ], + + 'rus': [ + 'spacy_rus', + 'stanza_rus' + ], + 'orv': ['stanza_orv'], + + 'sme': ['stanza_sme'], + 'san': ['stanza_san'], + 'gla': ['stanza_gla'], + 'srp_latn': ['stanza_srp_latn'], + 'slk': ['stanza_slk'], + + 'slv': [ + 'spacy_slv', + 'stanza_slv' + ], + + 'hsb': ['stanza_hsb'], + + 'spa': [ + 'spacy_spa', + 'stanza_spa' + ], + + 'swe': [ + 'spacy_swe', + 'stanza_swe' + ], + + 'tam': ['stanza_tam'], + 'tel': ['stanza_tel'], + 'tur': ['stanza_tur'], + + 'ukr': [ + 'spacy_ukr', + 'stanza_ukr' + ], + + 'urd': ['stanza_urd'], + 'uig': ['stanza_uig'], + 'vie': ['stanza_vie'], + 'cym': ['stanza_cym'], + 'wol': ['stanza_wol'] }, - 'min_sensitivity': { - 'col_text': _tr('wl_settings_global', 'Minimum Sensitivity'), - 'func': wl_measures_effect_size.min_sensitivity + 'sentiment_analyzers': { + 'afr': ['vader_afr'], + 'sqi': ['vader_sqi'], + 'amh': ['vader_amh'], + 'ara': ['vader_ara'], + 'hye': ['vader_hye'], + 'hyw': ['vader_hye'], + 'asm': ['vader_asm'], + 'aze': ['vader_aze'], + 'eus': ['vader_eus'], + 'bel': ['vader_bel'], + 'ben': ['vader_ben'], + 'bul': ['vader_bul'], + 'mya': ['vader_mya'], + 'cat': ['vader_cat'], + + 'zho_cn': [ + 'stanza_zho_cn', + 'vader_zho_cn' + ], + + 'zho_tw': ['vader_zho_tw'], + 'hrv': ['vader_hrv'], + 'ces': ['vader_ces'], + 'dan': ['vader_dan'], + 'nld': ['vader_nld'], + + 'eng_gb': [ + 'stanza_eng', + 'vader_eng' + ], + 'eng_us': [ + 'stanza_eng', + 'vader_eng' + ], + + 'epo': ['vader_epo'], + 'est': ['vader_est'], + 'fin': ['vader_fin'], + 'fra': ['vader_fra'], + 'glg': ['vader_glg'], + 'kat': ['vader_kat'], + + 'deu_at': [ + 'stanza_deu', + 'vader_deu' + ], + 'deu_de': [ + 'stanza_deu', + 'vader_deu' + ], + 'deu_ch': [ + 'stanza_deu', + 'vader_deu' + ], + + 'ell': ['vader_ell'], + 'guj': ['vader_guj'], + 'heb': ['vader_heb'], + 'hin': ['vader_hin'], + 'hun': ['vader_hun'], + 'isl': ['vader_isl'], + 'ind': ['vader_ind'], + 'gle': ['vader_gle'], + 'ita': ['vader_ita'], + 'jpn': 
['vader_jpn'], + 'kan': ['vader_kan'], + 'kaz': ['vader_kaz'], + 'khm': ['vader_khm'], + 'kor': ['vader_kor'], + 'kmr': ['vader_kmr'], + 'kir': ['vader_kir'], + 'lao': ['vader_lao'], + 'lat': ['vader_lat'], + 'lav': ['vader_lav'], + 'lit': ['vader_lit'], + 'lug': ['vader_lug'], + 'ltz': ['vader_ltz'], + 'mkd': ['vader_mkd'], + 'msa': ['vader_msa'], + 'mal': ['vader_mal'], + 'mlt': ['vader_mlt'], + + 'mar': [ + 'stanza_mar', + 'vader_mar' + ], + + 'mni_mtei': ['vader_mni_mtei'], + 'mon': ['vader_mon'], + 'nep': ['vader_nep'], + 'nob': ['vader_nob'], + 'ori': ['vader_ori'], + 'fas': ['vader_fas'], + 'pol': ['vader_pol'], + 'por_pt': ['vader_por'], + 'por_br': ['vader_por'], + 'pan_guru': ['vader_pan_guru'], + 'ron': ['vader_ron'], + 'rus': ['vader_rus'], + 'san': ['vader_san'], + 'gla': ['vader_gla'], + 'srp_cyrl': ['vader_srp_cyrl'], + 'srp_latn': ['vader_srp_cyrl'], + 'snd': ['vader_snd'], + 'sin': ['vader_sin'], + 'slk': ['vader_slk'], + 'slv': ['vader_slv'], + + 'spa': [ + 'stanza_spa', + 'vader_spa' + ], + + 'swa': ['vader_swa'], + 'swe': ['vader_swe'], + 'tgl': ['vader_tgl'], + 'tgk': ['vader_tgk'], + 'tam': ['vader_tam'], + 'tat': ['vader_tat'], + 'tel': ['vader_tel'], + 'tha': ['vader_tha'], + 'tir': ['vader_tir'], + 'tur': ['vader_tur'], + 'ukr': ['vader_ukr'], + 'urd': ['vader_urd'], + 'uig': ['vader_uig'], + + 'vie': [ + 'stanza_vie', + 'underthesea_vie' + ], + + 'cym': ['vader_cym'], + 'yor': ['vader_yor'], + 'zul': ['vader_zul'] }, - 'md': { - 'col_text': 'MD', - 'func': wl_measures_effect_size.md + # Only people's names are capitalized + # Case of measure names are preserved + 'mapping_measures': { + 'dispersion': { + _tr('wl_settings_global', 'None'): 'none', + _tr('wl_settings_global', 'Average logarithmic distance'): 'ald', + _tr('wl_settings_global', 'Average reduced frequency'): 'arf', + _tr('wl_settings_global', 'Average waiting time'): 'awt', + _tr('wl_settings_global', "Carroll's D₂"): 'carrolls_d2', + _tr('wl_settings_global', "Gries's DP"): 
'griess_dp', + _tr('wl_settings_global', "Juilland's D"): 'juillands_d', + _tr('wl_settings_global', "Lyne's D₃"): 'lynes_d3', + _tr('wl_settings_global', "Rosengren's S"): 'rosengrens_s', + _tr('wl_settings_global', "Zhang's Distributional Consistency"): 'zhangs_dc' + }, + + 'adjusted_freq': { + _tr('wl_settings_global', 'None'): 'none', + _tr('wl_settings_global', 'Average logarithmic distance'): 'fald', + _tr('wl_settings_global', 'Average reduced frequency'): 'farf', + _tr('wl_settings_global', 'Average waiting time'): 'fawt', + _tr('wl_settings_global', "Carroll's Uₘ"): 'carrolls_um', + _tr('wl_settings_global', "Engwall's FM"): 'engwalls_fm', + _tr('wl_settings_global', "Juilland's U"): 'juillands_u', + _tr('wl_settings_global', "Kromer's UR"): 'kromers_ur', + _tr('wl_settings_global', "Rosengren's KF"): 'rosengrens_kf' + }, + + 'statistical_significance': { + _tr('wl_settings_global', 'None'): 'none', + _tr('wl_settings_global', "Fisher's exact test"): 'fishers_exact_test', + _tr('wl_settings_global', 'Log-likelihood ratio test'): 'log_likelihood_ratio_test', + _tr('wl_settings_global', 'Mann-Whitney U Test'): 'mann_whitney_u_test', + _tr('wl_settings_global', "Pearson's chi-squared test"): 'pearsons_chi_squared_test', + _tr('wl_settings_global', "Student's t-test (1-sample)"): 'students_t_test_1_sample', + _tr('wl_settings_global', "Student's t-test (2-sample)"): 'students_t_test_2_sample', + _tr('wl_settings_global', 'z-score'): 'z_score', + _tr('wl_settings_global', 'z-score (Berry-Rogghe)'): 'z_score_berry_rogghe' + }, + + 'bayes_factor': { + _tr('wl_settings_global', 'None'): 'none', + _tr('wl_settings_global', 'Log-likelihood ratio test'): 'log_likelihood_ratio_test', + _tr('wl_settings_global', "Student's t-test (2-sample)"): 'students_t_test_2_sample' + }, + + 'effect_size': { + _tr('wl_settings_global', 'None'): 'none', + '%DIFF': 'pct_diff', + _tr('wl_settings_global', 'Cubic association ratio'): 'im3', + _tr('wl_settings_global', "Dice's 
coefficient"): 'dices_coeff', + _tr('wl_settings_global', 'Difference coefficient'): 'diff_coeff', + _tr('wl_settings_global', 'Jaccard index'): 'jaccard_index', + _tr('wl_settings_global', 'Log-frequency biased MD'): 'lfmd', + _tr('wl_settings_global', "Kilgarriff's ratio"): 'kilgarriffs_ratio', + 'logDice': 'log_dice', + _tr('wl_settings_global', 'Log ratio'): 'log_ratio', + 'MI.log-f': 'mi_log_f', + _tr('wl_settings_global', 'Minimum sensitivity'): 'min_sensitivity', + _tr('wl_settings_global', 'Mutual dependency'): 'md', + _tr('wl_settings_global', 'Mutual expectation'): 'me', + _tr('wl_settings_global', 'Mutual information'): 'mi', + _tr('wl_settings_global', 'Odds ratio'): 'or', + _tr('wl_settings_global', 'Pointwise mutual information'): 'pmi', + _tr('wl_settings_global', 'Poisson collocation measure'): 'poisson_collocation_measure', + _tr('wl_settings_global', 'Squared phi coefficient'): 'squared_phi_coeff' + } }, - 'me': { - 'col_text': 'ME', - 'func': wl_measures_effect_size.me + 'measures_dispersion': { + 'none': { + 'col_text': None, + 'func': None, + 'type': '' + }, + + 'ald': { + 'col_text': 'ALD', + 'func': wl_measures_dispersion.ald, + 'type': 'dist_based' + }, + + 'arf': { + 'col_text': 'ARF', + 'func': wl_measures_dispersion.arf, + 'type': 'dist_based' + }, + + 'awt': { + 'col_text': 'AWT', + 'func': wl_measures_dispersion.awt, + 'type': 'dist_based' + }, + + 'carrolls_d2': { + 'col_text': _tr('wl_settings_global', "Carroll's D₂"), + 'func': wl_measures_dispersion.carrolls_d2, + 'type': 'parts_based' + }, + + 'griess_dp': { + 'col_text': _tr('wl_settings_global', "Gries's DP"), + 'func': wl_measures_dispersion.griess_dp, + 'type': 'parts_based' + }, + + 'juillands_d': { + 'col_text': _tr('wl_settings_global', "Juilland's D"), + 'func': wl_measures_dispersion.juillands_d, + 'type': 'parts_based' + }, + + 'lynes_d3': { + 'col_text': _tr('wl_settings_global', "Lyne's D₃"), + 'func': wl_measures_dispersion.lynes_d3, + 'type': 'parts_based' + }, + + 
'rosengrens_s': { + 'col_text': _tr('wl_settings_global', "Rosengren's S"), + 'func': wl_measures_dispersion.rosengrens_s, + 'type': 'parts_based' + }, + + 'zhangs_dc': { + 'col_text': _tr('wl_settings_global', "Zhang's DC"), + 'func': wl_measures_dispersion.zhangs_distributional_consistency, + 'type': 'parts_based' + } }, - 'mi': { - 'col_text': 'MI', - 'func': wl_measures_effect_size.mi + 'measures_adjusted_freq': { + 'none': { + 'col_text': None, + 'func': None, + 'type': '' + }, + + 'fald': { + 'col_text': 'f-ALD', + 'func': wl_measures_adjusted_freq.fald, + 'type': 'dist_based' + }, + + 'farf': { + 'col_text': 'f-ARF', + 'func': wl_measures_adjusted_freq.farf, + 'type': 'dist_based' + }, + + 'fawt': { + 'col_text': 'f-AWT', + 'func': wl_measures_adjusted_freq.fawt, + 'type': 'dist_based' + }, + + 'carrolls_um': { + 'col_text': _tr('wl_settings_global', "Carroll's Uₘ"), + 'func': wl_measures_adjusted_freq.carrolls_um, + 'type': 'parts_based' + }, + + 'engwalls_fm': { + 'col_text': _tr('wl_settings_global', "Engwall's FM"), + 'func': wl_measures_adjusted_freq.engwalls_fm, + 'type': 'parts_based' + }, + + 'juillands_u': { + 'col_text': _tr('wl_settings_global', "Juilland's U"), + 'func': wl_measures_adjusted_freq.juillands_u, + 'type': 'parts_based' + }, + + 'kromers_ur': { + 'col_text': _tr('wl_settings_global', "Kromer's UR"), + 'func': wl_measures_adjusted_freq.kromers_ur, + 'type': 'parts_based' + }, + + 'rosengrens_kf': { + 'col_text': _tr('wl_settings_global', "Rosengren's KF"), + 'func': wl_measures_adjusted_freq.rosengrens_kf, + 'type': 'parts_based' + } }, - 'or': { - 'col_text': 'OR', - 'func': wl_measures_effect_size.odds_ratio + 'tests_statistical_significance': { + 'none': { + 'col_text': None, + 'func': None, + 'to_sections': False, + 'collocation_extractor': True, + 'keyword_extractor': True + }, + + 'fishers_exact_test': { + # There is no test statistic for Fisher's exact test + 'col_text': None, + 'func': 
wl_measures_statistical_significance.fishers_exact_test, + 'to_sections': False, + 'collocation_extractor': True, + 'keyword_extractor': True + }, + + 'log_likelihood_ratio_test': { + 'col_text': _tr('wl_settings_global', 'Log-likelihood Ratio'), + 'func': wl_measures_statistical_significance.log_likelihood_ratio_test, + 'to_sections': False, + 'collocation_extractor': True, + 'keyword_extractor': True + }, + + 'mann_whitney_u_test': { + 'col_text': 'U1', + 'func': wl_measures_statistical_significance.mann_whitney_u_test, + 'to_sections': True, + 'collocation_extractor': False, + 'keyword_extractor': True + }, + + 'pearsons_chi_squared_test': { + 'col_text': 'χ2', + 'func': wl_measures_statistical_significance.pearsons_chi_squared_test, + 'to_sections': False, + 'collocation_extractor': True, + 'keyword_extractor': True + }, + + 'students_t_test_1_sample': { + 'col_text': _tr('wl_settings_global', 't-statistic'), + 'func': wl_measures_statistical_significance.students_t_test_1_sample, + 'to_sections': False, + 'collocation_extractor': True, + 'keyword_extractor': True + }, + + 'students_t_test_2_sample': { + 'col_text': _tr('wl_settings_global', 't-statistic'), + 'func': wl_measures_statistical_significance.students_t_test_2_sample, + 'to_sections': True, + 'collocation_extractor': False, + 'keyword_extractor': True + }, + + 'z_score': { + 'col_text': _tr('wl_settings_global', 'z-score'), + 'func': wl_measures_statistical_significance.z_score, + 'to_sections': False, + 'collocation_extractor': True, + 'keyword_extractor': True + }, + + 'z_score_berry_rogghe': { + 'col_text': _tr('wl_settings_global', 'z-score'), + 'func': wl_measures_statistical_significance.z_score_berry_rogghe, + 'to_sections': False, + 'collocation_extractor': True, + 'keyword_extractor': False + } }, - 'pmi': { - 'col_text': 'PMI', - 'func': wl_measures_effect_size.pmi + 'measures_bayes_factor': { + 'none': { + 'func': None, + 'to_sections': None, + 'collocation_extractor': True, + 
'keyword_extractor': True + }, + + 'log_likelihood_ratio_test': { + 'func': wl_measures_bayes_factor.bayes_factor_log_likelihood_ratio_test, + 'to_sections': False, + 'collocation_extractor': True, + 'keyword_extractor': True + }, + + 'students_t_test_2_sample': { + 'func': wl_measures_bayes_factor.bayes_factor_students_t_test_2_sample, + 'to_sections': True, + 'collocation_extractor': False, + 'keyword_extractor': True + }, }, - 'poisson_collocation_measure': { - 'col_text': _tr('wl_settings_global', 'Poisson Collocation Measure'), - 'func': wl_measures_effect_size.poisson_collocation_measure + 'measures_effect_size': { + 'none': { + 'col_text': None, + 'func': None + }, + + 'pct_diff': { + 'col_text': '%DIFF', + 'func': wl_measures_effect_size.pct_diff + }, + + 'im3': { + 'col_text': 'IM³', + 'func': wl_measures_effect_size.im3 + }, + + 'dices_coeff': { + 'col_text': _tr('wl_settings_global', "Dice's Coefficient"), + 'func': wl_measures_effect_size.dices_coeff + }, + + 'diff_coeff': { + 'col_text': _tr('wl_settings_global', 'Difference Coefficient'), + 'func': wl_measures_effect_size.diff_coeff + }, + + 'jaccard_index': { + 'col_text': _tr('wl_settings_global', 'Jaccard Index'), + 'func': wl_measures_effect_size.jaccard_index + }, + + 'lfmd': { + 'col_text': 'LFMD', + 'func': wl_measures_effect_size.lfmd + }, + + 'kilgarriffs_ratio': { + 'col_text': _tr('wl_settings_global', "Kilgarriff's Ratio"), + 'func': wl_measures_effect_size.kilgarriffs_ratio + }, + + 'log_dice': { + 'col_text': 'logDice', + 'func': wl_measures_effect_size.log_dice + }, + + 'log_ratio': { + 'col_text': _tr('wl_settings_global', 'Log Ratio'), + 'func': wl_measures_effect_size.log_ratio + }, + + 'mi_log_f': { + 'col_text': 'MI.log-f', + 'func': wl_measures_effect_size.mi_log_f + }, + + 'min_sensitivity': { + 'col_text': _tr('wl_settings_global', 'Minimum Sensitivity'), + 'func': wl_measures_effect_size.min_sensitivity + }, + + 'md': { + 'col_text': 'MD', + 'func': wl_measures_effect_size.md + 
}, + + 'me': { + 'col_text': 'ME', + 'func': wl_measures_effect_size.me + }, + + 'mi': { + 'col_text': 'MI', + 'func': wl_measures_effect_size.mi + }, + + 'or': { + 'col_text': 'OR', + 'func': wl_measures_effect_size.odds_ratio + }, + + 'pmi': { + 'col_text': 'PMI', + 'func': wl_measures_effect_size.pmi + }, + + 'poisson_collocation_measure': { + 'col_text': _tr('wl_settings_global', 'Poisson Collocation Measure'), + 'func': wl_measures_effect_size.poisson_collocation_measure + }, + + 'squared_phi_coeff': { + 'col_text': 'φ2', + 'func': wl_measures_effect_size.squared_phi_coeff + } }, - 'squared_phi_coeff': { - 'col_text': 'φ2', - 'func': wl_measures_effect_size.squared_phi_coeff + 'styles': { + 'style_dialog': ''' + + ''' } - }, - - 'styles': { - 'style_dialog': ''' - - ''' } -} -# Custom stop word lists (preserve order of language names) -stop_word_lists = SETTINGS_GLOBAL['stop_word_lists'].copy() + # Custom stop word lists (preserve order of language names) + stop_word_lists = SETTINGS_GLOBAL['stop_word_lists'].copy() + + SETTINGS_GLOBAL['stop_word_lists'].clear() -SETTINGS_GLOBAL['stop_word_lists'].clear() + for lang in SETTINGS_GLOBAL['langs'].values(): + lang_code = lang[0] -for lang in SETTINGS_GLOBAL['langs'].values(): - lang_code = lang[0] + SETTINGS_GLOBAL['stop_word_lists'][lang_code] = stop_word_lists.get(lang_code, []) + ['custom'] - SETTINGS_GLOBAL['stop_word_lists'][lang_code] = stop_word_lists.get(lang_code, []) + ['custom'] + return SETTINGS_GLOBAL diff --git a/wordless/wl_settings/wl_settings_measures.py b/wordless/wl_settings/wl_settings_measures.py index a25f80859..5a0773446 100644 --- a/wordless/wl_settings/wl_settings_measures.py +++ b/wordless/wl_settings/wl_settings_measures.py @@ -38,8 +38,8 @@ def __init__(self, main): self.combo_box_rd_variant = wl_boxes.Wl_Combo_Box(self) self.combo_box_rd_variant.addItems([ - self.tr('Policy one'), - self.tr('Policy two') + self.tr('Policy One'), + self.tr('Policy Two') ]) 
self.group_box_rd.setLayout(wl_layouts.Wl_Layout()) @@ -362,7 +362,7 @@ def __init__(self, main): self.settings_custom = self.main.settings_custom['measures']['lexical_density_diversity'] # HD-D - self.group_box_hdd = QGroupBox(self.tr('HD-D'), self) + self.group_box_hdd = QGroupBox('HD-D', self) self.label_sample_size = QLabel(self.tr('Sample size:'), self) self.spin_box_sample_size = wl_boxes.Wl_Spin_Box(self) diff --git a/wordless/wl_settings/wl_settings_pos_tagging.py b/wordless/wl_settings/wl_settings_pos_tagging.py index 03f8a00ff..9bb4b11e9 100644 --- a/wordless/wl_settings/wl_settings_pos_tagging.py +++ b/wordless/wl_settings/wl_settings_pos_tagging.py @@ -354,8 +354,8 @@ def __init__(self, main): self.table_mappings.setItemDelegateForColumn(2, wl_item_delegates.Wl_Item_Delegate_Combo_Box( parent = self.table_mappings, items = [ - 'Content words', - 'Function words' + self.tr('Content words'), + self.tr('Function words') ] )) self.table_mappings.setItemDelegateForColumn(3, wl_item_delegates.Wl_Item_Delegate(self.table_mappings, QPlainTextEdit)) @@ -504,6 +504,7 @@ def reset_mappings(self): title = self.tr('Reset Mappings'), text = self.tr('''
Do you want to reset all mappings to their default settings?
+
Note: This will only affect the mapping settings in the currently shown table.
''') ): @@ -515,6 +516,7 @@ def reset_all_mappings(self): title = self.tr('Reset All Mappings'), text = self.tr('''
Do you want to reset all mappings to their default settings?
+
Warning: This will affect the mapping settings in all tables!
''') ): diff --git a/wordless/wl_utils/wl_threading.py b/wordless/wl_utils/wl_threading.py index ad834a9e5..8df072e19 100644 --- a/wordless/wl_utils/wl_threading.py +++ b/wordless/wl_utils/wl_threading.py @@ -106,4 +106,3 @@ def start_worker(self): self.start() self.quit() - self.wait() diff --git a/wordless/wl_widgets/wl_buttons.py b/wordless/wl_widgets/wl_buttons.py index 329c6c783..163c7d661 100644 --- a/wordless/wl_widgets/wl_buttons.py +++ b/wordless/wl_widgets/wl_buttons.py @@ -82,7 +82,7 @@ def paintEvent(self, event): painter.drawRect(5, 5, 17, 17) def pick_color(self): - color_picked = QColorDialog.getColor(QColor(self.get_color()), self.main, _tr('wl_buttons', 'Pick Color')) + color_picked = QColorDialog.getColor(QColor(self.get_color()), self.main, self.tr('Pick Color')) if color_picked.isValid(): self.set_color(color_picked.name().upper()) @@ -115,7 +115,7 @@ def transparent_changed(): class Wl_Button_Restore_Defaults(Wl_Button): def __init__(self, parent, load_settings): - super().__init__(_tr('wl_buttons', 'Restore defaults'), parent) + super().__init__(_tr('Wl_Button_Restore_Defaults', 'Restore defaults'), parent) self.parent = parent self.load_settings = load_settings diff --git a/wordless/wl_widgets/wl_lists.py b/wordless/wl_widgets/wl_lists.py index 8cfd3f466..d5e31a75e 100644 --- a/wordless/wl_widgets/wl_lists.py +++ b/wordless/wl_widgets/wl_lists.py @@ -41,6 +41,7 @@ _tr = QCoreApplication.translate +# self.tr() does not work in inherited classes class Wl_List_Add_Ins_Del_Clr(QListView): def __init__( self, parent, diff --git a/wordless/wl_widgets/wl_tables.py b/wordless/wl_widgets/wl_tables.py index 6c590ac6c..0d8258b41 100644 --- a/wordless/wl_widgets/wl_tables.py +++ b/wordless/wl_widgets/wl_tables.py @@ -52,6 +52,7 @@ # pylint: disable=unnecessary-lambda +# self.tr() does not work in inherited classes class Wl_Table(QTableView): def __init__( self, parent, @@ -1139,11 +1140,11 @@ def __init__( 
self.model().itemChanged.connect(self.item_changed) self.selectionModel().selectionChanged.connect(self.selection_changed) - self.button_generate_table = QPushButton(_tr('Wl_Table_Data', 'Generate table'), self) - self.button_generate_fig = QPushButton(_tr('Wl_Table_Data', 'Generate figure'), self) - self.button_exp_selected_cells = QPushButton(_tr('Wl_Table_Data', 'Export selected cells...'), self) - self.button_exp_all_cells = QPushButton(_tr('Wl_Table_Data', 'Export all cells...'), self) - self.button_clr_table = QPushButton(_tr('Wl_Table_Data', 'Clear table'), self) + self.button_generate_table = QPushButton(_tr('wl_tables', 'Generate table'), self) + self.button_generate_fig = QPushButton(_tr('wl_tables', 'Generate figure'), self) + self.button_exp_selected_cells = QPushButton(_tr('wl_tables', 'Export selected cells...'), self) + self.button_exp_all_cells = QPushButton(_tr('wl_tables', 'Export all cells...'), self) + self.button_clr_table = QPushButton(_tr('wl_tables', 'Clear table'), self) if not generate_fig: self.button_generate_fig.hide() @@ -1183,7 +1184,7 @@ def selection_changed(self): def sorting_changed(self): if not self.is_empty(): - if self.tr('Rank') in self.get_header_labels_hor(): + if _tr('wl_tables', 'Rank') in self.get_header_labels_hor(): self.update_ranks() if self.table_settings['show_cum_data']: @@ -1462,7 +1463,7 @@ def update_ranks(self): sort_section = self.horizontalHeader().sortIndicatorSection() sort_order = self.horizontalHeader().sortIndicatorOrder() - col_rank = self.find_header_hor(self.tr('Rank')) + col_rank = self.find_header_hor(_tr('wl_tables', 'Rank')) self.sortByColumn(sort_section, sort_order) @@ -1729,7 +1730,7 @@ def filter_table(self): self.enable_updates() - if self.tr('Rank') in self.get_header_labels_hor(): + if _tr('wl_tables', 'Rank') in self.get_header_labels_hor(): self.update_ranks() if self.table_settings['show_cum_data']: @@ -1749,8 +1750,8 @@ def clr_table(self, num_headers = 1, confirm = False): if not 
self.is_empty() and not self.results_saved: confirmed = wl_msg_boxes.wl_msg_box_question( self.main, - title = self.tr('Clear Table'), - text = self.tr(''' + title = _tr('wl_tables', 'Clear Table'), + text = _tr('wl_tables', '''
The results in the table have yet been exported. Do you really want to clear the table?
diff --git a/wordless/wl_wordlist_generator.py b/wordless/wl_wordlist_generator.py index e2b9ff53f..734adbd5b 100644 --- a/wordless/wl_wordlist_generator.py +++ b/wordless/wl_wordlist_generator.py @@ -474,13 +474,13 @@ def update_gui_table(self, err_msg, tokens_freq_files, tokens_stats_files, syls_ if settings['token_settings']['use_tags']: self.set_item_err( i, 2, - _tr('wl_wordlist_generator', 'N/A'), + self.tr('N/A'), alignment_hor = 'left' ) elif len(syls_tokens[token]) == 1: token_syllabified = list(syls_tokens[token].values())[0] - if token_syllabified == _tr('wl_wordlist_generator', 'No language support'): + if token_syllabified == self.tr('No language support'): self.set_item_err(i, 2, token_syllabified, alignment_hor = 'left') else: self.model().setItem(i, 2, wl_tables.Wl_Table_Item(token_syllabified)) @@ -494,7 +494,7 @@ def update_gui_table(self, err_msg, tokens_freq_files, tokens_stats_files, syls_ tokens_syllabified = ', '.join(token_syllabified_forms) - if _tr('wl_wordlist_generator', 'No language support') in tokens_syllabified: + if self.tr('No language support') in tokens_syllabified: self.set_item_err(i, 2, tokens_syllabified, alignment_hor = 'left') else: self.model().setItem(i, 2, wl_tables.Wl_Table_Item(tokens_syllabified)) @@ -587,6 +587,7 @@ def update_gui_fig(self, err_msg, tokens_freq_files, tokens_stats_files, syls_to finally: wl_checks_work_area.check_err_fig(self.main, err_msg) +# self.tr() does not work in inherited classes class Wl_Worker_Wordlist_Generator(wl_threading.Wl_Worker): worker_done = pyqtSignal(str, dict, dict, dict) @@ -625,7 +626,7 @@ def run(self): if token.syls: self.syls_tokens[token][text.lang] = '-'.join(token.syls) else: - self.syls_tokens[token][text.lang] = _tr('wl_wordlist_generator', 'No language support') + self.syls_tokens[token][text.lang] = _tr('Wl_Worker_Wordlist_Generator', 'No language support') texts.append(text)