From 61828038774cbc3aee4362e8608dffb76b4b622d Mon Sep 17 00:00:00 2001
From: BLKSerene <blkserene@gmail.com>
Date: Sat, 18 May 2024 02:15:42 +0800
Subject: [PATCH] Dependencies: Upgrade PyThaiNLP to 5.0.3; Utils: Add
 PyThaiNLP's Han-solo

---
 ACKS.md                                       |  2 +-
 CHANGELOG.md                                  |  2 ++
 doc/trs/zho_cn/ACKS.md                        |  2 +-
 doc/trs/zho_tw/ACKS.md                        |  2 +-
 requirements/requirements_dev.txt             |  1 -
 requirements/requirements_tests.txt           |  3 +--
 tests/tests_nlp/test_pos_tagging.py           | 10 +++++-----
 tests/tests_nlp/test_sentence_tokenization.py |  2 +-
 tests/tests_nlp/test_syl_tokenization.py      |  6 +++++-
 tests/tests_nlp/test_word_detokenization.py   |  2 +-
 tests/tests_nlp/test_word_tokenization.py     |  8 ++++----
 utils/wl_generate_acks.py                     |  2 +-
 wordless/wl_nlp/wl_syl_tokenization.py        |  6 ++++--
 wordless/wl_settings/wl_settings_default.py   |  2 +-
 wordless/wl_settings/wl_settings_global.py    |  6 ++++--
 15 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/ACKS.md b/ACKS.md
index 72f76b419..0dad62f3d 100644
--- a/ACKS.md
+++ b/ACKS.md
@@ -39,7 +39,7 @@ As Wordless stands on the shoulders of giants, I hereby extend my sincere gratit
 15|[pypdf](https://github.com/py-pdf/pypdf)|3.16.2|Mathieu Fenniak, Ashish Kulkarni, Steve Witham, Martin Thoma|[BSD-3-Clause](https://github.com/py-pdf/pypdf/blob/main/LICENSE)
 16|[Pyphen](https://pyphen.org/)|0.14.0|Guillaume Ayoub|[GPL-2.0-or-later/LGPL-2.1-or-later/MPL-1.1](https://github.com/Kozea/Pyphen/blob/master/LICENSE)
 17|[PyQt](https://riverbankcomputing.com/software/pyqt/)|5.15.10|Riverbank Computing|[Commercial-License/GPL-3.0-only](https://www.riverbankcomputing.com/static/Docs/PyQt5/introduction.html#license)
-18|[PyThaiNLP](https://github.com/PyThaiNLP/pythainlp)|4.0.2|Wannaphong Phatthiyaphaibun (วรรณพงษ์ ภัททิยไพบูลย์)|[Apache-2.0](https://github.com/PyThaiNLP/pythainlp/blob/dev/LICENSE)
+18|[PyThaiNLP](https://github.com/PyThaiNLP/pythainlp)|5.0.3|Wannaphong Phatthiyaphaibun (วรรณพงษ์ ภัททิยไพบูลย์)|[Apache-2.0](https://github.com/PyThaiNLP/pythainlp/blob/dev/LICENSE)
 19|[python-docx](https://github.com/python-openxml/python-docx)|1.1.0|Steve Canny|[MIT](https://github.com/python-openxml/python-docx/blob/master/LICENSE)
 20|[python-mecab-ko](https://github.com/jonghwanhyeon/python-mecab-ko)|1.3.3|Jonghwan Hyeon|[BSD-3-Clause](https://github.com/jonghwanhyeon/python-mecab-ko/blob/main/LICENSE)
 21|[Requests](https://github.com/psf/requests)|2.31.0|Kenneth Reitz|[Apache-2.0](https://github.com/psf/requests/blob/main/LICENSE)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0c919f02d..3bf59a229 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@
 ### 🎉 New Features
 - Settings: Add Settings - Stop Word Lists - Stop Word List Settings - Case-sensitive
 - Settings: Add Settings - Tables - Dependency Parser
+- Utils: Add PyThaiNLP's Han-solo
 - Utils: Add Stanza's Sindhi part-of-speech tagger
 - Utils: Add VADER's sentiment analyzers
 - Work Area: Add Profiler - Lexical Diversity - Brunét's Index / Honoré's statistic
@@ -50,6 +51,7 @@
 - Dependencies: Upgrade Lingua to 2.0.2
 - Dependencies: Upgrade pymorphy3 to 2.0.1
 - Dependencies: Upgrade PyQt to 5.15.10
+- Dependencies: Upgrade PyThaiNLP to 5.0.3
 - Dependencies: Upgrade python-docx to 1.1.0
 - Dependencies: Upgrade Sacremoses to 0.1.1
 - Dependencies: Upgrade spaCy to 3.7.2
diff --git a/doc/trs/zho_cn/ACKS.md b/doc/trs/zho_cn/ACKS.md
index eb50301a9..171f791ea 100644
--- a/doc/trs/zho_cn/ACKS.md
+++ b/doc/trs/zho_cn/ACKS.md
@@ -39,7 +39,7 @@
 15|[pypdf](https://github.com/py-pdf/pypdf)|3.16.2|Mathieu Fenniak, Ashish Kulkarni, Steve Witham, Martin Thoma|[BSD-3-Clause](https://github.com/py-pdf/pypdf/blob/main/LICENSE)
 16|[Pyphen](https://pyphen.org/)|0.14.0|Guillaume Ayoub|[GPL-2.0-or-later/LGPL-2.1-or-later/MPL-1.1](https://github.com/Kozea/Pyphen/blob/master/LICENSE)
 17|[PyQt](https://riverbankcomputing.com/software/pyqt/)|5.15.10|Riverbank Computing|[Commercial-License/GPL-3.0-only](https://www.riverbankcomputing.com/static/Docs/PyQt5/introduction.html#license)
-18|[PyThaiNLP](https://github.com/PyThaiNLP/pythainlp)|4.0.2|Wannaphong Phatthiyaphaibun (วรรณพงษ์ ภัททิยไพบูลย์)|[Apache-2.0](https://github.com/PyThaiNLP/pythainlp/blob/dev/LICENSE)
+18|[PyThaiNLP](https://github.com/PyThaiNLP/pythainlp)|5.0.3|Wannaphong Phatthiyaphaibun (วรรณพงษ์ ภัททิยไพบูลย์)|[Apache-2.0](https://github.com/PyThaiNLP/pythainlp/blob/dev/LICENSE)
 19|[python-docx](https://github.com/python-openxml/python-docx)|1.1.0|Steve Canny|[MIT](https://github.com/python-openxml/python-docx/blob/master/LICENSE)
 20|[python-mecab-ko](https://github.com/jonghwanhyeon/python-mecab-ko)|1.3.3|Jonghwan Hyeon|[BSD-3-Clause](https://github.com/jonghwanhyeon/python-mecab-ko/blob/main/LICENSE)
 21|[Requests](https://github.com/psf/requests)|2.31.0|Kenneth Reitz|[Apache-2.0](https://github.com/psf/requests/blob/main/LICENSE)
diff --git a/doc/trs/zho_tw/ACKS.md b/doc/trs/zho_tw/ACKS.md
index 8bff584f4..5e25ec2ec 100644
--- a/doc/trs/zho_tw/ACKS.md
+++ b/doc/trs/zho_tw/ACKS.md
@@ -39,7 +39,7 @@
 15|[pypdf](https://github.com/py-pdf/pypdf)|3.16.2|Mathieu Fenniak, Ashish Kulkarni, Steve Witham, Martin Thoma|[BSD-3-Clause](https://github.com/py-pdf/pypdf/blob/main/LICENSE)
 16|[Pyphen](https://pyphen.org/)|0.14.0|Guillaume Ayoub|[GPL-2.0-or-later/LGPL-2.1-or-later/MPL-1.1](https://github.com/Kozea/Pyphen/blob/master/LICENSE)
 17|[PyQt](https://riverbankcomputing.com/software/pyqt/)|5.15.10|Riverbank Computing|[Commercial-License/GPL-3.0-only](https://www.riverbankcomputing.com/static/Docs/PyQt5/introduction.html#license)
-18|[PyThaiNLP](https://github.com/PyThaiNLP/pythainlp)|4.0.2|Wannaphong Phatthiyaphaibun (วรรณพงษ์ ภัททิยไพบูลย์)|[Apache-2.0](https://github.com/PyThaiNLP/pythainlp/blob/dev/LICENSE)
+18|[PyThaiNLP](https://github.com/PyThaiNLP/pythainlp)|5.0.3|Wannaphong Phatthiyaphaibun (วรรณพงษ์ ภัททิยไพบูลย์)|[Apache-2.0](https://github.com/PyThaiNLP/pythainlp/blob/dev/LICENSE)
 19|[python-docx](https://github.com/python-openxml/python-docx)|1.1.0|Steve Canny|[MIT](https://github.com/python-openxml/python-docx/blob/master/LICENSE)
 20|[python-mecab-ko](https://github.com/jonghwanhyeon/python-mecab-ko)|1.3.3|Jonghwan Hyeon|[BSD-3-Clause](https://github.com/jonghwanhyeon/python-mecab-ko/blob/main/LICENSE)
 21|[Requests](https://github.com/psf/requests)|2.31.0|Kenneth Reitz|[Apache-2.0](https://github.com/psf/requests/blob/main/LICENSE)
diff --git a/requirements/requirements_dev.txt b/requirements/requirements_dev.txt
index e90afd8ed..86b04beeb 100644
--- a/requirements/requirements_dev.txt
+++ b/requirements/requirements_dev.txt
@@ -60,7 +60,6 @@ pyqt5
 python-docx
 requests
 scipy
-tzdata; sys_platform == 'win32' # Required by PyThaiNLP on Windows
 wordcloud
 
 # For PyTorch on Linux using CPU
diff --git a/requirements/requirements_tests.txt b/requirements/requirements_tests.txt
index 9257ff8b1..d0b4e9028 100644
--- a/requirements/requirements_tests.txt
+++ b/requirements/requirements_tests.txt
@@ -25,7 +25,7 @@ laonlp == 1.1.3
 lingua-language-detector == 2.0.2
 nltk == 3.8.1
 pyphen == 0.14.0
-pythainlp == 4.0.2
+pythainlp == 5.0.3
 sacremoses == 0.1.1
 simplemma == 0.9.1
 stanza == 1.7.0
@@ -64,7 +64,6 @@ pytest
 python-docx
 requests
 scipy
-tzdata; sys_platform == 'win32' # Required by PyThaiNLP on Windows
 wordcloud
 
 # For PyTorch on Linux using CPU
diff --git a/tests/tests_nlp/test_pos_tagging.py b/tests/tests_nlp/test_pos_tagging.py
index 477092ff7..92b59d5da 100644
--- a/tests/tests_nlp/test_pos_tagging.py
+++ b/tests/tests_nlp/test_pos_tagging.py
@@ -89,13 +89,13 @@ def test_pos_tag(lang, pos_tagger):
         case 'tha':
             match pos_tagger:
                 case 'pythainlp_perceptron_blackboard':
-                    results = [('ภาษาไทย', 'NN'), ('หรือ', 'CC'), ('ภาษาไทย', 'NN'), ('กลาง', 'NN'), ('เป็น', 'VV'), ('ภาษา', 'NN'), ('ใน', 'PS'), ('กลุ่ม', 'NN'), ('ภาษา', 'NN'), ('ไท', 'NN'), ('ซึ่ง', 'CC'), ('เป็น', 'VV'), ('กลุ่มย่อย', 'NN'), ('ของ', 'PS'), ('ตระกูล', 'NN'), ('ภาษา', 'NN'), ('ข', 'NN'), ('ร้า', 'NN'), ('-', 'PU'), ('ไท', 'NN'), ('และ', 'CC'), ('เป็น', 'VV'), ('ภาษาราชการ', 'NN'), ('และ', 'CC'), ('ภาษาประจำชาติ', 'NN'), ('ของ', 'PS'), ('ประเทศ', 'NN'), ('ไทย', 'NN'), ('[', 'NN'), ('3', 'NU'), ('][', 'CL'), ('4', 'NU'), (']', 'CL')]
-                    results_universal = [('ภาษาไทย', 'NOUN'), ('หรือ', 'CCONJ'), ('ภาษาไทย', 'NOUN'), ('กลาง', 'NOUN'), ('เป็น', 'VERB'), ('ภาษา', 'NOUN'), ('ใน', 'ADP'), ('กลุ่ม', 'NOUN'), ('ภาษา', 'NOUN'), ('ไท', 'NOUN'), ('ซึ่ง', 'CCONJ'), ('เป็น', 'VERB'), ('กลุ่มย่อย', 'NOUN'), ('ของ', 'ADP'), ('ตระกูล', 'NOUN'), ('ภาษา', 'NOUN'), ('ข', 'NOUN'), ('ร้า', 'NOUN'), ('-', 'PUNCT'), ('ไท', 'NOUN'), ('และ', 'CCONJ'), ('เป็น', 'VERB'), ('ภาษาราชการ', 'NOUN'), ('และ', 'CCONJ'), ('ภาษาประจำชาติ', 'NOUN'), ('ของ', 'ADP'), ('ประเทศ', 'NOUN'), ('ไทย', 'NOUN'), ('[', 'NOUN'), ('3', 'NUM'), ('][', 'NOUN'), ('4', 'NUM'), (']', 'NOUN')]
+                    results = [('ภาษาไทย', 'NN'), ('หรือ', 'CC'), ('ภาษาไทย', 'NN'), ('กลาง', 'NN'), ('เป็น', 'VV'), ('ภาษา', 'NN'), ('ใน', 'PS'), ('กลุ่ม', 'NN'), ('ภาษา', 'NN'), ('ไท', 'NN'), ('ซึ่ง', 'CC'), ('เป็น', 'VV'), ('กลุ่มย่อย', 'NN'), ('ของ', 'PS'), ('ตระกูล', 'NN'), ('ภาษา', 'NN'), ('ข', 'NN'), ('ร้า', 'NN'), ('-', 'PU'), ('ไท', 'NN'), ('และ', 'CC'), ('เป็น', 'VV'), ('ภาษาราชการ', 'NN'), ('และ', 'CC'), ('ภาษาประจำชาติ', 'NN'), ('ของ', 'PS'), ('ประเทศ', 'NN'), ('ไทย', 'NN'), ('[3][4]', 'NN')]
+                    results_universal = [('ภาษาไทย', 'NOUN'), ('หรือ', 'CCONJ'), ('ภาษาไทย', 'NOUN'), ('กลาง', 'NOUN'), ('เป็น', 'VERB'), ('ภาษา', 'NOUN'), ('ใน', 'ADP'), ('กลุ่ม', 'NOUN'), ('ภาษา', 'NOUN'), ('ไท', 'NOUN'), ('ซึ่ง', 'CCONJ'), ('เป็น', 'VERB'), ('กลุ่มย่อย', 'NOUN'), ('ของ', 'ADP'), ('ตระกูล', 'NOUN'), ('ภาษา', 'NOUN'), ('ข', 'NOUN'), ('ร้า', 'NOUN'), ('-', 'PUNCT'), ('ไท', 'NOUN'), ('และ', 'CCONJ'), ('เป็น', 'VERB'), ('ภาษาราชการ', 'NOUN'), ('และ', 'CCONJ'), ('ภาษาประจำชาติ', 'NOUN'), ('ของ', 'ADP'), ('ประเทศ', 'NOUN'), ('ไทย', 'NOUN'), ('[3][4]', 'NOUN')]
                 case 'pythainlp_perceptron_orchid':
-                    results = [('ภาษาไทย', 'NPRP'), ('หรือ', 'JCRG'), ('ภาษาไทย', 'NPRP'), ('กลาง', 'VATT'), ('เป็น', 'VSTA'), ('ภาษา', 'NCMN'), ('ใน', 'RPRE'), ('กลุ่ม', 'NCMN'), ('ภาษา', 'NCMN'), ('ไท', 'NCMN'), ('ซึ่ง', 'PREL'), ('เป็น', 'VSTA'), ('กลุ่มย่อย', 'NCMN'), ('ของ', 'RPRE'), ('ตระกูล', 'NCMN'), ('ภาษา', 'NCMN'), ('ข', 'NCMN'), ('ร้า', 'NCMN'), ('-', 'PUNC'), ('ไท', 'NCMN'), ('และ', 'JCRG'), ('เป็น', 'VSTA'), ('ภาษาราชการ', 'NCMN'), ('และ', 'JCRG'), ('ภาษาประจำชาติ', 'NCMN'), ('ของ', 'RPRE'), ('ประเทศ', 'NCMN'), ('ไทย', 'NPRP'), ('[', 'NCMN'), ('3', 'NCNM'), ('][', 'PUNC'), ('4', 'NCNM'), (']', 'CMTR')]
-                    results_universal = [('ภาษาไทย', 'PROPN'), ('หรือ', 'CCONJ'), ('ภาษาไทย', 'PROPN'), ('กลาง', 'ADJ'), ('เป็น', 'VERB'), ('ภาษา', 'NOUN'), ('ใน', 'ADP'), ('กลุ่ม', 'NOUN'), ('ภาษา', 'NOUN'), ('ไท', 'NOUN'), ('ซึ่ง', 'SCONJ'), ('เป็น', 'VERB'), ('กลุ่มย่อย', 'NOUN'), ('ของ', 'ADP'), ('ตระกูล', 'NOUN'), ('ภาษา', 'NOUN'), ('ข', 'NOUN'), ('ร้า', 'NOUN'), ('-', 'PUNCT'), ('ไท', 'NOUN'), ('และ', 'CCONJ'), ('เป็น', 'VERB'), ('ภาษาราชการ', 'NOUN'), ('และ', 'CCONJ'), ('ภาษาประจำชาติ', 'NOUN'), ('ของ', 'ADP'), ('ประเทศ', 'NOUN'), ('ไทย', 'PROPN'), ('[', 'NOUN'), ('3', 'NOUN/NUM'), ('][', 'PUNCT'), ('4', 'NOUN/NUM'), (']', 'NOUN')]
+                    results = [('ภาษาไทย', 'NPRP'), ('หรือ', 'JCRG'), ('ภาษาไทย', 'NPRP'), ('กลาง', 'VATT'), ('เป็น', 'VSTA'), ('ภาษา', 'NCMN'), ('ใน', 'RPRE'), ('กลุ่ม', 'NCMN'), ('ภาษา', 'NCMN'), ('ไท', 'NCMN'), ('ซึ่ง', 'PREL'), ('เป็น', 'VSTA'), ('กลุ่มย่อย', 'NCMN'), ('ของ', 'RPRE'), ('ตระกูล', 'NCMN'), ('ภาษา', 'NCMN'), ('ข', 'NCMN'), ('ร้า', 'NCMN'), ('-', 'PUNC'), ('ไท', 'NCMN'), ('และ', 'JCRG'), ('เป็น', 'VSTA'), ('ภาษาราชการ', 'NCMN'), ('และ', 'JCRG'), ('ภาษาประจำชาติ', 'NCMN'), ('ของ', 'RPRE'), ('ประเทศ', 'NCMN'), ('ไทย', 'NPRP'), ('[3][4]', 'NCMN')]
+                    results_universal = [('ภาษาไทย', 'PROPN'), ('หรือ', 'CCONJ'), ('ภาษาไทย', 'PROPN'), ('กลาง', 'ADJ'), ('เป็น', 'VERB'), ('ภาษา', 'NOUN'), ('ใน', 'ADP'), ('กลุ่ม', 'NOUN'), ('ภาษา', 'NOUN'), ('ไท', 'NOUN'), ('ซึ่ง', 'SCONJ'), ('เป็น', 'VERB'), ('กลุ่มย่อย', 'NOUN'), ('ของ', 'ADP'), ('ตระกูล', 'NOUN'), ('ภาษา', 'NOUN'), ('ข', 'NOUN'), ('ร้า', 'NOUN'), ('-', 'PUNCT'), ('ไท', 'NOUN'), ('และ', 'CCONJ'), ('เป็น', 'VERB'), ('ภาษาราชการ', 'NOUN'), ('และ', 'CCONJ'), ('ภาษาประจำชาติ', 'NOUN'), ('ของ', 'ADP'), ('ประเทศ', 'NOUN'), ('ไทย', 'PROPN'), ('[3][4]', 'NOUN')]
                 case 'pythainlp_perceptron_pud':
-                    results = results_universal = [('ภาษาไทย', 'NOUN'), ('หรือ', 'CCONJ'), ('ภาษาไทย', 'NOUN'), ('กลาง', 'NOUN'), ('เป็น', 'AUX'), ('ภาษา', 'NOUN'), ('ใน', 'ADP'), ('กลุ่ม', 'NOUN'), ('ภาษา', 'NOUN'), ('ไท', 'PROPN'), ('ซึ่ง', 'DET'), ('เป็น', 'AUX'), ('กลุ่มย่อย', 'NOUN'), ('ของ', 'ADP'), ('ตระกูล', 'NOUN'), ('ภาษา', 'NOUN'), ('ข', 'NOUN'), ('ร้า', 'NOUN'), ('-', 'PUNCT'), ('ไท', 'PROPN'), ('และ', 'CCONJ'), ('เป็น', 'AUX'), ('ภาษาราชการ', 'NOUN'), ('และ', 'CCONJ'), ('ภาษาประจำชาติ', 'NOUN'), ('ของ', 'ADP'), ('ประเทศ', 'NOUN'), ('ไทย', 'PROPN'), ('[', 'NOUN'), ('3', 'NUM'), ('][', 'NOUN'), ('4', 'NUM'), (']', 'NOUN')]
+                    results = results_universal = [('ภาษาไทย', 'NOUN'), ('หรือ', 'CCONJ'), ('ภาษาไทย', 'NOUN'), ('กลาง', 'NOUN'), ('เป็น', 'AUX'), ('ภาษา', 'NOUN'), ('ใน', 'ADP'), ('กลุ่ม', 'NOUN'), ('ภาษา', 'NOUN'), ('ไท', 'PROPN'), ('ซึ่ง', 'DET'), ('เป็น', 'AUX'), ('กลุ่มย่อย', 'NOUN'), ('ของ', 'ADP'), ('ตระกูล', 'NOUN'), ('ภาษา', 'NOUN'), ('ข', 'NOUN'), ('ร้า', 'NOUN'), ('-', 'PUNCT'), ('ไท', 'PROPN'), ('และ', 'CCONJ'), ('เป็น', 'AUX'), ('ภาษาราชการ', 'NOUN'), ('และ', 'CCONJ'), ('ภาษาประจำชาติ', 'NOUN'), ('ของ', 'ADP'), ('ประเทศ', 'NOUN'), ('ไทย', 'PROPN'), ('[3][4]', 'NOUN')]
                 case _:
                     tests_lang_util_skipped = True
         case 'bod':
diff --git a/tests/tests_nlp/test_sentence_tokenization.py b/tests/tests_nlp/test_sentence_tokenization.py
index 88bfaa208..1e7f96acf 100644
--- a/tests/tests_nlp/test_sentence_tokenization.py
+++ b/tests/tests_nlp/test_sentence_tokenization.py
@@ -114,7 +114,7 @@ def test_sentence_tokenize(lang, sentence_tokenizer):
         case 'tha':
             match sentence_tokenizer:
                 case 'pythainlp_crfcut':
-                    assert sentences == ['ภาษาไทย หรือ ภาษาไทยกลาง เป็นภาษาในกลุ่มภาษาไท ซึ่งเป็นกลุ่มย่อยของตระกูลภาษาขร้า-ไท และเป็นภาษาราชการ และภาษาประจำชาติของประเทศไทย[3][4] มีการสันนิษฐานว่าภาษาในตระกูลนี้มีถิ่นกำเนิดจากทางตอนใต้ของประเทศจีน และนักภาษาศาสตร์บางส่วนเสนอว่า ภาษาไทยน่าจะมีความเชื่อมโยงกับตระกูลภาษาออสโตร-เอเชียติก', 'ตระกูลภาษาออสโตรนีเซียน และตระกูลภาษาจีน-ทิเบต']
+                    assert sentences == ['ภาษาไทย หรือ ภาษาไทยกลาง เป็นภาษาในกลุ่มภาษาไท ซึ่งเป็นกลุ่มย่อยของตระกูลภาษาขร้า-ไท และเป็นภาษาราชการ และภาษาประจำชาติของประเทศไทย[3][4]', 'มีการสันนิษฐานว่าภาษาในตระกูลนี้มีถิ่นกำเนิดจากทางตอนใต้ของประเทศจีน และนักภาษาศาสตร์บางส่วนเสนอว่า ภาษาไทยน่าจะมีความเชื่อมโยงกับตระกูลภาษาออสโตร-เอเชียติก', 'ตระกูลภาษาออสโตรนีเซียน และตระกูลภาษาจีน-ทิเบต']
                 case 'pythainlp_thaisumcut':
                     assert sentences == ['ภาษาไทย', 'หรือ ภาษาไทยกลาง เป็นภาษาในกลุ่มภาษาไท', 'ซึ่งเป็นกลุ่มย่อยของตระกูลภาษาขร้า-ไท และเป็นภาษาราชการ', 'และภาษาประจำชาติของประเทศไทย[3][4] มีการสันนิษฐานว่าภาษาในตระกูลนี้มีถิ่นกำเนิดจากทางตอนใต้ของประเทศจีน', 'และนักภาษาศาสตร์บางส่วนเสนอว่า ภาษาไทยน่าจะมีความเชื่อมโยงกับตระกูลภาษาออสโตร-เอเชียติก ตระกูลภาษาออสโตรนีเซียน และตระกูลภาษาจีน-ทิเบต']
                 case _:
diff --git a/tests/tests_nlp/test_syl_tokenization.py b/tests/tests_nlp/test_syl_tokenization.py
index 534933dee..078d29f4f 100644
--- a/tests/tests_nlp/test_syl_tokenization.py
+++ b/tests/tests_nlp/test_syl_tokenization.py
@@ -190,7 +190,11 @@ def test_syl_tokenize(lang, syl_tokenizer):
         case 'tel':
             assert syls_tokens == [('తె', 'లు', 'గు'), ('అనే', 'ది'), ('ద్రా', 'విడ'), ('భా', 'షల'), ('కు', 'టుం', 'బా', 'ని', 'కి'), ('చెం', 'దిన'), ('భాష',), ('.',)]
         case 'tha':
-            assert syls_tokens == [('ภา', 'ษา', 'ไทย'), ('หรือ',), ('ภา', 'ษา', 'ไทย'), ('กลาง',), ('เป็น',), ('ภา', 'ษา'), ('ใน',), ('กลุ่ม',), ('ภา', 'ษา'), ('ไท',), ('ซึ่ง',), ('เป็น',), ('กลุ่ม', 'ย่อย'), ('ของ',), ('ตระ', 'กูล'), ('ภา', 'ษา'), ('ข',), ('ร้า',), ('-',), ('ไท',), ('และ',), ('เป็น',), ('ภา', 'ษา', 'ราช', 'การ'), ('และ',), ('ภา', 'ษา', 'ประ', 'จำ', 'ชาติ'), ('ของ',), ('ประ', 'เทศ'), ('ไทย',), ('[',), ('3',), ('][',), ('4',), (']',)]
+            match syl_tokenizer:
+                case 'pythainlp_han_solo':
+                    assert syls_tokens == [('ภา', 'ษา', 'ไทย'), ('หรือ',), ('ภา', 'ษา', 'ไทย'), ('กลาง',), ('เป็น',), ('ภา', 'ษา'), ('ใน',), ('กลุ่ม',), ('ภา', 'ษา'), ('ไท',), ('ซึ่ง',), ('เป็น',), ('กลุ่ม', 'ย่อย'), ('ของ',), ('ตระ', 'กูล'), ('ภา', 'ษา'), ('ข',), ('ร้า',), ('-',), ('ไท',), ('และ',), ('เป็น',), ('ภา', 'ษา', 'ราช', 'การ'), ('และ',), ('ภา', 'ษา', 'ประ', 'จำ', 'ชาติ'), ('ของ',), ('ประ', 'เทศ'), ('ไทย',), ('[3', '][4', ']')]
+                case 'pythainlp_syl_dict':
+                    assert syls_tokens == [('ภา', 'ษา', 'ไทย'), ('หรือ',), ('ภา', 'ษา', 'ไทย'), ('กลาง',), ('เป็น',), ('ภา', 'ษา'), ('ใน',), ('กลุ่ม',), ('ภา', 'ษา'), ('ไท',), ('ซึ่ง',), ('เป็น',), ('กลุ่ม', 'ย่อย'), ('ของ',), ('ตระ', 'กูล'), ('ภา', 'ษา'), ('ข',), ('ร้า',), ('-',), ('ไท',), ('และ',), ('เป็น',), ('ภา', 'ษา', 'ราช', 'การ'), ('และ',), ('ภา', 'ษา', 'ประ', 'จำ', 'ชาติ'), ('ของ',), ('ประ', 'เทศ'), ('ไทย',), ('[3][4]',)]
         case 'ukr':
             assert syls_tokens == [('Укра', 'ї', '́', 'н', 'сь', 'ка'), ('мо', '́', 'ва'), ('(',), ('МФА',), (':',), ('[',), ('ukrɑ̽ˈjɪnʲsʲkɑ̽',), ('ˈmɔwɑ̽',), (']',), (',',), ('іс', 'то', 'ри', 'ч', 'ні'), ('на', 'зви'), ('—',), ('ру', '́', 'сь', 'ка'), ('[',), ('10',), (']',), ('[',), ('11',), (']',), ('[',), ('12',), (']',), ('[',), ('*',), ('1',), (']',), (')',), ('—',), ('на', 'ціо', 'на', 'ль', 'на'), ('мо', 'ва'), ('укра', 'ї', 'н', 'ців'), ('.',)]
         case 'zul':
diff --git a/tests/tests_nlp/test_word_detokenization.py b/tests/tests_nlp/test_word_detokenization.py
index 7d032b45f..97e1211b3 100644
--- a/tests/tests_nlp/test_word_detokenization.py
+++ b/tests/tests_nlp/test_word_detokenization.py
@@ -70,7 +70,7 @@ def test_word_detokenize(lang):
         case 'jpn':
             assert text == '''The meaning of "天気がいいから、散歩しましょう。"is: The weather is good so let 's take a walk.'''
         case 'tha':
-            assert text == 'ภาษาไทยหรือภาษาไทยกลางเป็นภาษาในกลุ่มภาษาไทซึ่งเป็นกลุ่มย่อยของตระกูลภาษาขร้า - ไทและเป็นภาษาราชการและภาษาประจำชาติของประเทศไทย [ 3 ][ 4 ]'
+            assert text == 'ภาษาไทยหรือภาษาไทยกลางเป็นภาษาในกลุ่มภาษาไทซึ่งเป็นกลุ่มย่อยของตระกูลภาษาขร้า - ไทและเป็นภาษาราชการและภาษาประจำชาติของประเทศไทย [3][4]'
         case 'bod':
             assert text == 'Test this Tibetan string: དུང་དང་འོ་མར་འགྲན་པའི་ལྷག་བསམ་མཐུ། །དམན་ཡང་དཀར་པོའི་བྱས་འབྲས་ཅུང་ཟད་ཅིག །བློ་དང་འདུན་པ་བཟང་བའི་རང་རིགས་ཀུན། །རྒྱལ་ཁའི་འཕྲིན་བཟང་ལས་དོན་འགྲུབ་ཕྱིར་འབད།།'
         case _:
diff --git a/tests/tests_nlp/test_word_tokenization.py b/tests/tests_nlp/test_word_tokenization.py
index 3a088e31b..b14556c92 100644
--- a/tests/tests_nlp/test_word_tokenization.py
+++ b/tests/tests_nlp/test_word_tokenization.py
@@ -301,12 +301,12 @@ def test_word_tokenize(lang, word_tokenizer):
             assert tokens == ['Tetun', '(', 'iha', 'portugés', ':', 'tétum', ';', 'iha', 'inglés', ':', 'Tetum', ')', 'ne', "'", 'e', 'lian', 'nasionál', 'no', 'ko-ofisiál', 'Timór', 'Lorosa', "'", 'e', 'nian', '.']
         case 'tha':
             match word_tokenizer:
-                case 'pythainlp_longest_matching' | 'pythainlp_max_matching_tcc':
+                case 'pythainlp_longest_matching':
                     assert tokens == ['ภาษาไทย', 'หรือ', 'ภาษาไทย', 'กลาง', 'เป็น', 'ภาษา', 'ใน', 'กลุ่ม', 'ภาษา', 'ไท', 'ซึ่ง', 'เป็น', 'กลุ่มย่อย', 'ของ', 'ตระกูล', 'ภาษา', 'ข', 'ร้า', '-', 'ไท', 'และ', 'เป็น', 'ภาษาราชการ', 'และ', 'ภาษาประจำชาติ', 'ของ', 'ประเทศ', 'ไทย', '[', '3', '][', '4', ']']
-                case 'pythainlp_max_matching':
+                case 'pythainlp_max_matching_tcc':
+                    assert tokens == ['ภาษาไทย', 'หรือ', 'ภาษาไทย', 'กลาง', 'เป็น', 'ภาษา', 'ใน', 'กลุ่ม', 'ภาษา', 'ไท', 'ซึ่ง', 'เป็น', 'กลุ่มย่อย', 'ของ', 'ตระกูล', 'ภาษา', 'ข', 'ร้า', '-', 'ไท', 'และ', 'เป็น', 'ภาษาราชการ', 'และ', 'ภาษาประจำชาติ', 'ของ', 'ประเทศ', 'ไทย', '[3][4]']
+                case 'pythainlp_max_matching' | 'pythainlp_nercut':
                     assert tokens == ['ภาษาไทย', 'หรือ', 'ภาษาไทยกลาง', 'เป็น', 'ภาษา', 'ใน', 'กลุ่ม', 'ภาษา', 'ไท', 'ซึ่ง', 'เป็น', 'กลุ่มย่อย', 'ของ', 'ตระกูล', 'ภาษา', 'ข', 'ร้า', '-', 'ไท', 'และ', 'เป็น', 'ภาษาราชการ', 'และ', 'ภาษาประจำชาติ', 'ของ', 'ประเทศ', 'ไทย', '[', '3', '][', '4', ']']
-                case 'pythainlp_nercut':
-                    assert tokens == ['ภาษาไทย', 'หรือ', 'ภาษาไทย', 'กลาง', 'เป็น', 'ภาษา', 'ใน', 'กลุ่ม', 'ภาษา', 'ไท', 'ซึ่ง', 'เป็น', 'กลุ่มย่อย', 'ของ', 'ตระกูล', 'ภาษา', 'ข', 'ร้า', '-', 'ไท', 'และ', 'เป็น', 'ภาษาราชการ', 'และ', 'ภาษาประจำชาติ', 'ของ', 'ประเทศ', 'ไทย', '[', '3', '][', '4', ']']
                 case _:
                     tests_lang_util_skipped = True
         case 'bod':
diff --git a/utils/wl_generate_acks.py b/utils/wl_generate_acks.py
index 58b59beb0..719ac3dcd 100644
--- a/utils/wl_generate_acks.py
+++ b/utils/wl_generate_acks.py
@@ -53,7 +53,7 @@
     ['pypdf', 'https://github.com/py-pdf/pypdf', '3.16.2', 'Mathieu Fenniak, Ashish Kulkarni, Steve Witham, Martin Thoma', 'BSD-3-Clause', 'https://github.com/py-pdf/pypdf/blob/main/LICENSE'],
     ['Pyphen', 'https://pyphen.org/', '0.14.0', 'Guillaume Ayoub', 'GPL-2.0-or-later/LGPL-2.1-or-later/MPL-1.1', 'https://github.com/Kozea/Pyphen/blob/master/LICENSE'],
     ['PyQt', 'https://riverbankcomputing.com/software/pyqt/', '5.15.10', 'Riverbank Computing', 'Commercial-License/GPL-3.0-only', 'https://www.riverbankcomputing.com/static/Docs/PyQt5/introduction.html#license'],
-    ['PyThaiNLP', 'https://github.com/PyThaiNLP/pythainlp', '4.0.2', 'Wannaphong Phatthiyaphaibun (วรรณพงษ์ ภัททิยไพบูลย์)', 'Apache-2.0', 'https://github.com/PyThaiNLP/pythainlp/blob/dev/LICENSE'],
+    ['PyThaiNLP', 'https://github.com/PyThaiNLP/pythainlp', '5.0.3', 'Wannaphong Phatthiyaphaibun (วรรณพงษ์ ภัททิยไพบูลย์)', 'Apache-2.0', 'https://github.com/PyThaiNLP/pythainlp/blob/dev/LICENSE'],
     ['python-docx', 'https://github.com/python-openxml/python-docx', '1.1.0', 'Steve Canny', 'MIT', 'https://github.com/python-openxml/python-docx/blob/master/LICENSE'],
     ['python-mecab-ko', 'https://github.com/jonghwanhyeon/python-mecab-ko', '1.3.3', 'Jonghwan Hyeon', 'BSD-3-Clause', 'https://github.com/jonghwanhyeon/python-mecab-ko/blob/main/LICENSE'],
     ['Requests', 'https://github.com/psf/requests', '2.31.0', 'Kenneth Reitz', 'Apache-2.0', 'https://github.com/psf/requests/blob/main/LICENSE'],
diff --git a/wordless/wl_nlp/wl_syl_tokenization.py b/wordless/wl_nlp/wl_syl_tokenization.py
index 83b97178e..bdc55485d 100644
--- a/wordless/wl_nlp/wl_syl_tokenization.py
+++ b/wordless/wl_nlp/wl_syl_tokenization.py
@@ -105,7 +105,9 @@ def wl_syl_tokenize_tokens(main, tokens, lang, syl_tokenizer):
             else:
                 syls_tokens.append([token])
         # Thai
-        elif syl_tokenizer == 'pythainlp_tha':
-            syls_tokens.append(pythainlp.subword_tokenize(token, engine = 'dict'))
+        elif syl_tokenizer == 'pythainlp_han_solo':
+            syls_tokens.append(pythainlp.tokenize.syllable_tokenize(token, engine = 'han_solo'))
+        elif syl_tokenizer == 'pythainlp_syl_dict':
+            syls_tokens.append(pythainlp.tokenize.syllable_tokenize(token, engine = 'dict'))
 
     return syls_tokens
diff --git a/wordless/wl_settings/wl_settings_default.py b/wordless/wl_settings/wl_settings_default.py
index e1b5b7b72..7d53f20f0 100644
--- a/wordless/wl_settings/wl_settings_default.py
+++ b/wordless/wl_settings/wl_settings_default.py
@@ -1473,7 +1473,7 @@ def init_settings_default(main):
                 'spa': 'pyphen_spa',
                 'swe': 'pyphen_swe',
                 'tel': 'pyphen_tel',
-                'tha': 'pythainlp_tha',
+                'tha': 'pythainlp_han_solo',
                 'ukr': 'pyphen_ukr',
                 'zul': 'pyphen_zul'
             },
diff --git a/wordless/wl_settings/wl_settings_global.py b/wordless/wl_settings/wl_settings_global.py
index b818da39d..76be085df 100644
--- a/wordless/wl_settings/wl_settings_global.py
+++ b/wordless/wl_settings/wl_settings_global.py
@@ -765,7 +765,8 @@
             _tr('wl_settings_global', 'Pyphen - Ukrainian syllable tokenizer'): 'pyphen_ukr',
             _tr('wl_settings_global', 'Pyphen - Zulu syllable tokenizer'): 'pyphen_zul',
 
-            _tr('wl_settings_global', 'PyThaiNLP - Thai syllable tokenizer'): 'pythainlp_tha'
+            _tr('wl_settings_global', 'PyThaiNLP - Han-solo'): 'pythainlp_han_solo',
+            _tr('wl_settings_global', 'PyThaiNLP - Syllable dictionary'): 'pythainlp_syl_dict'
         },
 
         'pos_taggers': {
@@ -2519,7 +2520,8 @@
 
         'tha': [
             'pyphen_tha',
-            'pythainlp_tha'
+            'pythainlp_han_solo',
+            'pythainlp_syl_dict'
         ],
 
         'ukr': ['pyphen_ukr'],