diff --git a/.buildinfo b/.buildinfo index f950d78..3e5876a 100644 --- a/.buildinfo +++ b/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: cfc94b7090bd3e12fae79feb6660b68e +config: b554adda744f738965857dbf0b20b874 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/_sources/notebooks/thai_semantic_representation.ipynb b/_sources/notebooks/thai_semantic_representation.ipynb index dcf203f..ad4b097 100644 --- a/_sources/notebooks/thai_semantic_representation.ipynb +++ b/_sources/notebooks/thai_semantic_representation.ipynb @@ -129,7 +129,9 @@ "source": [ "### Usage\n", "\n", - "We use ACE for delphin." + "We use ACE for pydelphin.\n", + "\n", + "Docs: https://pydelphin.readthedocs.io/en/latest/guides/ace.html" ], "metadata": { "id": "aXjtb9ftnsF1" @@ -220,4 +222,4 @@ ] } ] -} +} \ No newline at end of file diff --git a/_static/documentation_options.js b/_static/documentation_options.js index c9bec9a..2c08c68 100644 --- a/_static/documentation_options.js +++ b/_static/documentation_options.js @@ -1,6 +1,6 @@ var DOCUMENTATION_OPTIONS = { URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), - VERSION: 'thai2plot-27-g0483fa3', + VERSION: 'thai2plot-28-g4a9b64f', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/genindex.html b/genindex.html index 631b1d3..f620774 100644 --- a/genindex.html +++ b/genindex.html @@ -3,7 +3,7 @@ - Index — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + Index — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/index.html b/index.html index 36d00e7..4fe8b96 100644 --- a/index.html +++ b/index.html @@ -4,7 +4,7 @@ - Welcome to PyThaiNLP Tutorials — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + Welcome to PyThaiNLP Tutorials — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/notebooks/Han-Coref.html b/notebooks/Han-Coref.html index 295867c..3fe7341 100644 --- a/notebooks/Han-Coref.html +++ b/notebooks/Han-Coref.html @@ -4,7 +4,7 @@ - 🪿 Han-Coref: Thai Coreference resolution by PyThaiNLP — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + 🪿 Han-Coref: Thai Coreference resolution by PyThaiNLP — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/notebooks/Thai_Dependency_Parser.html b/notebooks/Thai_Dependency_Parser.html index f88927b..ae82177 100644 --- a/notebooks/Thai_Dependency_Parser.html +++ b/notebooks/Thai_Dependency_Parser.html @@ -4,7 +4,7 @@ - Thai Dependency Parser — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + Thai Dependency Parser — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/notebooks/find_all_thai_rhyming_words.html b/notebooks/find_all_thai_rhyming_words.html index b1b9a31..5539f40 100644 --- a/notebooks/find_all_thai_rhyming_words.html +++ b/notebooks/find_all_thai_rhyming_words.html @@ -4,7 +4,7 @@ - Find all Thai rhyming words from Thai word — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + Find all Thai rhyming words from Thai word — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/notebooks/machine_translation.html b/notebooks/machine_translation.html index 50e4f7f..d3c2db4 100644 --- a/notebooks/machine_translation.html +++ b/notebooks/machine_translation.html @@ -4,7 +4,7 @@ - PyThaiNLP Translate — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + PyThaiNLP Translate — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/notebooks/nlpo3ipynb.html b/notebooks/nlpo3ipynb.html index 770c657..101e44f 100644 --- a/notebooks/nlpo3ipynb.html +++ b/notebooks/nlpo3ipynb.html @@ -4,7 +4,7 @@ - nlpO3 — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + nlpO3 — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/notebooks/pythainlp_chunk.html b/notebooks/pythainlp_chunk.html index 9454212..fdbea9a 100644 --- a/notebooks/pythainlp_chunk.html +++ b/notebooks/pythainlp_chunk.html @@ -4,7 +4,7 @@ - Thai Chunk Parser — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + Thai Chunk Parser — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/notebooks/pythainlp_get_started.html b/notebooks/pythainlp_get_started.html index bd78690..3258ca0 100644 --- a/notebooks/pythainlp_get_started.html +++ b/notebooks/pythainlp_get_started.html @@ -4,7 +4,7 @@ - PyThaiNLP Get Started — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + PyThaiNLP Get Started — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/notebooks/pythainlp_wangchanberta.html b/notebooks/pythainlp_wangchanberta.html index 3a534a2..86944ed 100644 --- a/notebooks/pythainlp_wangchanberta.html +++ b/notebooks/pythainlp_wangchanberta.html @@ -4,7 +4,7 @@ - Wangchanberta — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + Wangchanberta — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/notebooks/sentiment_analysis.html b/notebooks/sentiment_analysis.html index be648d3..55ad4c8 100644 --- a/notebooks/sentiment_analysis.html +++ b/notebooks/sentiment_analysis.html @@ -4,7 +4,7 @@ - Wisesight Sentiment Analysis — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + Wisesight Sentiment Analysis — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/notebooks/spaCy_PyThaiNLP_demo.html b/notebooks/spaCy_PyThaiNLP_demo.html index 4f1c8fa..a6fb231 100644 --- a/notebooks/spaCy_PyThaiNLP_demo.html +++ b/notebooks/spaCy_PyThaiNLP_demo.html @@ -4,7 +4,7 @@ - spaCy-PyThaiNLP — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + spaCy-PyThaiNLP — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/notebooks/text_classification.html b/notebooks/text_classification.html index e21f8a7..38b80ea 100644 --- a/notebooks/text_classification.html +++ b/notebooks/text_classification.html @@ -4,7 +4,7 @@ - Wongnai Review Classification — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + Wongnai Review Classification — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/notebooks/text_generation.html b/notebooks/text_generation.html index 471bc0d..1379a97 100644 --- a/notebooks/text_generation.html +++ b/notebooks/text_generation.html @@ -4,7 +4,7 @@ - Thai Wiki Language Model for Text Generation — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + Thai Wiki Language Model for Text Generation — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/notebooks/thai_semantic_representation.html b/notebooks/thai_semantic_representation.html index 3fc05f7..8890e6b 100644 --- a/notebooks/thai_semantic_representation.html +++ b/notebooks/thai_semantic_representation.html @@ -4,7 +4,7 @@ - Thai Semantic Representation — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + Thai Semantic Representation — pythainlp-tutorials thai2plot-28-g4a9b64f documentation @@ -182,7 +182,8 @@

Install

Usage

-

We use ACE for delphin.

+

We use ACE for pydelphin.

+

Docs: https://pydelphin.readthedocs.io/en/latest/guides/ace.html

[2]:
 
diff --git a/notebooks/thai_semantic_representation.ipynb b/notebooks/thai_semantic_representation.ipynb index 2a89a9e..367f579 100644 --- a/notebooks/thai_semantic_representation.ipynb +++ b/notebooks/thai_semantic_representation.ipynb @@ -115,7 +115,9 @@ "source": [ "### Usage\n", "\n", - "We use ACE for delphin." + "We use ACE for pydelphin.\n", + "\n", + "Docs: https://pydelphin.readthedocs.io/en/latest/guides/ace.html" ] }, { diff --git a/notebooks/thai_wav2vec2_onnx.html b/notebooks/thai_wav2vec2_onnx.html index 6c13833..f387060 100644 --- a/notebooks/thai_wav2vec2_onnx.html +++ b/notebooks/thai_wav2vec2_onnx.html @@ -4,7 +4,7 @@ - Thai Wav2vec2 model to ONNX model — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + Thai Wav2vec2 model to ONNX model — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/notebooks/wangchanberta_getting_started_aireseach.html b/notebooks/wangchanberta_getting_started_aireseach.html index f1d50b5..67accce 100644 --- a/notebooks/wangchanberta_getting_started_aireseach.html +++ b/notebooks/wangchanberta_getting_started_aireseach.html @@ -4,7 +4,7 @@ - WangchanBERTa: Getting Started Notebook — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + WangchanBERTa: Getting Started Notebook — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/notebooks/word2vec_examples.html b/notebooks/word2vec_examples.html index 66ff5f9..329b7dd 100644 --- a/notebooks/word2vec_examples.html +++ b/notebooks/word2vec_examples.html @@ -4,7 +4,7 @@ - Thai2Vec Embeddings Examples — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + Thai2Vec Embeddings Examples — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/search.html b/search.html index 3f71cd3..55bd0a3 100644 --- a/search.html +++ b/search.html @@ -3,7 +3,7 @@ - Search — pythainlp-tutorials thai2plot-27-g0483fa3 documentation + Search — pythainlp-tutorials thai2plot-28-g4a9b64f documentation diff --git a/searchindex.js b/searchindex.js index 41b8e11..be709a2 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["index", "notebooks/Han-Coref", "notebooks/Thai_Dependency_Parser", "notebooks/find_all_thai_rhyming_words", "notebooks/machine_translation", "notebooks/nlpo3ipynb", "notebooks/pythainlp_chunk", "notebooks/pythainlp_get_started", "notebooks/pythainlp_wangchanberta", "notebooks/sentiment_analysis", "notebooks/spaCy_PyThaiNLP_demo", "notebooks/text_classification", "notebooks/text_generation", "notebooks/thai_semantic_representation", "notebooks/thai_wav2vec2_onnx", "notebooks/wangchanberta_getting_started_aireseach", "notebooks/word2vec_examples"], "filenames": ["index.rst", "notebooks/Han-Coref.ipynb", "notebooks/Thai_Dependency_Parser.ipynb", "notebooks/find_all_thai_rhyming_words.ipynb", "notebooks/machine_translation.ipynb", "notebooks/nlpo3ipynb.ipynb", "notebooks/pythainlp_chunk.ipynb", "notebooks/pythainlp_get_started.ipynb", "notebooks/pythainlp_wangchanberta.ipynb", "notebooks/sentiment_analysis.ipynb", "notebooks/spaCy_PyThaiNLP_demo.ipynb", "notebooks/text_classification.ipynb", "notebooks/text_generation.ipynb", "notebooks/thai_semantic_representation.ipynb", "notebooks/thai_wav2vec2_onnx.ipynb", "notebooks/wangchanberta_getting_started_aireseach.ipynb", "notebooks/word2vec_examples.ipynb"], "titles": ["Welcome to PyThaiNLP Tutorials", "\ud83e\udebf Han-Coref: Thai Coreference resolution by PyThaiNLP", "Thai Dependency Parser", "Find all Thai rhyming words from Thai word", "PyThaiNLP Translate", "nlpO3", "Thai Chunk Parser", "PyThaiNLP Get Started", "Wangchanberta", "Wisesight Sentiment Analysis", "spaCy-PyThaiNLP", "Wongnai Review Classification", "Thai Wiki Language Model for Text Generation", "Thai Semantic Representation", "Thai Wav2vec2 model to ONNX model", "WangchanBERTa: Getting Started Notebook", "Thai2Vec Embeddings Examples"], "terms": {"i": [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "python": [0, 2, 3, 4, 5, 6, 8, 10, 14, 15], "librari": [0, 5, 6], "thai": [0, 5, 8, 9, 10, 11, 15, 16], "natur": [0, 5], "languag": [0, 5, 7, 8, 15], "process": [0, 5, 7, 11, 14, 16], "han": 0, "coref": 0, "corefer": 0, "resolut": [0, 12], "depend": [0, 9, 10, 15], "parser": 0, "find": [0, 7, 9], "all": [0, 4, 7, 11, 12, 13, 15, 16], "rhyme": 0, "word": [0, 5, 6, 9, 10, 11, 12, 15], "from": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "translat": 0, "nlpo3": 0, "chunk": 0, "get": [0, 2, 9, 11, 12, 13, 16], "start": [0, 1, 2, 12], "wangchanberta": 0, "wisesight": [0, 11], "sentiment": [0, 11], "analysi": [0, 13, 15], "spaci": [0, 1, 2], "wongnai": 0, "review": 0, "classif": [0, 7, 9, 16], "wiki": [0, 11, 13, 15], "model": [0, 4, 6, 7, 8, 10, 16], "text": [0, 1, 4, 5, 6, 7, 11, 14, 16], "gener": [0, 6, 7, 15, 16], "semant": 0, "represent": [0, 7], "wav2vec2": 0, "onnx": 0, "notebook": [0, 8, 9, 12, 13, 14], "thai2vec": 0, "embed": [0, 9, 11, 12], "exampl": [0, 6, 7, 9, 15], "apach": 0, "softwar": 0, "licens": [0, 13], "2": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "0": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "maintain": 0, "team": 0, "see": [0, 7, 11, 12, 15, 16], "sourc": [0, 2, 10], "code": [0, 7, 11, 14, 16], "http": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16], "github": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16], "com": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16], "main": [0, 7, 12, 14, 15], "develop": [0, 12, 15], "websit": 0, "org": [0, 2, 4, 8, 10, 13, 14, 15], "interact": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "onlin": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "version": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "pip": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "q": [1, 9, 12, 13], "instal": [1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 16], "fastcoref": 1, "transform": [1, 8, 9, 10, 12, 14, 15], "sentencepiec": [1, 4, 8, 15], "prepar": [1, 5, 10, 13, 15], "metadata": [1, 2, 8, 10, 13, 14, 15], "setup": [1, 2, 4, 7, 8, 10, 13, 15], "py": [1, 2, 4, 7, 8, 9, 10, 13, 14, 15, 16], "done": [1, 2, 4, 7, 8, 10, 12, 13, 15], "13": [1, 2, 4, 7, 8, 9, 10, 11, 13, 14, 15, 16], "4": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "mb": [1, 3, 5, 6, 10, 13, 14, 15, 16], "114": [1, 15, 16], "": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16], "eta": [1, 3, 6, 10, 11, 13, 14, 15], "00": [1, 3, 4, 6, 7, 9, 10, 13, 14, 15], "7": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "119": 1, "474": 1, "6": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "kb": [1, 3, 5, 6, 10, 13, 14, 15, 16], "53": [1, 7, 11, 13, 14], "110": [1, 5, 15, 16], "5": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16], "14": [1, 4, 5, 7, 8, 9, 10, 11, 14, 15, 16], "9": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "212": 1, "25": [1, 2, 4, 7, 8, 9, 10, 11, 12, 14, 15, 16], "134": 1, "3": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "17": [1, 2, 3, 4, 6, 7, 8, 9, 10, 14, 15, 16], "90": 1, "224": 1, "29": [1, 4, 7, 8, 9, 11, 15, 16], "8": [1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "95": [1, 4, 8], "268": 1, "32": [1, 7, 9, 10, 11, 15], "149": 1, "19": [1, 2, 3, 4, 7, 8, 9, 10, 14, 16], "build": [1, 2, 4, 8, 10, 13, 15], "wheel": [1, 2, 4, 8, 10, 13, 15], "import": [1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15], "spacy_compon": 1, "nlp": [1, 2, 10], "blank": [1, 10], "th": [1, 4, 9, 10, 11, 12, 14, 16], "add_pip": [1, 10], "config": [1, 9, 10, 11, 12, 13, 14], "model_architectur": 1, "fcoref": 1, "model_path": [1, 9, 11, 16], "v1": 1, "lt": [1, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15], "fastcorefresolv": 1, "0x7fbd9c2b6560": 1, "gt": [1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16], "random": [1, 9, 15], "def": [1, 6, 7, 14, 15, 16], "get2tag": 1, "tag": [1, 6, 8, 10, 12], "titl": [1, 15], "none": [1, 2, 4, 7, 8, 9, 10, 11, 12, 14, 15, 16], "dic_ent": 1, "ent": [1, 10], "_tag": 1, "str": [1, 7], "list": [1, 3, 5, 6, 7, 10, 14, 16], "rang": [1, 11, 15, 16], "len": [1, 3, 7, 9, 11, 12, 14, 16], "enumer": [1, 6, 13, 16], "e": [1, 7, 10, 13, 15], "append": [1, 3, 11, 14, 16], "end": [1, 7, 12, 15], "label": [1, 9, 11, 15, 16], "color": 1, "join": [1, 9, 11, 14], "choic": [1, 12], "0123456789abcdef": 1, "j": [1, 7, 14], "thank": 1, "stackoverflow": [1, 14], "50218895": 1, "return": [1, 6, 7, 14, 15, 16], "displaci": [1, 10], "\u0e2a\u0e32\u0e18": 1, "\u0e15": [1, 3, 5, 7, 8, 9, 11, 15, 16], "\u0e41\u0e08\u0e07\u0e27": 1, "\u0e19": [1, 7, 9, 10, 11, 12, 13, 15, 16], "\u0e20\u0e32\u0e1e\u0e41\u0e04\u0e1b\u0e01\u0e25": 1, "\u0e21\u0e44\u0e25\u0e19": 1, "\u0e17": [1, 3, 7, 8, 9, 10, 11, 12, 15, 16], "\u0e1b\u0e23": [1, 7, 11, 12], "\u0e01\u0e29\u0e32\u0e2f": 1, "\u0e01\u0e25": [1, 3, 9, 11], "\u0e32\u0e27\u0e23": 1, "\u0e32\u0e22": [1, 7, 9, 11], "\u0e1e": [1, 3, 7, 9, 11, 12, 15, 16], "\u0e18\u0e32": 1, "\u0e22": [1, 7, 8, 9, 11, 12, 15, 16], "\u0e44\u0e21": [1, 7, 8, 9, 11, 15, 16], "\u0e43\u0e0a": [1, 7, 9], "\u0e27\u0e40\u0e2d\u0e07": [1, 9], "\u0e41\u0e15": [1, 7, 8, 9, 11, 15], "\u0e40\u0e2b": [1, 9, 11], "\u0e19\u0e14": [1, 7, 9, 16], "\u0e27\u0e22\u0e27": 1, "\u0e32\u0e2d\u0e20": 1, "\u0e1b\u0e23\u0e32\u0e22\u0e14": 1, "\u0e2d\u0e22\u0e04": 1, "\u0e32\u0e1a\u0e33\u0e19\u0e32\u0e0d": 1, "\u0e02\u0e23\u0e01": 1, "doc": [1, 2, 10, 11, 13, 14], "_": [1, 7, 9, 11, 15], "coref_clust": 1, "render": [1, 7, 10], "manual": [1, 13], "true": [1, 7, 8, 9, 10, 11, 12, 14, 15, 16], "style": [1, 10], "option": [1, 10, 12, 13], "jupyt": [1, 10], "\u0e41\u0e21": [1, 9, 11, 15], "\u0e2a": [1, 7, 9, 11, 12, 13, 15, 16], "\u0e07\u0e43\u0e2b": 1, "\u0e25": [1, 3, 7, 11, 15, 16], "\u0e01\u0e0a\u0e32\u0e22\u0e44\u0e1b\u0e0b": 1, "\u0e2d\u0e02\u0e2d\u0e07": [1, 12], "\u0e40\u0e18\u0e2d\u0e01\u0e25": 1, "\u0e1a\u0e25": 1, "\u0e21\u0e40\u0e2d\u0e32\u0e15": 1, "\u0e01": [1, 3, 6, 7, 9, 11, 12, 15, 16], "\u0e01\u0e0a\u0e32\u0e22": 1, "\u0e44\u0e1b\u0e0b": 1, "\u0e40\u0e18\u0e2d": [1, 9], "\u0e2b\u0e21\u0e2d\u0e41\u0e0a\u0e21\u0e1b": 1, "\u0e40\u0e1b": [1, 7, 8, 9, 10, 11, 12, 13, 15, 16], "\u0e14\u0e43\u0e08\u0e17": 1, "\u0e07\u0e19": [1, 7, 11], "\u0e33\u0e15\u0e32": 1, "\u0e40\u0e2a": [1, 7, 11], "\u0e22\u0e43\u0e08\u0e17": 1, "\u0e01\u0e08\u0e32\u0e01\u0e44\u0e1b": 1, "\u0e23": [1, 3, 7, 9, 10, 11, 12, 13, 15, 16], "\u0e01\u0e20": 1, "\u0e21": [1, 7, 8, 9, 11, 12, 15, 16], "\u0e43\u0e08\u0e17": 1, "\u0e01\u0e40\u0e2a": 1, "\u0e22\u0e2a\u0e25\u0e30": 1, "\u0e43\u0e2b": [1, 7, 9, 11], "\u0e2d\u0e0a": 1, "\u0e0a": [1, 3, 7, 8, 9, 11, 12, 16], "\u0e1e\u0e0a": 1, "\u0e27\u0e22\u0e40\u0e1e": 1, "\u0e2d\u0e19\u0e17\u0e2b\u0e32\u0e23\u0e23\u0e2d\u0e14": 1, "\u0e27\u0e40\u0e2d\u0e07\u0e40\u0e2a": 1, "\u0e22\u0e0a": [1, 7], "\u0e27": [1, 7, 9, 11, 12, 15, 16], "\u0e08\u0e32\u0e01\u0e44\u0e1b": 1, "pythainlp": [2, 3, 6, 8, 9, 11, 12, 13, 14, 15, 16], "doe": [2, 12], "come": [2, 5, 12], "instead": [2, 10, 14, 15], "you": [2, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16], "can": [2, 7, 10, 12, 13, 15, 16], "us": [2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "which": [2, 5, 6, 7, 9, 12, 14, 16], "wa": [2, 12, 15, 16], "train": [2, 6, 10, 11, 12, 14, 15, 16], "univers": 2, "thi": [2, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16], "tutori": [2, 5, 6, 14, 16], "show": [2, 9, 12, 14, 15], "how": [2, 5, 6, 12, 13, 14, 15, 16], "spacy_thai": [2, 10], "collect": [2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16], "download": [2, 3, 4, 5, 6, 8, 10, 13, 14, 15, 16], "file": [2, 4, 5, 8, 13, 14, 16], "pythonhost": [2, 4, 8], "packag": [2, 3, 4, 6, 7, 8, 9, 10, 14, 15, 16], "ca": [2, 10], "2d": [2, 16], "c2e71a4143d6d9cd9db6744e328dfb9f65b98ad7607644d0ad4369bce303": 2, "py3": [2, 4, 8, 10, 14, 15], "ani": [2, 4, 7, 8, 10, 12, 14, 15], "whl": [2, 3, 4, 6, 8, 10, 14, 15], "1mb": [2, 8], "11": [2, 4, 7, 8, 9, 10, 11, 14, 15, 16], "2mb": [2, 4, 8], "ufal": [2, 10], "udpip": [2, 10], "e5": 2, "72": [2, 9, 15], "2b8b9dc7c80017c790bb3308bbad34b57accfed2ac2f1f4ab252ff4e9cb2": 2, "tar": [2, 4, 8, 10, 13, 15], "gz": [2, 4, 8, 10, 13, 15], "304kb": 2, "307kb": 2, "45": [2, 7, 10, 11], "8mb": [2, 8], "requir": [2, 3, 4, 6, 7, 8, 10, 14, 15], "alreadi": [2, 3, 4, 6, 7, 8, 9, 10, 14, 15], "satisfi": [2, 3, 4, 6, 8, 10, 14, 15], "usr": [2, 3, 4, 6, 7, 8, 9, 10, 14, 15, 16], "local": [2, 3, 4, 6, 7, 8, 9, 10, 14, 15, 16], "lib": [2, 3, 4, 6, 7, 8, 9, 10, 14, 15, 16], "python3": [2, 3, 4, 6, 7, 8, 9, 10, 14, 15, 16], "dist": [2, 3, 4, 6, 8, 9, 10, 14, 15, 16], "deplaci": [2, 10], "58": [2, 5, 7], "87b6286c9578fc456de1363f877228ee0d117b8de238e3e2cd49dbc06eaa": 2, "c1": 2, "09": 2, "1215cb6f6ef0cfc9dbb427a961fda8a47c111955f782f659ca2d38c79adc": 2, "10": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "6mb": [2, 8], "28": [2, 10, 15, 16], "7mb": [2, 4], "srsly": [2, 10], "request": [2, 3, 4, 5, 6, 8, 10, 13, 14, 15, 16], "23": [2, 4, 7, 8, 9, 10, 14, 15, 16], "thinc": [2, 10], "presh": [2, 10], "wasabi": [2, 10], "plac": 2, "cymem": [2, 10], "bli": [2, 10], "tqdm": [2, 4, 6, 8, 9, 10, 11, 12, 14, 15], "38": [2, 7, 8, 10], "41": [2, 4, 7, 8, 9], "murmurhash": [2, 10], "numpi": [2, 4, 7, 8, 9, 10, 11, 12, 14, 15, 16], "15": [2, 3, 4, 7, 8, 9, 10, 11, 12, 14, 15, 16], "catalogu": [2, 10], "setuptool": [2, 10], "54": [2, 7, 8, 14], "tinydb": [2, 4, 8, 14], "af": [2, 8], "cd": [2, 8, 9], "1ce3d93818cdeda0446b8033d21e5f32daeb3a866bbafd878a9a62058a9c": [2, 8], "crfsuit": [2, 3, 4, 6, 8, 10, 14], "79": [2, 4, 7, 8, 9, 10], "47": [2, 7, 8, 9, 15], "58f16c46506139f17de4630dbcfb877ce41a6355a1bbf3c443edb9708429": [2, 8], "python_crfsuit": [2, 3, 6, 8, 10, 14], "cp37": [2, 4, 8, 14], "cp37m": [2, 4, 8, 14], "manylinux1_x86_64": [2, 4, 8, 14], "743kb": [2, 8], "747kb": [2, 8], "68": [2, 7, 14], "5mb": [2, 4], "chardet": [2, 4, 8, 10, 14], "urllib3": [2, 3, 4, 6, 8, 10, 14, 15], "26": [2, 4, 8, 10, 11, 14, 15, 16], "21": [2, 3, 4, 6, 7, 8, 9, 10, 14, 15, 16], "24": [2, 4, 7, 8, 10, 14, 15, 16], "certifi": [2, 3, 4, 6, 8, 10, 14, 15], "2017": [2, 3, 4, 6, 8, 10, 14, 15], "2020": [2, 4, 8], "12": [2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "idna": [2, 3, 4, 6, 8, 10, 14, 15], "importlib": [2, 4, 8, 14], "20": [2, 4, 7, 8, 9, 10, 11, 14, 15, 16], "python_vers": [2, 4, 8], "34": [2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "type": [2, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "extens": [2, 4, 8, 9, 10, 14, 15], "zipp": [2, 4, 8, 14], "creat": [2, 4, 7, 8, 10, 13, 15, 16], "filenam": [2, 4, 8, 10, 14, 15, 16], "linux_x86_64": [2, 10, 14], "size": [2, 4, 8, 10, 12, 15, 16], "5626703": 2, "sha256": [2, 4, 8, 10, 15], "a58565fc21a1f9d3a7c51a3aea138cf612babbefb36ae05cbaccec852b55d967": 2, "store": [2, 4, 8, 10, 14, 15], "directori": [2, 4, 8, 10, 15], "root": [2, 4, 8, 10, 12, 15], "cach": [2, 4, 8, 10, 15], "0c": 2, "9d": 2, "db": 2, "6d3404c33da5b7adb6c6972853efb6a27649d3ba15f7e9bebb": 2, "successfulli": [2, 3, 4, 5, 6, 8, 10, 14, 15], "built": [2, 4, 8, 10, 15], "load": [2, 5, 10, 11, 12, 14, 15, 16], "do": [2, 7, 9, 11, 12, 15, 16], "pars": [2, 6, 10, 13], "call": [2, 5, 6, 7, 14, 15], "sentenc": [2, 5, 6, 10, 15, 16], "\u0e1e\u0e27\u0e01\u0e40\u0e23\u0e32\u0e43\u0e0a": 2, "\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22": [2, 5], "visual": [2, 6, 9, 12, 16], "tree": [2, 6, 13], "graphviz": 2, "dot": 2, "pre": [3, 5, 7, 16], "0b4": 3, "22": [3, 4, 5, 6, 7, 8, 10, 11, 14, 15, 16], "31": [3, 6, 7, 9, 14, 15, 16], "charset": [3, 6, 15], "normal": [3, 6, 14, 15], "2023": [3, 6, 15], "cp310": [3, 6, 15], "manylinux_2_17_x86_64": [3, 6, 10, 14, 15], "manylinux2014_x86_64": [3, 4, 6, 8, 10, 14, 15], "993": [3, 6, 15], "16": [3, 7, 8, 9, 11, 14, 16], "corpu": [3, 4, 5, 6, 7, 9, 10, 11, 16], "thai_word": [3, 7], "token": [3, 4, 5, 6, 8, 9, 10, 11, 12, 14, 16], "syllable_token": [3, 7], "all_thai_words_dict": 3, "18": [3, 4, 7, 9, 11, 14, 16], "khave": 3, "khaveeverifi": 3, "kv": 3, "39": [3, 5, 7, 8, 9, 10, 11, 12, 13, 15, 16], "\u0e40\u0e17\u0e2d\u0e0d": 3, "\u0e08": [3, 7, 9, 11, 12, 15, 16], "\u0e1a": [3, 7, 9, 10, 11, 12, 15, 16], "list_sumpu": 3, "try": [3, 5, 7, 10, 12, 15], "is_sumpu": 3, "except": [3, 12], "pass": [3, 7, 14, 16], "print": [3, 4, 7, 9, 11, 12, 16], "\u0e2d": [3, 7, 9, 11, 12, 13, 15, 16], "\u0e1f": [3, 7], "\u0e16": [3, 7, 9, 11], "\u0e2b\u0e25": [3, 6, 12, 15], "\u0e17\u0e27": 3, "\u0e1b": [3, 7, 9, 11, 15, 16], "\u0e07": [3, 7, 9, 11, 12, 15, 16], "\u0e2b": [3, 11], "\u0e04": [3, 7, 8, 9, 11, 12, 15], "\u0e2b\u0e19": [3, 7, 9, 13, 15], "\u0e04\u0e23": [3, 5, 7, 9, 11, 12], "we": [4, 5, 6, 7, 9, 11, 12, 13, 14, 15, 16], "machin": 4, "The": [4, 6, 7, 9, 10, 11, 13, 15, 16], "vistec": [4, 15], "depa": 4, "thailand": 4, "artifici": 4, "intellig": [4, 12], "research": [4, 10, 15], "institut": 4, "fairseq": 4, "ab": 4, "92c6efb05ffdfe16fbdc9e463229d9af8c3b74dc943ed4b4857a87b223c2": 4, "dataclass": 4, "2f": 4, "1095cdc2868052dd1e64520f7c0d5c8c550ad297e944e641dbf1ffbb9a5d": 4, "cython": 4, "hydra": 4, "core": [4, 10], "52": [4, 7], "e3": [4, 10], "fbd70dd0d3ce4d1d75c22d56c0c9f895cfa7ed6587a9ffb821d6812d6a60": 4, "hydra_cor": 4, "123kb": 4, "133kb": 4, "cffi": [4, 14], "sacrebleu": 4, "7e": 4, "57": [4, 7, 9, 10, 11], "0c7ca4e31a126189dab99c19951910bd081dea5bbd25f24b77107750eae7": 4, "54kb": 4, "61kb": 4, "3mb": [4, 8], "torch": [4, 9, 10, 12, 14, 15], "cu101": 4, "regex": [4, 6, 8, 10, 14, 15], "2019": [4, 7, 8, 10, 13, 14, 15], "omegaconf": 4, "d0": 4, "eb": [4, 10], "9d63ce09dd8aa85767c65668d5414958ea29648a0eec80a4a7d311ec2684": 4, "antlr4": 4, "runtim": 4, "56": [4, 7, 9], "02": [4, 9, 11, 12], "789a0bddf9c9b31b14c3e79ec22b9656185a803dc31c15f006f9855ece0d": 4, "112kb": 4, "4mb": [4, 8], "resourc": [4, 14], "pycpars": [4, 14], "portalock": 4, "89": [4, 10], "a6": 4, "3814b7107e0788040870e8825eebf214d72166adf656ba7d4bf14759a06a": 4, "py2": [4, 10], "pyyaml": [4, 10, 14, 15], "7a": 4, "a5": 4, "393c087efdc78091afa2af9f1378762f9821c9c1d7a22c5753fb5ac5f97a": 4, "636kb": 4, "645kb": 4, "0mb": [4, 8], "antlr4_python3_runtim": 4, "141231": 4, "7443fbcc47b93d3b320b897cf91d8b947b6fdc6a0795dcce01ed16fd31c8ab6d": 4, "e2": [4, 13, 16], "fa": 4, "b78480b448b8579ddf393bebd3f47ee23aa84c89b6a78285c8": 4, "found": [4, 5, 10, 14, 16], "exist": [4, 10, 14], "uninstal": [4, 10, 14], "sacremos": [4, 8, 14], "43": [4, 7, 8, 9, 10, 11, 13, 15], "f5": [4, 8], "99": [4, 8, 9, 11, 12], "e0808cb947ba10f575839c43e8fafc9cc44e4a7a2c8f79c60db48220a577": [4, 8], "click": [4, 6, 8, 10, 14], "joblib": [4, 6, 8, 10, 14], "six": [4, 8, 9, 10, 14], "archiv": [4, 8, 9, 11, 16], "dev": [4, 5, 7, 9, 10, 11, 15, 16], "zip": [4, 8, 9, 11, 16], "upgrad": 4, "dev0": [4, 8], "11003566": 4, "b64ebc4010c51f2644c15473edd0c49540644725a367c28baa0d3f3e19edcccb": 4, "tmp": 4, "ephem": 4, "zkojv2_o": 4, "4e": 4, "1e": [4, 9, 11, 14], "26f3198c6712ecfbee92928ed1dde923a078da3d222401cc78": 4, "download_model_al": 4, "scb_1m_en": 4, "th_mose": 4, "100": [4, 5, 7, 9, 11, 12, 13, 15, 16], "1174648148": 4, "81506882": 4, "14it": 4, "scb_1m_th": 4, "en_spm": 4, "703780432": 4, "08": [4, 7, 10, 11, 13, 14], "78234386": 4, "81it": 4, "enthtransl": 4, "thentransl": 4, "en": [4, 14], "have": [4, 12, 15, 16], "bpe": 4, "want": [4, 10, 12, 15], "fri": 4, "chicken": 4, "\u0e44\u0e01": [4, 7, 9], "\u0e17\u0e2d\u0e14\u0e04": 4, "\u0e30": [4, 9, 11, 16], "\u0e1c\u0e21\u0e2d\u0e22\u0e32\u0e01\u0e01": 4, "\u0e19\u0e44\u0e01": 4, "\u0e17\u0e2d\u0e14": [4, 9], "\u0e1c\u0e21\u0e2d\u0e22\u0e32\u0e01\u0e40\u0e02": 4, "\u0e22\u0e19\u0e42\u0e1b\u0e23\u0e41\u0e01\u0e23\u0e21\u0e04\u0e2d\u0e21\u0e1e": 4, "\u0e27\u0e40\u0e15\u0e2d\u0e23": 4, "write": [4, 11, 12], "comput": [4, 13], "program": 4, "rust": 5, "node": 5, "bind": 5, "similarli": 5, "newmm": [5, 7, 10, 15], "maxim": 5, "match": [5, 7], "base": [5, 6, 7, 8, 9, 10, 11, 15, 16], "honor": [5, 12], "charact": [5, 12], "cluster": [5, 15], "boundari": 5, "howev": [5, 12], "compar": 5, "pure": 5, "implement": [5, 13], "much": [5, 12], "faster": 5, "For": [5, 7, 9, 10, 14, 15, 16], "comparison": 5, "refer": 5, "benchmark": [5, 11], "segment": [5, 6, 10], "lern": 5, "more": [5, 6, 7, 9, 10, 12, 15, 16], "about": [5, 7, 9, 12], "here": [5, 7, 12, 15], "In": [5, 11, 15], "learn": [5, 9, 11, 12], "serv": 5, "first": [5, 6, 11, 13, 15], "without": [5, 7, 12], "specifi": [5, 7, 15], "paramet": [5, 7, 14], "\u0e17\u0e14\u0e2a\u0e2d\u0e1a\u0e15": [5, 8], "\u0e14\u0e04\u0e33\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22": 5, "\u0e17\u0e14\u0e2a\u0e2d\u0e1a": [5, 8], "\u0e14": [5, 7, 8, 9, 11, 13, 15, 16], "\u0e04\u0e33": 5, "now": [5, 12], "enhanc": 5, "countri": [5, 12, 15], "wget": [5, 9, 11, 13, 14, 16], "command": 5, "It": [5, 7, 8, 9, 10, 11, 15, 16], "plain": 5, "contain": [5, 9, 11], "one": [5, 7, 12, 16], "per": [5, 8, 13], "line": [5, 9, 12], "raw": [5, 9, 11, 14, 15, 16], "countries_th": 5, "txt": [5, 6, 9, 11, 13], "2021": [5, 6, 8, 14, 15], "06": [5, 7, 11], "05": [5, 9, 14], "resolv": [5, 13, 16], "140": [5, 9, 16], "82": [5, 11, 16], "112": [5, 7], "connect": [5, 13, 16], "443": [5, 16], "sent": [5, 7, 10, 13, 16], "await": [5, 13, 16], "respons": [5, 13, 16], "302": [5, 16], "locat": [5, 7, 10, 16], "githubusercont": [5, 16], "follow": [5, 6, 7, 9, 16], "185": [5, 16], "199": [5, 13, 16], "108": [5, 16], "133": [5, 16], "109": 5, "200": [5, 9, 12, 13, 16], "ok": [5, 7, 13, 16], "length": [5, 8, 13, 14, 16], "7622": 5, "4k": 5, "save": [5, 9, 11, 12, 13, 14, 16], "44k": 5, "70": [5, 7, 9, 11, 14], "load_dict": 5, "function": [5, 6, 7, 9, 12, 15], "content": [5, 11], "success": [5, 12], "name": [5, 9, 10, 11, 12, 14, 15], "ha": [5, 8, 12, 15, 16], "been": [5, 12, 15], "final": [5, 6], "method": [5, 15], "\u0e2a\u0e27": [5, 9, 11], "\u0e2a\u0e14": [5, 9, 11], "\u0e1a\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22": 5, "\u0e40\u0e01\u0e32\u0e2b\u0e25": 5, "\u0e1a\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28": 5, "\u0e44\u0e17\u0e22": [5, 11], "demonstr": [6, 13], "chunk_pars": 6, "phrase": 6, "orchid": 6, "read": [6, 7, 10, 11, 12, 14], "pull": [6, 16], "524": 6, "need": [6, 15], "nltk": [6, 10], "preprocess": [6, 9, 11, 15], "data": [6, 10, 11, 12, 14, 15], "svgling": 6, "svgwrite": 6, "66": [6, 13], "modul": [6, 7, 9, 10, 12], "word_token": [6, 7, 9, 11, 16], "take": [6, 9, 12, 16], "pos_tag": [6, 7, 8], "mark": [6, 7], "them": [6, 7, 11, 12], "part": [6, 10, 15], "speech": [6, 10, 14], "po": [6, 9, 10], "insid": [6, 7, 14], "outsid": [6, 7, 12], "begin": [6, 7, 12], "iob": 6, "conlltags2tre": 6, "convert": [6, 7, 14], "format": [6, 16], "svg": 6, "defin": 6, "new": [6, 7, 9, 12, 16], "test": [6, 7, 9, 11, 12], "input": [6, 11, 14, 15], "perform": [6, 7, 9, 11, 12, 15], "combin": 6, "tripl": 6, "p": [6, 7, 9, 14, 16], "m": [6, 7, 9, 13], "w": [6, 7, 11, 16], "t": [6, 7, 8, 9, 10, 12, 13, 15], "engin": [6, 7, 8, 10], "perceptron": [6, 10], "sever": [6, 12], "draw_tre": 6, "syntact": [6, 13], "were": [6, 10, 12, 15], "\u0e41\u0e21\u0e27\u0e01": 6, "\u0e19\u0e1b\u0e25\u0e32": 6, "\u0e04\u0e19\u0e2b\u0e19\u0e2d\u0e07\u0e04\u0e32\u0e22\u0e40\u0e1b": 6, "\u0e19\u0e04\u0e19\u0e19": 6, "\u0e32\u0e23": [6, 7], "\u0e1b\u0e25\u0e32\u0e2d\u0e30\u0e44\u0e23\u0e2d\u0e22": 6, "\u0e43\u0e19\u0e19": 6, "\u0e33": [6, 9, 16], "\u0e33\u0e21": 6, "\u0e2d\u0e30\u0e44\u0e23\u0e2d\u0e22": 6, "\u0e17\u0e33\u0e44\u0e21\u0e40\u0e02\u0e32\u0e23": 6, "\u0e01\u0e04": 6, "\u0e13": [6, 7, 9, 11], "\u0e04\u0e19\u0e2d\u0e30\u0e44\u0e23\u0e2d\u0e22": 6, "\u0e07\u0e15": [6, 7, 12], "\u0e19\u0e44\u0e21": [6, 7, 16], "basic": 7, "uncom": [7, 9, 11, 12, 16], "run": [7, 9, 10, 11, 12, 13, 14, 16], "colab": [7, 9, 10, 11, 12, 14, 15, 16], "extra": 7, "blob": [7, 14], "epitran": 7, "__version__": 7, "provid": [7, 8, 11, 12, 15], "some": [7, 10, 12, 15], "readi": 7, "set": [7, 9, 10, 11, 12, 14, 15, 16], "g": [7, 10, 15], "conson": 7, "vowel": 7, "tonemark": 7, "symbol": 7, "conveni": 7, "There": 7, "ar": [7, 9, 10, 11, 12, 13, 14, 15, 16], "also": [7, 9, 12, 16], "few": [7, 12], "util": [7, 14], "thai_charact": 7, "\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e25\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e24\u0e26\u0e30": 7, "\u0e32\u0e33": [7, 16], "\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45": 7, "\u0e2f": 7, "\u0e46": [7, 9, 11], "\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59": 7, "88": [7, 10], "thai_conson": 7, "\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e25\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e": 7, "44": 7, "\u0e54": 7, "thai_digit": 7, "isthai": 7, "fals": [7, 8, 9, 10, 11, 12, 14], "ignore_char": 7, "counthai": 7, "proport": 7, "ignor": [7, 11], "non": [7, 16], "alphabet": 7, "default": [7, 8, 9, 10, 11, 15], "countthai": 7, "\u0e19\u0e2d\u0e32\u0e17": [7, 11, 15], "\u0e15\u0e22": [7, 11, 15], "\u0e19\u0e32\u0e04\u0e21": 7, "2562": [7, 15], "67": 7, "85714285714286": 7, "sort": [7, 12], "accord": 7, "\u0e2d\u0e19": [7, 9, 11, 15], "\u0e01\u0e23\u0e30\u0e14\u0e32\u0e29": 7, "\u0e01\u0e23\u0e23\u0e44\u0e01\u0e23": 7, "\u0e44\u0e02": [7, 11], "\u0e1c": [7, 9, 11, 12, 16], "\u0e32\u0e44\u0e2b\u0e21": 7, "revers": 7, "dai": [7, 11, 12, 16], "month": 7, "buddhist": 7, "era": 7, "b": [7, 8, 9, 11, 12], "direct": [7, 12], "similar": [7, 12, 15], "datetim": 7, "strftime": 7, "thai_strftim": 7, "fmt": [7, 9, 15], "a\u0e17": 7, "d": [7, 9, 14], "\u0e28": [7, 9, 11, 12, 15], "y": [7, 9, 11, 12, 16], "\u0e40\u0e27\u0e25\u0e32": 7, "h": 7, "1976": 7, "40": [7, 9, 10, 11, 14], "\u0e19\u0e1e": [7, 12, 15], "\u0e18\u0e17": 7, "\u0e25\u0e32\u0e04\u0e21": 7, "2519": 7, "01": [7, 9, 11, 12, 14], "modifi": 7, "appli": [7, 9, 11], "right": [7, 11, 15, 16], "befor": [7, 9, 11, 12, 14], "minu": 7, "pad": [7, 14, 15], "numer": [7, 11], "result": [7, 10, 12, 13, 14, 15], "avail": [7, 15], "underscor": 7, "space": 7, "zero": [7, 14], "upper": 7, "case": [7, 10, 11, 12, 15], "swap": 7, "o": [7, 8, 14, 15], "letter": [7, 16], "altern": 7, "note": [7, 10, 13, 15, 16], "thai_tim": 7, "renam": 7, "time_to_thaiword": 7, "\u0e19\u0e22": 7, "\u0e19\u0e32\u0e2c": 7, "\u0e01\u0e32\u0e2a": 7, "\u0e1a\u0e2a": 7, "\u0e19\u0e32\u0e17": [7, 9], "\u0e1a\u0e40\u0e01": 7, "\u0e32\u0e27": [7, 9], "wai": [7, 15], "chosen": 7, "24h": 7, "6h": 7, "m6h": 7, "yourself": [7, 12], "\u0e40\u0e17": 7, "\u0e22\u0e07\u0e04": 7, "\u0e19\u0e2a": [7, 11], "precis": 7, "well": [7, 15], "minut": [7, 9, 12], "second": [7, 16], "onli": [7, 10, 12], "valu": [7, 12], "30": [7, 9, 15, 16], "\u0e2a\u0e2d\u0e07\u0e42\u0e21\u0e07\u0e40\u0e0a": 7, "\u0e32\u0e2a": 7, "\u0e1a\u0e40\u0e08": 7, "\u0e14\u0e19\u0e32\u0e17": 7, "\u0e41\u0e1b\u0e14\u0e42\u0e21\u0e07\u0e2a": 7, "\u0e2b\u0e01\u0e42\u0e21\u0e07\u0e04\u0e23": 7, "\u0e32\u0e22\u0e42\u0e21\u0e07\u0e04\u0e23": 7, "object": [7, 13, 14], "\u0e1a\u0e2a\u0e32\u0e21\u0e19\u0e32\u0e2c": 7, "\u0e1a\u0e2b": 7, "\u0e32\u0e22\u0e42\u0e21\u0e07\u0e2a": 7, "At": 7, "sub": 7, "crfcut": [7, 10], "uss": 7, "sent_token": 7, "\u0e1e\u0e23\u0e30\u0e23\u0e32\u0e0a\u0e1a": 7, "\u0e0d\u0e0d": 7, "\u0e18\u0e23\u0e23\u0e21\u0e19": 7, "\u0e0d\u0e01\u0e32\u0e23\u0e1b\u0e01\u0e04\u0e23\u0e2d\u0e07\u0e41\u0e1c": 7, "\u0e19\u0e2a\u0e22\u0e32\u0e21\u0e0a": 7, "\u0e27\u0e04\u0e23\u0e32\u0e27": 7, "\u0e17\u0e18\u0e28": 7, "\u0e01\u0e23\u0e32\u0e0a": 7, "\u0e52\u0e54\u0e57\u0e55": 7, "\u0e19\u0e23": [7, 11], "\u0e10\u0e18\u0e23\u0e23\u0e21\u0e19": 7, "\u0e0d\u0e09\u0e1a": 7, "\u0e1a\u0e0a": 7, "\u0e0b": [7, 9, 11, 12, 13, 15], "\u0e07\u0e16": 7, "\u0e2d\u0e27": [7, 8, 9], "\u0e32\u0e40\u0e1b": [7, 11], "\u0e1a\u0e41\u0e23\u0e01\u0e41\u0e2b": 7, "\u0e07\u0e23\u0e32\u0e0a\u0e2d\u0e32\u0e13\u0e32\u0e08": 7, "\u0e01\u0e23\u0e2a\u0e22\u0e32\u0e21": 7, "\u0e1b\u0e23\u0e30\u0e01\u0e32\u0e28\u0e43\u0e0a": 7, "\u0e40\u0e21": [7, 9, 11], "\u0e19\u0e17": [7, 9, 10, 11, 12, 15], "27": [7, 8, 10, 11, 14, 15, 16], "\u0e19\u0e32\u0e22\u0e19": 7, "2475": 7, "\u0e42\u0e14\u0e22\u0e40\u0e1b": 7, "\u0e19\u0e1c\u0e25\u0e1e\u0e27\u0e07\u0e2b\u0e25": 7, "\u0e07\u0e01\u0e32\u0e23\u0e1b\u0e0f": 7, "\u0e42\u0e14\u0e22\u0e04\u0e13\u0e30\u0e23\u0e32\u0e29\u0e0e\u0e23": 7, "nwhitespac": 7, "newlin": 7, "whitespac": 7, "maximum": [7, 8], "algorithm": 7, "\u0e08\u0e30\u0e23": 7, "\u0e04\u0e27\u0e32\u0e21\u0e0a": 7, "\u0e27\u0e23": 7, "\u0e32\u0e22\u0e17": 7, "\u0e17\u0e33\u0e44\u0e27": 7, "\u0e41\u0e25\u0e30\u0e04\u0e07\u0e08\u0e30\u0e44\u0e21": 7, "\u0e22\u0e2d\u0e21\u0e43\u0e2b": 7, "\u0e17\u0e33\u0e19\u0e32\u0e1a\u0e19\u0e2b\u0e25": 7, "\u0e07\u0e04\u0e19": 7, "nnewmm": 7, "keep_whitespac": 7, "\u0e08\u0e30": [7, 9, 11, 13], "\u0e04\u0e27\u0e32\u0e21": [7, 9], "\u0e17\u0e33": [7, 9], "\u0e44\u0e27": 7, "\u0e41\u0e25\u0e30": [7, 9, 11, 12, 16], "\u0e04\u0e07\u0e08\u0e30": 7, "other": [7, 12, 15, 16], "\u0e01\u0e0e\u0e2b\u0e21\u0e32\u0e22\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19\u0e09\u0e1a": 7, "\u0e1a\u0e1b\u0e23": 7, "\u0e07\u0e43\u0e2b\u0e21": 7, "\u0e41\u0e25": [7, 9, 11, 15], "longest": 7, "\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19": 7, "custom_token": 7, "\u0e01\u0e0e\u0e2b\u0e21\u0e32\u0e22\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19": 7, "\u0e09\u0e1a": 7, "\u0e43\u0e2b\u0e21": [7, 9, 11], "\u0e1b\u0e23\u0e30\u0e01\u0e32\u0e28": 7, "\u0e01\u0e0e\u0e2b\u0e21\u0e32\u0e22": 7, "common": [7, 16], "add": [7, 8, 15], "remov": [7, 14, 15], "\u0e22\u0e32\u0e22\u0e27": 7, "\u0e17\u0e22\u0e32\u0e28\u0e32\u0e2a\u0e15\u0e23": [7, 12], "\u0e02\u0e2d\u0e07\u0e44\u0e2d\u0e41\u0e0b\u0e04": 7, "\u0e2d\u0e2a": 7, "\u0e21\u0e2d\u0e1f": 7, "frozenset": 7, "\u0e44\u0e2d\u0e41\u0e0b\u0e04": 7, "isaac": 7, "asimov": 7, "\u0e22\u0e32\u0e22": 7, "\u0e02\u0e2d\u0e07": [7, 9, 11, 16], "\u0e21\u0e2d": 7, "trie": 7, "ilo87": 7, "\u0e32\u0e14": 7, "\u0e27\u0e22\u0e40\u0e2a\u0e23": 7, "\u0e20\u0e32\u0e1e\u0e43\u0e19\u0e01\u0e32\u0e23\u0e2a\u0e21\u0e32\u0e04\u0e21\u0e41\u0e25\u0e30\u0e01\u0e32\u0e23\u0e04": 7, "\u0e21\u0e04\u0e23\u0e2d\u0e07\u0e2a": 7, "\u0e17\u0e18": [7, 11, 15], "\u0e43\u0e19\u0e01\u0e32\u0e23\u0e23\u0e27\u0e21\u0e15": 7, "ilo98": 7, "\u0e27\u0e22\u0e2a": 7, "\u0e27\u0e41\u0e25\u0e30\u0e01\u0e32\u0e23\u0e23": 7, "\u0e27\u0e21\u0e40\u0e08\u0e23\u0e08\u0e32\u0e15": 7, "\u0e2d\u0e23\u0e2d\u0e07": 7, "new_word": 7, "\u0e01\u0e32\u0e23\u0e23": 7, "\u0e40\u0e2a\u0e23": 7, "\u0e20\u0e32\u0e1e\u0e43\u0e19\u0e01\u0e32\u0e23\u0e2a\u0e21\u0e32\u0e04\u0e21": 7, "\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19\u0e2a": 7, "\u0e21\u0e1e": 7, "\u0e19\u0e18": [7, 16], "union": 7, "custom_dictionary_tri": 7, "custom_dict": 7, "ilo": 7, "87": 7, "\u0e27\u0e22": [7, 9, 11], "\u0e20\u0e32\u0e1e": 7, "\u0e43\u0e19": [7, 9, 11], "\u0e01\u0e32\u0e23\u0e2a\u0e21\u0e32\u0e04\u0e21": 7, "\u0e01\u0e32\u0e23": [7, 9, 11, 16], "\u0e21\u0e04\u0e23\u0e2d\u0e07": 7, "\u0e23\u0e27\u0e21\u0e15": 7, "98": [7, 11], "\u0e27\u0e21": [7, 9], "\u0e40\u0e08\u0e23\u0e08\u0e32": 7, "differ": [7, 16], "speedtest_text": 7, "\u0e04\u0e23\u0e1a\u0e23\u0e2d\u0e1a": 7, "\u0e15\u0e32\u0e01\u0e43\u0e1a": 7, "\u0e40\u0e0a": [7, 11, 15], "\u0e19\u0e19": [7, 9, 11], "2547": 7, "\u0e21\u0e19": [7, 16], "\u0e21\u0e0a\u0e32\u0e22\u0e01\u0e27": 7, "\u0e32": [7, 8, 9, 11, 12, 15, 16], "370": 7, "\u0e04\u0e19": [7, 9, 11, 13, 15], "\u0e01\u0e42\u0e22\u0e19\u0e02": 7, "\u0e19\u0e23\u0e16\u0e22": 7, "\u0e40\u0e2d": [7, 9], "\u0e21\u0e0b": 7, "\u0e2b\u0e23": [7, 9, 11, 15], "\u0e19\u0e2d\u0e19\u0e0b": 7, "\u0e2d\u0e19\u0e01": [7, 9], "\u0e19\u0e04": 7, "\u0e19\u0e25\u0e30": 7, "\u0e40\u0e14": [7, 11, 12], "\u0e19\u0e17\u0e32\u0e07\u0e08\u0e32\u0e01\u0e2a\u0e16\u0e32\u0e19": 7, "\u0e15\u0e33\u0e23\u0e27\u0e08\u0e15\u0e32\u0e01\u0e43\u0e1a": 7, "\u0e44\u0e1b\u0e44\u0e01\u0e25": 7, "150": [7, 9], "\u0e42\u0e25\u0e40\u0e21\u0e15\u0e23": [7, 15], "\u0e44\u0e1b\u0e16": 7, "\u0e07\u0e04": 7, "\u0e32\u0e22\u0e2d": 7, "\u0e07\u0e04\u0e22": 7, "\u0e17\u0e18\u0e1a\u0e23": 7, "\u0e2b\u0e32\u0e23": 7, "\u0e40\u0e27\u0e25\u0e32\u0e01\u0e27": 7, "\u0e27\u0e42\u0e21\u0e07": 7, "\u0e43\u0e19\u0e2d": [7, 15], "\u0e01\u0e04\u0e14": 7, "\u0e0d\u0e32\u0e15": 7, "\u0e2d\u0e07\u0e23": [7, 9], "\u0e10": 7, "\u0e04\u0e14": 7, "\u0e08\u0e1a\u0e25\u0e07\u0e17": 7, "\u0e01\u0e32\u0e23\u0e1b\u0e23\u0e30\u0e19": 7, "\u0e1b\u0e23\u0e30\u0e19\u0e2d\u0e21\u0e22\u0e2d\u0e21\u0e04\u0e27\u0e32\u0e21": 7, "\u0e01\u0e23\u0e30\u0e17\u0e23\u0e27\u0e07\u0e01\u0e25\u0e32\u0e42\u0e2b\u0e21\u0e08": 7, "\u0e32\u0e22\u0e04": 7, "\u0e19\u0e44\u0e2b\u0e21\u0e17\u0e14\u0e41\u0e17\u0e19\u0e23\u0e27\u0e21": 7, "42": [7, 8, 9, 10, 14], "\u0e32\u0e19\u0e1a\u0e32\u0e17\u0e43\u0e2b": 7, "\u0e1a\u0e0d\u0e32\u0e15": 7, "\u0e22\u0e2b\u0e32\u0e22": 7, "\u0e23\u0e32\u0e22": 7, "\u0e14\u0e2b": 7, "\u0e1a\u0e41\u0e25\u0e30\u0e19": 7, "\u0e1a\u0e04\u0e30\u0e41\u0e19\u0e19\u0e40\u0e2a\u0e23": 7, "\u0e08\u0e41\u0e25": 7, "\u0e27\u0e22\u0e40\u0e25": 7, "\u0e2d\u0e01\u0e15": 7, "\u0e07\u0e17": [7, 9], "\u0e40\u0e02\u0e15": 7, "\u0e41\u0e02\u0e27\u0e07\u0e2b": 7, "\u0e27\u0e2b\u0e21\u0e32\u0e01": 7, "\u0e40\u0e02\u0e15\u0e1a\u0e32\u0e07\u0e01\u0e30\u0e1b": 7, "\u0e01\u0e23": [7, 11], "\u0e07\u0e40\u0e17\u0e1e\u0e21\u0e2b\u0e32\u0e19\u0e04\u0e23": [7, 11], "\u0e2a\u0e21": [7, 12], "\u0e41\u0e25\u0e30\u0e15": 7, "\u0e27\u0e41\u0e17\u0e19\u0e1e\u0e23\u0e23\u0e04\u0e01\u0e32\u0e23\u0e40\u0e21": 7, "\u0e2d\u0e07\u0e08\u0e32\u0e01\u0e2b\u0e25\u0e32\u0e22\u0e1e\u0e23\u0e23\u0e04\u0e15": 7, "\u0e32\u0e07\u0e21\u0e32\u0e40\u0e1d": 7, "\u0e07\u0e40\u0e01\u0e15\u0e01\u0e32\u0e23\u0e19": 7, "\u0e1a\u0e04\u0e30\u0e41\u0e19\u0e19\u0e2d\u0e22": 7, "\u0e32\u0e07\u0e43\u0e01\u0e25": 7, "\u0e42\u0e14\u0e22": [7, 9, 11], "\u0e20": [7, 8], "\u0e2a\u0e23": [7, 9, 15], "\u0e42\u0e0a\u0e15": [7, 9], "\u0e40\u0e14\u0e0a\u0e32\u0e0a": 7, "\u0e22\u0e19": [7, 8, 9, 10, 11, 15], "\u0e19\u0e15": [7, 9, 15, 16], "\u0e08\u0e32\u0e01\u0e1e\u0e23\u0e23\u0e04\u0e1e\u0e25": 7, "\u0e07\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e23": 7, "\u0e41\u0e25\u0e30\u0e1e\u0e23": 7, "\u0e29\u0e10": 7, "\u0e0a\u0e23\u0e2a": 7, "\u0e08\u0e32\u0e01\u0e1e\u0e23\u0e23\u0e04\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e18": 7, "\u0e44\u0e14": [7, 9, 11, 12, 15, 16], "\u0e04\u0e30\u0e41\u0e19\u0e19": 7, "96": 7, "\u0e04\u0e30\u0e41\u0e19\u0e19\u0e40\u0e17": 7, "\u0e32\u0e01": [7, 15], "\u0e40\u0e21\u0e29\u0e32\u0e22\u0e19": [7, 11], "\u0e07\u0e40\u0e1b": 7, "\u0e19\u0e27": 7, "\u0e19\u0e2d": [7, 11], "\u0e2a\u0e40\u0e15\u0e2d\u0e23": 7, "\u0e19\u0e2a\u0e33\u0e04": 7, "\u0e0d\u0e02\u0e2d\u0e07\u0e0a\u0e32\u0e27\u0e04\u0e23": 7, "\u0e2a\u0e15": 7, "\u0e40\u0e01": [7, 9, 11, 15], "\u0e14\u0e40\u0e2b\u0e15": 7, "\u0e23\u0e30\u0e40\u0e1a": 7, "\u0e14\u0e15": 7, "\u0e2d\u0e40\u0e19": 7, "\u0e2d\u0e07\u0e43\u0e19\u0e42\u0e1a\u0e2a\u0e16": 7, "\u0e41\u0e25\u0e30\u0e42\u0e23\u0e07\u0e41\u0e23\u0e21\u0e2d\u0e22": 7, "\u0e32\u0e07\u0e19": 7, "\u0e2d\u0e22": [7, 8, 9, 11, 15], "\u0e41\u0e2b": [7, 16], "\u0e07\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e28\u0e23": 7, "\u0e07\u0e01\u0e32": 7, "\u0e15\u0e41\u0e25": 7, "\u0e27\u0e2d\u0e22": 7, "156": 7, "\u0e41\u0e25\u0e30\u0e1a\u0e32\u0e14\u0e40\u0e08": 7, "\u0e1a\u0e2b\u0e25\u0e32\u0e22\u0e23": 7, "\u0e2d\u0e22\u0e04\u0e19": 7, "\u0e07\u0e44\u0e21": 7, "\u0e02": [7, 9, 11, 15], "\u0e2d\u0e21": [7, 11, 15], "\u0e25\u0e27": 7, "\u0e32\u0e1c": 7, "\u0e2d\u0e40\u0e2b\u0e15": 7, "\u0e21\u0e32\u0e08\u0e32\u0e01\u0e1d": 7, "\u0e32\u0e22\u0e43\u0e14": 7, "\u0e19\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e08": 7, "\u0e14\u0e01\u0e32\u0e23\u0e1b\u0e23\u0e30\u0e0a": 7, "\u0e21\u0e02": [7, 11, 15], "\u0e2d\u0e23": [7, 9], "\u0e40\u0e23": [7, 10, 11], "\u0e21\u0e2a\u0e32\u0e22\u0e41\u0e16\u0e1a\u0e41\u0e25\u0e30\u0e40\u0e2a": 7, "\u0e19\u0e17\u0e32\u0e07\u0e43\u0e19\u0e0a": 7, "\u0e27\u0e07\u0e1b\u0e25\u0e32\u0e22\u0e2a": 7, "\u0e1b\u0e14\u0e32\u0e2b": [7, 11], "\u0e01\u0e01": [7, 9, 16], "\u0e07\u0e22": 7, "\u0e2d\u0e20": 7, "\u0e21\u0e2b\u0e32\u0e42\u0e04\u0e23\u0e07\u0e01\u0e32\u0e23\u0e40\u0e0a": 7, "\u0e2d\u0e21\u0e42\u0e25\u0e01\u0e02\u0e2d\u0e07\u0e08": 7, "\u0e40\u0e04\u0e23": [7, 9, 11], "\u0e2d\u0e07\u0e21": 7, "\u0e2d\u0e41\u0e1c": 7, "\u0e1e\u0e25": 7, "\u0e1a\u0e1f": 7, "\u0e07\u0e02": [7, 9], "\u0e08\u0e32\u0e23\u0e13": 7, "\u0e1b\u0e23\u0e30\u0e40\u0e14": [7, 11], "\u0e19\u0e01": [7, 15], "\u0e1a\u0e14": [7, 16], "\u0e01\u0e2b\u0e19": 7, "\u0e41\u0e25\u0e30\u0e04\u0e27\u0e32\u0e21\u0e44\u0e21": 7, "\u0e42\u0e1b\u0e23": 7, "\u0e07\u0e43\u0e2a": 7, "\u0e10\u0e1a\u0e32\u0e25\u0e1b": 7, "\u0e07\u0e1a\u0e2d\u0e01\u0e27": 7, "\u0e40\u0e27\u0e17": 7, "\u0e1b\u0e23\u0e30\u0e0a": 7, "belt": 7, "road": 7, "forum": 7, "\u0e43\u0e19\u0e0a": [7, 12], "\u0e27\u0e07\u0e27": 7, "\u0e2d\u0e40\u0e1b": [7, 12], "\u0e19\u0e07\u0e32\u0e19\u0e01\u0e32\u0e23\u0e17": 7, "\u0e15\u0e17": 7, "\u0e2a\u0e33\u0e04": 7, "\u0e0d\u0e17": 7, "\u0e14\u0e02\u0e2d\u0e07\u0e08": 7, "\u0e19\u0e43\u0e19\u0e1b": 7, "speed": 7, "through": [7, 12], "wrapper": 7, "cpu": [7, 9], "user": [7, 9, 10, 11], "253": 7, "sy": [7, 9], "total": [7, 9, 11, 12, 13], "256": 7, "wall": [7, 9], "255": 7, "60": [7, 9], "\u00b5": 7, "46": [7, 10, 13, 14, 16], "safe": 7, "33": [7, 9, 15], "attacut": [7, 10], "833": 7, "174": [7, 11], "576": 7, "possibl": [7, 16], "multi_cut": 7, "find_all_seg": 7, "mmcut": 7, "\u0e04\u0e27\u0e32\u0e21\u0e40\u0e1b": [7, 9], "\u0e19\u0e44\u0e1b\u0e44\u0e14": 7, "\u0e32\u0e07\u0e44\u0e23\u0e1a": 7, "\u0e32\u0e07": [7, 9, 11], "\u0e44\u0e1b": [7, 9, 10, 11, 13], "\u0e44\u0e23": [7, 15], "\u0e19\u0e44\u0e1b": [7, 9], "\u0e32\u0e07\u0e44\u0e23": 7, "either": 7, "ssg": [7, 10, 15], "ponrawe": 7, "__": [7, 11], "crf": 7, "prasertsom": 7, "smaller": [7, 15], "than": [7, 12, 16], "inform": [7, 9], "retriev": 7, "theeramunkong": 7, "et": [7, 13], "al": [7, 13], "2004": 7, "unit": 7, "35": [7, 9, 10, 13, 15, 16], "subword_token": [7, 8], "\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22": [7, 9], "\u0e23\u0e30": [7, 9], "\u0e44\u0e17": 7, "dict": [7, 9, 11, 12, 14], "known": [7, 16], "36": [7, 9, 10, 11, 13, 14, 15, 16], "\u0e25\u0e40\u0e25\u0e32\u0e30": 7, "\u0e0b\u0e2d\u0e21": 7, "\u0e0b\u0e2d": 7, "\u0e2a\u0e21\u0e2d\u0e07\u0e1a\u0e27\u0e21\u0e23": 7, "\u0e19\u0e41\u0e23\u0e07": 7, "\u0e40\u0e25\u0e32\u0e30": 7, "\u0e2a\u0e21\u0e2d\u0e07": 7, "\u0e1a\u0e27\u0e21": 7, "\u0e41\u0e23\u0e07": 7, "extern": 7, "ommit": 7, "output": [7, 11, 14, 15], "37": [7, 11, 13, 14], "These": 7, "task": [7, 10, 13, 15], "like": [7, 9, 11, 12, 15], "cut": 7, "certain": [7, 12], "point": [7, 12, 16], "typo": 7, "tcc_po": 7, "posit": [7, 16], "ch": 7, "two": [7, 11, 12, 16], "roman": 7, "latin": 7, "royal": 7, "system": [7, 10, 12], "transcript": 7, "rtg": 7, "support": [7, 8, 16], "simpl": [7, 10, 15, 16], "royin": 7, "accur": 7, "thai2rom": 7, "context": 7, "mean": [7, 9, 12, 14], "sound": [7, 14], "ipa": 7, "intern": 7, "phonet": 7, "icu": 7, "compon": [7, 13], "unicod": 7, "pyicu": 7, "\u0e41\u0e21\u0e27": [7, 10, 16], "maeo": 7, "\u0e20\u0e32\u0e1e\u0e22\u0e19\u0e15\u0e23": [7, 11], "phapn": 7, "obvious": 7, "wrong": [7, 12], "m\u025b\u02d0w": 7, "updat": [7, 9, 15], "g2p": 7, "up": [7, 12], "\u025b\u02d0": 7, "p\u02b0a\u02d0pjanot": 7, "p\u02b0": 7, "a\u02d0": 7, "n": [7, 8, 11, 13], "width": 7, "zwsp": 7, "zwnj": 7, "duplic": 7, "repeat": [7, 9], "dangl": 7, "reorder": 7, "tone": 7, "dure": 7, "\u0e40\u0e40\u0e1b\u0e25\u0e01": 7, "\u0e41\u0e1b\u0e25\u0e01": 7, "\u0e40": 7, "v": [7, 13, 14], "\u0e41": 7, "below": 7, "standard": 7, "order": [7, 9, 11, 16], "sara": 7, "aa": 7, "mai": [7, 12, 15], "ek": 7, "\u0e40\u0e01\u0e32": 7, "includ": [7, 9, 16], "\u0e1a\u0e27": 7, "\u0e1e\u0e23": 7, "immedi": [7, 13], "nnormal": 7, "multipl": [7, 14], "A": 7, "row": [7, 11, 16], "keep": 7, "reduc": 7, "variat": 7, "48": [7, 9, 10, 14], "\u0e40\u0e01\u0e30\u0e30\u0e30": 7, "\u0e40\u0e01\u0e30": 7, "just": [7, 12], "seri": [7, 12], "remove_zw": 7, "remove_dup_spac": 7, "remove_repeat_vowel": 7, "remove_dangl": 7, "If": [7, 10, 15], "don": [7, 12], "behavior": 7, "those": [7, 12], "shown": 7, "abov": 7, "remove_tonemark": 7, "reorder_vowel": 7, "individu": 7, "your": [7, 12, 14], "own": [7, 12], "sometim": 7, "search": [7, 15], "pythainp": 7, "deal": [7, 12], "49": 7, "arabic_digit_to_thai_digit": 7, "thai_digit_to_arabic_digit": 7, "digit_to_text": 7, "\u0e09": [7, 15], "\u0e01\u0e40\u0e09": 7, "\u0e42\u0e23\u0e1b\u0e40\u0e23": 7, "\u0e22\u0e01": 7, "\u0e51\u0e51\u0e52": 7, "50": [7, 9, 11, 13], "51": [7, 11, 16], "\u0e07\u0e2b\u0e19": [7, 9, 12], "\u0e07\u0e2a\u0e2d\u0e07": 7, "index": [7, 9, 10, 11, 13, 15, 16], "wikipedia": [7, 11, 12, 15], "three": 7, "kind": [7, 12], "lk82": 7, "metasound": 7, "udom83": 7, "equival": 7, "\u0e23\u0e16": [7, 9, 11], "\u0e23\u0e14": 7, "\u0e27\u0e23\u0e23": 7, "\u0e19\u0e20": 7, "\u0e23\u0e13\u0e30": 7, "\u0e23\u0e13\u0e01\u0e32\u0e23": 7, "\u0e21\u0e23\u0e23\u0e04": 7, "\u0e01\u0e29": [7, 16], "\u0e1ae400": 7, "\u0e1a930000": 7, "\u0e1a550": 7, "\u0e1ae419": 7, "\u0e1a931900": 7, "\u0e1a551": 7, "\u0e211000": 7, "\u0e21100000": 7, "\u0e21100": 7, "\u0e21310000": 7, "\u0e21551": 7, "\u0e231000": 7, "\u0e23100000": 7, "\u0e25100": 7, "\u0e23100": 7, "peter": 7, "norvig": 7, "togeth": 7, "nation": 7, "tnc": 7, "\u0e40\u0e2b\u0e25": [7, 9], "\u0e22\u0e21": 7, "correct": [7, 16], "most": [7, 12, 16], "55": [7, 11], "when": [7, 9, 10, 12, 15], "norvigspellcheck": 7, "kei": [7, 16], "int": [7, 11, 13], "tupl": [7, 14, 16], "assign": 7, "everi": [7, 9, 12], "user_dict": 7, "1000": [7, 9, 11, 16], "\u0e22\u0e27": [7, 9, 11, 15, 16], "1000000": 7, "checker": [7, 16], "As": 7, "our": [7, 15], "give": [7, 9, 12], "edit": [7, 12, 16], "distanc": 7, "prioriti": 7, "over": 7, "textbook": 7, "By": 7, "ttc": 7, "word_freq": 7, "To": [7, 9], "current": [7, 15], "59": [7, 9, 14], "\u0e18": [7, 9, 15], "\u0e44\u0e2a": 7, "\u0e01\u0e23\u0e2d\u0e01": 7, "\u0e1b\u0e25": [7, 11], "\u0e40\u0e15": [7, 9, 11], "\u0e02\u0e2d\u0e1a\u0e04": [7, 15], "356": 7, "\u0e1b\u0e23\u0e30\u0e2a\u0e32\u0e19": 7, "84": [7, 15], "\u0e23\u0e33\u0e44\u0e23": 7, "\u0e27\u0e21\u0e17": 7, "\u0e2d\u0e07": [7, 9, 11, 15], "\u0e1d": 7, "\u0e01\u0e21\u0e30\u0e02\u0e32\u0e21": 7, "condit": 7, "filter": 7, "39963": 7, "61": [7, 11], "min_freq": [7, 9, 11, 12], "min_len": 7, "max_len": [7, 9], "30376": 7, "62": [7, 14], "checker_no_filt": 7, "dict_filt": 7, "66209": 7, "63": [7, 10], "remove_yamok": 7, "els": [7, 12, 15], "checker_custom_filt": 7, "66204": 7, "64": [7, 10, 11, 12, 13, 14], "pos_tag_s": 7, "\u0e19\u0e17\u0e32\u0e07": 7, "fixn": 7, "vact": 7, "65": [7, 15], "\u0e1b\u0e23\u0e30\u0e01\u0e32\u0e28\u0e2a\u0e33\u0e19": 7, "\u0e01\u0e19\u0e32\u0e22\u0e01\u0e2f": 7, "\u0e2a\u0e23\u0e23\u0e40\u0e2a\u0e23": 7, "\u0e0d": [7, 11, 16], "\u0e41\u0e01": 7, "\u0e27\u0e01\u0e33\u0e40\u0e19": 7, "\u0e19\u0e08\u0e32\u0e01\u0e15\u0e33\u0e41\u0e2b\u0e19": 7, "\u0e17\u0e23\u0e07\u0e04": 7, "\u0e13\u0e27": 7, "\u0e12": 7, "\u0e40\u0e28\u0e29": [7, 9], "\u0e01\u0e2d\u0e07\u0e17": 7, "\u0e1e\u0e1a\u0e01": [7, 12], "\u0e01\u0e23\u0e30\u0e17\u0e23\u0e27\u0e07\u0e01\u0e25\u0e32\u0e42\u0e2b\u0e21": 7, "\u0e2d\u0e18": 7, "\u0e01\u0e23\u0e21\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e2a": 7, "ncmn": 7, "punc": 7, "jsbr": 7, "jcrg": 7, "vsta": 7, "tagger": [7, 15], "bio": 7, "scheme": 7, "pip3": 7, "ner": [7, 8, 10, 15], "thainer": [7, 8, 10], "thainametagg": [7, 8], "get_ner": [7, 8], "2563": 7, "\u0e17\u0e14\u0e2a\u0e2d\u0e1a\u0e23\u0e30\u0e1a\u0e1a\u0e40\u0e27\u0e25\u0e32": 7, "\u0e19\u0e17\u0e32\u0e07\u0e08\u0e32\u0e01\u0e02\u0e19\u0e2a": 7, "\u0e07\u0e01\u0e23": 7, "\u0e07\u0e40\u0e17\u0e1e\u0e43\u0e01\u0e25": 7, "\u0e16\u0e19\u0e19\u0e01\u0e33\u0e41\u0e1e\u0e07\u0e40\u0e1e\u0e0a\u0e23": 7, "\u0e44\u0e1b\u0e08": 7, "\u0e07\u0e2b\u0e27": [7, 15], "\u0e14\u0e01\u0e33\u0e41\u0e1e\u0e07\u0e40\u0e1e\u0e0a\u0e23": 7, "\u0e27\u0e23\u0e32\u0e04\u0e32": 7, "297": [7, 16], "\u0e1a\u0e32\u0e17": [7, 9], "num": [7, 13, 14], "punct": 7, "noun": [7, 10], "\u0e17\u0e14\u0e2a\u0e2d\u0e1a\u0e23\u0e30\u0e1a\u0e1a": 7, "verb": [7, 10, 16], "\u0e08\u0e32\u0e01": [7, 9, 11, 12], "adp": 7, "\u0e02\u0e19\u0e2a": 7, "organ": [7, 8, 15], "\u0e07\u0e40\u0e17\u0e1e": 7, "\u0e43\u0e01\u0e25": 7, "adj": [7, 13], "\u0e16\u0e19\u0e19": 7, "\u0e01\u0e33\u0e41\u0e1e\u0e07\u0e40\u0e1e\u0e0a\u0e23": 7, "aux": [7, 10, 14], "\u0e23\u0e32\u0e04\u0e32": 7, "monei": [7, 12], "word_vector": [7, 16], "\u0e29\u0e22": [7, 16], "2504981": 7, "doesnt_match": [7, 16], "\u0e04\u0e04\u0e25": 7, "\u0e40\u0e08": [7, 9, 11, 12, 16], "\u0e32\u0e2b\u0e19": 7, "\u0e32\u0e17": 7, "site": 7, "gensim": [7, 16], "keyedvector": [7, 16], "877": 7, "futurewarn": [7, 16], "arrai": [7, 9, 11, 14, 15, 16], "stack": [7, 16], "must": [7, 12, 14, 16], "sequenc": [7, 16], "iter": [7, 11, 16], "deprec": [7, 10, 14, 15, 16], "rais": [7, 16], "an": [7, 12, 15, 16], "error": [7, 16], "futur": [7, 16], "vstack": [7, 16], "self": [7, 16], "word_vec": [7, 16], "use_norm": [7, 16], "used_word": [7, 16], "astyp": [7, 9, 14, 16], "real": [7, 12, 16], "69": [7, 14], "bahttext": 7, "1234567890123": 7, "\u0e07\u0e25": 7, "\u0e32\u0e19\u0e2a\u0e2d\u0e07\u0e41\u0e2a\u0e19\u0e2a\u0e32\u0e21\u0e2b\u0e21": 7, "\u0e19\u0e2b": 7, "\u0e2d\u0e22\u0e2b\u0e01\u0e2a": 7, "\u0e14\u0e25": 7, "\u0e32\u0e19\u0e41\u0e1b\u0e14\u0e41\u0e2a\u0e19\u0e40\u0e01": 7, "\u0e32\u0e2b\u0e21": 7, "\u0e19\u0e2b\u0e19": 7, "\u0e07\u0e23": [7, 11], "\u0e2d\u0e22\u0e22": 7, "\u0e1a\u0e2a\u0e32\u0e21\u0e1a\u0e32\u0e17\u0e2a": 7, "\u0e32\u0e2a\u0e15\u0e32\u0e07\u0e04": 7, "round": [7, 14], "satang": 7, "909": 7, "\u0e07\u0e1a\u0e32\u0e17\u0e40\u0e01": 7, "\u0e1a\u0e40\u0e2d": 7, "\u0e14\u0e2a\u0e15\u0e32\u0e07\u0e04": 7, "lowphansirikul": 8, "l": [8, 9, 11, 16], "polpanuma": 8, "c": [8, 9, 11, 13, 16], "jantrakulchai": 8, "nutanong": 8, "pretrain": [8, 9, 11, 12], "arxiv": 8, "preprint": 8, "2101": 8, "09635": 8, "jan": 8, "full": [8, 12], "thai2transform": [8, 15], "11006400": 8, "f89b594cbbebbc1940c16b0957a74182f2ea8169de8270e33f0c6bac5d1d4fcd": 8, "9a": 8, "9e": 8, "b2ab1db5c70b14b8d5d8a402e36ed915c2ec906df5c4f4b089": 8, "f9": 8, "5ca07ec9569d2f232f3166de5457b63943882f7950ddfcc887732fc7fb23": 8, "9mb": 8, "71": 8, "2ddc317b2121117bf34dd00f5b0de194158f2a44ee2bf5e47c7166878a97": 8, "manylinux2010_x86_64": [8, 14], "filelock": [8, 10, 14, 15], "7d": 8, "09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10": 8, "883kb": 8, "890kb": 8, "pypars": [8, 10, 14], "893262": 8, "26dd1871c98e4cd5fe1938dbeba7086606c31e80a945ec9f752859e252fe7068": 8, "3c": 8, "fd": 8, "7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45": 8, "dataset": [8, 10, 15], "lst20": [8, 10], "dataset_nam": [8, 15], "\u0e17\u0e14\u0e2a\u0e2d\u0e1a\u0e1c\u0e21\u0e21": 8, "\u0e19\u0e32\u0e22\u0e27\u0e23\u0e23\u0e13\u0e1e\u0e07\u0e29": 8, "\u0e17\u0e17": 8, "\u0e22\u0e44\u0e1e\u0e1a": 8, "\u0e25\u0e22": 8, "ask": 8, "truncat": 8, "max_length": 8, "predefin": 8, "person": [8, 12], "\u0e42\u0e23\u0e07\u0e40\u0e23": [8, 10, 15], "\u0e22\u0e19\u0e2a\u0e27\u0e19\u0e01": [8, 15], "\u0e2b\u0e25\u0e32\u0e1a\u0e40\u0e1b": [8, 15], "\u0e19\u0e42\u0e23\u0e07\u0e40\u0e23": [8, 15], "\u0e22\u0e19\u0e17": [8, 15], "\u0e2a\u0e27\u0e19\u0e01": [8, 15], "\u0e2b\u0e25\u0e32\u0e1a": [8, 15], "t2": [8, 15], "grouped_ent": [8, 15], "ttl": 8, "\u0e19\u0e32\u0e22": [8, 12], "\u0e27\u0e23\u0e23\u0e13\u0e1e\u0e07\u0e29": 8, "\u0e1c\u0e21\u0e21": 8, "\u0e1c\u0e21": [8, 10, 11, 13], "pr": 8, "nn": [8, 10], "\u0e27\u0e23\u0e23\u0e13": 8, "\u0e1e\u0e07\u0e29": 8, "\u0e44\u0e1e\u0e1a": 8, "grouped_word": 8, "\u0e14\u0e04\u0e33\u0e22": 8, "detail": [9, 12], "step": [9, 11], "taken": 9, "analyz": [9, 11], "evalu": 9, "metric": [9, 11, 12, 15], "overal": 9, "accuraci": [9, 11, 12], "across": [9, 12], "neg": [9, 15, 16], "ativ": 9, "itiv": 9, "neu": 9, "tral": 9, "uestion": 9, "class": [9, 11, 12], "fasttext": [9, 16], "semi": 9, "supervis": [9, 11], "public": [9, 10, 15], "privat": 9, "72781": 9, "7499": 9, "63144": 9, "6131": 9, "71259": 9, "74194": 9, "73119": 9, "75859": 9, "One": 9, "time": [9, 11, 12, 13], "73372": 9, "75968": 9, "kaggl": [9, 11, 16], "competit": 9, "upon": 9, "1st": 9, "place": 9, "solut": 9, "googl": [9, 11, 12, 14, 15, 16], "sklearn_crfsuit": [9, 11, 16], "emoji": [9, 10, 12, 15, 16], "fastai": [9, 11, 12, 16], "master": [9, 11, 16], "unzip": [9, 11], "mkdir": [9, 11, 13], "wisesight_data": 9, "snippet": 9, "font": [9, 16], "matplotlib": [9, 11, 12, 16], "gist": 9, "korakot": 9, "9d7f5db632351dc92607fdec72a4953f": 9, "phonbopit": 9, "sarabun": [9, 16], "webfont": 9, "thsarabunnew": 9, "ttf": [9, 16], "cp": 9, "mpl": 9, "share": [9, 12], "truetyp": 9, "font_manag": [9, 16], "_rebuild": 9, "rc": 9, "famili": [9, 12], "load_ext": 9, "autoreload": [9, 16], "np": [9, 11, 12, 13, 14, 15, 16], "panda": [9, 11, 12, 16], "pd": [9, 11, 12, 16], "tqdm_notebook": [9, 11, 12], "process_thai": [9, 11], "viz": [9, 11], "pyplot": [9, 11, 12, 16], "plt": [9, 11, 12, 16], "seaborn": [9, 11, 12, 15], "sn": [9, 11, 12, 15], "reload": 9, "reload_ext": [9, 16], "clean": [9, 11, 12], "rule": [9, 11], "aim": [9, 11], "spars": [9, 11], "bag": [9, 11], "pre_rul": [9, 11, 12], "post_rul": [9, 11, 12], "after": [9, 11], "\u0e32\u0e19\u0e19\u0e19\u0e19\u0e19": 9, "\u0e19\u0e32\u0e19\u0e19\u0e32\u0e19\u0e19\u0e32\u0e19": 9, "amp": [9, 12], "www": [9, 10, 14], "\u0e32\u0e19": [9, 11], "xxrep": [9, 11], "xxwrep": 9, "\u0e19\u0e32\u0e19": 9, "xxurl": 9, "open": [9, 10, 11, 12, 14, 16], "f": [9, 11, 12, 14, 15, 16], "strip": [9, 11, 12], "readlin": 9, "train_label": 9, "categori": 9, "all_df": [9, 11], "datafram": [9, 11, 16], "to_csv": [9, 11], "shape": [9, 11, 14, 16], "24063": 9, "test_df": [9, 11], "2674": 9, "map": 9, "lambda": 9, "x": [9, 12, 13, 14, 16], "wc": 9, "uwc": 9, "preval": 9, "value_count": [9, 11], "544612": 9, "255164": 9, "178698": 9, "021527": 9, "dtype": [9, 11, 14], "float64": [9, 11], "85": 9, "under": [9, 15], "oversampl": 9, "balanc": [9, 11], "out": [9, 12], "littl": 9, "hyperparamet": 9, "sklearn": [9, 11, 15, 16], "model_select": 9, "train_test_split": 9, "train_df": [9, 11], "valid_df": 9, "test_siz": 9, "random_st": [9, 11], "1412": [9, 11], "reset_index": [9, 11], "drop": [9, 11, 13], "actual": 9, "copi": [9, 11], "read_csv": [9, 11, 12], "head": [9, 11, 12, 13, 16], "\u0e19\u0e04\u0e19\u0e25\u0e1a\u0e41\u0e2d\u0e1e": 9, "viu": 9, "\u0e19\u0e43\u0e08\u0e41\u0e25\u0e30\u0e40\u0e02": 9, "\u0e32\u0e43\u0e08\u0e40\u0e02\u0e32\u0e19\u0e30\u0e04\u0e30": 9, "\u0e41\u0e1c\u0e25\u0e21": 9, "\u0e25\u0e1a": 9, "\u0e41\u0e2d": 9, "\u0e19\u0e43\u0e08": 9, "\u0e40\u0e02": [9, 11], "\u0e32\u0e43\u0e08": 9, "\u0e40\u0e02\u0e32": 9, "\u0e44\u0e1b\u0e0a\u0e21\u0e44\u0e21": 9, "\u0e27\u0e02\u0e2d\u0e07\u0e41\u0e0a\u0e21\u0e1b": 9, "\u0e41\u0e25\u0e30\u0e23\u0e2d\u0e07\u0e41\u0e0a\u0e21\u0e1b": 9, "\u0e19\u0e08": [9, 15], "\u0e0a\u0e21": 9, "\u0e41\u0e0a\u0e21\u0e1b": 9, "\u0e23\u0e2d\u0e07": 9, "\u0e21\u0e23\u0e16\u0e0b": 9, "\u0e04\u0e40\u0e1b": 9, "\u0e19\u0e01\u0e25": 9, "\u0e21\u0e17": [9, 12], "\u0e32\u0e23\u0e33\u0e04\u0e32\u0e19\u0e21\u0e32\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01": 9, "\u0e23\u0e33": 9, "\u0e04\u0e32\u0e19": 9, "\u0e21\u0e32\u0e01": [9, 11], "\u0e2d\u0e22\u0e32\u0e01\u0e2a\u0e27\u0e22\u0e40\u0e2b\u0e21": 9, "\u0e2d\u0e19\u0e40\u0e08": 9, "\u0e32\u0e02\u0e2d\u0e07\u0e41\u0e1a\u0e23\u0e19\u0e14": 9, "\u0e04\u0e30": 9, "\u0e40\u0e19\u0e22": 9, "\u0e01\u0e32": [9, 11], "\u0e43\u0e1a\u0e2b\u0e19": 9, "\u0e2d\u0e22\u0e32\u0e01": 9, "\u0e2a\u0e27\u0e22": 9, "\u0e40\u0e2b\u0e21": 9, "\u0e32\u0e02\u0e2d\u0e07": 9, "\u0e41\u0e1a\u0e23\u0e19\u0e14": 9, "\u0e32\u0e27\u0e42\u0e16\u0e25\u0e30\u0e23": 9, "\u0e41\u0e1e\u0e07": 9, "\u0e40\u0e1e\u0e23\u0e32\u0e30\u0e15": 9, "\u0e01\u0e40\u0e1b": 9, "\u0e19\u0e08\u0e32\u0e19\u0e46\u0e25\u0e3015": 9, "\u0e42\u0e16": 9, "\u0e25\u0e30": 9, "\u0e40\u0e1e\u0e23\u0e32\u0e30": 9, "\u0e08\u0e32\u0e19": 9, "381": 9, "218": 9, "544957": 9, "253557": 9, "180071": 9, "021415": 9, "542659": 9, "264266": 9, "170914": 9, "022161": 9, "variabl": [9, 14], "y_train": [9, 11], "y_valid": 9, "faetur": 9, "feature_extract": [9, 11], "tfidfvector": 9, "linear_model": 9, "logisticregress": 9, "tfidf": [9, 11], "ngram_rang": [9, 11], "min_df": [9, 11], "sublinear_tf": 9, "tfidf_fit": 9, "text_train": 9, "text_valid": 9, "text_test": 9, "20453": 9, "4614": 9, "3610": 9, "top_feats_al": 9, "plot_top_feat": 9, "get_feature_nam": 9, "toarrai": 9, "448": 9, "492": 9, "940": 9, "938": 9, "rank": [9, 16], "score": [9, 11, 15], "ngram": 9, "029990": 9, "022852": 9, "020252": 9, "\u0e40\u0e25\u0e22": [9, 11], "019493": 9, "018153": 9, "852": 9, "862": 9, "73": [9, 13, 15], "count": [9, 13], "uniqu": [9, 12], "might": [9, 12, 14], "so": [9, 12, 13], "standardscal": 9, "scaler": 9, "scaler_fit": 9, "float": [9, 14], "mean_": 9, "var_": 9, "num_train": 9, "num_valid": 9, "num_test": 9, "96529942": 9, "22744462": 9, "1151": 9, "47512883": 9, "513": 9, "46009207": 9, "74": 9, "concaten": [9, 14, 15], "x_train": [9, 11], "axi": [9, 14, 16], "x_valid": 9, "x_test": [9, 11], "4616": 9, "75": 9, "penalti": [9, 11], "l2": [9, 11], "solver": 9, "liblinear": 9, "dual": 9, "multi_class": [9, 11], "ovr": [9, 11], "7324099722991689": 9, "76": 9, "prob": [9, 11], "predict_proba": 9, "probs_df": 9, "column": [9, 11, 16], "classes_": 9, "pred": [9, 11], "predict": [9, 10, 11, 12, 14], "hit": 9, "probs_df_linear": 9, "77": 9, "confusion_matrix": 9, "conf_mat": 9, "heatmap": [9, 15], "annot": [9, 13, 15, 16], "xticklabel": [9, 15], "yticklabel": [9, 15], "ylabel": 9, "xlabel": 9, "callback": [9, 11, 12], "csvlogger": [9, 11, 12], "savemodelcallback": 9, "tt": [9, 11, 12], "tok_func": [9, 11, 12], "thaitoken": [9, 11, 12], "lang": [9, 11, 12], "pre_rules_th": [9, 11, 12], "post_rules_th": [9, 11, 12], "tokenizeprocessor": [9, 11, 12], "chunksiz": [9, 11, 12], "10000": [9, 11, 12], "mark_field": [9, 11, 12], "numericalizeprocessor": [9, 11, 12], "vocab": [9, 11, 12, 14, 16], "max_vocab": [9, 11, 12], "60000": [9, 11, 12], "data_lm": [9, 11, 12], "textlist": [9, 11, 12], "from_df": [9, 11, 12], "col": [9, 11, 12], "split_by_rand_pct": [9, 12], "valid_pct": [9, 11], "seed": [9, 11], "label_for_lm": [9, 11, 12], "databunch": [9, 11, 12], "sanity_check": [9, 11, 12], "wisesight_lm": 9, "pkl": [9, 11, 16], "train_d": [9, 11], "valid_d": [9, 11], "23823": 9, "240": [9, 10], "emb_sz": [9, 11, 12], "400": [9, 11, 12], "n_hid": [9, 11, 12], "1550": [9, 11, 12], "n_layer": [9, 11, 12], "pad_token": [9, 11, 12], "qrnn": [9, 11, 12], "tie_weight": [9, 11, 12], "out_bia": [9, 11, 12], "output_p": [9, 11, 12], "hidden_p": [9, 11, 12], "input_p": [9, 11, 12], "embed_p": [9, 11, 12], "weight_p": [9, 11, 12], "trn_arg": [9, 11, 12], "drop_mult": [9, 11, 12], "clip": [9, 11, 12], "alpha": [9, 11, 12], "beta": [9, 11, 12], "language_model_learn": [9, 11, 12], "awd_lstm": [9, 11, 12], "load_pretrain": [9, 11, 12], "_thwiki_lstm": [9, 11, 12], "languagelearn": [9, 12], "textlmdatabunch": [9, 12], "labellist": [9, 12], "item": [9, 12, 14], "lmtextlist": [9, 12], "xxbo": [9, 11, 12], "\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28": 9, "\u0e40\u0e23\u0e32": [9, 11], "\u0e1c\u0e25": 9, "\u0e07\u0e2d\u0e2d\u0e01": 9, "\u0e22\u0e32\u0e2a": 9, "\u0e40\u0e22\u0e2d\u0e30": [9, 11], "\u0e42\u0e25\u0e01": 9, "\u0e2d\u0e2d\u0e21": 9, "\u0e40\u0e04": [9, 16], "\u0e41\u0e19\u0e19": 9, "\u0e2d\u0e30\u0e44\u0e23": [9, 11], "\u0e19\u0e30": 9, "lmlabellist": [9, 12], "path": [9, 12, 14], "\u0e19\u0e30\u0e04\u0e30": [9, 11, 16], "\u0e41\u0e1c\u0e25": 9, "\u0e41\u0e16\u0e21": 9, "\u0e2d\u0e32\u0e23\u0e21\u0e13": 9, "\u0e42\u0e14\u0e19": 9, "xxunk": [9, 11, 12], "\u0e40\u0e19\u0e2d\u0e30": 9, "\u0e27\u0e19": [9, 11], "\u0e17\u0e32\u0e07": [9, 11], "\u0e01\u0e2d\u0e14": 9, "netflix": 9, "\u0e41\u0e19": [9, 11], "\u0e17\u0e33\u0e23": 9, "\u0e19\u0e2d\u0e19": 9, "\u0e1a\u0e15\u0e01": 9, "\u0e01\u0e32\u0e23\u0e41\u0e02": 9, "\u0e41\u0e2a\u0e07\u0e42\u0e2a\u0e21": 9, "\u0e2a\u0e19": 9, "\u0e01\u0e40\u0e01\u0e2d\u0e23": 9, "\u0e41\u0e14\u0e07": [9, 11], "\u0e42\u0e2d\u0e40\u0e1e": 9, "\u0e1b\u0e23\u0e30\u0e08\u0e33\u0e1b": 9, "2560": 9, "\u0e2a\u0e19\u0e32\u0e21": 9, "\u0e04\u0e25": 9, "\u0e0b\u0e2d\u0e22": 9, "\u0e42\u0e0a\u0e04": 9, "\u0e25\u0e32\u0e14\u0e1e\u0e23": 9, "\u0e2d\u0e27\u0e14": 9, "\u0e17\u0e33\u0e44\u0e21": 9, "\u0e01\u0e04\u0e19": 9, "\u0e1e\u0e27\u0e01": 9, "\u0e1a\u0e2d": 9, "\u0e01\u0e27": [9, 11], "\u0e19\u0e21": [9, 15], "\u0e40\u0e1a\u0e25\u0e2d": 9, "\u0e43\u0e2a": 9, "\u0e02\u0e19\u0e32\u0e14": 9, "\u0e13\u0e41\u0e21": 9, "\u0e19\u0e30\u0e40\u0e19": 9, "\u0e40\u0e1b\u0e25": 9, "\u0e40\u0e2d\u0e07": 9, "\u0e27\u0e22\u0e15": 9, "\u0e21\u0e32\u0e2a": 9, "\u0e01\u0e42\u0e0a": 9, "\u0e32\u0e21\u0e04": 9, "cho": 9, "cosmet": 9, "daradaili": 9, "\u0e14\u0e32\u0e23\u0e32": 9, "\u0e40\u0e14\u0e25": 9, "\u0e04\u0e19\u0e44\u0e17\u0e22": 9, "\u0e19\u0e02": 9, "\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28": 9, "\u0e2b\u0e21": [9, 11, 16], "\u0e19\u0e25\u0e21": 9, "\u0e09\u0e30": 9, "\u0e42\u0e25": 9, "\u0e21\u0e30\u0e25": 9, "\u0e2d\u0e40\u0e1b\u0e25": 9, "250": 9, "\u0e02\u0e32\u0e22": [9, 11], "160": [9, 13], "\u0e40\u0e22\u0e2d\u0e30\u0e41\u0e22\u0e30": 9, "\u0e01\u0e33\u0e44\u0e23": 9, "\u0e04\u0e27\u0e23": 9, "\u0e32\u0e27\u0e1c": 9, "\u0e43\u0e2b\u0e0d": 9, "300": [9, 11, 16], "\u0e16\u0e32\u0e14": 9, "\u0e32\u0e19\u0e1a\u0e19": 9, "80": [9, 11, 13], "\u0e0a\u0e32\u0e40\u0e22": 9, "\u0e02\u0e27\u0e14": 9, "\u0e19\u0e41\u0e01": 9, "\u0e1e\u0e2d\u0e41\u0e25": 9, "\u0e40\u0e1a": [9, 11], "\u0e22\u0e23": 9, "120": 9, "\u0e32\u0e40\u0e01\u0e25": 9, "\u0e22\u0e14": 9, "\u0e21\u0e32": [9, 11], "360": [9, 11], "\u0e33\u0e41\u0e02": 9, "\u0e1e\u0e2d\u0e44\u0e14": 9, "\u0e2d\u0e32\u0e01\u0e32\u0e28": 9, "\u0e25\u0e30\u0e25\u0e32\u0e22": 9, "\u0e1e\u0e2d": 9, "\u0e17\u0e30\u0e40\u0e25": 9, "\u0e40\u0e1c\u0e32": 9, "\u0e25\u0e27\u0e01": 9, "\u0e32\u0e15\u0e32": 9, "\u0e01\u0e25\u0e32\u0e07\u0e46": [9, 11], "\u0e15\u0e33": 9, "\u0e41\u0e1b": 9, "\u0e21\u0e22\u0e33": 9, "\u0e2b\u0e23\u0e2d\u0e01": 9, "\u0e15\u0e23\u0e07": 9, "\u0e44\u0e1f": 9, "\u0e19\u0e43\u0e19": 9, "\u0e41\u0e17\u0e1a": 9, "\u0e41\u0e15\u0e30": 9, "\u0e19\u0e2d\u0e01": 9, "\u0e23\u0e30\u0e1a\u0e1a": 9, "\u0e41\u0e22": 9, "\u0e40\u0e2d\u0e32\u0e40\u0e1b\u0e23": 9, "\u0e22\u0e1a": 9, "\u0e19\u0e40\u0e2d\u0e07": [9, 12], "\u0e2d\u0e32\u0e2b\u0e32\u0e23": [9, 11], "\u0e1a\u0e02": 9, "\u0e15\u0e32\u0e21": 9, "\u0e41\u0e04": [9, 11, 15], "\u0e40\u0e08\u0e2d": [9, 11], "\u0e41\u0e1a\u0e1a\u0e19": 9, "\u0e2a\u0e07\u0e2a": 9, "\u0e2d\u0e04": 9, "\u0e15\u0e32\u0e22": 9, "\u0e04\u0e32": 9, "\u0e43\u0e04\u0e23": [9, 11], "\u0e21\u0e2d\u0e07": 9, "\u0e32\u0e41\u0e23\u0e07": 9, "\u0e27\u0e19\u0e21\u0e32\u0e01": 9, "\u0e04\u0e19\u0e43\u0e19": 9, "\u0e32\u0e41\u0e23\u0e07\u0e02": 9, "\u0e02\u0e22": 9, "\u0e40\u0e25": [9, 10, 11, 16], "\u0e01\u0e19": [9, 11], "\u0e04\u0e19\u0e08\u0e19": 9, "\u0e04\u0e19\u0e23\u0e27\u0e22": 9, "\u0e01\u0e16": [9, 11], "\u0e2a\u0e21\u0e04\u0e27\u0e23": 9, "\u0e19\u0e41\u0e25\u0e30\u0e01": 9, "\u0e0a\u0e2d\u0e1a": [9, 10, 11], "\u0e19\u0e08\u0e23": 9, "\u0e0a\u0e32\u0e27\u0e15": 9, "\u0e32\u0e07\u0e0a\u0e32\u0e15": 9, "\u0e40\u0e16\u0e2d\u0e30": 9, "\u0e42\u0e2d\u0e01\u0e32\u0e2a": 9, "sequentialrnn": [9, 12], "encod": [9, 11, 12, 14, 15], "15000": 9, "padding_idx": [9, 12], "encoder_dp": [9, 12], "embeddingdropout": [9, 12], "emb": [9, 12], "rnn": [9, 10, 12], "modulelist": [9, 12, 14], "weightdropout": [9, 12], "lstm": [9, 12], "batch_first": [9, 12], "input_dp": [9, 12], "rnndropout": [9, 12], "hidden_dp": [9, 12], "lineardecod": [9, 12], "decod": [9, 12, 15], "linear": [9, 12, 14], "in_featur": [9, 12, 14], "out_featur": [9, 12, 14], "bia": [9, 10, 12, 14, 15], "output_dp": [9, 12], "opt_func": [9, 11, 12], "functool": [9, 12, 15], "partial": [9, 11, 12, 15], "optim": [9, 11, 12, 14], "adam": [9, 11, 12], "loss_func": [9, 12], "flattenedloss": [9, 12], "crossentropyloss": [9, 12], "0x7f51be568268": 9, "true_wd": [9, 12], "bn_wd": [9, 12], "wd": [9, 12], "train_bn": [9, 12], "posixpath": [9, 12], "model_dir": [9, 12], "callback_fn": [9, 12], "basic_train": [9, 12], "record": [9, 11, 12], "add_tim": [9, 12], "silent": [9, 12], "gradientclip": [9, 12], "rnntrainer": [9, 12], "layer_group": [9, 12], "sequenti": [9, 12], "cb_fns_regist": 9, "frozen": [9, 11], "freeze_to": [9, 11], "fit_one_cycl": [9, 11], "mom": [9, 11], "epoch": [9, 11], "train_loss": [9, 11], "valid_loss": [9, 11], "841187": 9, "462714": 9, "319742": 9, "unfrozen": [9, 11], "unfreez": [9, 11], "411834": 9, "205552": 9, "341766": 9, "03": 9, "178030": 9, "037095": 9, "361508": 9, "970388": 9, "930919": 9, "370139": 9, "756190": 9, "890398": 9, "376191": 9, "671704": 9, "890232": 9, "375595": 9, "save_encod": [9, 11], "wisesight_enc": 9, "lm": 9, "load_data": [9, 11], "data_cl": [9, 11], "itemlist": 9, "label_from_df": [9, 11], "ito": [9, 11, 12], "bptt": [9, 11], "500": [9, 16], "text_classifier_learn": [9, 11], "load_encod": [9, 11], "rnnlearner": 9, "textclasdatabunch": 9, "\u0e19\u0e41\u0e14\u0e14": 9, "\u0e40\u0e1e\u0e25\u0e2a": 9, "\u0e27\u0e43\u0e2b\u0e21": 9, "\u0e08\u0e23": [9, 11, 16], "\u0e42\u0e0b\u0e19": 9, "\u0e40\u0e27": 9, "\u0e2b\u0e25\u0e2d\u0e14": 9, "\u0e22\u0e32\u0e27": 9, "\u0e1d\u0e32": 9, "\u0e40\u0e2d\u0e32": [9, 11], "\u0e1e\u0e1a": 9, "\u0e25\u0e1b": 9, "soul": [9, 12], "pop": 9, "\u0e2a\u0e32\u0e21": 9, "\u0e2a\u0e44\u0e15\u0e25": 9, "\u0e07\u0e32\u0e19": [9, 11], "jamnight": 9, "\u0e19\u0e33": 9, "parkinson": 9, "xxup": 9, "toi": 9, "\u0e19\u0e2d\u0e01\u0e08\u0e32\u0e01": 9, "\u0e42\u0e0a\u0e27": 9, "\u0e41\u0e1a\u0e1a": 9, "\u0e1b\u0e41\u0e1a\u0e1a": 9, "\u0e27\u0e07": 9, "\u0e41\u0e08\u0e21": 9, "\u0e1e\u0e25\u0e32\u0e14": 9, "\u0e40\u0e08\u0e2d\u0e01": 9, "\u0e19\u0e22\u0e32\u0e22\u0e19": 9, "\u0e1b\u0e23\u0e30\u0e15": 9, "\u0e2a\u0e32\u0e21\u0e32\u0e23\u0e16": 9, "\u0e15\u0e23": [9, 15, 16], "event": 9, "go": [9, 12], "eventpop": 9, "me": [9, 12, 16], "\u0e08\u0e33\u0e01": 9, "\u0e2d\u0e32\u0e22": 9, "jamnightbyjameson": 9, "jamesonthailand": 9, "soulaftersix": 9, "theparkinson": 9, "thetoi": 9, "\u0e21\u0e30": 9, "\u0e1a\u0e2d\u0e01\u0e15": 9, "\u0e41\u0e1e": [9, 11], "\u0e40\u0e22": 9, "\u0e1e\u0e2d\u0e19": 9, "\u0e41\u0e15\u0e07\u0e42\u0e21": 9, "\u0e25\u0e14": 9, "\u0e2a\u0e07\u0e01\u0e23\u0e32\u0e19\u0e15": 9, "\u0e23\u0e2d\u0e14": 9, "555": 9, "categorylist": 9, "multibatchencod": 9, "poolinglinearclassifi": 9, "layer": [9, 14, 15], "batchnorm1d": 9, "1200": 9, "ep": [9, 14], "momentum": 9, "affin": 9, "track_running_stat": 9, "dropout": [9, 14], "27999999999999997": 9, "relu": 9, "inplac": [9, 14], "2e": [9, 11], "slice": [9, 11], "5e": [9, 11], "improv": 9, "monitor": 9, "bestmodel": 9, "script": [9, 12, 15], "train_model": 9, "812156": 9, "753478": 9, "687532": 9, "740403": 9, "699093": 9, "714394": 9, "727394": 9, "668807": 9, "723011": 9, "722163": 9, "675351": 9, "723517": 9, "675266": 9, "654477": 9, "738723": 9, "669178": 9, "641070": 9, "737962": 9, "612528": 9, "637456": 9, "744551": 9, "618259": 9, "635149": 9, "749366": 9, "572621": 9, "651169": 9, "749873": 9, "561985": 9, "661739": 9, "747593": 9, "534753": 9, "673563": 9, "738469": 9, "530844": 9, "688871": 9, "746072": 9, "522788": 9, "670024": 9, "743031": 9, "y_true": 9, "loss": [9, 11], "get_pr": [9, 11], "ds_type": 9, "datasettyp": [9, 11], "with_loss": 9, "argmax": [9, 11, 14], "to_df": 9, "8392661555312158": 9, "u": [10, 12, 14, 15], "look": [10, 12, 14, 15], "pypi": [10, 15], "pkg": [10, 15], "attempt": [10, 14], "dependency_pars": 10, "esupar": 10, "chu": 10, "liu": 10, "edmond": 10, "chu_liu_edmond": 10, "cp38": 10, "107": 10, "supar": 10, "93": 10, "2022": [10, 15, 16], "304": 10, "dill": [10, 16], "cu116": 10, "stanza": 10, "691": 10, "huggingfac": [10, 14, 15], "hub": [10, 14, 15], "huggingface_hub": [10, 14, 15], "182": 10, "jinja2": 10, "smart": 10, "pathi": 10, "langcod": 10, "pydant": 10, "logger": 10, "legaci": 10, "typer": 10, "protobuf": [10, 14], "confect": 10, "markupsaf": 10, "5626945": 10, "6613dcb188f57561a00a2e40eca1bbafe6203936b8d9c387facd79de3f06fa62": 10, "6f": 10, "3475485c7d991ca5698d39603e22a99bd6904dcac7d0a5855a": 10, "234926": 10, "e3b7a3e928e5e81053b9f869cfef5382b49f133284c6abbd718496ff11e8ee67": 10, "a1": 10, "b0bb1f7683d20b75b34ceeb56ee83a585e9b065a5fef0b2cb1": 10, "warn": [10, 14, 15], "broken": 10, "permiss": 10, "conflict": 10, "behaviour": 10, "manag": 10, "recommend": [10, 15], "virtual": 10, "environ": 10, "pypa": 10, "io": [10, 14], "venv": 10, "spacy_pythainlp": 10, "dev6": 10, "nptype": 10, "473": 10, "docopt": 10, "fire": 10, "termcolor": 10, "13723": 10, "cd282751c98736c79933ed4265624e65891888bb9fdd01dc5d6fcf978d76431f": 10, "cc": 10, "f1e272f628fdb013d969acc99cfe2e031ea15b3efb74ffe842": 10, "116949": 10, "bc82a0082e9931af28c40d49e4494ce66a1f80f929b30ae4e7e1eff347b37c5c": 10, "86": 10, "88e8603bd3b1a9bff9d02d820c7431c47ad032865632657bb9": 10, "cuda": [10, 11], "__init__": 10, "497": 10, "userwarn": [10, 14, 15], "initi": [10, 11, 14, 15], "nvml": 10, "pos_engin": 10, "pos_corpu": 10, "orchid_ud": 10, "sent_engin": 10, "ner_engin": 10, "tokenize_engin": 10, "dependency_parsing_engin": 10, "dependency_parsing_model": 10, "bool": 10, "chang": [10, 12], "turn": [10, 12], "off": [10, 12], "0x7f9c02410a90": 10, "\u0e1c\u0e21\u0e40\u0e1b": 10, "\u0e19\u0e41\u0e21\u0e27": 10, "\u0e1c\u0e21\u0e0a\u0e2d\u0e1a\u0e44\u0e1b\u0e40\u0e25": 10, "\u0e22\u0e19\u0e19\u0e32\u0e07\u0e23\u0e2d\u0e07": 10, "\u0e21\u0e22": 10, "free": [10, 16], "commerci": 10, "pleas": 10, "contract": 10, "nectec": 10, "facebook": [10, 15, 16], "dancearmi": 10, "post": [10, 13, 14], "10157641945708284": 10, "pos_lst20_perceptron": 10, "\u0e1c\u0e21\u0e0a\u0e2d\u0e1a": 10, "\u0e42\u0e23\u0e07": 10, "\u0e19\u0e32\u0e07\u0e23\u0e2d\u0e07": 10, "\u0e44\u0e1b\u0e40\u0e25": 10, "0x7f9c0146e880": 10, "weight": [10, 14, 15], "checkpoint": [10, 15], "koichiyasuoka": 10, "roberta": [10, 15], "spm": [10, 15], "upo": 10, "robertamodel": [10, 15], "classifi": [10, 11], "expect": [10, 15, 16], "anoth": [10, 15], "architectur": [10, 15], "bertforsequenceclassif": [10, 15], "bertforpretrain": [10, 15], "NOT": [10, 15], "exactli": [10, 15], "ident": [10, 15], "newli": [10, 15], "pooler": [10, 15], "dens": [10, 15], "should": [10, 12, 15], "probabl": [10, 13, 15, 16], "down": [10, 12, 15], "stream": [10, 15, 16], "abl": [10, 15], "infer": [10, 15], "info": 10, "n_sentenc": 10, "n_batch": 10, "n_bucket": 10, "make": [10, 11, 12, 15], "apply_permut": 10, "tensor": [10, 11], "index_select": 10, "dim": [10, 11, 15], "permut": 10, "204603": 10, "elaps": 10, "dep": 10, "pron": 10, "sconj": 10, "nsubj": 10, "cop": 10, "acl": 10, "xcomp": 10, "obl": 10, "flat": 10, "star": [11, 12], "multi": 11, "both": [11, 12, 15], "number": 11, "micro": 11, "averag": 11, "f1": 11, "challeng": [11, 12], "micro_f1_publ": 11, "micro_f1_priv": 11, "59313": 11, "60322": 11, "5145": 11, "5109": 11, "5022": 11, "4976": 11, "59139": 11, "58139": 11, "bert": [11, 15], "56612": 11, "57057": 11, "review_dataset": 11, "wongnai_data": 11, "ast": [11, 12], "literal_ev": [11, 12], "counter": [11, 12], "re": [11, 12, 14, 16], "ft_data": 11, "respect": 11, "w_review_train": 11, "csv": [11, 12], "sep": [11, 12], "header": 11, "drop_dupl": 11, "rate": 11, "test_fil": 11, "concat": 11, "469282": 11, "304328": 11, "169880": 11, "046133": 11, "010377": 11, "two_df": 11, "one_df": 11, "train_bal": 11, "392365": 11, "254448": 11, "142036": 11, "115715": 11, "095436": 11, "dump": [11, 12, 15, 16], "skipgram": 11, "df_txt": 11, "df": 11, "ft_line": 11, "iterrow": 11, "ft_lab": 11, "__label__": 11, "ft_text": 11, "replace_newlin": 11, "close": [11, 15], "__label__0": 11, "df_all": 11, "home": 11, "charin": 11, "pretrainedvector": 11, "vec": 11, "1m": 11, "18176": 11, "progress": [11, 13], "sec": 11, "thread": 11, "24858": 11, "lr": 11, "000000": [11, 13], "309402": 11, "0h0m": 11, "wongnai_b": 11, "wordngram": 11, "731006": 11, "391282": 11, "764689": 11, "81": 11, "bin": [11, 16], "pred_lab": 11, "split": [11, 14, 16], "submit_df": 11, "reviewid": 11, "submit_fastttext_b": 11, "lukkiddd": 11, "train_split": 11, "test_split": 11, "pipelin": [11, 15], "countvector": 11, "tfidftransform": 11, "svm": 11, "text_clf": 11, "vect": 11, "clf": 11, "fit": 11, "memori": [11, 12], "binari": [11, 16], "decode_error": 11, "strict": 11, "int64": 11, "utf": [11, 14], "lowercas": 11, "max_df": 11, "max_featur": 11, "preprocessor": 11, "stop_word": 11, "ax_it": 11, "tol": 11, "0001": 11, "verbos": 11, "onehotencod": 11, "enc": 11, "handle_unknown": 11, "submit_linearsvc": 11, "59590": 11, "59731": 11, "processor": [11, 12, 14], "random_split_by_pct": 11, "wongnai_lm": 11, "45735": 11, "461": 11, "show_batch": 11, "idx": 11, "\u0e14\u0e32\u0e27": 11, "\u0e2b\u0e21\u0e14": 11, "\u0e0b\u0e30": 11, "\u0e32\u0e27\u0e2a\u0e27\u0e22": 11, "\u0e21\u0e32\u0e13": 11, "\u0e1e\u0e2d\u0e14": 11, "\u0e18\u0e22\u0e32\u0e28": 11, "\u0e1a\u0e23\u0e2d\u0e07": 11, "\u0e1a\u0e21\u0e32": 11, "\u0e2d\u0e22\u0e46": 11, "\u0e41\u0e16\u0e27": 11, "\u0e25\u0e2d\u0e07": 11, "\u0e41\u0e27\u0e30": 11, "\u0e2a\u0e33\u0e2b\u0e23": 11, "\u0e23\u0e2a": 11, "\u0e2d\u0e07\u0e14": 11, "\u0e21\u0e32\u0e01\u0e21\u0e32\u0e22": 11, "\u0e04\u0e07": 11, "\u0e42\u0e01\u0e42\u0e01": 11, "top": [11, 12], "\u0e22\u0e14\u0e32\u0e22": 11, "\u0e2b\u0e32": 11, "\u0e15\u0e2d\u0e19": 11, "\u0e27\u0e22\u0e40\u0e15": 11, "\u0e40\u0e19": 11, "\u0e17\u0e32\u0e19": 11, "\u0e2d\u0e19\u0e02": 11, "\u0e22\u0e32\u0e01": 11, "\u0e27\u0e32": 11, "\u0e2a\u0e32\u0e02\u0e32": 11, "\u0e12\u0e19\u0e32\u0e01\u0e32\u0e23": 11, "\u0e1d\u0e32\u0e01": 11, "\u0e2d\u0e01": [11, 15], "\u0e2b\u0e25\u0e32\u0e22\u0e2d\u0e22": 11, "\u0e1a\u0e23": 11, "\u0e01\u0e30": 11, "\u0e01\u0e2a\u0e32\u0e27": 11, "\u0e32\u0e02\u0e2d\u0e07\u0e23": 11, "\u0e08\u0e32": 11, "\u0e04\u0e27\u0e32\u0e21\u0e04": 11, "\u0e14\u0e40\u0e2b": 11, "\u0e27\u0e19\u0e15": 11, "\u0e2d\u0e2d\u0e01": 11, "\u0e41\u0e19\u0e27\u0e17\u0e32\u0e07": 11, "\u0e1a\u0e27\u0e01": 11, "\u0e27\u0e19\u0e43\u0e2b\u0e0d": 11, "\u0e1a\u0e23\u0e23\u0e22\u0e32\u0e01\u0e32\u0e28": 11, "\u0e23\u0e16\u0e40\u0e02": 11, "\u0e42\u0e15": 11, "\u0e15\u0e01\u0e41\u0e15": 11, "\u0e19\u0e41\u0e19\u0e27": 11, "\u0e1a\u0e32\u0e23": 11, "\u0e42\u0e14\u0e22\u0e23\u0e2d\u0e1a": 11, "\u0e19\u0e23\u0e32": 11, "\u0e40\u0e21\u0e19": [11, 15], "next": [11, 12], "train_dl": 11, "414": 11, "3408": 11, "135": 11, "409": 11, "1325": 11, "1185": 11, "9903": 11, "368": 11, "870": 11, "254": 11, "3448": 11, "429": 11, "devic": 11, "193": 11, "10074": 11, "258": 11, "456": 11, "270": 11, "\u0e1a\u0e1e": 11, "\u0e2d\u0e07\u0e40\u0e2a": 11, "temperatur": [11, 12], "\u0e44\u0e2b\u0e21": 11, "mr": [11, 12, 13], "\u0e04\u0e0a": 11, "\u0e09\u0e32\u0e22": 11, "2557": 11, "\u0e01\u0e33\u0e01": [11, 16], "\u0e1b\u0e1b": 11, "\u0e20\u0e32\u0e04": 11, "\u0e42\u0e23\u0e07\u0e20\u0e32\u0e1e\u0e22\u0e19\u0e15\u0e23": 11, "2558": 11, "\u0e2d\u0e2b\u0e32": 11, "\u0e22\u0e27\u0e01": [11, 15], "lr_find": 11, "plot": [11, 12, 16], "finder": 11, "complet": 11, "learner_nam": 11, "graph": [11, 14], "min": 11, "gradient": [11, 15], "58e": 11, "04": [11, 14, 16], "22562": 11, "659182": 11, "493942": 11, "342857": 11, "375606": 11, "252919": 11, "385714": 11, "165419": 11, "013862": 11, "371429": 11, "034220": 11, "802707": 11, "357143": 11, "879111": 11, "712463": 11, "823682": 11, "624331": 11, "784611": 11, "580608": 11, "753532": 11, "553170": 11, "719396": 11, "516521": 11, "699165": 11, "513339": 11, "696516": 11, "512542": 11, "wongnai_enc": 11, "\u0e32\u0e19\u0e19": 11, "\u0e08\u0e30\u0e2d\u0e22": 11, "\u0e19\u0e01\u0e33\u0e41\u0e1e\u0e07": 11, "\u0e2d\u0e2d\u0e19": 11, "\u0e40\u0e25\u0e22\u0e41\u0e22\u0e01\u0e1a": 11, "\u0e07\u0e44\u0e1b2": [11, 15], "\u0e0a\u0e09\u0e30\u0e25\u0e32\u0e40\u0e15": [11, 15], "\u0e44\u0e2d\u0e28\u0e04\u0e23": [11, 15], "\u0e21\u0e0a\u0e32\u0e40\u0e02": [11, 15], "\u0e27\u0e27\u0e07\u0e40\u0e14": 11, "n\u0e2b": 11, "\u0e27\u0e14": [11, 15], "\u0e01\u0e46": 11, "\u0e15\u0e23\u0e30\u0e40\u0e27\u0e19\u0e2b\u0e32\u0e23": 11, "\u0e32\u0e19\u0e17\u0e32\u0e19": 11, "\u0e21\u0e32\u0e40\u0e08\u0e2d": 11, "\u0e08\u0e30\u0e27": 11, "\u0e19\u0e40\u0e08": 11, "\u0e32\u0e1b\u0e23\u0e30\u0e08\u0e33\u0e01": 11, "\u0e04\u0e07\u0e44\u0e21": 11, "\u0e32\u0e04": 11, "\u0e14\u0e16": 11, "\u0e07\u0e2a\u0e25": 11, "\u0e14\u0e1c\u0e21\u0e04": 11, "\u0e32\u0e19\u0e41\u0e23\u0e01\u0e46\u0e40\u0e25\u0e22\u0e04\u0e23": 11, "add_test": 11, "wongnai_cl": 11, "sure": [11, 15], "got": [11, 12], "target": 11, "\u0e1e\u0e32": 11, "\u0e2d\u0e32\u0e40\u0e0b": 11, "\u0e23\u0e23": 11, "\u0e32\u0e1e\u0e23\u0e30\u0e22\u0e32\u0e1b\u0e32\u0e23": 11, "\u0e0a\u0e14\u0e32\u0e20": 11, "\u0e40\u0e29\u0e01": 11, "\u0e19\u0e01\u0e32\u0e23": 11, "\u0e0a\u0e27\u0e19": 11, "\u0e32\u0e40\u0e14": 11, "\u0e19\u0e40\u0e04\u0e22": 11, "\u0e07\u0e46": 11, "\u0e23\u0e16\u0e15": 11, "\u0e1e\u0e24\u0e28\u0e08": 11, "\u0e01\u0e32\u0e22\u0e19": 11, "\u0e32\u0e19\u0e21\u0e32": 11, "\u0e27\u0e07\u0e43\u0e19": 11, "\u0e14\u0e01": 11, "\u0e08\u0e01\u0e23\u0e23\u0e21": 11, "xxmaj": 11, "relax": 11, "night": [11, 12], "phothalai": 11, "\u0e21\u0e15": 11, "tast": 11, "\u0e2d\u0e07\u0e2d\u0e32\u0e2b\u0e32\u0e23": 11, "\u0e2d\u0e19\u0e23": 11, "group": [11, 14, 16], "\u0e0d\u0e2b\u0e32": 11, "\u0e27\u0e16": 11, "\u0e01\u0e32\u0e23\u0e2a": 11, "\u0e2d\u0e2a\u0e32\u0e23": 11, "\u0e1e\u0e19": 11, "\u0e01\u0e07\u0e32\u0e19": 11, "\u0e21\u0e32\u0e16": 11, "terrac": 11, "\u0e2d\u0e07\u0e08\u0e32\u0e01": 11, "\u0e08\u0e19": 11, "\u0e17\u0e19": 11, "\u0e01\u0e23\u0e30\u0e41\u0e2a": 11, "\u0e04\u0e27\u0e32\u0e21\u0e41\u0e23\u0e07": 11, "shibuya": 11, "shabu": 11, "\u0e44\u0e2b\u0e27": 11, "\u0e02\u0e2d": 11, "\u0e15\u0e32\u0e21\u0e23\u0e2d\u0e22": 11, "\u0e2d\u0e07\u0e2b\u0e32": 11, "\u0e42\u0e2d": 11, "\u0e2a\u0e21\u0e32\u0e17\u0e32\u0e19": 11, "\u0e1b\u0e23\u0e30\u0e08\u0e33\u0e2a": 11, "\u0e0a\u0e32": 11, "\u0e40\u0e1e": [11, 12], "\u0e0a\u0e32\u0e27": 11, "\u0e01\u0e04\u0e23": 11, "pednoii": 11, "ahha": 11, "\u0e32\u0e19\u0e2d\u0e32\u0e2b\u0e32\u0e23": 11, "\u0e41\u0e23\u0e01": 11, "\u0e19\u0e33\u0e40\u0e2a\u0e19\u0e2d": 11, "\u0e19\u0e32\u0e07\u0e43\u0e19": 11, "31e": 11, "07": 11, "gradual": 11, "187845": 11, "158394": 11, "472803": 11, "889035": 11, "828990": 11, "629707": 11, "760357": 11, "751162": 11, "656904": 11, "628719": 11, "721673": 11, "669456": 11, "submit_ulmfit": 11, "ulmfit": 12, "thwiki_lstm": 12, "dummi": 12, "imdb": 12, "untar_data": 12, "url": 12, "imdb_sampl": 12, "dummy_df": 12, "thwiki_ito": 12, "pickl": [12, 16], "itos_fnam": 12, "rb": [12, 16], "thwiki_vocab": 12, "check": 12, "60005": 12, "800": 12, "film": 12, "act": 12, "music": 12, "good": 12, "too": [12, 15], "though": 12, "mostli": 12, "earli": 12, "thing": 12, "still": 12, "realli": 12, "superstar": 12, "cast": 12, "face": [12, 14], "entir": 12, "excel": 12, "job": 12, "hard": 12, "watch": 12, "becaus": [12, 16], "situat": 12, "present": [12, 13], "british": 12, "against": 12, "each": [12, 15, 16], "merit": 12, "view": 12, "forc": 12, "region": 12, "thei": [12, 15], "did": 12, "around": 12, "partit": 12, "simpli": [12, 15], "saw": 12, "between": [12, 13, 15], "enough": 12, "veri": 12, "rememb": 12, "screen": 12, "never": 12, "paint": 12, "side": 12, "hope": 12, "younger": 12, "redempt": 12, "man": [12, 13], "who": 12, "her": 12, "life": 12, "truli": 12, "love": 12, "later": 12, "she": 12, "great": 12, "pain": 12, "carri": 12, "messag": 12, "grave": 12, "peopl": 12, "realiti": 12, "sinc": [12, 16], "india": 12, "pakistan": 12, "border": 12, "sens": 12, "glad": 12, "seen": 12, "even": 12, "uk": 12, "could": [12, 16], "would": [12, 16], "better": 12, "onc": 12, "long": 12, "while": [12, 16], "movi": 12, "along": 12, "feel": 12, "labor": 12, "my": 12, "joi": 12, "where": [12, 13, 14], "five": 12, "stereotyp": 12, "had": 12, "gui": 12, "fat": 12, "foreign": 12, "etc": 12, "being": [12, 14], "written": 12, "shot": 12, "product": 12, "low": 12, "junior": 12, "high": [12, 15], "video": 12, "director": 12, "produc": [12, 14], "ever": 12, "wors": 12, "entri": 12, "concept": 12, "funni": 12, "gari": 12, "coleman": 12, "actor": 12, "trust": 12, "sai": [12, 15], "went": 12, "dad": 12, "came": 12, "korea": 12, "he": 12, "short": [12, 15], "period": 12, "made": 12, "epic": 12, "imagin": 12, "cost": 12, "cheap": 12, "theme": 12, "duti": 12, "lip": 12, "offic": 12, "deep": [12, 13], "declar": 12, "hi": 12, "peck": 12, "liber": 12, "understand": 12, "fearless": 12, "human": 12, "ve": 12, "fact": 12, "tail": 12, "mess": 12, "almost": 12, "walk": 12, "paid": 12, "ll": 12, "sit": 12, "bit": 12, "lose": 12, "its": 12, "someth": [12, 15], "ed": 12, "wood": 12, "dialogu": 12, "heard": 12, "viewer": 12, "cannot": [12, 14], "meet": 12, "oper": 12, "soon": 12, "stephen": 12, "best": 12, "ultim": 12, "tara": 12, "reid": 12, "plai": 12, "role": 12, "oh": 12, "help": 12, "talent": 12, "actress": 12, "stick": 12, "american": 12, "pie": 12, "know": 12, "kick": 12, "clich": 12, "\u00e9": 12, "typic": 12, "member": 12, "william": 12, "benton": 12, "believ": 12, "bias": 12, "toward": 12, "thief": 12, "born": 12, "bad": 12, "neither": 12, "slate": 12, "societi": 12, "parent": 12, "educ": 12, "what": [12, 15], "somewher": 12, "isn": [12, 16], "back": 12, "track": 12, "bet": 12, "wast": 12, "piec": 12, "valid": 12, "late": 12, "penn": 12, "teller": 12, "joe": 12, "bob": 12, "fridai": [12, 15], "school": 12, "year": 12, "doubt": 12, "televis": 12, "didn": 12, "stai": 12, "miss": 12, "john": 12, "bloom": 12, "live": 12, "belong": [12, 15], "question": [12, 15], "anyon": 12, "hour": 12, "moral": 12, "disast": 12, "david": 12, "care": 12, "purpos": [12, 13], "singl": 12, "qualiti": 12, "treat": 12, "afternoon": 12, "budget": 12, "project": [12, 14], "stori": 12, "eva": 12, "tv": 12, "ideal": 12, "mani": [12, 13], "cours": 12, "special": 12, "effect": 12, "gun": 12, "scene": 12, "move": 12, "although": 12, "problem": 12, "rent": 12, "student": 12, "ye": 12, "nake": 12, "emperor": 12, "speak": 12, "big": 12, "someon": 12, "state": [12, 15], "truth": 12, "old": 12, "bodi": [12, 13], "nude": 12, "artist": 12, "front": 12, "audienc": 12, "ev": 12, "poor": 12, "wanna": 12, "ladi": 12, "sensit": 12, "becam": 12, "petti": 12, "satisfact": 12, "alarm": 12, "signal": [12, 14], "degre": 12, "work": [12, 13, 15], "art": [12, 15], "cross": 12, "mix": 12, "ordinari": 12, "rural": 12, "pacif": 12, "northwest": 12, "solid": 12, "fine": 12, "dan": 12, "same": [12, 15], "highli": 12, "crash": 12, "paul": 12, "pace": 12, "action": 12, "urban": 12, "lo": 12, "angel": 12, "apart": 12, "relationship": [12, 13, 16], "jim": 12, "0x7f5215ef6ea0": 12, "\u0e01\u0e32\u0e25\u0e04\u0e23": 12, "\u0e07\u0e19\u0e32\u0e19\u0e21\u0e32\u0e41\u0e25": 12, "min_p": 12, "005": 12, "\u0e27\u0e07\u0e2a\u0e2d\u0e07\u0e2b\u0e19": 12, "\u0e10\u0e32\u0e19\u0e30\u0e23": 12, "\u0e33\u0e23\u0e27\u0e22": 12, "\u0e41\u0e25\u0e30\u0e40\u0e1b": 12, "\u0e19\u0e25": 12, "\u0e01\u0e2a\u0e32\u0e27\u0e02\u0e2d\u0e07": 12, "\u0e14\u0e23": 12, "\u0e42\u0e04\u0e25": 12, "\u0e1a\u0e1a\u0e17\u0e42\u0e14\u0e22": 12, "\u0e2d\u0e25": 12, "\u0e01\u0e0a\u0e32\u0e22\u0e04\u0e19\u0e42\u0e15\u0e02\u0e2d\u0e07": 12, "\u0e42\u0e2d\u0e25": 12, "\u0e40\u0e27\u0e2d\u0e23": [12, 15], "\u0e21\u0e32\u0e23\u0e14\u0e32": 12, "\u0e27\u0e07\u0e41\u0e23\u0e01": 12, "\u0e40\u0e02\u0e32\u0e40\u0e1b": 12, "\u0e42\u0e2d\u0e25\u0e25": 12, "\u0e40\u0e02\u0e32\u0e21": 12, "\u0e41\u0e25\u0e30\u0e41\u0e21": 12, "\u0e19\u0e04\u0e19\u0e17": 12, "\u0e15\u0e43\u0e08\u0e2d": 12, "\u0e2d\u0e19\u0e42\u0e22\u0e19": 12, "\u0e19\u0e40\u0e1e": 12, "\u0e2d\u0e19\u0e2a\u0e19": 12, "\u0e17\u0e01": 12, "\u0e04\u0e32\u0e25": 12, "\u0e42\u0e23\u0e2a": 12, "\u0e25\u0e2a": 12, "\u0e2d\u0e02\u0e2d\u0e07\u0e40\u0e18\u0e2d\u0e19": 12, "\u0e43\u0e19\u0e1b": 12, "1967": 12, "\u0e18\u0e44\u0e14": 12, "\u0e1a\u0e01\u0e32\u0e23\u0e40\u0e25": 12, "\u0e22\u0e07\u0e14": 12, "\u0e08\u0e2d\u0e23": 12, "\u0e2a\u0e1b": 12, "\u0e25\u0e40\u0e1a": 12, "\u0e0b\u0e32\u0e23": 12, "\u0e2d\u0e21\u0e32\u0e01": 12, "\u0e1a\u0e01\u0e32\u0e23\u0e14": 12, "\u0e41\u0e25\u0e08\u0e32\u0e01\u0e41\u0e21": 12, "\u0e07\u0e17\u0e33\u0e43\u0e2b": [12, 15], "\u0e01\u0e29\u0e30\u0e14": 12, "\u0e32\u0e19\u0e27": 12, "\u0e41\u0e25\u0e30\u0e40\u0e17\u0e04\u0e42\u0e19\u0e42\u0e25\u0e22": 12, "\u0e07\u0e08\u0e32\u0e01\u0e2a\u0e33\u0e40\u0e23": 12, "\u0e08\u0e01\u0e32\u0e23\u0e28": 12, "\u0e01\u0e29\u0e32\u0e08\u0e32\u0e01\u0e21\u0e2b\u0e32\u0e27": 12, "\u0e17\u0e22\u0e32\u0e25": 12, "\u0e22\u0e41\u0e25": 12, "\u0e19\u0e17\u0e32\u0e07\u0e44\u0e1b\u0e17": 12, "\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e2d\u0e2d\u0e2a\u0e40\u0e15\u0e23\u0e40\u0e25": 12, "\u0e01\u0e29\u0e32": 12, "\u0e41\u0e25\u0e30\u0e43\u0e19\u0e0a": 12, "\u0e27\u0e07\u0e19": 12, "\u0e19\u0e21\u0e32\u0e23\u0e14\u0e32": 12, "vipasha": 13, "bansal": 13, "acl2024": 13, "srw": 13, "abstract": 13, "nlu": 13, "droganova": 13, "zeman": 13, "schuster": 13, "2016": 13, "consum": 13, "prefer": 13, "bender": 13, "2015": 13, "paper": 13, "rich": 13, "svc": 13, "clear": 13, "surfac": 13, "form": 13, "fulli": 13, "unifi": 13, "appropri": 13, "copestak": 13, "2005": 13, "featur": 13, "within": 13, "delph": 13, "IN": 13, "slayden": 13, "2009": 13, "increas": [13, 15], "verifi": 13, "coverag": 13, "decreas": 13, "ambigu": 13, "vipashab94": 13, "thaigrammar": 13, "wait": 13, "guid": 13, "wannaphong": 13, "phatthiyaphaibun": 13, "latest": 13, "ac": 13, "sweaglesw": 13, "linguist": 13, "x86": 13, "run_ac": 13, "xvzf": 13, "git": 13, "clone": 13, "pydelphin": 13, "2024": 13, "216": 13, "129": 13, "123": 13, "154": 13, "2001": 13, "1868": 13, "a100": 13, "105": 13, "beae": 13, "c5ff": 13, "fe24": 13, "d767": 13, "2526613": 13, "4m": 13, "applic": [13, 16], "gzip": 13, "41m": 13, "37mb": 13, "english": [13, 15], "postagg": 13, "hmm": 13, "erg": 13, "tdl": 13, "qc": 13, "releas": [13, 15, 16], "remot": 13, "compress": [13, 16], "delta": 13, "reus": 13, "pack": 13, "receiv": 13, "mib": 13, "186": 13, "delphin": 13, "compil": 13, "dat": 13, "execut": [13, 14], "\u0e07\u0e2a": 13, "id": 13, "328": 13, "subj": 13, "322": 13, "bare": 13, "_33142": 13, "327": 13, "comp": 13, "324": 13, "obj": 13, "323": 13, "deic": 13, "tran": 13, "lex": 13, "\u0e44\u0e1b_4158": 13, "326": 13, "\u0e2d_4236": 13, "325": [13, 14], "\u0e2d_4404": 13, "ltop": 13, "h0": 13, "sf": [13, 14], "prop": [13, 16], "rel": 13, "named_rel": 13, "lbl": 13, "h4": 13, "carg": 13, "arg0": 13, "x3": 13, "exist_q_rel": 13, "h6": 13, "rstr": 13, "h7": 13, "h8": 13, "_go_v_1_rel": 13, "h1": 13, "e9": 13, "arg1": 13, "arg2": 13, "x10": 13, "cog": 13, "st": 13, "purpose_rel": 13, "e11": 13, "_buy_v_1_rel": 13, "x12": 13, "_book_n_1_rel": 13, "h13": 13, "h14": 13, "h15": 13, "h16": 13, "hcon": 13, "qeq": 13, "icon": 13, "vp": 13, "flag": 13, "ascor": 13, "603": 13, "598": 13, "\u0e1c\u0e21_4375": 13, "602": 13, "\u0e08\u0e30_33089": 13, "601": 13, "\u0e19_33088": 13, "600": 13, "599": 13, "\u0e04\u0e19_4133": 13, "_4290": 13, "tens": 13, "fut": 13, "pron_rel": 13, "sg": 13, "gend": 13, "speci": 13, "h5": 13, "_be_v_id_rel": 13, "x8": 13, "_person_n_1_rel": 13, "h9": 13, "_good_a_1_rel": 13, "e10": 13, "h11": 13, "h12": 13, "airesearch": [14, 15], "larg": 14, "xlsr": 14, "cu113": 14, "torchvis": 14, "torchaudio": 14, "pytorch": 14, "torch_stabl": 14, "html": 14, "link": 14, "2bcu113": 14, "1821": 14, "834": 14, "43tcmalloc": 14, "alloc": 14, "1147494400": 14, "byte": 14, "0x55bf21ac6000": 14, "0x7faf12d1b615": 14, "0x55bf1efac4cc": 14, "0x55bf1f08c47a": 14, "0x55bf1efaf2": 14, "0x55bf1f0a0e1d": 14, "0x55bf1f022e99": 14, "0x55bf1f01d9ee": 14, "0x55bf1efb0bda": 14, "0x55bf1f022d00": 14, "0x55bf1f01f737": 14, "0x55bf1f0a1c66": 14, "0x55bf1f01edaf": 14, "0x55bf1efb1039": 14, "0x55bf1eff4409": 14, "0x55bf1efafc52": 14, "0x55bf1f022c25": 14, "0x55bf1f01e915": 14, "0x55bf1efb0afa": 14, "0x55bf1f01ec0d": 14, "1055": 14, "37tcmalloc": 14, "1434370048": 14, "0x55bf6611c000": 14, "1336": 14, "39tcmalloc": 14, "1792966656": 14, "0x55bfbb908000": 14, "1691": 14, "38tcmalloc": 14, "2241208320": 14, "01tcmalloc": 14, "1821458432": 14, "0x55bfa7428000": 14, "0x7faf12d1a1e7": 14, "0x55bf1efe2067": 14, "tcmalloc": 14, "2276827136": 14, "0x55c013d3c000": 14, "0x55bf1efb1271": 14, "pillow": 14, "cu111": 14, "onnxruntim": 14, "soundfil": 14, "manylinux_2_12_x86_64": 14, "91": 14, "post1": 14, "895": [14, 16], "manylinux_2_5_x86_64": 14, "596": 14, "flatbuff": 14, "743": 14, "wav2vec2model": 14, "hug": 14, "autotoken": [14, 15], "wav2vec2forctc": 14, "import_huggingface_model": 14, "origin": [14, 16], "from_pretrain": [14, 15], "correspond": 14, "audio": 14, "stabl": 14, "hubert": 14, "configuration_util": 14, "341": 14, "gradient_checkpoint": 14, "v5": [14, 15], "gradient_checkpointing_en": 14, "trainer": [14, 15], "api": 14, "trainingargu": [14, 15], "eval": 14, "mode": 14, "feature_extractor": [14, 15], "featureextractor": 14, "conv_lay": 14, "convlayerblock": 14, "layer_norm": [14, 15], "layernorm": 14, "512": 14, "elementwise_affin": 14, "conv": 14, "conv1d": 14, "kernel_s": 14, "stride": 14, "feature_project": 14, "featureproject": 14, "1024": 14, "pos_conv_emb": 14, "convolutionalpositionalembed": 14, "128": 14, "encoderlay": 14, "attent": 14, "selfattent": 14, "k_proj": 14, "v_proj": 14, "q_proj": 14, "out_proj": 14, "feed_forward": 14, "feedforward": 14, "intermediate_dens": 14, "4096": 14, "intermediate_dropout": 14, "output_dens": 14, "output_dropout": 14, "final_layer_norm": 14, "microsoft": 14, "window": 14, "ai": [14, 15], "ml": 14, "input_s": 14, "100000": 14, "audio_maxlen": 14, "dummy_input": 14, "randn": 14, "requires_grad": 14, "export": 14, "asr3": 14, "export_param": 14, "opset_vers": 14, "do_constant_fold": 14, "whether": 14, "constant": 14, "fold": 14, "input_nam": 14, "modelinput": 14, "output_nam": 14, "modeloutput": 14, "dynamic_ax": 14, "batch_siz": 14, "ax": [14, 15], "symbolic_help": 14, "caus": 14, "incorrect": 14, "dropbox": 14, "9kpeh8eodshcqhj": 14, "common_voice_th_23646850": 14, "wav": 14, "dl": 14, "mv": 14, "json": 14, "co": [14, 15], "r": [14, 16], "sig": 14, "scipi": 14, "wavfil": 14, "sp": 14, "new_rat": 14, "16000": 14, "ort_sess": 14, "inferencesess": 14, "k": [14, 15], "unk": 14, "_normal": 14, "vasudevgupta7": 14, "gsoc": 14, "src": 14, "l101": 14, "fork": [14, 15], "tf": 14, "seqlen": 14, "keepdim": 14, "var": 14, "squeez": 14, "sqrt": 14, "remove_adjac": 14, "3460423": 14, "asr": 14, "wav2vec2_onnx": 14, "ipynb": [14, 16], "sampling_r": 14, "sampl": [14, 16], "new_data": 14, "resampl": 14, "float32": 14, "ort_input": 14, "ort_out": 14, "_t1": 14, "easili": 15, "finetun": 15, "drive": 15, "1kbk6sbspzlwcnoe61adaqo30xxqoq9ko": 15, "scrollto": 15, "n5iacot9b3cf": 15, "specif": [15, 16], "thaixtransform": 15, "236": 15, "106": 15, "safetensor": 15, "fsspec": 15, "355": 15, "seqev": 15, "28115": 15, "d0f182fee94a7c129f5bd1265a3e0d2a52893384d6783d11c8bbd770ef695fac": 15, "2c": 15, "4b": 15, "b2": 15, "a90368d80567249f258a9c58240512046afb5563d794eda4b2": 15, "auto": 15, "camemberttoken": 15, "automodel": 15, "automodelformaskedlm": 15, "automodelforsequenceclassif": 15, "automodelfortokenclassif": 15, "process_transform": 15, "xlmr": 15, "mbert": 15, "downstream": 15, "att": 15, "uncas": 15, "largest": 15, "78": 15, "5gb": 15, "assort": 15, "subword": 15, "xlm": 15, "multilingu": 15, "104": 15, "level": 15, "syllabl": 15, "syllabel": 15, "sefr": 15, "model_nam": [15, 16], "thaiwordsnewmmtoken": 15, "thaiwordssyllabletoken": 15, "fakesefrcuttoken": 15, "thairobertatoken": 15, "public_model": 15, "param": 15, "revis": 15, "model_max_length": 15, "416": 15, "unexpect": 15, "robertatoken": 15, "simplest": 15, "given": 15, "\u0e07\u0e08": 15, "\u0e19\u0e17\u0e23": 15, "\u0e25\u0e40\u0e25\u0e22": 15, "\u0e07\u0e2d\u0e22": 15, "\u0e1a\u0e19\u0e1e": 15, "454": 15, "\u0e02\u0e2d\u0e07\u0e2d\u0e33\u0e40\u0e20\u0e2d\u0e27": 15, "\u0e14\u0e23\u0e30\u0e22\u0e2d\u0e07": 15, "answer": [15, 16], "\u0e15\u0e32\u0e23\u0e32\u0e07\u0e40\u0e21\u0e15\u0e23": 15, "\u0e15\u0e32\u0e23\u0e32\u0e07\u0e27\u0e32": 15, "\u0e44\u0e21\u0e25": 15, "substitut": 15, "instanc": [15, 16], "000": 15, "trane": 15, "proven": 15, "aug": 15, "fill_mask": 15, "fill": 15, "input_text": 15, "u0e02": 15, "u0e2d": 15, "u0e40": 15, "u0e07": 15, "u0e34": 15, "u0e19": 15, "u0e01": 15, "u0e39": 15, "u0e49": 15, "u003cmask": 15, "u0e2b": 15, "u0e48": 15, "u0e22": 15, "\u0e42\u0e04\u0e23\u0e07\u0e01\u0e32\u0e23\u0e21": 15, "\u0e23\u0e30\u0e22\u0e30\u0e17\u0e32\u0e07\u0e17": 15, "\u0e07\u0e2b\u0e21\u0e14": 15, "\u0e08\u0e33\u0e19\u0e27\u0e19\u0e2a\u0e16\u0e32\u0e19": 15, "\u0e2a\u0e16\u0e32\u0e19": 15, "\u0e19\u0e40\u0e2a": 15, "\u0e19\u0e17\u0e32\u0e07\u0e2b\u0e25": 15, "\u0e01\u0e43\u0e19\u0e41\u0e19\u0e27\u0e40\u0e2b\u0e19": 15, "\u0e43\u0e15": 15, "\u0e15\u0e32\u0e21\u0e41\u0e19\u0e27\u0e17\u0e32\u0e07\u0e23\u0e16\u0e44\u0e1f\u0e40\u0e14": 15, "\u0e21\u0e02\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e23\u0e16\u0e44\u0e1f\u0e41\u0e2b": 15, "\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22": 15, "\u0e32\u0e27\u0e2b\u0e19": 15, "\u0e32\u0e40\u0e19": 15, "\u0e2d\u0e40\u0e23": 15, "\u0e22\u0e01\u0e40\u0e1b": 15, "\u0e19\u0e20\u0e32\u0e29\u0e32": 15, "gy\u016bdon": 15, "\u0e08\u0e30\u0e44\u0e1b\u0e40\u0e1b": 15, "\u0e42\u0e14\u0e14\u0e40\u0e14": 15, "\u0e19\u0e1a\u0e19\u0e1f\u0e32\u0e01\u0e1f": 15, "\u0e08\u0e30\u0e44\u0e1b\u0e44\u0e02\u0e27": 15, "\u0e02\u0e27": 15, "\u0e32\u0e40\u0e2d\u0e32\u0e21\u0e32\u0e14": 15, "\u0e07\u0e43\u0e08\u0e1d": 15, "\u0e04\u0e22\u0e2d\u0e14": 15, "\u0e02\u0e2d\u0e40\u0e07": 15, "\u0e01\u0e14": [15, 16], "allow": 15, "preprocess_input_text": 15, "boolean": 15, "fill_mask_pad": 15, "513759434223175": 15, "4263": 15, "token_str": 15, "\u0e23\u0e32\u0e21": 15, "\u0e23\u0e32\u0e21\u0e2b\u0e19": 15, "05489557236433029": 15, "552": 15, "0474877767264843": 15, "125": 15, "037654660642147064": 15, "5901": 15, "\u0e2a\u0e30\u0e14\u0e27\u0e01": 15, "\u0e2a\u0e30\u0e14\u0e27\u0e01\u0e2b\u0e19": 15, "026551486924290657": 15, "1913": 15, "\u0e19\u0e32": 15, "\u0e19\u0e32\u0e2b\u0e19": 15, "wisesight_senti": 15, "social": 15, "media": 15, "wongnai_review": 15, "awai": [15, 16], "classify_multiclass": 15, "u0e04": 15, "u0e1a": 15, "u0e32": 15, "u0e47": 15, "u0e21": 15, "u0e31": 15, "u0e41": 15, "u0e17": 15, "u0e15": 15, "u0e4c": 15, "u0e25": 15, "u0e303": 15, "u0e27": 15, "u0e14": 15, "u0e42": 15, "u0e23": 15, "u0e30": 15, "u0e1b": 15, "u0e37": 15, "\u0e2d\u0e22\u0e32\u0e01\u0e01": 15, "\u0e19\u0e27\u0e30\u0e41\u0e01": 15, "\u0e2d\u0e21\u0e32\u0e43\u0e2b": 15, "\u0e2d\u0e22\u0e08": 15, "\u0e13\u0e41\u0e01\u0e21\u0e32\u0e01": 15, "\u0e42\u0e04\u0e15\u0e23\u0e1a": 15, "\u0e32\u0e40\u0e25\u0e22": 15, "\u0e1f\u0e2d\u0e23": 15, "\u0e01\u0e15\u0e25\u0e32\u0e14": 15, "\u0e19\u0e40\u0e14": 15, "prachachat": 15, "\u0e15\u0e25\u0e32\u0e14\u0e23\u0e16\u0e22\u0e19\u0e15": 15, "\u0e23\u0e2a\u0e0a\u0e32\u0e40\u0e02": 15, "\u0e22\u0e27\u0e40\u0e02": 15, "\u0e2b\u0e2d\u0e21": 15, "\u0e01\u0e25\u0e21\u0e01\u0e25": 15, "\u0e14\u0e41\u0e1a\u0e1a\u0e08": 15, "\u0e14\u0e2a\u0e19": 15, "\u0e27\u0e19\u0e44\u0e2d\u0e28\u0e04\u0e23": 15, "\u0e17\u0e32\u0e19\u0e41\u0e25": 15, "\u0e27\u0e23\u0e2a\u0e21": 15, "\u0e19\u0e2d\u0e2d\u0e01\u0e43\u0e1a\u0e44\u0e21": 15, "\u0e46\u0e21\u0e32\u0e01\u0e01\u0e27": 15, "\u0e32\u0e0a\u0e32\u0e40\u0e02": 15, "\u0e27\u0e01": 15, "\u0e2b\u0e27\u0e32\u0e19\u0e44\u0e1b": 15, "\u0e42\u0e14\u0e22\u0e23\u0e27\u0e21\u0e41\u0e25": 15, "\u0e27\u0e40\u0e09\u0e22\u0e21\u0e32\u0e01\u0e01": 15, "\u0e33\u0e40\u0e1b\u0e25": 15, "\u0e32\u0e1a\u0e23": 15, "\u0e01\u0e32\u0e23\u0e1f\u0e23": 15, "\u0e40\u0e04\u0e22\u0e1a": 15, "\u0e32\u0e40\u0e2d": 15, "\u0e21\u0e40\u0e04\u0e01": 15, "\u0e1a\u0e41\u0e21": 15, "\u0e25\u0e303": 15, "\u0e42\u0e04\u0e15\u0e23\u0e2b\u0e19": 15, "\u0e01\u0e41\u0e25\u0e30\u0e42\u0e04\u0e15\u0e23\u0e40\u0e1b\u0e25": 15, "\u0e2d\u0e07\u0e07\u0e07\u0e07": 15, "892067551612854": 15, "entiti": 15, "recognit": 15, "classify_token": 15, "ignore_label": 15, "token_classif": 15, "169": 15, "aggregation_strategi": 15, "u0e35": 15, "u0e2a": 15, "u0e38": 15, "u0e44": 15, "\u0e41\u0e14\u0e07\u0e40\u0e14": 15, "\u0e2d\u0e14\u0e23\u0e2d\u0e1a\u0e2a\u0e2d\u0e07": 15, "\u0e01\u0e40\u0e22": 15, "\u0e41\u0e21\u0e19\u0e2f": 15, "\u0e44\u0e19\u0e40\u0e15": 15, "\u0e22\u0e40\u0e2a": 15, "\u0e22\u0e07\u0e2a": 15, "\u0e0d\u0e40\u0e2a": 15, "\u0e22\u0e08\u0e32\u0e01\u0e20": 15, "\u0e22\u0e18\u0e23\u0e23\u0e21\u0e0a\u0e32\u0e15": 15, "\u0e21\u0e32\u0e01\u0e2a": 15, "\u0e17\u0e33\u0e43\u0e2b": 15, "\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e0a\u0e19\u0e01\u0e27": 15, "\u0e2d\u0e07\u0e2d\u0e1e\u0e22\u0e1e\u0e2d\u0e2d\u0e01\u0e08\u0e32\u0e01\u0e1e": 15, "\u0e1e\u0e25\u0e02\u0e2d\u0e07\u0e1e\u0e32\u0e22": 15, "\u0e32\u0e19\u0e40\u0e23": 15, "\u0e2d\u0e19\u0e40\u0e01": 15, "\u0e2d\u0e1a": 15, "700": 15, "\u0e07\u0e1e": 15, "\u0e07\u0e16\u0e25": 15, "\u0e21\u0e25\u0e07\u0e21\u0e32": 15, "\u0e32\u0e07\u0e04\u0e27\u0e32\u0e21\u0e40\u0e2a": 15, "\u0e22\u0e2b\u0e32\u0e22\u0e04": 15, "\u0e14\u0e40\u0e1b": 15, "\u0e25\u0e04": 15, "450": 15, "\u0e32\u0e19\u0e2b\u0e22\u0e27\u0e19": 15, "\u0e01\u0e17\u0e0a": 15, "\u0e40\u0e15\u0e23": 15, "\u0e22\u0e21\u0e17\u0e14\u0e25\u0e2d\u0e07\u0e1b\u0e23\u0e30\u0e21": 15, "3\u0e08": 15, "entity_group": 15, "97664016": 15, "99976474": 15, "less": 15, "tradit": 15, "logist": 15, "regress": 15, "forest": 15, "boost": 15, "imag": 15, "mrpeerat": 15, "bramvanroi": 15, "extract_last_k_token": 15, "last_k": 15, "hidden_st": 15, "last_k_token": 15, "concatenated_hidden_st": 15, "sum": 15, "_extract_last_k_lay": 15, "aggregator_fn": 15, "return_tensor": 15, "pt": 15, "no_grad": 15, "output_hidden_st": 15, "select": 15, "hidden": 15, "cat": 15, "aggregated_hidden_st": 15, "extract_last_k_lay": 15, "pretrained_model_name_or_path": 15, "lm_head": 15, "japanes": 15, "food": [15, 16], "gyudon": 15, "italian": 15, "macaroni": 15, "cosin": 15, "consid": 15, "last": 15, "markdown": 15, "obtain": 15, "aggreg": 15, "via": 15, "summat": 15, "represnetaiton": 15, "text1": 15, "\u0e19\u0e0a\u0e2d\u0e1a\u0e01": 15, "\u0e19\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e0d": 15, "text2": 15, "\u0e19\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e2d": 15, "\u0e15\u0e32\u0e40\u0e25": 15, "text3": 15, "text4": 15, "\u0e01\u0e01\u0e30\u0e42\u0e23\u0e19": 15, "t1": 15, "t3": 15, "t4": 15, "pairwis": 15, "cosine_similar": 15, "sim_matrix": 15, "cmap": 15, "blue": 15, "bo": 15, "fanci": 15, "mayb": 15, "march": 15, "wanchanberta": 15, "xnli": 15, "pair": 15, "branch": 15, "xnli_th": 15, "repositori": 15, "zero_classifi": 15, "u0e0d": 15, "u0e0a": 15, "u0e1": 15, "u0e18": 15, "scb": 15, "10x": 15, "u0e43": 15, "blockfi": 15, "startup": 15, "digit": 15, "asset": 15, "u0e13": 15, "u0e10": 15, "u201c": 15, "u201d": 15, "u0e1c": 15, "u0e20": 15, "u0e29": 15, "u201cwher": 15, "u0e08": 15, "u0e16": 15, "u0e1f": 15, "u0e28": 15, "u0e33": 15, "u0e11": 15, "u0e1d": 15, "candidate_label": 15, "\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01": 15, "\u0e23\u0e01": 15, "\u0e01\u0e32\u0e23\u0e40\u0e21": 15, "\u0e40\u0e17\u0e04\u0e42\u0e19\u0e42\u0e25\u0e22": 15, "\u0e25\u0e1b\u0e30": 15, "\u0e19\u0e40\u0e17": 15, "hypothesis_templ": 15, "\u0e1e\u0e32\u0e14\u0e2b": 15, "\u0e27\u0e02": 15, "\u0e32\u0e27\u0e19": 15, "\u0e21\u0e44\u0e1a\u0e40\u0e14\u0e19\u0e2b\u0e32\u0e23": 15, "\u0e1a\u0e0d": 15, "\u0e01\u0e23\u0e30\u0e0a": 15, "\u0e1a\u0e04\u0e27\u0e32\u0e21\u0e40\u0e1b": 15, "\u0e19\u0e18\u0e21": 15, "34431710839271545": 15, "3195861279964447": 15, "18645761907100677": 15, "14963914453983307": 15, "v0": 16, "word2vec": 16, "oppos": 16, "latter": 16, "garner": 16, "556": 16, "dimens": 16, "descend": 16, "frequenc": 16, "readabl": 16, "vector": 16, "document": 16, "thwiki_lm": 16, "word2vec_exampl": 16, "inlin": 16, "manifold": 16, "tsne": 16, "fm": 16, "load_word2vec_format": 16, "wordvector": 16, "thai2fit_wv": 16, "get_model": 16, "thai2dict": 16, "index2word": 16, "from_dict": 16, "orient": 16, "290": 16, "291": 16, "292": 16, "293": 16, "294": 16, "295": 16, "296": 16, "298": 16, "299": 16, "308956": 16, "097699": 16, "116745": 16, "215612": 16, "015768": 16, "064163": 16, "062168": 16, "039649": 16, "864940": 16, "846904": 16, "142418": 16, "033241": 16, "171581": 16, "624864": 16, "009358": 16, "449131": 16, "120130": 16, "122195": 16, "450617": 16, "071318": 16, "010751": 16, "618971": 16, "129665": 16, "035460": 16, "007560": 16, "027607": 16, "397824": 16, "026543": 16, "254075": 16, "168328": 16, "105786": 16, "180930": 16, "101630": 16, "070885": 16, "037263": 16, "183606": 16, "049088": 16, "672288": 16, "293044": 16, "592576": 16, "015736": 16, "258926": 16, "052953": 16, "153728": 16, "005985": 16, "021081": 16, "041088": 16, "057312": 16, "633230": 16, "442729": 16, "009408": 16, "252576": 16, "305512": 16, "372542": 16, "049151": 16, "568470": 16, "266586": 16, "400800": 16, "784650": 16, "197369": 16, "189711": 16, "174774": 16, "171124": 16, "186771": 16, "054294": 16, "114150": 16, "109456": 16, "094466": 16, "447015": 16, "042377": 16, "168676": 16, "148738": 16, "680404": 16, "097702": 16, "020270": 16, "182967": 16, "083949": 16, "006287": 16, "707434": 16, "070234": 16, "156962": 16, "231863": 16, "080312": 16, "323157": 16, "215695": 16, "055145": 16, "420794": 16, "016842": 16, "256759": 16, "832864": 16, "044267": 16, "147186": 16, "105424": 16, "907078": 16, "009299": 16, "550953": 16, "139337": 16, "031696": 16, "670379": 16, "008048": 16, "428813": 16, "031194": 16, "041922": 16, "036608": 16, "008106": 16, "076470": 16, "782270": 16, "033361": 16, "606864": 16, "440520": 16, "024458": 16, "025031": 16, "103389": 16, "078255": 16, "034323": 16, "459774": 16, "748643": 16, "337775": 16, "487408": 16, "511535": 16, "287710": 16, "064193": 16, "205076": 16, "146356": 16, "071343": 16, "039451": 16, "845461": 16, "163763": 16, "018096": 16, "272786": 16, "051024": 16, "532856": 16, "131856": 16, "090323": 16, "058895": 16, "151262": 16, "420358": 16, "055971": 16, "930814": 16, "163908": 16, "239587": 16, "303620": 16, "079953": 16, "453045": 16, "528826": 16, "161692": 16, "235725": 16, "099673": 16, "691668": 16, "536159": 16, "110436": 16, "297495": 16, "217414": 16, "045158": 16, "066647": 16, "190095": 16, "304333": 16, "724927": 16, "995488": 16, "716609": 16, "120522": 16, "355783": 16, "168180": 16, "377733": 16, "158624": 16, "047249": 16, "361140": 16, "161460": 16, "913314": 16, "345037": 16, "116285": 16, "318218": 16, "356664": 16, "519889": 16, "130475": 16, "125772": 16, "101328": 16, "382658": 16, "205359": 16, "340139": 16, "086848": 16, "155231": 16, "133015": 16, "039913": 16, "183761": 16, "115142": 16, "940854": 16, "066565": 16, "399744": 16, "146722": 16, "019406": 16, "181474": 16, "099863": 16, "516092": 16, "201697": 16, "249139": 16, "252957": 16, "138815": 16, "018209": 16, "232265": 16, "sne": 16, "plane": 16, "thai2plot": 16, "tnse": 16, "n_compon": 16, "init": 16, "pca": 16, "n_iter": 16, "fit_transform": 16, "wb": 16, "jeffmcneil": 16, "dip": 16, "sipa": 16, "regular": 16, "111": 16, "479628": 16, "468k": 16, "octet": 16, "regu": 16, "468": 16, "39k": 16, "stolen": 16, "blog": 16, "manash": 16, "a71e6d55f27": 16, "plot_with_label": 16, "low_dim_emb": 16, "figsiz": 16, "axis_lim": 16, "assert": 16, "figur": 16, "inch": 16, "scatter": 16, "fontproperti": 16, "fname": 16, "xy": 16, "xytext": 16, "textcoord": 16, "offset": 16, "va": 16, "bottom": 16, "savefig": 16, "\u0e2b\u0e0d": 16, "\u0e1e\u0e23\u0e30\u0e23\u0e32\u0e0a\u0e32": 16, "\u0e0a\u0e32\u0e22": 16, "\u0e1e\u0e23\u0e30\u0e23\u0e32\u0e0a": 16, "\u0e19\u0e32\u0e22\u0e01\u0e23": 16, "\u0e10\u0e21\u0e19\u0e15\u0e23": 16, "\u0e2d\u0e33\u0e19\u0e32\u0e08": 16, "\u0e1b\u0e23\u0e30\u0e18\u0e32\u0e19\u0e32\u0e18": 16, "\u0e07\u0e01": 16, "\u0e42\u0e1a\u0e23\u0e32\u0e13": 16, "\u0e44\u0e14\u0e42\u0e19\u0e40\u0e2a\u0e32\u0e23": 16, "most_similar_cosmul": 16, "7954867482185364": 16, "7382755279541016": 16, "\u0e1e\u0e23\u0e30\u0e40\u0e08": 16, "7046602368354797": 16, "\u0e32\u0e0a\u0e32\u0e22": 16, "6979373097419739": 16, "\u0e1e\u0e23\u0e30\u0e21\u0e2b\u0e32\u0e01\u0e29": 16, "6972416639328003": 16, "\u0e32\u0e1f": 16, "\u0e32\u0e2b\u0e0d": 16, "6871017217636108": 16, "\u0e32\u0e41\u0e1c": 16, "6827988624572754": 16, "\u0e1e\u0e23\u0e30\u0e1e": 16, "\u0e17\u0e18\u0e40\u0e08": 16, "671796977519989": 16, "\u0e21\u0e01": 16, "\u0e0e\u0e23\u0e32\u0e0a\u0e01": 16, "\u0e21\u0e32\u0e23": 16, "6711805462837219": 16, "\u0e19\u0e32\u0e22\u0e1e\u0e25": 16, "6694187521934509": 16, "sample_word": 16, "sample_idx": 16, "sample_plot": 16, "\u0e23\u0e2d\u0e07\u0e19\u0e32\u0e22\u0e01\u0e23": 16, "4945054054260254": 16, "400755763053894": 16, "3626699447631836": 16, "\u0e19\u0e40\u0e2d\u0e01": 16, "3437265157699585": 16, "\u0e0d\u0e0a\u0e32\u0e01\u0e32\u0e23\u0e17\u0e2b\u0e32\u0e23\u0e1a\u0e01": 16, "3405414819717407": 16, "\u0e1a\u0e20\u0e32\u0e1e\u0e22\u0e19\u0e15\u0e23": 16, "3339321613311768": 16, "\u0e01\u0e1f": 16, "\u0e15\u0e1a\u0e2d\u0e25": 16, "331659197807312": 16, "\u0e40\u0e2d\u0e01\u0e2d": 16, "\u0e04\u0e23\u0e23\u0e32\u0e0a\u0e17": 16, "3306005001068115": 16, "3243674039840698": 16, "\u0e20\u0e32\u0e1e\u0e2a\u0e15\u0e23": 16, "3231494426727295": 16, "\u0e15\u0e27": 16, "\u0e07\u0e21": 16, "537461519241333": 16, "\u0e22\u0e07\u0e25": 16, "\u0e27\u0e22\u0e19\u0e21": 16, "5080005526542664": 16, "\u0e41\u0e21\u0e25\u0e07": 16, "5048903226852417": 16, "\u0e1c\u0e25\u0e44\u0e21": 16, "4839756190776825": 16, "47641509771347046": 16, "46431201696395874": 16, "45941096544265747": 16, "45185261964797974": 16, "4504697620868683": 16, "44425833225250244": 16, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e40\u0e0a": 16, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e2a": 16, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e40\u0e22": 16, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e01\u0e25\u0e32\u0e07\u0e27": 16, "wherea": 16, "meal": 16, "\u0e25\u0e32\u0e01": 16, "push": 16, "rest": 16, "eat": 16, "reli": 16, "\u0e01\u0e40\u0e02\u0e22": 16, "associ": 16, "male": 16, "gender": 16, "\u0e2b\u0e21\u0e32": 16, "\u0e2b\u0e21\u0e2d": 16, "china": 16, "beij": 16, "itali": 16, "rome": 16, "\u0e42\u0e23\u0e21": 16, "\u0e15\u0e32\u0e25": 16, "3135956": 16, "42819628": 16, "27347285": 16, "17900795": 16, "02666693": 16, "24352394": 16, "\u0e42\u0e15\u0e40\u0e01": 16, "contribut": 16, "sakar": 16, "atv": 16, "adapt": 16, "spell": 16, "cpmp": 16, "w_rank": 16, "thai_lett": 16, "\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e24\u0e45\u0e25\u0e26\u0e26\u0e45\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e30": 16, "\u0e40\u0e41\u0e42\u0e43\u0e44": 16, "findal": 16, "lower": 16, "invers": 16, "proxi": 16, "dictionari": 16, "max": 16, "candid": 16, "edits1": 16, "edits2": 16, "subset": 16, "appear": 16, "delet": 16, "transpos": 16, "replac": 16, "insert": 16, "e1": 16, "\u0e14\u0e19\u0e32": 16, "\u0e12\u0e19\u0e32": 16, "\u0e02\u0e23": 16, "\u0e08\u0e22": 16, "\u0e19\u0e30\u0e04": 16}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"welcom": 0, "pythainlp": [0, 1, 4, 5, 7, 10], "tutori": 0, "han": 1, "coref": 1, "thai": [1, 2, 3, 4, 6, 7, 12, 13, 14], "corefer": 1, "resolut": 1, "depend": 2, "parser": [2, 6], "find": 3, "all": 3, "rhyme": 3, "word": [3, 7, 16], "from": 3, "translat": 4, "instal": [4, 5, 13, 14, 15], "import": [4, 7, 16], "list": 4, "languag": [4, 9, 11, 12], "english": 4, "nlpo3": 5, "dictionari": [5, 7], "custom": [5, 7], "chunk": 6, "get": [7, 15], "start": [7, 15], "charact": 7, "check": 7, "string": 7, "contain": 7, "how": 7, "mani": 7, "collat": 7, "date": 7, "time": 7, "format": 7, "spellout": 7, "token": [7, 15], "segment": 7, "sentenc": 7, "subword": [7, 8], "syllabl": 7, "cluster": 7, "tcc": 7, "low": 7, "level": 7, "oper": 7, "transliter": 7, "normal": 7, "digit": 7, "convers": 7, "soundex": 7, "spellcheck": [7, 16], "frequenc": 7, "part": [7, 8], "speech": [7, 8], "tag": 7, "name": [7, 8], "entiti": [7, 8], "vector": [7, 15], "number": 7, "spell": 7, "out": 7, "wangchanberta": [8, 15], "recognit": 8, "wisesight": [9, 15], "sentiment": [9, 15], "analysi": 9, "text": [9, 12, 15], "processor": 9, "logist": 9, "regress": 9, "process": 9, "file": 9, "csv": 9, "load": 9, "data": 9, "train": 9, "valid": 9, "split": 9, "creat": 9, "featur": [9, 15], "fit": 9, "model": [9, 11, 12, 14, 15], "see": 9, "result": 9, "ulmfit": [9, 11], "finetun": [9, 11], "classifi": [9, 15], "spaci": 10, "wongnai": [11, 15], "review": [11, 15], "classif": [11, 15], "oversampl": 11, "fasttext": 11, "linearsvc": 11, "submiss": 11, "wiki": 12, "gener": 12, "semant": 13, "represent": 13, "automat": 13, "deriv": 13, "serial": 13, "verb": 13, "construct": 13, "A": 13, "grammar": 13, "base": 13, "approach": 13, "usag": 13, "wav2vec2": 14, "onnx": 14, "build": 14, "infer": 14, "notebook": 15, "choos": 15, "pretrain": 15, "mask": 15, "predict": 15, "sequenc": 15, "multi": 15, "class": 15, "thainer": 15, "lst20": 15, "document": 15, "extract": 15, "zero": 15, "shot": 15, "thai2vec": 16, "embed": 16, "exampl": 16, "arithmet": 16, "doesn": 16, "t": 16, "match": 16, "cosin": 16, "similar": 16}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "nbsphinx": 4, "sphinx": 57}, "alltitles": {"Welcome to PyThaiNLP Tutorials": [[0, "welcome-to-pythainlp-tutorials"]], "Tutorials:": [[0, null]], "\ud83e\udebf Han-Coref: Thai Coreference resolution by PyThaiNLP": [[1, "\ud83e\udebf-Han-Coref:-Thai-Coreference-resolution-by-PyThaiNLP"]], "Thai Dependency Parser": [[2, "Thai-Dependency-Parser"]], "Find all Thai rhyming words from Thai word": [[3, "Find-all-Thai-rhyming-words-from-Thai-word"]], "PyThaiNLP Translate": [[4, "PyThaiNLP-Translate"]], "Install": [[4, "Install"], [13, "Install"], [14, "Install"]], "Translate": [[4, "Translate"]], "Import": [[4, "Import"]], "List language": [[4, "List-language"]], "English to Thai": [[4, "English-to-Thai"]], "Thai to English": [[4, "Thai-to-English"]], "nlpO3": [[5, "nlpO3"]], "Installation": [[5, "Installation"], [15, "Installation"]], "PyThaiNLP dictionary": [[5, "PyThaiNLP-dictionary"]], "Custom dictionary": [[5, "Custom-dictionary"]], "Thai Chunk Parser": [[6, "Thai-Chunk-Parser"]], "PyThaiNLP Get Started": [[7, "PyThaiNLP-Get-Started"]], "Import PyThaiNLP": [[7, "Import-PyThaiNLP"]], "Thai Characters": [[7, "Thai-Characters"]], "Checking if a string contains Thai character or not, or how many": [[7, "Checking-if-a-string-contains-Thai-character-or-not,-or-how-many"]], "Collation": [[7, "Collation"]], "Date/Time Format and Spellout": [[7, "Date/Time-Format-and-Spellout"]], "Date/Time Format": [[7, "Date/Time-Format"]], "Time Spellout": [[7, "Time-Spellout"]], "Tokenization and Segmentation": [[7, "Tokenization-and-Segmentation"]], "Sentence": [[7, "Sentence"]], "Word": [[7, "Word"]], "Subword, syllable, and Thai Character Cluster (TCC)": [[7, "Subword,-syllable,-and-Thai-Character-Cluster-(TCC)"]], "Subword tokenization": [[7, "Subword-tokenization"]], "Syllable tokenization": [[7, "Syllable-tokenization"]], "Low-level subword operations": [[7, "Low-level-subword-operations"]], "Transliteration": [[7, "Transliteration"]], "Normalization": [[7, "Normalization"]], "Digit conversion": [[7, "Digit-conversion"]], "Soundex": [[7, "Soundex"]], "Spellchecking": [[7, "Spellchecking"], [16, "Spellchecking"]], "Spellchecking - Custom dictionary and word frequency": [[7, "Spellchecking---Custom-dictionary-and-word-frequency"]], "Part-of-Speech Tagging": [[7, "Part-of-Speech-Tagging"]], "Named-Entity Tagging": [[7, "Named-Entity-Tagging"]], "Word Vector": [[7, "Word-Vector"]], "Number Spell Out": [[7, "Number-Spell-Out"]], "Wangchanberta": [[8, "Wangchanberta"]], "Named Entity Recognition": [[8, "Named-Entity-Recognition"]], "Part of speech": [[8, "Part-of-speech"]], "Subword": [[8, "Subword"]], "Wisesight Sentiment Analysis": [[9, "Wisesight-Sentiment-Analysis"]], "Text Processor for Logistic Regression": [[9, "Text-Processor-for-Logistic-Regression"]], "Process Text Files to CSVs": [[9, "Process-Text-Files-to-CSVs"]], "Load Data": [[9, "Load-Data"]], "Train-validation Split": [[9, "Train-validation-Split"]], "Logistic Regression": [[9, "Logistic-Regression"]], "Create Features": [[9, "Create-Features"]], "Fit Model": [[9, "Fit-Model"]], "See Results": [[9, "See-Results"], [9, "id1"]], "ULMFit Model": [[9, "ULMFit-Model"], [11, "ULMFit-Model"]], "Finetune Language Model": [[9, "Finetune-Language-Model"], [11, "Finetune-Language-Model"]], "Train Text Classifier": [[9, "Train-Text-Classifier"]], "spaCy-PyThaiNLP": [[10, "spaCy-PyThaiNLP"]], "Wongnai Review Classification": [[11, "Wongnai-Review-Classification"]], "Oversampling": [[11, "Oversampling"]], "fastText Model": [[11, "fastText-Model"]], "LinearSVC Model": [[11, "LinearSVC-Model"]], "Classification": [[11, "Classification"]], "Submission": [[11, "Submission"]], "Thai Wiki Language Model for Text Generation": [[12, "Thai-Wiki-Language-Model-for-Text-Generation"]], "Thai Semantic Representation": [[13, "Thai-Semantic-Representation"]], "Automatic Derivation of Semantic Representations for Thai Serial Verb Constructions: A Grammar-Based Approach": [[13, "Automatic-Derivation-of-Semantic-Representations-for-Thai-Serial-Verb-Constructions:-A-Grammar-Based-Approach"]], "Usage": [[13, "Usage"]], "Thai Wav2vec2 model to ONNX model": [[14, "Thai-Wav2vec2-model-to-ONNX-model"]], "Build ONNX Model": [[14, "Build-ONNX-Model"]], "Inference": [[14, "Inference"]], "WangchanBERTa: Getting Started Notebook": [[15, "WangchanBERTa:-Getting-Started-Notebook"]], "Choose Pretrained Model": [[15, "Choose-Pretrained-Model"]], "Masked Token Prediction": [[15, "Masked-Token-Prediction"]], "Sequence Classification": [[15, "Sequence-Classification"]], "Pretrained Multi-class Classifiers - Wisesight Sentiment and Wongnai Reviews": [[15, "Pretrained-Multi-class-Classifiers---Wisesight-Sentiment-and-Wongnai-Reviews"]], "Token Classification": [[15, "Token-Classification"]], "Pretrained Token Classifiers - ThaiNER and LST20": [[15, "Pretrained-Token-Classifiers---ThaiNER-and-LST20"]], "Document Vectors": [[15, "Document-Vectors"]], "Feature Extraction": [[15, "Feature-Extraction"]], "Zero-shot Text Classification": [[15, "Zero-shot-Text-Classification"]], "Thai2Vec Embeddings Examples": [[16, "Thai2Vec-Embeddings-Examples"]], "Imports": [[16, "Imports"]], "Word Arithmetic": [[16, "Word-Arithmetic"]], "Doesn\u2019t Match": [[16, "Doesn't-Match"]], "Cosine Similarity": [[16, "Cosine-Similarity"]]}, "indexentries": {}}) \ No newline at end of file +Search.setIndex({"docnames": ["index", "notebooks/Han-Coref", "notebooks/Thai_Dependency_Parser", "notebooks/find_all_thai_rhyming_words", "notebooks/machine_translation", "notebooks/nlpo3ipynb", "notebooks/pythainlp_chunk", "notebooks/pythainlp_get_started", "notebooks/pythainlp_wangchanberta", "notebooks/sentiment_analysis", "notebooks/spaCy_PyThaiNLP_demo", "notebooks/text_classification", "notebooks/text_generation", "notebooks/thai_semantic_representation", "notebooks/thai_wav2vec2_onnx", "notebooks/wangchanberta_getting_started_aireseach", "notebooks/word2vec_examples"], "filenames": ["index.rst", "notebooks/Han-Coref.ipynb", "notebooks/Thai_Dependency_Parser.ipynb", "notebooks/find_all_thai_rhyming_words.ipynb", "notebooks/machine_translation.ipynb", "notebooks/nlpo3ipynb.ipynb", "notebooks/pythainlp_chunk.ipynb", "notebooks/pythainlp_get_started.ipynb", "notebooks/pythainlp_wangchanberta.ipynb", "notebooks/sentiment_analysis.ipynb", "notebooks/spaCy_PyThaiNLP_demo.ipynb", "notebooks/text_classification.ipynb", "notebooks/text_generation.ipynb", "notebooks/thai_semantic_representation.ipynb", "notebooks/thai_wav2vec2_onnx.ipynb", "notebooks/wangchanberta_getting_started_aireseach.ipynb", "notebooks/word2vec_examples.ipynb"], "titles": ["Welcome to PyThaiNLP Tutorials", "\ud83e\udebf Han-Coref: Thai Coreference resolution by PyThaiNLP", "Thai Dependency Parser", "Find all Thai rhyming words from Thai word", "PyThaiNLP Translate", "nlpO3", "Thai Chunk Parser", "PyThaiNLP Get Started", "Wangchanberta", "Wisesight Sentiment Analysis", "spaCy-PyThaiNLP", "Wongnai Review Classification", "Thai Wiki Language Model for Text Generation", "Thai Semantic Representation", "Thai Wav2vec2 model to ONNX model", "WangchanBERTa: Getting Started Notebook", "Thai2Vec Embeddings Examples"], "terms": {"i": [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "python": [0, 2, 3, 4, 5, 6, 8, 10, 14, 15], "librari": [0, 5, 6], "thai": [0, 5, 8, 9, 10, 11, 15, 16], "natur": [0, 5], "languag": [0, 5, 7, 8, 15], "process": [0, 5, 7, 11, 14, 16], "han": 0, "coref": 0, "corefer": 0, "resolut": [0, 12], "depend": [0, 9, 10, 15], "parser": 0, "find": [0, 7, 9], "all": [0, 4, 7, 11, 12, 13, 15, 16], "rhyme": 0, "word": [0, 5, 6, 9, 10, 11, 12, 15], "from": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "translat": 0, "nlpo3": 0, "chunk": 0, "get": [0, 2, 9, 11, 12, 13, 16], "start": [0, 1, 2, 12], "wangchanberta": 0, "wisesight": [0, 11], "sentiment": [0, 11], "analysi": [0, 13, 15], "spaci": [0, 1, 2], "wongnai": 0, "review": 0, "classif": [0, 7, 9, 16], "wiki": [0, 11, 13, 15], "model": [0, 4, 6, 7, 8, 10, 16], "text": [0, 1, 4, 5, 6, 7, 11, 14, 16], "gener": [0, 6, 7, 15, 16], "semant": 0, "represent": [0, 7], "wav2vec2": 0, "onnx": 0, "notebook": [0, 8, 9, 12, 13, 14], "thai2vec": 0, "embed": [0, 9, 11, 12], "exampl": [0, 6, 7, 9, 15], "apach": 0, "softwar": 0, "licens": [0, 13], "2": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "0": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "maintain": 0, "team": 0, "see": [0, 7, 11, 12, 15, 16], "sourc": [0, 2, 10], "code": [0, 7, 11, 14, 16], "http": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16], "github": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16], "com": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16], "main": [0, 7, 12, 14, 15], "develop": [0, 12, 15], "websit": 0, "org": [0, 2, 4, 8, 10, 13, 14, 15], "interact": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "onlin": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "version": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "pip": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "q": [1, 9, 12, 13], "instal": [1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 16], "fastcoref": 1, "transform": [1, 8, 9, 10, 12, 14, 15], "sentencepiec": [1, 4, 8, 15], "prepar": [1, 5, 10, 13, 15], "metadata": [1, 2, 8, 10, 13, 14, 15], "setup": [1, 2, 4, 7, 8, 10, 13, 15], "py": [1, 2, 4, 7, 8, 9, 10, 13, 14, 15, 16], "done": [1, 2, 4, 7, 8, 10, 12, 13, 15], "13": [1, 2, 4, 7, 8, 9, 10, 11, 13, 14, 15, 16], "4": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "mb": [1, 3, 5, 6, 10, 13, 14, 15, 16], "114": [1, 15, 16], "": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16], "eta": [1, 3, 6, 10, 11, 13, 14, 15], "00": [1, 3, 4, 6, 7, 9, 10, 13, 14, 15], "7": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "119": 1, "474": 1, "6": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "kb": [1, 3, 5, 6, 10, 13, 14, 15, 16], "53": [1, 7, 11, 13, 14], "110": [1, 5, 15, 16], "5": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16], "14": [1, 4, 5, 7, 8, 9, 10, 11, 14, 15, 16], "9": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "212": 1, "25": [1, 2, 4, 7, 8, 9, 10, 11, 12, 14, 15, 16], "134": 1, "3": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "17": [1, 2, 3, 4, 6, 7, 8, 9, 10, 14, 15, 16], "90": 1, "224": 1, "29": [1, 4, 7, 8, 9, 11, 15, 16], "8": [1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "95": [1, 4, 8], "268": 1, "32": [1, 7, 9, 10, 11, 15], "149": 1, "19": [1, 2, 3, 4, 7, 8, 9, 10, 14, 16], "build": [1, 2, 4, 8, 10, 13, 15], "wheel": [1, 2, 4, 8, 10, 13, 15], "import": [1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15], "spacy_compon": 1, "nlp": [1, 2, 10], "blank": [1, 10], "th": [1, 4, 9, 10, 11, 12, 14, 16], "add_pip": [1, 10], "config": [1, 9, 10, 11, 12, 13, 14], "model_architectur": 1, "fcoref": 1, "model_path": [1, 9, 11, 16], "v1": 1, "lt": [1, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15], "fastcorefresolv": 1, "0x7fbd9c2b6560": 1, "gt": [1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16], "random": [1, 9, 15], "def": [1, 6, 7, 14, 15, 16], "get2tag": 1, "tag": [1, 6, 8, 10, 12], "titl": [1, 15], "none": [1, 2, 4, 7, 8, 9, 10, 11, 12, 14, 15, 16], "dic_ent": 1, "ent": [1, 10], "_tag": 1, "str": [1, 7], "list": [1, 3, 5, 6, 7, 10, 14, 16], "rang": [1, 11, 15, 16], "len": [1, 3, 7, 9, 11, 12, 14, 16], "enumer": [1, 6, 13, 16], "e": [1, 7, 10, 13, 15], "append": [1, 3, 11, 14, 16], "end": [1, 7, 12, 15], "label": [1, 9, 11, 15, 16], "color": 1, "join": [1, 9, 11, 14], "choic": [1, 12], "0123456789abcdef": 1, "j": [1, 7, 14], "thank": 1, "stackoverflow": [1, 14], "50218895": 1, "return": [1, 6, 7, 14, 15, 16], "displaci": [1, 10], "\u0e2a\u0e32\u0e18": 1, "\u0e15": [1, 3, 5, 7, 8, 9, 11, 15, 16], "\u0e41\u0e08\u0e07\u0e27": 1, "\u0e19": [1, 7, 9, 10, 11, 12, 13, 15, 16], "\u0e20\u0e32\u0e1e\u0e41\u0e04\u0e1b\u0e01\u0e25": 1, "\u0e21\u0e44\u0e25\u0e19": 1, "\u0e17": [1, 3, 7, 8, 9, 10, 11, 12, 15, 16], "\u0e1b\u0e23": [1, 7, 11, 12], "\u0e01\u0e29\u0e32\u0e2f": 1, "\u0e01\u0e25": [1, 3, 9, 11], "\u0e32\u0e27\u0e23": 1, "\u0e32\u0e22": [1, 7, 9, 11], "\u0e1e": [1, 3, 7, 9, 11, 12, 15, 16], "\u0e18\u0e32": 1, "\u0e22": [1, 7, 8, 9, 11, 12, 15, 16], "\u0e44\u0e21": [1, 7, 8, 9, 11, 15, 16], "\u0e43\u0e0a": [1, 7, 9], "\u0e27\u0e40\u0e2d\u0e07": [1, 9], "\u0e41\u0e15": [1, 7, 8, 9, 11, 15], "\u0e40\u0e2b": [1, 9, 11], "\u0e19\u0e14": [1, 7, 9, 16], "\u0e27\u0e22\u0e27": 1, "\u0e32\u0e2d\u0e20": 1, "\u0e1b\u0e23\u0e32\u0e22\u0e14": 1, "\u0e2d\u0e22\u0e04": 1, "\u0e32\u0e1a\u0e33\u0e19\u0e32\u0e0d": 1, "\u0e02\u0e23\u0e01": 1, "doc": [1, 2, 10, 11, 13, 14], "_": [1, 7, 9, 11, 15], "coref_clust": 1, "render": [1, 7, 10], "manual": [1, 13], "true": [1, 7, 8, 9, 10, 11, 12, 14, 15, 16], "style": [1, 10], "option": [1, 10, 12, 13], "jupyt": [1, 10], "\u0e41\u0e21": [1, 9, 11, 15], "\u0e2a": [1, 7, 9, 11, 12, 13, 15, 16], "\u0e07\u0e43\u0e2b": 1, "\u0e25": [1, 3, 7, 11, 15, 16], "\u0e01\u0e0a\u0e32\u0e22\u0e44\u0e1b\u0e0b": 1, "\u0e2d\u0e02\u0e2d\u0e07": [1, 12], "\u0e40\u0e18\u0e2d\u0e01\u0e25": 1, "\u0e1a\u0e25": 1, "\u0e21\u0e40\u0e2d\u0e32\u0e15": 1, "\u0e01": [1, 3, 6, 7, 9, 11, 12, 15, 16], "\u0e01\u0e0a\u0e32\u0e22": 1, "\u0e44\u0e1b\u0e0b": 1, "\u0e40\u0e18\u0e2d": [1, 9], "\u0e2b\u0e21\u0e2d\u0e41\u0e0a\u0e21\u0e1b": 1, "\u0e40\u0e1b": [1, 7, 8, 9, 10, 11, 12, 13, 15, 16], "\u0e14\u0e43\u0e08\u0e17": 1, "\u0e07\u0e19": [1, 7, 11], "\u0e33\u0e15\u0e32": 1, "\u0e40\u0e2a": [1, 7, 11], "\u0e22\u0e43\u0e08\u0e17": 1, "\u0e01\u0e08\u0e32\u0e01\u0e44\u0e1b": 1, "\u0e23": [1, 3, 7, 9, 10, 11, 12, 13, 15, 16], "\u0e01\u0e20": 1, "\u0e21": [1, 7, 8, 9, 11, 12, 15, 16], "\u0e43\u0e08\u0e17": 1, "\u0e01\u0e40\u0e2a": 1, "\u0e22\u0e2a\u0e25\u0e30": 1, "\u0e43\u0e2b": [1, 7, 9, 11], "\u0e2d\u0e0a": 1, "\u0e0a": [1, 3, 7, 8, 9, 11, 12, 16], "\u0e1e\u0e0a": 1, "\u0e27\u0e22\u0e40\u0e1e": 1, "\u0e2d\u0e19\u0e17\u0e2b\u0e32\u0e23\u0e23\u0e2d\u0e14": 1, "\u0e27\u0e40\u0e2d\u0e07\u0e40\u0e2a": 1, "\u0e22\u0e0a": [1, 7], "\u0e27": [1, 7, 9, 11, 12, 15, 16], "\u0e08\u0e32\u0e01\u0e44\u0e1b": 1, "pythainlp": [2, 3, 6, 8, 9, 11, 12, 13, 14, 15, 16], "doe": [2, 12], "come": [2, 5, 12], "instead": [2, 10, 14, 15], "you": [2, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16], "can": [2, 7, 10, 12, 13, 15, 16], "us": [2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "which": [2, 5, 6, 7, 9, 12, 14, 16], "wa": [2, 12, 15, 16], "train": [2, 6, 10, 11, 12, 14, 15, 16], "univers": 2, "thi": [2, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16], "tutori": [2, 5, 6, 14, 16], "show": [2, 9, 12, 14, 15], "how": [2, 5, 6, 12, 13, 14, 15, 16], "spacy_thai": [2, 10], "collect": [2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16], "download": [2, 3, 4, 5, 6, 8, 10, 13, 14, 15, 16], "file": [2, 4, 5, 8, 13, 14, 16], "pythonhost": [2, 4, 8], "packag": [2, 3, 4, 6, 7, 8, 9, 10, 14, 15, 16], "ca": [2, 10], "2d": [2, 16], "c2e71a4143d6d9cd9db6744e328dfb9f65b98ad7607644d0ad4369bce303": 2, "py3": [2, 4, 8, 10, 14, 15], "ani": [2, 4, 7, 8, 10, 12, 14, 15], "whl": [2, 3, 4, 6, 8, 10, 14, 15], "1mb": [2, 8], "11": [2, 4, 7, 8, 9, 10, 11, 14, 15, 16], "2mb": [2, 4, 8], "ufal": [2, 10], "udpip": [2, 10], "e5": 2, "72": [2, 9, 15], "2b8b9dc7c80017c790bb3308bbad34b57accfed2ac2f1f4ab252ff4e9cb2": 2, "tar": [2, 4, 8, 10, 13, 15], "gz": [2, 4, 8, 10, 13, 15], "304kb": 2, "307kb": 2, "45": [2, 7, 10, 11], "8mb": [2, 8], "requir": [2, 3, 4, 6, 7, 8, 10, 14, 15], "alreadi": [2, 3, 4, 6, 7, 8, 9, 10, 14, 15], "satisfi": [2, 3, 4, 6, 8, 10, 14, 15], "usr": [2, 3, 4, 6, 7, 8, 9, 10, 14, 15, 16], "local": [2, 3, 4, 6, 7, 8, 9, 10, 14, 15, 16], "lib": [2, 3, 4, 6, 7, 8, 9, 10, 14, 15, 16], "python3": [2, 3, 4, 6, 7, 8, 9, 10, 14, 15, 16], "dist": [2, 3, 4, 6, 8, 9, 10, 14, 15, 16], "deplaci": [2, 10], "58": [2, 5, 7], "87b6286c9578fc456de1363f877228ee0d117b8de238e3e2cd49dbc06eaa": 2, "c1": 2, "09": 2, "1215cb6f6ef0cfc9dbb427a961fda8a47c111955f782f659ca2d38c79adc": 2, "10": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16], "6mb": [2, 8], "28": [2, 10, 15, 16], "7mb": [2, 4], "srsly": [2, 10], "request": [2, 3, 4, 5, 6, 8, 10, 13, 14, 15, 16], "23": [2, 4, 7, 8, 9, 10, 14, 15, 16], "thinc": [2, 10], "presh": [2, 10], "wasabi": [2, 10], "plac": 2, "cymem": [2, 10], "bli": [2, 10], "tqdm": [2, 4, 6, 8, 9, 10, 11, 12, 14, 15], "38": [2, 7, 8, 10], "41": [2, 4, 7, 8, 9], "murmurhash": [2, 10], "numpi": [2, 4, 7, 8, 9, 10, 11, 12, 14, 15, 16], "15": [2, 3, 4, 7, 8, 9, 10, 11, 12, 14, 15, 16], "catalogu": [2, 10], "setuptool": [2, 10], "54": [2, 7, 8, 14], "tinydb": [2, 4, 8, 14], "af": [2, 8], "cd": [2, 8, 9], "1ce3d93818cdeda0446b8033d21e5f32daeb3a866bbafd878a9a62058a9c": [2, 8], "crfsuit": [2, 3, 4, 6, 8, 10, 14], "79": [2, 4, 7, 8, 9, 10], "47": [2, 7, 8, 9, 15], "58f16c46506139f17de4630dbcfb877ce41a6355a1bbf3c443edb9708429": [2, 8], "python_crfsuit": [2, 3, 6, 8, 10, 14], "cp37": [2, 4, 8, 14], "cp37m": [2, 4, 8, 14], "manylinux1_x86_64": [2, 4, 8, 14], "743kb": [2, 8], "747kb": [2, 8], "68": [2, 7, 14], "5mb": [2, 4], "chardet": [2, 4, 8, 10, 14], "urllib3": [2, 3, 4, 6, 8, 10, 14, 15], "26": [2, 4, 8, 10, 11, 14, 15, 16], "21": [2, 3, 4, 6, 7, 8, 9, 10, 14, 15, 16], "24": [2, 4, 7, 8, 10, 14, 15, 16], "certifi": [2, 3, 4, 6, 8, 10, 14, 15], "2017": [2, 3, 4, 6, 8, 10, 14, 15], "2020": [2, 4, 8], "12": [2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "idna": [2, 3, 4, 6, 8, 10, 14, 15], "importlib": [2, 4, 8, 14], "20": [2, 4, 7, 8, 9, 10, 11, 14, 15, 16], "python_vers": [2, 4, 8], "34": [2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "type": [2, 4, 7, 8, 10, 11, 12, 13, 14, 15, 16], "extens": [2, 4, 8, 9, 10, 14, 15], "zipp": [2, 4, 8, 14], "creat": [2, 4, 7, 8, 10, 13, 15, 16], "filenam": [2, 4, 8, 10, 14, 15, 16], "linux_x86_64": [2, 10, 14], "size": [2, 4, 8, 10, 12, 15, 16], "5626703": 2, "sha256": [2, 4, 8, 10, 15], "a58565fc21a1f9d3a7c51a3aea138cf612babbefb36ae05cbaccec852b55d967": 2, "store": [2, 4, 8, 10, 14, 15], "directori": [2, 4, 8, 10, 15], "root": [2, 4, 8, 10, 12, 15], "cach": [2, 4, 8, 10, 15], "0c": 2, "9d": 2, "db": 2, "6d3404c33da5b7adb6c6972853efb6a27649d3ba15f7e9bebb": 2, "successfulli": [2, 3, 4, 5, 6, 8, 10, 14, 15], "built": [2, 4, 8, 10, 15], "load": [2, 5, 10, 11, 12, 14, 15, 16], "do": [2, 7, 9, 11, 12, 15, 16], "pars": [2, 6, 10, 13], "call": [2, 5, 6, 7, 14, 15], "sentenc": [2, 5, 6, 10, 15, 16], "\u0e1e\u0e27\u0e01\u0e40\u0e23\u0e32\u0e43\u0e0a": 2, "\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22": [2, 5], "visual": [2, 6, 9, 12, 16], "tree": [2, 6, 13], "graphviz": 2, "dot": 2, "pre": [3, 5, 7, 16], "0b4": 3, "22": [3, 4, 5, 6, 7, 8, 10, 11, 14, 15, 16], "31": [3, 6, 7, 9, 14, 15, 16], "charset": [3, 6, 15], "normal": [3, 6, 14, 15], "2023": [3, 6, 15], "cp310": [3, 6, 15], "manylinux_2_17_x86_64": [3, 6, 10, 14, 15], "manylinux2014_x86_64": [3, 4, 6, 8, 10, 14, 15], "993": [3, 6, 15], "16": [3, 7, 8, 9, 11, 14, 16], "corpu": [3, 4, 5, 6, 7, 9, 10, 11, 16], "thai_word": [3, 7], "token": [3, 4, 5, 6, 8, 9, 10, 11, 12, 14, 16], "syllable_token": [3, 7], "all_thai_words_dict": 3, "18": [3, 4, 7, 9, 11, 14, 16], "khave": 3, "khaveeverifi": 3, "kv": 3, "39": [3, 5, 7, 8, 9, 10, 11, 12, 13, 15, 16], "\u0e40\u0e17\u0e2d\u0e0d": 3, "\u0e08": [3, 7, 9, 11, 12, 15, 16], "\u0e1a": [3, 7, 9, 10, 11, 12, 15, 16], "list_sumpu": 3, "try": [3, 5, 7, 10, 12, 15], "is_sumpu": 3, "except": [3, 12], "pass": [3, 7, 14, 16], "print": [3, 4, 7, 9, 11, 12, 16], "\u0e2d": [3, 7, 9, 11, 12, 13, 15, 16], "\u0e1f": [3, 7], "\u0e16": [3, 7, 9, 11], "\u0e2b\u0e25": [3, 6, 12, 15], "\u0e17\u0e27": 3, "\u0e1b": [3, 7, 9, 11, 15, 16], "\u0e07": [3, 7, 9, 11, 12, 15, 16], "\u0e2b": [3, 11], "\u0e04": [3, 7, 8, 9, 11, 12, 15], "\u0e2b\u0e19": [3, 7, 9, 13, 15], "\u0e04\u0e23": [3, 5, 7, 9, 11, 12], "we": [4, 5, 6, 7, 9, 11, 12, 13, 14, 15, 16], "machin": 4, "The": [4, 6, 7, 9, 10, 11, 13, 15, 16], "vistec": [4, 15], "depa": 4, "thailand": 4, "artifici": 4, "intellig": [4, 12], "research": [4, 10, 15], "institut": 4, "fairseq": 4, "ab": 4, "92c6efb05ffdfe16fbdc9e463229d9af8c3b74dc943ed4b4857a87b223c2": 4, "dataclass": 4, "2f": 4, "1095cdc2868052dd1e64520f7c0d5c8c550ad297e944e641dbf1ffbb9a5d": 4, "cython": 4, "hydra": 4, "core": [4, 10], "52": [4, 7], "e3": [4, 10], "fbd70dd0d3ce4d1d75c22d56c0c9f895cfa7ed6587a9ffb821d6812d6a60": 4, "hydra_cor": 4, "123kb": 4, "133kb": 4, "cffi": [4, 14], "sacrebleu": 4, "7e": 4, "57": [4, 7, 9, 10, 11], "0c7ca4e31a126189dab99c19951910bd081dea5bbd25f24b77107750eae7": 4, "54kb": 4, "61kb": 4, "3mb": [4, 8], "torch": [4, 9, 10, 12, 14, 15], "cu101": 4, "regex": [4, 6, 8, 10, 14, 15], "2019": [4, 7, 8, 10, 13, 14, 15], "omegaconf": 4, "d0": 4, "eb": [4, 10], "9d63ce09dd8aa85767c65668d5414958ea29648a0eec80a4a7d311ec2684": 4, "antlr4": 4, "runtim": 4, "56": [4, 7, 9], "02": [4, 9, 11, 12], "789a0bddf9c9b31b14c3e79ec22b9656185a803dc31c15f006f9855ece0d": 4, "112kb": 4, "4mb": [4, 8], "resourc": [4, 14], "pycpars": [4, 14], "portalock": 4, "89": [4, 10], "a6": 4, "3814b7107e0788040870e8825eebf214d72166adf656ba7d4bf14759a06a": 4, "py2": [4, 10], "pyyaml": [4, 10, 14, 15], "7a": 4, "a5": 4, "393c087efdc78091afa2af9f1378762f9821c9c1d7a22c5753fb5ac5f97a": 4, "636kb": 4, "645kb": 4, "0mb": [4, 8], "antlr4_python3_runtim": 4, "141231": 4, "7443fbcc47b93d3b320b897cf91d8b947b6fdc6a0795dcce01ed16fd31c8ab6d": 4, "e2": [4, 13, 16], "fa": 4, "b78480b448b8579ddf393bebd3f47ee23aa84c89b6a78285c8": 4, "found": [4, 5, 10, 14, 16], "exist": [4, 10, 14], "uninstal": [4, 10, 14], "sacremos": [4, 8, 14], "43": [4, 7, 8, 9, 10, 11, 13, 15], "f5": [4, 8], "99": [4, 8, 9, 11, 12], "e0808cb947ba10f575839c43e8fafc9cc44e4a7a2c8f79c60db48220a577": [4, 8], "click": [4, 6, 8, 10, 14], "joblib": [4, 6, 8, 10, 14], "six": [4, 8, 9, 10, 14], "archiv": [4, 8, 9, 11, 16], "dev": [4, 5, 7, 9, 10, 11, 15, 16], "zip": [4, 8, 9, 11, 16], "upgrad": 4, "dev0": [4, 8], "11003566": 4, "b64ebc4010c51f2644c15473edd0c49540644725a367c28baa0d3f3e19edcccb": 4, "tmp": 4, "ephem": 4, "zkojv2_o": 4, "4e": 4, "1e": [4, 9, 11, 14], "26f3198c6712ecfbee92928ed1dde923a078da3d222401cc78": 4, "download_model_al": 4, "scb_1m_en": 4, "th_mose": 4, "100": [4, 5, 7, 9, 11, 12, 13, 15, 16], "1174648148": 4, "81506882": 4, "14it": 4, "scb_1m_th": 4, "en_spm": 4, "703780432": 4, "08": [4, 7, 10, 11, 13, 14], "78234386": 4, "81it": 4, "enthtransl": 4, "thentransl": 4, "en": [4, 13, 14], "have": [4, 12, 15, 16], "bpe": 4, "want": [4, 10, 12, 15], "fri": 4, "chicken": 4, "\u0e44\u0e01": [4, 7, 9], "\u0e17\u0e2d\u0e14\u0e04": 4, "\u0e30": [4, 9, 11, 16], "\u0e1c\u0e21\u0e2d\u0e22\u0e32\u0e01\u0e01": 4, "\u0e19\u0e44\u0e01": 4, "\u0e17\u0e2d\u0e14": [4, 9], "\u0e1c\u0e21\u0e2d\u0e22\u0e32\u0e01\u0e40\u0e02": 4, "\u0e22\u0e19\u0e42\u0e1b\u0e23\u0e41\u0e01\u0e23\u0e21\u0e04\u0e2d\u0e21\u0e1e": 4, "\u0e27\u0e40\u0e15\u0e2d\u0e23": 4, "write": [4, 11, 12], "comput": [4, 13], "program": 4, "rust": 5, "node": 5, "bind": 5, "similarli": 5, "newmm": [5, 7, 10, 15], "maxim": 5, "match": [5, 7], "base": [5, 6, 7, 8, 9, 10, 11, 15, 16], "honor": [5, 12], "charact": [5, 12], "cluster": [5, 15], "boundari": 5, "howev": [5, 12], "compar": 5, "pure": 5, "implement": [5, 13], "much": [5, 12], "faster": 5, "For": [5, 7, 9, 10, 14, 15, 16], "comparison": 5, "refer": 5, "benchmark": [5, 11], "segment": [5, 6, 10], "lern": 5, "more": [5, 6, 7, 9, 10, 12, 15, 16], "about": [5, 7, 9, 12], "here": [5, 7, 12, 15], "In": [5, 11, 15], "learn": [5, 9, 11, 12], "serv": 5, "first": [5, 6, 11, 13, 15], "without": [5, 7, 12], "specifi": [5, 7, 15], "paramet": [5, 7, 14], "\u0e17\u0e14\u0e2a\u0e2d\u0e1a\u0e15": [5, 8], "\u0e14\u0e04\u0e33\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22": 5, "\u0e17\u0e14\u0e2a\u0e2d\u0e1a": [5, 8], "\u0e14": [5, 7, 8, 9, 11, 13, 15, 16], "\u0e04\u0e33": 5, "now": [5, 12], "enhanc": 5, "countri": [5, 12, 15], "wget": [5, 9, 11, 13, 14, 16], "command": 5, "It": [5, 7, 8, 9, 10, 11, 15, 16], "plain": 5, "contain": [5, 9, 11], "one": [5, 7, 12, 16], "per": [5, 8, 13], "line": [5, 9, 12], "raw": [5, 9, 11, 14, 15, 16], "countries_th": 5, "txt": [5, 6, 9, 11, 13], "2021": [5, 6, 8, 14, 15], "06": [5, 7, 11], "05": [5, 9, 14], "resolv": [5, 13, 16], "140": [5, 9, 16], "82": [5, 11, 16], "112": [5, 7], "connect": [5, 13, 16], "443": [5, 16], "sent": [5, 7, 10, 13, 16], "await": [5, 13, 16], "respons": [5, 13, 16], "302": [5, 16], "locat": [5, 7, 10, 16], "githubusercont": [5, 16], "follow": [5, 6, 7, 9, 16], "185": [5, 16], "199": [5, 13, 16], "108": [5, 16], "133": [5, 16], "109": 5, "200": [5, 9, 12, 13, 16], "ok": [5, 7, 13, 16], "length": [5, 8, 13, 14, 16], "7622": 5, "4k": 5, "save": [5, 9, 11, 12, 13, 14, 16], "44k": 5, "70": [5, 7, 9, 11, 14], "load_dict": 5, "function": [5, 6, 7, 9, 12, 15], "content": [5, 11], "success": [5, 12], "name": [5, 9, 10, 11, 12, 14, 15], "ha": [5, 8, 12, 15, 16], "been": [5, 12, 15], "final": [5, 6], "method": [5, 15], "\u0e2a\u0e27": [5, 9, 11], "\u0e2a\u0e14": [5, 9, 11], "\u0e1a\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22": 5, "\u0e40\u0e01\u0e32\u0e2b\u0e25": 5, "\u0e1a\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28": 5, "\u0e44\u0e17\u0e22": [5, 11], "demonstr": [6, 13], "chunk_pars": 6, "phrase": 6, "orchid": 6, "read": [6, 7, 10, 11, 12, 14], "pull": [6, 16], "524": 6, "need": [6, 15], "nltk": [6, 10], "preprocess": [6, 9, 11, 15], "data": [6, 10, 11, 12, 14, 15], "svgling": 6, "svgwrite": 6, "66": [6, 13], "modul": [6, 7, 9, 10, 12], "word_token": [6, 7, 9, 11, 16], "take": [6, 9, 12, 16], "pos_tag": [6, 7, 8], "mark": [6, 7], "them": [6, 7, 11, 12], "part": [6, 10, 15], "speech": [6, 10, 14], "po": [6, 9, 10], "insid": [6, 7, 14], "outsid": [6, 7, 12], "begin": [6, 7, 12], "iob": 6, "conlltags2tre": 6, "convert": [6, 7, 14], "format": [6, 16], "svg": 6, "defin": 6, "new": [6, 7, 9, 12, 16], "test": [6, 7, 9, 11, 12], "input": [6, 11, 14, 15], "perform": [6, 7, 9, 11, 12, 15], "combin": 6, "tripl": 6, "p": [6, 7, 9, 14, 16], "m": [6, 7, 9, 13], "w": [6, 7, 11, 16], "t": [6, 7, 8, 9, 10, 12, 13, 15], "engin": [6, 7, 8, 10], "perceptron": [6, 10], "sever": [6, 12], "draw_tre": 6, "syntact": [6, 13], "were": [6, 10, 12, 15], "\u0e41\u0e21\u0e27\u0e01": 6, "\u0e19\u0e1b\u0e25\u0e32": 6, "\u0e04\u0e19\u0e2b\u0e19\u0e2d\u0e07\u0e04\u0e32\u0e22\u0e40\u0e1b": 6, "\u0e19\u0e04\u0e19\u0e19": 6, "\u0e32\u0e23": [6, 7], "\u0e1b\u0e25\u0e32\u0e2d\u0e30\u0e44\u0e23\u0e2d\u0e22": 6, "\u0e43\u0e19\u0e19": 6, "\u0e33": [6, 9, 16], "\u0e33\u0e21": 6, "\u0e2d\u0e30\u0e44\u0e23\u0e2d\u0e22": 6, "\u0e17\u0e33\u0e44\u0e21\u0e40\u0e02\u0e32\u0e23": 6, "\u0e01\u0e04": 6, "\u0e13": [6, 7, 9, 11], "\u0e04\u0e19\u0e2d\u0e30\u0e44\u0e23\u0e2d\u0e22": 6, "\u0e07\u0e15": [6, 7, 12], "\u0e19\u0e44\u0e21": [6, 7, 16], "basic": 7, "uncom": [7, 9, 11, 12, 16], "run": [7, 9, 10, 11, 12, 13, 14, 16], "colab": [7, 9, 10, 11, 12, 14, 15, 16], "extra": 7, "blob": [7, 14], "epitran": 7, "__version__": 7, "provid": [7, 8, 11, 12, 15], "some": [7, 10, 12, 15], "readi": 7, "set": [7, 9, 10, 11, 12, 14, 15, 16], "g": [7, 10, 15], "conson": 7, "vowel": 7, "tonemark": 7, "symbol": 7, "conveni": 7, "There": 7, "ar": [7, 9, 10, 11, 12, 13, 14, 15, 16], "also": [7, 9, 12, 16], "few": [7, 12], "util": [7, 14], "thai_charact": 7, "\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e25\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e24\u0e26\u0e30": 7, "\u0e32\u0e33": [7, 16], "\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45": 7, "\u0e2f": 7, "\u0e46": [7, 9, 11], "\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59": 7, "88": [7, 10], "thai_conson": 7, "\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e25\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e": 7, "44": 7, "\u0e54": 7, "thai_digit": 7, "isthai": 7, "fals": [7, 8, 9, 10, 11, 12, 14], "ignore_char": 7, "counthai": 7, "proport": 7, "ignor": [7, 11], "non": [7, 16], "alphabet": 7, "default": [7, 8, 9, 10, 11, 15], "countthai": 7, "\u0e19\u0e2d\u0e32\u0e17": [7, 11, 15], "\u0e15\u0e22": [7, 11, 15], "\u0e19\u0e32\u0e04\u0e21": 7, "2562": [7, 15], "67": 7, "85714285714286": 7, "sort": [7, 12], "accord": 7, "\u0e2d\u0e19": [7, 9, 11, 15], "\u0e01\u0e23\u0e30\u0e14\u0e32\u0e29": 7, "\u0e01\u0e23\u0e23\u0e44\u0e01\u0e23": 7, "\u0e44\u0e02": [7, 11], "\u0e1c": [7, 9, 11, 12, 16], "\u0e32\u0e44\u0e2b\u0e21": 7, "revers": 7, "dai": [7, 11, 12, 16], "month": 7, "buddhist": 7, "era": 7, "b": [7, 8, 9, 11, 12], "direct": [7, 12], "similar": [7, 12, 15], "datetim": 7, "strftime": 7, "thai_strftim": 7, "fmt": [7, 9, 15], "a\u0e17": 7, "d": [7, 9, 14], "\u0e28": [7, 9, 11, 12, 15], "y": [7, 9, 11, 12, 16], "\u0e40\u0e27\u0e25\u0e32": 7, "h": 7, "1976": 7, "40": [7, 9, 10, 11, 14], "\u0e19\u0e1e": [7, 12, 15], "\u0e18\u0e17": 7, "\u0e25\u0e32\u0e04\u0e21": 7, "2519": 7, "01": [7, 9, 11, 12, 14], "modifi": 7, "appli": [7, 9, 11], "right": [7, 11, 15, 16], "befor": [7, 9, 11, 12, 14], "minu": 7, "pad": [7, 14, 15], "numer": [7, 11], "result": [7, 10, 12, 13, 14, 15], "avail": [7, 15], "underscor": 7, "space": 7, "zero": [7, 14], "upper": 7, "case": [7, 10, 11, 12, 15], "swap": 7, "o": [7, 8, 14, 15], "letter": [7, 16], "altern": 7, "note": [7, 10, 13, 15, 16], "thai_tim": 7, "renam": 7, "time_to_thaiword": 7, "\u0e19\u0e22": 7, "\u0e19\u0e32\u0e2c": 7, "\u0e01\u0e32\u0e2a": 7, "\u0e1a\u0e2a": 7, "\u0e19\u0e32\u0e17": [7, 9], "\u0e1a\u0e40\u0e01": 7, "\u0e32\u0e27": [7, 9], "wai": [7, 15], "chosen": 7, "24h": 7, "6h": 7, "m6h": 7, "yourself": [7, 12], "\u0e40\u0e17": 7, "\u0e22\u0e07\u0e04": 7, "\u0e19\u0e2a": [7, 11], "precis": 7, "well": [7, 15], "minut": [7, 9, 12], "second": [7, 16], "onli": [7, 10, 12], "valu": [7, 12], "30": [7, 9, 15, 16], "\u0e2a\u0e2d\u0e07\u0e42\u0e21\u0e07\u0e40\u0e0a": 7, "\u0e32\u0e2a": 7, "\u0e1a\u0e40\u0e08": 7, "\u0e14\u0e19\u0e32\u0e17": 7, "\u0e41\u0e1b\u0e14\u0e42\u0e21\u0e07\u0e2a": 7, "\u0e2b\u0e01\u0e42\u0e21\u0e07\u0e04\u0e23": 7, "\u0e32\u0e22\u0e42\u0e21\u0e07\u0e04\u0e23": 7, "object": [7, 13, 14], "\u0e1a\u0e2a\u0e32\u0e21\u0e19\u0e32\u0e2c": 7, "\u0e1a\u0e2b": 7, "\u0e32\u0e22\u0e42\u0e21\u0e07\u0e2a": 7, "At": 7, "sub": 7, "crfcut": [7, 10], "uss": 7, "sent_token": 7, "\u0e1e\u0e23\u0e30\u0e23\u0e32\u0e0a\u0e1a": 7, "\u0e0d\u0e0d": 7, "\u0e18\u0e23\u0e23\u0e21\u0e19": 7, "\u0e0d\u0e01\u0e32\u0e23\u0e1b\u0e01\u0e04\u0e23\u0e2d\u0e07\u0e41\u0e1c": 7, "\u0e19\u0e2a\u0e22\u0e32\u0e21\u0e0a": 7, "\u0e27\u0e04\u0e23\u0e32\u0e27": 7, "\u0e17\u0e18\u0e28": 7, "\u0e01\u0e23\u0e32\u0e0a": 7, "\u0e52\u0e54\u0e57\u0e55": 7, "\u0e19\u0e23": [7, 11], "\u0e10\u0e18\u0e23\u0e23\u0e21\u0e19": 7, "\u0e0d\u0e09\u0e1a": 7, "\u0e1a\u0e0a": 7, "\u0e0b": [7, 9, 11, 12, 13, 15], "\u0e07\u0e16": 7, "\u0e2d\u0e27": [7, 8, 9], "\u0e32\u0e40\u0e1b": [7, 11], "\u0e1a\u0e41\u0e23\u0e01\u0e41\u0e2b": 7, "\u0e07\u0e23\u0e32\u0e0a\u0e2d\u0e32\u0e13\u0e32\u0e08": 7, "\u0e01\u0e23\u0e2a\u0e22\u0e32\u0e21": 7, "\u0e1b\u0e23\u0e30\u0e01\u0e32\u0e28\u0e43\u0e0a": 7, "\u0e40\u0e21": [7, 9, 11], "\u0e19\u0e17": [7, 9, 10, 11, 12, 15], "27": [7, 8, 10, 11, 14, 15, 16], "\u0e19\u0e32\u0e22\u0e19": 7, "2475": 7, "\u0e42\u0e14\u0e22\u0e40\u0e1b": 7, "\u0e19\u0e1c\u0e25\u0e1e\u0e27\u0e07\u0e2b\u0e25": 7, "\u0e07\u0e01\u0e32\u0e23\u0e1b\u0e0f": 7, "\u0e42\u0e14\u0e22\u0e04\u0e13\u0e30\u0e23\u0e32\u0e29\u0e0e\u0e23": 7, "nwhitespac": 7, "newlin": 7, "whitespac": 7, "maximum": [7, 8], "algorithm": 7, "\u0e08\u0e30\u0e23": 7, "\u0e04\u0e27\u0e32\u0e21\u0e0a": 7, "\u0e27\u0e23": 7, "\u0e32\u0e22\u0e17": 7, "\u0e17\u0e33\u0e44\u0e27": 7, "\u0e41\u0e25\u0e30\u0e04\u0e07\u0e08\u0e30\u0e44\u0e21": 7, "\u0e22\u0e2d\u0e21\u0e43\u0e2b": 7, "\u0e17\u0e33\u0e19\u0e32\u0e1a\u0e19\u0e2b\u0e25": 7, "\u0e07\u0e04\u0e19": 7, "nnewmm": 7, "keep_whitespac": 7, "\u0e08\u0e30": [7, 9, 11, 13], "\u0e04\u0e27\u0e32\u0e21": [7, 9], "\u0e17\u0e33": [7, 9], "\u0e44\u0e27": 7, "\u0e41\u0e25\u0e30": [7, 9, 11, 12, 16], "\u0e04\u0e07\u0e08\u0e30": 7, "other": [7, 12, 15, 16], "\u0e01\u0e0e\u0e2b\u0e21\u0e32\u0e22\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19\u0e09\u0e1a": 7, "\u0e1a\u0e1b\u0e23": 7, "\u0e07\u0e43\u0e2b\u0e21": 7, "\u0e41\u0e25": [7, 9, 11, 15], "longest": 7, "\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19": 7, "custom_token": 7, "\u0e01\u0e0e\u0e2b\u0e21\u0e32\u0e22\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19": 7, "\u0e09\u0e1a": 7, "\u0e43\u0e2b\u0e21": [7, 9, 11], "\u0e1b\u0e23\u0e30\u0e01\u0e32\u0e28": 7, "\u0e01\u0e0e\u0e2b\u0e21\u0e32\u0e22": 7, "common": [7, 16], "add": [7, 8, 15], "remov": [7, 14, 15], "\u0e22\u0e32\u0e22\u0e27": 7, "\u0e17\u0e22\u0e32\u0e28\u0e32\u0e2a\u0e15\u0e23": [7, 12], "\u0e02\u0e2d\u0e07\u0e44\u0e2d\u0e41\u0e0b\u0e04": 7, "\u0e2d\u0e2a": 7, "\u0e21\u0e2d\u0e1f": 7, "frozenset": 7, "\u0e44\u0e2d\u0e41\u0e0b\u0e04": 7, "isaac": 7, "asimov": 7, "\u0e22\u0e32\u0e22": 7, "\u0e02\u0e2d\u0e07": [7, 9, 11, 16], "\u0e21\u0e2d": 7, "trie": 7, "ilo87": 7, "\u0e32\u0e14": 7, "\u0e27\u0e22\u0e40\u0e2a\u0e23": 7, "\u0e20\u0e32\u0e1e\u0e43\u0e19\u0e01\u0e32\u0e23\u0e2a\u0e21\u0e32\u0e04\u0e21\u0e41\u0e25\u0e30\u0e01\u0e32\u0e23\u0e04": 7, "\u0e21\u0e04\u0e23\u0e2d\u0e07\u0e2a": 7, "\u0e17\u0e18": [7, 11, 15], "\u0e43\u0e19\u0e01\u0e32\u0e23\u0e23\u0e27\u0e21\u0e15": 7, "ilo98": 7, "\u0e27\u0e22\u0e2a": 7, "\u0e27\u0e41\u0e25\u0e30\u0e01\u0e32\u0e23\u0e23": 7, "\u0e27\u0e21\u0e40\u0e08\u0e23\u0e08\u0e32\u0e15": 7, "\u0e2d\u0e23\u0e2d\u0e07": 7, "new_word": 7, "\u0e01\u0e32\u0e23\u0e23": 7, "\u0e40\u0e2a\u0e23": 7, "\u0e20\u0e32\u0e1e\u0e43\u0e19\u0e01\u0e32\u0e23\u0e2a\u0e21\u0e32\u0e04\u0e21": 7, "\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19\u0e2a": 7, "\u0e21\u0e1e": 7, "\u0e19\u0e18": [7, 16], "union": 7, "custom_dictionary_tri": 7, "custom_dict": 7, "ilo": 7, "87": 7, "\u0e27\u0e22": [7, 9, 11], "\u0e20\u0e32\u0e1e": 7, "\u0e43\u0e19": [7, 9, 11], "\u0e01\u0e32\u0e23\u0e2a\u0e21\u0e32\u0e04\u0e21": 7, "\u0e01\u0e32\u0e23": [7, 9, 11, 16], "\u0e21\u0e04\u0e23\u0e2d\u0e07": 7, "\u0e23\u0e27\u0e21\u0e15": 7, "98": [7, 11], "\u0e27\u0e21": [7, 9], "\u0e40\u0e08\u0e23\u0e08\u0e32": 7, "differ": [7, 16], "speedtest_text": 7, "\u0e04\u0e23\u0e1a\u0e23\u0e2d\u0e1a": 7, "\u0e15\u0e32\u0e01\u0e43\u0e1a": 7, "\u0e40\u0e0a": [7, 11, 15], "\u0e19\u0e19": [7, 9, 11], "2547": 7, "\u0e21\u0e19": [7, 16], "\u0e21\u0e0a\u0e32\u0e22\u0e01\u0e27": 7, "\u0e32": [7, 8, 9, 11, 12, 15, 16], "370": 7, "\u0e04\u0e19": [7, 9, 11, 13, 15], "\u0e01\u0e42\u0e22\u0e19\u0e02": 7, "\u0e19\u0e23\u0e16\u0e22": 7, "\u0e40\u0e2d": [7, 9], "\u0e21\u0e0b": 7, "\u0e2b\u0e23": [7, 9, 11, 15], "\u0e19\u0e2d\u0e19\u0e0b": 7, "\u0e2d\u0e19\u0e01": [7, 9], "\u0e19\u0e04": 7, "\u0e19\u0e25\u0e30": 7, "\u0e40\u0e14": [7, 11, 12], "\u0e19\u0e17\u0e32\u0e07\u0e08\u0e32\u0e01\u0e2a\u0e16\u0e32\u0e19": 7, "\u0e15\u0e33\u0e23\u0e27\u0e08\u0e15\u0e32\u0e01\u0e43\u0e1a": 7, "\u0e44\u0e1b\u0e44\u0e01\u0e25": 7, "150": [7, 9], "\u0e42\u0e25\u0e40\u0e21\u0e15\u0e23": [7, 15], "\u0e44\u0e1b\u0e16": 7, "\u0e07\u0e04": 7, "\u0e32\u0e22\u0e2d": 7, "\u0e07\u0e04\u0e22": 7, "\u0e17\u0e18\u0e1a\u0e23": 7, "\u0e2b\u0e32\u0e23": 7, "\u0e40\u0e27\u0e25\u0e32\u0e01\u0e27": 7, "\u0e27\u0e42\u0e21\u0e07": 7, "\u0e43\u0e19\u0e2d": [7, 15], "\u0e01\u0e04\u0e14": 7, "\u0e0d\u0e32\u0e15": 7, "\u0e2d\u0e07\u0e23": [7, 9], "\u0e10": 7, "\u0e04\u0e14": 7, "\u0e08\u0e1a\u0e25\u0e07\u0e17": 7, "\u0e01\u0e32\u0e23\u0e1b\u0e23\u0e30\u0e19": 7, "\u0e1b\u0e23\u0e30\u0e19\u0e2d\u0e21\u0e22\u0e2d\u0e21\u0e04\u0e27\u0e32\u0e21": 7, "\u0e01\u0e23\u0e30\u0e17\u0e23\u0e27\u0e07\u0e01\u0e25\u0e32\u0e42\u0e2b\u0e21\u0e08": 7, "\u0e32\u0e22\u0e04": 7, "\u0e19\u0e44\u0e2b\u0e21\u0e17\u0e14\u0e41\u0e17\u0e19\u0e23\u0e27\u0e21": 7, "42": [7, 8, 9, 10, 14], "\u0e32\u0e19\u0e1a\u0e32\u0e17\u0e43\u0e2b": 7, "\u0e1a\u0e0d\u0e32\u0e15": 7, "\u0e22\u0e2b\u0e32\u0e22": 7, "\u0e23\u0e32\u0e22": 7, "\u0e14\u0e2b": 7, "\u0e1a\u0e41\u0e25\u0e30\u0e19": 7, "\u0e1a\u0e04\u0e30\u0e41\u0e19\u0e19\u0e40\u0e2a\u0e23": 7, "\u0e08\u0e41\u0e25": 7, "\u0e27\u0e22\u0e40\u0e25": 7, "\u0e2d\u0e01\u0e15": 7, "\u0e07\u0e17": [7, 9], "\u0e40\u0e02\u0e15": 7, "\u0e41\u0e02\u0e27\u0e07\u0e2b": 7, "\u0e27\u0e2b\u0e21\u0e32\u0e01": 7, "\u0e40\u0e02\u0e15\u0e1a\u0e32\u0e07\u0e01\u0e30\u0e1b": 7, "\u0e01\u0e23": [7, 11], "\u0e07\u0e40\u0e17\u0e1e\u0e21\u0e2b\u0e32\u0e19\u0e04\u0e23": [7, 11], "\u0e2a\u0e21": [7, 12], "\u0e41\u0e25\u0e30\u0e15": 7, "\u0e27\u0e41\u0e17\u0e19\u0e1e\u0e23\u0e23\u0e04\u0e01\u0e32\u0e23\u0e40\u0e21": 7, "\u0e2d\u0e07\u0e08\u0e32\u0e01\u0e2b\u0e25\u0e32\u0e22\u0e1e\u0e23\u0e23\u0e04\u0e15": 7, "\u0e32\u0e07\u0e21\u0e32\u0e40\u0e1d": 7, "\u0e07\u0e40\u0e01\u0e15\u0e01\u0e32\u0e23\u0e19": 7, "\u0e1a\u0e04\u0e30\u0e41\u0e19\u0e19\u0e2d\u0e22": 7, "\u0e32\u0e07\u0e43\u0e01\u0e25": 7, "\u0e42\u0e14\u0e22": [7, 9, 11], "\u0e20": [7, 8], "\u0e2a\u0e23": [7, 9, 15], "\u0e42\u0e0a\u0e15": [7, 9], "\u0e40\u0e14\u0e0a\u0e32\u0e0a": 7, "\u0e22\u0e19": [7, 8, 9, 10, 11, 15], "\u0e19\u0e15": [7, 9, 15, 16], "\u0e08\u0e32\u0e01\u0e1e\u0e23\u0e23\u0e04\u0e1e\u0e25": 7, "\u0e07\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e23": 7, "\u0e41\u0e25\u0e30\u0e1e\u0e23": 7, "\u0e29\u0e10": 7, "\u0e0a\u0e23\u0e2a": 7, "\u0e08\u0e32\u0e01\u0e1e\u0e23\u0e23\u0e04\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e18": 7, "\u0e44\u0e14": [7, 9, 11, 12, 15, 16], "\u0e04\u0e30\u0e41\u0e19\u0e19": 7, "96": 7, "\u0e04\u0e30\u0e41\u0e19\u0e19\u0e40\u0e17": 7, "\u0e32\u0e01": [7, 15], "\u0e40\u0e21\u0e29\u0e32\u0e22\u0e19": [7, 11], "\u0e07\u0e40\u0e1b": 7, "\u0e19\u0e27": 7, "\u0e19\u0e2d": [7, 11], "\u0e2a\u0e40\u0e15\u0e2d\u0e23": 7, "\u0e19\u0e2a\u0e33\u0e04": 7, "\u0e0d\u0e02\u0e2d\u0e07\u0e0a\u0e32\u0e27\u0e04\u0e23": 7, "\u0e2a\u0e15": 7, "\u0e40\u0e01": [7, 9, 11, 15], "\u0e14\u0e40\u0e2b\u0e15": 7, "\u0e23\u0e30\u0e40\u0e1a": 7, "\u0e14\u0e15": 7, "\u0e2d\u0e40\u0e19": 7, "\u0e2d\u0e07\u0e43\u0e19\u0e42\u0e1a\u0e2a\u0e16": 7, "\u0e41\u0e25\u0e30\u0e42\u0e23\u0e07\u0e41\u0e23\u0e21\u0e2d\u0e22": 7, "\u0e32\u0e07\u0e19": 7, "\u0e2d\u0e22": [7, 8, 9, 11, 15], "\u0e41\u0e2b": [7, 16], "\u0e07\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e28\u0e23": 7, "\u0e07\u0e01\u0e32": 7, "\u0e15\u0e41\u0e25": 7, "\u0e27\u0e2d\u0e22": 7, "156": 7, "\u0e41\u0e25\u0e30\u0e1a\u0e32\u0e14\u0e40\u0e08": 7, "\u0e1a\u0e2b\u0e25\u0e32\u0e22\u0e23": 7, "\u0e2d\u0e22\u0e04\u0e19": 7, "\u0e07\u0e44\u0e21": 7, "\u0e02": [7, 9, 11, 15], "\u0e2d\u0e21": [7, 11, 15], "\u0e25\u0e27": 7, "\u0e32\u0e1c": 7, "\u0e2d\u0e40\u0e2b\u0e15": 7, "\u0e21\u0e32\u0e08\u0e32\u0e01\u0e1d": 7, "\u0e32\u0e22\u0e43\u0e14": 7, "\u0e19\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e08": 7, "\u0e14\u0e01\u0e32\u0e23\u0e1b\u0e23\u0e30\u0e0a": 7, "\u0e21\u0e02": [7, 11, 15], "\u0e2d\u0e23": [7, 9], "\u0e40\u0e23": [7, 10, 11], "\u0e21\u0e2a\u0e32\u0e22\u0e41\u0e16\u0e1a\u0e41\u0e25\u0e30\u0e40\u0e2a": 7, "\u0e19\u0e17\u0e32\u0e07\u0e43\u0e19\u0e0a": 7, "\u0e27\u0e07\u0e1b\u0e25\u0e32\u0e22\u0e2a": 7, "\u0e1b\u0e14\u0e32\u0e2b": [7, 11], "\u0e01\u0e01": [7, 9, 16], "\u0e07\u0e22": 7, "\u0e2d\u0e20": 7, "\u0e21\u0e2b\u0e32\u0e42\u0e04\u0e23\u0e07\u0e01\u0e32\u0e23\u0e40\u0e0a": 7, "\u0e2d\u0e21\u0e42\u0e25\u0e01\u0e02\u0e2d\u0e07\u0e08": 7, "\u0e40\u0e04\u0e23": [7, 9, 11], "\u0e2d\u0e07\u0e21": 7, "\u0e2d\u0e41\u0e1c": 7, "\u0e1e\u0e25": 7, "\u0e1a\u0e1f": 7, "\u0e07\u0e02": [7, 9], "\u0e08\u0e32\u0e23\u0e13": 7, "\u0e1b\u0e23\u0e30\u0e40\u0e14": [7, 11], "\u0e19\u0e01": [7, 15], "\u0e1a\u0e14": [7, 16], "\u0e01\u0e2b\u0e19": 7, "\u0e41\u0e25\u0e30\u0e04\u0e27\u0e32\u0e21\u0e44\u0e21": 7, "\u0e42\u0e1b\u0e23": 7, "\u0e07\u0e43\u0e2a": 7, "\u0e10\u0e1a\u0e32\u0e25\u0e1b": 7, "\u0e07\u0e1a\u0e2d\u0e01\u0e27": 7, "\u0e40\u0e27\u0e17": 7, "\u0e1b\u0e23\u0e30\u0e0a": 7, "belt": 7, "road": 7, "forum": 7, "\u0e43\u0e19\u0e0a": [7, 12], "\u0e27\u0e07\u0e27": 7, "\u0e2d\u0e40\u0e1b": [7, 12], "\u0e19\u0e07\u0e32\u0e19\u0e01\u0e32\u0e23\u0e17": 7, "\u0e15\u0e17": 7, "\u0e2a\u0e33\u0e04": 7, "\u0e0d\u0e17": 7, "\u0e14\u0e02\u0e2d\u0e07\u0e08": 7, "\u0e19\u0e43\u0e19\u0e1b": 7, "speed": 7, "through": [7, 12], "wrapper": 7, "cpu": [7, 9], "user": [7, 9, 10, 11], "253": 7, "sy": [7, 9], "total": [7, 9, 11, 12, 13], "256": 7, "wall": [7, 9], "255": 7, "60": [7, 9], "\u00b5": 7, "46": [7, 10, 13, 14, 16], "safe": 7, "33": [7, 9, 15], "attacut": [7, 10], "833": 7, "174": [7, 11], "576": 7, "possibl": [7, 16], "multi_cut": 7, "find_all_seg": 7, "mmcut": 7, "\u0e04\u0e27\u0e32\u0e21\u0e40\u0e1b": [7, 9], "\u0e19\u0e44\u0e1b\u0e44\u0e14": 7, "\u0e32\u0e07\u0e44\u0e23\u0e1a": 7, "\u0e32\u0e07": [7, 9, 11], "\u0e44\u0e1b": [7, 9, 10, 11, 13], "\u0e44\u0e23": [7, 15], "\u0e19\u0e44\u0e1b": [7, 9], "\u0e32\u0e07\u0e44\u0e23": 7, "either": 7, "ssg": [7, 10, 15], "ponrawe": 7, "__": [7, 11], "crf": 7, "prasertsom": 7, "smaller": [7, 15], "than": [7, 12, 16], "inform": [7, 9], "retriev": 7, "theeramunkong": 7, "et": [7, 13], "al": [7, 13], "2004": 7, "unit": 7, "35": [7, 9, 10, 13, 15, 16], "subword_token": [7, 8], "\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22": [7, 9], "\u0e23\u0e30": [7, 9], "\u0e44\u0e17": 7, "dict": [7, 9, 11, 12, 14], "known": [7, 16], "36": [7, 9, 10, 11, 13, 14, 15, 16], "\u0e25\u0e40\u0e25\u0e32\u0e30": 7, "\u0e0b\u0e2d\u0e21": 7, "\u0e0b\u0e2d": 7, "\u0e2a\u0e21\u0e2d\u0e07\u0e1a\u0e27\u0e21\u0e23": 7, "\u0e19\u0e41\u0e23\u0e07": 7, "\u0e40\u0e25\u0e32\u0e30": 7, "\u0e2a\u0e21\u0e2d\u0e07": 7, "\u0e1a\u0e27\u0e21": 7, "\u0e41\u0e23\u0e07": 7, "extern": 7, "ommit": 7, "output": [7, 11, 14, 15], "37": [7, 11, 13, 14], "These": 7, "task": [7, 10, 13, 15], "like": [7, 9, 11, 12, 15], "cut": 7, "certain": [7, 12], "point": [7, 12, 16], "typo": 7, "tcc_po": 7, "posit": [7, 16], "ch": 7, "two": [7, 11, 12, 16], "roman": 7, "latin": 7, "royal": 7, "system": [7, 10, 12], "transcript": 7, "rtg": 7, "support": [7, 8, 16], "simpl": [7, 10, 15, 16], "royin": 7, "accur": 7, "thai2rom": 7, "context": 7, "mean": [7, 9, 12, 14], "sound": [7, 14], "ipa": 7, "intern": 7, "phonet": 7, "icu": 7, "compon": [7, 13], "unicod": 7, "pyicu": 7, "\u0e41\u0e21\u0e27": [7, 10, 16], "maeo": 7, "\u0e20\u0e32\u0e1e\u0e22\u0e19\u0e15\u0e23": [7, 11], "phapn": 7, "obvious": 7, "wrong": [7, 12], "m\u025b\u02d0w": 7, "updat": [7, 9, 15], "g2p": 7, "up": [7, 12], "\u025b\u02d0": 7, "p\u02b0a\u02d0pjanot": 7, "p\u02b0": 7, "a\u02d0": 7, "n": [7, 8, 11, 13], "width": 7, "zwsp": 7, "zwnj": 7, "duplic": 7, "repeat": [7, 9], "dangl": 7, "reorder": 7, "tone": 7, "dure": 7, "\u0e40\u0e40\u0e1b\u0e25\u0e01": 7, "\u0e41\u0e1b\u0e25\u0e01": 7, "\u0e40": 7, "v": [7, 13, 14], "\u0e41": 7, "below": 7, "standard": 7, "order": [7, 9, 11, 16], "sara": 7, "aa": 7, "mai": [7, 12, 15], "ek": 7, "\u0e40\u0e01\u0e32": 7, "includ": [7, 9, 16], "\u0e1a\u0e27": 7, "\u0e1e\u0e23": 7, "immedi": [7, 13], "nnormal": 7, "multipl": [7, 14], "A": 7, "row": [7, 11, 16], "keep": 7, "reduc": 7, "variat": 7, "48": [7, 9, 10, 14], "\u0e40\u0e01\u0e30\u0e30\u0e30": 7, "\u0e40\u0e01\u0e30": 7, "just": [7, 12], "seri": [7, 12], "remove_zw": 7, "remove_dup_spac": 7, "remove_repeat_vowel": 7, "remove_dangl": 7, "If": [7, 10, 15], "don": [7, 12], "behavior": 7, "those": [7, 12], "shown": 7, "abov": 7, "remove_tonemark": 7, "reorder_vowel": 7, "individu": 7, "your": [7, 12, 14], "own": [7, 12], "sometim": 7, "search": [7, 15], "pythainp": 7, "deal": [7, 12], "49": 7, "arabic_digit_to_thai_digit": 7, "thai_digit_to_arabic_digit": 7, "digit_to_text": 7, "\u0e09": [7, 15], "\u0e01\u0e40\u0e09": 7, "\u0e42\u0e23\u0e1b\u0e40\u0e23": 7, "\u0e22\u0e01": 7, "\u0e51\u0e51\u0e52": 7, "50": [7, 9, 11, 13], "51": [7, 11, 16], "\u0e07\u0e2b\u0e19": [7, 9, 12], "\u0e07\u0e2a\u0e2d\u0e07": 7, "index": [7, 9, 10, 11, 13, 15, 16], "wikipedia": [7, 11, 12, 15], "three": 7, "kind": [7, 12], "lk82": 7, "metasound": 7, "udom83": 7, "equival": 7, "\u0e23\u0e16": [7, 9, 11], "\u0e23\u0e14": 7, "\u0e27\u0e23\u0e23": 7, "\u0e19\u0e20": 7, "\u0e23\u0e13\u0e30": 7, "\u0e23\u0e13\u0e01\u0e32\u0e23": 7, "\u0e21\u0e23\u0e23\u0e04": 7, "\u0e01\u0e29": [7, 16], "\u0e1ae400": 7, "\u0e1a930000": 7, "\u0e1a550": 7, "\u0e1ae419": 7, "\u0e1a931900": 7, "\u0e1a551": 7, "\u0e211000": 7, "\u0e21100000": 7, "\u0e21100": 7, "\u0e21310000": 7, "\u0e21551": 7, "\u0e231000": 7, "\u0e23100000": 7, "\u0e25100": 7, "\u0e23100": 7, "peter": 7, "norvig": 7, "togeth": 7, "nation": 7, "tnc": 7, "\u0e40\u0e2b\u0e25": [7, 9], "\u0e22\u0e21": 7, "correct": [7, 16], "most": [7, 12, 16], "55": [7, 11], "when": [7, 9, 10, 12, 15], "norvigspellcheck": 7, "kei": [7, 16], "int": [7, 11, 13], "tupl": [7, 14, 16], "assign": 7, "everi": [7, 9, 12], "user_dict": 7, "1000": [7, 9, 11, 16], "\u0e22\u0e27": [7, 9, 11, 15, 16], "1000000": 7, "checker": [7, 16], "As": 7, "our": [7, 15], "give": [7, 9, 12], "edit": [7, 12, 16], "distanc": 7, "prioriti": 7, "over": 7, "textbook": 7, "By": 7, "ttc": 7, "word_freq": 7, "To": [7, 9], "current": [7, 15], "59": [7, 9, 14], "\u0e18": [7, 9, 15], "\u0e44\u0e2a": 7, "\u0e01\u0e23\u0e2d\u0e01": 7, "\u0e1b\u0e25": [7, 11], "\u0e40\u0e15": [7, 9, 11], "\u0e02\u0e2d\u0e1a\u0e04": [7, 15], "356": 7, "\u0e1b\u0e23\u0e30\u0e2a\u0e32\u0e19": 7, "84": [7, 15], "\u0e23\u0e33\u0e44\u0e23": 7, "\u0e27\u0e21\u0e17": 7, "\u0e2d\u0e07": [7, 9, 11, 15], "\u0e1d": 7, "\u0e01\u0e21\u0e30\u0e02\u0e32\u0e21": 7, "condit": 7, "filter": 7, "39963": 7, "61": [7, 11], "min_freq": [7, 9, 11, 12], "min_len": 7, "max_len": [7, 9], "30376": 7, "62": [7, 14], "checker_no_filt": 7, "dict_filt": 7, "66209": 7, "63": [7, 10], "remove_yamok": 7, "els": [7, 12, 15], "checker_custom_filt": 7, "66204": 7, "64": [7, 10, 11, 12, 13, 14], "pos_tag_s": 7, "\u0e19\u0e17\u0e32\u0e07": 7, "fixn": 7, "vact": 7, "65": [7, 15], "\u0e1b\u0e23\u0e30\u0e01\u0e32\u0e28\u0e2a\u0e33\u0e19": 7, "\u0e01\u0e19\u0e32\u0e22\u0e01\u0e2f": 7, "\u0e2a\u0e23\u0e23\u0e40\u0e2a\u0e23": 7, "\u0e0d": [7, 11, 16], "\u0e41\u0e01": 7, "\u0e27\u0e01\u0e33\u0e40\u0e19": 7, "\u0e19\u0e08\u0e32\u0e01\u0e15\u0e33\u0e41\u0e2b\u0e19": 7, "\u0e17\u0e23\u0e07\u0e04": 7, "\u0e13\u0e27": 7, "\u0e12": 7, "\u0e40\u0e28\u0e29": [7, 9], "\u0e01\u0e2d\u0e07\u0e17": 7, "\u0e1e\u0e1a\u0e01": [7, 12], "\u0e01\u0e23\u0e30\u0e17\u0e23\u0e27\u0e07\u0e01\u0e25\u0e32\u0e42\u0e2b\u0e21": 7, "\u0e2d\u0e18": 7, "\u0e01\u0e23\u0e21\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e2a": 7, "ncmn": 7, "punc": 7, "jsbr": 7, "jcrg": 7, "vsta": 7, "tagger": [7, 15], "bio": 7, "scheme": 7, "pip3": 7, "ner": [7, 8, 10, 15], "thainer": [7, 8, 10], "thainametagg": [7, 8], "get_ner": [7, 8], "2563": 7, "\u0e17\u0e14\u0e2a\u0e2d\u0e1a\u0e23\u0e30\u0e1a\u0e1a\u0e40\u0e27\u0e25\u0e32": 7, "\u0e19\u0e17\u0e32\u0e07\u0e08\u0e32\u0e01\u0e02\u0e19\u0e2a": 7, "\u0e07\u0e01\u0e23": 7, "\u0e07\u0e40\u0e17\u0e1e\u0e43\u0e01\u0e25": 7, "\u0e16\u0e19\u0e19\u0e01\u0e33\u0e41\u0e1e\u0e07\u0e40\u0e1e\u0e0a\u0e23": 7, "\u0e44\u0e1b\u0e08": 7, "\u0e07\u0e2b\u0e27": [7, 15], "\u0e14\u0e01\u0e33\u0e41\u0e1e\u0e07\u0e40\u0e1e\u0e0a\u0e23": 7, "\u0e27\u0e23\u0e32\u0e04\u0e32": 7, "297": [7, 16], "\u0e1a\u0e32\u0e17": [7, 9], "num": [7, 13, 14], "punct": 7, "noun": [7, 10], "\u0e17\u0e14\u0e2a\u0e2d\u0e1a\u0e23\u0e30\u0e1a\u0e1a": 7, "verb": [7, 10, 16], "\u0e08\u0e32\u0e01": [7, 9, 11, 12], "adp": 7, "\u0e02\u0e19\u0e2a": 7, "organ": [7, 8, 15], "\u0e07\u0e40\u0e17\u0e1e": 7, "\u0e43\u0e01\u0e25": 7, "adj": [7, 13], "\u0e16\u0e19\u0e19": 7, "\u0e01\u0e33\u0e41\u0e1e\u0e07\u0e40\u0e1e\u0e0a\u0e23": 7, "aux": [7, 10, 14], "\u0e23\u0e32\u0e04\u0e32": 7, "monei": [7, 12], "word_vector": [7, 16], "\u0e29\u0e22": [7, 16], "2504981": 7, "doesnt_match": [7, 16], "\u0e04\u0e04\u0e25": 7, "\u0e40\u0e08": [7, 9, 11, 12, 16], "\u0e32\u0e2b\u0e19": 7, "\u0e32\u0e17": 7, "site": 7, "gensim": [7, 16], "keyedvector": [7, 16], "877": 7, "futurewarn": [7, 16], "arrai": [7, 9, 11, 14, 15, 16], "stack": [7, 16], "must": [7, 12, 14, 16], "sequenc": [7, 16], "iter": [7, 11, 16], "deprec": [7, 10, 14, 15, 16], "rais": [7, 16], "an": [7, 12, 15, 16], "error": [7, 16], "futur": [7, 16], "vstack": [7, 16], "self": [7, 16], "word_vec": [7, 16], "use_norm": [7, 16], "used_word": [7, 16], "astyp": [7, 9, 14, 16], "real": [7, 12, 16], "69": [7, 14], "bahttext": 7, "1234567890123": 7, "\u0e07\u0e25": 7, "\u0e32\u0e19\u0e2a\u0e2d\u0e07\u0e41\u0e2a\u0e19\u0e2a\u0e32\u0e21\u0e2b\u0e21": 7, "\u0e19\u0e2b": 7, "\u0e2d\u0e22\u0e2b\u0e01\u0e2a": 7, "\u0e14\u0e25": 7, "\u0e32\u0e19\u0e41\u0e1b\u0e14\u0e41\u0e2a\u0e19\u0e40\u0e01": 7, "\u0e32\u0e2b\u0e21": 7, "\u0e19\u0e2b\u0e19": 7, "\u0e07\u0e23": [7, 11], "\u0e2d\u0e22\u0e22": 7, "\u0e1a\u0e2a\u0e32\u0e21\u0e1a\u0e32\u0e17\u0e2a": 7, "\u0e32\u0e2a\u0e15\u0e32\u0e07\u0e04": 7, "round": [7, 14], "satang": 7, "909": 7, "\u0e07\u0e1a\u0e32\u0e17\u0e40\u0e01": 7, "\u0e1a\u0e40\u0e2d": 7, "\u0e14\u0e2a\u0e15\u0e32\u0e07\u0e04": 7, "lowphansirikul": 8, "l": [8, 9, 11, 16], "polpanuma": 8, "c": [8, 9, 11, 13, 16], "jantrakulchai": 8, "nutanong": 8, "pretrain": [8, 9, 11, 12], "arxiv": 8, "preprint": 8, "2101": 8, "09635": 8, "jan": 8, "full": [8, 12], "thai2transform": [8, 15], "11006400": 8, "f89b594cbbebbc1940c16b0957a74182f2ea8169de8270e33f0c6bac5d1d4fcd": 8, "9a": 8, "9e": 8, "b2ab1db5c70b14b8d5d8a402e36ed915c2ec906df5c4f4b089": 8, "f9": 8, "5ca07ec9569d2f232f3166de5457b63943882f7950ddfcc887732fc7fb23": 8, "9mb": 8, "71": 8, "2ddc317b2121117bf34dd00f5b0de194158f2a44ee2bf5e47c7166878a97": 8, "manylinux2010_x86_64": [8, 14], "filelock": [8, 10, 14, 15], "7d": 8, "09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10": 8, "883kb": 8, "890kb": 8, "pypars": [8, 10, 14], "893262": 8, "26dd1871c98e4cd5fe1938dbeba7086606c31e80a945ec9f752859e252fe7068": 8, "3c": 8, "fd": 8, "7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45": 8, "dataset": [8, 10, 15], "lst20": [8, 10], "dataset_nam": [8, 15], "\u0e17\u0e14\u0e2a\u0e2d\u0e1a\u0e1c\u0e21\u0e21": 8, "\u0e19\u0e32\u0e22\u0e27\u0e23\u0e23\u0e13\u0e1e\u0e07\u0e29": 8, "\u0e17\u0e17": 8, "\u0e22\u0e44\u0e1e\u0e1a": 8, "\u0e25\u0e22": 8, "ask": 8, "truncat": 8, "max_length": 8, "predefin": 8, "person": [8, 12], "\u0e42\u0e23\u0e07\u0e40\u0e23": [8, 10, 15], "\u0e22\u0e19\u0e2a\u0e27\u0e19\u0e01": [8, 15], "\u0e2b\u0e25\u0e32\u0e1a\u0e40\u0e1b": [8, 15], "\u0e19\u0e42\u0e23\u0e07\u0e40\u0e23": [8, 15], "\u0e22\u0e19\u0e17": [8, 15], "\u0e2a\u0e27\u0e19\u0e01": [8, 15], "\u0e2b\u0e25\u0e32\u0e1a": [8, 15], "t2": [8, 15], "grouped_ent": [8, 15], "ttl": 8, "\u0e19\u0e32\u0e22": [8, 12], "\u0e27\u0e23\u0e23\u0e13\u0e1e\u0e07\u0e29": 8, "\u0e1c\u0e21\u0e21": 8, "\u0e1c\u0e21": [8, 10, 11, 13], "pr": 8, "nn": [8, 10], "\u0e27\u0e23\u0e23\u0e13": 8, "\u0e1e\u0e07\u0e29": 8, "\u0e44\u0e1e\u0e1a": 8, "grouped_word": 8, "\u0e14\u0e04\u0e33\u0e22": 8, "detail": [9, 12], "step": [9, 11], "taken": 9, "analyz": [9, 11], "evalu": 9, "metric": [9, 11, 12, 15], "overal": 9, "accuraci": [9, 11, 12], "across": [9, 12], "neg": [9, 15, 16], "ativ": 9, "itiv": 9, "neu": 9, "tral": 9, "uestion": 9, "class": [9, 11, 12], "fasttext": [9, 16], "semi": 9, "supervis": [9, 11], "public": [9, 10, 15], "privat": 9, "72781": 9, "7499": 9, "63144": 9, "6131": 9, "71259": 9, "74194": 9, "73119": 9, "75859": 9, "One": 9, "time": [9, 11, 12, 13], "73372": 9, "75968": 9, "kaggl": [9, 11, 16], "competit": 9, "upon": 9, "1st": 9, "place": 9, "solut": 9, "googl": [9, 11, 12, 14, 15, 16], "sklearn_crfsuit": [9, 11, 16], "emoji": [9, 10, 12, 15, 16], "fastai": [9, 11, 12, 16], "master": [9, 11, 16], "unzip": [9, 11], "mkdir": [9, 11, 13], "wisesight_data": 9, "snippet": 9, "font": [9, 16], "matplotlib": [9, 11, 12, 16], "gist": 9, "korakot": 9, "9d7f5db632351dc92607fdec72a4953f": 9, "phonbopit": 9, "sarabun": [9, 16], "webfont": 9, "thsarabunnew": 9, "ttf": [9, 16], "cp": 9, "mpl": 9, "share": [9, 12], "truetyp": 9, "font_manag": [9, 16], "_rebuild": 9, "rc": 9, "famili": [9, 12], "load_ext": 9, "autoreload": [9, 16], "np": [9, 11, 12, 13, 14, 15, 16], "panda": [9, 11, 12, 16], "pd": [9, 11, 12, 16], "tqdm_notebook": [9, 11, 12], "process_thai": [9, 11], "viz": [9, 11], "pyplot": [9, 11, 12, 16], "plt": [9, 11, 12, 16], "seaborn": [9, 11, 12, 15], "sn": [9, 11, 12, 15], "reload": 9, "reload_ext": [9, 16], "clean": [9, 11, 12], "rule": [9, 11], "aim": [9, 11], "spars": [9, 11], "bag": [9, 11], "pre_rul": [9, 11, 12], "post_rul": [9, 11, 12], "after": [9, 11], "\u0e32\u0e19\u0e19\u0e19\u0e19\u0e19": 9, "\u0e19\u0e32\u0e19\u0e19\u0e32\u0e19\u0e19\u0e32\u0e19": 9, "amp": [9, 12], "www": [9, 10, 14], "\u0e32\u0e19": [9, 11], "xxrep": [9, 11], "xxwrep": 9, "\u0e19\u0e32\u0e19": 9, "xxurl": 9, "open": [9, 10, 11, 12, 14, 16], "f": [9, 11, 12, 14, 15, 16], "strip": [9, 11, 12], "readlin": 9, "train_label": 9, "categori": 9, "all_df": [9, 11], "datafram": [9, 11, 16], "to_csv": [9, 11], "shape": [9, 11, 14, 16], "24063": 9, "test_df": [9, 11], "2674": 9, "map": 9, "lambda": 9, "x": [9, 12, 13, 14, 16], "wc": 9, "uwc": 9, "preval": 9, "value_count": [9, 11], "544612": 9, "255164": 9, "178698": 9, "021527": 9, "dtype": [9, 11, 14], "float64": [9, 11], "85": 9, "under": [9, 15], "oversampl": 9, "balanc": [9, 11], "out": [9, 12], "littl": 9, "hyperparamet": 9, "sklearn": [9, 11, 15, 16], "model_select": 9, "train_test_split": 9, "train_df": [9, 11], "valid_df": 9, "test_siz": 9, "random_st": [9, 11], "1412": [9, 11], "reset_index": [9, 11], "drop": [9, 11, 13], "actual": 9, "copi": [9, 11], "read_csv": [9, 11, 12], "head": [9, 11, 12, 13, 16], "\u0e19\u0e04\u0e19\u0e25\u0e1a\u0e41\u0e2d\u0e1e": 9, "viu": 9, "\u0e19\u0e43\u0e08\u0e41\u0e25\u0e30\u0e40\u0e02": 9, "\u0e32\u0e43\u0e08\u0e40\u0e02\u0e32\u0e19\u0e30\u0e04\u0e30": 9, "\u0e41\u0e1c\u0e25\u0e21": 9, "\u0e25\u0e1a": 9, "\u0e41\u0e2d": 9, "\u0e19\u0e43\u0e08": 9, "\u0e40\u0e02": [9, 11], "\u0e32\u0e43\u0e08": 9, "\u0e40\u0e02\u0e32": 9, "\u0e44\u0e1b\u0e0a\u0e21\u0e44\u0e21": 9, "\u0e27\u0e02\u0e2d\u0e07\u0e41\u0e0a\u0e21\u0e1b": 9, "\u0e41\u0e25\u0e30\u0e23\u0e2d\u0e07\u0e41\u0e0a\u0e21\u0e1b": 9, "\u0e19\u0e08": [9, 15], "\u0e0a\u0e21": 9, "\u0e41\u0e0a\u0e21\u0e1b": 9, "\u0e23\u0e2d\u0e07": 9, "\u0e21\u0e23\u0e16\u0e0b": 9, "\u0e04\u0e40\u0e1b": 9, "\u0e19\u0e01\u0e25": 9, "\u0e21\u0e17": [9, 12], "\u0e32\u0e23\u0e33\u0e04\u0e32\u0e19\u0e21\u0e32\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01": 9, "\u0e23\u0e33": 9, "\u0e04\u0e32\u0e19": 9, "\u0e21\u0e32\u0e01": [9, 11], "\u0e2d\u0e22\u0e32\u0e01\u0e2a\u0e27\u0e22\u0e40\u0e2b\u0e21": 9, "\u0e2d\u0e19\u0e40\u0e08": 9, "\u0e32\u0e02\u0e2d\u0e07\u0e41\u0e1a\u0e23\u0e19\u0e14": 9, "\u0e04\u0e30": 9, "\u0e40\u0e19\u0e22": 9, "\u0e01\u0e32": [9, 11], "\u0e43\u0e1a\u0e2b\u0e19": 9, "\u0e2d\u0e22\u0e32\u0e01": 9, "\u0e2a\u0e27\u0e22": 9, "\u0e40\u0e2b\u0e21": 9, "\u0e32\u0e02\u0e2d\u0e07": 9, "\u0e41\u0e1a\u0e23\u0e19\u0e14": 9, "\u0e32\u0e27\u0e42\u0e16\u0e25\u0e30\u0e23": 9, "\u0e41\u0e1e\u0e07": 9, "\u0e40\u0e1e\u0e23\u0e32\u0e30\u0e15": 9, "\u0e01\u0e40\u0e1b": 9, "\u0e19\u0e08\u0e32\u0e19\u0e46\u0e25\u0e3015": 9, "\u0e42\u0e16": 9, "\u0e25\u0e30": 9, "\u0e40\u0e1e\u0e23\u0e32\u0e30": 9, "\u0e08\u0e32\u0e19": 9, "381": 9, "218": 9, "544957": 9, "253557": 9, "180071": 9, "021415": 9, "542659": 9, "264266": 9, "170914": 9, "022161": 9, "variabl": [9, 14], "y_train": [9, 11], "y_valid": 9, "faetur": 9, "feature_extract": [9, 11], "tfidfvector": 9, "linear_model": 9, "logisticregress": 9, "tfidf": [9, 11], "ngram_rang": [9, 11], "min_df": [9, 11], "sublinear_tf": 9, "tfidf_fit": 9, "text_train": 9, "text_valid": 9, "text_test": 9, "20453": 9, "4614": 9, "3610": 9, "top_feats_al": 9, "plot_top_feat": 9, "get_feature_nam": 9, "toarrai": 9, "448": 9, "492": 9, "940": 9, "938": 9, "rank": [9, 16], "score": [9, 11, 15], "ngram": 9, "029990": 9, "022852": 9, "020252": 9, "\u0e40\u0e25\u0e22": [9, 11], "019493": 9, "018153": 9, "852": 9, "862": 9, "73": [9, 13, 15], "count": [9, 13], "uniqu": [9, 12], "might": [9, 12, 14], "so": [9, 12, 13], "standardscal": 9, "scaler": 9, "scaler_fit": 9, "float": [9, 14], "mean_": 9, "var_": 9, "num_train": 9, "num_valid": 9, "num_test": 9, "96529942": 9, "22744462": 9, "1151": 9, "47512883": 9, "513": 9, "46009207": 9, "74": 9, "concaten": [9, 14, 15], "x_train": [9, 11], "axi": [9, 14, 16], "x_valid": 9, "x_test": [9, 11], "4616": 9, "75": 9, "penalti": [9, 11], "l2": [9, 11], "solver": 9, "liblinear": 9, "dual": 9, "multi_class": [9, 11], "ovr": [9, 11], "7324099722991689": 9, "76": 9, "prob": [9, 11], "predict_proba": 9, "probs_df": 9, "column": [9, 11, 16], "classes_": 9, "pred": [9, 11], "predict": [9, 10, 11, 12, 14], "hit": 9, "probs_df_linear": 9, "77": 9, "confusion_matrix": 9, "conf_mat": 9, "heatmap": [9, 15], "annot": [9, 13, 15, 16], "xticklabel": [9, 15], "yticklabel": [9, 15], "ylabel": 9, "xlabel": 9, "callback": [9, 11, 12], "csvlogger": [9, 11, 12], "savemodelcallback": 9, "tt": [9, 11, 12], "tok_func": [9, 11, 12], "thaitoken": [9, 11, 12], "lang": [9, 11, 12], "pre_rules_th": [9, 11, 12], "post_rules_th": [9, 11, 12], "tokenizeprocessor": [9, 11, 12], "chunksiz": [9, 11, 12], "10000": [9, 11, 12], "mark_field": [9, 11, 12], "numericalizeprocessor": [9, 11, 12], "vocab": [9, 11, 12, 14, 16], "max_vocab": [9, 11, 12], "60000": [9, 11, 12], "data_lm": [9, 11, 12], "textlist": [9, 11, 12], "from_df": [9, 11, 12], "col": [9, 11, 12], "split_by_rand_pct": [9, 12], "valid_pct": [9, 11], "seed": [9, 11], "label_for_lm": [9, 11, 12], "databunch": [9, 11, 12], "sanity_check": [9, 11, 12], "wisesight_lm": 9, "pkl": [9, 11, 16], "train_d": [9, 11], "valid_d": [9, 11], "23823": 9, "240": [9, 10], "emb_sz": [9, 11, 12], "400": [9, 11, 12], "n_hid": [9, 11, 12], "1550": [9, 11, 12], "n_layer": [9, 11, 12], "pad_token": [9, 11, 12], "qrnn": [9, 11, 12], "tie_weight": [9, 11, 12], "out_bia": [9, 11, 12], "output_p": [9, 11, 12], "hidden_p": [9, 11, 12], "input_p": [9, 11, 12], "embed_p": [9, 11, 12], "weight_p": [9, 11, 12], "trn_arg": [9, 11, 12], "drop_mult": [9, 11, 12], "clip": [9, 11, 12], "alpha": [9, 11, 12], "beta": [9, 11, 12], "language_model_learn": [9, 11, 12], "awd_lstm": [9, 11, 12], "load_pretrain": [9, 11, 12], "_thwiki_lstm": [9, 11, 12], "languagelearn": [9, 12], "textlmdatabunch": [9, 12], "labellist": [9, 12], "item": [9, 12, 14], "lmtextlist": [9, 12], "xxbo": [9, 11, 12], "\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28": 9, "\u0e40\u0e23\u0e32": [9, 11], "\u0e1c\u0e25": 9, "\u0e07\u0e2d\u0e2d\u0e01": 9, "\u0e22\u0e32\u0e2a": 9, "\u0e40\u0e22\u0e2d\u0e30": [9, 11], "\u0e42\u0e25\u0e01": 9, "\u0e2d\u0e2d\u0e21": 9, "\u0e40\u0e04": [9, 16], "\u0e41\u0e19\u0e19": 9, "\u0e2d\u0e30\u0e44\u0e23": [9, 11], "\u0e19\u0e30": 9, "lmlabellist": [9, 12], "path": [9, 12, 14], "\u0e19\u0e30\u0e04\u0e30": [9, 11, 16], "\u0e41\u0e1c\u0e25": 9, "\u0e41\u0e16\u0e21": 9, "\u0e2d\u0e32\u0e23\u0e21\u0e13": 9, "\u0e42\u0e14\u0e19": 9, "xxunk": [9, 11, 12], "\u0e40\u0e19\u0e2d\u0e30": 9, "\u0e27\u0e19": [9, 11], "\u0e17\u0e32\u0e07": [9, 11], "\u0e01\u0e2d\u0e14": 9, "netflix": 9, "\u0e41\u0e19": [9, 11], "\u0e17\u0e33\u0e23": 9, "\u0e19\u0e2d\u0e19": 9, "\u0e1a\u0e15\u0e01": 9, "\u0e01\u0e32\u0e23\u0e41\u0e02": 9, "\u0e41\u0e2a\u0e07\u0e42\u0e2a\u0e21": 9, "\u0e2a\u0e19": 9, "\u0e01\u0e40\u0e01\u0e2d\u0e23": 9, "\u0e41\u0e14\u0e07": [9, 11], "\u0e42\u0e2d\u0e40\u0e1e": 9, "\u0e1b\u0e23\u0e30\u0e08\u0e33\u0e1b": 9, "2560": 9, "\u0e2a\u0e19\u0e32\u0e21": 9, "\u0e04\u0e25": 9, "\u0e0b\u0e2d\u0e22": 9, "\u0e42\u0e0a\u0e04": 9, "\u0e25\u0e32\u0e14\u0e1e\u0e23": 9, "\u0e2d\u0e27\u0e14": 9, "\u0e17\u0e33\u0e44\u0e21": 9, "\u0e01\u0e04\u0e19": 9, "\u0e1e\u0e27\u0e01": 9, "\u0e1a\u0e2d": 9, "\u0e01\u0e27": [9, 11], "\u0e19\u0e21": [9, 15], "\u0e40\u0e1a\u0e25\u0e2d": 9, "\u0e43\u0e2a": 9, "\u0e02\u0e19\u0e32\u0e14": 9, "\u0e13\u0e41\u0e21": 9, "\u0e19\u0e30\u0e40\u0e19": 9, "\u0e40\u0e1b\u0e25": 9, "\u0e40\u0e2d\u0e07": 9, "\u0e27\u0e22\u0e15": 9, "\u0e21\u0e32\u0e2a": 9, "\u0e01\u0e42\u0e0a": 9, "\u0e32\u0e21\u0e04": 9, "cho": 9, "cosmet": 9, "daradaili": 9, "\u0e14\u0e32\u0e23\u0e32": 9, "\u0e40\u0e14\u0e25": 9, "\u0e04\u0e19\u0e44\u0e17\u0e22": 9, "\u0e19\u0e02": 9, "\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28": 9, "\u0e2b\u0e21": [9, 11, 16], "\u0e19\u0e25\u0e21": 9, "\u0e09\u0e30": 9, "\u0e42\u0e25": 9, "\u0e21\u0e30\u0e25": 9, "\u0e2d\u0e40\u0e1b\u0e25": 9, "250": 9, "\u0e02\u0e32\u0e22": [9, 11], "160": [9, 13], "\u0e40\u0e22\u0e2d\u0e30\u0e41\u0e22\u0e30": 9, "\u0e01\u0e33\u0e44\u0e23": 9, "\u0e04\u0e27\u0e23": 9, "\u0e32\u0e27\u0e1c": 9, "\u0e43\u0e2b\u0e0d": 9, "300": [9, 11, 16], "\u0e16\u0e32\u0e14": 9, "\u0e32\u0e19\u0e1a\u0e19": 9, "80": [9, 11, 13], "\u0e0a\u0e32\u0e40\u0e22": 9, "\u0e02\u0e27\u0e14": 9, "\u0e19\u0e41\u0e01": 9, "\u0e1e\u0e2d\u0e41\u0e25": 9, "\u0e40\u0e1a": [9, 11], "\u0e22\u0e23": 9, "120": 9, "\u0e32\u0e40\u0e01\u0e25": 9, "\u0e22\u0e14": 9, "\u0e21\u0e32": [9, 11], "360": [9, 11], "\u0e33\u0e41\u0e02": 9, "\u0e1e\u0e2d\u0e44\u0e14": 9, "\u0e2d\u0e32\u0e01\u0e32\u0e28": 9, "\u0e25\u0e30\u0e25\u0e32\u0e22": 9, "\u0e1e\u0e2d": 9, "\u0e17\u0e30\u0e40\u0e25": 9, "\u0e40\u0e1c\u0e32": 9, "\u0e25\u0e27\u0e01": 9, "\u0e32\u0e15\u0e32": 9, "\u0e01\u0e25\u0e32\u0e07\u0e46": [9, 11], "\u0e15\u0e33": 9, "\u0e41\u0e1b": 9, "\u0e21\u0e22\u0e33": 9, "\u0e2b\u0e23\u0e2d\u0e01": 9, "\u0e15\u0e23\u0e07": 9, "\u0e44\u0e1f": 9, "\u0e19\u0e43\u0e19": 9, "\u0e41\u0e17\u0e1a": 9, "\u0e41\u0e15\u0e30": 9, "\u0e19\u0e2d\u0e01": 9, "\u0e23\u0e30\u0e1a\u0e1a": 9, "\u0e41\u0e22": 9, "\u0e40\u0e2d\u0e32\u0e40\u0e1b\u0e23": 9, "\u0e22\u0e1a": 9, "\u0e19\u0e40\u0e2d\u0e07": [9, 12], "\u0e2d\u0e32\u0e2b\u0e32\u0e23": [9, 11], "\u0e1a\u0e02": 9, "\u0e15\u0e32\u0e21": 9, "\u0e41\u0e04": [9, 11, 15], "\u0e40\u0e08\u0e2d": [9, 11], "\u0e41\u0e1a\u0e1a\u0e19": 9, "\u0e2a\u0e07\u0e2a": 9, "\u0e2d\u0e04": 9, "\u0e15\u0e32\u0e22": 9, "\u0e04\u0e32": 9, "\u0e43\u0e04\u0e23": [9, 11], "\u0e21\u0e2d\u0e07": 9, "\u0e32\u0e41\u0e23\u0e07": 9, "\u0e27\u0e19\u0e21\u0e32\u0e01": 9, "\u0e04\u0e19\u0e43\u0e19": 9, "\u0e32\u0e41\u0e23\u0e07\u0e02": 9, "\u0e02\u0e22": 9, "\u0e40\u0e25": [9, 10, 11, 16], "\u0e01\u0e19": [9, 11], "\u0e04\u0e19\u0e08\u0e19": 9, "\u0e04\u0e19\u0e23\u0e27\u0e22": 9, "\u0e01\u0e16": [9, 11], "\u0e2a\u0e21\u0e04\u0e27\u0e23": 9, "\u0e19\u0e41\u0e25\u0e30\u0e01": 9, "\u0e0a\u0e2d\u0e1a": [9, 10, 11], "\u0e19\u0e08\u0e23": 9, "\u0e0a\u0e32\u0e27\u0e15": 9, "\u0e32\u0e07\u0e0a\u0e32\u0e15": 9, "\u0e40\u0e16\u0e2d\u0e30": 9, "\u0e42\u0e2d\u0e01\u0e32\u0e2a": 9, "sequentialrnn": [9, 12], "encod": [9, 11, 12, 14, 15], "15000": 9, "padding_idx": [9, 12], "encoder_dp": [9, 12], "embeddingdropout": [9, 12], "emb": [9, 12], "rnn": [9, 10, 12], "modulelist": [9, 12, 14], "weightdropout": [9, 12], "lstm": [9, 12], "batch_first": [9, 12], "input_dp": [9, 12], "rnndropout": [9, 12], "hidden_dp": [9, 12], "lineardecod": [9, 12], "decod": [9, 12, 15], "linear": [9, 12, 14], "in_featur": [9, 12, 14], "out_featur": [9, 12, 14], "bia": [9, 10, 12, 14, 15], "output_dp": [9, 12], "opt_func": [9, 11, 12], "functool": [9, 12, 15], "partial": [9, 11, 12, 15], "optim": [9, 11, 12, 14], "adam": [9, 11, 12], "loss_func": [9, 12], "flattenedloss": [9, 12], "crossentropyloss": [9, 12], "0x7f51be568268": 9, "true_wd": [9, 12], "bn_wd": [9, 12], "wd": [9, 12], "train_bn": [9, 12], "posixpath": [9, 12], "model_dir": [9, 12], "callback_fn": [9, 12], "basic_train": [9, 12], "record": [9, 11, 12], "add_tim": [9, 12], "silent": [9, 12], "gradientclip": [9, 12], "rnntrainer": [9, 12], "layer_group": [9, 12], "sequenti": [9, 12], "cb_fns_regist": 9, "frozen": [9, 11], "freeze_to": [9, 11], "fit_one_cycl": [9, 11], "mom": [9, 11], "epoch": [9, 11], "train_loss": [9, 11], "valid_loss": [9, 11], "841187": 9, "462714": 9, "319742": 9, "unfrozen": [9, 11], "unfreez": [9, 11], "411834": 9, "205552": 9, "341766": 9, "03": 9, "178030": 9, "037095": 9, "361508": 9, "970388": 9, "930919": 9, "370139": 9, "756190": 9, "890398": 9, "376191": 9, "671704": 9, "890232": 9, "375595": 9, "save_encod": [9, 11], "wisesight_enc": 9, "lm": 9, "load_data": [9, 11], "data_cl": [9, 11], "itemlist": 9, "label_from_df": [9, 11], "ito": [9, 11, 12], "bptt": [9, 11], "500": [9, 16], "text_classifier_learn": [9, 11], "load_encod": [9, 11], "rnnlearner": 9, "textclasdatabunch": 9, "\u0e19\u0e41\u0e14\u0e14": 9, "\u0e40\u0e1e\u0e25\u0e2a": 9, "\u0e27\u0e43\u0e2b\u0e21": 9, "\u0e08\u0e23": [9, 11, 16], "\u0e42\u0e0b\u0e19": 9, "\u0e40\u0e27": 9, "\u0e2b\u0e25\u0e2d\u0e14": 9, "\u0e22\u0e32\u0e27": 9, "\u0e1d\u0e32": 9, "\u0e40\u0e2d\u0e32": [9, 11], "\u0e1e\u0e1a": 9, "\u0e25\u0e1b": 9, "soul": [9, 12], "pop": 9, "\u0e2a\u0e32\u0e21": 9, "\u0e2a\u0e44\u0e15\u0e25": 9, "\u0e07\u0e32\u0e19": [9, 11], "jamnight": 9, "\u0e19\u0e33": 9, "parkinson": 9, "xxup": 9, "toi": 9, "\u0e19\u0e2d\u0e01\u0e08\u0e32\u0e01": 9, "\u0e42\u0e0a\u0e27": 9, "\u0e41\u0e1a\u0e1a": 9, "\u0e1b\u0e41\u0e1a\u0e1a": 9, "\u0e27\u0e07": 9, "\u0e41\u0e08\u0e21": 9, "\u0e1e\u0e25\u0e32\u0e14": 9, "\u0e40\u0e08\u0e2d\u0e01": 9, "\u0e19\u0e22\u0e32\u0e22\u0e19": 9, "\u0e1b\u0e23\u0e30\u0e15": 9, "\u0e2a\u0e32\u0e21\u0e32\u0e23\u0e16": 9, "\u0e15\u0e23": [9, 15, 16], "event": 9, "go": [9, 12], "eventpop": 9, "me": [9, 12, 16], "\u0e08\u0e33\u0e01": 9, "\u0e2d\u0e32\u0e22": 9, "jamnightbyjameson": 9, "jamesonthailand": 9, "soulaftersix": 9, "theparkinson": 9, "thetoi": 9, "\u0e21\u0e30": 9, "\u0e1a\u0e2d\u0e01\u0e15": 9, "\u0e41\u0e1e": [9, 11], "\u0e40\u0e22": 9, "\u0e1e\u0e2d\u0e19": 9, "\u0e41\u0e15\u0e07\u0e42\u0e21": 9, "\u0e25\u0e14": 9, "\u0e2a\u0e07\u0e01\u0e23\u0e32\u0e19\u0e15": 9, "\u0e23\u0e2d\u0e14": 9, "555": 9, "categorylist": 9, "multibatchencod": 9, "poolinglinearclassifi": 9, "layer": [9, 14, 15], "batchnorm1d": 9, "1200": 9, "ep": [9, 14], "momentum": 9, "affin": 9, "track_running_stat": 9, "dropout": [9, 14], "27999999999999997": 9, "relu": 9, "inplac": [9, 14], "2e": [9, 11], "slice": [9, 11], "5e": [9, 11], "improv": 9, "monitor": 9, "bestmodel": 9, "script": [9, 12, 15], "train_model": 9, "812156": 9, "753478": 9, "687532": 9, "740403": 9, "699093": 9, "714394": 9, "727394": 9, "668807": 9, "723011": 9, "722163": 9, "675351": 9, "723517": 9, "675266": 9, "654477": 9, "738723": 9, "669178": 9, "641070": 9, "737962": 9, "612528": 9, "637456": 9, "744551": 9, "618259": 9, "635149": 9, "749366": 9, "572621": 9, "651169": 9, "749873": 9, "561985": 9, "661739": 9, "747593": 9, "534753": 9, "673563": 9, "738469": 9, "530844": 9, "688871": 9, "746072": 9, "522788": 9, "670024": 9, "743031": 9, "y_true": 9, "loss": [9, 11], "get_pr": [9, 11], "ds_type": 9, "datasettyp": [9, 11], "with_loss": 9, "argmax": [9, 11, 14], "to_df": 9, "8392661555312158": 9, "u": [10, 12, 14, 15], "look": [10, 12, 14, 15], "pypi": [10, 15], "pkg": [10, 15], "attempt": [10, 14], "dependency_pars": 10, "esupar": 10, "chu": 10, "liu": 10, "edmond": 10, "chu_liu_edmond": 10, "cp38": 10, "107": 10, "supar": 10, "93": 10, "2022": [10, 15, 16], "304": 10, "dill": [10, 16], "cu116": 10, "stanza": 10, "691": 10, "huggingfac": [10, 14, 15], "hub": [10, 14, 15], "huggingface_hub": [10, 14, 15], "182": 10, "jinja2": 10, "smart": 10, "pathi": 10, "langcod": 10, "pydant": 10, "logger": 10, "legaci": 10, "typer": 10, "protobuf": [10, 14], "confect": 10, "markupsaf": 10, "5626945": 10, "6613dcb188f57561a00a2e40eca1bbafe6203936b8d9c387facd79de3f06fa62": 10, "6f": 10, "3475485c7d991ca5698d39603e22a99bd6904dcac7d0a5855a": 10, "234926": 10, "e3b7a3e928e5e81053b9f869cfef5382b49f133284c6abbd718496ff11e8ee67": 10, "a1": 10, "b0bb1f7683d20b75b34ceeb56ee83a585e9b065a5fef0b2cb1": 10, "warn": [10, 14, 15], "broken": 10, "permiss": 10, "conflict": 10, "behaviour": 10, "manag": 10, "recommend": [10, 15], "virtual": 10, "environ": 10, "pypa": 10, "io": [10, 13, 14], "venv": 10, "spacy_pythainlp": 10, "dev6": 10, "nptype": 10, "473": 10, "docopt": 10, "fire": 10, "termcolor": 10, "13723": 10, "cd282751c98736c79933ed4265624e65891888bb9fdd01dc5d6fcf978d76431f": 10, "cc": 10, "f1e272f628fdb013d969acc99cfe2e031ea15b3efb74ffe842": 10, "116949": 10, "bc82a0082e9931af28c40d49e4494ce66a1f80f929b30ae4e7e1eff347b37c5c": 10, "86": 10, "88e8603bd3b1a9bff9d02d820c7431c47ad032865632657bb9": 10, "cuda": [10, 11], "__init__": 10, "497": 10, "userwarn": [10, 14, 15], "initi": [10, 11, 14, 15], "nvml": 10, "pos_engin": 10, "pos_corpu": 10, "orchid_ud": 10, "sent_engin": 10, "ner_engin": 10, "tokenize_engin": 10, "dependency_parsing_engin": 10, "dependency_parsing_model": 10, "bool": 10, "chang": [10, 12], "turn": [10, 12], "off": [10, 12], "0x7f9c02410a90": 10, "\u0e1c\u0e21\u0e40\u0e1b": 10, "\u0e19\u0e41\u0e21\u0e27": 10, "\u0e1c\u0e21\u0e0a\u0e2d\u0e1a\u0e44\u0e1b\u0e40\u0e25": 10, "\u0e22\u0e19\u0e19\u0e32\u0e07\u0e23\u0e2d\u0e07": 10, "\u0e21\u0e22": 10, "free": [10, 16], "commerci": 10, "pleas": 10, "contract": 10, "nectec": 10, "facebook": [10, 15, 16], "dancearmi": 10, "post": [10, 13, 14], "10157641945708284": 10, "pos_lst20_perceptron": 10, "\u0e1c\u0e21\u0e0a\u0e2d\u0e1a": 10, "\u0e42\u0e23\u0e07": 10, "\u0e19\u0e32\u0e07\u0e23\u0e2d\u0e07": 10, "\u0e44\u0e1b\u0e40\u0e25": 10, "0x7f9c0146e880": 10, "weight": [10, 14, 15], "checkpoint": [10, 15], "koichiyasuoka": 10, "roberta": [10, 15], "spm": [10, 15], "upo": 10, "robertamodel": [10, 15], "classifi": [10, 11], "expect": [10, 15, 16], "anoth": [10, 15], "architectur": [10, 15], "bertforsequenceclassif": [10, 15], "bertforpretrain": [10, 15], "NOT": [10, 15], "exactli": [10, 15], "ident": [10, 15], "newli": [10, 15], "pooler": [10, 15], "dens": [10, 15], "should": [10, 12, 15], "probabl": [10, 13, 15, 16], "down": [10, 12, 15], "stream": [10, 15, 16], "abl": [10, 15], "infer": [10, 15], "info": 10, "n_sentenc": 10, "n_batch": 10, "n_bucket": 10, "make": [10, 11, 12, 15], "apply_permut": 10, "tensor": [10, 11], "index_select": 10, "dim": [10, 11, 15], "permut": 10, "204603": 10, "elaps": 10, "dep": 10, "pron": 10, "sconj": 10, "nsubj": 10, "cop": 10, "acl": 10, "xcomp": 10, "obl": 10, "flat": 10, "star": [11, 12], "multi": 11, "both": [11, 12, 15], "number": 11, "micro": 11, "averag": 11, "f1": 11, "challeng": [11, 12], "micro_f1_publ": 11, "micro_f1_priv": 11, "59313": 11, "60322": 11, "5145": 11, "5109": 11, "5022": 11, "4976": 11, "59139": 11, "58139": 11, "bert": [11, 15], "56612": 11, "57057": 11, "review_dataset": 11, "wongnai_data": 11, "ast": [11, 12], "literal_ev": [11, 12], "counter": [11, 12], "re": [11, 12, 14, 16], "ft_data": 11, "respect": 11, "w_review_train": 11, "csv": [11, 12], "sep": [11, 12], "header": 11, "drop_dupl": 11, "rate": 11, "test_fil": 11, "concat": 11, "469282": 11, "304328": 11, "169880": 11, "046133": 11, "010377": 11, "two_df": 11, "one_df": 11, "train_bal": 11, "392365": 11, "254448": 11, "142036": 11, "115715": 11, "095436": 11, "dump": [11, 12, 15, 16], "skipgram": 11, "df_txt": 11, "df": 11, "ft_line": 11, "iterrow": 11, "ft_lab": 11, "__label__": 11, "ft_text": 11, "replace_newlin": 11, "close": [11, 15], "__label__0": 11, "df_all": 11, "home": 11, "charin": 11, "pretrainedvector": 11, "vec": 11, "1m": 11, "18176": 11, "progress": [11, 13], "sec": 11, "thread": 11, "24858": 11, "lr": 11, "000000": [11, 13], "309402": 11, "0h0m": 11, "wongnai_b": 11, "wordngram": 11, "731006": 11, "391282": 11, "764689": 11, "81": 11, "bin": [11, 16], "pred_lab": 11, "split": [11, 14, 16], "submit_df": 11, "reviewid": 11, "submit_fastttext_b": 11, "lukkiddd": 11, "train_split": 11, "test_split": 11, "pipelin": [11, 15], "countvector": 11, "tfidftransform": 11, "svm": 11, "text_clf": 11, "vect": 11, "clf": 11, "fit": 11, "memori": [11, 12], "binari": [11, 16], "decode_error": 11, "strict": 11, "int64": 11, "utf": [11, 14], "lowercas": 11, "max_df": 11, "max_featur": 11, "preprocessor": 11, "stop_word": 11, "ax_it": 11, "tol": 11, "0001": 11, "verbos": 11, "onehotencod": 11, "enc": 11, "handle_unknown": 11, "submit_linearsvc": 11, "59590": 11, "59731": 11, "processor": [11, 12, 14], "random_split_by_pct": 11, "wongnai_lm": 11, "45735": 11, "461": 11, "show_batch": 11, "idx": 11, "\u0e14\u0e32\u0e27": 11, "\u0e2b\u0e21\u0e14": 11, "\u0e0b\u0e30": 11, "\u0e32\u0e27\u0e2a\u0e27\u0e22": 11, "\u0e21\u0e32\u0e13": 11, "\u0e1e\u0e2d\u0e14": 11, "\u0e18\u0e22\u0e32\u0e28": 11, "\u0e1a\u0e23\u0e2d\u0e07": 11, "\u0e1a\u0e21\u0e32": 11, "\u0e2d\u0e22\u0e46": 11, "\u0e41\u0e16\u0e27": 11, "\u0e25\u0e2d\u0e07": 11, "\u0e41\u0e27\u0e30": 11, "\u0e2a\u0e33\u0e2b\u0e23": 11, "\u0e23\u0e2a": 11, "\u0e2d\u0e07\u0e14": 11, "\u0e21\u0e32\u0e01\u0e21\u0e32\u0e22": 11, "\u0e04\u0e07": 11, "\u0e42\u0e01\u0e42\u0e01": 11, "top": [11, 12], "\u0e22\u0e14\u0e32\u0e22": 11, "\u0e2b\u0e32": 11, "\u0e15\u0e2d\u0e19": 11, "\u0e27\u0e22\u0e40\u0e15": 11, "\u0e40\u0e19": 11, "\u0e17\u0e32\u0e19": 11, "\u0e2d\u0e19\u0e02": 11, "\u0e22\u0e32\u0e01": 11, "\u0e27\u0e32": 11, "\u0e2a\u0e32\u0e02\u0e32": 11, "\u0e12\u0e19\u0e32\u0e01\u0e32\u0e23": 11, "\u0e1d\u0e32\u0e01": 11, "\u0e2d\u0e01": [11, 15], "\u0e2b\u0e25\u0e32\u0e22\u0e2d\u0e22": 11, "\u0e1a\u0e23": 11, "\u0e01\u0e30": 11, "\u0e01\u0e2a\u0e32\u0e27": 11, "\u0e32\u0e02\u0e2d\u0e07\u0e23": 11, "\u0e08\u0e32": 11, "\u0e04\u0e27\u0e32\u0e21\u0e04": 11, "\u0e14\u0e40\u0e2b": 11, "\u0e27\u0e19\u0e15": 11, "\u0e2d\u0e2d\u0e01": 11, "\u0e41\u0e19\u0e27\u0e17\u0e32\u0e07": 11, "\u0e1a\u0e27\u0e01": 11, "\u0e27\u0e19\u0e43\u0e2b\u0e0d": 11, "\u0e1a\u0e23\u0e23\u0e22\u0e32\u0e01\u0e32\u0e28": 11, "\u0e23\u0e16\u0e40\u0e02": 11, "\u0e42\u0e15": 11, "\u0e15\u0e01\u0e41\u0e15": 11, "\u0e19\u0e41\u0e19\u0e27": 11, "\u0e1a\u0e32\u0e23": 11, "\u0e42\u0e14\u0e22\u0e23\u0e2d\u0e1a": 11, "\u0e19\u0e23\u0e32": 11, "\u0e40\u0e21\u0e19": [11, 15], "next": [11, 12], "train_dl": 11, "414": 11, "3408": 11, "135": 11, "409": 11, "1325": 11, "1185": 11, "9903": 11, "368": 11, "870": 11, "254": 11, "3448": 11, "429": 11, "devic": 11, "193": 11, "10074": 11, "258": 11, "456": 11, "270": 11, "\u0e1a\u0e1e": 11, "\u0e2d\u0e07\u0e40\u0e2a": 11, "temperatur": [11, 12], "\u0e44\u0e2b\u0e21": 11, "mr": [11, 12, 13], "\u0e04\u0e0a": 11, "\u0e09\u0e32\u0e22": 11, "2557": 11, "\u0e01\u0e33\u0e01": [11, 16], "\u0e1b\u0e1b": 11, "\u0e20\u0e32\u0e04": 11, "\u0e42\u0e23\u0e07\u0e20\u0e32\u0e1e\u0e22\u0e19\u0e15\u0e23": 11, "2558": 11, "\u0e2d\u0e2b\u0e32": 11, "\u0e22\u0e27\u0e01": [11, 15], "lr_find": 11, "plot": [11, 12, 16], "finder": 11, "complet": 11, "learner_nam": 11, "graph": [11, 14], "min": 11, "gradient": [11, 15], "58e": 11, "04": [11, 14, 16], "22562": 11, "659182": 11, "493942": 11, "342857": 11, "375606": 11, "252919": 11, "385714": 11, "165419": 11, "013862": 11, "371429": 11, "034220": 11, "802707": 11, "357143": 11, "879111": 11, "712463": 11, "823682": 11, "624331": 11, "784611": 11, "580608": 11, "753532": 11, "553170": 11, "719396": 11, "516521": 11, "699165": 11, "513339": 11, "696516": 11, "512542": 11, "wongnai_enc": 11, "\u0e32\u0e19\u0e19": 11, "\u0e08\u0e30\u0e2d\u0e22": 11, "\u0e19\u0e01\u0e33\u0e41\u0e1e\u0e07": 11, "\u0e2d\u0e2d\u0e19": 11, "\u0e40\u0e25\u0e22\u0e41\u0e22\u0e01\u0e1a": 11, "\u0e07\u0e44\u0e1b2": [11, 15], "\u0e0a\u0e09\u0e30\u0e25\u0e32\u0e40\u0e15": [11, 15], "\u0e44\u0e2d\u0e28\u0e04\u0e23": [11, 15], "\u0e21\u0e0a\u0e32\u0e40\u0e02": [11, 15], "\u0e27\u0e27\u0e07\u0e40\u0e14": 11, "n\u0e2b": 11, "\u0e27\u0e14": [11, 15], "\u0e01\u0e46": 11, "\u0e15\u0e23\u0e30\u0e40\u0e27\u0e19\u0e2b\u0e32\u0e23": 11, "\u0e32\u0e19\u0e17\u0e32\u0e19": 11, "\u0e21\u0e32\u0e40\u0e08\u0e2d": 11, "\u0e08\u0e30\u0e27": 11, "\u0e19\u0e40\u0e08": 11, "\u0e32\u0e1b\u0e23\u0e30\u0e08\u0e33\u0e01": 11, "\u0e04\u0e07\u0e44\u0e21": 11, "\u0e32\u0e04": 11, "\u0e14\u0e16": 11, "\u0e07\u0e2a\u0e25": 11, "\u0e14\u0e1c\u0e21\u0e04": 11, "\u0e32\u0e19\u0e41\u0e23\u0e01\u0e46\u0e40\u0e25\u0e22\u0e04\u0e23": 11, "add_test": 11, "wongnai_cl": 11, "sure": [11, 15], "got": [11, 12], "target": 11, "\u0e1e\u0e32": 11, "\u0e2d\u0e32\u0e40\u0e0b": 11, "\u0e23\u0e23": 11, "\u0e32\u0e1e\u0e23\u0e30\u0e22\u0e32\u0e1b\u0e32\u0e23": 11, "\u0e0a\u0e14\u0e32\u0e20": 11, "\u0e40\u0e29\u0e01": 11, "\u0e19\u0e01\u0e32\u0e23": 11, "\u0e0a\u0e27\u0e19": 11, "\u0e32\u0e40\u0e14": 11, "\u0e19\u0e40\u0e04\u0e22": 11, "\u0e07\u0e46": 11, "\u0e23\u0e16\u0e15": 11, "\u0e1e\u0e24\u0e28\u0e08": 11, "\u0e01\u0e32\u0e22\u0e19": 11, "\u0e32\u0e19\u0e21\u0e32": 11, "\u0e27\u0e07\u0e43\u0e19": 11, "\u0e14\u0e01": 11, "\u0e08\u0e01\u0e23\u0e23\u0e21": 11, "xxmaj": 11, "relax": 11, "night": [11, 12], "phothalai": 11, "\u0e21\u0e15": 11, "tast": 11, "\u0e2d\u0e07\u0e2d\u0e32\u0e2b\u0e32\u0e23": 11, "\u0e2d\u0e19\u0e23": 11, "group": [11, 14, 16], "\u0e0d\u0e2b\u0e32": 11, "\u0e27\u0e16": 11, "\u0e01\u0e32\u0e23\u0e2a": 11, "\u0e2d\u0e2a\u0e32\u0e23": 11, "\u0e1e\u0e19": 11, "\u0e01\u0e07\u0e32\u0e19": 11, "\u0e21\u0e32\u0e16": 11, "terrac": 11, "\u0e2d\u0e07\u0e08\u0e32\u0e01": 11, "\u0e08\u0e19": 11, "\u0e17\u0e19": 11, "\u0e01\u0e23\u0e30\u0e41\u0e2a": 11, "\u0e04\u0e27\u0e32\u0e21\u0e41\u0e23\u0e07": 11, "shibuya": 11, "shabu": 11, "\u0e44\u0e2b\u0e27": 11, "\u0e02\u0e2d": 11, "\u0e15\u0e32\u0e21\u0e23\u0e2d\u0e22": 11, "\u0e2d\u0e07\u0e2b\u0e32": 11, "\u0e42\u0e2d": 11, "\u0e2a\u0e21\u0e32\u0e17\u0e32\u0e19": 11, "\u0e1b\u0e23\u0e30\u0e08\u0e33\u0e2a": 11, "\u0e0a\u0e32": 11, "\u0e40\u0e1e": [11, 12], "\u0e0a\u0e32\u0e27": 11, "\u0e01\u0e04\u0e23": 11, "pednoii": 11, "ahha": 11, "\u0e32\u0e19\u0e2d\u0e32\u0e2b\u0e32\u0e23": 11, "\u0e41\u0e23\u0e01": 11, "\u0e19\u0e33\u0e40\u0e2a\u0e19\u0e2d": 11, "\u0e19\u0e32\u0e07\u0e43\u0e19": 11, "31e": 11, "07": 11, "gradual": 11, "187845": 11, "158394": 11, "472803": 11, "889035": 11, "828990": 11, "629707": 11, "760357": 11, "751162": 11, "656904": 11, "628719": 11, "721673": 11, "669456": 11, "submit_ulmfit": 11, "ulmfit": 12, "thwiki_lstm": 12, "dummi": 12, "imdb": 12, "untar_data": 12, "url": 12, "imdb_sampl": 12, "dummy_df": 12, "thwiki_ito": 12, "pickl": [12, 16], "itos_fnam": 12, "rb": [12, 16], "thwiki_vocab": 12, "check": 12, "60005": 12, "800": 12, "film": 12, "act": 12, "music": 12, "good": 12, "too": [12, 15], "though": 12, "mostli": 12, "earli": 12, "thing": 12, "still": 12, "realli": 12, "superstar": 12, "cast": 12, "face": [12, 14], "entir": 12, "excel": 12, "job": 12, "hard": 12, "watch": 12, "becaus": [12, 16], "situat": 12, "present": [12, 13], "british": 12, "against": 12, "each": [12, 15, 16], "merit": 12, "view": 12, "forc": 12, "region": 12, "thei": [12, 15], "did": 12, "around": 12, "partit": 12, "simpli": [12, 15], "saw": 12, "between": [12, 13, 15], "enough": 12, "veri": 12, "rememb": 12, "screen": 12, "never": 12, "paint": 12, "side": 12, "hope": 12, "younger": 12, "redempt": 12, "man": [12, 13], "who": 12, "her": 12, "life": 12, "truli": 12, "love": 12, "later": 12, "she": 12, "great": 12, "pain": 12, "carri": 12, "messag": 12, "grave": 12, "peopl": 12, "realiti": 12, "sinc": [12, 16], "india": 12, "pakistan": 12, "border": 12, "sens": 12, "glad": 12, "seen": 12, "even": 12, "uk": 12, "could": [12, 16], "would": [12, 16], "better": 12, "onc": 12, "long": 12, "while": [12, 16], "movi": 12, "along": 12, "feel": 12, "labor": 12, "my": 12, "joi": 12, "where": [12, 13, 14], "five": 12, "stereotyp": 12, "had": 12, "gui": 12, "fat": 12, "foreign": 12, "etc": 12, "being": [12, 14], "written": 12, "shot": 12, "product": 12, "low": 12, "junior": 12, "high": [12, 15], "video": 12, "director": 12, "produc": [12, 14], "ever": 12, "wors": 12, "entri": 12, "concept": 12, "funni": 12, "gari": 12, "coleman": 12, "actor": 12, "trust": 12, "sai": [12, 15], "went": 12, "dad": 12, "came": 12, "korea": 12, "he": 12, "short": [12, 15], "period": 12, "made": 12, "epic": 12, "imagin": 12, "cost": 12, "cheap": 12, "theme": 12, "duti": 12, "lip": 12, "offic": 12, "deep": [12, 13], "declar": 12, "hi": 12, "peck": 12, "liber": 12, "understand": 12, "fearless": 12, "human": 12, "ve": 12, "fact": 12, "tail": 12, "mess": 12, "almost": 12, "walk": 12, "paid": 12, "ll": 12, "sit": 12, "bit": 12, "lose": 12, "its": 12, "someth": [12, 15], "ed": 12, "wood": 12, "dialogu": 12, "heard": 12, "viewer": 12, "cannot": [12, 14], "meet": 12, "oper": 12, "soon": 12, "stephen": 12, "best": 12, "ultim": 12, "tara": 12, "reid": 12, "plai": 12, "role": 12, "oh": 12, "help": 12, "talent": 12, "actress": 12, "stick": 12, "american": 12, "pie": 12, "know": 12, "kick": 12, "clich": 12, "\u00e9": 12, "typic": 12, "member": 12, "william": 12, "benton": 12, "believ": 12, "bias": 12, "toward": 12, "thief": 12, "born": 12, "bad": 12, "neither": 12, "slate": 12, "societi": 12, "parent": 12, "educ": 12, "what": [12, 15], "somewher": 12, "isn": [12, 16], "back": 12, "track": 12, "bet": 12, "wast": 12, "piec": 12, "valid": 12, "late": 12, "penn": 12, "teller": 12, "joe": 12, "bob": 12, "fridai": [12, 15], "school": 12, "year": 12, "doubt": 12, "televis": 12, "didn": 12, "stai": 12, "miss": 12, "john": 12, "bloom": 12, "live": 12, "belong": [12, 15], "question": [12, 15], "anyon": 12, "hour": 12, "moral": 12, "disast": 12, "david": 12, "care": 12, "purpos": [12, 13], "singl": 12, "qualiti": 12, "treat": 12, "afternoon": 12, "budget": 12, "project": [12, 14], "stori": 12, "eva": 12, "tv": 12, "ideal": 12, "mani": [12, 13], "cours": 12, "special": 12, "effect": 12, "gun": 12, "scene": 12, "move": 12, "although": 12, "problem": 12, "rent": 12, "student": 12, "ye": 12, "nake": 12, "emperor": 12, "speak": 12, "big": 12, "someon": 12, "state": [12, 15], "truth": 12, "old": 12, "bodi": [12, 13], "nude": 12, "artist": 12, "front": 12, "audienc": 12, "ev": 12, "poor": 12, "wanna": 12, "ladi": 12, "sensit": 12, "becam": 12, "petti": 12, "satisfact": 12, "alarm": 12, "signal": [12, 14], "degre": 12, "work": [12, 13, 15], "art": [12, 15], "cross": 12, "mix": 12, "ordinari": 12, "rural": 12, "pacif": 12, "northwest": 12, "solid": 12, "fine": 12, "dan": 12, "same": [12, 15], "highli": 12, "crash": 12, "paul": 12, "pace": 12, "action": 12, "urban": 12, "lo": 12, "angel": 12, "apart": 12, "relationship": [12, 13, 16], "jim": 12, "0x7f5215ef6ea0": 12, "\u0e01\u0e32\u0e25\u0e04\u0e23": 12, "\u0e07\u0e19\u0e32\u0e19\u0e21\u0e32\u0e41\u0e25": 12, "min_p": 12, "005": 12, "\u0e27\u0e07\u0e2a\u0e2d\u0e07\u0e2b\u0e19": 12, "\u0e10\u0e32\u0e19\u0e30\u0e23": 12, "\u0e33\u0e23\u0e27\u0e22": 12, "\u0e41\u0e25\u0e30\u0e40\u0e1b": 12, "\u0e19\u0e25": 12, "\u0e01\u0e2a\u0e32\u0e27\u0e02\u0e2d\u0e07": 12, "\u0e14\u0e23": 12, "\u0e42\u0e04\u0e25": 12, "\u0e1a\u0e1a\u0e17\u0e42\u0e14\u0e22": 12, "\u0e2d\u0e25": 12, "\u0e01\u0e0a\u0e32\u0e22\u0e04\u0e19\u0e42\u0e15\u0e02\u0e2d\u0e07": 12, "\u0e42\u0e2d\u0e25": 12, "\u0e40\u0e27\u0e2d\u0e23": [12, 15], "\u0e21\u0e32\u0e23\u0e14\u0e32": 12, "\u0e27\u0e07\u0e41\u0e23\u0e01": 12, "\u0e40\u0e02\u0e32\u0e40\u0e1b": 12, "\u0e42\u0e2d\u0e25\u0e25": 12, "\u0e40\u0e02\u0e32\u0e21": 12, "\u0e41\u0e25\u0e30\u0e41\u0e21": 12, "\u0e19\u0e04\u0e19\u0e17": 12, "\u0e15\u0e43\u0e08\u0e2d": 12, "\u0e2d\u0e19\u0e42\u0e22\u0e19": 12, "\u0e19\u0e40\u0e1e": 12, "\u0e2d\u0e19\u0e2a\u0e19": 12, "\u0e17\u0e01": 12, "\u0e04\u0e32\u0e25": 12, "\u0e42\u0e23\u0e2a": 12, "\u0e25\u0e2a": 12, "\u0e2d\u0e02\u0e2d\u0e07\u0e40\u0e18\u0e2d\u0e19": 12, "\u0e43\u0e19\u0e1b": 12, "1967": 12, "\u0e18\u0e44\u0e14": 12, "\u0e1a\u0e01\u0e32\u0e23\u0e40\u0e25": 12, "\u0e22\u0e07\u0e14": 12, "\u0e08\u0e2d\u0e23": 12, "\u0e2a\u0e1b": 12, "\u0e25\u0e40\u0e1a": 12, "\u0e0b\u0e32\u0e23": 12, "\u0e2d\u0e21\u0e32\u0e01": 12, "\u0e1a\u0e01\u0e32\u0e23\u0e14": 12, "\u0e41\u0e25\u0e08\u0e32\u0e01\u0e41\u0e21": 12, "\u0e07\u0e17\u0e33\u0e43\u0e2b": [12, 15], "\u0e01\u0e29\u0e30\u0e14": 12, "\u0e32\u0e19\u0e27": 12, "\u0e41\u0e25\u0e30\u0e40\u0e17\u0e04\u0e42\u0e19\u0e42\u0e25\u0e22": 12, "\u0e07\u0e08\u0e32\u0e01\u0e2a\u0e33\u0e40\u0e23": 12, "\u0e08\u0e01\u0e32\u0e23\u0e28": 12, "\u0e01\u0e29\u0e32\u0e08\u0e32\u0e01\u0e21\u0e2b\u0e32\u0e27": 12, "\u0e17\u0e22\u0e32\u0e25": 12, "\u0e22\u0e41\u0e25": 12, "\u0e19\u0e17\u0e32\u0e07\u0e44\u0e1b\u0e17": 12, "\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e2d\u0e2d\u0e2a\u0e40\u0e15\u0e23\u0e40\u0e25": 12, "\u0e01\u0e29\u0e32": 12, "\u0e41\u0e25\u0e30\u0e43\u0e19\u0e0a": 12, "\u0e27\u0e07\u0e19": 12, "\u0e19\u0e21\u0e32\u0e23\u0e14\u0e32": 12, "vipasha": 13, "bansal": 13, "acl2024": 13, "srw": 13, "abstract": 13, "nlu": 13, "droganova": 13, "zeman": 13, "schuster": 13, "2016": 13, "consum": 13, "prefer": 13, "bender": 13, "2015": 13, "paper": 13, "rich": 13, "svc": 13, "clear": 13, "surfac": 13, "form": 13, "fulli": 13, "unifi": 13, "appropri": 13, "copestak": 13, "2005": 13, "featur": 13, "within": 13, "delph": 13, "IN": 13, "slayden": 13, "2009": 13, "increas": [13, 15], "verifi": 13, "coverag": 13, "decreas": 13, "ambigu": 13, "vipashab94": 13, "thaigrammar": 13, "wait": 13, "guid": 13, "wannaphong": 13, "phatthiyaphaibun": 13, "latest": 13, "ac": 13, "sweaglesw": 13, "linguist": 13, "x86": 13, "run_ac": 13, "xvzf": 13, "git": 13, "clone": 13, "pydelphin": 13, "2024": 13, "216": 13, "129": 13, "123": 13, "154": 13, "2001": 13, "1868": 13, "a100": 13, "105": 13, "beae": 13, "c5ff": 13, "fe24": 13, "d767": 13, "2526613": 13, "4m": 13, "applic": [13, 16], "gzip": 13, "41m": 13, "37mb": 13, "english": [13, 15], "postagg": 13, "hmm": 13, "erg": 13, "tdl": 13, "qc": 13, "releas": [13, 15, 16], "remot": 13, "compress": [13, 16], "delta": 13, "reus": 13, "pack": 13, "receiv": 13, "mib": 13, "186": 13, "readthedoc": 13, "html": [13, 14], "delphin": 13, "compil": 13, "dat": 13, "execut": [13, 14], "\u0e07\u0e2a": 13, "id": 13, "328": 13, "subj": 13, "322": 13, "bare": 13, "_33142": 13, "327": 13, "comp": 13, "324": 13, "obj": 13, "323": 13, "deic": 13, "tran": 13, "lex": 13, "\u0e44\u0e1b_4158": 13, "326": 13, "\u0e2d_4236": 13, "325": [13, 14], "\u0e2d_4404": 13, "ltop": 13, "h0": 13, "sf": [13, 14], "prop": [13, 16], "rel": 13, "named_rel": 13, "lbl": 13, "h4": 13, "carg": 13, "arg0": 13, "x3": 13, "exist_q_rel": 13, "h6": 13, "rstr": 13, "h7": 13, "h8": 13, "_go_v_1_rel": 13, "h1": 13, "e9": 13, "arg1": 13, "arg2": 13, "x10": 13, "cog": 13, "st": 13, "purpose_rel": 13, "e11": 13, "_buy_v_1_rel": 13, "x12": 13, "_book_n_1_rel": 13, "h13": 13, "h14": 13, "h15": 13, "h16": 13, "hcon": 13, "qeq": 13, "icon": 13, "vp": 13, "flag": 13, "ascor": 13, "603": 13, "598": 13, "\u0e1c\u0e21_4375": 13, "602": 13, "\u0e08\u0e30_33089": 13, "601": 13, "\u0e19_33088": 13, "600": 13, "599": 13, "\u0e04\u0e19_4133": 13, "_4290": 13, "tens": 13, "fut": 13, "pron_rel": 13, "sg": 13, "gend": 13, "speci": 13, "h5": 13, "_be_v_id_rel": 13, "x8": 13, "_person_n_1_rel": 13, "h9": 13, "_good_a_1_rel": 13, "e10": 13, "h11": 13, "h12": 13, "airesearch": [14, 15], "larg": 14, "xlsr": 14, "cu113": 14, "torchvis": 14, "torchaudio": 14, "pytorch": 14, "torch_stabl": 14, "link": 14, "2bcu113": 14, "1821": 14, "834": 14, "43tcmalloc": 14, "alloc": 14, "1147494400": 14, "byte": 14, "0x55bf21ac6000": 14, "0x7faf12d1b615": 14, "0x55bf1efac4cc": 14, "0x55bf1f08c47a": 14, "0x55bf1efaf2": 14, "0x55bf1f0a0e1d": 14, "0x55bf1f022e99": 14, "0x55bf1f01d9ee": 14, "0x55bf1efb0bda": 14, "0x55bf1f022d00": 14, "0x55bf1f01f737": 14, "0x55bf1f0a1c66": 14, "0x55bf1f01edaf": 14, "0x55bf1efb1039": 14, "0x55bf1eff4409": 14, "0x55bf1efafc52": 14, "0x55bf1f022c25": 14, "0x55bf1f01e915": 14, "0x55bf1efb0afa": 14, "0x55bf1f01ec0d": 14, "1055": 14, "37tcmalloc": 14, "1434370048": 14, "0x55bf6611c000": 14, "1336": 14, "39tcmalloc": 14, "1792966656": 14, "0x55bfbb908000": 14, "1691": 14, "38tcmalloc": 14, "2241208320": 14, "01tcmalloc": 14, "1821458432": 14, "0x55bfa7428000": 14, "0x7faf12d1a1e7": 14, "0x55bf1efe2067": 14, "tcmalloc": 14, "2276827136": 14, "0x55c013d3c000": 14, "0x55bf1efb1271": 14, "pillow": 14, "cu111": 14, "onnxruntim": 14, "soundfil": 14, "manylinux_2_12_x86_64": 14, "91": 14, "post1": 14, "895": [14, 16], "manylinux_2_5_x86_64": 14, "596": 14, "flatbuff": 14, "743": 14, "wav2vec2model": 14, "hug": 14, "autotoken": [14, 15], "wav2vec2forctc": 14, "import_huggingface_model": 14, "origin": [14, 16], "from_pretrain": [14, 15], "correspond": 14, "audio": 14, "stabl": 14, "hubert": 14, "configuration_util": 14, "341": 14, "gradient_checkpoint": 14, "v5": [14, 15], "gradient_checkpointing_en": 14, "trainer": [14, 15], "api": 14, "trainingargu": [14, 15], "eval": 14, "mode": 14, "feature_extractor": [14, 15], "featureextractor": 14, "conv_lay": 14, "convlayerblock": 14, "layer_norm": [14, 15], "layernorm": 14, "512": 14, "elementwise_affin": 14, "conv": 14, "conv1d": 14, "kernel_s": 14, "stride": 14, "feature_project": 14, "featureproject": 14, "1024": 14, "pos_conv_emb": 14, "convolutionalpositionalembed": 14, "128": 14, "encoderlay": 14, "attent": 14, "selfattent": 14, "k_proj": 14, "v_proj": 14, "q_proj": 14, "out_proj": 14, "feed_forward": 14, "feedforward": 14, "intermediate_dens": 14, "4096": 14, "intermediate_dropout": 14, "output_dens": 14, "output_dropout": 14, "final_layer_norm": 14, "microsoft": 14, "window": 14, "ai": [14, 15], "ml": 14, "input_s": 14, "100000": 14, "audio_maxlen": 14, "dummy_input": 14, "randn": 14, "requires_grad": 14, "export": 14, "asr3": 14, "export_param": 14, "opset_vers": 14, "do_constant_fold": 14, "whether": 14, "constant": 14, "fold": 14, "input_nam": 14, "modelinput": 14, "output_nam": 14, "modeloutput": 14, "dynamic_ax": 14, "batch_siz": 14, "ax": [14, 15], "symbolic_help": 14, "caus": 14, "incorrect": 14, "dropbox": 14, "9kpeh8eodshcqhj": 14, "common_voice_th_23646850": 14, "wav": 14, "dl": 14, "mv": 14, "json": 14, "co": [14, 15], "r": [14, 16], "sig": 14, "scipi": 14, "wavfil": 14, "sp": 14, "new_rat": 14, "16000": 14, "ort_sess": 14, "inferencesess": 14, "k": [14, 15], "unk": 14, "_normal": 14, "vasudevgupta7": 14, "gsoc": 14, "src": 14, "l101": 14, "fork": [14, 15], "tf": 14, "seqlen": 14, "keepdim": 14, "var": 14, "squeez": 14, "sqrt": 14, "remove_adjac": 14, "3460423": 14, "asr": 14, "wav2vec2_onnx": 14, "ipynb": [14, 16], "sampling_r": 14, "sampl": [14, 16], "new_data": 14, "resampl": 14, "float32": 14, "ort_input": 14, "ort_out": 14, "_t1": 14, "easili": 15, "finetun": 15, "drive": 15, "1kbk6sbspzlwcnoe61adaqo30xxqoq9ko": 15, "scrollto": 15, "n5iacot9b3cf": 15, "specif": [15, 16], "thaixtransform": 15, "236": 15, "106": 15, "safetensor": 15, "fsspec": 15, "355": 15, "seqev": 15, "28115": 15, "d0f182fee94a7c129f5bd1265a3e0d2a52893384d6783d11c8bbd770ef695fac": 15, "2c": 15, "4b": 15, "b2": 15, "a90368d80567249f258a9c58240512046afb5563d794eda4b2": 15, "auto": 15, "camemberttoken": 15, "automodel": 15, "automodelformaskedlm": 15, "automodelforsequenceclassif": 15, "automodelfortokenclassif": 15, "process_transform": 15, "xlmr": 15, "mbert": 15, "downstream": 15, "att": 15, "uncas": 15, "largest": 15, "78": 15, "5gb": 15, "assort": 15, "subword": 15, "xlm": 15, "multilingu": 15, "104": 15, "level": 15, "syllabl": 15, "syllabel": 15, "sefr": 15, "model_nam": [15, 16], "thaiwordsnewmmtoken": 15, "thaiwordssyllabletoken": 15, "fakesefrcuttoken": 15, "thairobertatoken": 15, "public_model": 15, "param": 15, "revis": 15, "model_max_length": 15, "416": 15, "unexpect": 15, "robertatoken": 15, "simplest": 15, "given": 15, "\u0e07\u0e08": 15, "\u0e19\u0e17\u0e23": 15, "\u0e25\u0e40\u0e25\u0e22": 15, "\u0e07\u0e2d\u0e22": 15, "\u0e1a\u0e19\u0e1e": 15, "454": 15, "\u0e02\u0e2d\u0e07\u0e2d\u0e33\u0e40\u0e20\u0e2d\u0e27": 15, "\u0e14\u0e23\u0e30\u0e22\u0e2d\u0e07": 15, "answer": [15, 16], "\u0e15\u0e32\u0e23\u0e32\u0e07\u0e40\u0e21\u0e15\u0e23": 15, "\u0e15\u0e32\u0e23\u0e32\u0e07\u0e27\u0e32": 15, "\u0e44\u0e21\u0e25": 15, "substitut": 15, "instanc": [15, 16], "000": 15, "trane": 15, "proven": 15, "aug": 15, "fill_mask": 15, "fill": 15, "input_text": 15, "u0e02": 15, "u0e2d": 15, "u0e40": 15, "u0e07": 15, "u0e34": 15, "u0e19": 15, "u0e01": 15, "u0e39": 15, "u0e49": 15, "u003cmask": 15, "u0e2b": 15, "u0e48": 15, "u0e22": 15, "\u0e42\u0e04\u0e23\u0e07\u0e01\u0e32\u0e23\u0e21": 15, "\u0e23\u0e30\u0e22\u0e30\u0e17\u0e32\u0e07\u0e17": 15, "\u0e07\u0e2b\u0e21\u0e14": 15, "\u0e08\u0e33\u0e19\u0e27\u0e19\u0e2a\u0e16\u0e32\u0e19": 15, "\u0e2a\u0e16\u0e32\u0e19": 15, "\u0e19\u0e40\u0e2a": 15, "\u0e19\u0e17\u0e32\u0e07\u0e2b\u0e25": 15, "\u0e01\u0e43\u0e19\u0e41\u0e19\u0e27\u0e40\u0e2b\u0e19": 15, "\u0e43\u0e15": 15, "\u0e15\u0e32\u0e21\u0e41\u0e19\u0e27\u0e17\u0e32\u0e07\u0e23\u0e16\u0e44\u0e1f\u0e40\u0e14": 15, "\u0e21\u0e02\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e23\u0e16\u0e44\u0e1f\u0e41\u0e2b": 15, "\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22": 15, "\u0e32\u0e27\u0e2b\u0e19": 15, "\u0e32\u0e40\u0e19": 15, "\u0e2d\u0e40\u0e23": 15, "\u0e22\u0e01\u0e40\u0e1b": 15, "\u0e19\u0e20\u0e32\u0e29\u0e32": 15, "gy\u016bdon": 15, "\u0e08\u0e30\u0e44\u0e1b\u0e40\u0e1b": 15, "\u0e42\u0e14\u0e14\u0e40\u0e14": 15, "\u0e19\u0e1a\u0e19\u0e1f\u0e32\u0e01\u0e1f": 15, "\u0e08\u0e30\u0e44\u0e1b\u0e44\u0e02\u0e27": 15, "\u0e02\u0e27": 15, "\u0e32\u0e40\u0e2d\u0e32\u0e21\u0e32\u0e14": 15, "\u0e07\u0e43\u0e08\u0e1d": 15, "\u0e04\u0e22\u0e2d\u0e14": 15, "\u0e02\u0e2d\u0e40\u0e07": 15, "\u0e01\u0e14": [15, 16], "allow": 15, "preprocess_input_text": 15, "boolean": 15, "fill_mask_pad": 15, "513759434223175": 15, "4263": 15, "token_str": 15, "\u0e23\u0e32\u0e21": 15, "\u0e23\u0e32\u0e21\u0e2b\u0e19": 15, "05489557236433029": 15, "552": 15, "0474877767264843": 15, "125": 15, "037654660642147064": 15, "5901": 15, "\u0e2a\u0e30\u0e14\u0e27\u0e01": 15, "\u0e2a\u0e30\u0e14\u0e27\u0e01\u0e2b\u0e19": 15, "026551486924290657": 15, "1913": 15, "\u0e19\u0e32": 15, "\u0e19\u0e32\u0e2b\u0e19": 15, "wisesight_senti": 15, "social": 15, "media": 15, "wongnai_review": 15, "awai": [15, 16], "classify_multiclass": 15, "u0e04": 15, "u0e1a": 15, "u0e32": 15, "u0e47": 15, "u0e21": 15, "u0e31": 15, "u0e41": 15, "u0e17": 15, "u0e15": 15, "u0e4c": 15, "u0e25": 15, "u0e303": 15, "u0e27": 15, "u0e14": 15, "u0e42": 15, "u0e23": 15, "u0e30": 15, "u0e1b": 15, "u0e37": 15, "\u0e2d\u0e22\u0e32\u0e01\u0e01": 15, "\u0e19\u0e27\u0e30\u0e41\u0e01": 15, "\u0e2d\u0e21\u0e32\u0e43\u0e2b": 15, "\u0e2d\u0e22\u0e08": 15, "\u0e13\u0e41\u0e01\u0e21\u0e32\u0e01": 15, "\u0e42\u0e04\u0e15\u0e23\u0e1a": 15, "\u0e32\u0e40\u0e25\u0e22": 15, "\u0e1f\u0e2d\u0e23": 15, "\u0e01\u0e15\u0e25\u0e32\u0e14": 15, "\u0e19\u0e40\u0e14": 15, "prachachat": 15, "\u0e15\u0e25\u0e32\u0e14\u0e23\u0e16\u0e22\u0e19\u0e15": 15, "\u0e23\u0e2a\u0e0a\u0e32\u0e40\u0e02": 15, "\u0e22\u0e27\u0e40\u0e02": 15, "\u0e2b\u0e2d\u0e21": 15, "\u0e01\u0e25\u0e21\u0e01\u0e25": 15, "\u0e14\u0e41\u0e1a\u0e1a\u0e08": 15, "\u0e14\u0e2a\u0e19": 15, "\u0e27\u0e19\u0e44\u0e2d\u0e28\u0e04\u0e23": 15, "\u0e17\u0e32\u0e19\u0e41\u0e25": 15, "\u0e27\u0e23\u0e2a\u0e21": 15, "\u0e19\u0e2d\u0e2d\u0e01\u0e43\u0e1a\u0e44\u0e21": 15, "\u0e46\u0e21\u0e32\u0e01\u0e01\u0e27": 15, "\u0e32\u0e0a\u0e32\u0e40\u0e02": 15, "\u0e27\u0e01": 15, "\u0e2b\u0e27\u0e32\u0e19\u0e44\u0e1b": 15, "\u0e42\u0e14\u0e22\u0e23\u0e27\u0e21\u0e41\u0e25": 15, "\u0e27\u0e40\u0e09\u0e22\u0e21\u0e32\u0e01\u0e01": 15, "\u0e33\u0e40\u0e1b\u0e25": 15, "\u0e32\u0e1a\u0e23": 15, "\u0e01\u0e32\u0e23\u0e1f\u0e23": 15, "\u0e40\u0e04\u0e22\u0e1a": 15, "\u0e32\u0e40\u0e2d": 15, "\u0e21\u0e40\u0e04\u0e01": 15, "\u0e1a\u0e41\u0e21": 15, "\u0e25\u0e303": 15, "\u0e42\u0e04\u0e15\u0e23\u0e2b\u0e19": 15, "\u0e01\u0e41\u0e25\u0e30\u0e42\u0e04\u0e15\u0e23\u0e40\u0e1b\u0e25": 15, "\u0e2d\u0e07\u0e07\u0e07\u0e07": 15, "892067551612854": 15, "entiti": 15, "recognit": 15, "classify_token": 15, "ignore_label": 15, "token_classif": 15, "169": 15, "aggregation_strategi": 15, "u0e35": 15, "u0e2a": 15, "u0e38": 15, "u0e44": 15, "\u0e41\u0e14\u0e07\u0e40\u0e14": 15, "\u0e2d\u0e14\u0e23\u0e2d\u0e1a\u0e2a\u0e2d\u0e07": 15, "\u0e01\u0e40\u0e22": 15, "\u0e41\u0e21\u0e19\u0e2f": 15, "\u0e44\u0e19\u0e40\u0e15": 15, "\u0e22\u0e40\u0e2a": 15, "\u0e22\u0e07\u0e2a": 15, "\u0e0d\u0e40\u0e2a": 15, "\u0e22\u0e08\u0e32\u0e01\u0e20": 15, "\u0e22\u0e18\u0e23\u0e23\u0e21\u0e0a\u0e32\u0e15": 15, "\u0e21\u0e32\u0e01\u0e2a": 15, "\u0e17\u0e33\u0e43\u0e2b": 15, "\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e0a\u0e19\u0e01\u0e27": 15, "\u0e2d\u0e07\u0e2d\u0e1e\u0e22\u0e1e\u0e2d\u0e2d\u0e01\u0e08\u0e32\u0e01\u0e1e": 15, "\u0e1e\u0e25\u0e02\u0e2d\u0e07\u0e1e\u0e32\u0e22": 15, "\u0e32\u0e19\u0e40\u0e23": 15, "\u0e2d\u0e19\u0e40\u0e01": 15, "\u0e2d\u0e1a": 15, "700": 15, "\u0e07\u0e1e": 15, "\u0e07\u0e16\u0e25": 15, "\u0e21\u0e25\u0e07\u0e21\u0e32": 15, "\u0e32\u0e07\u0e04\u0e27\u0e32\u0e21\u0e40\u0e2a": 15, "\u0e22\u0e2b\u0e32\u0e22\u0e04": 15, "\u0e14\u0e40\u0e1b": 15, "\u0e25\u0e04": 15, "450": 15, "\u0e32\u0e19\u0e2b\u0e22\u0e27\u0e19": 15, "\u0e01\u0e17\u0e0a": 15, "\u0e40\u0e15\u0e23": 15, "\u0e22\u0e21\u0e17\u0e14\u0e25\u0e2d\u0e07\u0e1b\u0e23\u0e30\u0e21": 15, "3\u0e08": 15, "entity_group": 15, "97664016": 15, "99976474": 15, "less": 15, "tradit": 15, "logist": 15, "regress": 15, "forest": 15, "boost": 15, "imag": 15, "mrpeerat": 15, "bramvanroi": 15, "extract_last_k_token": 15, "last_k": 15, "hidden_st": 15, "last_k_token": 15, "concatenated_hidden_st": 15, "sum": 15, "_extract_last_k_lay": 15, "aggregator_fn": 15, "return_tensor": 15, "pt": 15, "no_grad": 15, "output_hidden_st": 15, "select": 15, "hidden": 15, "cat": 15, "aggregated_hidden_st": 15, "extract_last_k_lay": 15, "pretrained_model_name_or_path": 15, "lm_head": 15, "japanes": 15, "food": [15, 16], "gyudon": 15, "italian": 15, "macaroni": 15, "cosin": 15, "consid": 15, "last": 15, "markdown": 15, "obtain": 15, "aggreg": 15, "via": 15, "summat": 15, "represnetaiton": 15, "text1": 15, "\u0e19\u0e0a\u0e2d\u0e1a\u0e01": 15, "\u0e19\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e0d": 15, "text2": 15, "\u0e19\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e2d": 15, "\u0e15\u0e32\u0e40\u0e25": 15, "text3": 15, "text4": 15, "\u0e01\u0e01\u0e30\u0e42\u0e23\u0e19": 15, "t1": 15, "t3": 15, "t4": 15, "pairwis": 15, "cosine_similar": 15, "sim_matrix": 15, "cmap": 15, "blue": 15, "bo": 15, "fanci": 15, "mayb": 15, "march": 15, "wanchanberta": 15, "xnli": 15, "pair": 15, "branch": 15, "xnli_th": 15, "repositori": 15, "zero_classifi": 15, "u0e0d": 15, "u0e0a": 15, "u0e1": 15, "u0e18": 15, "scb": 15, "10x": 15, "u0e43": 15, "blockfi": 15, "startup": 15, "digit": 15, "asset": 15, "u0e13": 15, "u0e10": 15, "u201c": 15, "u201d": 15, "u0e1c": 15, "u0e20": 15, "u0e29": 15, "u201cwher": 15, "u0e08": 15, "u0e16": 15, "u0e1f": 15, "u0e28": 15, "u0e33": 15, "u0e11": 15, "u0e1d": 15, "candidate_label": 15, "\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01": 15, "\u0e23\u0e01": 15, "\u0e01\u0e32\u0e23\u0e40\u0e21": 15, "\u0e40\u0e17\u0e04\u0e42\u0e19\u0e42\u0e25\u0e22": 15, "\u0e25\u0e1b\u0e30": 15, "\u0e19\u0e40\u0e17": 15, "hypothesis_templ": 15, "\u0e1e\u0e32\u0e14\u0e2b": 15, "\u0e27\u0e02": 15, "\u0e32\u0e27\u0e19": 15, "\u0e21\u0e44\u0e1a\u0e40\u0e14\u0e19\u0e2b\u0e32\u0e23": 15, "\u0e1a\u0e0d": 15, "\u0e01\u0e23\u0e30\u0e0a": 15, "\u0e1a\u0e04\u0e27\u0e32\u0e21\u0e40\u0e1b": 15, "\u0e19\u0e18\u0e21": 15, "34431710839271545": 15, "3195861279964447": 15, "18645761907100677": 15, "14963914453983307": 15, "v0": 16, "word2vec": 16, "oppos": 16, "latter": 16, "garner": 16, "556": 16, "dimens": 16, "descend": 16, "frequenc": 16, "readabl": 16, "vector": 16, "document": 16, "thwiki_lm": 16, "word2vec_exampl": 16, "inlin": 16, "manifold": 16, "tsne": 16, "fm": 16, "load_word2vec_format": 16, "wordvector": 16, "thai2fit_wv": 16, "get_model": 16, "thai2dict": 16, "index2word": 16, "from_dict": 16, "orient": 16, "290": 16, "291": 16, "292": 16, "293": 16, "294": 16, "295": 16, "296": 16, "298": 16, "299": 16, "308956": 16, "097699": 16, "116745": 16, "215612": 16, "015768": 16, "064163": 16, "062168": 16, "039649": 16, "864940": 16, "846904": 16, "142418": 16, "033241": 16, "171581": 16, "624864": 16, "009358": 16, "449131": 16, "120130": 16, "122195": 16, "450617": 16, "071318": 16, "010751": 16, "618971": 16, "129665": 16, "035460": 16, "007560": 16, "027607": 16, "397824": 16, "026543": 16, "254075": 16, "168328": 16, "105786": 16, "180930": 16, "101630": 16, "070885": 16, "037263": 16, "183606": 16, "049088": 16, "672288": 16, "293044": 16, "592576": 16, "015736": 16, "258926": 16, "052953": 16, "153728": 16, "005985": 16, "021081": 16, "041088": 16, "057312": 16, "633230": 16, "442729": 16, "009408": 16, "252576": 16, "305512": 16, "372542": 16, "049151": 16, "568470": 16, "266586": 16, "400800": 16, "784650": 16, "197369": 16, "189711": 16, "174774": 16, "171124": 16, "186771": 16, "054294": 16, "114150": 16, "109456": 16, "094466": 16, "447015": 16, "042377": 16, "168676": 16, "148738": 16, "680404": 16, "097702": 16, "020270": 16, "182967": 16, "083949": 16, "006287": 16, "707434": 16, "070234": 16, "156962": 16, "231863": 16, "080312": 16, "323157": 16, "215695": 16, "055145": 16, "420794": 16, "016842": 16, "256759": 16, "832864": 16, "044267": 16, "147186": 16, "105424": 16, "907078": 16, "009299": 16, "550953": 16, "139337": 16, "031696": 16, "670379": 16, "008048": 16, "428813": 16, "031194": 16, "041922": 16, "036608": 16, "008106": 16, "076470": 16, "782270": 16, "033361": 16, "606864": 16, "440520": 16, "024458": 16, "025031": 16, "103389": 16, "078255": 16, "034323": 16, "459774": 16, "748643": 16, "337775": 16, "487408": 16, "511535": 16, "287710": 16, "064193": 16, "205076": 16, "146356": 16, "071343": 16, "039451": 16, "845461": 16, "163763": 16, "018096": 16, "272786": 16, "051024": 16, "532856": 16, "131856": 16, "090323": 16, "058895": 16, "151262": 16, "420358": 16, "055971": 16, "930814": 16, "163908": 16, "239587": 16, "303620": 16, "079953": 16, "453045": 16, "528826": 16, "161692": 16, "235725": 16, "099673": 16, "691668": 16, "536159": 16, "110436": 16, "297495": 16, "217414": 16, "045158": 16, "066647": 16, "190095": 16, "304333": 16, "724927": 16, "995488": 16, "716609": 16, "120522": 16, "355783": 16, "168180": 16, "377733": 16, "158624": 16, "047249": 16, "361140": 16, "161460": 16, "913314": 16, "345037": 16, "116285": 16, "318218": 16, "356664": 16, "519889": 16, "130475": 16, "125772": 16, "101328": 16, "382658": 16, "205359": 16, "340139": 16, "086848": 16, "155231": 16, "133015": 16, "039913": 16, "183761": 16, "115142": 16, "940854": 16, "066565": 16, "399744": 16, "146722": 16, "019406": 16, "181474": 16, "099863": 16, "516092": 16, "201697": 16, "249139": 16, "252957": 16, "138815": 16, "018209": 16, "232265": 16, "sne": 16, "plane": 16, "thai2plot": 16, "tnse": 16, "n_compon": 16, "init": 16, "pca": 16, "n_iter": 16, "fit_transform": 16, "wb": 16, "jeffmcneil": 16, "dip": 16, "sipa": 16, "regular": 16, "111": 16, "479628": 16, "468k": 16, "octet": 16, "regu": 16, "468": 16, "39k": 16, "stolen": 16, "blog": 16, "manash": 16, "a71e6d55f27": 16, "plot_with_label": 16, "low_dim_emb": 16, "figsiz": 16, "axis_lim": 16, "assert": 16, "figur": 16, "inch": 16, "scatter": 16, "fontproperti": 16, "fname": 16, "xy": 16, "xytext": 16, "textcoord": 16, "offset": 16, "va": 16, "bottom": 16, "savefig": 16, "\u0e2b\u0e0d": 16, "\u0e1e\u0e23\u0e30\u0e23\u0e32\u0e0a\u0e32": 16, "\u0e0a\u0e32\u0e22": 16, "\u0e1e\u0e23\u0e30\u0e23\u0e32\u0e0a": 16, "\u0e19\u0e32\u0e22\u0e01\u0e23": 16, "\u0e10\u0e21\u0e19\u0e15\u0e23": 16, "\u0e2d\u0e33\u0e19\u0e32\u0e08": 16, "\u0e1b\u0e23\u0e30\u0e18\u0e32\u0e19\u0e32\u0e18": 16, "\u0e07\u0e01": 16, "\u0e42\u0e1a\u0e23\u0e32\u0e13": 16, "\u0e44\u0e14\u0e42\u0e19\u0e40\u0e2a\u0e32\u0e23": 16, "most_similar_cosmul": 16, "7954867482185364": 16, "7382755279541016": 16, "\u0e1e\u0e23\u0e30\u0e40\u0e08": 16, "7046602368354797": 16, "\u0e32\u0e0a\u0e32\u0e22": 16, "6979373097419739": 16, "\u0e1e\u0e23\u0e30\u0e21\u0e2b\u0e32\u0e01\u0e29": 16, "6972416639328003": 16, "\u0e32\u0e1f": 16, "\u0e32\u0e2b\u0e0d": 16, "6871017217636108": 16, "\u0e32\u0e41\u0e1c": 16, "6827988624572754": 16, "\u0e1e\u0e23\u0e30\u0e1e": 16, "\u0e17\u0e18\u0e40\u0e08": 16, "671796977519989": 16, "\u0e21\u0e01": 16, "\u0e0e\u0e23\u0e32\u0e0a\u0e01": 16, "\u0e21\u0e32\u0e23": 16, "6711805462837219": 16, "\u0e19\u0e32\u0e22\u0e1e\u0e25": 16, "6694187521934509": 16, "sample_word": 16, "sample_idx": 16, "sample_plot": 16, "\u0e23\u0e2d\u0e07\u0e19\u0e32\u0e22\u0e01\u0e23": 16, "4945054054260254": 16, "400755763053894": 16, "3626699447631836": 16, "\u0e19\u0e40\u0e2d\u0e01": 16, "3437265157699585": 16, "\u0e0d\u0e0a\u0e32\u0e01\u0e32\u0e23\u0e17\u0e2b\u0e32\u0e23\u0e1a\u0e01": 16, "3405414819717407": 16, "\u0e1a\u0e20\u0e32\u0e1e\u0e22\u0e19\u0e15\u0e23": 16, "3339321613311768": 16, "\u0e01\u0e1f": 16, "\u0e15\u0e1a\u0e2d\u0e25": 16, "331659197807312": 16, "\u0e40\u0e2d\u0e01\u0e2d": 16, "\u0e04\u0e23\u0e23\u0e32\u0e0a\u0e17": 16, "3306005001068115": 16, "3243674039840698": 16, "\u0e20\u0e32\u0e1e\u0e2a\u0e15\u0e23": 16, "3231494426727295": 16, "\u0e15\u0e27": 16, "\u0e07\u0e21": 16, "537461519241333": 16, "\u0e22\u0e07\u0e25": 16, "\u0e27\u0e22\u0e19\u0e21": 16, "5080005526542664": 16, "\u0e41\u0e21\u0e25\u0e07": 16, "5048903226852417": 16, "\u0e1c\u0e25\u0e44\u0e21": 16, "4839756190776825": 16, "47641509771347046": 16, "46431201696395874": 16, "45941096544265747": 16, "45185261964797974": 16, "4504697620868683": 16, "44425833225250244": 16, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e40\u0e0a": 16, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e2a": 16, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e40\u0e22": 16, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e01\u0e25\u0e32\u0e07\u0e27": 16, "wherea": 16, "meal": 16, "\u0e25\u0e32\u0e01": 16, "push": 16, "rest": 16, "eat": 16, "reli": 16, "\u0e01\u0e40\u0e02\u0e22": 16, "associ": 16, "male": 16, "gender": 16, "\u0e2b\u0e21\u0e32": 16, "\u0e2b\u0e21\u0e2d": 16, "china": 16, "beij": 16, "itali": 16, "rome": 16, "\u0e42\u0e23\u0e21": 16, "\u0e15\u0e32\u0e25": 16, "3135956": 16, "42819628": 16, "27347285": 16, "17900795": 16, "02666693": 16, "24352394": 16, "\u0e42\u0e15\u0e40\u0e01": 16, "contribut": 16, "sakar": 16, "atv": 16, "adapt": 16, "spell": 16, "cpmp": 16, "w_rank": 16, "thai_lett": 16, "\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e24\u0e45\u0e25\u0e26\u0e26\u0e45\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e30": 16, "\u0e40\u0e41\u0e42\u0e43\u0e44": 16, "findal": 16, "lower": 16, "invers": 16, "proxi": 16, "dictionari": 16, "max": 16, "candid": 16, "edits1": 16, "edits2": 16, "subset": 16, "appear": 16, "delet": 16, "transpos": 16, "replac": 16, "insert": 16, "e1": 16, "\u0e14\u0e19\u0e32": 16, "\u0e12\u0e19\u0e32": 16, "\u0e02\u0e23": 16, "\u0e08\u0e22": 16, "\u0e19\u0e30\u0e04": 16}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"welcom": 0, "pythainlp": [0, 1, 4, 5, 7, 10], "tutori": 0, "han": 1, "coref": 1, "thai": [1, 2, 3, 4, 6, 7, 12, 13, 14], "corefer": 1, "resolut": 1, "depend": 2, "parser": [2, 6], "find": 3, "all": 3, "rhyme": 3, "word": [3, 7, 16], "from": 3, "translat": 4, "instal": [4, 5, 13, 14, 15], "import": [4, 7, 16], "list": 4, "languag": [4, 9, 11, 12], "english": 4, "nlpo3": 5, "dictionari": [5, 7], "custom": [5, 7], "chunk": 6, "get": [7, 15], "start": [7, 15], "charact": 7, "check": 7, "string": 7, "contain": 7, "how": 7, "mani": 7, "collat": 7, "date": 7, "time": 7, "format": 7, "spellout": 7, "token": [7, 15], "segment": 7, "sentenc": 7, "subword": [7, 8], "syllabl": 7, "cluster": 7, "tcc": 7, "low": 7, "level": 7, "oper": 7, "transliter": 7, "normal": 7, "digit": 7, "convers": 7, "soundex": 7, "spellcheck": [7, 16], "frequenc": 7, "part": [7, 8], "speech": [7, 8], "tag": 7, "name": [7, 8], "entiti": [7, 8], "vector": [7, 15], "number": 7, "spell": 7, "out": 7, "wangchanberta": [8, 15], "recognit": 8, "wisesight": [9, 15], "sentiment": [9, 15], "analysi": 9, "text": [9, 12, 15], "processor": 9, "logist": 9, "regress": 9, "process": 9, "file": 9, "csv": 9, "load": 9, "data": 9, "train": 9, "valid": 9, "split": 9, "creat": 9, "featur": [9, 15], "fit": 9, "model": [9, 11, 12, 14, 15], "see": 9, "result": 9, "ulmfit": [9, 11], "finetun": [9, 11], "classifi": [9, 15], "spaci": 10, "wongnai": [11, 15], "review": [11, 15], "classif": [11, 15], "oversampl": 11, "fasttext": 11, "linearsvc": 11, "submiss": 11, "wiki": 12, "gener": 12, "semant": 13, "represent": 13, "automat": 13, "deriv": 13, "serial": 13, "verb": 13, "construct": 13, "A": 13, "grammar": 13, "base": 13, "approach": 13, "usag": 13, "wav2vec2": 14, "onnx": 14, "build": 14, "infer": 14, "notebook": 15, "choos": 15, "pretrain": 15, "mask": 15, "predict": 15, "sequenc": 15, "multi": 15, "class": 15, "thainer": 15, "lst20": 15, "document": 15, "extract": 15, "zero": 15, "shot": 15, "thai2vec": 16, "embed": 16, "exampl": 16, "arithmet": 16, "doesn": 16, "t": 16, "match": 16, "cosin": 16, "similar": 16}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "nbsphinx": 4, "sphinx": 57}, "alltitles": {"Welcome to PyThaiNLP Tutorials": [[0, "welcome-to-pythainlp-tutorials"]], "Tutorials:": [[0, null]], "\ud83e\udebf Han-Coref: Thai Coreference resolution by PyThaiNLP": [[1, "\ud83e\udebf-Han-Coref:-Thai-Coreference-resolution-by-PyThaiNLP"]], "Thai Dependency Parser": [[2, "Thai-Dependency-Parser"]], "Find all Thai rhyming words from Thai word": [[3, "Find-all-Thai-rhyming-words-from-Thai-word"]], "PyThaiNLP Translate": [[4, "PyThaiNLP-Translate"]], "Install": [[4, "Install"], [13, "Install"], [14, "Install"]], "Translate": [[4, "Translate"]], "Import": [[4, "Import"]], "List language": [[4, "List-language"]], "English to Thai": [[4, "English-to-Thai"]], "Thai to English": [[4, "Thai-to-English"]], "nlpO3": [[5, "nlpO3"]], "Installation": [[5, "Installation"], [15, "Installation"]], "PyThaiNLP dictionary": [[5, "PyThaiNLP-dictionary"]], "Custom dictionary": [[5, "Custom-dictionary"]], "Thai Chunk Parser": [[6, "Thai-Chunk-Parser"]], "PyThaiNLP Get Started": [[7, "PyThaiNLP-Get-Started"]], "Import PyThaiNLP": [[7, "Import-PyThaiNLP"]], "Thai Characters": [[7, "Thai-Characters"]], "Checking if a string contains Thai character or not, or how many": [[7, "Checking-if-a-string-contains-Thai-character-or-not,-or-how-many"]], "Collation": [[7, "Collation"]], "Date/Time Format and Spellout": [[7, "Date/Time-Format-and-Spellout"]], "Date/Time Format": [[7, "Date/Time-Format"]], "Time Spellout": [[7, "Time-Spellout"]], "Tokenization and Segmentation": [[7, "Tokenization-and-Segmentation"]], "Sentence": [[7, "Sentence"]], "Word": [[7, "Word"]], "Subword, syllable, and Thai Character Cluster (TCC)": [[7, "Subword,-syllable,-and-Thai-Character-Cluster-(TCC)"]], "Subword tokenization": [[7, "Subword-tokenization"]], "Syllable tokenization": [[7, "Syllable-tokenization"]], "Low-level subword operations": [[7, "Low-level-subword-operations"]], "Transliteration": [[7, "Transliteration"]], "Normalization": [[7, "Normalization"]], "Digit conversion": [[7, "Digit-conversion"]], "Soundex": [[7, "Soundex"]], "Spellchecking": [[7, "Spellchecking"], [16, "Spellchecking"]], "Spellchecking - Custom dictionary and word frequency": [[7, "Spellchecking---Custom-dictionary-and-word-frequency"]], "Part-of-Speech Tagging": [[7, "Part-of-Speech-Tagging"]], "Named-Entity Tagging": [[7, "Named-Entity-Tagging"]], "Word Vector": [[7, "Word-Vector"]], "Number Spell Out": [[7, "Number-Spell-Out"]], "Wangchanberta": [[8, "Wangchanberta"]], "Named Entity Recognition": [[8, "Named-Entity-Recognition"]], "Part of speech": [[8, "Part-of-speech"]], "Subword": [[8, "Subword"]], "Wisesight Sentiment Analysis": [[9, "Wisesight-Sentiment-Analysis"]], "Text Processor for Logistic Regression": [[9, "Text-Processor-for-Logistic-Regression"]], "Process Text Files to CSVs": [[9, "Process-Text-Files-to-CSVs"]], "Load Data": [[9, "Load-Data"]], "Train-validation Split": [[9, "Train-validation-Split"]], "Logistic Regression": [[9, "Logistic-Regression"]], "Create Features": [[9, "Create-Features"]], "Fit Model": [[9, "Fit-Model"]], "See Results": [[9, "See-Results"], [9, "id1"]], "ULMFit Model": [[9, "ULMFit-Model"], [11, "ULMFit-Model"]], "Finetune Language Model": [[9, "Finetune-Language-Model"], [11, "Finetune-Language-Model"]], "Train Text Classifier": [[9, "Train-Text-Classifier"]], "spaCy-PyThaiNLP": [[10, "spaCy-PyThaiNLP"]], "Wongnai Review Classification": [[11, "Wongnai-Review-Classification"]], "Oversampling": [[11, "Oversampling"]], "fastText Model": [[11, "fastText-Model"]], "LinearSVC Model": [[11, "LinearSVC-Model"]], "Classification": [[11, "Classification"]], "Submission": [[11, "Submission"]], "Thai Wiki Language Model for Text Generation": [[12, "Thai-Wiki-Language-Model-for-Text-Generation"]], "Thai Semantic Representation": [[13, "Thai-Semantic-Representation"]], "Automatic Derivation of Semantic Representations for Thai Serial Verb Constructions: A Grammar-Based Approach": [[13, "Automatic-Derivation-of-Semantic-Representations-for-Thai-Serial-Verb-Constructions:-A-Grammar-Based-Approach"]], "Usage": [[13, "Usage"]], "Thai Wav2vec2 model to ONNX model": [[14, "Thai-Wav2vec2-model-to-ONNX-model"]], "Build ONNX Model": [[14, "Build-ONNX-Model"]], "Inference": [[14, "Inference"]], "WangchanBERTa: Getting Started Notebook": [[15, "WangchanBERTa:-Getting-Started-Notebook"]], "Choose Pretrained Model": [[15, "Choose-Pretrained-Model"]], "Masked Token Prediction": [[15, "Masked-Token-Prediction"]], "Sequence Classification": [[15, "Sequence-Classification"]], "Pretrained Multi-class Classifiers - Wisesight Sentiment and Wongnai Reviews": [[15, "Pretrained-Multi-class-Classifiers---Wisesight-Sentiment-and-Wongnai-Reviews"]], "Token Classification": [[15, "Token-Classification"]], "Pretrained Token Classifiers - ThaiNER and LST20": [[15, "Pretrained-Token-Classifiers---ThaiNER-and-LST20"]], "Document Vectors": [[15, "Document-Vectors"]], "Feature Extraction": [[15, "Feature-Extraction"]], "Zero-shot Text Classification": [[15, "Zero-shot-Text-Classification"]], "Thai2Vec Embeddings Examples": [[16, "Thai2Vec-Embeddings-Examples"]], "Imports": [[16, "Imports"]], "Word Arithmetic": [[16, "Word-Arithmetic"]], "Doesn\u2019t Match": [[16, "Doesn't-Match"]], "Cosine Similarity": [[16, "Cosine-Similarity"]]}, "indexentries": {}}) \ No newline at end of file