File tree Expand file tree Collapse file tree 3 files changed +41
-0
lines changed Expand file tree Collapse file tree 3 files changed +41
-0
lines changed Original file line number Diff line number Diff line change @@ -122,6 +122,9 @@ def transliterate(
122122 * *tltk_ipa* - tltk, output is International Phonetic Alphabet (IPA)
123123 * *thaig2p_v2* - Thai Grapheme-to-Phoneme,
124124 output is IPA. https://huggingface.co/pythainlp/thaig2p-v2.0
125+ * *umt5_thaig2p* - Thai Grapheme-to-Phoneme,
126+ output is IPA, powered by UMT5.\
127+ https://huggingface.co/B-K/umt5-thai-g2p-v2-0.5k
125128
126129 :Example:
127130 ::
@@ -174,6 +177,8 @@ def transliterate(
174177 from pythainlp .transliterate .iso_11940 import transliterate
175178 elif engine == "thaig2p_v2" :
176179 from pythainlp .transliterate .thaig2p_v2 import transliterate
180+ elif engine == "umt5_thaig2p" :
181+ from pythainlp .transliterate .umt5_thaig2p import transliterate
177182 else : # use default engine: "thaig2p"
178183 from pythainlp .transliterate .thaig2p import transliterate
179184
Original file line number Diff line number Diff line change 1+ # -*- coding: utf-8 -*-
2+ # SPDX-FileCopyrightText: 2016-2025 PyThaiNLP Project
3+ # SPDX-FileType: SOURCE
4+ # SPDX-License-Identifier: Apache-2.0
5+ """
6+ umt5-thai-g2p-v2-0.5k
7+
8+ huggingface: https://huggingface.co/B-K/umt5-thai-g2p-v2-0.5k
9+ """
10+
11+ # Use a pipeline as a high-level helper
12+ from transformers import pipeline
13+
14+
class Umt5ThaiG2P:
    """
    Thai grapheme-to-phoneme converter built on the
    ``B-K/umt5-thai-g2p-v2-0.5k`` model; transcriptions use the
    International Phonetic Alphabet.
    """

    def __init__(self, device: str = "cpu"):
        """
        Create the text2text-generation pipeline for the G2P model.

        :param str device: device handed to the pipeline (default ``"cpu"``)
        """
        g2p_pipeline = pipeline(
            "text2text-generation",
            model="B-K/umt5-thai-g2p-v2-0.5k",
            device=device,
        )
        self.pipe = g2p_pipeline

    def g2p(self, text: str) -> str:
        """
        Run *text* through the pipeline and return its transcription.

        :param str text: text to convert
        :return: the ``generated_text`` field of the first pipeline result
        :rtype: str
        """
        outputs = self.pipe(text)
        first_result = outputs[0]
        return first_result["generated_text"]
25+
26+
# Lazily created converters, keyed by the device they were built for.
# Keeping one entry per device means the model is only loaded on first use
# and a later call that asks for a different device is honoured instead of
# silently reusing the converter created by the first call.
_THAI_G2P = {}


def transliterate(text: str, device="cpu") -> str:
    """
    Convert text to its phonemic transcription with the UMT5 Thai G2P model.

    :param str text: text to convert
    :param device: device passed to :class:`Umt5ThaiG2P` the first time a
        converter for that device is needed (default ``"cpu"``); the
        converter is then reused for every later call with the same device
    :return: the transcription produced by :meth:`Umt5ThaiG2P.g2p`
    :rtype: str
    """
    converter = _THAI_G2P.get(device)
    if converter is None:
        # First request for this device: build the converter and remember it.
        converter = Umt5ThaiG2P(device=device)
        _THAI_G2P[device] = converter
    return converter.g2p(text)
Original file line number Diff line number Diff line change @@ -140,6 +140,8 @@ def test_transliterate(self):
140140 self .assertIsNotNone (transliterate ("แมว" , engine = "thaig2p" ))
141141 self .assertIsNotNone (transliterate ("คน" , engine = "thaig2p_v2" ))
142142 self .assertIsNotNone (transliterate ("แมว" , engine = "thaig2p_v2" ))
143+ self .assertIsNotNone (transliterate ("คน" , engine = "umt5_thaig2p" ))
144+ self .assertIsNotNone (transliterate ("แมว" , engine = "umt5_thaig2p" ))
143145 self .assertIsNotNone (transliterate ("คน" , engine = "tltk_g2p" ))
144146 self .assertIsNotNone (transliterate ("แมว" , engine = "tltk_g2p" ))
145147 self .assertIsNotNone (transliterate ("คน" , engine = "tltk_ipa" ))
You can’t perform that action at this time.
0 commit comments