1414 unigram ,
1515)
1616
17+ TEST_TOKENS = ["ผม" , "รัก" , "คุณ" ]
18+
1719
1820class TagTestCase (unittest .TestCase ):
19- # ### pythainlp.tag.pos_tag
21+ """Test pythainlp.tag.pos_tag"""
2022
2123 def test_pos_tag (self ):
22- tokens = ["ผม" , "รัก" , "คุณ" ]
23-
2424 self .assertEqual (pos_tag (None ), [])
2525 self .assertEqual (pos_tag ([]), [])
2626 self .assertEqual (
@@ -40,25 +40,31 @@ def test_pos_tag(self):
4040 self .assertEqual (unigram .tag (None , corpus = "tud" ), [])
4141 self .assertEqual (unigram .tag ([], corpus = "tud" ), [])
4242 self .assertIsNotNone (
43- pos_tag (tokens , engine = "unigram" , corpus = "orchid" )
43+ pos_tag (TEST_TOKENS , engine = "unigram" , corpus = "orchid" )
4444 )
4545 self .assertIsNotNone (
46- pos_tag (tokens , engine = "unigram" , corpus = "orchid_ud" )
46+ pos_tag (TEST_TOKENS , engine = "unigram" , corpus = "orchid_ud" )
47+ )
48+ self .assertIsNotNone (
49+ pos_tag (TEST_TOKENS , engine = "unigram" , corpus = "pud" )
4750 )
48- self .assertIsNotNone (pos_tag (tokens , engine = "unigram" , corpus = "pud" ))
4951 self .assertIsNotNone (pos_tag (["" ], engine = "unigram" , corpus = "pud" ))
5052 self .assertIsNotNone (
51- pos_tag (tokens , engine = "unigram" , corpus = "blackboard" )
53+ pos_tag (TEST_TOKENS , engine = "unigram" , corpus = "blackboard" )
5254 )
5355 self .assertIsNotNone (
5456 pos_tag (["" ], engine = "unigram" , corpus = "blackboard" )
5557 )
5658 self .assertIsNotNone (
5759 pos_tag (["" ], engine = "unigram" , corpus = "blackboard_ud" )
5860 )
59- self .assertIsNotNone (pos_tag (tokens , engine = "unigram" , corpus = "tdtb" ))
61+ self .assertIsNotNone (
62+ pos_tag (TEST_TOKENS , engine = "unigram" , corpus = "tdtb" )
63+ )
6064 self .assertIsNotNone (pos_tag (["" ], engine = "unigram" , corpus = "tdtb" ))
61- self .assertIsNotNone (pos_tag (tokens , engine = "unigram" , corpus = "tud" ))
65+ self .assertIsNotNone (
66+ pos_tag (TEST_TOKENS , engine = "unigram" , corpus = "tud" )
67+ )
6268 self .assertIsNotNone (pos_tag (["" ], engine = "unigram" , corpus = "tud" ))
6369 self .assertEqual (
6470 pos_tag (["คุณ" , "กำลัง" , "ประชุม" ], engine = "unigram" ),
@@ -72,6 +78,25 @@ def test_pos_tag(self):
7278 pos_tag (["ความ" , "พอเพียง" ], corpus = "orchid_ud" )[0 ][1 ], "NOUN"
7379 )
7480
81+ self .assertEqual (pos_tag_sents (None ), [])
82+ self .assertEqual (pos_tag_sents ([]), [])
83+ self .assertEqual (
84+ pos_tag_sents ([["ผม" , "กิน" , "ข้าว" ], ["แมว" , "วิ่ง" ]]),
85+ [
86+ [("ผม" , "PPRS" ), ("กิน" , "VACT" ), ("ข้าว" , "NCMN" )],
87+ [("แมว" , "NCMN" ), ("วิ่ง" , "VACT" )],
88+ ],
89+ )
90+
91+
92+ class PerceptronTaggerTestCase (unittest .TestCase ):
93+ """Test pythainlp.tag.PerceptronTagger
94+
95+ Covers perceptron.tag() on None/empty input, pos_tag() with
96+ engine="perceptron" across corpora, and a PerceptronTagger instance.
97+ """
98+
99+ def test_perceptron_tagger (self ):
75100 self .assertEqual (perceptron .tag (None , corpus = "orchid" ), [])
76101 self .assertEqual (perceptron .tag ([], corpus = "orchid" ), [])
77102 self .assertEqual (perceptron .tag (None , corpus = "orchid_ud" ), [])
@@ -82,44 +107,34 @@ def test_pos_tag(self):
82107 self .assertEqual (perceptron .tag ([], corpus = "blackboard" ), [])
83108 self .assertEqual (perceptron .tag (None , corpus = "tud" ), [])
84109 self .assertEqual (perceptron .tag ([], corpus = "tud" ), [])
110+
85111 self .assertIsNotNone (
86- pos_tag (tokens , engine = "perceptron" , corpus = "orchid" )
112+ pos_tag (TEST_TOKENS , engine = "perceptron" , corpus = "orchid" )
87113 )
88114 self .assertIsNotNone (
89- pos_tag (tokens , engine = "perceptron" , corpus = "orchid_ud" )
115+ pos_tag (TEST_TOKENS , engine = "perceptron" , corpus = "orchid_ud" )
90116 )
91117 self .assertIsNotNone (
92- pos_tag (tokens , engine = "perceptron" , corpus = "pud" )
118+ pos_tag (TEST_TOKENS , engine = "perceptron" , corpus = "pud" )
93119 )
94120 self .assertIsNotNone (
95- pos_tag (tokens , engine = "perceptron" , corpus = "blackboard" )
121+ pos_tag (TEST_TOKENS , engine = "perceptron" , corpus = "blackboard" )
96122 )
97123 self .assertIsNotNone (
98- pos_tag (tokens , engine = "perceptron" , corpus = "blackboard_ud" )
124+ pos_tag (TEST_TOKENS , engine = "perceptron" , corpus = "blackboard_ud" )
99125 )
100126 self .assertIsNotNone (
101- pos_tag (tokens , engine = "perceptron" , corpus = "tdtb" )
127+ pos_tag (TEST_TOKENS , engine = "perceptron" , corpus = "tdtb" )
102128 )
103129 self .assertIsNotNone (
104- pos_tag (tokens , engine = "perceptron" , corpus = "tdtb" )
130+ pos_tag (TEST_TOKENS , engine = "perceptron" , corpus = "tdtb" )
105131 )
106132 self .assertIsNotNone (
107- pos_tag (tokens , engine = "perceptron" , corpus = "tud" )
108- )
109-
110- self .assertEqual (pos_tag_sents (None ), [])
111- self .assertEqual (pos_tag_sents ([]), [])
112- self .assertEqual (
113- pos_tag_sents ([["ผม" , "กิน" , "ข้าว" ], ["แมว" , "วิ่ง" ]]),
114- [
115- [("ผม" , "PPRS" ), ("กิน" , "VACT" ), ("ข้าว" , "NCMN" )],
116- [("แมว" , "NCMN" ), ("วิ่ง" , "VACT" )],
117- ],
133+ pos_tag (TEST_TOKENS , engine = "perceptron" , corpus = "tud" )
118134 )
119135
120- # ### pythainlp.tag.PerceptronTagger
121-
122- def test_perceptron_tagger (self ):
136+ def test_perceptron_tagger_custom (self ):
137+ """Test pythainlp.tag.PerceptronTagger"""
123138 tagger = PerceptronTagger ()
124139 # train data, with "กิน" > 20 instances to trigger conditions
125140 # in _make_tagdict()
@@ -182,7 +197,9 @@ def test_perceptron_tagger(self):
182197 with self .assertRaises (IOError ):
183198 tagger .load ("ptagger_notexistX4AcOcX.pkl" ) # file does not exist
184199
185- # ### pythainlp.tag.locations
200+
201+ class TagLocationsTestCase (unittest .TestCase ):
202+ """Test pythainlp.tag.locations"""
186203
187204 def test_ner_locations (self ):
188205 self .assertEqual (
0 commit comments