Skip to content

Commit af7e85a

Browse files
author
SeanLee97
committed
Fixed some bugs: skip empty or whitespace-only tokens in seg() and tag()
1 parent 2095e49 commit af7e85a

File tree

1 file changed

+16
-8
lines changed

1 file changed

+16
-8
lines changed

xmnlp/postag/postag.py

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -92,22 +92,26 @@ def seg(self, sent):
9292
continue
9393
if R.zh.match(s):
9494
for w in list(self.dag.seg(s)):
95-
yield w
95+
if len(w.strip()) > 0:
96+
yield w
9697
else:
9798
tmp = R.skip.split(s)
9899
for x in tmp:
99100
if R.skip.match(x):
100-
yield x
101+
if len(x.strip()) > 0:
102+
yield x
101103
else:
102104
x = x.replace(' ','')
103105
endigts = R.endigt.findall(x)
104106
parts = re.split(r'[0-9]+\.?[0-9]+|[0-9]+|[a-zA-Z]+', x)
105107
if len(endigts) > 0:
106108
for w, t in self.re_decode(parts, endigts, False):
107-
yield w
109+
if len(w.strip()) > 0:
110+
yield w
108111
else:
109112
for xx in x:
110-
yield xx
113+
if len(xx.strip()) > 0:
114+
yield xx
111115
def tag(self, sent):
112116
for s in R.zh.split(sent):
113117
s = s.strip()
@@ -117,19 +121,23 @@ def tag(self, sent):
117121
continue
118122
if R.zh.match(s):
119123
for w,t in self.dag.tag(s):
120-
yield w, t
124+
if len(w.strip()) > 0:
125+
yield w, t
121126
else:
122127
tmp = R.skip.split(s)
123128
for x in tmp:
124129
if R.skip.match(x):
125-
yield x
130+
if len(x.strip()) > 0:
131+
yield x
126132
else:
127133
x = x.replace(' ', '')
128134
endigts = R.endigt.findall(x)
129135
parts = re.split(r'[0-9]+\.?[0-9]+|[0-9]+|[a-zA-Z]+', x)
130136
if len(endigts) > 0:
131137
for w, t in self.re_decode(parts, endigts, True):
132-
yield w, t
138+
if len(w.strip()) > 0:
139+
yield w, t
133140
else:
134141
for xx in x:
135-
yield xx, 'un'
142+
if len(xx.strip()) > 0:
143+
yield xx, 'un'

0 commit comments

Comments
 (0)