1 file changed: +16 −8 lines changed
lines changed Original file line number Diff line number Diff line change @@ -92,22 +92,26 @@ def seg(self, sent):
92
92
continue
93
93
if R .zh .match (s ):
94
94
for w in list (self .dag .seg (s )):
95
- yield w
95
+ if len (w .strip ()) > 0 :
96
+ yield w
96
97
else :
97
98
tmp = R .skip .split (s )
98
99
for x in tmp :
99
100
if R .skip .match (x ):
100
- yield x
101
+ if len (x .strip ()) > 0 :
102
+ yield x
101
103
else :
102
104
x = x .replace (' ' ,'' )
103
105
endigts = R .endigt .findall (x )
104
106
parts = re .split (r'[0-9]+\.?[0-9]+|[0-9]+|[a-zA-Z]+' , x )
105
107
if len (endigts ) > 0 :
106
108
for w , t in self .re_decode (parts , endigts , False ):
107
- yield w
109
+ if len (w .strip ()) > 0 :
110
+ yield w
108
111
else :
109
112
for xx in x :
110
- yield xx
113
+ if len (xx .strip ()) > 0 :
114
+ yield xx
111
115
def tag (self , sent ):
112
116
for s in R .zh .split (sent ):
113
117
s = s .strip ()
@@ -117,19 +121,23 @@ def tag(self, sent):
117
121
continue
118
122
if R .zh .match (s ):
119
123
for w ,t in self .dag .tag (s ):
120
- yield w , t
124
+ if len (w .strip ()) > 0 :
125
+ yield w , t
121
126
else :
122
127
tmp = R .skip .split (s )
123
128
for x in tmp :
124
129
if R .skip .match (x ):
125
- yield x
130
+ if len (x .strip ()) > 0 :
131
+ yield x
126
132
else :
127
133
x = x .replace (' ' , '' )
128
134
endigts = R .endigt .findall (x )
129
135
parts = re .split (r'[0-9]+\.?[0-9]+|[0-9]+|[a-zA-Z]+' , x )
130
136
if len (endigts ) > 0 :
131
137
for w , t in self .re_decode (parts , endigts , True ):
132
- yield w , t
138
+ if len (w .strip ()) > 0 :
139
+ yield w , t
133
140
else :
134
141
for xx in x :
135
- yield xx , 'un'
142
+ if len (xx .strip ()) > 0 :
143
+ yield xx , 'un'
You can’t perform that action at this time.
0 commit comments