File tree 1 file changed +13
-6
lines changed
1 file changed +13
-6
lines changed Original file line number Diff line number Diff line change @@ -78,13 +78,20 @@ def basic_extractor(document, train_set):
78
78
:param list train_set: Training data set, a list of tuples of the form
79
79
``(words, label)`` OR an iterable of strings.
80
80
"""
81
- el_zero = iter (train_set ).next () #Infer input from first element.
82
- if isinstance (el_zero , tuple ):
83
- word_features = _get_words_from_dataset (train_set )
84
- elif isinstance (el_zero , str ):
85
- word_features = train_set
81
+
82
+ try :
83
+ el_zero = iter (train_set ).next () #Infer input from first element.
84
+ except StopIteration :
85
+ return {}
86
+ if isinstance (el_zero , str ):
87
+ word_features = [w for w in chain ([el_zero ],train_set )]
86
88
else :
87
- raise ValueError ('train_set is proabably malformed.' )
89
+ try :
90
+ assert (isinstance (el_zero [0 ], str ))
91
+ word_features = _get_words_from_dataset (chain ([el_zero ],train_set ))
92
+ except :
93
+ raise ValueError ('train_set is proabably malformed.' )
94
+
88
95
tokens = _get_document_tokens (document )
89
96
features = dict (((u'contains({0})' .format (word ), (word in tokens ))
90
97
for word in word_features ))
You can’t perform that action at this time.
0 commit comments