Skip to content

Commit 7505da4

Browse files
committed
Special-cased when train_set is the null set
Now returns an empty dict if passed an empty training set. Also handles the case where train_set is an iterator whose first element is consumed by .next(), by chaining that element back onto the rest of the data.
1 parent 2430cca commit 7505da4

File tree

1 file changed

+13
-6
lines changed

1 file changed

+13
-6
lines changed

textblob/classifiers.py

+13-6
Original file line number | Diff line number | Diff line change
@@ -78,13 +78,20 @@ def basic_extractor(document, train_set):
78 78
:param list train_set: Training data set, a list of tuples of the form
79 79
``(words, label)`` OR an iterable of strings.
80 80
"""
81-
el_zero = iter(train_set).next() #Infer input from first element.
82-
if isinstance(el_zero, tuple):
83-
word_features = _get_words_from_dataset(train_set)
84-
elif isinstance(el_zero, str):
85-
word_features = train_set
81+
82+
try:
83+
el_zero = iter(train_set).next() #Infer input from first element.
84+
except StopIteration:
85+
return {}
86+
if isinstance(el_zero, str):
87+
word_features = [w for w in chain([el_zero],train_set)]
86 88
else:
87-
raise ValueError('train_set is proabably malformed.')
89+
try:
90+
assert(isinstance(el_zero[0], str))
91+
word_features = _get_words_from_dataset(chain([el_zero],train_set))
92+
except:
93+
raise ValueError('train_set is proabably malformed.')
94+
88 95
tokens = _get_document_tokens(document)
89 96
features = dict(((u'contains({0})'.format(word), (word in tokens))
90 97
for word in word_features))

0 commit comments

Comments
 (0)