diff --git a/dev-requirements.txt b/dev-requirements.txt
index 0ff51baa..366c1eb5 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,6 +1,5 @@
 nose>=1.3.0
 tox>=2.6.0
-wheel
-twine
 invoke>=0.15.0
 mock
+flake8==3.5.0
diff --git a/run_tests.py b/run_tests.py
index 9be496e2..accf7fdc 100644
--- a/run_tests.py
+++ b/run_tests.py
@@ -12,8 +12,11 @@
 python run_tests.py no-internet
 '''
 from __future__ import unicode_literals
-import nose
+import subprocess
 import sys
+
+import nose
+
 from textblob.compat import PY2
 
 PY26 = PY2 and int(sys.version_info[1]) < 7
@@ -22,6 +25,9 @@
 def main():
     args = get_argv()
+    retcode = subprocess.call(['flake8', 'textblob'])
+    if retcode:
+        sys.exit(1)
     success = nose.run(argv=args)
     sys.exit(0) if success else sys.exit(1)
 
 
@@ -59,5 +65,6 @@ def get_argv():
         args.extend(["-A", attr_expression])
     return args
 
+
 if __name__ == '__main__':
     main()
diff --git a/setup.cfg b/setup.cfg
index 37160b76..2390a1a7 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,5 +2,19 @@
 universal = 1
 
 [flake8]
-ignore = E501,E127,E128,E265,E302
+ignore = E501,E127,E128,E265,E302,E266
 max-line-length = 90
+exclude =
+    .git,
+    .ropeproject,
+    .tox,
+    docs,
+    .git,
+    build,
+    env,
+    venv,
+    # Exclude vendorized code
+    textblob/en,
+    textblob/unicodecsv,
+    textblob/_text.py,
+    textblob/compat.py
diff --git a/textblob/base.py b/textblob/base.py
index e4ac6e3f..eaeca61f 100644
--- a/textblob/base.py
+++ b/textblob/base.py
@@ -65,6 +65,7 @@ def itokenize(self, text, *args, **kwargs):
 
 ##### SENTIMENT ANALYZERS ####
 
+
 DISCRETE = 'ds'
 CONTINUOUS = 'co'
 
diff --git a/textblob/blob.py b/textblob/blob.py
index c0c4a8eb..497bf2b8 100644
--- a/textblob/blob.py
+++ b/textblob/blob.py
@@ -433,7 +433,7 @@ def sentiment_assessments(self):
 
         :rtype: namedtuple of the form ``Sentiment(polarity, subjectivity, assessments)``
         """
-        return self.analyzer.analyze(self.raw,keep_assessments=True)
+        return self.analyzer.analyze(self.raw, keep_assessments=True)
 
     @cached_property
     def polarity(self):
diff --git a/textblob/classifiers.py b/textblob/classifiers.py
index 03e3eb76..9e0b5b20 100644
--- a/textblob/classifiers.py
+++ b/textblob/classifiers.py
@@ -89,8 +89,8 @@ def basic_extractor(document, train_set):
     try:
         assert(isinstance(el_zero[0], basestring))
         word_features = _get_words_from_dataset(chain([el_zero], train_set))
-    except:
-        raise ValueError('train_set is proabably malformed.')
+    except Exception:
+        raise ValueError('train_set is probably malformed.')
 
     tokens = _get_document_tokens(document)
     features = dict(((u'contains({0})'.format(word), (word in tokens))
@@ -136,7 +136,7 @@ def __init__(self, train_set, feature_extractor=basic_extractor, format=None, **
             self.train_set = self._read_data(train_set, format)
         else:  # train_set is a list of tuples
             self.train_set = train_set
-        self._word_set = _get_words_from_dataset(self.train_set) #Keep a hidden set of unique words.
+        self._word_set = _get_words_from_dataset(self.train_set) # Keep a hidden set of unique words.
         self.train_features = None
 
     def _read_data(self, dataset, format=None):
diff --git a/textblob/download_corpora.py b/textblob/download_corpora.py
index f555e1f7..47231a80 100644
--- a/textblob/download_corpora.py
+++ b/textblob/download_corpora.py
@@ -46,5 +46,6 @@ def main():
     download_all()
     print("Finished.")
 
+
 if __name__ == '__main__':
     main()
diff --git a/textblob/en/sentiments.py b/textblob/en/sentiments.py
index a1bfe8c1..e5106bf9 100644
--- a/textblob/en/sentiments.py
+++ b/textblob/en/sentiments.py
@@ -23,8 +23,10 @@ class PatternAnalyzer(BaseSentimentAnalyzer):
     where [assessments] is a list of the assessed tokens and their
     polarity and subjectivity scores
     """
-
     kind = CONTINUOUS
+    # This is only here for backwards-compatibility.
+    # The return type is actually determined upon calling analyze()
+    RETURN_TYPE = namedtuple('Sentiment', ['polarity', 'subjectivity'])
 
     def analyze(self, text, keep_assessments=False):
         """Return the sentiment as a named tuple of the form:
@@ -32,14 +34,14 @@ def analyze(self, text, keep_assessments=False):
 
         """
         #: Return type declaration
         if keep_assessments:
-            RETURN_TYPE = namedtuple('Sentiment', ['polarity', 'subjectivity', 'assessments'])
+            Sentiment = namedtuple('Sentiment', ['polarity', 'subjectivity', 'assessments'])
             assessments = pattern_sentiment(text).assessments
-            polarity,subjectivity = pattern_sentiment(text)
-            return RETURN_TYPE( polarity,subjectivity,assessments )
+            polarity, subjectivity = pattern_sentiment(text)
+            return Sentiment(polarity, subjectivity, assessments)
         else:
-            RETURN_TYPE = namedtuple('Sentiment', ['polarity', 'subjectivity'])
-            return RETURN_TYPE(*pattern_sentiment(text))
+            Sentiment = namedtuple('Sentiment', ['polarity', 'subjectivity'])
+            return Sentiment(*pattern_sentiment(text))
 
 
 def _default_feature_extractor(words):
diff --git a/textblob/exceptions.py b/textblob/exceptions.py
index 738f36f2..004c41e1 100644
--- a/textblob/exceptions.py
+++ b/textblob/exceptions.py
@@ -15,6 +15,7 @@ class TextBlobError(Exception):
     """A TextBlob-related error."""
     pass
 
+
 TextBlobException = TextBlobError # Backwards compat
 
 class MissingCorpusError(TextBlobError):
@@ -25,6 +26,7 @@ class MissingCorpusError(TextBlobError):
     def __init__(self, message=MISSING_CORPUS_MESSAGE, *args, **kwargs):
         super(MissingCorpusError, self).__init__(message, *args, **kwargs)
 
+
 MissingCorpusException = MissingCorpusError # Backwards compat
 
 class DeprecationError(TextBlobError):
diff --git a/textblob/formats.py b/textblob/formats.py
index 4bbb9c27..7aa5083f 100644
--- a/textblob/formats.py
+++ b/textblob/formats.py
@@ -127,6 +127,7 @@ def detect(cls, stream):
         except ValueError:
             return False
 
+
 _registry = OrderedDict([
     ('csv', CSV),
     ('json', JSON),
diff --git a/textblob/tokenizers.py b/textblob/tokenizers.py
index 53603293..c120503f 100644
--- a/textblob/tokenizers.py
+++ b/textblob/tokenizers.py
@@ -56,6 +56,7 @@ def tokenize(self, text):
         '''Return a list of sentences.'''
         return nltk.tokenize.sent_tokenize(text)
 
+
 #: Convenience function for tokenizing sentences
 sent_tokenize = SentenceTokenizer().itokenize
 
diff --git a/textblob/translate.py b/textblob/translate.py
index e9fcd487..1f8fa25a 100644
--- a/textblob/translate.py
+++ b/textblob/translate.py
@@ -98,8 +98,7 @@ def _unescape(text):
     """Unescape unicode character codes within a string.
     """
     pattern = r'\\{1,2}u[0-9a-fA-F]{4}'
-    decode = lambda x: codecs.getdecoder('unicode_escape')(x.group())[0]
-    return re.sub(pattern, decode, text)
+    return re.sub(pattern, lambda x: codecs.getdecoder('unicode_escape')(x.group())[0], text)
 
 
 def _calculate_tk(source):