diff --git a/.flake8 b/.flake8 index cf1a96476c2..be9d83eaf70 100644 --- a/.flake8 +++ b/.flake8 @@ -1,8 +1,63 @@ [flake8] max-line-length = 119 -ignore = E203, E501, E701, E704, W503 +ignore = + # black disagrees with flake8 about these + E203, E501, E701, E704, W503 + # Assigning to `os.environ` doesn't clear the environment. + B003 + # Do not use mutable data structures for argument defaults. + B006 + # Loop control variable not used within the loop body. + B007 + # Do not perform function calls in argument defaults. + B008 + # return/continue/break inside finally blocks cause exceptions to be + # silenced. + B012 + # Star-arg unpacking after a keyword argument is strongly discouraged + B026 + # No explicit stacklevel argument found. + B028 + + # docstring does contain unindexed parameters + P102 + # other string does contain unindexed parameters + P103 + + # Missing docstring in public module + D100 + # Missing docstring in public class + D101 + # Missing docstring in public method + D102 + # Missing docstring in public function + D103 + # Missing docstring in public package + D104 + # Missing docstring in magic method + D105 + # Missing docstring in public nested class + D106 + # Missing docstring in __init__ + D107 + # One-line docstring should fit on one line with quotes + D200 + # No blank lines allowed after function docstring + D202 + # 1 blank line required between summary line and description + D205 + # Multi-line docstring closing quotes should be on a separate line + D209 + # First line should end with a period + D400 + # First line should be in imperative mood; try rephrasing + D401 + # First line should not be the function's "signature" + D402 + # First word of the first line should be properly capitalized + D403 exclude = docs/conf.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 63da5544d4c..f70effc5d90 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,6 +8,12 @@ repos: rev: 7.0.0 hooks: - id: flake8 + additional_dependencies: + - flake8-bugbear + - flake8-comprehensions + - flake8-debugger + - flake8-docstrings + - flake8-string-format - repo: https://github.com/psf/black.git rev: 24.2.0 hooks: diff --git a/scrapy/extensions/debug.py b/scrapy/extensions/debug.py index a0fc7b99f30..b360ce48df4 100644 --- a/scrapy/extensions/debug.py +++ b/scrapy/extensions/debug.py @@ -74,4 +74,4 @@ def __init__(self) -> None: def _enter_debugger(self, signum: int, frame: Optional[FrameType]) -> None: assert frame - Pdb().set_trace(frame.f_back) + Pdb().set_trace(frame.f_back) # noqa: T100 diff --git a/scrapy/extensions/feedexport.py b/scrapy/extensions/feedexport.py index de8a288f61b..941bd4b2660 100644 --- a/scrapy/extensions/feedexport.py +++ b/scrapy/extensions/feedexport.py @@ -104,7 +104,7 @@ def __init__(self, feed_options: Optional[Dict[str, Any]]) -> None: for item_class in feed_options.get("item_classes") or () ) else: - self.item_classes = tuple() + self.item_classes = () def accepts(self, item: Any) -> bool: """ diff --git a/scrapy/pipelines/media.py b/scrapy/pipelines/media.py index 3e327105eb2..09e95cf5d35 100644 --- a/scrapy/pipelines/media.py +++ b/scrapy/pipelines/media.py @@ -234,7 +234,7 @@ def _cache_result_and_execute_waiters( # Exception Chaining (https://www.python.org/dev/peps/pep-3134/). context = getattr(result.value, "__context__", None) if isinstance(context, StopIteration): - setattr(result.value, "__context__", None) + result.value.__context__ = None info.downloading.remove(fp) info.downloaded[fp] = result # cache result diff --git a/scrapy/utils/console.py b/scrapy/utils/console.py index bf180311552..32821983140 100644 --- a/scrapy/utils/console.py +++ b/scrapy/utils/console.py @@ -10,10 +10,10 @@ def _embed_ipython_shell( ) -> EmbedFuncT: """Start an IPython Shell""" try: - from IPython.terminal.embed import InteractiveShellEmbed + from IPython.terminal.embed import InteractiveShellEmbed # noqa: T100 from IPython.terminal.ipapp import load_default_config except ImportError: - from IPython.frontend.terminal.embed import ( # type: ignore[no-redef] + from IPython.frontend.terminal.embed import ( # type: ignore[no-redef] # noqa: T100 InteractiveShellEmbed, ) from IPython.frontend.terminal.ipapp import ( # type: ignore[no-redef] diff --git a/scrapy/utils/defer.py b/scrapy/utils/defer.py index ddb68c86b66..877eb438896 100644 --- a/scrapy/utils/defer.py +++ b/scrapy/utils/defer.py @@ -407,7 +407,7 @@ def maybeDeferred_coro( """Copy of defer.maybeDeferred that also converts coroutines to Deferreds.""" try: result = f(*args, **kw) - except: # noqa: E722 + except: # noqa: E722,B001 return defer.fail(failure.Failure(captureVars=Deferred.debug)) if isinstance(result, Deferred): diff --git a/scrapy/utils/python.py b/scrapy/utils/python.py index 059d8e04d4e..f56950fdd57 100644 --- a/scrapy/utils/python.py +++ b/scrapy/utils/python.py @@ -269,7 +269,7 @@ def get_spec(func: Callable[..., Any]) -> Tuple[List[str], Dict[str, Any]]: if inspect.isfunction(func) or inspect.ismethod(func): spec = inspect.getfullargspec(func) - elif hasattr(func, "__call__"): + elif hasattr(func, "__call__"): # noqa: B004 spec = inspect.getfullargspec(func.__call__) else: raise TypeError(f"{type(func)} is not callable") diff --git a/scrapy/utils/signal.py b/scrapy/utils/signal.py index 89cfbd2ec0c..bb6d807ee65 100644 --- a/scrapy/utils/signal.py +++ b/scrapy/utils/signal.py @@ -100,7 +100,10 @@ def logerror(failure: Failure, recv: Any) -> Failure: d.addErrback(logerror, receiver) # TODO https://pylint.readthedocs.io/en/latest/user_guide/messages/warning/cell-var-from-loop.html d.addBoth( - lambda result: (receiver, result) # pylint: disable=cell-var-from-loop + lambda result: ( + receiver, # pylint: disable=cell-var-from-loop # noqa: B023 + result, + ) ) dfds.append(d) d = DeferredList(dfds) diff --git a/tests/test_cmdline/__init__.py b/tests/test_cmdline/__init__.py index 25ded143c1c..4835e936b0b 100644 --- a/tests/test_cmdline/__init__.py +++ b/tests/test_cmdline/__init__.py @@ -20,7 +20,7 @@ def setUp(self): self.env["SCRAPY_SETTINGS_MODULE"] = "tests.test_cmdline.settings" def _execute(self, *new_args, **kwargs): - encoding = getattr(sys.stdout, "encoding") or "utf-8" + encoding = sys.stdout.encoding or "utf-8" args = (sys.executable, "-m", "scrapy.cmdline") + new_args proc = Popen(args, stdout=PIPE, stderr=PIPE, env=self.env, **kwargs) comm = proc.communicate()[0].strip() diff --git a/tests/test_command_version.py b/tests/test_command_version.py index a52d0d13cc0..18c1c531c2b 100644 --- a/tests/test_command_version.py +++ b/tests/test_command_version.py @@ -12,7 +12,7 @@ class VersionTest(ProcessTest, unittest.TestCase): @defer.inlineCallbacks def test_output(self): - encoding = getattr(sys.stdout, "encoding") or "utf-8" + encoding = sys.stdout.encoding or "utf-8" _, out, _ = yield self.execute([]) self.assertEqual( out.strip().decode(encoding), @@ -21,7 +21,7 @@ def test_output(self): @defer.inlineCallbacks def test_verbose_output(self): - encoding = getattr(sys.stdout, "encoding") or "utf-8" + encoding = sys.stdout.encoding or "utf-8" _, out, _ = yield self.execute(["-v"]) headers = [ line.partition(":")[0].strip() diff --git a/tests/test_commands.py b/tests/test_commands.py index 857a56b7358..a23b7f4a9dd 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -101,7 +101,7 @@ def proc(self, *new_args, **popen_kwargs): def kill_proc(): p.kill() p.communicate() - assert False, "Command took too much time to complete" + raise AssertionError("Command took too much time to complete") timer = Timer(15, kill_proc) try: @@ -200,7 +200,7 @@ def get_permissions(path: Path) -> str: path_obj = Path(path) - renamings = renamings or tuple() + renamings = renamings or () permissions_dict = { ".": get_permissions(path_obj), } diff --git a/tests/test_downloader_handlers.py b/tests/test_downloader_handlers.py index d3fd63847f1..884491d0101 100644 --- a/tests/test_downloader_handlers.py +++ b/tests/test_downloader_handlers.py @@ -892,7 +892,7 @@ def test_extra_kw(self): except Exception as e: self.assertIsInstance(e, (TypeError, NotConfigured)) else: - assert False + raise AssertionError() def test_request_signing1(self): # gets an object from the johnsmith bucket. diff --git a/tests/test_dupefilters.py b/tests/test_dupefilters.py index aa0975555bc..f617fc02743 100644 --- a/tests/test_dupefilters.py +++ b/tests/test_dupefilters.py @@ -146,7 +146,7 @@ def fingerprint(self, request): case_insensitive_dupefilter.close("finished") def test_seenreq_newlines(self): - """Checks against adding duplicate \r to + r"""Checks against adding duplicate \r to line endings on Windows platforms.""" r1 = Request("http://scrapytest.org/1") diff --git a/tests/test_engine.py b/tests/test_engine.py index 33544e8db50..86526420f83 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -459,7 +459,7 @@ def test_short_timeout(self): def kill_proc(): p.kill() p.communicate() - assert False, "Command took too much time to complete" + raise AssertionError("Command took too much time to complete") timer = Timer(15, kill_proc) try: diff --git a/tests/test_feedexport.py b/tests/test_feedexport.py index 3771df8f10f..253987e15b7 100644 --- a/tests/test_feedexport.py +++ b/tests/test_feedexport.py @@ -1356,7 +1356,7 @@ def test_export_feed_export_fields(self): @defer.inlineCallbacks def test_export_encoding(self): - items = [dict({"foo": "Test\xd6"})] + items = [{"foo": "Test\xd6"}] formats = { "json": b'[{"foo": "Test\\u00d6"}]', @@ -1401,7 +1401,7 @@ def test_export_encoding(self): @defer.inlineCallbacks def test_export_multiple_configs(self): - items = [dict({"foo": "FOO", "bar": "BAR"})] + items = [{"foo": "FOO", "bar": "BAR"}] formats = { "json": b'[\n{"bar": "BAR"}\n]', @@ -2513,8 +2513,8 @@ def test_export_no_items_store_empty(self): @defer.inlineCallbacks def test_export_multiple_configs(self): items = [ - dict({"foo": "FOO", "bar": "BAR"}), - dict({"foo": "FOO1", "bar": "BAR1"}), + {"foo": "FOO", "bar": "BAR"}, + {"foo": "FOO1", "bar": "BAR1"}, ] formats = { @@ -2574,7 +2574,7 @@ def test_export_multiple_configs(self): @defer.inlineCallbacks def test_batch_item_count_feeds_setting(self): - items = [dict({"foo": "FOO"}), dict({"foo": "FOO1"})] + items = [{"foo": "FOO"}, {"foo": "FOO1"}] formats = { "json": [ b'[{"foo": "FOO"}]', diff --git a/tests/test_linkextractors.py b/tests/test_linkextractors.py index d9c09a16a8e..b1043c1111b 100644 --- a/tests/test_linkextractors.py +++ b/tests/test_linkextractors.py @@ -186,7 +186,7 @@ def test_extraction_using_single_values(self): ) def test_nofollow(self): - '''Test the extractor's behaviour for links with rel="nofollow"''' + """Test the extractor's behaviour for links with rel='nofollow'""" html = b"""Page title<title> <body> diff --git a/tests/test_loader.py b/tests/test_loader.py index b0b7f8723a6..8db929dcf3e 100644 --- a/tests/test_loader.py +++ b/tests/test_loader.py @@ -156,7 +156,7 @@ def test_get_output_value_singlevalue(self): self.assertEqual(il.get_output_value("name"), ["foo"]) loaded_item = il.load_item() self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(ItemAdapter(loaded_item).asdict(), dict({"name": ["foo"]})) + self.assertEqual(ItemAdapter(loaded_item).asdict(), {"name": ["foo"]}) def test_get_output_value_list(self): """Getting output value must not remove value from item""" @@ -165,9 +165,7 @@ def test_get_output_value_list(self): self.assertEqual(il.get_output_value("name"), ["foo", "bar"]) loaded_item = il.load_item() self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual( - ItemAdapter(loaded_item).asdict(), dict({"name": ["foo", "bar"]}) - ) + self.assertEqual(ItemAdapter(loaded_item).asdict(), {"name": ["foo", "bar"]}) def test_values_single(self): """Values from initial item must be added to loader._values""" diff --git a/tests/test_loader_deprecated.py b/tests/test_loader_deprecated.py index 528efa142a7..0d245bec929 100644 --- a/tests/test_loader_deprecated.py +++ b/tests/test_loader_deprecated.py @@ -526,7 +526,7 @@ def test_get_output_value_singlevalue(self): self.assertEqual(il.get_output_value("name"), ["foo"]) loaded_item = il.load_item() self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(loaded_item, dict({"name": ["foo"]})) + self.assertEqual(loaded_item, {"name": ["foo"]}) def test_get_output_value_list(self): """Getting output value must not remove value from item""" @@ -535,7 +535,7 @@ def test_get_output_value_list(self): self.assertEqual(il.get_output_value("name"), ["foo", "bar"]) loaded_item = il.load_item() self.assertIsInstance(loaded_item, self.item_class) - self.assertEqual(loaded_item, dict({"name": ["foo", "bar"]})) + self.assertEqual(loaded_item, {"name": ["foo", "bar"]}) def test_values_single(self): """Values from initial item must be added to loader._values""" diff --git a/tests/test_request_dict.py b/tests/test_request_dict.py index 7312eb036e7..d3f416347ed 100644 --- a/tests/test_request_dict.py +++ b/tests/test_request_dict.py @@ -147,7 +147,7 @@ def parse(self, response): spider = MySpider() r = Request("http://www.example.com", callback=spider.parse) - setattr(spider, "parse", None) + spider.parse = None self.assertRaises(ValueError, r.to_dict, spider=spider) def test_callback_not_available(self): diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 02b50baa3a6..9b7bad4bf48 100644 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -284,7 +284,7 @@ def test_logic(self): downloader.decrement(slot) self.assertTrue( - _is_scheduling_fair(list(s for u, s in _URLS_WITH_SLOTS), dequeued_slots) + _is_scheduling_fair([s for u, s in _URLS_WITH_SLOTS], dequeued_slots) ) self.assertEqual(sum(len(s.active) for s in downloader.slots.values()), 0) diff --git a/tests/test_spider.py b/tests/test_spider.py index d629d33afc5..18a86335013 100644 --- a/tests/test_spider.py +++ b/tests/test_spider.py @@ -244,7 +244,7 @@ class _CrawlSpider(self.spider_class): spider = _CrawlSpider() output = list(spider._requests_to_follow(response)) self.assertEqual(len(output), 3) - self.assertTrue(all(map(lambda r: isinstance(r, Request), output))) + self.assertTrue(all(isinstance(r, Request) for r in output)) self.assertEqual( [r.url for r in output], [ @@ -270,7 +270,7 @@ def dummy_process_links(self, links): spider = _CrawlSpider() output = list(spider._requests_to_follow(response)) self.assertEqual(len(output), 3) - self.assertTrue(all(map(lambda r: isinstance(r, Request), output))) + self.assertTrue(all(isinstance(r, Request) for r in output)) self.assertEqual( [r.url for r in output], [ @@ -299,7 +299,7 @@ def filter_process_links(self, links): spider = _CrawlSpider() output = list(spider._requests_to_follow(response)) self.assertEqual(len(output), 2) - self.assertTrue(all(map(lambda r: isinstance(r, Request), output))) + self.assertTrue(all(isinstance(r, Request) for r in output)) self.assertEqual( [r.url for r in output], [ @@ -324,7 +324,7 @@ def dummy_process_links(self, links): spider = _CrawlSpider() output = list(spider._requests_to_follow(response)) self.assertEqual(len(output), 3) - self.assertTrue(all(map(lambda r: isinstance(r, Request), output))) + self.assertTrue(all(isinstance(r, Request) for r in output)) self.assertEqual( [r.url for r in output], [ @@ -352,7 +352,7 @@ class _CrawlSpider(self.spider_class): spider = _CrawlSpider() output = list(spider._requests_to_follow(response)) self.assertEqual(len(output), 3) - self.assertTrue(all(map(lambda r: isinstance(r, Request), output))) + self.assertTrue(all(isinstance(r, Request) for r in output)) self.assertEqual( [r.url for r in output], [ @@ -383,7 +383,7 @@ class _CrawlSpider(self.spider_class): spider = _CrawlSpider() output = list(spider._requests_to_follow(response)) self.assertEqual(len(output), 3) - self.assertTrue(all(map(lambda r: isinstance(r, Request), output))) + self.assertTrue(all(isinstance(r, Request) for r in output)) self.assertEqual( [r.url for r in output], [ @@ -413,7 +413,7 @@ def process_request_upper(self, request, response): spider = _CrawlSpider() output = list(spider._requests_to_follow(response)) self.assertEqual(len(output), 3) - self.assertTrue(all(map(lambda r: isinstance(r, Request), output))) + self.assertTrue(all(isinstance(r, Request) for r in output)) self.assertEqual( [r.url for r in output], [ @@ -445,7 +445,7 @@ def process_request_meta_response_class(self, request, response): spider = _CrawlSpider() output = list(spider._requests_to_follow(response)) self.assertEqual(len(output), 3) - self.assertTrue(all(map(lambda r: isinstance(r, Request), output))) + self.assertTrue(all(isinstance(r, Request) for r in output)) self.assertEqual( [r.url for r in output], [ @@ -637,7 +637,7 @@ def test_sitemap_filter_with_alternate_links(self): class FilteredSitemapSpider(self.spider_class): def sitemap_filter(self, entries): for entry in entries: - alternate_links = entry.get("alternate", tuple()) + alternate_links = entry.get("alternate", ()) for link in alternate_links: if "/deutsch/" in link: entry["loc"] = link