
Commit 421262a

add warning for FeedExporter
1 parent 631fc65 commit 421262a

6 files changed, +93 −2 lines changed


Diff for: scrapy/commands/__init__.py

+18 −1

@@ -4,6 +4,7 @@
 
 import argparse
 import builtins
+import logging
 import os
 from pathlib import Path
 from typing import Any, Dict, Iterable, List, Optional
@@ -12,7 +13,13 @@
 
 from scrapy.crawler import Crawler, CrawlerProcess
 from scrapy.exceptions import UsageError
-from scrapy.utils.conf import arglist_to_dict, feed_process_params_from_cli
+from scrapy.utils.conf import (
+    arglist_to_dict,
+    build_component_list,
+    feed_process_params_from_cli,
+)
+
+logger = logging.getLogger(__name__)
 
 
 class ScrapyCommand:
@@ -178,6 +185,16 @@ def process_options(self, args: List[str], opts: argparse.Namespace) -> None:
             )
             self.settings.set("FEEDS", feeds, priority="cmdline")
 
+    def validate_feed_exporter(self, opts):
+        feed_eport_key = "scrapy.extensions.feedexport.FeedExporter"
+
+        if opts.output:
+            extensions = build_component_list(self.settings.getwithbase("EXTENSIONS"))
+            if feed_eport_key not in extensions:
+                logger.warning(
+                    "'FeedExporter' extension must be enabled for Feed Exports to work."
+                )
+
 
 class ScrapyHelpFormatter(argparse.HelpFormatter):
     """

Diff for: scrapy/commands/crawl.py

+1

@@ -23,6 +23,7 @@ def run(self, args: List[str], opts: argparse.Namespace) -> None:
             raise UsageError(
                 "running 'scrapy crawl' with more than one spider is not supported"
             )
+        self.validate_feed_exporter(opts)
         spname = args[0]
 
         assert self.crawler_process

Diff for: scrapy/commands/parse.py

+1

@@ -403,6 +403,7 @@ def run(self, args: List[str], opts: argparse.Namespace) -> None:
             raise UsageError()
         else:
             url = args[0]
+        self.validate_feed_exporter(opts)
 
         # prepare spidercls
         self.set_spidercls(url, opts)

Diff for: scrapy/commands/runspider.py

+1

@@ -40,6 +40,7 @@ def long_desc(self) -> str:
     def run(self, args: List[str], opts: argparse.Namespace) -> None:
         if len(args) != 1:
             raise UsageError()
+        self.validate_feed_exporter(opts)
         filename = Path(args[0])
         if not filename.exists():
             raise UsageError(f"File not found: {filename}\n")

Diff for: tests/test_command_parse.py

+23

@@ -449,3 +449,26 @@ def test_parse_add_options(self):
         self.assertEqual(namespace.depth, 2)
         self.assertEqual(namespace.spider, self.spider_name)
         self.assertTrue(namespace.verbose)
+
+    def test_exporter_disabled(self):
+        with self.assertLogs() as cm:
+            command = parse.Command()
+            settings = Settings()
+            settings.setdict(
+                {"EXTENSIONS": {"scrapy.extensions.feedexport.FeedExporter": None}},
+                priority="project",
+            )
+            command.settings = settings
+            parser = argparse.ArgumentParser(
+                formatter_class=argparse.HelpFormatter, conflict_handler="resolve"
+            )
+            command.add_options(parser)
+            opts, _ = parser.parse_known_args(args=[])
+
+            opts.output = ["example.json"]
+
+            command.validate_feed_exporter(opts)
+        expected = (
+            "'FeedExporter' extension must be enabled for Feed Exports to work."
+        )
+        self.assertTrue(any(expected in str for str in cm.output))

Diff for: tests/test_commands.py

+49 −1

@@ -20,8 +20,9 @@
 from twisted.trial import unittest
 
 import scrapy
-from scrapy.commands import ScrapyCommand, ScrapyHelpFormatter, view
+from scrapy.commands import ScrapyCommand, ScrapyHelpFormatter, crawl, runspider, view
 from scrapy.commands.startproject import IGNORE
+from scrapy.crawler import CrawlerProcess
 from scrapy.settings import Settings
 from scrapy.utils.python import to_unicode
 from scrapy.utils.test import get_testenv
@@ -986,6 +987,29 @@ def start_requests(self):
         self.assertIn("Spider closed (finished)", log)
         self.assertIn("The value of FOO is 42", log)
 
+    def test_exporter_disabled(self):
+        with self.assertLogs() as cm:
+            command = runspider.Command()
+            settings = Settings()
+            settings.setdict(
+                {"EXTENSIONS": {"scrapy.extensions.feedexport.FeedExporter": None}},
+                priority="project",
+            )
+            command.settings = settings
+            parser = argparse.ArgumentParser(
+                formatter_class=ScrapyHelpFormatter, conflict_handler="resolve"
+            )
+            command.add_options(parser)
+            opts, _ = parser.parse_known_args(args=[])
+
+            opts.output = ["example.json"]
+
+            command.validate_feed_exporter(opts)
+        expected = (
+            "'FeedExporter' extension must be enabled for Feed Exports to work."
+        )
+        self.assertTrue(any(expected in str for str in cm.output))
+
 
 class WindowsRunSpiderCommandTest(RunSpiderCommandTest):
     spider_filename = "myspider.pyw"
@@ -1055,6 +1079,30 @@ def start_requests(self):
         log = self.get_log(spider_code)
         self.assertIn("[myspider] DEBUG: It works!", log)
 
+    def test_exporter_disabled(self):
+        with self.assertLogs() as cm:
+            command = crawl.Command()
+            settings = Settings()
+            settings.setdict(
+                {"EXTENSIONS": {"scrapy.extensions.feedexport.FeedExporter": None}},
+                priority="project",
+            )
+            command.settings = settings
+            parser = argparse.ArgumentParser(
+                formatter_class=ScrapyHelpFormatter, conflict_handler="resolve"
+            )
+            command.add_options(parser)
+            opts, _ = parser.parse_known_args(args=[])
+            command.crawler_process = CrawlerProcess(settings)
+
+            opts.output = ["example.json"]
+
+            command.validate_feed_exporter(opts)
+        expected = (
+            "'FeedExporter' extension must be enabled for Feed Exports to work."
+        )
+        self.assertTrue(any(expected in str for str in cm.output))
+
     def test_output(self):
         spider_code = """
import scrapy
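
FeedExporter ships enabled through EXTENSIONS_BASE, so the warning only appears when a project or spider has explicitly disabled the extension. A minimal sketch (not part of this diff) of a project settings entry that re-enables it and silences the warning:

# Hypothetical project settings.py snippet; the value 0 mirrors the default
# priority used for this extension in EXTENSIONS_BASE.
EXTENSIONS = {
    "scrapy.extensions.feedexport.FeedExporter": 0,
}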
