diff --git a/docs/settings.md b/docs/settings.md
index 2420c49..de5160d 100644
--- a/docs/settings.md
+++ b/docs/settings.md
@@ -57,12 +57,4 @@ This setting defines the location of the WACZ file that should be used as a sour
 SW_WACZ_CRAWL = True
 ```
 
-Setting to ignore original `start_requests`, just yield all responses found.
-
-### `SW_WACZ_TIMEOUT`
-
-```python
-SW_WACZ_TIMEOUT = 60
-```
-
-Transport parameter for retrieving the `SW_WACZ_SOURCE_URI` from the defined location.
+Setting to control the scraping behavior. If set to `False`, the scraper will bypass the WACZ middleware/downloadermiddleware during the crawling process.
diff --git a/scrapy_webarchive/spidermiddlewares.py b/scrapy_webarchive/spidermiddlewares.py
index 0cd17c3..2c42a0c 100644
--- a/scrapy_webarchive/spidermiddlewares.py
+++ b/scrapy_webarchive/spidermiddlewares.py
@@ -30,7 +30,6 @@ def __init__(self, settings: Settings, stats: StatsCollector) -> None:
         self.wacz_uris = re.split(r"\s*,\s*", wacz_uri)
 
         self.crawl = settings.get("SW_WACZ_CRAWL", False)
-        self.timeout = settings.getfloat("SW_WACZ_TIMEOUT", 60.0)
 
     @classmethod
     def from_crawler(cls, crawler: Crawler) -> Self:
diff --git a/tests/test_downloadermiddlewares.py b/tests/test_downloadermiddlewares.py
index 49ebcac..67a5dc7 100644
--- a/tests/test_downloadermiddlewares.py
+++ b/tests/test_downloadermiddlewares.py
@@ -24,7 +24,6 @@ def _get_settings(self, **new_settings):
         settings = {
             "SW_WACZ_SOURCE_URI": self._get_wacz_source_url(),
             "SW_WACZ_CRAWL": True,
-            "SW_WACZ_TIMEOUT": 60,
         }
         settings.update(new_settings)
         return Settings(settings)
diff --git a/tests/test_middleware.py b/tests/test_middleware.py
index 4df2a1f..0d0fdaf 100644
--- a/tests/test_middleware.py
+++ b/tests/test_middleware.py
@@ -17,7 +17,6 @@ def setup_method(self):
     def _get_settings(self, **new_settings):
         settings = {
             "SW_WACZ_SOURCE_URI": get_test_data_path("warc_1_1", "quotes.wacz").as_uri(),
-            "SW_WACZ_TIMEOUT": 60,
         }
         settings.update(new_settings)
         return Settings(settings)