Skip to content

Commit

Permalink
Better crawl command error on invalid target
Browse files Browse the repository at this point in the history
Support for mixed spider dicts
Fix #913
Fix #915
  • Loading branch information
Yomguithereal committed Nov 28, 2023
1 parent b8815c4 commit ba36626
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 5 deletions.
2 changes: 1 addition & 1 deletion ftest/crawlers/echojs.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def process(self, job: CrawlJob, response: Response) -> SpiderResult:
next_links = response.soup().scrape("#newslist article > h2 > a[href]", "href")
next_targets = [CrawlTarget(url=link, spider="article") for link in next_links]

- return job.domain, next_targets
+ return job.group, next_targets


class ArticleSpider(Spider):
Expand Down
15 changes: 11 additions & 4 deletions minet/cli/crawl/crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def callback(self: Crawler, result: SuccessfulCrawlResult) -> Optional[str]:
# - a dict of Spider instances
# - a callable
valid_spiders_dict = isinstance(target, Mapping) and all(
- isinstance(v, Spider) for v in target.values()
+ isinstance(v, Spider) or callable(v) for v in target.values()
)

# TODO: inspect arity to weed out potential footguns
Expand All @@ -271,8 +271,15 @@ def callback(self: Crawler, result: SuccessfulCrawlResult) -> Optional[str]:
and not isinstance(target, Spider)
and not callable(target)
):
- # TODO: explain further
- raise FatalError("Invalid crawling target!")
+ raise FatalError(
+ [
+ "Invalid crawling target!",
+ "Expecting either:",
+ "  - a function",
+ "  - a Spider instance",
+ "  - a dict mapping spider names to functions and/or Spider instances",
+ ]
+ )

# NOTE: target IS a spider declaration
target = cast(SpiderDeclaration, target)
Expand All @@ -289,7 +296,7 @@ def callback(self: Crawler, result: SuccessfulCrawlResult) -> Optional[str]:
crawler = target(**crawler_kwargs)

if not isinstance(crawler, Crawler):
- raise FatalError("Factory did not return a crawler!")
+ raise FatalError("Factory did not return a crawler instance!")

except CrawlerAlreadyFinishedError:
loading_bar.erase()
Expand Down

0 comments on commit ba36626

Please sign in to comment.