Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 34 additions & 2 deletions akd/tools/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from akd.agents.relevancy import MultiRubricRelevancyAgent
from akd.tools.relevancy import EnhancedRelevancyCheckerConfig, RubricWeights

from .scrapers.composite import CompositeWebScraper, ResearchArticleResolver
from .scrapers.composite import CompositeScraper, ResearchArticleResolver
from .scrapers.pdf_scrapers import SimplePDFScraper
from .scrapers.resolvers import (
ADSResolver,
Expand All @@ -20,6 +20,11 @@
WebScraperToolBase,
)
from .search import SearchTool, SearxNGSearchTool, SearxNGSearchToolConfig
from .source_validator import (
SourceValidator,
SourceValidatorConfig,
create_source_validator,
)


def create_default_scraper(
Expand All @@ -28,7 +33,7 @@ def create_default_scraper(
) -> WebScraperToolBase:
config = config or WebpageScraperToolConfig()
config.debug = debug
return CompositeWebScraper(
return CompositeScraper(
SimpleWebScraper(config),
Crawl4AIWebScraper(config),
SimplePDFScraper(config),
Expand All @@ -54,6 +59,33 @@ def create_default_article_resolver(
)


def create_default_source_validator(
    config: Optional[SourceValidatorConfig] = None,
    whitelist_file_path: Optional[str] = None,
    max_concurrent_requests: int = 10,
    debug: bool = False,
) -> SourceValidator:
    """
    Build a SourceValidator from an explicit config or from default settings.

    Args:
        config: Optional SourceValidatorConfig. When supplied, the validator
            is constructed directly from it; ``whitelist_file_path`` and
            ``max_concurrent_requests`` are not used on that path, while
            ``debug`` is still forwarded to the validator.
        whitelist_file_path: Path to the source whitelist JSON file, passed
            through to ``create_source_validator`` when no config is given.
            If None, the default path is presumably
            akd/docs/pubs_whitelist.json (per the previous docstring;
            confirm against ``create_source_validator``).
        max_concurrent_requests: Maximum number of concurrent API requests,
            passed through to ``create_source_validator`` when no config is
            given.
        debug: Enable debug logging. Forwarded on both construction paths.

    Returns:
        Configured SourceValidator instance.
    """
    # An explicit config takes precedence over the individual keyword settings.
    if config is not None:
        return SourceValidator(config, debug=debug)

    # No config supplied: delegate to the module-level factory with the
    # individual settings.
    return create_source_validator(
        whitelist_file_path=whitelist_file_path,
        max_concurrent_requests=max_concurrent_requests,
        debug=debug,
    )


def create_strict_literature_config_for_relevancy(
n_iter: int = 1,
relevance_threshold: float = 0.7,
Expand Down
Loading