Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 5 additions & 96 deletions cli_crawler.py
Original file line number Diff line number Diff line change
@@ -1,103 +1,12 @@
#!/usr/bin/env python3
"""Universal CLI Help Crawler - OpenAPI for CLIs.
"""Legacy compatibility script for ``cli-crawler``.

Crawls CLI --help outputs and generates structured JSON maps
that AI agents can use for precise command reasoning.
"""

from __future__ import annotations

import argparse
import logging
import sys
from pathlib import Path

from crawler.config import CLIConfig, CrawlerConfig, load_config
from crawler.pipeline import crawl_all, crawl_cli


def main() -> None:
parser = argparse.ArgumentParser(
description="Crawl CLI --help outputs and generate structured JSON maps",
epilog="Examples:\n"
" python cli_crawler.py git -o output/git.json\n"
" python cli_crawler.py --config config.yaml --all\n"
" python cli_crawler.py docker -v --include-raw\n",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument("cli", nargs="?", help="CLI to crawl (e.g., git, docker)")
parser.add_argument("--config", "-c", type=Path, help="Path to config YAML")
parser.add_argument("--output", "-o", type=Path, help="Output file path")
parser.add_argument(
"--output-dir",
type=Path,
default=Path("./output"),
help="Output directory (default: ./output)",
)
parser.add_argument("--all", action="store_true", help="Crawl all CLIs in config")
parser.add_argument(
"--include-raw", action="store_true", help="Include raw help text in main JSON"
)
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose logging")
parser.add_argument("--strict", action="store_true", help="Fail on first parse error")
parser.add_argument("--max-depth", type=int, help="Override max recursion depth")
parser.add_argument("--timeout", type=int, help="Override timeout per command (seconds)")
parser.add_argument("--list", action="store_true", help="List configured CLIs and exit")

args = parser.parse_args()

# Configure logging
logging.basicConfig(
level=logging.DEBUG if args.verbose else logging.INFO,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
datefmt="%H:%M:%S",
)
Prefer invoking the canonical command directly:

# Load config
config: CrawlerConfig
if args.config and args.config.exists():
config = load_config(str(args.config))
else:
config = CrawlerConfig()

# List mode
if args.list:
if not config.clis:
print("No CLIs configured. Use --config to specify a config file.")
else:
print(f"Configured CLIs ({len(config.clis)}):")
for name, cfg in sorted(config.clis.items()):
group = f" [{cfg.group}]" if cfg.group else ""
env = f" (env: {cfg.environment})" if cfg.environment != "wsl" else ""
print(f" {name}{group}{env}")
return

# Crawl all CLIs
if args.all:
if not config.clis:
print("No CLIs configured. Use --config to specify a config file.")
sys.exit(1)
crawl_all(config, args.output_dir, args.include_raw, args.strict)
return

# Crawl single CLI
if args.cli:
cli_config = config.clis.get(args.cli, CLIConfig(name=args.cli))

# Apply CLI arg overrides
if args.max_depth is not None:
cli_config.max_depth = args.max_depth
if args.timeout is not None:
cli_config.timeout = args.timeout

output = args.output or args.output_dir / f"{args.cli}.json"
crawl_cli(args.cli, cli_config, output, args.include_raw, args.strict)
return

# No action specified
parser.print_help()
sys.exit(1)
cli-crawler <cli_name> [options]
"""

from crawler.cli_crawler import main

if __name__ == "__main__":
main()
69 changes: 65 additions & 4 deletions src/crawler/cli_crawler.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,74 @@
"""Compatibility crawler entrypoint and basic help execution helpers (T013)."""
"""Compatibility wrappers for legacy ``cli_crawler`` entrypoints."""

from __future__ import annotations

import sys
from pathlib import Path

from . import pipeline as _pipeline
from .config import CLIConfig, CrawlerConfig
from .executor import Executor
from .models import CLIMap, ExecutionResult
from .pipeline import crawl_all, crawl_cli, main
from .pipeline import crawl_all, crawl_cli

_RAW_FLAG = "--raw"
_LEGACY_RAW_FLAG = "--include-raw"


def _normalize_legacy_args(argv: list[str]) -> tuple[list[str], list[str]]:
"""Normalize legacy flags to canonical ``cli-crawler`` arguments."""
normalized: list[str] = []
warnings: list[str] = []
raw_enabled = False
legacy_raw_seen = False

for arg in argv:
if arg == _RAW_FLAG:
raw_enabled = True
normalized.append(arg)
continue
if arg == _LEGACY_RAW_FLAG:
legacy_raw_seen = True
if not raw_enabled:
normalized.append(_RAW_FLAG)
raw_enabled = True
continue
normalized.append(arg)

if legacy_raw_seen:
if _RAW_FLAG in argv:
warnings.append(
f"{_LEGACY_RAW_FLAG} is deprecated and ignored when {_RAW_FLAG} is also provided."
)
else:
warnings.append(f"{_LEGACY_RAW_FLAG} is deprecated; treating it as {_RAW_FLAG}.")

return normalized, warnings


def _legacy_entrypoint_warning(program_name: str) -> str | None:
"""Return a warning when executed via legacy file-based entrypoint."""
if Path(program_name).name == "cli_crawler.py":
return "Legacy entrypoint detected. Prefer the canonical command: cli-crawler."
return None


def main() -> None:
    """Compatibility entrypoint that delegates to ``crawler.pipeline.main``.

    Normalizes legacy flags, prints any deprecation warnings to stderr, then
    runs the pipeline with the rewritten ``sys.argv``, restoring the original
    ``sys.argv`` afterwards even if the pipeline raises.
    """
    saved_argv = list(sys.argv)
    args, warnings = _normalize_legacy_args(saved_argv[1:])

    entry_warning = _legacy_entrypoint_warning(saved_argv[0])
    if entry_warning:
        warnings = [entry_warning, *warnings]

    for message in warnings:
        print(f"warning: {message}", file=sys.stderr)

    sys.argv = [saved_argv[0], *args]
    try:
        _pipeline.main()
    finally:
        # Restore so repeated in-process invocations see the caller's argv.
        sys.argv = saved_argv


def run_root_help(cli_name: str, config: CLIConfig | None = None) -> ExecutionResult:
Expand All @@ -26,8 +89,6 @@ def crawl_single(

def crawl_configured(config: CrawlerConfig, output_dir: str = "output") -> list[CLIMap]:
    """Compatibility helper that crawls every CLI configured in *config*.

    Delegates to :func:`crawl_all`, writing results beneath *output_dir*
    (uses the module-level :class:`pathlib.Path` import).
    """
    return crawl_all(config, Path(output_dir))


Expand Down
54 changes: 54 additions & 0 deletions tests/unit/test_cli_crawler_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Compatibility tests for legacy ``cli_crawler`` wrappers."""

from __future__ import annotations

import sys

import pytest

from crawler import cli_crawler


def test_main_maps_include_raw_to_raw(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Legacy ``--include-raw`` should map to canonical ``--raw``."""
    monkeypatch.setattr(sys, "argv", ["cli_crawler.py", "git", "--include-raw"])
    seen: dict[str, list[str]] = {}

    def _record_argv() -> None:
        # Snapshot the argv the pipeline actually receives.
        seen["argv"] = list(sys.argv)

    monkeypatch.setattr(cli_crawler._pipeline, "main", _record_argv)

    cli_crawler.main()

    assert seen["argv"] == ["cli_crawler.py", "git", "--raw"]
    err_output = capsys.readouterr().err
    assert "cli-crawler" in err_output
    assert "--include-raw is deprecated; treating it as --raw." in err_output


def test_main_prefers_raw_when_both_raw_flags_are_present(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """When both flags are present, keep one canonical ``--raw`` only."""
    monkeypatch.setattr(
        sys, "argv", ["cli_crawler.py", "git", "--raw", "--include-raw"]
    )
    seen: dict[str, list[str]] = {}

    def _record_argv() -> None:
        # Snapshot the argv the pipeline actually receives.
        seen["argv"] = list(sys.argv)

    monkeypatch.setattr(cli_crawler._pipeline, "main", _record_argv)

    cli_crawler.main()

    assert seen["argv"] == ["cli_crawler.py", "git", "--raw"]
    err_output = capsys.readouterr().err
    assert "--include-raw is deprecated and ignored when --raw is also provided." in err_output
Loading