Skip to content

Commit 1e085ab

Browse files
authored
Merge pull request #23 from nsalvacao/worker/lote2-aud007-command-surface
fix: canonicalize legacy cli crawler command surface
2 parents 5ec2699 + 0192f75 commit 1e085ab

3 files changed

Lines changed: 124 additions & 100 deletions

File tree

cli_crawler.py

Lines changed: 5 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -1,103 +1,12 @@
11
#!/usr/bin/env python3
2-
"""Universal CLI Help Crawler - OpenAPI for CLIs.
2+
"""Legacy compatibility script for ``cli-crawler``.
33
4-
Crawls CLI --help outputs and generates structured JSON maps
5-
that AI agents can use for precise command reasoning.
6-
"""
7-
8-
from __future__ import annotations
9-
10-
import argparse
11-
import logging
12-
import sys
13-
from pathlib import Path
14-
15-
from crawler.config import CLIConfig, CrawlerConfig, load_config
16-
from crawler.pipeline import crawl_all, crawl_cli
17-
18-
19-
def main() -> None:
20-
parser = argparse.ArgumentParser(
21-
description="Crawl CLI --help outputs and generate structured JSON maps",
22-
epilog="Examples:\n"
23-
" python cli_crawler.py git -o output/git.json\n"
24-
" python cli_crawler.py --config config.yaml --all\n"
25-
" python cli_crawler.py docker -v --include-raw\n",
26-
formatter_class=argparse.RawDescriptionHelpFormatter,
27-
)
28-
parser.add_argument("cli", nargs="?", help="CLI to crawl (e.g., git, docker)")
29-
parser.add_argument("--config", "-c", type=Path, help="Path to config YAML")
30-
parser.add_argument("--output", "-o", type=Path, help="Output file path")
31-
parser.add_argument(
32-
"--output-dir",
33-
type=Path,
34-
default=Path("./output"),
35-
help="Output directory (default: ./output)",
36-
)
37-
parser.add_argument("--all", action="store_true", help="Crawl all CLIs in config")
38-
parser.add_argument(
39-
"--include-raw", action="store_true", help="Include raw help text in main JSON"
40-
)
41-
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose logging")
42-
parser.add_argument("--strict", action="store_true", help="Fail on first parse error")
43-
parser.add_argument("--max-depth", type=int, help="Override max recursion depth")
44-
parser.add_argument("--timeout", type=int, help="Override timeout per command (seconds)")
45-
parser.add_argument("--list", action="store_true", help="List configured CLIs and exit")
46-
47-
args = parser.parse_args()
48-
49-
# Configure logging
50-
logging.basicConfig(
51-
level=logging.DEBUG if args.verbose else logging.INFO,
52-
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
53-
datefmt="%H:%M:%S",
54-
)
4+
Prefer invoking the canonical command directly:
555
56-
# Load config
57-
config: CrawlerConfig
58-
if args.config and args.config.exists():
59-
config = load_config(str(args.config))
60-
else:
61-
config = CrawlerConfig()
62-
63-
# List mode
64-
if args.list:
65-
if not config.clis:
66-
print("No CLIs configured. Use --config to specify a config file.")
67-
else:
68-
print(f"Configured CLIs ({len(config.clis)}):")
69-
for name, cfg in sorted(config.clis.items()):
70-
group = f" [{cfg.group}]" if cfg.group else ""
71-
env = f" (env: {cfg.environment})" if cfg.environment != "wsl" else ""
72-
print(f" {name}{group}{env}")
73-
return
74-
75-
# Crawl all CLIs
76-
if args.all:
77-
if not config.clis:
78-
print("No CLIs configured. Use --config to specify a config file.")
79-
sys.exit(1)
80-
crawl_all(config, args.output_dir, args.include_raw, args.strict)
81-
return
82-
83-
# Crawl single CLI
84-
if args.cli:
85-
cli_config = config.clis.get(args.cli, CLIConfig(name=args.cli))
86-
87-
# Apply CLI arg overrides
88-
if args.max_depth is not None:
89-
cli_config.max_depth = args.max_depth
90-
if args.timeout is not None:
91-
cli_config.timeout = args.timeout
92-
93-
output = args.output or args.output_dir / f"{args.cli}.json"
94-
crawl_cli(args.cli, cli_config, output, args.include_raw, args.strict)
95-
return
96-
97-
# No action specified
98-
parser.print_help()
99-
sys.exit(1)
6+
cli-crawler <cli_name> [options]
7+
"""
1008

9+
from crawler.cli_crawler import main
10110

10211
if __name__ == "__main__":
10312
main()

src/crawler/cli_crawler.py

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,74 @@
1-
"""Compatibility crawler entrypoint and basic help execution helpers (T013)."""
1+
"""Compatibility wrappers for legacy ``cli_crawler`` entrypoints."""
22

33
from __future__ import annotations
44

5+
import sys
6+
from pathlib import Path
7+
8+
from . import pipeline as _pipeline
59
from .config import CLIConfig, CrawlerConfig
610
from .executor import Executor
711
from .models import CLIMap, ExecutionResult
8-
from .pipeline import crawl_all, crawl_cli, main
12+
from .pipeline import crawl_all, crawl_cli
13+
14+
_RAW_FLAG = "--raw"
15+
_LEGACY_RAW_FLAG = "--include-raw"
16+
17+
18+
def _normalize_legacy_args(argv: list[str]) -> tuple[list[str], list[str]]:
19+
"""Normalize legacy flags to canonical ``cli-crawler`` arguments."""
20+
normalized: list[str] = []
21+
warnings: list[str] = []
22+
raw_enabled = False
23+
legacy_raw_seen = False
24+
25+
for arg in argv:
26+
if arg == _RAW_FLAG:
27+
if not raw_enabled:
28+
normalized.append(arg)
29+
raw_enabled = True
30+
elif arg == _LEGACY_RAW_FLAG:
31+
legacy_raw_seen = True
32+
if not raw_enabled:
33+
normalized.append(_RAW_FLAG)
34+
raw_enabled = True
35+
else:
36+
normalized.append(arg)
37+
38+
if legacy_raw_seen:
39+
if _RAW_FLAG in argv:
40+
warnings.append(
41+
f"{_LEGACY_RAW_FLAG} is deprecated and ignored when {_RAW_FLAG} is also provided."
42+
)
43+
else:
44+
warnings.append(f"{_LEGACY_RAW_FLAG} is deprecated; treating it as {_RAW_FLAG}.")
45+
46+
return normalized, warnings
47+
48+
49+
def _legacy_entrypoint_warning(program_name: str) -> str | None:
    """Return a deprecation warning when run via the legacy file entrypoint.

    Only the basename of *program_name* is inspected, so any path ending in
    ``cli_crawler.py`` triggers the warning; otherwise ``None`` is returned.
    """
    basename = Path(program_name).name
    if basename != "cli_crawler.py":
        return None
    return "Legacy entrypoint detected. Prefer the canonical command: cli-crawler."
54+
55+
56+
def main() -> None:
    """Compatibility entrypoint that delegates to ``crawler.pipeline.main``.

    Normalizes legacy flags in ``sys.argv``, emits any deprecation warnings
    on stderr, then runs the canonical pipeline with the rewritten argv.
    The original ``sys.argv`` is always restored afterwards.
    """
    saved_argv = sys.argv[:]
    args, notes = _normalize_legacy_args(saved_argv[1:])

    entry_note = _legacy_entrypoint_warning(saved_argv[0])
    if entry_note:
        # The entrypoint warning leads, followed by flag deprecations.
        notes = [entry_note, *notes]

    for note in notes:
        print(f"warning: {note}", file=sys.stderr)

    sys.argv = [saved_argv[0], *args]
    try:
        _pipeline.main()
    finally:
        # Restore argv even if the pipeline raises, so callers that invoke
        # main() programmatically observe an unmodified sys.argv.
        sys.argv = saved_argv
972

1073

1174
def run_root_help(cli_name: str, config: CLIConfig | None = None) -> ExecutionResult:
@@ -26,8 +89,6 @@ def crawl_single(
2689

2790
def crawl_configured(config: CrawlerConfig, output_dir: str = "output") -> list[CLIMap]:
    """Compatibility helper for crawling all configured CLIs."""
    destination = Path(output_dir)
    return crawl_all(config, destination)
3293

3394

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
"""Compatibility tests for legacy ``cli_crawler`` wrappers."""
2+
3+
from __future__ import annotations
4+
5+
import sys
6+
7+
import pytest
8+
9+
from crawler import cli_crawler
10+
11+
12+
def test_main_maps_include_raw_to_raw(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Legacy ``--include-raw`` should map to canonical ``--raw``."""
    monkeypatch.setattr(sys, "argv", ["cli_crawler.py", "git", "--include-raw"])

    seen_argv: list[str] = []

    def _record_argv() -> None:
        # Capture the argv the pipeline actually sees.
        seen_argv.extend(sys.argv)

    monkeypatch.setattr(cli_crawler._pipeline, "main", _record_argv)

    cli_crawler.main()

    assert seen_argv == ["cli_crawler.py", "git", "--raw"]
    err_text = capsys.readouterr().err
    assert "cli-crawler" in err_text
    assert "--include-raw is deprecated; treating it as --raw." in err_text
31+
32+
33+
def test_main_prefers_raw_when_both_raw_flags_are_present(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """When both flags are present, keep one canonical ``--raw`` only."""
    argv = ["cli_crawler.py", "git", "--raw", "--include-raw"]
    monkeypatch.setattr(sys, "argv", argv)

    recorded: list[str] = []

    def _spy_main() -> None:
        # Capture the argv the pipeline actually sees.
        recorded.extend(sys.argv)

    monkeypatch.setattr(cli_crawler._pipeline, "main", _spy_main)

    cli_crawler.main()

    assert recorded == ["cli_crawler.py", "git", "--raw"]
    err_text = capsys.readouterr().err
    assert "--include-raw is deprecated and ignored when --raw is also provided." in err_text

0 commit comments

Comments
 (0)