Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 42 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: CI

on:
push:
branches: [main, "001-cli-plugins-base"]
branches: [main, "001-cli-plugins-base", "candidate/**"]
pull_request:
branches: [main]
branches: [main, "candidate/**"]

jobs:
test:
Expand Down Expand Up @@ -47,3 +47,43 @@ jobs:

- name: Ruff format check
run: ruff format --check src/ tests/

# Job: build the sdist and wheel, check that exactly one of each was produced,
# then install the wheel into a fresh virtualenv and smoke-test its imports
# and console commands.
package-release-gates:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"

- name: Install build tools
run: python -m pip install --upgrade pip build

# SOURCE_DATE_EPOCH is pinned to a fixed timestamp
# (1704067200 = 2024-01-01T00:00:00Z). Presumably this keeps the build
# output independent of wall-clock time -- TODO confirm intent.
- name: Build sdist and wheel
env:
SOURCE_DATE_EPOCH: "1704067200"
run: python -m build --sdist --wheel --outdir dist

# nullglob makes an unmatched pattern expand to an empty array, so the
# count checks fail when an artifact is missing instead of counting the
# literal glob string as one entry. Each check requires exactly one match.
- name: Validate build artifacts
run: |
set -euo pipefail
shopt -s nullglob
sdists=(dist/*.tar.gz)
wheels=(dist/*.whl)
test "${#sdists[@]}" -eq 1
test "${#wheels[@]}" -eq 1
ls -lh dist

# The wheel is installed with --no-deps into a separate virtualenv, so the
# import and --help checks run without the package's declared dependencies.
# NOTE(review): confirm that none of these modules or entry points import a
# third-party package at import time, otherwise this step fails.
- name: Install built wheel and run smoke import checks
run: |
set -euo pipefail
python -m venv .venv-smoke
. .venv-smoke/bin/activate
python -m pip install --upgrade pip
python -m pip install --no-deps dist/*.whl
python -c "import crawler, generator, config, lib; print('artifact smoke import OK')"
cli-crawler --help >/dev/null
generate-plugin --help >/dev/null
config-audit --help >/dev/null
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,20 @@

---

## Start Here (Canonical Docs Route)

If this is your first visit, **this `README.md` is the canonical onboarding path**.

Follow this order:

1. Stay in this file for context + Quick Start.
2. Use [`docs/README.md`](docs/README.md) as the docs index for deeper reading.
3. Open [`docs/CONTRIBUTING.md`](docs/CONTRIBUTING.md) when you are ready to contribute.

This keeps first-time navigation deterministic and avoids documentation dead-ends.

---

## Why This Exists

LLMs are strong at reasoning, but weak on precise, current CLI syntax unless the exact tool/version is in context.
Expand Down
101 changes: 5 additions & 96 deletions cli_crawler.py
Original file line number Diff line number Diff line change
@@ -1,103 +1,12 @@
#!/usr/bin/env python3
"""Universal CLI Help Crawler - OpenAPI for CLIs.
"""Legacy compatibility script for ``cli-crawler``.

Crawls CLI --help outputs and generates structured JSON maps
that AI agents can use for precise command reasoning.
"""

from __future__ import annotations

import argparse
import logging
import sys
from pathlib import Path

from crawler.config import CLIConfig, CrawlerConfig, load_config
from crawler.pipeline import crawl_all, crawl_cli


# Command-line entry point: parse arguments, configure logging, load the
# crawler configuration, then run exactly one mode. The modes are checked in
# this order: --list, --all, a single named CLI. With no mode selected, the
# help text is printed and the process exits with status 1.
def main() -> None:
parser = argparse.ArgumentParser(
description="Crawl CLI --help outputs and generate structured JSON maps",
epilog="Examples:\n"
" python cli_crawler.py git -o output/git.json\n"
" python cli_crawler.py --config config.yaml --all\n"
" python cli_crawler.py docker -v --include-raw\n",
# Raw formatter keeps the line breaks of the examples in the epilog.
formatter_class=argparse.RawDescriptionHelpFormatter,
)
# The positional CLI name is optional so that --list and --all work without it.
parser.add_argument("cli", nargs="?", help="CLI to crawl (e.g., git, docker)")
parser.add_argument("--config", "-c", type=Path, help="Path to config YAML")
parser.add_argument("--output", "-o", type=Path, help="Output file path")
parser.add_argument(
"--output-dir",
type=Path,
default=Path("./output"),
help="Output directory (default: ./output)",
)
parser.add_argument("--all", action="store_true", help="Crawl all CLIs in config")
parser.add_argument(
"--include-raw", action="store_true", help="Include raw help text in main JSON"
)
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose logging")
parser.add_argument("--strict", action="store_true", help="Fail on first parse error")
# No default is given for these two, so they stay None unless passed; the
# single-CLI branch below relies on that to decide whether to override.
parser.add_argument("--max-depth", type=int, help="Override max recursion depth")
parser.add_argument("--timeout", type=int, help="Override timeout per command (seconds)")
parser.add_argument("--list", action="store_true", help="List configured CLIs and exit")

args = parser.parse_args()

# Configure logging: DEBUG when --verbose is set, INFO otherwise.
logging.basicConfig(
level=logging.DEBUG if args.verbose else logging.INFO,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
datefmt="%H:%M:%S",
)
Prefer invoking the canonical command directly:

# Load config from --config when the path exists; otherwise use a
# default-constructed CrawlerConfig.
# NOTE(review): a --config path that does not exist is silently ignored here,
# with no warning or error. Combined with --list or --all, the user then gets
# the "No CLIs configured" message even though --config was passed.
config: CrawlerConfig
if args.config and args.config.exists():
config = load_config(str(args.config))
else:
config = CrawlerConfig()

# List mode: print the configured CLI names in sorted order, then return.
if args.list:
if not config.clis:
print("No CLIs configured. Use --config to specify a config file.")
else:
print(f"Configured CLIs ({len(config.clis)}):")
for name, cfg in sorted(config.clis.items()):
group = f" [{cfg.group}]" if cfg.group else ""
# The environment suffix is shown only when it differs from "wsl".
env = f" (env: {cfg.environment})" if cfg.environment != "wsl" else ""
print(f" {name}{group}{env}")
return

# Crawl all CLIs; an empty configuration is an error here (exit status 1).
if args.all:
if not config.clis:
print("No CLIs configured. Use --config to specify a config file.")
sys.exit(1)
crawl_all(config, args.output_dir, args.include_raw, args.strict)
return

# Crawl single CLI
if args.cli:
# A CLI that is not in the config gets a CLIConfig built from its name alone.
cli_config = config.clis.get(args.cli, CLIConfig(name=args.cli))

# Apply CLI arg overrides (only those explicitly passed on the command line).
# NOTE(review): when cli_config came from config.clis, these assignments
# presumably modify the loaded config entry in place -- confirm this is intended.
if args.max_depth is not None:
cli_config.max_depth = args.max_depth
if args.timeout is not None:
cli_config.timeout = args.timeout

# --output takes precedence; otherwise write <output-dir>/<cli>.json.
output = args.output or args.output_dir / f"{args.cli}.json"
crawl_cli(args.cli, cli_config, output, args.include_raw, args.strict)
return

# No action specified
parser.print_help()
sys.exit(1)
cli-crawler <cli_name> [options]
"""

from crawler.cli_crawler import main

# Call the imported main() only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
main()
Loading
Loading