From 49a6978c77c8e0ab4b43e23c7463311198bf8670 Mon Sep 17 00:00:00 2001
From: mxyhi <imxyi@hotmail.com>
Date: Wed, 11 Mar 2026 19:51:21 +0800
Subject: [PATCH] feat: add local skill support

---
 .agents/skills/cli-anything/SKILL.md          |  36 +
 .../skills/cli-anything/agents/openai.yaml    |   3 +
 .../skills/cli-anything/references/HARNESS.md | 622 ++++++++++++++++++
 .../references/commands/cli-anything.md       | 126 ++++
 .../cli-anything/references/commands/list.md  | 239 +++++++
 .../references/commands/refine.md             | 106 +++
 .../cli-anything/references/commands/test.md  |  75 +++
 .../references/commands/validate.md           | 125 ++++
 .gitignore                                    |   7 +-
 README.md                                     |  80 ++-
 README_CN.md                                  |  79 ++-
 tests/test_skill_layout.py                    |  63 ++
 12 files changed, 1492 insertions(+), 69 deletions(-)
 create mode 100644 .agents/skills/cli-anything/SKILL.md
 create mode 100644 .agents/skills/cli-anything/agents/openai.yaml
 create mode 100644 .agents/skills/cli-anything/references/HARNESS.md
 create mode 100644 .agents/skills/cli-anything/references/commands/cli-anything.md
 create mode 100644 .agents/skills/cli-anything/references/commands/list.md
 create mode 100644 .agents/skills/cli-anything/references/commands/refine.md
 create mode 100644 .agents/skills/cli-anything/references/commands/test.md
 create mode 100644 .agents/skills/cli-anything/references/commands/validate.md
 create mode 100644 tests/test_skill_layout.py
diff --git a/.agents/skills/cli-anything/SKILL.md b/.agents/skills/cli-anything/SKILL.md
new file mode 100644
index 0000000..4b90af4
--- /dev/null
+++ b/.agents/skills/cli-anything/SKILL.md
@@ -0,0 +1,36 @@
+---
+name: cli-anything
+description: Build, refine, test, validate, or inventory agent-usable CLI harnesses for software codebases with the CLI-Anything methodology. Use this skill when a request involves turning a GUI app, desktop tool, web service SDK, or existing codebase into a stateful CLI under agent-harness/, or when extending an existing cli-anything-* harness.
+---
+
+# CLI-Anything
+
+## Overview
+
+Use this skill when your agent runtime supports local skills and you want to apply the CLI-Anything methodology without relying on slash-command plugins. It complements the existing plugin flow and uses the same source of truth: [`references/HARNESS.md`](references/HARNESS.md).
+
+## Core Workflow
+
+1. Choose the task mode first:
+   - Build a new harness from a source tree or repository: [`references/commands/cli-anything.md`](references/commands/cli-anything.md)
+   - Expand coverage of an existing harness: [`references/commands/refine.md`](references/commands/refine.md)
+   - Run or update tests for an existing harness: [`references/commands/test.md`](references/commands/test.md)
+   - Validate an implementation against the standard: [`references/commands/validate.md`](references/commands/validate.md)
+   - Inventory generated or installed harnesses: [`references/commands/list.md`](references/commands/list.md)
+2. Read [`references/HARNESS.md`](references/HARNESS.md) before changing code. It defines the architecture, testing, packaging, and backend integration rules.
+3. Preserve the generated layout `<software>/agent-harness/cli_anything/<software>/...` and package name `cli-anything-<software>`.
+4. Use the real software backend for rendering and export. Do not replace the target application with toy Python reimplementations.
+5. Reuse existing harnesses in this repository as examples before inventing a new structure.
+
+## Repository Examples
+
+- `anygen/agent-harness/` for cloud API backed workflows.
+- `gimp/agent-harness/`, `blender/agent-harness/`, and `inkscape/agent-harness/` for GUI-to-CLI mappings.
+- `libreoffice/agent-harness/` for document generation and real headless export.
+- `cli-anything-plugin/repl_skin.py` as the shared REPL presentation layer that generated harnesses can copy into `utils/repl_skin.py`.
+
+## Notes
+
+- The repository supports both the original plugin flow and the repo-local skill flow.
+- Do not assume slash commands such as `/cli-anything` exist in the current runtime. Translate the command references into normal agent execution steps.
+- If the request is only about using an already-generated CLI, prefer the installed `cli-anything-<software>` command instead of regenerating the harness.
diff --git a/.agents/skills/cli-anything/agents/openai.yaml b/.agents/skills/cli-anything/agents/openai.yaml
new file mode 100644
index 0000000..720b6df
--- /dev/null
+++ b/.agents/skills/cli-anything/agents/openai.yaml
@@ -0,0 +1,3 @@
+interface:
+  display_name: "CLI Anything"
+  short_description: "Turn software codebases into agent-usable CLIs"
diff --git a/.agents/skills/cli-anything/references/HARNESS.md b/.agents/skills/cli-anything/references/HARNESS.md
new file mode 100644
index 0000000..9b16737
--- /dev/null
+++ b/.agents/skills/cli-anything/references/HARNESS.md
@@ -0,0 +1,622 @@
+# Agent Harness: GUI-to-CLI for Open Source Software
+
+## Purpose
+
+This harness provides a standard operating procedure (SOP) and toolkit for coding
+agents (Claude Code, Codex, etc.) to build powerful, stateful CLI interfaces for
+open-source GUI applications. The goal: let AI agents operate software that was
+designed for humans, without needing a display or mouse.
+
+## General SOP: Turning Any GUI App into an Agent-Usable CLI
+
+### Phase 1: Codebase Analysis
+
+1. **Identify the backend engine** — Most GUI apps separate presentation from logic.
+   Find the core library/framework (e.g., MLT for Shotcut, GEGL for GIMP).
+2. **Map GUI actions to API calls** — Every button click, drag, and menu item
+   corresponds to a function call. Catalog these mappings.
+3. **Identify the data model** — What file formats does it use? How is project state
+   represented? (XML, JSON, binary, database?)
+4. **Find existing CLI tools** — Many backends ship their own CLI (`melt`, `ffmpeg`,
+   `convert`). These are building blocks.
+5. **Catalog the command/undo system** — If the app has undo/redo, it likely uses a
+   command pattern. These commands are your CLI operations.
+
+### Phase 2: CLI Architecture Design
+
+1. **Choose the interaction model**:
+   - **Stateful REPL** for interactive sessions (agents that maintain context)
+   - **Subcommand CLI** for one-shot operations (scripting, pipelines)
+   - **Both** (recommended) — a CLI that works in both modes
+
+2. **Define command groups** matching the app's logical domains:
+   - Project management (new, open, save, close)
+   - Core operations (the app's primary purpose)
+   - Import/Export (file I/O, format conversion)
+   - Configuration (settings, preferences, profiles)
+   - Session/State management (undo, redo, history, status)
+
+3. **Design the state model**:
+   - What must persist between commands? (open project, cursor position, selection)
+   - Where is state stored? (in-memory for REPL, file-based for CLI)
+   - How does state serialize? (JSON session files)
+
+4. **Plan the output format**:
+   - Human-readable (tables, colors) for interactive use
+   - Machine-readable (JSON) for agent consumption
+   - Both, controlled by `--json` flag
+
+### Phase 3: Implementation
+
+1. **Start with the data layer** — XML/JSON manipulation of project files
+2. **Add probe/info commands** — Let agents inspect before they modify
+3. **Add mutation commands** — One command per logical operation
+4. **Add the backend integration** — A `utils/<software>_backend.py` module that
+   wraps the real software's CLI. This module handles:
+   - Finding the software executable (`shutil.which()`)
+   - Invoking it with proper arguments (`subprocess.run()`)
+   - Error handling with clear install instructions if not found
+   - Example (LibreOffice):
+     ```python
+     # utils/lo_backend.py
+     def convert_odf_to(odf_path, output_format, output_path=None, overwrite=False):
+         lo = find_libreoffice()  # raises RuntimeError with install instructions
+         subprocess.run([lo, "--headless", "--convert-to", output_format, ...])
+         return {"output": final_path, "format": output_format, "method": "libreoffice-headless"}
+     ```
+5. **Add rendering/export** — The export pipeline calls the backend module.
+   Generate valid intermediate files, then invoke the real software for conversion.
+6. **Add session management** — State persistence, undo/redo
+7. **Add the REPL with unified skin** — Interactive mode wrapping the subcommands.
+   - Copy `repl_skin.py` from the plugin (`cli-anything-plugin/repl_skin.py`) into
+     `utils/repl_skin.py` in your CLI package
+   - Import and use `ReplSkin` for the REPL interface:
+     ```python
+     from cli_anything.<software>.utils.repl_skin import ReplSkin
+
+     skin = ReplSkin("<software>", version="1.0.0")
+     skin.print_banner()          # Branded startup box
+     pt_session = skin.create_prompt_session()  # prompt_toolkit with history + styling
+     line = skin.get_input(pt_session, project_name="my_project", modified=True)
+     skin.help(commands_dict)     # Formatted help listing
+     skin.success("Saved")        # ✓ green message
+     skin.error("Not found")      # ✗ red message
+     skin.warning("Unsaved")      # ⚠ yellow message
+     skin.info("Processing...")   # ● blue message
+     skin.status("Key", "value")  # Key-value status line
+     skin.table(headers, rows)    # Formatted table
+     skin.progress(3, 10, "...")  # Progress bar
+     skin.print_goodbye()         # Styled exit message
+     ```
+   - Make REPL the default behavior: use `invoke_without_command=True` on the main
+     Click group, and invoke the `repl` command when no subcommand is given:
+     ```python
+     @click.group(invoke_without_command=True)
+     @click.pass_context
+     def cli(ctx, ...):
+         ...
+         if ctx.invoked_subcommand is None:
+             ctx.invoke(repl, project_path=None)
+     ```
+   - This ensures `cli-anything-<software>` with no arguments enters the REPL
+
+### Phase 4: Test Planning (TEST.md - Part 1)
+
+**BEFORE writing any test code**, create a `TEST.md` file in the
+`agent-harness/cli_anything/<software>/tests/` directory. This file serves as your test plan and
+MUST contain:
+
+1. **Test Inventory Plan** — List planned test files and estimated test counts:
+   - `test_core.py`: XX unit tests planned
+   - `test_full_e2e.py`: XX E2E tests planned
+
+2. **Unit Test Plan** — For each core module, describe what will be tested:
+   - Module name (e.g., `project.py`)
+   - Functions to test
+   - Edge cases to cover (invalid inputs, boundary conditions, error handling)
+   - Expected test count
+
+3. **E2E Test Plan** — Describe the real-world scenarios to test:
+   - What workflows will be simulated?
+   - What real files will be generated/processed?
+   - What output properties will be verified?
+   - What format validations will be performed?
+
+4. **Realistic Workflow Scenarios** — Detail each multi-step workflow:
+   - **Workflow name**: Brief title
+   - **Simulates**: What real-world task (e.g., "photo editing pipeline",
+     "podcast production", "product render setup")
+   - **Operations chained**: Step-by-step operations
+   - **Verified**: What output properties will be checked
+
+This planning document ensures comprehensive test coverage before writing code.
+
+### Phase 5: Test Implementation
+
+Now write the actual test code based on the TEST.md plan:
+
+1. **Unit tests** (`test_core.py`) — Every core function tested in isolation with
+   synthetic data. No external dependencies.
+2. **E2E tests — intermediate files** (`test_full_e2e.py`) — Verify the project files
+   your CLI generates are structurally correct (valid XML, correct ZIP structure, etc.)
+3. **E2E tests — true backend** (`test_full_e2e.py`) — **MUST invoke the real software.**
+   Create a project, export via the actual software backend, and verify the output:
+   - File exists and size > 0
+   - Correct format (PDF magic bytes `%PDF-`, DOCX/XLSX/PPTX is valid ZIP/OOXML, etc.)
+   - Content verification where possible (CSV contains expected data, etc.)
+   - **Print artifact paths** so users can manually inspect: `print(f"\n  PDF: {path} ({size:,} bytes)")`
+   - **No graceful degradation** — if the software isn't installed, tests must fail rather than skip
+4. **Output verification** — **Don't trust that export works just because it exits
+   successfully.** Verify outputs programmatically:
+   - Magic bytes / file format validation
+   - ZIP structure for OOXML formats (DOCX, XLSX, PPTX)
+   - Pixel-level analysis for video/images (probe frames, compare brightness)
+   - Audio analysis (RMS levels, spectral comparison)
+   - Duration/format checks against expected values
+5. **CLI subprocess tests** — Test the installed CLI command as a real user/agent would.
+   The subprocess tests MUST also produce real final output (not just an intermediate file such as ODF).
+   Use the `_resolve_cli` helper to run the installed `cli-anything-<software>` command:
+   ```python
+   def _resolve_cli(name):
+       """Resolve installed CLI command; falls back to python -m for dev.
+
+       Set env CLI_ANYTHING_FORCE_INSTALLED=1 to require the installed command.
+       """
+       import shutil
+       force = os.environ.get("CLI_ANYTHING_FORCE_INSTALLED", "").strip() == "1"
+       path = shutil.which(name)
+       if path:
+           print(f"[_resolve_cli] Using installed command: {path}")
+           return [path]
+       if force:
+           raise RuntimeError(f"{name} not found in PATH. Install with: pip install -e .")
+       module = name.replace("cli-anything-", "cli_anything.") + "." + name.split("-")[-1] + "_cli"
+       print(f"[_resolve_cli] Falling back to: {sys.executable} -m {module}")
+       return [sys.executable, "-m", module]
+
+
+   class TestCLISubprocess:
+       CLI_BASE = _resolve_cli("cli-anything-<software>")
+
+       def _run(self, args, check=True):
+           return subprocess.run(
+               self.CLI_BASE + args,
+               capture_output=True, text=True,
+               check=check,
+           )
+
+       def test_help(self):
+           result = self._run(["--help"])
+           assert result.returncode == 0
+
+       def test_project_new_json(self, tmp_dir):
+           out = os.path.join(tmp_dir, "test.json")
+           result = self._run(["--json", "project", "new", "-o", out])
+           assert result.returncode == 0
+           data = json.loads(result.stdout)
+           # ... verify structure
+   ```
+
+   **Key rules for subprocess tests:**
+   - Always use `_resolve_cli("cli-anything-<software>")` — never hardcode
+     `sys.executable` or module paths directly
+   - Do NOT set `cwd` — installed commands must work from any directory
+   - Use `CLI_ANYTHING_FORCE_INSTALLED=1` in CI/release testing to ensure the
+     installed command (not a fallback) is being tested
+   - Test `--help`, `--json`, project creation, key commands, and full workflows
+
+6. **Round-trip test** — Create project via CLI, open in GUI, verify correctness
+7. **Agent test** — Have an AI agent complete a real task using only the CLI
+
+### Phase 6: Test Documentation (TEST.md - Part 2)
+
+After running all tests successfully, **append** to the existing TEST.md:
+
+1. **Test Results** — Paste the full `pytest -v --tb=no` output showing all tests
+   passing with their names and status
+2. **Summary Statistics** — Total tests, pass rate, execution time
+3. **Coverage Notes** — Any gaps or areas not covered by tests
+
+The TEST.md now serves as both the test plan (written before implementation) and
+the test results documentation (appended after execution), providing a complete
+record of the testing process.
+
+## Critical Lessons Learned
+
+### Use the Real Software — Don't Reimplement It
+
+**This is the #1 rule.** The CLI MUST call the actual software for rendering and
+export — not reimplement the software's functionality in Python.
+
+**The anti-pattern:** Building a Pillow-based image compositor to replace GIMP,
+or generating bpy scripts without ever calling Blender. This produces a toy that
+can't handle real workloads and diverges from the actual software's behavior.
+
+**The correct approach:**
+1. **Use the software's CLI/scripting interface** as the backend:
+   - LibreOffice: `libreoffice --headless --convert-to pdf/docx/xlsx/pptx`
+   - Blender: `blender --background --python script.py`
+   - GIMP: `gimp -i -b '(script-fu-console-eval ...)'`
+   - Inkscape: `inkscape --actions="..." --export-filename=...`
+   - Shotcut/Kdenlive: `melt project.mlt -consumer avformat:output.mp4`
+   - Audacity: `sox` for effects processing
+   - OBS: `obs-websocket` protocol
+
+2. **The software is a required dependency**, not optional. Add it to installation
+   instructions. The CLI is useless without the actual software.
+
+3. **Generate valid project/intermediate files** (ODF, MLT XML, .blend, SVG, etc.)
+   then hand them to the real software for rendering. Your CLI is a structured
+   command-line interface to the software, not a replacement for it.
+
+**Example — LibreOffice CLI export pipeline:**
+```python
+# 1. Build the document as a valid ODF file (our XML builder)
+odf_path = write_odf(tmp_path, doc_type, project)
+
+# 2. Convert via the REAL LibreOffice (not a reimplementation)
+subprocess.run([
+    "libreoffice", "--headless",
+    "--convert-to", "pdf",
+    "--outdir", output_dir,
+    odf_path,
+])
+# Result: a real PDF rendered by LibreOffice's full engine
+```
+
+### The Rendering Gap
+
+**This is the #2 pitfall.** Most GUI apps apply effects at render time via their
+engine. When you build a CLI that manipulates project files directly, you must also
+handle rendering — and naive approaches will silently drop effects.
+
+**The problem:** Your CLI adds filters/effects to the project file format. But when
+rendering, if you use a simple tool (e.g., ffmpeg concat demuxer), it reads raw
+media files and **ignores** all project-level effects. The output looks identical to
+the input. Users can't tell anything happened.
+
+**The solution — a filter translation layer:**
+1. **Best case:** Use the app's native renderer (`melt` for MLT projects). It reads
+   the project file and applies everything.
+2. **Fallback:** Build a translation layer that converts project-format effects into
+   the rendering tool's native syntax (e.g., MLT filters → ffmpeg `-filter_complex`).
+3. **Last resort:** Generate a render script the user can run manually.
+
+**Priority order for rendering:** native engine → translated filtergraph → script.
+
+### Filter Translation Pitfalls
+
+When translating effects between formats (e.g., MLT → ffmpeg), watch for:
+
+- **Duplicate filter types:** Some tools (ffmpeg) don't allow the same filter twice
+  in a chain. If your project has both `brightness` and `saturation` filters, and
+  both map to ffmpeg's `eq=`, you must **merge** them into a single `eq=brightness=X:saturation=Y`.
+- **Ordering constraints:** ffmpeg's `concat` filter requires **interleaved** stream
+  ordering: `[v0][a0][v1][a1][v2][a2]`, NOT grouped `[v0][v1][v2][a0][a1][a2]`.
+  The error message ("media type mismatch") is cryptic if you don't know this.
+- **Parameter space differences:** Effect parameters often use different scales.
+  MLT brightness `1.15` = +15%, but ffmpeg `eq=brightness=0.06` on a -1..1 scale.
+  Document every mapping explicitly.
+- **Unmappable effects:** Some effects have no equivalent in the render tool. Handle
+  gracefully (warn, skip) rather than crash.
+
+### Timecode Precision
+
+Non-integer frame rates (29.97fps = 30000/1001) cause cumulative rounding errors:
+
+- **Use `round()`, not `int()`** for float-to-frame conversion. `int(9000 * 29.97)`
+  truncates and loses frames; `round()` gets the right answer.
+- **Use integer arithmetic for timecode display.** Convert frames → total milliseconds
+  via `round(frames * fps_den * 1000 / fps_num)`, then decompose with integer
+  division. Avoid intermediate floats that drift over long durations.
+- **Accept ±1 frame tolerance** in roundtrip tests at non-integer FPS. Exact equality
+  cannot be guaranteed because each conversion rounds to a whole frame.
+
+### Output Verification Methodology
+
+Never assume an export is correct just because it ran without errors. Verify:
+
+```python
+# Video: probe specific frames with ffmpeg
+# Frame 0 for fade-in (should be near-black)
+# Middle frames for color effects (compare brightness/saturation vs source)
+# Last frame for fade-out (should be near-black)
+
+# When comparing pixel values between different resolutions,
+# exclude letterboxing/pillarboxing (black padding bars).
+# A vertical video in a horizontal frame will have ~40% black pixels.
+
+# Audio: check RMS levels at start/end for fades
+# Compare spectral characteristics against source
+```
+
+### Testing Strategy
+
+Four test layers with complementary purposes:
+
+1. **Unit tests** (`test_core.py`): Synthetic data, no external dependencies. Tests
+   every function in isolation. Fast, deterministic, good for CI.
+2. **E2E tests — native** (`test_full_e2e.py`): Tests the project file generation
+   pipeline (ODF structure, XML content, format validation). Verifies the
+   intermediate files your CLI produces are correct.
+3. **E2E tests — true backend** (`test_full_e2e.py`): Invokes the **real software**
+   (LibreOffice, Blender, melt, etc.) to produce final output files (PDF, DOCX,
+   rendered images, videos). Verifies the output files:
+   - Exist and have size > 0
+   - Have correct format (magic bytes, ZIP structure, etc.)
+   - Contain expected content where verifiable
+   - **Print artifact paths** so users can manually inspect results
+4. **CLI subprocess tests** (in `test_full_e2e.py`): Invokes the installed
+   `cli-anything-<software>` command via `subprocess.run` to run the full workflow
+   end-to-end: create project → add content → export via real software → verify output.
+
+**No graceful degradation.** The real software MUST be installed. Tests must NOT
+skip or fake results when the software is missing — the CLI is useless without it.
+The software is a hard dependency, not optional.
+
+**Example — true E2E test for LibreOffice:**
+```python
+class TestWriterToPDF:
+    def test_rich_writer_to_pdf(self, tmp_dir):
+        proj = create_document(doc_type="writer", name="Report")
+        add_heading(proj, text="Quarterly Report", level=1)
+        add_table(proj, rows=3, cols=3, data=[...])
+
+        pdf_path = os.path.join(tmp_dir, "report.pdf")
+        result = export(proj, pdf_path, preset="pdf", overwrite=True)
+
+        # Verify the REAL output file
+        assert os.path.exists(result["output"])
+        assert result["file_size"] > 1000  # Not suspiciously small
+        with open(result["output"], "rb") as f:
+            assert f.read(5) == b"%PDF-"  # Validate format magic bytes
+        print(f"\n  PDF: {result['output']} ({result['file_size']:,} bytes)")
+
+
+class TestCLISubprocessE2E:
+    CLI_BASE = _resolve_cli("cli-anything-libreoffice")
+
+    def test_full_writer_pdf_workflow(self, tmp_dir):
+        proj_path = os.path.join(tmp_dir, "test.json")
+        pdf_path = os.path.join(tmp_dir, "output.pdf")
+        self._run(["document", "new", "-o", proj_path, "--type", "writer"])
+        self._run(["--project", proj_path, "writer", "add-heading", "-t", "Title"])
+        self._run(["--project", proj_path, "export", "render", pdf_path, "-p", "pdf", "--overwrite"])
+        assert os.path.exists(pdf_path)
+        with open(pdf_path, "rb") as f:
+            assert f.read(5) == b"%PDF-"
+```
+
+   Run tests in force-installed mode to guarantee the real command is used:
+   ```bash
+   CLI_ANYTHING_FORCE_INSTALLED=1 python3 -m pytest cli_anything/<software>/tests/ -v -s
+   ```
+   The `-s` flag shows the `[_resolve_cli]` print output confirming which backend
+   is being used and **prints artifact paths** for manual inspection.
+
+Real-world workflow test scenarios should include:
+- Multi-segment editing (YouTube-style cut/trim)
+- Montage assembly (many short clips)
+- Picture-in-picture compositing
+- Color grading pipelines
+- Audio mixing (podcast-style)
+- Heavy undo/redo stress testing
+- Save/load round-trips of complex projects
+- Iterative refinement (add, modify, remove, re-add)
+
+## Key Principles
+
+- **Use the real software** — The CLI MUST invoke the actual application for rendering
+  and export. Generate valid intermediate files (ODF, MLT XML, .blend, SVG), then hand
+  them to the real software. Never reimplement the rendering engine in Python.
+- **The software is a hard dependency** — Not optional, not gracefully degraded. If
+  LibreOffice isn't installed, `cli-anything-libreoffice` must error clearly, not
+  silently produce inferior output with a fallback library.
+- **Manipulate the native format directly** — Parse and modify the app's native project
+  files (MLT XML, ODF, SVG, etc.) as the data layer.
+- **Leverage existing CLI tools** — Use `libreoffice --headless`, `blender --background`,
+  `melt`, `ffmpeg`, `inkscape --actions`, `sox` as subprocesses for rendering.
+- **Verify rendering produces correct output** — See "The Rendering Gap" above.
+- **E2E tests must produce real artifacts** — PDF, DOCX, rendered images, videos.
+  Print output paths so users can inspect. Never test only the intermediate format.
+- **Fail loudly and clearly** — Agents need unambiguous error messages to self-correct.
+- **Be idempotent where possible** — Running the same command twice should be safe.
+- **Provide introspection** — `info`, `list`, `status` commands are critical for agents
+  to understand current state before acting.
+- **JSON output mode** — Every command should support `--json` for machine parsing.
+
+## Rules
+
+- **The real software MUST be a hard dependency.** The CLI must invoke the actual
+  software (LibreOffice, Blender, GIMP, etc.) for rendering and export. Do NOT
+  reimplement rendering in Python. Do NOT gracefully degrade to a fallback library.
+  If the software is not installed, the CLI must error with clear install instructions.
+- **Every `cli_anything/<software>/` directory MUST contain a `README.md`** that explains how to
+  install the software dependency, install the CLI, run tests, and shows basic usage.
+- **E2E tests MUST invoke the real software** and produce real output files (PDF, DOCX,
+  rendered images, videos). Tests must verify output exists, has correct format, and
+  print artifact paths so users can inspect results. Never test only intermediate files.
+- **Every export/render function MUST be verified** with programmatic output analysis
+  before being marked as working. "It ran without errors" is not sufficient.
+- **Every filter/effect in the registry MUST have a corresponding render mapping**
+  or be explicitly documented as "project-only (not rendered)".
+- **Test suites MUST include real-file E2E tests**, not just unit tests with synthetic
+  data. Format assumptions break constantly with real media.
+- **E2E tests MUST include subprocess tests** that invoke the installed
+  `cli-anything-<software>` command via `_resolve_cli()`. Tests must work against
+  the actual installed package, not just source imports.
+- **Every `cli_anything/<software>/tests/` directory MUST contain a `TEST.md`** documenting what the tests
+  cover, what realistic workflows are tested, and the full test results output.
+- **Every CLI MUST use the unified REPL skin** (`repl_skin.py`) for the interactive mode.
+  Copy `cli-anything-plugin/repl_skin.py` to `utils/repl_skin.py` and use `ReplSkin`
+  for the banner, prompt, help, messages, and goodbye. REPL MUST be the default behavior
+  when the CLI is invoked without a subcommand (`invoke_without_command=True`).
+
+## Directory Structure
+
+```
+<software>/
+└── agent-harness/
+    ├── <SOFTWARE>.md          # Project-specific analysis and SOP
+    ├── setup.py               # PyPI package configuration (Phase 7)
+    ├── cli_anything/          # Namespace package (NO __init__.py here)
+    │   └── <software>/        # Sub-package for this CLI
+    │       ├── __init__.py
+    │       ├── __main__.py    # python3 -m cli_anything.<software>
+    │       ├── README.md      # HOW TO RUN — required
+    │       ├── <software>_cli.py  # Main CLI entry point (Click + REPL)
+    │       ├── core/          # Core modules (one per domain)
+    │       │   ├── __init__.py
+    │       │   ├── project.py     # Project create/open/save/info
+    │       │   ├── ...            # Domain-specific modules
+    │       │   ├── export.py      # Render pipeline + filter translation
+    │       │   └── session.py     # Stateful session, undo/redo
+    │       ├── utils/         # Shared utilities
+    │       │   ├── __init__.py
+    │       │   ├── <software>_backend.py  # Backend: invokes the real software
+    │       │   └── repl_skin.py  # Unified REPL skin (copy from plugin)
+    │       └── tests/         # Test suites
+    │           ├── TEST.md        # Test documentation and results — required
+    │           ├── test_core.py   # Unit tests (synthetic data)
+    │           └── test_full_e2e.py # E2E tests (real files)
+    └── examples/              # Example scripts and workflows
+```
+
+**Critical:** The `cli_anything/` directory must NOT contain an `__init__.py`.
+This is what makes it a PEP 420 namespace package — multiple separately-installed
+PyPI packages can each contribute a sub-package under `cli_anything/` without
+conflicting. For example, `cli-anything-gimp` adds `cli_anything/gimp/` and
+`cli-anything-blender` adds `cli_anything/blender/`, and both coexist in the
+same Python environment.
+
+Note: This HARNESS.md is part of the cli-anything-plugin. Individual software directories reference this file — do NOT duplicate it.
+
+## Applying This to Other Software
+
+This same SOP applies to any GUI application:
+
+| Software | Backend CLI | Native Format | System Package | How the CLI Uses It |
+|----------|-------------|---------------|----------------|-------------------|
+| LibreOffice | `libreoffice --headless` | .odt/.ods/.odp (ODF ZIP) | `apt install libreoffice` | Generate ODF → convert to PDF/DOCX/XLSX/PPTX |
+| Blender | `blender --background --python` | .blend-cli.json | `apt install blender` | Generate bpy script → Blender renders to PNG/MP4 |
+| GIMP | `gimp -i -b '(script-fu ...)'` | .xcf | `apt install gimp` | Script-Fu commands → GIMP processes & exports |
+| Inkscape | `inkscape --actions="..."` | .svg (XML) | `apt install inkscape` | Manipulate SVG → Inkscape exports to PNG/PDF |
+| Shotcut/Kdenlive | `melt` or `ffmpeg` | .mlt (XML) | `apt install melt ffmpeg` | Build MLT XML → melt/ffmpeg renders video |
+| Audacity | `sox` | .aup3 | `apt install sox` | Generate sox commands → sox processes audio |
+| OBS Studio | `obs-websocket` | scene.json | `apt install obs-studio` | WebSocket API → OBS captures/records |
+
+**The software is a required dependency, not optional.** The CLI generates valid
+intermediate files (ODF, MLT XML, bpy scripts, SVG) and hands them to the real
+software for rendering. This is what makes the CLI actually useful — it's a
+command-line interface TO the software, not a replacement for it.
+
+The pattern is always the same: **build the data → call the real software → verify
+the output**.
+
+### Phase 7: PyPI Publishing and Installation
+
+After building and testing the CLI, make it installable and discoverable.
+
+All cli-anything CLIs use **PEP 420 namespace packages** under the shared
+`cli_anything` namespace. This allows multiple CLI packages to be installed
+side-by-side in the same Python environment without conflicts.
+
+1. **Structure the package** as a namespace package:
+   ```
+   agent-harness/
+   ├── setup.py
+   └── cli_anything/           # NO __init__.py here (namespace package)
+       └── <software>/         # e.g., gimp, blender, audacity
+           ├── __init__.py     # HAS __init__.py (regular sub-package)
+           ├── <software>_cli.py
+           ├── core/
+           ├── utils/
+           └── tests/
+   ```
+
+   The key rule: `cli_anything/` has **no** `__init__.py`. Each sub-package
+   (`gimp/`, `blender/`, etc.) **does** have `__init__.py`. This is what
+   enables multiple packages to contribute to the same namespace.
+
+2. **Create setup.py** in the `agent-harness/` directory:
+   ```python
+   from setuptools import setup, find_namespace_packages
+
+   setup(
+       name="cli-anything-<software>",
+       version="1.0.0",
+       packages=find_namespace_packages(include=["cli_anything.*"]),
+       install_requires=[
+           "click>=8.0.0",
+           "prompt-toolkit>=3.0.0",
+           # Add Python library dependencies here
+       ],
+       entry_points={
+           "console_scripts": [
+               "cli-anything-<software>=cli_anything.<software>.<software>_cli:main",
+           ],
+       },
+       python_requires=">=3.10",
+   )
+   ```
+
+   **Important details:**
+   - Use `find_namespace_packages`, NOT `find_packages`
+   - Use `include=["cli_anything.*"]` to scope discovery
+   - Entry point format: `cli_anything.<software>.<software>_cli:main`
+   - The **system package** (LibreOffice, Blender, etc.) is a **hard dependency**
+     that cannot be expressed in `install_requires`. Document it in README.md and
+     have the backend module raise a clear error with install instructions:
+     ```python
+     # In utils/<software>_backend.py
+     def find_<software>():
+         path = shutil.which("<software>")
+         if path:
+             return path
+         raise RuntimeError(
+             "<Software> is not installed. Install it with:\n"
+             "  apt install <software>   # Debian/Ubuntu\n"
+             "  brew install <software>  # macOS"
+         )
+     ```
+
+3. **All imports** use the `cli_anything.<software>` prefix:
+   ```python
+   from cli_anything.gimp.core.project import create_project
+   from cli_anything.gimp.core.session import Session
+   from cli_anything.blender.core.scene import create_scene
+   ```
+
+4. **Test local installation**:
+   ```bash
+   cd <software>/agent-harness   # relative to the repository root
+   pip install -e .
+   ```
+
+5. **Verify PATH installation**:
+   ```bash
+   which cli-anything-<software>
+   cli-anything-<software> --help
+   ```
+
+6. **Run tests against the installed command**:
+   ```bash
+   cd <software>/agent-harness   # relative to the repository root
+   CLI_ANYTHING_FORCE_INSTALLED=1 python3 -m pytest cli_anything/<software>/tests/ -v -s
+   ```
+   The output must show `[_resolve_cli] Using installed command: /path/to/cli-anything-<software>`
+   confirming subprocess tests ran against the real installed binary, not a module fallback.
+
+7. **Verify namespace works across packages** (when multiple CLIs installed):
+   ```python
+   import cli_anything.gimp
+   import cli_anything.blender
+   # Both resolve to their respective source directories
+   ```
+
+**Why namespace packages:**
+- Multiple CLIs coexist in the same Python environment without conflicts
+- Clean, organized imports under a single `cli_anything` namespace
+- Each CLI is independently installable/uninstallable via pip
+- Agents can discover all installed CLIs via `cli_anything.*`
+- Standard Python packaging — no hacks or workarounds
diff --git a/.agents/skills/cli-anything/references/commands/cli-anything.md b/.agents/skills/cli-anything/references/commands/cli-anything.md
new file mode 100644
index 0000000..57aeb2c
--- /dev/null
+++ b/.agents/skills/cli-anything/references/commands/cli-anything.md
@@ -0,0 +1,126 @@
+# cli-anything Command
+
+> Historical slash-command spec. In skill mode, follow the same workflow without requiring `/cli-anything` command support.
+
+Build a complete, stateful CLI harness for any GUI application.
+
+## CRITICAL: Read HARNESS.md First
+
+**Before doing anything else, you MUST read `../HARNESS.md`.** It defines the complete methodology, architecture standards, and implementation patterns. Every phase below follows HARNESS.md. Do not improvise — follow the harness specification.
+
+## Usage
+
+```bash
+/cli-anything <software-path-or-repo>
+```
+
+## Arguments
+
+- `<software-path-or-repo>` - **Required.** Either:
+  - A **local path** to the software source code (e.g., `/home/user/gimp`, `./blender`)
+  - A **GitHub repository URL** (e.g., `https://github.com/GNOME/gimp`, `github.com/blender/blender`)
+
+  If a GitHub URL is provided, the agent clones the repo locally first, then works on the local copy.
+
+  **Note:** Software names alone (e.g., "gimp") are NOT accepted. You must provide the actual source code path or repository URL so the agent can analyze the codebase.
+
+## What This Command Does
+
+This command implements the complete cli-anything methodology to build a production-ready CLI harness for any GUI application. **All phases follow the standards defined in HARNESS.md.**
+
+### Phase 0: Source Acquisition
+- If `<software-path-or-repo>` is a GitHub URL, clone it to a local working directory
+- Verify the local path exists and contains source code
+- Derive the software name from the directory name (e.g., `/home/user/gimp` -> `gimp`)
+
+### Phase 1: Codebase Analysis
+- Analyzes the local source code
+- Analyzes the backend engine and data model
+- Maps GUI actions to API calls
+- Identifies existing CLI tools
+- Documents the architecture
+
+### Phase 2: CLI Architecture Design
+- Designs command groups matching the app's domains
+- Plans the state model and output formats
+- Creates the software-specific SOP document (e.g., GIMP.md)
+
+### Phase 3: Implementation
+- Creates the directory structure: `agent-harness/cli_anything/<software>/core`, `utils`, `tests`
+- Implements core modules (project, session, export, etc.)
+- Builds the Click-based CLI with REPL support
+- Implements `--json` output mode for agent consumption
+- All imports use `cli_anything.<software>.*` namespace
+
+### Phase 4: Test Planning
+- Creates `TEST.md` with comprehensive test plan
+- Plans unit tests for all core modules
+- Plans E2E tests with real files
+- Designs realistic workflow scenarios
+
+### Phase 5: Test Implementation
+- Writes unit tests (`test_core.py`) - synthetic data, no external deps
+- Writes E2E tests (`test_full_e2e.py`) - real files, full pipeline
+- Implements workflow tests simulating real-world usage
+- Adds output verification (pixel analysis, format validation, etc.)
+- Adds `TestCLISubprocess` class with `_resolve_cli("cli-anything-<software>")`
+  that tests the installed command via subprocess (no hardcoded paths or CWD)
+
+### Phase 6: Test Documentation
+- Runs all tests with `pytest -v --tb=no`
+- Appends full test results to `TEST.md`
+- Documents test coverage and any gaps
+
+### Phase 7: PyPI Publishing and Installation
+- Creates `setup.py` with `find_namespace_packages(include=["cli_anything.*"])`
+- Package name: `cli-anything-<software>`, namespace: `cli_anything.<software>`
+- `cli_anything/` has NO `__init__.py` (PEP 420 namespace package)
+- Configures console_scripts entry point for PATH installation
+- Tests local installation with `pip install -e .`
+- Verifies CLI is available in PATH: `which cli-anything-<software>`
+
+## Output Structure
+
+```
+<software-name>/
+└── agent-harness/
+    ├── <SOFTWARE>.md          # Software-specific SOP
+    ├── setup.py               # PyPI package config (find_namespace_packages)
+    └── cli_anything/          # Namespace package (NO __init__.py)
+        └── <software>/        # Sub-package (HAS __init__.py)
+            ├── README.md          # Installation and usage guide
+            ├── <software>_cli.py  # Main CLI entry point
+            ├── core/              # Core modules
+            │   ├── project.py
+            │   ├── session.py
+            │   ├── export.py
+            │   └── ...
+            ├── utils/             # Utilities
+            └── tests/
+                ├── TEST.md        # Test plan and results
+                ├── test_core.py   # Unit tests
+                └── test_full_e2e.py # E2E tests
+```
+
+## Example
+
+```bash
+# Build a CLI for GIMP from local source
+/cli-anything /home/user/gimp
+
+# Build from a GitHub repo
+/cli-anything https://github.com/blender/blender
+```
+
+## Success Criteria
+
+The command succeeds when:
+1. All core modules are implemented and functional
+2. CLI supports both one-shot commands and REPL mode
+3. `--json` output mode works for all commands
+4. All tests pass (100% pass rate)
+5. Subprocess tests use `_resolve_cli()` and pass with `CLI_ANYTHING_FORCE_INSTALLED=1`
+6. TEST.md contains both plan and results
+7. README.md documents installation and usage
+8. setup.py is created and local installation works
+9. CLI is available in PATH as `cli-anything-<software>`
diff --git a/.agents/skills/cli-anything/references/commands/list.md b/.agents/skills/cli-anything/references/commands/list.md
new file mode 100644
index 0000000..55b49c4
--- /dev/null
+++ b/.agents/skills/cli-anything/references/commands/list.md
@@ -0,0 +1,239 @@
+# cli-anything:list Command
+
+> Historical slash-command spec. In skill mode, follow the same workflow without requiring `/cli-anything:list` command support.
+
+List all available CLI-Anything tools (installed and generated).
+
+## Usage
+
+```bash
+/cli-anything:list [--path <directory>] [--depth <n>] [--json]
+```
+
+## Options
+
+- `--path <directory>` - Directory to search for generated CLIs (default: current directory)
+- `--depth <n>` - Maximum recursion depth for scanning (default: unlimited). Use `0` for current directory only, `1` for one level deep, etc.
+- `--json` - Output in JSON format for machine parsing
+
+## What This Command Does
+
+Displays all CLI-Anything tools available in the system:
+
+### 1. Installed CLIs
+
+Uses `importlib.metadata` to find installed `cli-anything-*` packages:
+- Pattern: package name starts with `cli-anything-`
+- Extracts: software name, version, entry point
+
+```python
+import shutil
+from importlib.metadata import distributions
+installed = {}
+for dist in distributions():
+    name = dist.metadata.get("Name", "")
+    if name.startswith("cli-anything-"):
+        software = name.removeprefix("cli-anything-")
+        version = dist.version
+        # Find executable via entry points or shutil.which
+        executable = shutil.which(f"cli-anything-{software}")
+        installed[software] = {
+            "status": "installed",
+            "version": version,
+            "executable": executable
+        }
+```
+
+### 2. Generated CLIs
+
+Uses `glob` to find local CLI directories:
+- Pattern: `**/agent-harness/cli_anything/*/__init__.py` (or depth-limited variant)
+- Extracts: software name, version (from setup.py), source path
+- Status: `generated`
+
+```python
+from pathlib import Path
+import glob
+import re
+
+search_path = args.get("path", ".")
+max_depth = args.get("depth", None)  # None means unlimited
+generated = {}
+
+def extract_version_from_setup(setup_path):
+    """Extract version from setup.py using regex."""
+    try:
+        content = Path(setup_path).read_text()
+        match = re.search(r'version\s*=\s*["\']([^"\']+)["\']', content)
+        return match.group(1) if match else None
+    except (OSError, UnicodeDecodeError):
+        return None
+
+def build_glob_patterns(base_path, depth):
+    """Build list of glob patterns for depths 0 through max_depth.
+
+    Returns multiple patterns so that --depth 2 finds tools at depth 0, 1, AND 2.
+    """
+    base = Path(base_path)
+    suffix = "agent-harness/cli_anything/*/__init__.py"
+
+    if depth is None:
+        # Unlimited depth: use **
+        return [str(base / "**" / suffix)]
+
+    # Generate patterns for all depths from 0 to max_depth
+    patterns = []
+    for d in range(depth + 1):
+        if d == 0:
+            # depth 0: look in current directory
+            patterns.append(str(base / suffix))
+        else:
+            # depth N: look N levels deep
+            prefix = "/".join(["*"] * d)
+            patterns.append(str(base / prefix / suffix))
+    return patterns
+
+patterns = build_glob_patterns(search_path, max_depth)
+for pattern in patterns:
+    for init_file in glob.glob(pattern, recursive=True):
+        parts = Path(init_file).parts
+        # Find cli_anything/<software> pattern
+        for i, p in enumerate(parts):
+            if p == "cli_anything" and i + 1 < len(parts):
+                software = parts[i + 1]
+                # Get agent-harness directory as source
+                agent_harness_idx = parts.index("agent-harness") if "agent-harness" in parts else i - 1
+                source = str(Path(*parts[:agent_harness_idx + 1]))  # up to and including agent-harness
+                # Extract version from setup.py (setup.py is in agent-harness/, not cli_anything/)
+                setup_path = Path(*parts[:agent_harness_idx + 1]) / "setup.py"
+                version = extract_version_from_setup(setup_path)
+                generated[software] = {
+                    "status": "generated",
+                    "version": version,
+                    "executable": None,
+                    "source": source
+                }
+                break
+```
+
+### 3. Merge Results
+
+- Deduplicate by software name
+- If both installed and generated: show `installed` status with both paths
+- The `source` field shows where the generated code is (even for installed)
+
+## Output Formats
+
+### Table Format (default)
+
+```
+CLI-Anything Tools (found 5)
+
+Name            Status      Version   Source
+──────────────────────────────────────────────────────────────
+gimp            installed   1.0.0     ./gimp/agent-harness
+blender         installed   1.0.0     ./blender/agent-harness
+inkscape        generated   1.0.0     ./inkscape/agent-harness
+audacity        generated   1.0.0     ./audacity/agent-harness
+libreoffice     generated   1.0.0     ./libreoffice/agent-harness
+```
+
+### JSON Format (--json)
+
+```json
+{
+  "tools": [
+    {
+      "name": "gimp",
+      "status": "installed",
+      "version": "1.0.0",
+      "executable": "/usr/local/bin/cli-anything-gimp",
+      "source": "./gimp/agent-harness"
+    },
+    {
+      "name": "inkscape",
+      "status": "generated",
+      "version": "1.0.0",
+      "executable": null,
+      "source": "./inkscape/agent-harness"
+    }
+  ],
+  "total": 2,
+  "installed": 1,
+  "generated_only": 1
+}
+```
+
+## Error Handling
+
+| Scenario | Action |
+|----------|--------|
+| No CLIs found | Show "No CLI-Anything tools found" message |
+| Invalid --path | Show error: "Path not found: <path>" |
+| Permission denied | Skip directory, continue scanning, show warning |
+
+## Implementation Steps
+
+When this command is invoked, the agent should:
+
+1. **Parse arguments**
+   - Extract `--path` value (default: `.`)
+   - Extract `--depth` value (default: `None` for unlimited recursion)
+   - Extract `--json` flag (default: false)
+
+2. **Validate path exists**
+   - If `--path` specified and doesn't exist, show error and exit
+
+3. **Scan installed CLIs**
+   - Use `importlib.metadata.distributions()` to find all packages
+   - Filter for packages starting with `cli-anything-`
+   - Extract name, version, find executable path
+
+4. **Scan generated CLIs**
+   - Build glob pattern based on depth parameter
+   - Use `glob.glob(pattern, recursive=True)`
+   - Parse directory structure to extract software name
+   - Calculate relative path from current directory
+
+5. **Merge results**
+   - Create dict keyed by software name
+   - Prefer installed data when both exist
+   - Keep source path from generated if available
+
+6. **Format output**
+   - If `--json`: output JSON to stdout
+   - Otherwise: format as table with proper alignment
+
+7. **Print results**
+   - Show summary line with count
+   - Show table or JSON
+
+## Examples
+
+```bash
+# List all tools in current directory (unlimited depth)
+/cli-anything:list
+
+# List tools with depth limit (only scan 2 levels deep)
+/cli-anything:list --depth 2
+
+# List tools in current directory only (no recursion)
+/cli-anything:list --depth 0
+
+# List tools with JSON output
+/cli-anything:list --json
+
+# Search a specific directory with depth limit
+/cli-anything:list --path /projects/my-tools --depth 3
+
+# Combined
+/cli-anything:list --path ./output --depth 2 --json
+```
+
+## Notes
+
+- `--depth` controls how many directory levels to descend from the search path
+- Default depth is unlimited (`**` glob pattern)
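+- Depth counts the directory levels between the search path and `agent-harness/`; with the standard `<software>/agent-harness/` layout, `--depth 1` from the parent directory is enough to find `agent-harness/cli_anything/<software>/__init__.py`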
+- Relative paths are preferred for readability
+- The command should work without any external dependencies beyond Python stdlib
diff --git a/.agents/skills/cli-anything/references/commands/refine.md b/.agents/skills/cli-anything/references/commands/refine.md
new file mode 100644
index 0000000..f1b3281
--- /dev/null
+++ b/.agents/skills/cli-anything/references/commands/refine.md
@@ -0,0 +1,106 @@
+# cli-anything:refine Command
+
+> Historical slash-command spec. In skill mode, follow the same workflow without requiring `/cli-anything:refine` command support.
+
+Refine an existing CLI harness to improve coverage of the software's functions and usage patterns.
+
+## CRITICAL: Read HARNESS.md First
+
+**Before refining, read `../HARNESS.md`.** All new commands and tests must follow the same standards as the original build. HARNESS.md is the single source of truth for architecture, patterns, and quality requirements.
+
+## Usage
+
+```bash
+/cli-anything:refine <software-path> [focus]
+```
+
+## Arguments
+
+- `<software-path>` - **Required.** Local path to the software source code (e.g., `/home/user/gimp`, `./blender`). Must be the same source tree used during the original build.
+
+  **Note:** Only local paths are accepted. If you need to work from a GitHub repo, run the build workflow described in `cli-anything.md` first, then refine.
+
+- `[focus]` - **Optional.** A natural-language description of the functionality area to focus on. When provided, the agent skips broad gap analysis and instead targets the specified capability area.
+
+  Examples:
+  - `/cli-anything:refine /home/user/shotcut "vid-in-vid and picture-in-picture features"`
+  - `/cli-anything:refine /home/user/gimp "all batch processing and scripting filters"`
+  - `/cli-anything:refine /home/user/blender "particle systems and physics simulation"`
+  - `/cli-anything:refine /home/user/inkscape "path boolean operations and clipping"`
+
+  When `[focus]` is provided:
+  - Step 2 (Analyze Software Capabilities) narrows to only the specified area
+  - Step 3 (Gap Analysis) compares only the focused capabilities against current coverage
+  - The agent should still present findings before implementing, but scoped to the focus area
+
+## What This Command Does
+
+This command is used **after** a CLI harness has already been built with the main CLI-Anything workflow. It analyzes gaps between the software's full capabilities and what the current CLI covers, then iteratively expands coverage. If a `[focus]` is given, the agent narrows its analysis and implementation to that specific functionality area.
+
+### Step 1: Inventory Current Coverage
+- Read the existing CLI entry point (`<software>_cli.py`) and all core modules
+- List every command, subcommand, and option currently implemented
+- Read the existing test suite to understand what's tested
+- Build a coverage map: `{ function_name: covered | not_covered }`
+
+### Step 2: Analyze Software Capabilities
+- Re-scan the software source at `<software-path>`
+- Identify all public APIs, CLI tools, scripting interfaces, and batch-mode operations
+- Focus on functions that produce observable output (renders, exports, transforms, conversions)
+- Categorize by domain (e.g., for GIMP: filters, color adjustments, layer ops, selection tools)
+
+### Step 3: Gap Analysis
+- Compare current CLI coverage against the software's full capability set
+- Prioritize gaps by:
+  1. **High impact** — commonly used functions missing from the CLI
+  2. **Easy wins** — functions with simple APIs that can be wrapped quickly
+  3. **Composability** — functions that unlock new workflows when combined with existing commands
+- Present the gap report to the user and confirm which gaps to address
+
+### Step 4: Implement New Commands
+- Add new commands/subcommands to the CLI for the selected gaps
+- Follow the same patterns as existing commands (as defined in HARNESS.md):
+  - Click command groups
+  - `--json` output support
+  - Session state integration
+  - Error handling with `handle_error`
+- Add corresponding core module functions in `core/` or `utils/`
+
+### Step 5: Expand Tests
+- Add unit tests for every new function in `test_core.py`
+- Add E2E tests for new commands in `test_full_e2e.py`
+- Add workflow tests that combine new commands with existing ones
+- Run all tests (old + new) to ensure no regressions
+
+### Step 6: Update Documentation
+- Update `README.md` with new commands and usage examples
+- Update `TEST.md` with new test results
+- Update the SOP document (`<SOFTWARE>.md`) with new coverage notes
+
+## Example
+
+```bash
+# Broad refinement — agent finds gaps across all capabilities
+/cli-anything:refine /home/user/gimp
+
+# Focused refinement — agent targets a specific functionality area
+/cli-anything:refine /home/user/shotcut "vid-in-vid and picture-in-picture compositing"
+/cli-anything:refine /home/user/gimp "batch processing and Script-Fu filters"
+/cli-anything:refine /home/user/blender "particle systems and physics simulation"
+/cli-anything:refine /home/user/inkscape "path boolean operations and clipping masks"
+```
+
+## Success Criteria
+
+- All existing tests still pass (no regressions)
+- New commands follow the same architectural patterns (per HARNESS.md)
+- New tests achieve 100% pass rate
+- Coverage meaningfully improved (new functions exposed via CLI)
+- Documentation updated to reflect changes
+
+## Notes
+
+- Refine is incremental — run it multiple times to steadily expand coverage
+- Each run should focus on a coherent set of related functions rather than trying to cover everything at once
+- The agent should present the gap analysis before implementing, so the user can steer priorities
+- Refine never removes existing commands — it only adds or enhances
diff --git a/.agents/skills/cli-anything/references/commands/test.md b/.agents/skills/cli-anything/references/commands/test.md
new file mode 100644
index 0000000..d4681e1
--- /dev/null
+++ b/.agents/skills/cli-anything/references/commands/test.md
@@ -0,0 +1,75 @@
+# cli-anything:test Command
+
+> Historical slash-command spec. In skill mode, follow the same workflow without requiring `/cli-anything:test` command support.
+
+Run tests for a CLI harness and update TEST.md with results.
+
+## CRITICAL: Read HARNESS.md First
+
+**Before running tests, read `../HARNESS.md`.** It defines the test standards, expected structure, and what constitutes a passing test suite.
+
+## Usage
+
+```bash
+/cli-anything:test <software-path-or-repo>
+```
+
+## Arguments
+
+- `<software-path-or-repo>` - **Required.** Either:
+  - A **local path** to the software source code (e.g., `/home/user/gimp`, `./blender`)
+  - A **GitHub repository URL** (e.g., `https://github.com/GNOME/gimp`, `github.com/blender/blender`)
+
+  If a GitHub URL is provided, the agent clones the repo locally first, then works on the local copy.
+
+  The software name is derived from the directory name. The agent locates the CLI harness at `<repo-root>/<software-name>/agent-harness/`.
+
+## What This Command Does
+
+1. **Locates the CLI** - Finds the CLI harness based on the software path
+2. **Runs pytest** - Executes tests with `-v -s --tb=short`
+3. **Captures output** - Saves full test results
+4. **Verifies subprocess backend** - Confirms `[_resolve_cli] Using installed command:` appears in output
+5. **Updates TEST.md** - Appends results to the Test Results section
+6. **Reports status** - Shows pass/fail summary
+
+## Test Output Format
+
+The command appends to TEST.md:
+
+````markdown
+## Test Results
+
+Last run: 2024-03-05 14:30:00
+
+```
+[full pytest -v --tb=no output]
+```
+
+**Summary**: 103 passed in 3.05s
+````
+
+## Example
+
+```bash
+# Run all tests for GIMP CLI
+/cli-anything:test /home/user/gimp
+
+# Run tests for Blender from GitHub
+/cli-anything:test https://github.com/blender/blender
+```
+
+## Success Criteria
+
+- All tests pass (100% pass rate)
+- TEST.md is updated with full results
+- No test failures or errors
+- `[_resolve_cli]` output confirms installed command path
+
+## Failure Handling
+
+If tests fail:
+1. Shows which tests failed
+2. Does NOT update TEST.md (keeps previous passing results)
+3. Suggests fixes based on error messages
+4. Offers to re-run after fixes
diff --git a/.agents/skills/cli-anything/references/commands/validate.md b/.agents/skills/cli-anything/references/commands/validate.md
new file mode 100644
index 0000000..179ecb8
--- /dev/null
+++ b/.agents/skills/cli-anything/references/commands/validate.md
@@ -0,0 +1,125 @@
+# cli-anything:validate Command
+
+> Historical slash-command spec. In skill mode, follow the same workflow without requiring `/cli-anything:validate` command support.
+
+Validate a CLI harness against HARNESS.md standards and best practices.
+
+## CRITICAL: Read HARNESS.md First
+
+**Before validating, read `../HARNESS.md`.** It is the single source of truth for all validation checks below. Every check in this command maps to a requirement in HARNESS.md.
+
+## Usage
+
+```bash
+/cli-anything:validate <software-path-or-repo>
+```
+
+## Arguments
+
+- `<software-path-or-repo>` - **Required.** Either:
+  - A **local path** to the software source code (e.g., `/home/user/gimp`, `./blender`)
+  - A **GitHub repository URL** (e.g., `https://github.com/GNOME/gimp`, `github.com/blender/blender`)
+
+  If a GitHub URL is provided, the agent clones the repo locally first, then works on the local copy.
+
+  The software name is derived from the directory name. The agent locates the CLI harness at `<repo-root>/<software-name>/agent-harness/`.
+
+## What This Command Validates
+
+### 1. Directory Structure
+- `agent-harness/cli_anything/<software>/` exists (namespace sub-package)
+- `cli_anything/` has NO `__init__.py` (PEP 420 namespace package)
+- `<software>/` HAS `__init__.py` (regular sub-package)
+- `core/`, `utils/`, `tests/` subdirectories present
+- `setup.py` in agent-harness/ uses `find_namespace_packages`
+
+### 2. Required Files
+- `README.md` - Installation and usage guide
+- `<software>_cli.py` - Main CLI entry point
+- `core/project.py` - Project management
+- `core/session.py` - Undo/redo
+- `core/export.py` - Rendering/export
+- `tests/TEST.md` - Test plan and results
+- `tests/test_core.py` - Unit tests
+- `tests/test_full_e2e.py` - E2E tests
+- `../<SOFTWARE>.md` - Software-specific SOP
+
+### 3. CLI Implementation Standards
+- Uses Click framework
+- Has command groups (not flat commands)
+- Implements `--json` flag for machine-readable output
+- Implements `--project` flag for project file
+- Has `handle_error` decorator for consistent error handling
+- Has REPL mode
+- Has global session state
+
+### 4. Core Module Standards
+- `project.py` has: create, open, save, info, list_profiles
+- `session.py` has: Session class with undo/redo/snapshot
+- `export.py` has: render function and EXPORT_PRESETS
+- All modules have proper docstrings
+- All functions have type hints
+
+### 5. Test Standards
+- `TEST.md` has both plan (Part 1) and results (Part 2)
+- Unit tests use synthetic data only
+- E2E tests use real files
+- Workflow tests simulate real-world scenarios
+- `test_full_e2e.py` has a `TestCLISubprocess` class
+- `TestCLISubprocess` uses `_resolve_cli("cli-anything-<software>")` (no hardcoded paths)
+- `_resolve_cli` prints which backend is used and supports `CLI_ANYTHING_FORCE_INSTALLED`
+- Subprocess `_run` does NOT set `cwd` (installed commands work from any directory)
+- All tests pass (100% pass rate)
+
+### 6. Documentation Standards
+- `README.md` has: installation, usage, command reference, examples
+- `<SOFTWARE>.md` has: architecture analysis, command map, rendering gap assessment
+- No duplicate `HARNESS.md` (should reference plugin's HARNESS.md)
+- All commands documented with examples
+
+### 7. PyPI Packaging Standards
+- `setup.py` uses `find_namespace_packages(include=["cli_anything.*"])`
+- Package name follows `cli-anything-<software>` convention
+- Entry point: `cli-anything-<software>=cli_anything.<software>.<software>_cli:main`
+- `cli_anything/` has NO `__init__.py` (namespace package rule)
+- All imports use `cli_anything.<software>.*` prefix
+- Dependencies listed in install_requires
+- Python version requirement specified (>=3.10)
+
+### 8. Code Quality
+- No syntax errors
+- No import errors
+- Follows PEP 8 style
+- No hardcoded paths (uses relative paths or config)
+- Proper error handling (no bare `except:`)
+
+## Validation Report
+
+The command generates a detailed report:
+
+```
+CLI Harness Validation Report
+Software: gimp
+Path: <repo-root>/gimp/agent-harness/cli_anything/gimp
+
+Directory Structure (5/5 checks passed)
+Required Files (9/9 files present)
+CLI Implementation (7/7 standards met)
+Core Modules (5/5 standards met)
+Test Standards (9/9 standards met)
+Documentation (4/4 standards met)
+PyPI Packaging (7/7 standards met)
+Code Quality (5/5 checks passed)
+
+Overall: PASS (51/51 checks)
+```
+
+## Example
+
+```bash
+# Validate GIMP CLI
+/cli-anything:validate /home/user/gimp
+
+# Validate from GitHub repo
+/cli-anything:validate https://github.com/blender/blender
+```
diff --git a/.gitignore b/.gitignore
index 9710ea7..a9aec7c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,11 @@
 !/README.md
 !/assets/
 !/.claude-plugin/
+!/.agents/
+!/.agents/skills/
+!/.agents/skills/cli-anything/
+!/tests/
+!/tests/test_skill_layout.py
 
 # Step 3: Allow cli-anything-plugin entirely
 !/cli-anything-plugin/
@@ -88,4 +93,4 @@
 !/assets/*.jpg
 
 assets/gen_typing_gif.py
-!README_CN.md
\ No newline at end of file
+!README_CN.md
diff --git a/README.md b/README.md
index 582669d..bf93b9b 100644
--- a/README.md
+++ b/README.md
@@ -54,34 +54,35 @@ CLI is the universal interface for both humans and AI agents:
 
 ### Prerequisites
 
-- **Claude Code** (with plugin support)
+- **An agent runtime that supports local skills** (Codex, Claude Code, or similar)
 - **Python 3.10+**
 - Target software installed (e.g., GIMP, Blender, LibreOffice, or your own application)
 
-### Step 1: Add the Marketplace
+### Step 1: Install via Plugin or Skill
 
-CLI-Anything is distributed as a Claude Code plugin marketplace hosted on GitHub.
+CLI-Anything still supports the original plugin flow, and now also exposes a repo-local skill under `.agents/skills/cli-anything`.
 
 ```bash
-# Add the CLI-Anything marketplace
+# Plugin path
 /plugin marketplace add HKUDS/CLI-Anything
-```
-
-### Step 2: Install the Plugin
-
-```bash
-# Install the cli-anything plugin from the marketplace
 /plugin install cli-anything
+
+# Skill path (additional option for runtimes with local skills)
+git clone https://github.com/HKUDS/CLI-Anything.git
+cd CLI-Anything
+mkdir -p ~/.agents/skills
+ln -s "$PWD/.agents/skills/cli-anything" ~/.agents/skills/cli-anything
 ```
 
-That's it. The plugin is now available in your Claude Code session.
+### Step 2: Invoke the Workflow
+
+If you are using the plugin, call `/cli-anything <software-path-or-repo>`. If you are using a local-skill runtime, ask your agent to use the `cli-anything` skill on the same target path. Both entrypoints follow the same methodology.
 
 ### Step 3: Build a CLI in One Command
 
 ```bash
-# /cli-anything <software-path-or-repo>
-# Generate a complete CLI for GIMP (all 7 phases)
-/cli-anything ./gimp
+# Example prompt to your agent
+Use the cli-anything skill to build a complete CLI harness for ./gimp.
 ```
 
 This runs the full pipeline:
@@ -109,19 +110,15 @@ cli-anything-gimp
 ```
 
 <details>
-<summary><strong>Alternative: Manual Installation</strong></summary>
+<summary><strong>Alternative: Skill-Only Installation</strong></summary>
 
-If you prefer not to use the marketplace:
+If your runtime does not support the plugin flow but does support local skills:
 
 ```bash
-# Clone the repo
 git clone https://github.com/HKUDS/CLI-Anything.git
-
-# Copy plugin to Claude Code plugins directory
-cp -r CLI-Anything/cli-anything-plugin ~/.claude/plugins/cli-anything
-
-# Reload plugins
-/reload-plugins
+cd CLI-Anything
+mkdir -p ~/.agents/skills
+ln -s "$PWD/.agents/skills/cli-anything" ~/.agents/skills/cli-anything
 ```
 
 </details>
@@ -407,11 +404,15 @@ TOTAL        1,458 passed  ✅   100% pass rate
 ```
 cli-anything/
 ├── 📄 README.md                          # You are here
+├── 🧠 .agents/skills/cli-anything/       # Additional local-skill entrypoint
+│   ├── SKILL.md                          # Triggering + workflow guidance
+│   ├── agents/openai.yaml                # Optional UI metadata
+│   └── references/                       # Skill-local SOP and command specs
 ├── 📁 assets/                            # Images and media
 │   ├── icon.png                          # Project icon
 │   └── teaser.png                        # Teaser figure
 │
-├── 🔌 cli-anything-plugin/               # The Claude Code plugin
+├── 🔌 cli-anything-plugin/               # Claude Code plugin wrapper
 │   ├── HARNESS.md                        # Methodology SOP (source of truth)
 │   ├── README.md                         # Plugin documentation
 │   ├── QUICKSTART.md                     # 5-minute getting started
@@ -553,18 +554,27 @@ The playbook distills key insights from successfully building all 9 diverse, pro
 
 ## 📦 Installation & Usage
 
-### For Plugin Users (Claude Code)
+### For Plugin Users
 
 ```bash
-# Add marketplace & install (recommended)
 /plugin marketplace add HKUDS/CLI-Anything
 /plugin install cli-anything
+/cli-anything <software-path-or-repo>
+```
+
+### For Skill Users
 
-# Build a CLI for any software with a codebase
-/cli-anything <software-name>
+```bash
+git clone https://github.com/HKUDS/CLI-Anything.git
+cd CLI-Anything
+ls .agents/skills/cli-anything
+mkdir -p ~/.agents/skills
+ln -s "$PWD/.agents/skills/cli-anything" ~/.agents/skills/cli-anything
 ```
 
-### For Generated CLIs
+Then ask your agent to use the `cli-anything` skill against a local source tree or repository URL. The skill uses the same methodology and output layout as the plugin flow.
+
+### Generated CLIs
 
 ```bash
 # Install any generated CLI
@@ -597,9 +607,9 @@ CLI_ANYTHING_FORCE_INSTALLED=1 python3 -m pytest cli_anything/<software>/tests/
 
 We welcome contributions! CLI-Anything is designed to be extensible:
 
-- **New software targets** — Use the plugin to generate a CLI for any software with a codebase, then submit your harness via [`cli-anything-plugin/PUBLISHING.md`](cli-anything-plugin/PUBLISHING.md).
+- **New software targets** — Use the repo-local skill to generate a CLI for any software with a codebase, then submit your harness via [`cli-anything-plugin/PUBLISHING.md`](cli-anything-plugin/PUBLISHING.md).
 - **Methodology improvements** — PRs to `HARNESS.md` that encode new lessons learned
-- **Plugin enhancements** — New commands, phase improvements, better validation
+- **Skill and workflow enhancements** — Better prompts, clearer references, and improved validation flows
 - **Test coverage** — More E2E scenarios, edge cases, workflow tests
 
 ### Roadmap
@@ -609,7 +619,7 @@ We welcome contributions! CLI-Anything is designed to be extensible:
 - [ ] Community-contributed CLI harnesses for internal/custom software
 - [ ] Integration with additional agent frameworks beyond Claude Code
 - [ ] Support packaging APIs for closed-source software and web services into CLIs
-- [ ] Produce SKILL.md alongside the CLI for agent skill discovery and orchestration
+- [x] Ship a repo-local `cli-anything` skill for agent skill discovery and orchestration
 
 ---
 
@@ -617,10 +627,12 @@ We welcome contributions! CLI-Anything is designed to be extensible:
 
 | Document | Description |
 |----------|-------------|
+| [`.agents/skills/cli-anything/SKILL.md`](.agents/skills/cli-anything/SKILL.md) | Additional skill entrypoint for runtimes that support local skills |
+| [`.agents/skills/cli-anything/references/HARNESS.md`](.agents/skills/cli-anything/references/HARNESS.md) | Skill-local copy of the methodology SOP |
 | [`cli-anything-plugin/HARNESS.md`](cli-anything-plugin/HARNESS.md) | The methodology SOP — single source of truth |
 | [`cli-anything-plugin/README.md`](cli-anything-plugin/README.md) | Plugin documentation — commands, options, phases |
-| [`cli-anything-plugin/QUICKSTART.md`](cli-anything-plugin/QUICKSTART.md) | 5-minute getting started guide |
-| [`cli-anything-plugin/PUBLISHING.md`](cli-anything-plugin/PUBLISHING.md) | Distribution and publishing guide |
+| [`cli-anything-plugin/QUICKSTART.md`](cli-anything-plugin/QUICKSTART.md) | Plugin getting started guide |
+| [`cli-anything-plugin/PUBLISHING.md`](cli-anything-plugin/PUBLISHING.md) | Harness distribution and publishing guide |
 
 Each generated harness also includes:
 - `<SOFTWARE>.md` — Architecture SOP specific to that application
diff --git a/README_CN.md b/README_CN.md
index 2f720c4..b13382f 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -54,34 +54,35 @@ CLI 是人类和 AI Agent 共通的万能接口：
 
 ### 环境要求
 
-- **Claude Code**（需支持插件）
+- **支持插件或本地 skill 的 Agent 运行时**（Codex、Claude Code 等）
 - **Python 3.10+**
 - 目标软件已安装（如 GIMP、Blender、LibreOffice 或你自己的应用）
 
-### 第一步：添加插件市场
+### 第一步：通过插件或 Skill 接入
 
-CLI-Anything 以 Claude Code 插件市场的形式托管在 GitHub 上。
+CLI-Anything 保留原有插件接入方式，同时新增仓库内 skill，路径是 `.agents/skills/cli-anything`。
 
 ```bash
-# 添加 CLI-Anything 插件市场
+# 插件方式
 /plugin marketplace add HKUDS/CLI-Anything
-```
-
-### 第二步：安装插件
-
-```bash
-# 从市场安装 cli-anything 插件
 /plugin install cli-anything
+
+# Skill 方式（支持本地 skill 的运行时可选）
+git clone https://github.com/HKUDS/CLI-Anything.git
+cd CLI-Anything
+mkdir -p ~/.agents/skills
+ln -s "$PWD/.agents/skills/cli-anything" ~/.agents/skills/cli-anything
 ```
 
-搞定。插件已经在你的 Claude Code 会话中可用了。
+### 第二步：调用工作流
+
+如果你走插件方式，就调用 `/cli-anything <软件路径或仓库地址>`。如果你走本地 skill 方式，就让 Agent 对同一个目标路径使用 `cli-anything` skill。两条入口都遵循同一套方法论。
 
 ### 第三步：一行命令生成 CLI
 
 ```bash
-# /cli-anything <软件路径或仓库地址>
-# 为 GIMP 生成完整的 CLI（7 个阶段全自动）
-/cli-anything ./gimp
+# 给 Agent 的示例提示词
+使用 cli-anything skill，为 ./gimp 构建完整的 CLI harness。
 ```
 
 完整流水线自动执行：
@@ -110,19 +111,15 @@ cli-anything-gimp
 ```
 
 <details>
-<summary><strong>备选方案：手动安装</strong></summary>
+<summary><strong>备选方案：仅使用 Skill</strong></summary>
 
-如果你不想用插件市场：
+如果你的运行时不支持插件，但支持本地 skill：
 
 ```bash
-# 克隆仓库
 git clone https://github.com/HKUDS/CLI-Anything.git
-
-# 复制插件到 Claude Code 插件目录
-cp -r CLI-Anything/cli-anything-plugin ~/.claude/plugins/cli-anything
-
-# 重新加载插件
-/reload-plugins
+cd CLI-Anything
+mkdir -p ~/.agents/skills
+ln -s "$PWD/.agents/skills/cli-anything" ~/.agents/skills/cli-anything
 ```
 
 </details>
@@ -415,11 +412,15 @@ TOTAL        1,458 passed  ✅   100% pass rate
 cli-anything/
 ├── 📄 README.md                          # 英文文档
 ├── 📄 README_CN.md                       # 中文文档（你在这里）
+├── 🧠 .agents/skills/cli-anything/       # 新增的本地 skill 入口
+│   ├── SKILL.md                          # 触发条件与工作流说明
+│   ├── agents/openai.yaml                # 可选 UI 元数据
+│   └── references/                       # skill 内置 SOP 与命令规范
 ├── 📁 assets/                            # 图片和媒体文件
 │   ├── icon.png                          # 项目图标
 │   └── teaser.png                        # 概览图
 │
-├── 🔌 cli-anything-plugin/               # Claude Code 插件
+├── 🔌 cli-anything-plugin/               # Claude Code 插件壳
 │   ├── HARNESS.md                        # 方法论 SOP（唯一权威来源）
 │   ├── README.md                         # 插件文档
 │   ├── QUICKSTART.md                     # 5 分钟快速上手
@@ -427,7 +428,7 @@ cli-anything/
 │   ├── repl_skin.py                      # 统一 REPL 界面
 │   ├── commands/                         # 插件命令定义
 │   │   ├── cli-anything.md               # 主构建命令
-│   │   ├── build.md                      # 扩展已有 harness 覆盖面
+│   │   ├── refine.md                     # 扩展已有 harness 覆盖面
 │   │   ├── test.md                       # 测试运行器
 │   │   └── validate.md                   # 标准验证
 │   └── scripts/
@@ -561,17 +562,26 @@ HARNESS.md 是我们通过自动化 CLI 生成让任意软件变得 Agent 可用
 
 ## 📦 安装与使用
 
-### 插件用户（Claude Code）
+### 插件用户
 
 ```bash
-# 添加市场并安装（推荐）
 /plugin marketplace add HKUDS/CLI-Anything
 /plugin install cli-anything
+/cli-anything <软件路径或仓库地址>
+```
 
-# 为任何有代码库的软件生成 CLI
-/cli-anything <软件名>
+### Skill 用户
+
+```bash
+git clone https://github.com/HKUDS/CLI-Anything.git
+cd CLI-Anything
+ls .agents/skills/cli-anything
+mkdir -p ~/.agents/skills
+ln -s "$PWD/.agents/skills/cli-anything" ~/.agents/skills/cli-anything
 ```
 
+然后让 Agent 对本地源码树或仓库 URL 使用 `cli-anything` skill。它和插件方式共用同一套方法论与输出结构。
+
 ### 使用生成的 CLI
 
 ```bash
@@ -605,9 +615,9 @@ CLI_ANYTHING_FORCE_INSTALLED=1 python3 -m pytest cli_anything/<软件名>/tests/
 
 欢迎贡献！CLI-Anything 天然支持扩展：
 
-- **新的目标软件** — 用插件为任意有代码库的软件生成 CLI，然后通过 [`cli-anything-plugin/PUBLISHING.md`](cli-anything-plugin/PUBLISHING.md) 提交你的成果。
+- **新的目标软件** — 用插件或仓库内 skill 为任意有代码库的软件生成 CLI，然后通过 [`cli-anything-plugin/PUBLISHING.md`](cli-anything-plugin/PUBLISHING.md) 提交你的成果。
 - **方法论改进** — 向 `HARNESS.md` 提 PR，把新的经验教训沉淀下来
-- **插件增强** — 新命令、阶段优化、更好的验证逻辑
+- **Skill 与工作流增强** — 更好的提示词、更清晰的 references、更强的验证链路
 - **测试覆盖** — 更多端到端场景、边界情况、工作流测试
 
 ---
@@ -616,10 +626,12 @@ CLI_ANYTHING_FORCE_INSTALLED=1 python3 -m pytest cli_anything/<软件名>/tests/
 
 | 文档 | 说明 |
 |-----|------|
+| [`.agents/skills/cli-anything/SKILL.md`](.agents/skills/cli-anything/SKILL.md) | 面向支持本地 skill 的运行时的新增入口 |
+| [`.agents/skills/cli-anything/references/HARNESS.md`](.agents/skills/cli-anything/references/HARNESS.md) | skill 内置的方法论 SOP 副本 |
 | [`cli-anything-plugin/HARNESS.md`](cli-anything-plugin/HARNESS.md) | 方法论 SOP — 唯一权威来源 |
 | [`cli-anything-plugin/README.md`](cli-anything-plugin/README.md) | 插件文档 — 命令、选项、阶段 |
-| [`cli-anything-plugin/QUICKSTART.md`](cli-anything-plugin/QUICKSTART.md) | 5 分钟快速上手 |
-| [`cli-anything-plugin/PUBLISHING.md`](cli-anything-plugin/PUBLISHING.md) | 分发与发布指南 |
+| [`cli-anything-plugin/QUICKSTART.md`](cli-anything-plugin/QUICKSTART.md) | 插件快速上手 |
+| [`cli-anything-plugin/PUBLISHING.md`](cli-anything-plugin/PUBLISHING.md) | harness 分发与发布指南 |
 
 每个生成的 CLI 还包含：
 
@@ -668,4 +680,3 @@ MIT License — 可自由使用、修改和分发。
   <em>感谢访问 ✨ CLI-Anything！</em><br><br>
   <img src="https://visitor-badge.laobi.icu/badge?page_id=HKUDS.CLI-Anything&style=for-the-badge&color=00d4ff" alt="Views">
 </p>
-
diff --git a/tests/test_skill_layout.py b/tests/test_skill_layout.py
new file mode 100644
index 0000000..26ac1a3
--- /dev/null
+++ b/tests/test_skill_layout.py
@@ -0,0 +1,63 @@
+from pathlib import Path
+import unittest
+
+
+ROOT = Path(__file__).resolve().parents[1]  # repository root (parent of tests/)
+SKILL_DIR = ROOT / ".agents" / "skills" / "cli-anything"  # repo-local skill directory
+
+
+def _read(path: Path) -> str:
+    return path.read_text(encoding="utf-8")  # explicit UTF-8; README_CN.md holds CJK text
+
+
+def _frontmatter_value(text: str, key: str) -> str | None:
+    """Return the stripped value of ``key`` from the leading ``---`` block, or None."""
+    marker = "---\n"
+    if not text.startswith(marker):
+        return None
+    header = text.split(marker, 1)[1].partition("\n---\n")[0]
+    prefix = f"{key}:"
+    for entry in header.splitlines():
+        if entry.startswith(prefix):
+            return entry.removeprefix(prefix).strip()
+    return None
+
+
+class SkillLayoutTest(unittest.TestCase):
+    """Checks the repo-local cli-anything skill files and the README install docs."""
+
+    def test_cli_anything_skill_layout_exists(self) -> None:
+        self.assertTrue(SKILL_DIR.is_dir(), f"missing directory: {SKILL_DIR}")
+        commands_dir = SKILL_DIR / "references" / "commands"
+        self.assertTrue(commands_dir.is_dir(), f"missing directory: {commands_dir}")
+        expected = [SKILL_DIR / "SKILL.md", SKILL_DIR / "references" / "HARNESS.md"]
+        for name in ["cli-anything.md", "refine.md", "test.md", "validate.md", "list.md"]:
+            expected.append(commands_dir / name)
+        for path in expected:
+            with self.subTest(path=path):  # report every missing file, not only the first
+                self.assertTrue(path.is_file(), f"missing file: {path}")
+
+    def test_skill_frontmatter_describes_triggering(self) -> None:
+        skill_text = _read(SKILL_DIR / "SKILL.md")
+        self.assertEqual(_frontmatter_value(skill_text, "name"), "cli-anything")
+        description = _frontmatter_value(skill_text, "description")
+        self.assertIsNotNone(description)
+        assert description is not None  # narrows the Optional for static type checkers
+        lowered = description.lower()
+        for keyword in ("skill", "cli", "agent"):  # "skills" is implied by "skill"
+            self.assertIn(keyword, lowered)
+
+    def test_readme_documents_plugin_and_skill_installation(self) -> None:
+        docs = {name: _read(ROOT / name) for name in ("README.md", "README_CN.md")}
+        for name, text in docs.items():
+            with self.subTest(readme=name):
+                self.assertIn(".agents/skills/cli-anything", text)
+                self.assertIn("/plugin install cli-anything", text)
+        self.assertNotIn("Legacy Plugin Users", docs["README.md"])
+        self.assertNotIn("Legacy 插件用户", docs["README_CN.md"])
+        self.assertNotIn("preferred generic skill entrypoint", docs["README.md"])
+        self.assertNotIn("首选的通用 skill 入口", docs["README_CN.md"])
+
+
+if __name__ == "__main__":
+    unittest.main()  # lets the file run directly, without a separate test runner