diff --git a/.mindspec/docs/domains/execution/architecture.md b/.mindspec/docs/domains/execution/architecture.md index 8628019..0c4c7d1 100644 --- a/.mindspec/docs/domains/execution/architecture.md +++ b/.mindspec/docs/domains/execution/architecture.md @@ -7,27 +7,29 @@ The `Executor` interface separates enforcement ("what") from execution ("how"): ``` -Workflow Layer Execution Layer -┌─────────────────┐ ┌──────────────────┐ -│ approve/ │──Executor──▶│ executor/git.go │ -│ complete/ │ interface │ (GitExecutor) │ -│ next/ │ │ │ -│ specinit/ │ │ gitutil/ │ -│ cleanup/ │ │ (low-level ops) │ -└─────────────────┘ └──────────────────┘ +Workflow Layer Execution Engine +┌─────────────────┐ ┌─────────────────────────────┐ +│ approve/ │──Executor──▶│ executor/mindspec_executor.go│ +│ complete/ │ interface │ (MindspecExecutor) │ +│ next/ │ │ │ +│ spec/ │ │ gitutil/ │ +│ cleanup/ │ │ (low-level ops) │ +└─────────────────┘ └─────────────────────────────┘ ``` -- **GitExecutor** — concrete implementation wrapping git+worktree operations +- **MindspecExecutor** — concrete implementation wrapping git+worktree operations (dispatches beads to worktrees, merges completed bead branches, finalizes specs) - **MockExecutor** — test double for enforcement testing without git side effects - **DI wiring** — `cmd/mindspec/root.go` has `newExecutor(root)` factory +The execution engine reads beads produced by the planning layer. Each bead is a self-contained work packet — requirements, context, dependencies, acceptance criteria — so a fresh agent can pick it up without session history. Beads are the substrate that makes the `Executor` interface pluggable: any orchestrator that can read a bead can dispatch work. + ### withWorkingDir Safety -Worktree removal and branch deletion require CWD to be outside the target worktree. `GitExecutor` uses `withWorkingDir(root, func)` to temporarily chdir to the repo root before destructive operations, then restores the original CWD. 
This prevents "cannot remove worktree: in use" errors. +Worktree removal and branch deletion require CWD to be outside the target worktree. `MindspecExecutor` uses `withWorkingDir(root, func)` to temporarily chdir to the repo root before destructive operations, then restores the original CWD. This prevents "cannot remove worktree: in use" errors. ### Function Injection for Testability -`GitExecutor` exposes function variables (`WorktreeRemoveFn`, `DeleteBranchFn`, `MergeBranchFn`, etc.) that can be replaced in tests. This avoids requiring a real git repository for unit tests while keeping the production code straightforward. +`MindspecExecutor` exposes function variables (`WorktreeRemoveFn`, `DeleteBranchFn`, `MergeBranchFn`, etc.) that can be replaced in tests. This avoids requiring a real git repository for unit tests while keeping the production code straightforward. ### Branch Conventions diff --git a/.mindspec/docs/domains/execution/interfaces.md b/.mindspec/docs/domains/execution/interfaces.md index b2dd885..ae34fc0 100644 --- a/.mindspec/docs/domains/execution/interfaces.md +++ b/.mindspec/docs/domains/execution/interfaces.md @@ -13,7 +13,7 @@ type Executor interface { FinalizeEpic(epicID, specID, specBranch string) (FinalizeResult, error) Cleanup(specID string, force bool) error - // Epic handoff (notification hook — no-op for GitExecutor) + // Epic handoff (notification hook — no-op for MindspecExecutor) HandoffEpic(epicID, specID string, beadIDs []string) error // Query methods @@ -26,7 +26,7 @@ type Executor interface { ### GitUtil Helpers (`internal/gitutil/gitutil.go`) -Low-level git operations used only by `GitExecutor`: +Low-level git operations used only by `MindspecExecutor`: | Function | Purpose | |:---------|:--------| @@ -49,5 +49,5 @@ Low-level git operations used only by `GitExecutor`: | Type | Package | Purpose | |:-----|:--------|:--------| -| `GitExecutor` | `internal/executor/git.go` | Production: real git+worktree operations | +| 
`MindspecExecutor` | `internal/executor/mindspec_executor.go` | Production: real git+worktree operations | | `MockExecutor` | `internal/executor/mock.go` | Testing: records calls, returns configured errors | diff --git a/.mindspec/docs/domains/execution/overview.md b/.mindspec/docs/domains/execution/overview.md index 2e80cde..82a7ce7 100644 --- a/.mindspec/docs/domains/execution/overview.md +++ b/.mindspec/docs/domains/execution/overview.md @@ -2,7 +2,7 @@ ## What This Domain Owns -The **execution** domain owns all git, worktree, and filesystem operations — the "how" layer that performs operations delegated by the workflow layer. +The **execution engine** owns all git, worktree, and filesystem operations — the "how" layer that implements operations delegated by the workflow layer. It dispatches beads to worktrees, executes code changes, merges results, and finalizes specs. - **Git operations** — branching, merging, diffstat, commit counting, push/PR creation - **Worktree lifecycle** — creating, removing, and switching between isolated workspaces @@ -14,6 +14,7 @@ The **execution** domain owns all git, worktree, and filesystem operations — t Execution does **not** own: - Lifecycle phase derivation or mode enforcement (workflow) - Approval gates or validation logic (workflow) +- Plan decomposition or quality assessment (workflow) - Beads integration or epic/bead queries (workflow) - CLI infrastructure or project health checks (core) @@ -23,9 +24,9 @@ Execution **receives** instructions from the workflow layer via the `Executor` i | Package | Purpose | |:--------|:--------| -| `internal/executor/` | `Executor` interface + `GitExecutor` + `MockExecutor` | -| `internal/gitutil/` | Low-level git helpers (branch, merge, PR, diffstat) | +| `internal/executor/` | `Executor` interface + `MindspecExecutor` (production) + `MockExecutor` (testing) | +| `internal/gitutil/` | Low-level git helpers (branch, merge, PR, diffstat) — used only by `MindspecExecutor` | ## Import Rule 
-Workflow packages (`internal/approve/`, `internal/complete/`, `internal/next/`, `internal/cleanup/`, `internal/specinit/`) MUST call `executor.Executor` methods. They MUST NOT import `internal/gitutil/` directly. This boundary is enforced by convention and checked by `mindspec doctor`. +Workflow packages (`internal/approve/`, `internal/complete/`, `internal/next/`, `internal/cleanup/`, `internal/spec/`) MUST call `executor.Executor` methods. They MUST NOT import `internal/gitutil/` directly. This boundary is enforced by convention and checked by `mindspec doctor`. diff --git a/.mindspec/docs/domains/workflow/architecture.md b/.mindspec/docs/domains/workflow/architecture.md index 97eeaab..ddd5f56 100644 --- a/.mindspec/docs/domains/workflow/architecture.md +++ b/.mindspec/docs/domains/workflow/architecture.md @@ -13,7 +13,11 @@ Each mode gates: - **Required context** — what must be reviewed before proceeding - **Transition gates** — what conditions must hold to advance -### Beads as Single State Store (ADR-0023) +### Beads as Substrate (ADR-0023) + +Beads is both the single state store and the **contract between the planning and execution layers**. Each bead is a self-contained work packet that encapsulates requirements, context (impacted domains, ADR citations), dependencies, and acceptance criteria. A fresh agent picking up a bead doesn't need session history — the bead carries everything it needs. + +This is what makes execution pluggable: any orchestrator that can read beads can dispatch work. The planning layer writes beads; the execution engine reads them. All lifecycle state is derived from Beads — no filesystem state files (no `focus`, no `lifecycle.yaml`): @@ -46,7 +50,19 @@ approve/impl.go ──▶ exec.FinalizeEpic() cleanup/ ──▶ exec.Cleanup() ``` -**Import rule**: Workflow packages (`approve/`, `complete/`, `next/`, `cleanup/`, `specinit/`) call `executor.Executor` methods. They MUST NOT import `internal/gitutil/` directly. 
+**Import rule**: Workflow packages (`approve/`, `complete/`, `next/`, `cleanup/`, `spec/`) call `executor.Executor` methods. They MUST NOT import `internal/gitutil/` directly. + +### Plan Quality Responsibility + +The workflow layer ensures plans are well-decomposed before handoff to the execution engine. This is critical because AI agents perform significantly better on well-structured, bitesize tasks than on vague or monolithic ones (see [arXiv:2512.08296](https://arxiv.org/abs/2512.08296)). + +Workflow enforces: +- **Bead decomposition** — each bead must be a focused, independently completable unit of work +- **Clear acceptance criteria** — every bead has verifiable completion conditions +- **Dependency ordering** — beads declare dependencies so the execution engine dispatches them in the right order +- **Validation gates** — `internal/validate/` checks structural requirements and ADR compliance before plan approval + +The execution engine trusts that approved plans are well-decomposed and simply executes them — it does not assess plan quality. 
### ADR Governance diff --git a/.mindspec/docs/domains/workflow/overview.md b/.mindspec/docs/domains/workflow/overview.md index 42ccdfc..4cc1e5d 100644 --- a/.mindspec/docs/domains/workflow/overview.md +++ b/.mindspec/docs/domains/workflow/overview.md @@ -29,7 +29,7 @@ Workflow **uses** context packs (from context-system) to provide mode-appropriat | `internal/approve/` | Spec, plan, and impl approval enforcement | | `internal/complete/` | Bead close-out orchestration | | `internal/next/` | Work selection, claiming, worktree dispatch | -| `internal/specinit/` | Spec creation (worktree-first flow) | +| `internal/spec/` | Spec creation (worktree-first flow) | | `internal/cleanup/` | Post-lifecycle worktree/branch cleanup | | `internal/phase/` | Phase derivation from beads (ADR-0023) | | `internal/resolve/` | Target spec resolution and prefix matching | diff --git a/.mindspec/docs/specs/081-executor-docs-tests/plan.md b/.mindspec/docs/specs/081-executor-docs-tests/plan.md new file mode 100644 index 0000000..63bf27b --- /dev/null +++ b/.mindspec/docs/specs/081-executor-docs-tests/plan.md @@ -0,0 +1,235 @@ +--- +approved_at: "2026-03-10T21:18:25Z" +approved_by: user +bead_ids: + - mindspec-4ya5.1 + - mindspec-4ya5.2 + - mindspec-4ya5.3 + - mindspec-4ya5.4 + - mindspec-qszb +last_updated: "2026-03-10" +spec_id: 081-executor-docs-tests +status: Approved +version: 1 +--- +# Plan: 081-executor-docs-tests + +## ADR Fitness + +- **ADR-0023** (Beads as single state authority): Documentation updates reference beads as the foundation for decomposition and state tracking +- **ADR-0006** (Protected main with PR-based merging): Execution layer docs reference the branch/merge strategy + +## Testing Strategy + +- Beads 1-2 (renames): `make build` + `go test ./...` + `go vet ./...` catch all breakage +- Bead 3 (docs): Grep-based validation proofs confirm no stale terminology +- Bead 4 (test audit): Run `TestLLM_SingleBead` as smoke test; document all 18 scenario findings +- Bead 5 
(stop behavior): Fix instruct templates + CLI output to prevent agent auto-proceeding + +## Bead 1: Rename GitExecutor → MindspecExecutor + purge gitops + +Mechanical rename of the executor struct, constructor, and source file. Also purges the last `gitops` reference in live Go code. + +**Steps** + +1. `git mv internal/executor/git.go internal/executor/mindspec_executor.go` +2. In `mindspec_executor.go`: rename struct `GitExecutor` → `MindspecExecutor`, constructor `NewGitExecutor` → `NewMindspecExecutor` +3. In `executor_test.go`: update all `GitExecutor`/`NewGitExecutor` references +4. In `cmd/mindspec/root.go`: update `newExecutor()` to call `NewMindspecExecutor` +5. In `internal/adr/store_test.go`: replace `gitops` test fixture tag with `execution` +6. Update comment in `mindspec_executor.go` referencing `specinit` → `spec` + +**Acceptance Criteria** + +- [ ] Zero grep hits for `GitExecutor`, `NewGitExecutor`, and `gitops` in `internal/` and `cmd/` Go files +- [ ] `make build` succeeds and `go test ./internal/executor/... -v` passes +- [ ] `go vet ./...` clean + +**Verification** + +- [ ] `grep -r "GitExecutor" internal/ cmd/` → zero hits +- [ ] `grep -r "NewGitExecutor" internal/ cmd/` → zero hits +- [ ] `grep -rn "gitops" --include="*.go" internal/ cmd/` → zero hits +- [ ] `make build` → exit 0 +- [ ] `go test ./internal/executor/... -v` → all pass +- [ ] `go vet ./...` → clean + +**Depends on** + +None + +## Bead 2: Rename `internal/specinit/` → `internal/spec/` + +Package rename with import path updates. Also renames source files for clarity. + +**Steps** + +1. `git mv internal/specinit/ internal/spec/` and rename `specinit.go` → `create.go`, `specinit_test.go` → `create_test.go` +2. Update package declarations: `package specinit` → `package spec` +3. Update imports and call sites in `cmd/mindspec/spec.go` and `cmd/mindspec/spec_init.go` (`specinit.Run` → `spec.Run`) +4. 
Update all comments referencing `specinit` in `internal/executor/mindspec_executor.go`, `internal/lifecycle/scenario_test.go`, and `cmd/mindspec/root.go` +5. Verify no stale references remain + +**Acceptance Criteria** + +- [ ] Zero grep hits for `specinit` in Go source files under `internal/` and `cmd/` +- [ ] `internal/spec/create.go` exists with `package spec` +- [ ] `make build` succeeds and `go test ./internal/spec/... -v` passes + +**Verification** + +- [ ] `grep -rn "specinit" --include="*.go" internal/ cmd/` → zero hits +- [ ] `ls internal/spec/create.go` → exists +- [ ] `make build` → exit 0 +- [ ] `go test ./internal/spec/... -v` → all pass +- [ ] `go test ./cmd/mindspec/... -v` → all pass +- [ ] `go vet ./...` → clean + +**Depends on** + +Bead 1 (file `mindspec_executor.go` must exist before updating its comments) + +## Bead 3: Architecture documentation overhaul + +Rewrite documentation to clearly articulate the two-layer architecture and reflect new naming. + +**Steps** + +1. **AGENTS.md** §138–147: Rewrite "Architecture: Workflow/Execution Boundary": + - Workflow layer: spec creation, plan decomposition into bitesize beads, validation against architecture (ADRs, domain boundaries), quality gates (tests, acceptance criteria), phase enforcement + - Execution engine (`MindspecExecutor`): dispatching beads to worktrees, implementing code changes, merging results, finalizing the spec + - Reference arXiv:2512.08296 for decomposition quality rationale + - Update package lists to use `internal/spec/` and `MindspecExecutor` + +2. **`.mindspec/docs/domains/execution/overview.md`**: Update key packages table, refine "what this domain owns" with execution engine framing + +3. **`.mindspec/docs/domains/execution/architecture.md`**: `GitExecutor` → `MindspecExecutor` throughout + +4. **`.mindspec/docs/domains/execution/interfaces.md`**: Update implementation names + +5. 
**`.mindspec/docs/domains/workflow/overview.md`**: `specinit` → `spec` in key packages table + +6. **`.mindspec/docs/domains/workflow/architecture.md`**: Add plan quality responsibility section — the workflow layer ensures beads are well-decomposed, reviewed, and have clear acceptance criteria before handoff to the execution engine + +7. **Classify `mindspec next` and `mindspec complete` as execution layer commands** in AGENTS.md and domain docs — they create/destroy worktrees and manage branch topology, which is an execution concern. The workflow layer (approve commands) decides *when* transitions happen; the execution layer (next/complete) performs them. + +8. **Auto-memory** (`MEMORY.md`): Update `GitExecutor` → `MindspecExecutor`, `specinit` → `spec` + +**Acceptance Criteria** + +- [ ] Zero grep hits for `GitExecutor` and `specinit` in AGENTS.md and `.mindspec/docs/domains/` +- [ ] AGENTS.md architecture section describes workflow layer (decomposition, validation, quality gates) and execution engine (implementation, merging, finalization) +- [ ] `go test ./internal/executor/... -v` still passes (no code changes, but confirms docs didn't break anything) + +**Verification** + +- [ ] `grep -rn "GitExecutor" .mindspec/docs/ AGENTS.md` → zero hits (excluding historical spec 077) +- [ ] `grep -rn "specinit" AGENTS.md .mindspec/docs/domains/` → zero hits +- [ ] AGENTS.md architecture section clearly describes both layers +- [ ] Execution domain docs reference `MindspecExecutor` +- [ ] Workflow domain docs describe plan quality responsibility +- [ ] `go test ./internal/executor/... -v` → all pass (regression check) + +**Depends on** + +Beads 1-2 (docs must reference final names) + +## Bead 4: LLM test scenario audit + +Review all 18 scenarios for correctness. Document findings in HISTORY.md. Fix any broken expectations. + +**Steps** + +1. Read every scenario in `internal/harness/scenario.go` — prompts, assertions, setup, expected behavior +2. 
Cross-reference with `implement.md` template (lines 49, 94: STOP after complete) +3. For each scenario assess: prompt validity, assertion correctness, MaxTurns/timeout realism +4. Scrutinize specifically: + - **SpecToIdle**: 100 turns for full manual lifecycle + - **MultiBeadDeps**: Expects explicit `mindspec next` + - **BlockedBeadTransition**: Mode→plan when only blocked beads remain + - **UnmergedBeadGuard**: Recovery flow after close-without-complete +5. Write "Test Audit (Spec 081)" section in `internal/harness/HISTORY.md` with per-scenario findings +6. Fix any broken test expectations +7. Run `TestLLM_SingleBead` as smoke test +8. **Create stop-behavior LLM test scenarios** — new tests that verify: + - After `mindspec approve plan`, agent STOPS (does not auto-implement or write code) + - After `mindspec complete`, agent STOPS (does not auto-claim or run `mindspec next`) + - Agent uses `mindspec next` to create bead worktree (not working on spec branch directly) + These tests are expected to **fail before Bead 5** (which fixes the guidance). Run them to establish the baseline, document failures in HISTORY.md. Bead 5 re-runs them as verification. 
+ +**Acceptance Criteria** + +- [ ] HISTORY.md contains "Test Audit (Spec 081)" section with findings for all 18 scenarios +- [ ] Any outdated test expectations fixed +- [ ] `TestLLM_SingleBead` smoke test passes +- [ ] Stop-behavior test scenarios created (`TestLLM_StopAfterPlanApprove`, `TestLLM_StopAfterComplete`) +- [ ] Baseline results documented in HISTORY.md (expected failures before Bead 5) + +**Verification** + +- [ ] HISTORY.md contains "Test Audit (Spec 081)" section covering all 18 scenarios +- [ ] Any outdated expectations fixed (if found) +- [ ] `env -u CLAUDECODE go test ./internal/harness/ -v -run TestLLM_SingleBead -timeout 10m` → passes +- [ ] Stop-behavior tests exist and run (failures expected — baseline captured) + +**Depends on** + +Beads 1-2 (code references in scenarios should use new names if applicable) + +## Bead 5: Harden phase-transition stop behavior + +Fix observed failure: agent auto-proceeded after plan approval and worked on the spec branch instead of using `mindspec next` to create a bead worktree. Root causes: outdated instruct template, insufficiently emphatic CLI output. + +**Steps** + +1. **Fix `internal/instruct/templates/plan.md`**: Remove false claim "This will approve the plan AND automatically claim the first bead." Replace with clear guidance: after plan approval, STOP, run `/clear`, then `mindspec next`. +2. **Strengthen plan approve output in `cmd/mindspec/plan_cmd.go`**: Make STOP instruction unmissable with clear separator (e.g., `⛔ STOP` or `---` banner). Emphasize: do NOT proceed, run `/clear` first, then `mindspec next`. +3. **Strengthen `mindspec complete` output in `internal/complete/complete.go` `FormatResult()`**: When reporting "Next bead ready: X", append explicit instruction: "STOP. Run `/clear`, then `mindspec next` to claim it." +4. **Remove dead `--no-next` flag** from `cmd/mindspec/approve.go`. +5. 
**Update `implement.md` template** if needed — verify STOP guidance is clear and consistent with the CLI output changes. + +**Acceptance Criteria** + +- [ ] `plan.md` template no longer says "automatically claim the first bead" +- [ ] Plan approve output includes emphatic STOP + `/clear` + `mindspec next` instructions +- [ ] `mindspec complete` output includes STOP instruction when next bead is ready +- [ ] `--no-next` flag removed from `approve.go` +- [ ] `make build` succeeds and `go test ./cmd/mindspec/... -v` passes +- [ ] Stop-behavior LLM tests pass (created in Bead 4, expected to fail before this bead) + +**Verification** + +- [ ] `grep "auto.*claim" internal/instruct/templates/plan.md` → zero hits +- [ ] `grep "no-next" cmd/mindspec/approve.go` → zero hits +- [ ] `make build` → exit 0 +- [ ] `go test ./cmd/mindspec/... -v` → all pass +- [ ] `go vet ./...` → clean +- [ ] Manual review: `./bin/mindspec approve plan --help` no longer shows `--no-next` +- [ ] `env -u CLAUDECODE go test ./internal/harness/ -v -run TestLLM_StopAfterPlanApprove -timeout 10m` → passes +- [ ] `env -u CLAUDECODE go test ./internal/harness/ -v -run TestLLM_StopAfterComplete -timeout 10m` → passes + +**Depends on** + +Bead 4 (stop-behavior tests must exist before this bead fixes the guidance) + +## Provenance + +| Acceptance Criterion | Verified By | +|---------------------|-------------| +| `GitExecutor` → `MindspecExecutor` (zero grep hits) | Bead 1 verification | +| `git.go` → `mindspec_executor.go` | Bead 1 verification | +| `specinit` → `spec` (zero grep hits) | Bead 2 verification | +| `gitops` purged from live code | Bead 1 verification | +| `make build` + `go test` + `go vet` pass | Beads 1, 2 verification | +| AGENTS.md two-layer architecture | Bead 3 verification | +| Domain docs updated | Bead 3 verification | +| 18 LLM test scenarios reviewed in HISTORY.md | Bead 4 verification | +| Outdated test expectations fixed | Bead 4 verification | +| SingleBead smoke test passes | 
Bead 4 verification | +| `plan.md` template no longer claims auto-claim | Bead 5 verification | +| Plan approve output has emphatic STOP | Bead 5 verification | +| `mindspec complete` output has STOP for next bead | Bead 5 verification | +| Dead `--no-next` flag removed | Bead 5 verification | +| Stop-behavior tests created (baseline) | Bead 4 verification | +| Stop-behavior tests pass (after fix) | Bead 5 verification | +| `next`/`complete` classified as execution layer | Bead 3 verification | diff --git a/.mindspec/docs/specs/081-executor-docs-tests/spec.md b/.mindspec/docs/specs/081-executor-docs-tests/spec.md new file mode 100644 index 0000000..5ad6e7c --- /dev/null +++ b/.mindspec/docs/specs/081-executor-docs-tests/spec.md @@ -0,0 +1,199 @@ +--- +approved_at: "2026-03-10T21:08:26Z" +approved_by: user +status: Approved +--- +# Spec 081-executor-docs-tests: Executor rename, architecture docs, and LLM test review + +## Goal + +Rename `GitExecutor` to `MindspecExecutor` (including file rename `git.go` → `mindspec_executor.go`), purge legacy `gitops` and `specinit` terminology from codebase and documentation, update architecture documentation to clearly articulate the two-layer design (Workflow layer + Execution engine), and audit all 18 LLM test scenarios for correctness after the "stop between beads" behavioral change. Test audit findings go in `HISTORY.md`. + +## Background + +Spec 077 introduced the Executor interface separating workflow enforcement from execution mechanics. The implementation is complete and working, but two issues remain: + +1. **Naming**: The production executor is called `GitExecutor`, which implies it's merely a git wrapper. In reality it orchestrates the full MindSpec execution lifecycle — worktree creation, bead dispatch, branch merging, PR creation, and cleanup. The name `MindspecExecutor` better reflects its role as the standard execution engine for the MindSpec workflow. + +2. 
**Documentation gaps**: The architecture documentation doesn't clearly articulate the conceptual model. The Workflow layer is responsible for feeding the Execution engine a plan that has been broken down into bitesize beads — reviewed thoroughly, validated against architecture, with high-quality tests and clear acceptance criteria. The Execution engine is responsible for implementing those beads. This maps to research on scaling agent systems (Kim et al., "Towards a Science of Scaling Agent Systems," arXiv:2512.08296) which demonstrates that task decomposition quality directly impacts agent execution success. + +3. **Test staleness**: The recent change to stop between beads (rather than auto-continuing) may have invalidated some LLM test scenario assumptions. All 18 scenarios need a thorough review. + +4. **Legacy terminology**: The codebase still contains references to `gitops` (the pre-077 name for `gitutil`) and `specinit` (the package name for spec creation). These legacy names should be cleaned up: + - `gitops` appears in test fixtures, historical spec docs, and HISTORY.md + - `specinit` is still a live Go package (`internal/specinit/`) — should be renamed to something that reflects the workflow layer's vocabulary (e.g., `internal/spec/` or folded into the `spec create` command path) + - Old spec/plan documents (048, 050, 051, 058, 062) reference `gitops` — these are historical artifacts but comments/cross-references in live code should be updated + +## Impacted Domains + +- **execution**: Rename `GitExecutor` → `MindspecExecutor` across package, tests, docs, and DI wiring +- **workflow**: Update documentation references to executor naming +- **context-system**: Update domain docs, architecture descriptions, and AGENTS.md +- **core**: Update MEMORY.md references (auto-memory files) + +## ADR Touchpoints + +- [ADR-0023](../../adr/ADR-0023.md): Beads as single state authority — documentation should reference this as foundation for the decomposition model +- 
[ADR-0006](../../adr/ADR-0006.md): Protected main with PR-based merging — execution layer documentation should reference this + +## Requirements + +### R1: Rename GitExecutor → MindspecExecutor + +1. Rename file `internal/executor/git.go` → `internal/executor/mindspec_executor.go` +2. Rename `GitExecutor` struct to `MindspecExecutor` +3. Rename `NewGitExecutor` constructor to `NewMindspecExecutor` +4. Update all references in: + - `internal/executor/executor_test.go` + - `cmd/mindspec/root.go` (DI factory) + - Domain documentation (`execution/overview.md`, `execution/architecture.md`, `execution/interfaces.md`) + - `AGENTS.md` section on Workflow/Execution Boundary + - Auto-memory files referencing `GitExecutor` + +### R1b: Purge legacy `gitops` terminology + +1. Update `internal/adr/store_test.go` test fixture — replace `gitops` tag with `gitutil` or `execution` +2. Update comment in `internal/executor/git.go` (line 74) referencing `specinit` +3. Update any live code comments referencing `gitops` (historical spec docs are left as-is — they're closed artifacts) + +### R1c: Rename `specinit` package + +1. Rename `internal/specinit/` → `internal/spec/` (or `internal/speccreate/`) +2. Update all import paths: + - `cmd/mindspec/spec.go` + - `cmd/mindspec/spec_init.go` + - `internal/executor/git.go` (comment reference) + - `internal/lifecycle/scenario_test.go` (comments) +3. Update domain documentation referencing `specinit` +4. Update AGENTS.md and MEMORY.md references +5. Update `spec_init.go` backward-compat alias registration in `root.go` + +### R2: Architecture documentation overhaul + +1. 
Update `AGENTS.md` §138–147 to clearly describe the two-layer model: + - **Workflow layer**: Responsible for spec creation, plan decomposition into bitesize beads, validation against architecture (ADRs, domain boundaries), quality gates (tests, acceptance criteria), and phase enforcement + - **Execution engine**: Responsible for implementing the plan — dispatching beads to worktrees, executing code changes, merging results, and finalizing the spec +2. Update `.mindspec/docs/domains/execution/overview.md` with the refined conceptual model +3. Update `.mindspec/docs/domains/execution/architecture.md` to reference `MindspecExecutor` +4. Update `.mindspec/docs/domains/workflow/` docs to describe the workflow layer's responsibility for plan quality +5. Reference the decomposition research (arXiv:2512.08296) where appropriate + +### R4: Harden phase-transition stop behavior + +Observed failure mode: after `mindspec approve plan`, the agent auto-proceeded to implement bead 1 on the spec branch instead of stopping, running `/clear`, and using `mindspec next` to create a proper bead worktree. Three root causes: + +1. **`plan.md` instruct template is outdated** — still says "This will approve the plan AND automatically claim the first bead" (false since Spec 080) +2. **Plan approve output is not emphatic enough** — agent ignored the "Run /clear" guidance +3. **`mindspec complete` output says "Next bead ready: X"** which implicitly invites continuation + +Fixes: + +1. **Fix `plan.md` template** — remove the "auto-claim" lie. Clearly state: after plan approval, STOP. Run `/clear` or start a fresh agent, then `mindspec next`. +2. **Strengthen plan approve CLI output** — make the STOP instruction unmissable. Use a clear separator/banner. +3. **Strengthen `mindspec complete` CLI output** — after reporting "Next bead ready", add explicit STOP instruction: "Run `/clear`, then `mindspec next` to claim it." +4. 
**Remove dead `--no-next` flag** from `approve.go` — it's unused and misleading. +5. **Classify `mindspec next` and `mindspec complete` as execution layer commands** in documentation — they create/destroy worktrees and manage branch topology, which is execution, not workflow. + +Note: `mindspec next` already correctly branches from the spec branch via `exec.DispatchBead(beadID, specID)`. No new `--base-branch` parameter needed — the specID already determines the base. The problem was the agent skipping `mindspec next` entirely. + +### R3: LLM test scenario audit + +Review all 18 scenarios in `internal/harness/scenario.go` and `scenario_test.go`: + +1. **Verify each scenario's assumptions** match current behavior (stop between beads, manual `mindspec next`) +2. **Flag scenarios that are outdated** or test behavior that no longer exists +3. **Document findings** in `internal/harness/HISTORY.md` as a test audit section +4. **Fix any broken test expectations** — update assertions, prompts, or setup to match current behavior +5. Specific scenarios to scrutinize: + - `TestLLM_SpecToIdle` — does the 100-turn full lifecycle still work with manual bead transitions? 
+ - `TestLLM_MultiBeadDeps` — already expects manual `mindspec next`, should be OK + - `TestLLM_BlockedBeadTransition` — mode should be `plan` when only blocked beads remain + - `TestLLM_UnmergedBeadGuard` — tests recovery flow, verify assumptions + - Any test that previously assumed auto-continuation between beads + +## Scope + +### In Scope + +- `internal/executor/git.go` → `internal/executor/mindspec_executor.go` (file rename + struct/constructor rename) +- `internal/executor/executor_test.go` — update references +- `internal/executor/mock.go` — no changes expected (already `MockExecutor`) +- `cmd/mindspec/root.go` — update DI factory +- `internal/specinit/` → `internal/spec/` (package rename) +- `cmd/mindspec/spec.go`, `cmd/mindspec/spec_init.go` — update imports +- `internal/adr/store_test.go` — update `gitops` test fixture +- `.mindspec/docs/domains/execution/` — all three docs +- `.mindspec/docs/domains/workflow/` — architecture and overview updates +- `AGENTS.md` — architecture section + legacy terminology +- `internal/harness/scenario.go` — test scenario review and fixes +- `internal/harness/scenario_test.go` — test function review and fixes +- `internal/harness/HISTORY.md` — test audit findings +- `internal/instruct/templates/plan.md` — fix outdated auto-claim guidance +- `internal/instruct/templates/implement.md` — strengthen STOP after complete +- `cmd/mindspec/plan_cmd.go` — strengthen plan approve output +- `cmd/mindspec/complete.go` or `internal/complete/complete.go` — strengthen complete output +- `cmd/mindspec/approve.go` — remove dead `--no-next` flag +- Auto-memory files referencing old naming + +### Out of Scope + +- New Executor implementations (e.g., GastownExecutor) +- Changes to the `Executor` interface itself +- New LLM test scenarios (only reviewing/fixing existing ones) +- Changes to `internal/gitutil/` package +- Historical spec/plan documents (048, 050, 051, 058, 062, 077 — closed artifacts, left as-is) + +## Non-Goals + +- Changing the 
Executor interface methods or signatures +- Adding new executor capabilities +- Refactoring the workflow layer's internal logic +- Writing new LLM test scenarios beyond fixing existing ones +- Updating historical spec/plan documents that reference `gitops` — these are closed artifacts +- Renaming `internal/gitutil/` (already correct per Spec 077) + +## Acceptance Criteria + +- [ ] `GitExecutor` renamed to `MindspecExecutor` everywhere (zero grep hits for `GitExecutor` in live code) +- [ ] `NewGitExecutor` renamed to `NewMindspecExecutor` everywhere +- [ ] `git.go` renamed to `mindspec_executor.go` +- [ ] `internal/specinit/` renamed (zero grep hits for `specinit` in Go imports) +- [ ] Zero grep hits for `gitops` in live Go code (test fixtures, comments) +- [ ] `make build` succeeds +- [ ] `go test ./internal/executor/... -v` passes +- [ ] `go test ./internal/... -v` passes (catch import path breakage) +- [ ] `go vet ./...` clean +- [ ] AGENTS.md clearly describes the two-layer architecture with workflow/execution responsibilities +- [ ] Domain docs updated with `MindspecExecutor` naming and refined conceptual model +- [ ] All 18 LLM test scenarios reviewed — findings documented in HISTORY.md +- [ ] Any outdated test expectations fixed +- [ ] `go test ./internal/harness/ -run TestLLM_SingleBead -timeout 10m` passes (smoke test) +- [ ] `plan.md` template no longer claims auto-claim behavior +- [ ] Plan approve output includes emphatic STOP + `/clear` + `mindspec next` instructions +- [ ] `mindspec complete` output includes STOP + `/clear` instruction when next bead is ready +- [ ] Dead `--no-next` flag removed from `approve.go` +- [ ] Documentation classifies `mindspec next` and `mindspec complete` as execution layer commands + +## Validation Proofs + +- `grep -r "GitExecutor" internal/ cmd/` → zero results +- `grep -r "NewGitExecutor" internal/ cmd/` → zero results +- `grep -rn "specinit" --include="*.go" internal/ cmd/` → zero import hits +- `grep -rn "gitops" 
--include="*.go" internal/ cmd/` → zero hits +- `ls internal/executor/mindspec_executor.go` → exists +- `ls internal/spec/` → exists (replaces `internal/specinit/`) +- `make build` → exit 0 +- `go test ./internal/executor/... -v` → all pass +- `go test ./internal/... -v` → all pass +- `go vet ./...` → clean + +## Open Questions + +- [x] Should `internal/specinit/` become `internal/spec/` or `internal/speccreate/`? → **`internal/spec/`** — shorter, natural, no collision risk since it's the only package dealing with spec creation. + +## Approval + +- **Status**: APPROVED +- **Approved By**: user +- **Approval Date**: 2026-03-10 +- **Notes**: Approved via mindspec approve spec \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md index f03fbdd..c234b85 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -139,10 +139,37 @@ For more details, see README.md and docs/QUICKSTART.md. MindSpec has a two-layer architecture separating *what* from *how*: -- **Workflow layer** (`internal/approve/`, `internal/complete/`, `internal/next/`, `internal/cleanup/`, `internal/specinit/`) — decides what operations should happen (approval gates, phase transitions, bead selection) -- **Execution layer** (`internal/executor/`, `internal/gitutil/`) — performs git, worktree, and filesystem operations +### Workflow Layer (the "what") -**Import rule**: Workflow packages call `executor.Executor` methods. They MUST NOT import `internal/gitutil/` directly. This keeps enforcement logic testable with `MockExecutor` and decouples workflow decisions from git mechanics. +The workflow layer owns the spec-driven development lifecycle — deciding which operations should happen and enforcing quality at every gate: + +- **Spec creation** — `internal/spec/` creates spec branches, worktrees, and template files +- **Plan decomposition** — breaks specs into bitesize beads with clear acceptance criteria. 
Well-decomposed plans are critical for AI agent success (see [arXiv:2512.08296](https://arxiv.org/abs/2512.08296) on task decomposition quality) +- **Validation** — `internal/validate/` checks ADR compliance, doc-sync, and structural requirements +- **Quality gates** — `internal/approve/` enforces human-in-the-loop approval at spec, plan, and impl transitions +- **Phase enforcement** — `internal/phase/` derives lifecycle phase from beads epic/child statuses (ADR-0023) +- **Work selection** — `internal/next/` selects ready beads, `internal/complete/` orchestrates bead close-out +- **Cleanup** — `internal/cleanup/` handles post-lifecycle worktree/branch removal + +Key packages: `internal/approve/`, `internal/complete/`, `internal/next/`, `internal/spec/`, `internal/cleanup/`, `internal/phase/`, `internal/validate/`, `internal/bead/` + +### Beads: The Substrate + +[Beads](https://github.com/steveyegge/beads) is the interface between the two layers. Each bead is a self-contained work packet — requirements, context, dependencies, acceptance criteria — that a fresh agent can pick up without session history. The planning layer writes beads; the execution engine reads them. This is what makes the `Executor` interface pluggable: any orchestrator that can read a bead can dispatch work. + +### Execution Engine (the "how") + +The execution engine reads beads and implements them — it never decides *what* should happen: + +- **`MindspecExecutor`** (`internal/executor/`) — dispatches beads to worktrees, merges completed bead branches, finalizes specs via PR or direct merge +- **`MockExecutor`** (`internal/executor/`) — test double for enforcement testing without git side effects +- **`internal/gitutil/`** — low-level git helpers (branch, merge, PR, diffstat) used only by `MindspecExecutor` + +DI wiring: `cmd/mindspec/root.go` has `newExecutor(root)` factory. + +### Import Rule + +Workflow packages call `executor.Executor` methods. 
They MUST NOT import `internal/gitutil/` directly. This keeps enforcement logic testable with `MockExecutor` and decouples workflow decisions from git mechanics. See `.mindspec/docs/domains/execution/` and `.mindspec/docs/domains/workflow/` for full documentation. diff --git a/README.md b/README.md index be8e9c3..a0df75d 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,14 @@ # MindSpec -**Spec-driven development and real-time observability for AI coding agents.** +**A planning and governance layer for AI coding agents.** -AI coding agents are powerful but unstructured. Without guardrails they: +AI coding agents are powerful executors but poor planners. Without structure, they drift from intent, steamroll architecture decisions, and let scope creep turn a small feature into a three-subsystem refactor. -- **Drift from intent** — the agent builds what it infers, not what you specified -- **Ignore architecture** — existing design decisions and ADRs get steamrolled -- **Lose context between sessions** — every conversation starts from scratch -- **Skip documentation** — code ships, docs rot -- **Resist scope discipline** — a "small feature" becomes a refactor of three subsystems +The fix isn't better prompting — it's better planning. -MindSpec treats these as system design problems, not prompting problems. It provides a **gated development lifecycle** where architecture divergence is detected and blocked until explicitly resolved, **bounded contexts** borrowed from domain-driven design to manage what the agent sees — deterministic, token-budgeted context packs assembled from domain docs, ADRs, and the Context Map so the agent gets exactly the right context without manual prompt engineering — and an **observability layer** (AgentMind) that shows you exactly what your agent is doing, spending, and how efficiently it's working. +MindSpec is the **planning and governance layer** that sits upstream of your agent orchestrator. 
It breaks work into spec → plan → bitesize beads, validates each bead against your architecture (ADRs, domain boundaries), and enforces quality gates before any code gets written. The result is a structured plan with clear acceptance criteria that any execution engine — Claude Code working solo, a multi-agent orchestrator like [Gastown](https://github.com/steveyegge/gastown), or OpenAI Codex — can implement reliably. + +Research on scaling agent systems ([arXiv:2512.08296](https://arxiv.org/abs/2512.08296)) confirms what we've seen in practice: **task decomposition quality is the #1 predictor of agent execution success.** MindSpec exists to get the decomposition right.
@@ -81,6 +79,44 @@ The work graph is tracked by [Beads](https://github.com/steveyegge/beads), a git
Documentation stays current because the system won't let you skip it — beads can't close without doc-sync, architecture decisions are tracked as ADRs that plans must cite, and every spec produces versioned artifacts that persist alongside the code.
+## Architecture
+
+MindSpec separates **planning** from **execution**, with [Beads](https://github.com/steveyegge/beads) as the substrate that connects them.
+
+### Planning & Governance Layer
+
+The planning layer owns everything *before* code gets written:
+
+- **Specification** — defines what "done" looks like with acceptance criteria and impacted domains
+- **Decomposition** — breaks specs into bitesize beads, each independently completable with clear scope
+- **Architecture validation** — plans must cite ADRs; divergence is blocked until a human approves a superseding ADR
+- **Quality gates** — every phase transition (spec → plan → implement → review) requires human approval
+- **Context engineering** — deterministic, token-budgeted context packs so the agent gets exactly the right information
+
+The planning layer doesn't write code. It produces a validated plan — a directed graph of beads with dependencies, acceptance criteria, and scoped documentation — then hands it off to the execution engine.
+
+### Beads: The Substrate
+
+[Beads](https://github.com/steveyegge/beads) is the interface between the two layers. Each bead is a self-contained work packet that encapsulates everything a fresh agent needs:
+
+- **Requirements** — what to build, with acceptance criteria and verification steps
+- **Context** — which domains are impacted, which ADRs apply, what dependencies exist
+- **Status** — lifecycle phase, blocking relationships, proof of completion
+
+This is what makes pluggable orchestration possible. The planning layer writes beads; the execution engine reads them. A fresh agent picking up a bead doesn't need session history or tribal knowledge — the bead carries the plan and the context. Any orchestrator that can read a bead can dispatch work.
+
+### Execution Engine
+
+The execution engine implements the plan by reading beads and dispatching agents:
+
+- **Bead dispatch** — each bead runs in an isolated git worktree, scoped to exactly what the plan defined
+- **Merge topology** — bead branches merge into the spec branch; the spec branch merges to main via PR
+- **Finalization** — once all beads close, the spec lifecycle completes with a single PR
+
+MindSpec ships with a built-in execution engine (`MindspecExecutor`) that drives Claude Code, Codex, or Copilot through implementation with human control between steps. But the `Executor` interface is pluggable — MindSpec's planning and governance layer can readily plug into multi-agent orchestrators like [Gastown](https://github.com/steveyegge/gastown), which dispatch beads to parallel agents with their own quality gates and auto-finalization.
+
+**MindSpec doesn't compete with agent orchestrators — it makes them better.** An orchestrator running MindSpec-planned beads gets architecture-validated, well-decomposed work packets instead of a vague prompt. The orchestrator focuses on execution; MindSpec ensures there's something worth executing.
+
---
## Quickstart
@@ -151,6 +187,7 @@ MindSpec's workflow is continuously validated by a behavioral test harness that
6. **Dynamic over static** — runtime guidance beats static files that drift
7. **CLI-first** — logic lives in testable, versionable Go; IDE integrations are thin shims
8. **Deterministic context** — token-budgeted context packs, not "go read this file" prompting
+9. **Planning over prompting** — structured decomposition beats prompt engineering at scale
## Requirements
diff --git a/cmd/mindspec/approve.go b/cmd/mindspec/approve.go
index 8faad62..c6452d2 100644
--- a/cmd/mindspec/approve.go
+++ b/cmd/mindspec/approve.go
@@ -38,7 +38,6 @@ var approveImplCmd = &cobra.Command{
func init() {
approveSpecCmd.Flags().String("approved-by", "user", "Identity of the approver")
approvePlanCmd.Flags().String("approved-by", "user", "Identity of the approver")
- approvePlanCmd.Flags().Bool("no-next", false, "Approve without auto-claiming the first bead")
approveCmd.AddCommand(approveSpecCmd)
approveCmd.AddCommand(approvePlanCmd)
approveCmd.AddCommand(approveImplCmd)
diff --git a/cmd/mindspec/root.go b/cmd/mindspec/root.go
index af4b05a..46d04e1 100644
--- a/cmd/mindspec/root.go
+++ b/cmd/mindspec/root.go
@@ -11,10 +11,10 @@ import (
"github.com/spf13/cobra"
)
-// newExecutor creates a GitExecutor rooted at the given path.
+// newExecutor creates a MindspecExecutor rooted at the given path.
// Used as a factory function across CLI commands.
func newExecutor(root string) executor.Executor {
- return executor.NewGitExecutor(root)
+ return executor.NewMindspecExecutor(root)
}
// Set by goreleaser ldflags.
diff --git a/cmd/mindspec/spec.go b/cmd/mindspec/spec.go
index 8f7f9a4..69daf47 100644
--- a/cmd/mindspec/spec.go
+++ b/cmd/mindspec/spec.go
@@ -7,7 +7,7 @@ import (
"github.com/mrmaxsteel/mindspec/internal/approve"
"github.com/mrmaxsteel/mindspec/internal/bead"
- "github.com/mrmaxsteel/mindspec/internal/specinit"
+ "github.com/mrmaxsteel/mindspec/internal/spec"
"github.com/mrmaxsteel/mindspec/internal/workspace"
"github.com/spf13/cobra"
)
@@ -33,7 +33,7 @@ creates a branch and worktree, sets state to spec mode, and emits guidance.`,
}
exec := newExecutor(root)
- result, err := specinit.Run(root, specID, title, exec)
+ result, err := spec.Run(root, specID, title, exec)
if err != nil {
return err
}
diff --git a/cmd/mindspec/spec_init.go b/cmd/mindspec/spec_init.go
index ea306ae..46d49c7 100644
--- a/cmd/mindspec/spec_init.go
+++ b/cmd/mindspec/spec_init.go
@@ -5,7 +5,7 @@ import (
"os"
"path/filepath"
- "github.com/mrmaxsteel/mindspec/internal/specinit"
+ "github.com/mrmaxsteel/mindspec/internal/spec"
"github.com/mrmaxsteel/mindspec/internal/workspace"
"github.com/spf13/cobra"
)
@@ -26,7 +26,7 @@ var specInitCmd = &cobra.Command{
}
exec := newExecutor(root)
- result, err := specinit.Run(root, specID, title, exec)
+ result, err := spec.Run(root, specID, title, exec)
if err != nil {
return err
}
diff --git a/internal/adr/store_test.go b/internal/adr/store_test.go
index ad1cc73..838a7b6 100644
--- a/internal/adr/store_test.go
+++ b/internal/adr/store_test.go
@@ -100,7 +100,7 @@ func TestFileStore_Get(t *testing.T) {
func TestFileStore_Search(t *testing.T) {
root := t.TempDir()
- writeADR(t, root, "ADR-0001", "Worktree Management", "Accepted", []string{"gitops"})
+ writeADR(t, root, "ADR-0001", "Worktree Management", "Accepted", []string{"execution"})
writeADR(t, root, "ADR-0002", "Bead Lifecycle", "Accepted", []string{"workflow"})
store := NewFileStore(root)
diff --git a/internal/approve/plan.go b/internal/approve/plan.go
index be3c3c4..535acd2 100644
--- a/internal/approve/plan.go
+++ b/internal/approve/plan.go
@@ -123,7 +123,7 @@ func ApprovePlan(root, specID, approvedBy string, exec executor.Executor) (*Plan
}
// Step 5: HandoffEpic — notify executor that beads are ready for dispatch.
- // For GitExecutor this is a no-op. Other executors may use this to schedule work.
+ // For MindspecExecutor this is a no-op. Other executors may use this to schedule work.
if parentID != "" && len(result.BeadIDs) > 0 {
if err := exec.HandoffEpic(parentID, specID, result.BeadIDs); err != nil {
result.Warnings = append(result.Warnings, fmt.Sprintf("handoff epic failed: %v", err))
diff --git a/internal/bead/bdcli.go b/internal/bead/bdcli.go
index 36f53ad..9755523 100644
--- a/internal/bead/bdcli.go
+++ b/internal/bead/bdcli.go
@@ -122,15 +122,12 @@ func WorktreeList() ([]WorktreeListEntry, error) {
return entries, nil
}
-// WorktreeRemove removes a worktree via `bd worktree remove`.
-// Beads performs safety checks (uncommitted changes, unpushed commits).
-// When no git remote is configured, --force is passed to skip the
-// unpushed-commits check (which would always fail without a remote).
+// WorktreeRemove removes a worktree via `bd worktree remove --force`.
+// The --force flag skips the "unpushed commits" safety check, which is
+// appropriate because mindspec always merges bead work into the spec
+// branch before removing the worktree.
func WorktreeRemove(name string) error {
- args := []string{"worktree", "remove", name}
- if !hasGitRemote() {
- args = append(args, "--force")
- }
+ args := []string{"worktree", "remove", name, "--force"}
out, err := tracedCombined("worktree-remove", args)
if err != nil {
return fmt.Errorf("bd worktree remove failed: %s", string(out))
@@ -138,16 +135,6 @@ func WorktreeRemove(name string) error {
return nil
}
-// hasGitRemote returns true if at least one git remote is configured.
-func hasGitRemote() bool {
- cmd := execCommand("git", "remote")
- out, err := cmd.Output()
- if err != nil {
- return false
- }
- return strings.TrimSpace(string(out)) != ""
-}
-
// ListJSON runs `bd list