diff --git a/pi-ralph-wiggum/CHANGELOG.md b/pi-ralph-wiggum/CHANGELOG.md index 1cac967..c2fbbff 100644 --- a/pi-ralph-wiggum/CHANGELOG.md +++ b/pi-ralph-wiggum/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## Unreleased + +### Changed +- Add a completion gate to Ralph prompts and skill guidance. Agents are now instructed to preserve required verification artifacts and record an exact monitor-rerunnable final command before emitting `COMPLETE`. +- Queue Ralph follow-up messages with `streamingBehavior: "followUp"` to avoid runtime warnings when a loop tool schedules the next iteration while the agent is still processing. +- Add a stale-prompt guard instructing agents to reload loop state and ignore already-completed loops instead of doing duplicate work. + ## 0.2.0 - 2026-04-19 ### Changed diff --git a/pi-ralph-wiggum/README.md b/pi-ralph-wiggum/README.md index 7eeeab9..ad7df84 100644 --- a/pi-ralph-wiggum/README.md +++ b/pi-ralph-wiggum/README.md @@ -46,11 +46,24 @@ You ask Pi to set up a ralph-wiggum loop. - It gets a prompt telling it to work on the task, update the task file, and call ralph_done when it finishes that iteration - When the iteration is done, it calls `ralph_done`, resending the same prompt* - Pi runs until either: - - All tasks are done (Pi sends `COMPLETE`) + - All tasks are done and final verification is externally rerunnable (Pi sends `COMPLETE`) - Max iterations (default 50) - You hit `esc` (pausing the loop) If you hit `esc`, you can run `/ralph-stop` to clear the loop. Alternatively, just tell Pi to continue to keep going. +## Completion gate + +For build/test/refactor tasks, Ralph prompts the agent not to complete based only on checked checklist items. Before sending `COMPLETE`, the agent should: + +- Preserve any build artifacts, generated files, virtualenvs, or copied libraries required by final verification. +- Record the exact final command, working directory, relevant environment variables, and output summary in the task file. 
+- Ensure a separate monitor can rerun that command from the same worktree in a fresh shell. +- Mark work blocked or deferred if the final command cannot be made externally rerunnable. + +## Stale prompt guard + +If an already-queued Ralph prompt arrives after a loop has completed, the agent should reload `.ralph/<name>.state.json` before doing work. If the loop state is `completed`, it should ignore the stale prompt, avoid file edits and task commands, and not call `ralph_done`. + ## Commands | Command | Description | diff --git a/pi-ralph-wiggum/SKILL.md b/pi-ralph-wiggum/SKILL.md index a44568c..7b224ba 100644 --- a/pi-ralph-wiggum/SKILL.md +++ b/pi-ralph-wiggum/SKILL.md @@ -23,9 +23,25 @@ ralph_start({ 2. Work on the task and update the file each iteration. 3. Record verification evidence (commands run, file paths, outputs) in the task file. 4. Call `ralph_done` to proceed to the next iteration. -5. Output `COMPLETE` when finished. +5. When finished, run a final verification command that an external monitor can rerun from the same worktree, then output `COMPLETE`. 6. Stop when complete or when max iterations is reached (default 50). +## Completion Gate + +For build/test/refactor tasks, do not mark complete based only on checked checklist items. + +Before emitting `COMPLETE`: + +- Preserve any build artifacts, generated files, virtualenvs, or environment setup required by the final verification command. +- Record the exact final command, working directory, relevant environment variables, and output summary in the task file. +- Ensure the command can be rerun by a separate monitor in a fresh shell from the same worktree. +- If a test cannot be rerun externally, mark the item blocked or deferred instead of complete. +- If cleanup removes required verification artifacts, recreate them or update the final command before completion. + +## Stale Prompt Guard + +Before doing any work from a Ralph prompt, reload `.ralph/<name>.state.json`. 
If the loop state says `"status": "completed"`, do not edit files, do not run task commands, and do not call `ralph_done`. Reply briefly that the stale prompt was ignored because the loop is already completed. + ## User Commands - `/ralph start ` - Start a new loop. @@ -58,7 +74,13 @@ Brief description. - [x] Completed item ## Verification -- Evidence, commands run, or file paths +- Commands run, working directories, relevant environment variables, outputs, and whether artifacts required for reruns were preserved + +## Final Verification +- Exact monitor-rerunnable command: `` +- Working directory: `` +- Required preserved artifacts: `` +- Result: `` ## Notes (Update with progress, decisions, blockers) @@ -70,4 +92,5 @@ Brief description. 2. Update checklist and notes as you go. 3. Capture verification evidence for completed items. 4. Reflect when stuck to reassess approach. -5. Output the completion marker only when truly done. +5. Preserve the environment needed to rerun final verification. +6. Output the completion marker only when truly done and externally rerunnable. diff --git a/pi-ralph-wiggum/index.ts b/pi-ralph-wiggum/index.ts index 8cce822..730dc7a 100644 --- a/pi-ralph-wiggum/index.ts +++ b/pi-ralph-wiggum/index.ts @@ -23,10 +23,34 @@ Describe your task here. - [ ] Item 1 - [ ] Item 2 +## Verification +- Commands run, working directories, relevant environment variables, outputs, and preserved artifacts + +## Final Verification +- Exact monitor-rerunnable command: +- Working directory: +- Required preserved artifacts: +- Result: + ## Notes (Update this as you work) `; +const DEFAULT_COMPLETION_GATE = `COMPLETION GATE + +Do not output ${COMPLETE_MARKER} based only on checked checklist items. +Before completion: +1. Run a final verification command that an external monitor can rerun from the same worktree in a fresh shell. +2. Record the exact command, working directory, relevant environment variables, and output summary in the task file. +3. 
Preserve every artifact required by that command, including build directories, generated libraries, virtualenvs, caches, or copied dylibs. +4. If cleanup removes required artifacts, recreate them or update the final command before completing. +5. If the final command cannot be made externally rerunnable, mark the item blocked/deferred instead of complete.`; + +const DEFAULT_STALE_PROMPT_GUARD = `STALE PROMPT GUARD + +Before doing any work from a Ralph prompt, reload the loop state file named in the prompt (usually .ralph/.state.json). +If the state says \"status\": \"completed\", do not edit files, do not run task commands, and do not call ralph_done. Reply briefly that the stale prompt was ignored because the loop is already completed.`; + const DEFAULT_REFLECT_INSTRUCTIONS = `REFLECTION CHECKPOINT Pause and reflect on your progress: @@ -169,7 +193,10 @@ export default function (pi: ExtensionAPI) { saveState(ctx, state); currentLoop = null; updateUI(ctx); - pi.sendUserMessage(banner); + pi.sendUserMessage(banner, { + deliverAs: "followUp", + streamingBehavior: "followUp", + }); } function stopLoop(ctx: ExtensionContext, state: LoopState, message?: string): void { @@ -235,6 +262,8 @@ export default function (pi: ExtensionAPI) { if (isReflection) parts.push(state.reflectInstructions, "\n---\n"); parts.push(`## Current Task (from ${state.taskFile})\n\n${taskContent}\n\n---`); + parts.push(`\n## Stale Prompt Guard\n\n${DEFAULT_STALE_PROMPT_GUARD}\n`); + parts.push(`\n## Completion Gate\n\n${DEFAULT_COMPLETION_GATE}\n`); parts.push(`\n## Instructions\n`); parts.push("User controls: ESC pauses the assistant. Send a message to resume. Run /ralph-stop when idle to stop the loop.\n"); parts.push( @@ -248,7 +277,7 @@ export default function (pi: ExtensionAPI) { parts.push(`1. Continue working on the task`); } parts.push(`2. Update the task file (${state.taskFile}) with your progress`); - parts.push(`3. 
When FULLY COMPLETE, respond with: ${COMPLETE_MARKER}`); + parts.push(`3. When FULLY COMPLETE and the completion gate is satisfied, respond with: ${COMPLETE_MARKER}`); parts.push(`4. Otherwise, call the ralph_done tool to proceed to next iteration`); return parts.join("\n"); @@ -341,7 +370,10 @@ export default function (pi: ExtensionAPI) { ctx.ui.notify(`Could not read task file: ${taskFile}`, "error"); return; } - pi.sendUserMessage(buildPrompt(state, content, false)); + pi.sendUserMessage(buildPrompt(state, content, false), { + deliverAs: "followUp", + streamingBehavior: "followUp", + }); }, stop(_rest, ctx) { @@ -401,7 +433,10 @@ export default function (pi: ExtensionAPI) { const needsReflection = state.reflectEvery > 0 && state.iteration > 1 && (state.iteration - 1) % state.reflectEvery === 0; - pi.sendUserMessage(buildPrompt(state, content, needsReflection)); + pi.sendUserMessage(buildPrompt(state, content, needsReflection), { + deliverAs: "followUp", + streamingBehavior: "followUp", + }); }, status(_rest, ctx) { @@ -652,7 +687,10 @@ Examples: currentLoop = loopName; updateUI(ctx); - pi.sendUserMessage(buildPrompt(state, params.taskContent, false), { deliverAs: "followUp" }); + pi.sendUserMessage(buildPrompt(state, params.taskContent, false), { + deliverAs: "followUp", + streamingBehavior: "followUp", + }); return { content: [{ type: "text", text: `Started loop "${loopName}" (max ${state.maxIterations} iterations).` }], @@ -717,7 +755,10 @@ Examples: } // Queue next iteration - use followUp so user can still interrupt - pi.sendUserMessage(buildPrompt(state, content, needsReflection), { deliverAs: "followUp" }); + pi.sendUserMessage(buildPrompt(state, content, needsReflection), { + deliverAs: "followUp", + streamingBehavior: "followUp", + }); return { content: [{ type: "text", text: `Iteration ${state.iteration - 1} complete. Next iteration queued.` }], @@ -736,11 +777,14 @@ Examples: const iterStr = `${state.iteration}${state.maxIterations > 0 ? 
`/${state.maxIterations}` : ""}`; let instructions = `You are in a Ralph loop working on: ${state.taskFile}\n`; + instructions += `- Before doing work, reload .ralph/${state.name}.state.json; if status is completed, ignore this stale prompt and do not call ralph_done\n`; if (state.itemsPerIteration > 0) { instructions += `- Work on ~${state.itemsPerIteration} items this iteration\n`; } instructions += `- Update the task file as you progress\n`; - instructions += `- When FULLY COMPLETE: ${COMPLETE_MARKER}\n`; + instructions += `- Preserve artifacts needed by final verification\n`; + instructions += `- Record an exact monitor-rerunnable final command before completion\n`; + instructions += `- When FULLY COMPLETE and externally rerunnable: ${COMPLETE_MARKER}\n`; instructions += `- Otherwise, call ralph_done tool to proceed to next iteration`; return {