From 6dad3b58a0274a8d38561e79f8ff652153c44554 Mon Sep 17 00:00:00 2001 From: PR Babysitter Date: Fri, 15 May 2026 20:03:01 -0500 Subject: [PATCH 1/6] fix(bug-1): Missing artifact error omits completion-contract guidance missing-artifact failures now include completion-contract guidance and an explicit execute-task completion-tool hint. --- src/resources/extensions/gsd/auto-post-unit.ts | 8 ++++++-- .../auto-post-unit-artifact-diagnostic.test.ts | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 src/resources/extensions/gsd/tests/auto-post-unit-artifact-diagnostic.test.ts diff --git a/src/resources/extensions/gsd/auto-post-unit.ts b/src/resources/extensions/gsd/auto-post-unit.ts index 2f4c96931d..6ffd1c595d 100644 --- a/src/resources/extensions/gsd/auto-post-unit.ts +++ b/src/resources/extensions/gsd/auto-post-unit.ts @@ -449,12 +449,16 @@ function describeArtifactVerificationFailure(unitType: string, unitId: string, b } const artifactPath = resolveExpectedArtifactPath(unitType, unitId, basePath); + const expected = diagnoseExpectedArtifact(unitType, unitId, basePath); if (!artifactPath) { return `Artifact verification failed: ${unitType} "${unitId}" has no resolvable artifact path.`; } const relPath = relative(basePath, artifactPath); if (!existsSync(artifactPath)) { - return `Artifact verification failed: ${relPath} was not found on disk after unit execution.`; + const completionToolHint = unitType === "execute-task" + ? " No completion tool call detected (`gsd_task_complete`/alias)." + : ""; + return `Artifact verification failed: ${relPath} was not found on disk after unit execution${expected ? ` (${expected})` : ""}.${completionToolHint}`; } const validationKind = artifactValidationKind(unitType); @@ -469,9 +473,9 @@ function describeArtifactVerificationFailure(unitType: string, unitId: string, b } } - const expected = diagnoseExpectedArtifact(unitType, unitId, basePath); return `Artifact verification failed: ${relPath} exists but did not satisfy the ${unitType} completion contract${expected ? ` (${expected})` : ""}.`; } +export const _describeArtifactVerificationFailureForTest = describeArtifactVerificationFailure; export async function autoCommitUnit( basePath: string, diff --git a/src/resources/extensions/gsd/tests/auto-post-unit-artifact-diagnostic.test.ts b/src/resources/extensions/gsd/tests/auto-post-unit-artifact-diagnostic.test.ts new file mode 100644 index 0000000000..178c4fb287 --- /dev/null +++ b/src/resources/extensions/gsd/tests/auto-post-unit-artifact-diagnostic.test.ts @@ -0,0 +1,17 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { _describeArtifactVerificationFailureForTest } from "../auto-post-unit.ts"; + +test("missing execute-task artifact includes completion contract and completion-tool hint", () => { + const base = mkdtempSync(join(tmpdir(), "gsd-artifact-diag-")); + const taskDir = join(base, ".gsd", "milestones", "M001", "slices", "S01", "tasks"); + mkdirSync(taskDir, { recursive: true }); + + const msg = _describeArtifactVerificationFailureForTest("execute-task", "M001/S01/T01", base); + assert.match(msg, /was not found on disk after unit execution/); + assert.match(msg, /Task T01 marked \[x\].*summary written/i); + assert.match(msg, /No completion tool call detected \(`gsd_task_complete`\/alias\)/); +}); From b36c97c54feaf1395a6d27a33d137f68a84ae979 Mon Sep 17 00:00:00 2001 From: PR Babysitter Date: Fri, 15 May 2026 20:03:01 -0500 Subject: [PATCH 2/6] fix(bug-2): Read-only reconnaissance classification misses common tools added read-only reconnaissance classification coverage for `skill` and read-only `gsd_exec` command patterns. --- .../extensions/gsd/session-forensics.ts | 34 ++++++++++++++++++- ...-forensics-readonly-classification.test.ts | 33 ++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 src/resources/extensions/gsd/tests/session-forensics-readonly-classification.test.ts diff --git a/src/resources/extensions/gsd/session-forensics.ts b/src/resources/extensions/gsd/session-forensics.ts index 9cf0a23c5f..4f9402655b 100644 --- a/src/resources/extensions/gsd/session-forensics.ts +++ b/src/resources/extensions/gsd/session-forensics.ts @@ -64,6 +64,39 @@ export interface RecoveryBriefing { prompt: string; } +const READ_ONLY_TOOL_NAMES = new Set([ + "read", + "memory_query", + "gsd_resume", + "gsd_exec_search", + "grep", + "find", + "ls", + "glob", + "skill", +]); + +const READ_ONLY_EXEC_COMMAND_RE = /^\s*(cat|head|tail|ls|find|grep|rg|git\s+(status|log|show|diff|branch|remote|rev-parse|ls-files)|npm\s+(ls|list|info|view|show|outdated|audit|doctor|ping|--version|-v)|node\s+(--print|--version|-v\b)|python[23]?\s+(-c\s+'[^']*'|--version|-V\b)|jq\s|yq\s|env\b|printenv\b)/; + +function isReadOnlyReconnaissanceTool(call: ToolCall): boolean { + const name = call.name.toLowerCase(); + if (READ_ONLY_TOOL_NAMES.has(name)) return true; + if (name !== "gsd_exec") return false; + const command = String(call.input.command || call.input.cmd || "").trim(); + if (!command) return false; + return READ_ONLY_EXEC_COMMAND_RE.test(command); +} + +export function classifyTraceProgress(trace: ExecutionTrace): { isReadOnlyReconnaissanceOnly: boolean } { + if (trace.toolCalls.length === 0) return { isReadOnlyReconnaissanceOnly: false }; + for (const call of trace.toolCalls) { + if (!isReadOnlyReconnaissanceTool(call)) { + return { isReadOnlyReconnaissanceOnly: false }; + } + } + return { isReadOnlyReconnaissanceOnly: true }; +} + // ─── JSONL Parsing ──────────────────────────────────────────────────────────── // MAX_JSONL_BYTES and parseJSONL are imported from ./jsonl-utils.js @@ -543,4 +576,3 @@ function findLast(arr: T[], predicate: (item: T) => boolean): T | undefined { } return undefined; } - diff --git a/src/resources/extensions/gsd/tests/session-forensics-readonly-classification.test.ts b/src/resources/extensions/gsd/tests/session-forensics-readonly-classification.test.ts new file mode 100644 index 0000000000..6a21930eb3 --- /dev/null +++ b/src/resources/extensions/gsd/tests/session-forensics-readonly-classification.test.ts @@ -0,0 +1,33 @@ +import test from "node:test"; +import assert from "node:assert/strict"; +import { classifyTraceProgress, type ExecutionTrace } from "../session-forensics.ts"; + +function traceWithToolCalls(toolCalls: ExecutionTrace["toolCalls"]): ExecutionTrace { + return { + toolCalls, + filesWritten: [], + filesRead: [], + commandsRun: [], + errors: [], + lastReasoning: "", + toolCallCount: toolCalls.length, + }; +} + +test("classifyTraceProgress treats skill + read-only gsd_exec as reconnaissance-only", () => { + const trace = traceWithToolCalls([ + { name: "skill", input: { name: "diagnose" }, isError: false }, + { name: "gsd_exec", input: { command: "rg -n TODO src" }, isError: false }, + ]); + const result = classifyTraceProgress(trace); + assert.equal(result.isReadOnlyReconnaissanceOnly, true); +}); + +test("classifyTraceProgress rejects mutating gsd_exec command", () => { + const trace = traceWithToolCalls([ + { name: "gsd_exec", input: { command: "npm run build" }, isError: false }, + ]); + const result = classifyTraceProgress(trace); + assert.equal(result.isReadOnlyReconnaissanceOnly, false); +}); + From 3ee901cc2f1ee3f9d0447ee7a76b3f3bdafc1583 Mon Sep 17 00:00:00 2001 From: PR Babysitter Date: Fri, 15 May 2026 20:33:11 -0500 Subject: [PATCH 3/6] Apply PatchDeck fixes for PR #6187 --- .../extensions/gsd/auto-post-unit.ts | 31 +++++++++++++++++-- .../extensions/gsd/session-forensics.ts | 4 ++- ...auto-post-unit-artifact-diagnostic.test.ts | 15 +++++++++ ...-forensics-readonly-classification.test.ts | 15 +++++++++ 4 files changed, 62 insertions(+), 3 deletions(-) diff --git a/src/resources/extensions/gsd/auto-post-unit.ts b/src/resources/extensions/gsd/auto-post-unit.ts index 6ffd1c595d..3e1c6ce6fd 100644 --- a/src/resources/extensions/gsd/auto-post-unit.ts +++ b/src/resources/extensions/gsd/auto-post-unit.ts @@ -442,7 +442,33 @@ function artifactValidationKind(unitType: string): "project" | "requirements" | return null; } -function describeArtifactVerificationFailure(unitType: string, unitId: string, basePath: string): string { +const TASK_COMPLETION_TOOL_NAMES = new Set(["gsd_task_complete", "gsd_complete_task"]); + +function hasTaskCompletionToolCall(agentEndMessages?: unknown[] | null): boolean { + if (!Array.isArray(agentEndMessages)) return false; + for (const rawMessage of agentEndMessages) { + if (!rawMessage || typeof rawMessage !== "object") continue; + const message = rawMessage as { content?: unknown }; + if (!Array.isArray(message.content)) continue; + for (const rawPart of message.content) { + if (!rawPart || typeof rawPart !== "object") continue; + const part = rawPart as { type?: unknown; name?: unknown }; + if (part.type !== "toolCall") continue; + const name = String(part.name ?? "").toLowerCase(); + if (TASK_COMPLETION_TOOL_NAMES.has(name)) { + return true; + } + } + } + return false; +} + +function describeArtifactVerificationFailure( + unitType: string, + unitId: string, + basePath: string, + agentEndMessages?: unknown[] | null, +): string { const worktreeFailure = diagnoseWorktreeIntegrityFailure(basePath); if (worktreeFailure) { return `${worktreeFailure} Unit: ${unitType} ${unitId}.`; @@ -455,7 +481,7 @@ function describeArtifactVerificationFailure(unitType: string, unitId: string, b } const relPath = relative(basePath, artifactPath); if (!existsSync(artifactPath)) { - const completionToolHint = unitType === "execute-task" + const completionToolHint = unitType === "execute-task" && !hasTaskCompletionToolCall(agentEndMessages) ? " No completion tool call detected (`gsd_task_complete`/alias)." : ""; return `Artifact verification failed: ${relPath} was not found on disk after unit execution${expected ? ` (${expected})` : ""}.${completionToolHint}`; @@ -1266,6 +1292,7 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV s.currentUnit.type, s.currentUnit.id, s.basePath, + s.lastUnitAgentEndMessages, ); if (attempt > MAX_ARTIFACT_VERIFICATION_RETRIES) { s.verificationRetryCount.delete(retryKey); diff --git a/src/resources/extensions/gsd/session-forensics.ts b/src/resources/extensions/gsd/session-forensics.ts index 4f9402655b..d3312cc15a 100644 --- a/src/resources/extensions/gsd/session-forensics.ts +++ b/src/resources/extensions/gsd/session-forensics.ts @@ -76,7 +76,8 @@ const READ_ONLY_TOOL_NAMES = new Set([ "skill", ]); -const READ_ONLY_EXEC_COMMAND_RE = /^\s*(cat|head|tail|ls|find|grep|rg|git\s+(status|log|show|diff|branch|remote|rev-parse|ls-files)|npm\s+(ls|list|info|view|show|outdated|audit|doctor|ping|--version|-v)|node\s+(--print|--version|-v\b)|python[23]?\s+(-c\s+'[^']*'|--version|-V\b)|jq\s|yq\s|env\b|printenv\b)/; +const UNSAFE_SHELL_TOKENS_RE = /(?:&&|\|\||;|[<>]|`|\$\(|\n)/; +const READ_ONLY_EXEC_COMMAND_RE = /^\s*(cat|head|tail|ls|find|grep|rg|git\s+(status|log|show|diff|branch|remote|rev-parse|ls-files)|npm\s+(ls|list|info|view|show|outdated|audit|doctor|ping|--version|-v)|node\s+(--version|-v\b)|python[23]?\s+(--version|-V\b)|jq|yq|env|printenv)\b[\w\s./:@,+-]*$/; function isReadOnlyReconnaissanceTool(call: ToolCall): boolean { const name = call.name.toLowerCase(); @@ -84,6 +85,7 @@ function isReadOnlyReconnaissanceTool(call: ToolCall): boolean { if (name !== "gsd_exec") return false; const command = String(call.input.command || call.input.cmd || "").trim(); if (!command) return false; + if (UNSAFE_SHELL_TOKENS_RE.test(command)) return false; return READ_ONLY_EXEC_COMMAND_RE.test(command); } diff --git a/src/resources/extensions/gsd/tests/auto-post-unit-artifact-diagnostic.test.ts b/src/resources/extensions/gsd/tests/auto-post-unit-artifact-diagnostic.test.ts index 178c4fb287..6f8a891c2d 100644 --- a/src/resources/extensions/gsd/tests/auto-post-unit-artifact-diagnostic.test.ts +++ b/src/resources/extensions/gsd/tests/auto-post-unit-artifact-diagnostic.test.ts @@ -15,3 +15,18 @@ test("missing execute-task artifact includes completion contract and completion- assert.match(msg, /Task T01 marked \[x\].*summary written/i); assert.match(msg, /No completion tool call detected \(`gsd_task_complete`\/alias\)/); }); + +test("missing execute-task artifact skips completion-tool hint when completion tool call is present", () => { + const base = mkdtempSync(join(tmpdir(), "gsd-artifact-diag-")); + const taskDir = join(base, ".gsd", "milestones", "M001", "slices", "S01", "tasks"); + mkdirSync(taskDir, { recursive: true }); + + const msg = _describeArtifactVerificationFailureForTest( + "execute-task", + "M001/S01/T01", + base, + [{ content: [{ type: "toolCall", name: "gsd_task_complete" }] }], + ); + assert.match(msg, /was not found on disk after unit execution/); + assert.doesNotMatch(msg, /No completion tool call detected \(`gsd_task_complete`\/alias\)/); +}); diff --git a/src/resources/extensions/gsd/tests/session-forensics-readonly-classification.test.ts b/src/resources/extensions/gsd/tests/session-forensics-readonly-classification.test.ts index 6a21930eb3..492088c872 100644 --- a/src/resources/extensions/gsd/tests/session-forensics-readonly-classification.test.ts +++ b/src/resources/extensions/gsd/tests/session-forensics-readonly-classification.test.ts @@ -31,3 +31,18 @@ test("classifyTraceProgress rejects mutating gsd_exec command", () => { assert.equal(result.isReadOnlyReconnaissanceOnly, false); }); +test("classifyTraceProgress rejects shell-chained gsd_exec command", () => { + const trace = traceWithToolCalls([ + { name: "gsd_exec", input: { command: "cat file && echo x > y" }, isError: false }, + ]); + const result = classifyTraceProgress(trace); + assert.equal(result.isReadOnlyReconnaissanceOnly, false); +}); + +test("classifyTraceProgress rejects script-eval gsd_exec command", () => { + const trace = traceWithToolCalls([ + { name: "gsd_exec", input: { command: "python -c \"import pathlib; pathlib.Path('x').write_text('y')\"" }, isError: false }, + ]); + const result = classifyTraceProgress(trace); + assert.equal(result.isReadOnlyReconnaissanceOnly, false); +}); From 37c92c78e5d15ad9ee1a901a1228aed16bf2fbf5 Mon Sep 17 00:00:00 2001 From: PR Babysitter Date: Fri, 15 May 2026 21:01:26 -0500 Subject: [PATCH 4/6] Apply PatchDeck fixes for PR #6187 --- ...-forensics-readonly-classification.test.ts | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/resources/extensions/gsd/tests/session-forensics-readonly-classification.test.ts b/src/resources/extensions/gsd/tests/session-forensics-readonly-classification.test.ts index 492088c872..79962252ea 100644 --- a/src/resources/extensions/gsd/tests/session-forensics-readonly-classification.test.ts +++ b/src/resources/extensions/gsd/tests/session-forensics-readonly-classification.test.ts @@ -23,6 +23,28 @@ test("classifyTraceProgress treats skill + read-only gsd_exec as reconnaissance- assert.equal(result.isReadOnlyReconnaissanceOnly, true); }); +test("classifyTraceProgress treats skill alone as reconnaissance-only", () => { + const trace = traceWithToolCalls([ + { name: "skill", input: { name: "diagnose" }, isError: false }, + ]); + const result = classifyTraceProgress(trace); + assert.equal(result.isReadOnlyReconnaissanceOnly, true); +}); + +test("classifyTraceProgress treats read-only gsd_exec alone as reconnaissance-only", () => { + const trace = traceWithToolCalls([ + { name: "gsd_exec", input: { command: "rg -n TODO src" }, isError: false }, + ]); + const result = classifyTraceProgress(trace); + assert.equal(result.isReadOnlyReconnaissanceOnly, true); +}); + +test("classifyTraceProgress treats empty trace as not reconnaissance-only", () => { + const trace = traceWithToolCalls([]); + const result = classifyTraceProgress(trace); + assert.equal(result.isReadOnlyReconnaissanceOnly, false); +}); + test("classifyTraceProgress rejects mutating gsd_exec command", () => { const trace = traceWithToolCalls([ { name: "gsd_exec", input: { command: "npm run build" }, isError: false }, From de966c16f7dbefe138a094adafa8fafd341e6ea1 Mon Sep 17 00:00:00 2001 From: "coderabbitai[bot]" <136622811+coderabbitai[bot]@users.noreply.github.com> Date: Sat, 16 May 2026 02:03:30 +0000 Subject: [PATCH 5/6] =?UTF-8?q?=F0=9F=93=9D=20CodeRabbit=20Chat:=20Impleme?= =?UTF-8?q?nt=20requested=20code=20changes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../extensions/gsd/commands-prefs-wizard.ts | 9 ++++++--- .../gsd/tests/prefs-wizard-coverage.test.ts | 14 +++++++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/resources/extensions/gsd/commands-prefs-wizard.ts b/src/resources/extensions/gsd/commands-prefs-wizard.ts index 1501c19e97..790065f7da 100644 --- a/src/resources/extensions/gsd/commands-prefs-wizard.ts +++ b/src/resources/extensions/gsd/commands-prefs-wizard.ts @@ -587,10 +587,13 @@ async function configureModels(ctx: ExtensionCommandContext, prefs: Record = (prefs.models as Record) ?? {}; const availableModels = ctx.modelRegistry.getAvailable(); - const getAllWithDiscovered = (ctx.modelRegistry as { getAllWithDiscovered?: () => typeof availableModels }).getAllWithDiscovered; + // Call getAllWithDiscovered as a method so `this` stays bound to the + // registry — invoking a detached reference loses `this` and the method's + // internal `this.models` access throws. + const registry = ctx.modelRegistry as { getAllWithDiscovered?: () => typeof availableModels }; const availableProviders = new Set(availableModels.map((m) => m.provider)); - const selectableModels = typeof getAllWithDiscovered === "function" - ? getAllWithDiscovered().filter((m) => availableProviders.has(m.provider)) + const selectableModels = typeof registry.getAllWithDiscovered === "function" + ? registry.getAllWithDiscovered().filter((m) => availableProviders.has(m.provider)) : availableModels; if (selectableModels.length > 0) { // Group models by provider, sorted alphabetically diff --git a/src/resources/extensions/gsd/tests/prefs-wizard-coverage.test.ts b/src/resources/extensions/gsd/tests/prefs-wizard-coverage.test.ts index 0d196211ae..590433d4df 100644 --- a/src/resources/extensions/gsd/tests/prefs-wizard-coverage.test.ts +++ b/src/resources/extensions/gsd/tests/prefs-wizard-coverage.test.ts @@ -169,17 +169,25 @@ test("models wizard offers discovered models for enabled providers", async () => "(keep current)", ]; const ctx = { + // `getAllWithDiscovered` reads `this._all` so the wizard must call it as a + // method — invoking a detached reference would lose `this` and throw, + // mirroring the real ModelRegistry implementation. modelRegistry: { - getAvailable: () => [{ provider: "local", id: "baseline-model" }], - getAllWithDiscovered: () => [ + _all: [ { provider: "local", id: "baseline-model" }, { provider: "local", id: "discovered-model" }, { provider: "disabled", id: "hidden-model" }, ], + getAvailable() { + return [{ provider: "local", id: "baseline-model" }]; + }, + getAllWithDiscovered() { + return this._all; + }, }, ui: { notify() {}, - select: async (label: string, options: string[]) => { + const choice = choices.shift(); if (!choice && label === "GSD Preferences") return "── Save & Exit ──"; if (!choice && options.includes("(keep current)")) return "(keep current)"; From 3dfb7a4f5bd97761c0485ba01c5bfa0b82835d98 Mon Sep 17 00:00:00 2001 From: PR Babysitter Date: Fri, 15 May 2026 22:57:22 -0500 Subject: [PATCH 6/6] Apply PatchDeck fixes for PR #6187 --- src/resources/extensions/gsd/commands-prefs-wizard.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/resources/extensions/gsd/commands-prefs-wizard.ts b/src/resources/extensions/gsd/commands-prefs-wizard.ts index 790065f7da..a54cd5e681 100644 --- a/src/resources/extensions/gsd/commands-prefs-wizard.ts +++ b/src/resources/extensions/gsd/commands-prefs-wizard.ts @@ -589,7 +589,7 @@ async function configureModels(ctx: ExtensionCommandContext, prefs: Record typeof availableModels }; const availableProviders = new Set(availableModels.map((m) => m.provider)); const selectableModels = typeof registry.getAllWithDiscovered === "function"