// Patch summary (commentary only; the patch text below is unchanged).
//
// 1. auto-post-unit.ts, inside postUnitPreVerification:
//    - Removes the `bashCalls` local (`actual.filter(e => e.kind === "bash")`).
//    - Removes the old check, which warned only when `claimedEvidence` was non-empty
//      AND no bash calls had been recorded at all.
//    - Adds `missingCommandMismatches`: the entries of `mismatches` whose severity is
//      "warning" and whose `actual` is null. When that list is non-empty it calls
//      logWarning("safety", ...) and ctx.ui.notify(..., "warning") with the count.
//    - NOTE(review): `mismatches` is defined outside the visible hunks; presumably it is
//      the result of crossReferenceEvidence, as the new test's comment states — confirm.
//
// 2. tests/evidence-cross-ref.test.ts:
//    - Adds a test that calls crossReferenceEvidence with one claimed command
//      ("npm run verify") and one recorded bash call ("ls -la"), then asserts that
//      exactly one mismatch passes the same `severity === "warning" && actual === null` filter.
//
// NOTE(review): the line breaks and indentation of this patch appear to have been collapsed.
// The hunk headers (-1033,7 +1033,6 / -1056,10 +1055,13 / -95,3 +95,27) still describe the
// original per-line counts, so the original formatting must be restored before applying.
diff --git a/src/resources/extensions/gsd/auto-post-unit.ts b/src/resources/extensions/gsd/auto-post-unit.ts index 8a0b40c82e..d142b097ee 100644 --- a/src/resources/extensions/gsd/auto-post-unit.ts +++ b/src/resources/extensions/gsd/auto-post-unit.ts @@ -1033,7 +1033,6 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV if (safetyConfig.evidence_cross_reference && s.currentUnit.type === "execute-task") { try { const actual = getEvidence(); - const bashCalls = actual.filter(e => e.kind === "bash"); if (sMid && sSid && sTid && isDbAvailable()) { const taskRow = getTask(sMid, sSid, sTid); if (taskRow?.status === "complete") { @@ -1056,10 +1055,13 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV } } - if (claimedEvidence.length > 0 && bashCalls.length === 0) { - logWarning("safety", "task claimed verification command evidence but no execution tool calls were recorded"); + const missingCommandMismatches = mismatches.filter((mismatch) => ( + mismatch.severity === "warning" && mismatch.actual === null + )); + if (missingCommandMismatches.length > 0) { + logWarning("safety", `evidence mismatch: ${missingCommandMismatches.length} claimed command(s) not found in bash calls`); ctx.ui.notify( - `Safety: task ${sTid} claimed command evidence but no execution tool calls were recorded`, + `Safety: task ${sTid} claimed ${missingCommandMismatches.length} command(s) not found in recorded bash calls`, "warning", ); } diff --git a/src/resources/extensions/gsd/tests/evidence-cross-ref.test.ts b/src/resources/extensions/gsd/tests/evidence-cross-ref.test.ts index 2818db6c10..42d28b7b6f 100644 --- a/src/resources/extensions/gsd/tests/evidence-cross-ref.test.ts +++ b/src/resources/extensions/gsd/tests/evidence-cross-ref.test.ts @@ -95,3 +95,27 @@ test("missing recorded bash evidence remains a warning", () => { assert.equal(mismatches.length, 1); assert.equal(mismatches[0].severity, "warning"); }); + +test("claimed command 
absent from bash calls reports a warning mismatch with null actual", () => { + // Regression: postUnitPreVerification flags fabricated evidence by filtering + // crossReferenceEvidence mismatches on `severity === "warning" && actual === null`. + // A claimed command with no matching bash call must produce exactly that shape, + // otherwise fabricated evidence silently bypasses the safety check. + const mismatches = crossReferenceEvidence( + [{ command: "npm run verify", exitCode: 0, verdict: "passed" }], + [ + { + kind: "bash", + toolCallId: "call-1", + command: "ls -la", + exitCode: 0, + outputSnippet: "files", + timestamp: Date.now(), + }, + ] as EvidenceEntry[], + ); + + const missing = mismatches.filter((m) => m.severity === "warning" && m.actual === null); + assert.equal(missing.length, 1); + assert.equal(missing[0].actual, null); +});