Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 34 additions & 3 deletions src/resources/extensions/gsd/auto-post-unit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -442,19 +442,49 @@ function artifactValidationKind(unitType: string): "project" | "requirements" |
return null;
}

function describeArtifactVerificationFailure(unitType: string, unitId: string, basePath: string): string {
/** Lower-cased names of the tool calls that count as a task-completion call. */
const TASK_COMPLETION_TOOL_NAMES = new Set(["gsd_task_complete", "gsd_complete_task"]);

/**
 * Reports whether any message carries a `toolCall` content part whose name
 * (compared case-insensitively) is one of {@link TASK_COMPLETION_TOOL_NAMES}.
 *
 * The input is untyped, so every level is validated structurally: entries that
 * are not objects, messages whose `content` is not an array, and parts whose
 * `type` is not `"toolCall"` are ignored.
 *
 * @param agentEndMessages Messages to scan; a non-array value yields `false`.
 * @returns `true` as soon as one matching tool call is found, otherwise `false`.
 */
function hasTaskCompletionToolCall(agentEndMessages?: unknown[] | null): boolean {
  if (!Array.isArray(agentEndMessages)) return false;

  const isCompletionToolCall = (candidate: unknown): boolean => {
    if (typeof candidate !== "object" || candidate === null) return false;
    const part = candidate as { type?: unknown; name?: unknown };
    if (part.type !== "toolCall") return false;
    // A missing name becomes "" and therefore never matches.
    return TASK_COMPLETION_TOOL_NAMES.has(String(part.name ?? "").toLowerCase());
  };

  return agentEndMessages.some((entry) => {
    if (typeof entry !== "object" || entry === null) return false;
    const content = (entry as { content?: unknown }).content;
    return Array.isArray(content) && content.some(isCompletionToolCall);
  });
}

function describeArtifactVerificationFailure(
unitType: string,
unitId: string,
basePath: string,
agentEndMessages?: unknown[] | null,
): string {
const worktreeFailure = diagnoseWorktreeIntegrityFailure(basePath);
if (worktreeFailure) {
return `${worktreeFailure} Unit: ${unitType} ${unitId}.`;
}

const artifactPath = resolveExpectedArtifactPath(unitType, unitId, basePath);
const expected = diagnoseExpectedArtifact(unitType, unitId, basePath);
if (!artifactPath) {
return `Artifact verification failed: ${unitType} "${unitId}" has no resolvable artifact path.`;
}
const relPath = relative(basePath, artifactPath);
if (!existsSync(artifactPath)) {
return `Artifact verification failed: ${relPath} was not found on disk after unit execution.`;
const completionToolHint = unitType === "execute-task" && !hasTaskCompletionToolCall(agentEndMessages)
? " No completion tool call detected (`gsd_task_complete`/alias)."
: "";
return `Artifact verification failed: ${relPath} was not found on disk after unit execution${expected ? ` (${expected})` : ""}.${completionToolHint}`;
}

const validationKind = artifactValidationKind(unitType);
Expand All @@ -469,9 +499,9 @@ function describeArtifactVerificationFailure(unitType: string, unitId: string, b
}
}

const expected = diagnoseExpectedArtifact(unitType, unitId, basePath);
return `Artifact verification failed: ${relPath} exists but did not satisfy the ${unitType} completion contract${expected ? ` (${expected})` : ""}.`;
}
export const _describeArtifactVerificationFailureForTest = describeArtifactVerificationFailure;

export async function autoCommitUnit(
basePath: string,
Expand Down Expand Up @@ -1262,6 +1292,7 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
s.currentUnit.type,
s.currentUnit.id,
s.basePath,
s.lastUnitAgentEndMessages,
);
if (attempt > MAX_ARTIFACT_VERIFICATION_RETRIES) {
s.verificationRetryCount.delete(retryKey);
Expand Down
36 changes: 35 additions & 1 deletion src/resources/extensions/gsd/session-forensics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,41 @@ export interface RecoveryBriefing {
prompt: string;
}

/** Lower-cased tool names that are treated as read-only regardless of their input. */
const READ_ONLY_TOOL_NAMES = new Set([
  "read",
  "memory_query",
  "gsd_resume",
  "gsd_exec_search",
  "grep",
  "find",
  "ls",
  "glob",
  "skill",
]);

/** Shell tokens that chain commands, redirect output, or substitute commands. */
const UNSAFE_SHELL_TOKENS_RE = /(?:&&|\|\||;|[<>]|`|\$\(|\n)/;

/**
 * Coarse allowlist: the command must start with a known inspection command and
 * contain only a conservative character set (no quotes, pipes, `=`, braces, ...).
 * It does not look at the arguments themselves; see `hasMutatingExecArguments`.
 */
const READ_ONLY_EXEC_COMMAND_RE = /^\s*(cat|head|tail|ls|find|grep|rg|git\s+(status|log|show|diff|branch|remote|rev-parse|ls-files)|npm\s+(ls|list|info|view|show|outdated|audit|doctor|ping|--version|-v)|node\s+(--version|-v\b)|python[23]?\s+(--version|-V\b)|jq|yq|env|printenv)\b[\w\s./:@,+-]*$/;

/** Exact program names admitted by the allowlist (rejects look-alikes such as `env-cmd`). */
const READ_ONLY_EXEC_PROGRAMS = new Set([
  "cat",
  "head",
  "tail",
  "ls",
  "find",
  "grep",
  "rg",
  "git",
  "npm",
  "node",
  "python",
  "python2",
  "python3",
  "jq",
  "yq",
  "env",
  "printenv",
]);

/** `find` actions that delete files, write files, or run other programs. */
const FIND_MUTATING_ACTIONS = new Set([
  "-delete",
  "-exec",
  "-execdir",
  "-ok",
  "-okdir",
  "-fprint",
  "-fprint0",
  "-fprintf",
  "-fls",
]);

/** `git branch` options that only list branches. */
const GIT_BRANCH_LISTING_FLAGS = new Set([
  "-a",
  "--all",
  "-r",
  "--remotes",
  "-v",
  "-vv",
  "--verbose",
  "--list",
  "--show-current",
]);

/** `git remote` subcommands that only display configuration. */
const GIT_REMOTE_READ_ONLY_SUBCOMMANDS = new Set(["show", "get-url"]);

/**
 * Checks the arguments following `git` for known mutating forms.
 *
 * @param args Whitespace-separated tokens after the `git` program name.
 * @returns `true` when the invocation can create, delete, or rewrite something.
 */
function hasMutatingGitArguments(args: readonly string[]): boolean {
  const [subcommand, ...rest] = args;
  // `--output <file>` makes diff-producing commands write to a file.
  if (rest.includes("--output")) return true;
  if (subcommand === "branch") {
    const isListing = rest.includes("--list");
    // Any option outside the listing set (-d, -D, -m, ...) is rejected; a bare
    // positional creates a branch unless `--list` turns it into a pattern.
    return rest.some((arg) => (arg.startsWith("-") ? !GIT_BRANCH_LISTING_FLAGS.has(arg) : !isListing));
  }
  if (subcommand === "remote") {
    const action = rest.find((arg) => !arg.startsWith("-"));
    return action !== undefined && !GIT_REMOTE_READ_ONLY_SUBCOMMANDS.has(action);
  }
  return false;
}

/**
 * Second-stage check for commands that already passed the coarse allowlist:
 * detects known argument forms that make an otherwise read-only program mutate
 * state or launch another program. The list covers known cases only and is
 * deliberately conservative (some harmless invocations are rejected too).
 *
 * @param command Trimmed command line, already free of unsafe shell tokens.
 * @returns `true` when the command must NOT be treated as read-only.
 */
function hasMutatingExecArguments(command: string): boolean {
  const [program = "", ...args] = command.split(/\s+/);
  // The regex only requires a word boundary after the name, so `env-cmd` or
  // `find.sh` would slip through without this exact-name check.
  if (!READ_ONLY_EXEC_PROGRAMS.has(program)) return true;
  switch (program) {
    case "env":
      // `env <anything>` runs a program (`env rm -rf x`); only bare `env` prints.
      return args.length > 0;
    case "find":
      return args.some((arg) => FIND_MUTATING_ACTIONS.has(arg));
    case "rg":
      // `--pre <cmd>` executes <cmd> for every searched file.
      return args.includes("--pre");
    case "yq":
      // In-place editing: `-i` (also inside a short-flag cluster), `--inplace`,
      // `--in-place`, and prefix abbreviations of the long form.
      return args.some((arg) => arg.startsWith("--in") || /^-[A-Za-z]*i[A-Za-z]*$/.test(arg));
    case "npm":
      // `npm audit fix` installs packages.
      return args[0] === "audit" && args.includes("fix");
    case "git":
      return hasMutatingGitArguments(args);
    default:
      return false;
  }
}

/**
 * Decides whether a single tool call is pure read-only reconnaissance.
 *
 * Tools listed in `READ_ONLY_TOOL_NAMES` always qualify. A `gsd_exec` call
 * qualifies only when its command (read from `input.command`, falling back to
 * `input.cmd`) is non-empty, contains no unsafe shell tokens, matches the
 * read-only allowlist, and carries no known mutating arguments. Every other
 * tool is treated as potentially mutating.
 *
 * @param call Tool call to classify.
 * @returns `true` when the call is considered read-only.
 */
function isReadOnlyReconnaissanceTool(call: ToolCall): boolean {
  const name = call.name.toLowerCase();
  if (READ_ONLY_TOOL_NAMES.has(name)) return true;
  if (name !== "gsd_exec") return false;
  const command = String(call.input.command || call.input.cmd || "").trim();
  if (!command) return false;
  if (UNSAFE_SHELL_TOKENS_RE.test(command)) return false;
  if (!READ_ONLY_EXEC_COMMAND_RE.test(command)) return false;
  return !hasMutatingExecArguments(command);
}

/**
 * Classifies an execution trace by whether it consists solely of read-only
 * reconnaissance tool calls.
 *
 * @param trace Trace whose `toolCalls` are inspected.
 * @returns `isReadOnlyReconnaissanceOnly` is `true` only when the trace has at
 *   least one tool call and every call passes `isReadOnlyReconnaissanceTool`.
 */
export function classifyTraceProgress(trace: ExecutionTrace): { isReadOnlyReconnaissanceOnly: boolean } {
  const { toolCalls } = trace;
  // A trace without any tool calls is never reported as reconnaissance-only.
  const isReadOnlyReconnaissanceOnly =
    toolCalls.length > 0 && toolCalls.every((call) => isReadOnlyReconnaissanceTool(call));
  return { isReadOnlyReconnaissanceOnly };
}

// ─── JSONL Parsing ────────────────────────────────────────────────────────────
// MAX_JSONL_BYTES and parseJSONL are imported from ./jsonl-utils.js

Expand Down Expand Up @@ -543,4 +578,3 @@ function findLast<T>(arr: T[], predicate: (item: T) => boolean): T | undefined {
}
return undefined;
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import test from "node:test";
import { _describeArtifactVerificationFailureForTest } from "../auto-post-unit.ts";

// With no agent messages supplied, a missing execute-task artifact must produce
// the "not found" message, the expected-artifact description, and the hint that
// no completion tool call was detected.
test("missing execute-task artifact includes completion contract and completion-tool hint", () => {
  const base = mkdtempSync(join(tmpdir(), "gsd-artifact-diag-"));
  try {
    // Only the tasks directory is created; no task artifact is written into it.
    const taskDir = join(base, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
    mkdirSync(taskDir, { recursive: true });

    const msg = _describeArtifactVerificationFailureForTest("execute-task", "M001/S01/T01", base);
    assert.match(msg, /was not found on disk after unit execution/);
    assert.match(msg, /Task T01 marked \[x\].*summary written/i);
    assert.match(msg, /No completion tool call detected \(`gsd_task_complete`\/alias\)/);
  } finally {
    // Remove the scratch directory even when an assertion fails.
    rmSync(base, { recursive: true, force: true });
  }
});

// When the agent messages contain a `gsd_task_complete` tool call, the
// "not found" message must still be produced but without the completion-tool hint.
test("missing execute-task artifact skips completion-tool hint when completion tool call is present", () => {
  const base = mkdtempSync(join(tmpdir(), "gsd-artifact-diag-"));
  try {
    // Only the tasks directory is created; no task artifact is written into it.
    const taskDir = join(base, ".gsd", "milestones", "M001", "slices", "S01", "tasks");
    mkdirSync(taskDir, { recursive: true });

    const msg = _describeArtifactVerificationFailureForTest(
      "execute-task",
      "M001/S01/T01",
      base,
      [{ content: [{ type: "toolCall", name: "gsd_task_complete" }] }],
    );
    assert.match(msg, /was not found on disk after unit execution/);
    assert.doesNotMatch(msg, /No completion tool call detected \(`gsd_task_complete`\/alias\)/);
  } finally {
    // Remove the scratch directory even when an assertion fails.
    rmSync(base, { recursive: true, force: true });
  }
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import test from "node:test";
import assert from "node:assert/strict";
import { classifyTraceProgress, type ExecutionTrace } from "../session-forensics.ts";

/**
 * Builds a minimal trace fixture around the given tool calls: every other
 * collection is empty, `lastReasoning` is blank, and `toolCallCount` mirrors
 * the number of calls supplied.
 */
function traceWithToolCalls(toolCalls: ExecutionTrace["toolCalls"]): ExecutionTrace {
  const fixture: ExecutionTrace = {
    toolCalls,
    filesWritten: [],
    filesRead: [],
    commandsRun: [],
    errors: [],
    lastReasoning: "",
    toolCallCount: toolCalls.length,
  };
  return fixture;
}

// A `skill` call followed by a plain `rg` search must count as reconnaissance-only.
test("classifyTraceProgress treats skill + read-only gsd_exec as reconnaissance-only", () => {
  const { isReadOnlyReconnaissanceOnly } = classifyTraceProgress(
    traceWithToolCalls([
      { name: "skill", input: { name: "diagnose" }, isError: false },
      { name: "gsd_exec", input: { command: "rg -n TODO src" }, isError: false },
    ]),
  );
  assert.equal(isReadOnlyReconnaissanceOnly, true);
});
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// `npm run build` is not on the read-only allowlist, so the trace is not reconnaissance-only.
test("classifyTraceProgress rejects mutating gsd_exec command", () => {
  const { isReadOnlyReconnaissanceOnly } = classifyTraceProgress(
    traceWithToolCalls([{ name: "gsd_exec", input: { command: "npm run build" }, isError: false }]),
  );
  assert.equal(isReadOnlyReconnaissanceOnly, false);
});

// A command that chains with `&&` and redirects with `>` must be rejected even though it starts with `cat`.
test("classifyTraceProgress rejects shell-chained gsd_exec command", () => {
  const { isReadOnlyReconnaissanceOnly } = classifyTraceProgress(
    traceWithToolCalls([{ name: "gsd_exec", input: { command: "cat file && echo x > y" }, isError: false }]),
  );
  assert.equal(isReadOnlyReconnaissanceOnly, false);
});

// Inline script evaluation via `python -c` must not be classified as read-only.
test("classifyTraceProgress rejects script-eval gsd_exec command", () => {
  const scriptEvalCall = {
    name: "gsd_exec",
    input: { command: "python -c \"import pathlib; pathlib.Path('x').write_text('y')\"" },
    isError: false,
  };
  const { isReadOnlyReconnaissanceOnly } = classifyTraceProgress(traceWithToolCalls([scriptEvalCall]));
  assert.equal(isReadOnlyReconnaissanceOnly, false);
});
Loading