Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
270 changes: 270 additions & 0 deletions src/__tests__/history-resume-delegation.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ describe("History resume delegation - parent metadata transitions", () => {
it("reopenParentFromDelegation injects tool_result when new_task tool_use exists in API history", async () => {
const provider = {
contextProxy: { globalStorageUri: { fsPath: "/storage" } },
log: vi.fn(),
getTaskWithId: vi.fn().mockResolvedValue({
historyItem: {
id: "p-tool",
Expand Down Expand Up @@ -541,4 +542,273 @@ describe("History resume delegation - parent metadata transitions", () => {
}),
)
})

it("reopenParentFromDelegation skips duplicate tool_result when one already exists in history (EXT-665)", async () => {
const logSpy = vi.fn()
const provider = {
contextProxy: { globalStorageUri: { fsPath: "/storage" } },
log: logSpy,
getTaskWithId: vi.fn().mockResolvedValue({
historyItem: {
id: "p-dup",
status: "delegated",
awaitingChildId: "c-dup",
childIds: [],
ts: 100,
task: "Parent with existing tool_result",
tokensIn: 0,
tokensOut: 0,
totalCost: 0,
},
}),
emit: vi.fn(),
getCurrentTask: vi.fn(() => ({ taskId: "c-dup" })),
removeClineFromStack: vi.fn().mockResolvedValue(undefined),
createTaskWithHistoryItem: vi.fn().mockResolvedValue({
taskId: "p-dup",
resumeAfterDelegation: vi.fn().mockResolvedValue(undefined),
overwriteClineMessages: vi.fn().mockResolvedValue(undefined),
overwriteApiConversationHistory: vi.fn().mockResolvedValue(undefined),
}),
updateTaskHistory: vi.fn().mockResolvedValue([]),
} as unknown as ClineProvider

// Simulate the bug scenario: API history already has a tool_result for the same tool_use_id
// This can happen if the tool was interrupted and the result was already added
const existingToolUseId = "toolu_01SnH3c7xgVdfLc2md4Fk6yB"
const existingUiMessages = [{ type: "ask", ask: "tool", text: "new_task request", ts: 50 }]
const existingApiMessages = [
{ role: "user", content: [{ type: "text", text: "Create a subtask" }], ts: 40 },
{
role: "assistant",
content: [
{
type: "tool_use",
name: "new_task",
id: existingToolUseId,
input: { mode: "code", message: "Do something" },
},
],
ts: 50,
},
// This tool_result already exists from a previous operation (e.g., interruption)
{
role: "user",
content: [
{
type: "tool_result",
tool_use_id: existingToolUseId,
content: "Tool execution was interrupted before completion.",
},
],
ts: 60,
},
]

vi.mocked(readTaskMessages).mockResolvedValue(existingUiMessages as any)
vi.mocked(readApiMessages).mockResolvedValue(existingApiMessages as any)

await (ClineProvider.prototype as any).reopenParentFromDelegation.call(provider, {
parentTaskId: "p-dup",
childTaskId: "c-dup",
completionResultSummary: "Subtask completed successfully",
})

// Verify that we logged the skip message
expect(logSpy).toHaveBeenCalledWith(
expect.stringContaining(`Skipping duplicate tool_result for tool_use_id: ${existingToolUseId}`),
)

// Verify API history was saved WITHOUT a new tool_result (should still have exactly 3 messages)
const apiCall = vi.mocked(saveApiMessages).mock.calls[0][0]
expect(apiCall.messages).toHaveLength(3) // Original 3 messages, no new one added

// Count tool_result blocks - should only be 1 (the existing one)
const toolResultBlocks = apiCall.messages.flatMap((msg: any) =>
msg.role === "user" && Array.isArray(msg.content)
? msg.content.filter((block: any) => block.type === "tool_result")
: [],
)
expect(toolResultBlocks).toHaveLength(1)
expect(toolResultBlocks[0].tool_use_id).toBe(existingToolUseId)
})

it("reopenParentFromDelegation adds tool_result when none exists for the tool_use_id (normal case)", async () => {
const logSpy = vi.fn()
const provider = {
contextProxy: { globalStorageUri: { fsPath: "/storage" } },
log: logSpy,
getTaskWithId: vi.fn().mockResolvedValue({
historyItem: {
id: "p-new",
status: "delegated",
awaitingChildId: "c-new",
childIds: [],
ts: 100,
task: "Parent without tool_result yet",
tokensIn: 0,
tokensOut: 0,
totalCost: 0,
},
}),
emit: vi.fn(),
getCurrentTask: vi.fn(() => ({ taskId: "c-new" })),
removeClineFromStack: vi.fn().mockResolvedValue(undefined),
createTaskWithHistoryItem: vi.fn().mockResolvedValue({
taskId: "p-new",
resumeAfterDelegation: vi.fn().mockResolvedValue(undefined),
overwriteClineMessages: vi.fn().mockResolvedValue(undefined),
overwriteApiConversationHistory: vi.fn().mockResolvedValue(undefined),
}),
updateTaskHistory: vi.fn().mockResolvedValue([]),
} as unknown as ClineProvider

// Normal case: API history has tool_use but no tool_result yet
const toolUseId = "toolu_normal123"
const existingUiMessages = [{ type: "ask", ask: "tool", text: "new_task request", ts: 50 }]
const existingApiMessages = [
{ role: "user", content: [{ type: "text", text: "Create a subtask" }], ts: 40 },
{
role: "assistant",
content: [
{
type: "tool_use",
name: "new_task",
id: toolUseId,
input: { mode: "code", message: "Do something" },
},
],
ts: 50,
},
]

vi.mocked(readTaskMessages).mockResolvedValue(existingUiMessages as any)
vi.mocked(readApiMessages).mockResolvedValue(existingApiMessages as any)

await (ClineProvider.prototype as any).reopenParentFromDelegation.call(provider, {
parentTaskId: "p-new",
childTaskId: "c-new",
completionResultSummary: "Subtask completed successfully",
})

// Verify that we did NOT log a skip message
expect(logSpy).not.toHaveBeenCalledWith(expect.stringContaining("Skipping duplicate tool_result"))

// Verify API history was saved WITH a new tool_result (should have 3 messages now)
const apiCall = vi.mocked(saveApiMessages).mock.calls[0][0]
expect(apiCall.messages).toHaveLength(3)

// The last message should be the new tool_result
const lastMsg = apiCall.messages[2]
expect(lastMsg.role).toBe("user")
expect((lastMsg.content[0] as any).type).toBe("tool_result")
expect((lastMsg.content[0] as any).tool_use_id).toBe(toolUseId)
expect((lastMsg.content[0] as any).content).toContain("Subtask c-new completed")
})

it("reopenParentFromDelegation appends to existing user message when multiple tools in one turn (EXT-665 root cause)", async () => {
// This tests the actual root cause of EXT-665:
// When assistant calls multiple tools (e.g., update_todo_list + new_task) in one turn,
// flushPendingToolResultsToHistory saves a user message with update_todo_list tool_result.
// When child completes, we must APPEND new_task tool_result to that existing message,
// NOT create a new message (which would cause validateAndFixToolResultIds to generate
// a placeholder for update_todo_list since the new message only has new_task).
const logSpy = vi.fn()
const provider = {
contextProxy: { globalStorageUri: { fsPath: "/storage" } },
log: logSpy,
getTaskWithId: vi.fn().mockResolvedValue({
historyItem: {
id: "p-multi",
status: "delegated",
awaitingChildId: "c-multi",
childIds: [],
ts: 100,
task: "Parent with multiple tools in one turn",
tokensIn: 0,
tokensOut: 0,
totalCost: 0,
},
}),
emit: vi.fn(),
getCurrentTask: vi.fn(() => ({ taskId: "c-multi" })),
removeClineFromStack: vi.fn().mockResolvedValue(undefined),
createTaskWithHistoryItem: vi.fn().mockResolvedValue({
taskId: "p-multi",
resumeAfterDelegation: vi.fn().mockResolvedValue(undefined),
overwriteClineMessages: vi.fn().mockResolvedValue(undefined),
overwriteApiConversationHistory: vi.fn().mockResolvedValue(undefined),
}),
updateTaskHistory: vi.fn().mockResolvedValue([]),
} as unknown as ClineProvider

// Simulate the EXT-665 scenario:
// - Assistant calls update_todo_list (id=A) AND new_task (id=B) in one turn
// - flushPendingToolResultsToHistory saved tool_result for update_todo_list
const updateTodoListToolId = "toolu_update123"
const newTaskToolId = "toolu_newtask456"
const existingUiMessages = [{ type: "ask", ask: "tool", text: "tools", ts: 50 }]
const existingApiMessages = [
{ role: "user", content: [{ type: "text", text: "Do the work" }], ts: 40 },
{
role: "assistant",
content: [
{
type: "tool_use",
name: "update_todo_list",
id: updateTodoListToolId,
input: { todos: "[ ] Task 1" },
},
{
type: "tool_use",
name: "new_task",
id: newTaskToolId,
input: { mode: "code", message: "Do subtask" },
},
],
ts: 50,
},
// This user message was saved by flushPendingToolResultsToHistory during delegation
// It has tool_result for update_todo_list, but NOT for new_task yet
{
role: "user",
content: [
{
type: "tool_result",
tool_use_id: updateTodoListToolId,
content: "Delegating to subtask...",
},
],
ts: 60,
},
]

vi.mocked(readTaskMessages).mockResolvedValue(existingUiMessages as any)
vi.mocked(readApiMessages).mockResolvedValue(existingApiMessages as any)

await (ClineProvider.prototype as any).reopenParentFromDelegation.call(provider, {
parentTaskId: "p-multi",
childTaskId: "c-multi",
completionResultSummary: "Subtask completed",
})

// Verify that we logged APPEND (not CREATE)
expect(logSpy).toHaveBeenCalledWith(
expect.stringContaining("Appended new_task tool_result to existing user message"),
)

// Verify API history still has exactly 3 messages (no new message created)
const apiCall = vi.mocked(saveApiMessages).mock.calls[0][0]
expect(apiCall.messages).toHaveLength(3)

// Verify the last user message now has BOTH tool_results
const lastUserMsg = apiCall.messages[2]
expect(lastUserMsg.role).toBe("user")
expect(lastUserMsg.content).toHaveLength(2) // update_todo_list + new_task

// Verify both tool_results are present
const toolResults = (lastUserMsg.content as any[]).filter((b: any) => b.type === "tool_result")
expect(toolResults).toHaveLength(2)
expect(toolResults.map((t: any) => t.tool_use_id).sort()).toEqual([updateTodoListToolId, newTaskToolId].sort())
})
})
13 changes: 10 additions & 3 deletions src/core/tools/NewTaskTool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,16 +109,23 @@ export class NewTaskTool extends BaseTool<"new_task"> {
return
}

// IMPORTANT: Push the tool_result BEFORE delegation, because delegateParentAndOpenChild
// disposes the parent task. If we push after, the tool_result is lost and
// flushPendingToolResultsToHistory will generate a placeholder "interrupted" tool_result,
// causing duplicate tool_results when the child completes (EXT-665).
//
// The child taskId isn't known yet, so we use a generic message. The actual completion
// result will be injected by reopenParentFromDelegation when the child completes.
pushToolResult(`Delegating to subtask...`)

// Delegate parent and open child as sole active task
const child = await (provider as any).delegateParentAndOpenChild({
await (provider as any).delegateParentAndOpenChild({
parentTaskId: task.taskId,
message: unescapedMessage,
initialTodos: todoItems,
mode,
})

// Reflect delegation in tool result (no pause/unpause, no wait)
pushToolResult(`Delegated to child task ${child.taskId}`)
return
} catch (error) {
await handleError("creating new task", error)
Expand Down
16 changes: 8 additions & 8 deletions src/core/tools/__tests__/newTaskTool.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ describe("newTaskTool", () => {
)

// Verify side effects
expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Delegated to child task"))
expect(mockPushToolResult).toHaveBeenCalledWith("Delegating to subtask...")
})

it("should not un-escape single escaped \@", async () => {
Expand Down Expand Up @@ -280,7 +280,7 @@ describe("newTaskTool", () => {
expect(mockStartSubtask).toHaveBeenCalledWith("Test message", [], "code")

// Should complete successfully
expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Delegated to child task"))
expect(mockPushToolResult).toHaveBeenCalledWith("Delegating to subtask...")
})

it("should work with todos parameter when provided", async () => {
Expand Down Expand Up @@ -311,7 +311,7 @@ describe("newTaskTool", () => {
"code",
)

expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Delegated to child task"))
expect(mockPushToolResult).toHaveBeenCalledWith("Delegating to subtask...")
})

it("should error when mode parameter is missing", async () => {
Expand Down Expand Up @@ -423,7 +423,7 @@ describe("newTaskTool", () => {
expect(mockStartSubtask).toHaveBeenCalledWith("Test message", [], "code")

// Should complete successfully
expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Delegated to child task"))
expect(mockPushToolResult).toHaveBeenCalledWith("Delegating to subtask...")
})

it("should REQUIRE todos when VSCode setting is enabled", async () => {
Expand Down Expand Up @@ -501,7 +501,7 @@ describe("newTaskTool", () => {
)

// Should complete successfully
expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Delegated to child task"))
expect(mockPushToolResult).toHaveBeenCalledWith("Delegating to subtask...")
})

it("should work with empty todos string when VSCode setting is enabled", async () => {
Expand Down Expand Up @@ -536,7 +536,7 @@ describe("newTaskTool", () => {
expect(mockStartSubtask).toHaveBeenCalledWith("Test message", [], "code")

// Should complete successfully
expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Delegated to child task"))
expect(mockPushToolResult).toHaveBeenCalledWith("Delegating to subtask...")
})

it("should check VSCode setting with Package.name configuration key", async () => {
Expand Down Expand Up @@ -671,7 +671,7 @@ describe("newTaskTool delegation flow", () => {
)
expect(pauseEvents.length).toBe(0)

// Assert: tool result reflects delegation
expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Delegated to child task child-1"))
// Assert: tool result reflects delegation (pushed BEFORE delegation, so no child ID yet)
expect(mockPushToolResult).toHaveBeenCalledWith("Delegating to subtask...")
})
})
Loading
Loading