diff --git a/src/__tests__/history-resume-delegation.spec.ts b/src/__tests__/history-resume-delegation.spec.ts index f3256bd1432..833af2338ff 100644 --- a/src/__tests__/history-resume-delegation.spec.ts +++ b/src/__tests__/history-resume-delegation.spec.ts @@ -205,6 +205,7 @@ describe("History resume delegation - parent metadata transitions", () => { it("reopenParentFromDelegation injects tool_result when new_task tool_use exists in API history", async () => { const provider = { contextProxy: { globalStorageUri: { fsPath: "/storage" } }, + log: vi.fn(), getTaskWithId: vi.fn().mockResolvedValue({ historyItem: { id: "p-tool", @@ -541,4 +542,273 @@ describe("History resume delegation - parent metadata transitions", () => { }), ) }) + + it("reopenParentFromDelegation skips duplicate tool_result when one already exists in history (EXT-665)", async () => { + const logSpy = vi.fn() + const provider = { + contextProxy: { globalStorageUri: { fsPath: "/storage" } }, + log: logSpy, + getTaskWithId: vi.fn().mockResolvedValue({ + historyItem: { + id: "p-dup", + status: "delegated", + awaitingChildId: "c-dup", + childIds: [], + ts: 100, + task: "Parent with existing tool_result", + tokensIn: 0, + tokensOut: 0, + totalCost: 0, + }, + }), + emit: vi.fn(), + getCurrentTask: vi.fn(() => ({ taskId: "c-dup" })), + removeClineFromStack: vi.fn().mockResolvedValue(undefined), + createTaskWithHistoryItem: vi.fn().mockResolvedValue({ + taskId: "p-dup", + resumeAfterDelegation: vi.fn().mockResolvedValue(undefined), + overwriteClineMessages: vi.fn().mockResolvedValue(undefined), + overwriteApiConversationHistory: vi.fn().mockResolvedValue(undefined), + }), + updateTaskHistory: vi.fn().mockResolvedValue([]), + } as unknown as ClineProvider + + // Simulate the bug scenario: API history already has a tool_result for the same tool_use_id + // This can happen if the tool was interrupted and the result was already added + const existingToolUseId = "toolu_01SnH3c7xgVdfLc2md4Fk6yB" + const existingUiMessages = [{ type: "ask", ask: "tool", text: "new_task request", ts: 50 }] + const existingApiMessages = [ + { role: "user", content: [{ type: "text", text: "Create a subtask" }], ts: 40 }, + { + role: "assistant", + content: [ + { + type: "tool_use", + name: "new_task", + id: existingToolUseId, + input: { mode: "code", message: "Do something" }, + }, + ], + ts: 50, + }, + // This tool_result already exists from a previous operation (e.g., interruption) + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: existingToolUseId, + content: "Tool execution was interrupted before completion.", + }, + ], + ts: 60, + }, + ] + + vi.mocked(readTaskMessages).mockResolvedValue(existingUiMessages as any) + vi.mocked(readApiMessages).mockResolvedValue(existingApiMessages as any) + + await (ClineProvider.prototype as any).reopenParentFromDelegation.call(provider, { + parentTaskId: "p-dup", + childTaskId: "c-dup", + completionResultSummary: "Subtask completed successfully", + }) + + // Verify that we logged the skip message + expect(logSpy).toHaveBeenCalledWith( + expect.stringContaining(`Skipping duplicate tool_result for tool_use_id: ${existingToolUseId}`), + ) + + // Verify API history was saved WITHOUT a new tool_result (should still have exactly 3 messages) + const apiCall = vi.mocked(saveApiMessages).mock.calls[0][0] + expect(apiCall.messages).toHaveLength(3) // Original 3 messages, no new one added + + // Count tool_result blocks - should only be 1 (the existing one) + const toolResultBlocks = apiCall.messages.flatMap((msg: any) => + msg.role === "user" && Array.isArray(msg.content) + ? msg.content.filter((block: any) => block.type === "tool_result") + : [], + ) + expect(toolResultBlocks).toHaveLength(1) + expect(toolResultBlocks[0].tool_use_id).toBe(existingToolUseId) + }) + + it("reopenParentFromDelegation adds tool_result when none exists for the tool_use_id (normal case)", async () => { + const logSpy = vi.fn() + const provider = { + contextProxy: { globalStorageUri: { fsPath: "/storage" } }, + log: logSpy, + getTaskWithId: vi.fn().mockResolvedValue({ + historyItem: { + id: "p-new", + status: "delegated", + awaitingChildId: "c-new", + childIds: [], + ts: 100, + task: "Parent without tool_result yet", + tokensIn: 0, + tokensOut: 0, + totalCost: 0, + }, + }), + emit: vi.fn(), + getCurrentTask: vi.fn(() => ({ taskId: "c-new" })), + removeClineFromStack: vi.fn().mockResolvedValue(undefined), + createTaskWithHistoryItem: vi.fn().mockResolvedValue({ + taskId: "p-new", + resumeAfterDelegation: vi.fn().mockResolvedValue(undefined), + overwriteClineMessages: vi.fn().mockResolvedValue(undefined), + overwriteApiConversationHistory: vi.fn().mockResolvedValue(undefined), + }), + updateTaskHistory: vi.fn().mockResolvedValue([]), + } as unknown as ClineProvider + + // Normal case: API history has tool_use but no tool_result yet + const toolUseId = "toolu_normal123" + const existingUiMessages = [{ type: "ask", ask: "tool", text: "new_task request", ts: 50 }] + const existingApiMessages = [ + { role: "user", content: [{ type: "text", text: "Create a subtask" }], ts: 40 }, + { + role: "assistant", + content: [ + { + type: "tool_use", + name: "new_task", + id: toolUseId, + input: { mode: "code", message: "Do something" }, + }, + ], + ts: 50, + }, + ] + + vi.mocked(readTaskMessages).mockResolvedValue(existingUiMessages as any) + vi.mocked(readApiMessages).mockResolvedValue(existingApiMessages as any) + + await (ClineProvider.prototype as any).reopenParentFromDelegation.call(provider, { + parentTaskId: "p-new", + childTaskId: "c-new", + completionResultSummary: "Subtask completed successfully", + }) + + // Verify that we did NOT log a skip message + expect(logSpy).not.toHaveBeenCalledWith(expect.stringContaining("Skipping duplicate tool_result")) + + // Verify API history was saved WITH a new tool_result (should have 3 messages now) + const apiCall = vi.mocked(saveApiMessages).mock.calls[0][0] + expect(apiCall.messages).toHaveLength(3) + + // The last message should be the new tool_result + const lastMsg = apiCall.messages[2] + expect(lastMsg.role).toBe("user") + expect((lastMsg.content[0] as any).type).toBe("tool_result") + expect((lastMsg.content[0] as any).tool_use_id).toBe(toolUseId) + expect((lastMsg.content[0] as any).content).toContain("Subtask c-new completed") + }) + + it("reopenParentFromDelegation appends to existing user message when multiple tools in one turn (EXT-665 root cause)", async () => { + // This tests the actual root cause of EXT-665: + // When assistant calls multiple tools (e.g., update_todo_list + new_task) in one turn, + // flushPendingToolResultsToHistory saves a user message with update_todo_list tool_result. + // When child completes, we must APPEND new_task tool_result to that existing message, + // NOT create a new message (which would cause validateAndFixToolResultIds to generate + // a placeholder for update_todo_list since the new message only has new_task). + const logSpy = vi.fn() + const provider = { + contextProxy: { globalStorageUri: { fsPath: "/storage" } }, + log: logSpy, + getTaskWithId: vi.fn().mockResolvedValue({ + historyItem: { + id: "p-multi", + status: "delegated", + awaitingChildId: "c-multi", + childIds: [], + ts: 100, + task: "Parent with multiple tools in one turn", + tokensIn: 0, + tokensOut: 0, + totalCost: 0, + }, + }), + emit: vi.fn(), + getCurrentTask: vi.fn(() => ({ taskId: "c-multi" })), + removeClineFromStack: vi.fn().mockResolvedValue(undefined), + createTaskWithHistoryItem: vi.fn().mockResolvedValue({ + taskId: "p-multi", + resumeAfterDelegation: vi.fn().mockResolvedValue(undefined), + overwriteClineMessages: vi.fn().mockResolvedValue(undefined), + overwriteApiConversationHistory: vi.fn().mockResolvedValue(undefined), + }), + updateTaskHistory: vi.fn().mockResolvedValue([]), + } as unknown as ClineProvider + + // Simulate the EXT-665 scenario: + // - Assistant calls update_todo_list (id=A) AND new_task (id=B) in one turn + // - flushPendingToolResultsToHistory saved tool_result for update_todo_list + const updateTodoListToolId = "toolu_update123" + const newTaskToolId = "toolu_newtask456" + const existingUiMessages = [{ type: "ask", ask: "tool", text: "tools", ts: 50 }] + const existingApiMessages = [ + { role: "user", content: [{ type: "text", text: "Do the work" }], ts: 40 }, + { + role: "assistant", + content: [ + { + type: "tool_use", + name: "update_todo_list", + id: updateTodoListToolId, + input: { todos: "[ ] Task 1" }, + }, + { + type: "tool_use", + name: "new_task", + id: newTaskToolId, + input: { mode: "code", message: "Do subtask" }, + }, + ], + ts: 50, + }, + // This user message was saved by flushPendingToolResultsToHistory during delegation + // It has tool_result for update_todo_list, but NOT for new_task yet + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: updateTodoListToolId, + content: "Delegating to subtask...", + }, + ], + ts: 60, + }, + ] + + vi.mocked(readTaskMessages).mockResolvedValue(existingUiMessages as any) + vi.mocked(readApiMessages).mockResolvedValue(existingApiMessages as any) + + await (ClineProvider.prototype as any).reopenParentFromDelegation.call(provider, { + parentTaskId: "p-multi", + childTaskId: "c-multi", + completionResultSummary: "Subtask completed", + }) + + // Verify that we logged APPEND (not CREATE) + expect(logSpy).toHaveBeenCalledWith( + expect.stringContaining("Appended new_task tool_result to existing user message"), + ) + + // Verify API history still has exactly 3 messages (no new message created) + const apiCall = vi.mocked(saveApiMessages).mock.calls[0][0] + expect(apiCall.messages).toHaveLength(3) + + // Verify the last user message now has BOTH tool_results + const lastUserMsg = apiCall.messages[2] + expect(lastUserMsg.role).toBe("user") + expect(lastUserMsg.content).toHaveLength(2) // update_todo_list + new_task + + // Verify both tool_results are present + const toolResults = (lastUserMsg.content as any[]).filter((b: any) => b.type === "tool_result") + expect(toolResults).toHaveLength(2) + expect(toolResults.map((t: any) => t.tool_use_id).sort()).toEqual([updateTodoListToolId, newTaskToolId].sort()) + }) }) diff --git a/src/core/tools/NewTaskTool.ts b/src/core/tools/NewTaskTool.ts index f36d8e1e379..bc95d9edb6c 100644 --- a/src/core/tools/NewTaskTool.ts +++ b/src/core/tools/NewTaskTool.ts @@ -109,16 +109,23 @@ export class NewTaskTool extends BaseTool<"new_task"> { return } + // IMPORTANT: Push the tool_result BEFORE delegation, because delegateParentAndOpenChild + // disposes the parent task. If we push after, the tool_result is lost and + // flushPendingToolResultsToHistory will generate a placeholder "interrupted" tool_result, + // causing duplicate tool_results when the child completes (EXT-665). + // + // The child taskId isn't known yet, so we use a generic message. The actual completion + // result will be injected by reopenParentFromDelegation when the child completes. + pushToolResult(`Delegating to subtask...`) + // Delegate parent and open child as sole active task - const child = await (provider as any).delegateParentAndOpenChild({ + await (provider as any).delegateParentAndOpenChild({ parentTaskId: task.taskId, message: unescapedMessage, initialTodos: todoItems, mode, }) - // Reflect delegation in tool result (no pause/unpause, no wait) - pushToolResult(`Delegated to child task ${child.taskId}`) return } catch (error) { await handleError("creating new task", error) diff --git a/src/core/tools/__tests__/newTaskTool.spec.ts b/src/core/tools/__tests__/newTaskTool.spec.ts index fc383c13eef..5bce727391f 100644 --- a/src/core/tools/__tests__/newTaskTool.spec.ts +++ b/src/core/tools/__tests__/newTaskTool.spec.ts @@ -175,7 +175,7 @@ describe("newTaskTool", () => { ) // Verify side effects - expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Delegated to child task")) + expect(mockPushToolResult).toHaveBeenCalledWith("Delegating to subtask...") }) it("should not un-escape single escaped \@", async () => { @@ -280,7 +280,7 @@ describe("newTaskTool", () => { expect(mockStartSubtask).toHaveBeenCalledWith("Test message", [], "code") // Should complete successfully - expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Delegated to child task")) + expect(mockPushToolResult).toHaveBeenCalledWith("Delegating to subtask...") }) it("should work with todos parameter when provided", async () => { @@ -311,7 +311,7 @@ describe("newTaskTool", () => { "code", ) - expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Delegated to child task")) + expect(mockPushToolResult).toHaveBeenCalledWith("Delegating to subtask...") }) it("should error when mode parameter is missing", async () => { @@ -423,7 +423,7 @@ describe("newTaskTool", () => { expect(mockStartSubtask).toHaveBeenCalledWith("Test message", [], "code") // Should complete successfully - expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Delegated to child task")) + expect(mockPushToolResult).toHaveBeenCalledWith("Delegating to subtask...") }) it("should REQUIRE todos when VSCode setting is enabled", async () => { @@ -501,7 +501,7 @@ describe("newTaskTool", () => { ) // Should complete successfully - expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Delegated to child task")) + expect(mockPushToolResult).toHaveBeenCalledWith("Delegating to subtask...") }) it("should work with empty todos string when VSCode setting is enabled", async () => { @@ -536,7 +536,7 @@ describe("newTaskTool", () => { expect(mockStartSubtask).toHaveBeenCalledWith("Test message", [], "code") // Should complete successfully - expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Delegated to child task")) + expect(mockPushToolResult).toHaveBeenCalledWith("Delegating to subtask...") }) it("should check VSCode setting with Package.name configuration key", async () => { @@ -671,7 +671,7 @@ describe("newTaskTool delegation flow", () => { ) expect(pauseEvents.length).toBe(0) - // Assert: tool result reflects delegation - expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("Delegated to child task child-1")) + // Assert: tool result reflects delegation (pushed BEFORE delegation, so no child ID yet) + expect(mockPushToolResult).toHaveBeenCalledWith("Delegating to subtask...") }) }) diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index a13c0f00677..788ff764236 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -3344,43 +3344,88 @@ export class ClineProvider // inject a matching tool_result for the Anthropic message contract: // user → assistant (tool_use) → user (tool_result) if (toolUseId) { - // Check if the last message is already a user message with a tool_result for this tool_use_id - // (in case this is a retry or the history was already updated) - const lastMsg = parentApiMessages[parentApiMessages.length - 1] + // Check ALL user messages for an existing tool_result with this tool_use_id + // (not just the last message, to prevent duplicate tool_results - EXT-665) let alreadyHasToolResult = false - if (lastMsg?.role === "user" && Array.isArray(lastMsg.content)) { - for (const block of lastMsg.content) { - if (block.type === "tool_result" && block.tool_use_id === toolUseId) { - // Update the existing tool_result content - block.content = `Subtask ${childTaskId} completed.\n\nResult:\n${completionResultSummary}` - alreadyHasToolResult = true - break + let existingUserMsgIndex = -1 + for (let i = 0; i < parentApiMessages.length; i++) { + const msg = parentApiMessages[i] + if (msg.role === "user" && Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block.type === "tool_result" && block.tool_use_id === toolUseId) { + alreadyHasToolResult = true + this.log( + `[reopenParentFromDelegation] Skipping duplicate tool_result for tool_use_id: ${toolUseId}`, + ) + break + } } + if (alreadyHasToolResult) break + // Track the last user message index for potential appending + existingUserMsgIndex = i } } - // If no existing tool_result found, create a NEW user message with the tool_result + // If no tool_result exists for new_task, we need to add one. + // IMPORTANT: If there's already a user message with tool_results AFTER the assistant + // message (from flushPendingToolResultsToHistory), append to it instead of creating a new one. + // Creating a new user message causes validateAndFixToolResultIds to generate + // placeholder tool_results for OTHER tool_uses that were already satisfied in + // the previous user message (EXT-665 root cause). if (!alreadyHasToolResult) { - parentApiMessages.push({ - role: "user", - content: [ - { - type: "tool_result" as const, - tool_use_id: toolUseId, - content: `Subtask ${childTaskId} completed.\n\nResult:\n${completionResultSummary}`, - }, - ], - ts, - }) - } + const newToolResult: Anthropic.ToolResultBlockParam = { + type: "tool_result" as const, + tool_use_id: toolUseId, + content: `Subtask ${childTaskId} completed.\n\nResult:\n${completionResultSummary}`, + } + + // Find the assistant message with the new_task tool_use + let assistantMsgIdx = -1 + for (let i = parentApiMessages.length - 1; i >= 0; i--) { + const msg = parentApiMessages[i] + if (msg.role === "assistant" && Array.isArray(msg.content)) { + for (const block of msg.content) { + if (block.type === "tool_use" && block.name === "new_task" && block.id === toolUseId) { + assistantMsgIdx = i + break + } + } + if (assistantMsgIdx !== -1) break + } + } - // Validate the newly injected tool_result against the preceding assistant message. - // This ensures the tool_result's tool_use_id matches a tool_use in the immediately - // preceding assistant message (Anthropic API requirement). - const lastMessage = parentApiMessages[parentApiMessages.length - 1] - if (lastMessage?.role === "user") { - const validatedMessage = validateAndFixToolResultIds(lastMessage, parentApiMessages.slice(0, -1)) - parentApiMessages[parentApiMessages.length - 1] = validatedMessage + // Find a user message with tool_results that comes AFTER the assistant message + // This would be from flushPendingToolResultsToHistory during delegation + let targetUserMsgIdx = -1 + if (assistantMsgIdx !== -1) { + for (let i = assistantMsgIdx + 1; i < parentApiMessages.length; i++) { + const msg = parentApiMessages[i] + if (msg.role === "user" && Array.isArray(msg.content)) { + // Check if this user message has any tool_results + const hasToolResults = msg.content.some((block: any) => block.type === "tool_result") + if (hasToolResults) { + targetUserMsgIdx = i + break + } + } + } + } + + if (targetUserMsgIdx !== -1 && Array.isArray(parentApiMessages[targetUserMsgIdx].content)) { + // Append to existing user message that has tool_results + ;(parentApiMessages[targetUserMsgIdx].content as Anthropic.ContentBlockParam[]).push(newToolResult) + this.log( + `[reopenParentFromDelegation] Appended new_task tool_result to existing user message at index ${targetUserMsgIdx}`, + ) + } else { + // Create new user message if no suitable existing message found + parentApiMessages.push({ + role: "user", + content: [newToolResult], + ts, + }) + this.log(`[reopenParentFromDelegation] Created new user message for new_task tool_result`) + } } } else { // If there is no corresponding tool_use in the parent API history, we cannot emit a