zimushui · pull · Dec 19, 2025 · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025
diff --git a/.changeset/polite-games-arrive.md b/.changeset/polite-games-arrive.md
@@ -0,0 +1,59 @@
+---
+"kilo-code": patch
+---
+
+Include changes from Roo Code v3.36.6
+
+- Add tool alias support for model-specific tool customization, allowing users to configure how tools are presented to different AI models (PR #9989 by @daniel-lxs)
+- Sanitize MCP server and tool names for API compatibility, ensuring special characters don't cause issues with API calls (PR #10054 by @daniel-lxs)
+- Improve auto-approve timer visibility in follow-up suggestions for better user awareness of pending actions (PR #10048 by @brunobergher)
+- Fix: Cancel auto-approval timeout when user starts typing, preventing accidental auto-approvals during user interaction (PR #9937 by @roomote)
+- Add WorkspaceTaskVisibility type for organization cloud settings to support team visibility controls (PR #10020 by @roomote)
+- Fix: Extract raw error message from OpenRouter metadata for clearer error reporting (PR #10039 by @daniel-lxs)
+- Fix: Show tool protocol dropdown for LiteLLM provider, restoring missing configuration option (PR #10053 by @daniel-lxs)
+- Add: GPT-5.2 model to openai-native provider (PR #10024 by @hannesrudolph)
+- Fix: Handle empty Gemini responses and reasoning loops to prevent infinite retries (PR #10007 by @hannesrudolph)
+- Fix: Add missing tool_result blocks to prevent API errors when tool results are expected (PR #10015 by @daniel-lxs)
+- Fix: Filter orphaned tool_results when more results than tool_uses to prevent message validation errors (PR #10027 by @daniel-lxs)
+- Fix: Add general API endpoints for Z.ai provider (#9879 by @richtong, PR #9894 by @roomote)
+- Remove: Deprecated list_code_definition_names tool (PR #10005 by @hannesrudolph)
+- Add error details modal with on-demand display for improved error visibility when debugging issues (PR #9985 by @roomote)
+- Fix: Prevent premature rawChunkTracker clearing for MCP tools, improving reliability of MCP tool streaming (PR #9993 by @daniel-lxs)
+- Fix: Filter out 429 rate limit errors from API error telemetry for cleaner metrics (PR #9987 by @daniel-lxs)
+- Fix: Correct TODO list display order in chat view to show items in proper sequence (PR #9991 by @roomote)
+- Refactor: Unified context-management architecture with improved UX for better context control (PR #9795 by @hannesrudolph)
+- Add new `search_replace` native tool for single-replacement operations with improved editing precision (PR #9918 by @hannesrudolph)
+- Add streaming tool stats and token usage throttling for better real-time feedback during generation (PR #9926 by @hannesrudolph)
+- Add versioned settings support with minPluginVersion gating for Roo provider (PR #9934 by @hannesrudolph)
+- Make Architect mode save plans to `/plans` directory and gitignore it (PR #9944 by @brunobergher)
+- Add ability to save screenshots from the browser tool (PR #9963 by @mrubens)
+- Refactor: Decouple tools from system prompt for cleaner architecture (PR #9784 by @daniel-lxs)
+- Update DeepSeek models to V3.2 with new pricing (PR #9962 by @hannesrudolph)
+- Add minimal and medium reasoning effort levels for Gemini models (PR #9973 by @hannesrudolph)
+- Update xAI models catalog with latest model options (PR #9872 by @hannesrudolph)
+- Add DeepSeek V3-2 support for Baseten provider (PR #9861 by @AlexKer)
+- Tweaks to Baseten model definitions for better defaults (PR #9866 by @mrubens)
+- Fix: Add xhigh reasoning effort support for gpt-5.1-codex-max (#9891 by @andrewginns, PR #9900 by @andrewginns)
+- Fix: Add Kimi, MiniMax, and Qwen model configurations for Bedrock (#9902 by @jbearak, PR #9905 by @app/roomote)
+- Configure tool preferences for xAI models (PR #9923 by @hannesrudolph)
+- Default to using native tools when supported on OpenRouter (PR #9878 by @mrubens)
+- Fix: Exclude apply_diff from native tools when diffEnabled is false (#9919 by @denis-kudelin, PR #9920 by @app/roomote)
+- Fix: Always show tool protocol selector for openai-compatible provider (#9965 by @bozoweed, PR #9966 by @hannesrudolph)
+- Fix: Respect explicit supportsReasoningEffort array values for proper model configuration (PR #9970 by @hannesrudolph)
+- Add timeout configuration to OpenAI Compatible Provider Client (PR #9898 by @dcbartlett)
+- Revert default tool protocol change from xml to native for stability (PR #9956 by @mrubens)
+- Improve OpenAI error messages to be more useful for debugging (PR #9639 by @mrubens)
+- Better error logs for parseToolCall exceptions (PR #9857 by @cte)
+- Improve cloud job error logging for RCC provider errors (PR #9924 by @cte)
+- Fix: Display actual API error message instead of generic text on retry (PR #9954 by @hannesrudolph)
+- Add API error telemetry to OpenRouter provider for better diagnostics (PR #9953 by @daniel-lxs)
+- Fix: Sanitize removed/invalid API providers to prevent infinite loop (PR #9869 by @hannesrudolph)
+- Fix: Use foreground color for context-management icons (PR #9912 by @hannesrudolph)
+- Fix: Suppress 'ask promise was ignored' error in handleError (PR #9914 by @daniel-lxs)
+- Fix: Process finish_reason to emit tool_call_end events properly (PR #9927 by @daniel-lxs)
+- Fix: Add finish_reason processing to xai.ts provider (PR #9929 by @daniel-lxs)
+- Fix: Validate and fix tool_result IDs before API requests (PR #9952 by @daniel-lxs)
+- Fix: Return undefined instead of 0 for disabled API timeout (PR #9960 by @hannesrudolph)
+- Stop making unnecessary count_tokens requests for better performance (PR #9884 by @mrubens)
+- Refactor: Consolidate ThinkingBudget components and fix disable handling (PR #9930 by @hannesrudolph)
+- Forbid time estimates in architect mode for more focused planning (PR #9931 by @app/roomote)
diff --git a/.gitignore b/.gitignore
@@ -68,4 +68,6 @@ qdrant_storage/
 *.code-workspace
 
 # Act Secret Files
-.secrets
+.secrets
+# Architect plans
+plans/
diff --git a/.roo/rules-translate/AGENTS.md b/.roo/rules-translate/AGENTS.md
@@ -316,31 +316,36 @@ For each language that is missing translations:
 "dragFiles": "按住shift拖动文件"
 =======
 "dragFiles": "Shift+拖拽文件"
->>>>>>> AFTER
+
+> > > > > > > AFTER
 
 <<<<<<< BEFORE
 "description": "启用后，Kilo Code 将能够与 MCP 服务器交互以获取高级功能。"
 =======
 "description": "启用后 Kilo Code 可与 MCP 服务交互获取高级功能。"
->>>>>>> AFTER
+
+> > > > > > > AFTER
 
 <<<<<<< BEFORE
 "cannotUndo": "此操作无法撤消。"
 =======
 "cannotUndo": "此操作不可逆。"
->>>>>>> AFTER
+
+> > > > > > > AFTER
 
 <<<<<<< BEFORE
 "hold shift to drag in files" → "按住shift拖动文件"
 =======
 "hold shift to drag in files" → "Shift+拖拽文件"
->>>>>>> AFTER
+
+> > > > > > > AFTER
 
 <<<<<<< BEFORE
 "Double click to edit" → "双击进行编辑"
 =======
 "Double click to edit" → "双击编辑"
->>>>>>> AFTER
+
+> > > > > > > AFTER
 ```
 
 ### Common Pitfalls

diff --git a/apps/kilocode-docs/docs/advanced-usage/appbuilder.md b/apps/kilocode-docs/docs/advanced-usage/appbuilder.md
@@ -39,26 +39,27 @@ Before using App Builder:
 1. Navigate to **[App Builder](https://app.kilo.ai/app-builder)** from your Kilo dashboard.
 2. Choose an **AI Model** for development (e.g., Grok Code Fast 1, Claude Sonnet 4.5, GPT-5.2).
 3. Describe your application in plain language:
-   - What it should do
-   - Key features and functionality
-   - Design preferences or constraints
+    - What it should do
+    - Key features and functionality
+    - Design preferences or constraints
 4. Watch the **live preview** update as the AI generates your app.
 5. Provide feedback to refine:
-   - "Make the header sticky"
-   - "Add a dark mode toggle"
-   - "Connect this form to a database"
+    - "Make the header sticky"
+    - "Add a dark mode toggle"
+    - "Connect this form to a database"
 6. When satisfied, click **Deploy** to push your app live.
 
 ---
 
 ## How App Builder Works
 
 - When you describe your application:
-  1. The AI model interprets your requirements and generates an initial implementation.
-  2. Code is rendered in real-time in the live preview panel.
-  3. You can interact with the preview as if it were the deployed app.
-  4. Each refinement request triggers targeted updates to the codebase.
-  5. The AI maintains context across your entire conversation for coherent iteration.
+
+    1. The AI model interprets your requirements and generates an initial implementation.
+    2. Code is rendered in real-time in the live preview panel.
+    3. You can interact with the preview as if it were the deployed app.
+    4. Each refinement request triggers targeted updates to the codebase.
+    5. The AI maintains context across your entire conversation for coherent iteration.
 
 - Deployment packages your application and provisions hosting automatically.
 

diff --git a/apps/web-evals/src/app/runs/[id]/run.tsx b/apps/web-evals/src/app/runs/[id]/run.tsx
@@ -242,7 +242,7 @@ function formatLogContent(log: string): React.ReactNode[] {
 
 export function Run({ run }: { run: Run }) {
 	const runStatus = useRunStatus(run)
-	const { tasks, tokenUsage, usageUpdatedAt, heartbeat, runners } = runStatus
+	const { tasks, tokenUsage, toolUsage, usageUpdatedAt, heartbeat, runners } = runStatus
 
 	const [selectedTask, setSelectedTask] = useState<Task | null>(null)
 	const [taskLog, setTaskLog] = useState<string | null>(null)
@@ -336,37 +336,70 @@ export function Run({ run }: { run: Run }) {
 	)
 
 	const taskMetrics: Record<number, TaskMetrics> = useMemo(() => {
+		// Reference usageUpdatedAt to trigger recomputation when Map contents change
+		void usageUpdatedAt
 		const metrics: Record<number, TaskMetrics> = {}
 
 		tasks?.forEach((task) => {
-			const usage = tokenUsage.get(task.id)
-
-			if (task.finishedAt && task.taskMetrics) {
-				metrics[task.id] = task.taskMetrics
-			} else if (usage) {
+			const streamingUsage = tokenUsage.get(task.id)
+			const dbMetrics = task.taskMetrics
+
+			// For finished tasks, prefer DB values but fall back to streaming values
+			// This handles race conditions during timeout where DB might not have latest data
+			if (task.finishedAt) {
+				// Check if DB metrics have meaningful values (not just default/empty)
+				const dbHasData = dbMetrics && (dbMetrics.tokensIn > 0 || dbMetrics.tokensOut > 0 || dbMetrics.cost > 0)
+				if (dbHasData) {
+					metrics[task.id] = dbMetrics
+				} else if (streamingUsage) {
+					// Fall back to streaming values if DB is empty/stale
+					metrics[task.id] = {
+						tokensIn: streamingUsage.totalTokensIn,
+						tokensOut: streamingUsage.totalTokensOut,
+						tokensContext: streamingUsage.contextTokens,
+						duration: streamingUsage.duration ?? 0,
+						cost: streamingUsage.totalCost,
+					}
+				}
+			} else if (streamingUsage) {
+				// For running tasks, use streaming values
 				metrics[task.id] = {
-					tokensIn: usage.totalTokensIn,
-					tokensOut: usage.totalTokensOut,
-					tokensContext: usage.contextTokens,
-					duration: usage.duration ?? 0,
-					cost: usage.totalCost,
+					tokensIn: streamingUsage.totalTokensIn,
+					tokensOut: streamingUsage.totalTokensOut,
+					tokensContext: streamingUsage.contextTokens,
+					duration: streamingUsage.duration ?? 0,
+					cost: streamingUsage.totalCost,
 				}
 			}
 		})
 
 		return metrics
-		// eslint-disable-next-line react-hooks/exhaustive-deps
 	}, [tasks, tokenUsage, usageUpdatedAt])
 
 	// Collect all unique tool names from all tasks and sort by total attempts
 	const toolColumns = useMemo<ToolName[]>(() => {
+		// Reference usageUpdatedAt to trigger recomputation when Map contents change
+		void usageUpdatedAt
 		if (!tasks) return []
 
 		const toolTotals = new Map<ToolName, number>()
 
 		for (const task of tasks) {
-			if (task.taskMetrics?.toolUsage) {
-				for (const [toolName, usage] of Object.entries(task.taskMetrics.toolUsage)) {
+			// Get both DB and streaming values
+			const dbToolUsage = task.taskMetrics?.toolUsage
+			const streamingToolUsage = toolUsage.get(task.id)
+
+			// For finished tasks, prefer DB values but fall back to streaming values
+			// For running tasks, use streaming values
+			// This handles race conditions during timeout where DB might not have latest data
+			const taskToolUsage = task.finishedAt
+				? dbToolUsage && Object.keys(dbToolUsage).length > 0
+					? dbToolUsage
+					: streamingToolUsage
+				: streamingToolUsage
+
+			if (taskToolUsage) {
+				for (const [toolName, usage] of Object.entries(taskToolUsage)) {
 					const tool = toolName as ToolName
 					const current = toolTotals.get(tool) ?? 0
 					toolTotals.set(tool, current + usage.attempts)
@@ -378,10 +411,13 @@ export function Run({ run }: { run: Run }) {
 		return Array.from(toolTotals.entries())
 			.sort((a, b) => b[1] - a[1])
 			.map(([name]): ToolName => name)
-	}, [tasks])
+		// toolUsage ref is stable; usageUpdatedAt triggers recomputation when Map contents change
+	}, [tasks, toolUsage, usageUpdatedAt])
 
 	// Compute aggregate stats
 	const stats = useMemo(() => {
+		// Reference usageUpdatedAt to trigger recomputation when Map contents change
+		void usageUpdatedAt
 		if (!tasks) return null
 
 		const passed = tasks.filter((t) => t.passed === true).length
@@ -393,8 +429,8 @@ export function Run({ run }: { run: Run }) {
 		let totalCost = 0
 		let totalDuration = 0
 
-		// Aggregate tool usage from completed tasks
-		const toolUsage: ToolUsage = {}
+		// Aggregate tool usage from all tasks (both finished and running)
+		const toolUsageAggregate: ToolUsage = {}
 
 		for (const task of tasks) {
 			const metrics = taskMetrics[task.id]
@@ -405,15 +441,24 @@ export function Run({ run }: { run: Run }) {
 				totalDuration += metrics.duration
 			}
 
-			// Aggregate tool usage from finished tasks with taskMetrics
-			if (task.finishedAt && task.taskMetrics?.toolUsage) {
-				for (const [key, usage] of Object.entries(task.taskMetrics.toolUsage)) {
+			// Aggregate tool usage: prefer DB values for finished tasks, fall back to streaming values
+			// This handles race conditions during timeout where DB might not have latest data
+			const dbToolUsage = task.taskMetrics?.toolUsage
+			const streamingToolUsage = toolUsage.get(task.id)
+			const taskToolUsage = task.finishedAt
+				? dbToolUsage && Object.keys(dbToolUsage).length > 0
+					? dbToolUsage
+					: streamingToolUsage
+				: streamingToolUsage
+
+			if (taskToolUsage) {
+				for (const [key, usage] of Object.entries(taskToolUsage)) {
 					const tool = key as keyof ToolUsage
-					if (!toolUsage[tool]) {
-						toolUsage[tool] = { attempts: 0, failures: 0 }
+					if (!toolUsageAggregate[tool]) {
+						toolUsageAggregate[tool] = { attempts: 0, failures: 0 }
 					}
-					toolUsage[tool].attempts += usage.attempts
-					toolUsage[tool].failures += usage.failures
+					toolUsageAggregate[tool].attempts += usage.attempts
+					toolUsageAggregate[tool].failures += usage.failures
 				}
 			}
 		}
@@ -427,13 +472,15 @@ export function Run({ run }: { run: Run }) {
 			totalTokensOut,
 			totalCost,
 			totalDuration,
-			toolUsage,
+			toolUsage: toolUsageAggregate,
 		}
-		// eslint-disable-next-line react-hooks/exhaustive-deps
-	}, [tasks, taskMetrics, tokenUsage, usageUpdatedAt])
+		// Map refs are stable; usageUpdatedAt triggers recomputation when Map contents change
+	}, [tasks, taskMetrics, toolUsage, usageUpdatedAt])
 
 	// Calculate elapsed time (wall-clock time from run creation to completion or now)
 	const elapsedTime = useMemo(() => {
+		// Reference usageUpdatedAt to trigger recomputation for live elapsed time updates
+		void usageUpdatedAt
 		if (!tasks || tasks.length === 0) return null
 
 		const startTime = new Date(run.createdAt).getTime()
@@ -452,7 +499,6 @@ export function Run({ run }: { run: Run }) {
 
 		// If still running, use current time
 		return Date.now() - startTime
-		// eslint-disable-next-line react-hooks/exhaustive-deps
 	}, [tasks, run.createdAt, run.taskMetricsId, usageUpdatedAt])
 
 	return (
@@ -655,7 +701,14 @@ export function Run({ run }: { run: Run }) {
 													{formatTokens(taskMetrics[task.id]!.tokensContext)}
 												</TableCell>
 												{toolColumns.map((toolName) => {
-													const usage = task.taskMetrics?.toolUsage?.[toolName]
+													// Use DB values for finished tasks, but fall back to streaming values
+													// if DB values are missing (handles race condition during timeout)
+													const dbUsage = task.taskMetrics?.toolUsage?.[toolName]
+													const streamingUsage = toolUsage.get(task.id)?.[toolName]
+													const usage = task.finishedAt
+														? (dbUsage ?? streamingUsage)
+														: streamingUsage
+
 													const successRate =
 														usage && usage.attempts > 0
 															? ((usage.attempts - usage.failures) / usage.attempts) * 100

diff --git a/apps/web-evals/src/hooks/use-run-status.ts b/apps/web-evals/src/hooks/use-run-status.ts
@@ -1,7 +1,7 @@
 import { useState, useCallback, useRef } from "react"
 import { useQuery, keepPreviousData } from "@tanstack/react-query"
 
-import { type TokenUsage, RooCodeEventName, taskEventSchema } from "@roo-code/types"
+import { type TokenUsage, type ToolUsage, RooCodeEventName, taskEventSchema } from "@roo-code/types"
 import type { Run, Task, TaskMetrics } from "@roo-code/evals"
 
 import { getHeartbeat } from "@/actions/heartbeat"
@@ -15,6 +15,7 @@ export type RunStatus = {
 	runners: string[] | undefined
 	tasks: (Task & { taskMetrics: TaskMetrics | null })[] | undefined
 	tokenUsage: Map<number, TokenUsage & { duration?: number }>
+	toolUsage: Map<number, ToolUsage>
 	usageUpdatedAt: number | undefined
 }
 
@@ -23,6 +24,7 @@ export const useRunStatus = (run: Run): RunStatus => {
 	const [usageUpdatedAt, setUsageUpdatedAt] = useState<number>()
 
 	const tokenUsage = useRef<Map<number, TokenUsage & { duration?: number }>>(new Map())
+	const toolUsage = useRef<Map<number, ToolUsage>>(new Map())
 	const startTimes = useRef<Map<number, number>>(new Map())
 
 	const { data: heartbeat } = useQuery({
@@ -78,6 +80,12 @@ export const useRunStatus = (run: Run): RunStatus => {
 				const startTime = startTimes.current.get(taskId)
 				const duration = startTime ? Date.now() - startTime : undefined
 				tokenUsage.current.set(taskId, { ...payload[1], duration })
+
+				// Track tool usage from streaming updates
+				if (payload[2]) {
+					toolUsage.current.set(taskId, payload[2])
+				}
+
 				setUsageUpdatedAt(Date.now())
 				break
 			}
@@ -96,6 +104,7 @@ export const useRunStatus = (run: Run): RunStatus => {
 		runners,
 		tasks,
 		tokenUsage: tokenUsage.current,
+		toolUsage: toolUsage.current,
 		usageUpdatedAt,
 	}
 }