diff --git a/.changeset/add-zai-glm-4-7-cerebras-model.md b/.changeset/add-zai-glm-4-7-cerebras-model.md new file mode 100644 index 00000000000..141553f5f1d --- /dev/null +++ b/.changeset/add-zai-glm-4-7-cerebras-model.md @@ -0,0 +1,5 @@ +--- +"kilo-code": patch +--- + +Add `zai-glm-4.7` to Cerebras models diff --git a/.changeset/cmdv-image-paste-macos.md b/.changeset/cmdv-image-paste-macos.md deleted file mode 100644 index 778e74b47b4..00000000000 --- a/.changeset/cmdv-image-paste-macos.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -"kilo-code": patch ---- - -Support Cmd+V for pasting images on macOS in VSCode terminal - -- Detect empty bracketed paste (when clipboard contains image instead of text) -- Trigger clipboard image check on empty paste or paste timeout -- Add Cmd+V (meta key) support alongside Ctrl+V for image paste diff --git a/.changeset/cute-flies-dance.md b/.changeset/cute-flies-dance.md new file mode 100644 index 00000000000..753c120f407 --- /dev/null +++ b/.changeset/cute-flies-dance.md @@ -0,0 +1,5 @@ +--- +"kilo-code": patch +--- + +Improved prompt caching when using Anthropic models on OpenRouter with native tool calling diff --git a/.changeset/enable-jetbrains-autocomplete.md b/.changeset/enable-jetbrains-autocomplete.md deleted file mode 100644 index 109f89977ea..00000000000 --- a/.changeset/enable-jetbrains-autocomplete.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"kilo-code": patch ---- - -Enable autocomplete by default in the JetBrains extension diff --git a/.changeset/fix-vscode-paste-truncation.md b/.changeset/fix-vscode-paste-truncation.md deleted file mode 100644 index 36a1c97f667..00000000000 --- a/.changeset/fix-vscode-paste-truncation.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -"kilo-code": patch ---- - -Fix paste truncation in VSCode terminal - -- Prevent React StrictMode cleanup from interrupting paste operations -- Remove `completePaste()` and `clearBuffers()` from useEffect cleanup -- Paste buffer refs now persist across React re-mounts and flush properly when paste end marker is received diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fcf33f0562..638ca212ae5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,29 @@ # kilo-code +## 4.143.1 + +### Patch Changes + +- [#4832](https://github.com/Kilo-Org/kilocode/pull/4832) [`22a4ebf`](https://github.com/Kilo-Org/kilocode/commit/22a4ebfcd9f885b6ef9979dc6830226db9a4f397) Thanks [@Drilmo](https://github.com/Drilmo)! - Support Cmd+V for pasting images on macOS in VSCode terminal + + - Detect empty bracketed paste (when clipboard contains image instead of text) + - Trigger clipboard image check on empty paste or paste timeout + - Add Cmd+V (meta key) support alongside Ctrl+V for image paste + +- [#3856](https://github.com/Kilo-Org/kilocode/pull/3856) [`91e0a17`](https://github.com/Kilo-Org/kilocode/commit/91e0a1788963b8be50c58881f11ded96516ab163) Thanks [@markijbema](https://github.com/markijbema)! - Faster autocomplete when using the Mistral provider + +- [#4839](https://github.com/Kilo-Org/kilocode/pull/4839) [`abaada6`](https://github.com/Kilo-Org/kilocode/commit/abaada6b7ced6d3f4e37e69441e722e453289b81) Thanks [@markijbema](https://github.com/markijbema)! - Enable autocomplete by default in the JetBrains extension + +- [#4831](https://github.com/Kilo-Org/kilocode/pull/4831) [`a9cbb2c`](https://github.com/Kilo-Org/kilocode/commit/a9cbb2cebd75e0c675dc3b55e7a1653ccb93921b) Thanks [@Drilmo](https://github.com/Drilmo)! 
- Fix paste truncation in VSCode terminal + + - Prevent React StrictMode cleanup from interrupting paste operations + - Remove `completePaste()` and `clearBuffers()` from useEffect cleanup + - Paste buffer refs now persist across React re-mounts and flush properly when paste end marker is received + +- [#4847](https://github.com/Kilo-Org/kilocode/pull/4847) [`8ee812a`](https://github.com/Kilo-Org/kilocode/commit/8ee812a18da5da691bf76ee5c5d9d94cfb678f25) Thanks [@chrarnoldus](https://github.com/chrarnoldus)! - Disable structured outputs for Anthropic models, because the tool schema doesn't yet support it + +- [#4843](https://github.com/Kilo-Org/kilocode/pull/4843) [`0e3520a`](https://github.com/Kilo-Org/kilocode/commit/0e3520a0aa9a74f7a28af1f820558d2343fd4fba) Thanks [@markijbema](https://github.com/markijbema)! - Filter unhelpful suggestions in chat autocomplete + ## 4.143.0 ### Minor Changes diff --git a/apps/kilocode-docs/docs/providers/cerebras.md b/apps/kilocode-docs/docs/providers/cerebras.md index 5a92ce88246..14ab0289b2f 100644 --- a/apps/kilocode-docs/docs/providers/cerebras.md +++ b/apps/kilocode-docs/docs/providers/cerebras.md @@ -20,7 +20,8 @@ Cerebras is known for their ultra-fast AI inference powered by the Cerebras CS-3 Kilo Code supports the following Cerebras models: - `gpt-oss-120b` (Default) – High-performance open-source model optimized for fast inference -- `zai-glm-4.6` – Advanced GLM model with enhanced reasoning capabilities +- `zai-glm-4.6` – Fast general-purpose model on Cerebras (up to 1,000 tokens/s). To be deprecated soon. +- `zai-glm-4.7` – Highly capable general-purpose model on Cerebras (up to 1,000 tokens/s), competitive with leading proprietary models on coding tasks. Refer to the [Cerebras documentation](https://docs.cerebras.ai/) for detailed information on model capabilities and performance characteristics. diff --git a/packages/types/src/providers/cerebras.ts b/packages/types/src/providers/cerebras.ts index 1f28c00bdfd..c5f770d4b2a 100644 --- a/packages/types/src/providers/cerebras.ts +++ b/packages/types/src/providers/cerebras.ts @@ -14,7 +14,18 @@ export const cerebrasModels = { supportsNativeTools: true, inputPrice: 0, outputPrice: 0, - description: "Highly intelligent general purpose model with up to 1,000 tokens/s", + description: "Fast general-purpose model on Cerebras (up to 1,000 tokens/s). 
To be deprecated soon.", + }, + "zai-glm-4.7": { + maxTokens: 16384, // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront) + contextWindow: 131072, + supportsImages: false, + supportsPromptCache: false, + supportsNativeTools: true, + inputPrice: 0, + outputPrice: 0, + description: + "Highly capable general-purpose model on Cerebras (up to 1,000 tokens/s), competitive with leading proprietary models on coding tasks.", }, "qwen-3-235b-a22b-instruct-2507": { maxTokens: 16384, // Conservative default to avoid premature rate limiting diff --git a/src/api/providers/__tests__/kilocode-openrouter.spec.ts b/src/api/providers/__tests__/kilocode-openrouter.spec.ts index 016eac8fb40..c1138019118 100644 --- a/src/api/providers/__tests__/kilocode-openrouter.spec.ts +++ b/src/api/providers/__tests__/kilocode-openrouter.spec.ts @@ -259,26 +259,6 @@ describe("KilocodeOpenrouterHandler", () => { expect(handler.supportsFim()).toBe(false) }) - it("completeFim handles errors correctly", async () => { - const handler = new KilocodeOpenrouterHandler({ - ...mockOptions, - kilocodeModel: "mistral/codestral-latest", - }) - - const mockResponse = { - ok: false, - status: 500, - statusText: "Internal Server Error", - text: vitest.fn().mockResolvedValue("Error details"), - } - - global.fetch = vitest.fn().mockResolvedValue(mockResponse) - - await expect(handler.completeFim("prefix", "suffix")).rejects.toThrow( - "FIM streaming failed: 500 Internal Server Error - Error details", - ) - }) - it("streamFim yields chunks correctly", async () => { const handler = new KilocodeOpenrouterHandler({ ...mockOptions, diff --git a/src/api/providers/__tests__/mistral-fim.spec.ts b/src/api/providers/__tests__/mistral-fim.spec.ts new file mode 100644 index 00000000000..b9ebade8fac --- /dev/null +++ b/src/api/providers/__tests__/mistral-fim.spec.ts @@ -0,0 +1,180 @@ +// kilocode_change - new file +// npx vitest run src/api/providers/__tests__/mistral-fim.spec.ts + +// Mock vscode first to avoid import errors +vitest.mock("vscode", () => ({})) + +import { MistralHandler } from "../mistral" +import { ApiHandlerOptions } from "../../../shared/api" +import { streamSse } from "../../../services/continuedev/core/fetch/stream" + +// Mock the stream module +vitest.mock("../../../services/continuedev/core/fetch/stream", () => ({ + streamSse: vitest.fn(), +})) + +// Mock delay +vitest.mock("delay", () => ({ default: vitest.fn(() => Promise.resolve()) })) + +describe("MistralHandler FIM support", () => { + const mockOptions: ApiHandlerOptions = { + mistralApiKey: "test-api-key", + apiModelId: "codestral-latest", + } + + beforeEach(() => vitest.clearAllMocks()) + + describe("supportsFim", () => { + it("returns true for codestral models", () => { + const handler = new MistralHandler({ + ...mockOptions, + apiModelId: "codestral-latest", + }) + + expect(handler.supportsFim()).toBe(true) + }) + + it("returns true for codestral-2405", () => { + const handler = new MistralHandler({ + ...mockOptions, + apiModelId: "codestral-2405", + }) + + expect(handler.supportsFim()).toBe(true) + }) + + it("returns false for non-codestral models", () => { + const handler = new MistralHandler({ + ...mockOptions, + apiModelId: "mistral-large-latest", + }) + + expect(handler.supportsFim()).toBe(false) + }) + + it("returns true when no model is specified (defaults to codestral-latest)", () => { + const handler = new MistralHandler({ + mistralApiKey: "test-api-key", + }) + + // Default model is codestral-latest, which supports FIM + 
expect(handler.supportsFim()).toBe(true) + }) + }) + + describe("streamFim", () => { + it("yields chunks correctly", async () => { + const handler = new MistralHandler({ + ...mockOptions, + apiModelId: "codestral-latest", + }) + + // Mock streamSse to return the expected data + ;(streamSse as any).mockImplementation(async function* () { + yield { choices: [{ delta: { content: "chunk1" } }] } + yield { choices: [{ delta: { content: "chunk2" } }] } + yield { choices: [{ delta: { content: "chunk3" } }] } + }) + + const mockResponse = { + ok: true, + status: 200, + statusText: "OK", + } as Response + + global.fetch = vitest.fn().mockResolvedValue(mockResponse) + + const chunks: string[] = [] + + for await (const chunk of handler.streamFim("prefix", "suffix")) { + chunks.push(chunk) + } + + expect(chunks).toEqual(["chunk1", "chunk2", "chunk3"]) + expect(streamSse).toHaveBeenCalledWith(mockResponse) + }) + + it("handles errors correctly", async () => { + const handler = new MistralHandler({ + ...mockOptions, + apiModelId: "codestral-latest", + }) + + const mockResponse = { + ok: false, + status: 400, + statusText: "Bad Request", + text: vitest.fn().mockResolvedValue("Invalid request"), + } + + global.fetch = vitest.fn().mockResolvedValue(mockResponse) + + const generator = handler.streamFim("prefix", "suffix") + await expect(generator.next()).rejects.toThrow("FIM streaming failed: 400 Bad Request - Invalid request") + }) + + it("uses correct endpoint for codestral models", async () => { + const handler = new MistralHandler({ + ...mockOptions, + apiModelId: "codestral-latest", + }) + + ;(streamSse as any).mockImplementation(async function* () { + yield { choices: [{ delta: { content: "test" } }] } + }) + + const mockResponse = { + ok: true, + status: 200, + statusText: "OK", + } as Response + + global.fetch = vitest.fn().mockResolvedValue(mockResponse) + + const generator = handler.streamFim("prefix", "suffix") + await generator.next() + + expect(global.fetch).toHaveBeenCalledWith( + expect.objectContaining({ + href: "https://codestral.mistral.ai/v1/fim/completions", + }), + expect.objectContaining({ + method: "POST", + headers: expect.objectContaining({ + Authorization: "Bearer test-api-key", + }), + }), + ) + }) + + it("uses custom codestral URL when provided", async () => { + const handler = new MistralHandler({ + ...mockOptions, + apiModelId: "codestral-latest", + mistralCodestralUrl: "https://custom.codestral.url", + }) + + ;(streamSse as any).mockImplementation(async function* () { + yield { choices: [{ delta: { content: "test" } }] } + }) + + const mockResponse = { + ok: true, + status: 200, + statusText: "OK", + } as Response + + global.fetch = vitest.fn().mockResolvedValue(mockResponse) + + const generator = handler.streamFim("prefix", "suffix") + await generator.next() + + expect(global.fetch).toHaveBeenCalledWith( + expect.objectContaining({ + href: "https://custom.codestral.url/v1/fim/completions", + }), + expect.any(Object), + ) + }) + }) +}) diff --git a/src/api/providers/__tests__/openrouter.spec.ts b/src/api/providers/__tests__/openrouter.spec.ts index 2dc6a3dc1cb..ee34fd4f52f 100644 --- a/src/api/providers/__tests__/openrouter.spec.ts +++ b/src/api/providers/__tests__/openrouter.spec.ts @@ -71,10 +71,6 @@ describe("OpenRouterHandler", () => { openRouterModelId: "anthropic/claude-sonnet-4", } - // kilocode_change start - const anthropicBetaHeaderValue = "fine-grained-tool-streaming-2025-05-14,structured-outputs-2025-11-13" - // kilocode_change end - beforeEach(() => 
vitest.clearAllMocks()) it("initializes with correct options", () => { @@ -208,13 +204,7 @@ describe("OpenRouterHandler", () => { top_p: undefined, transforms: ["middle-out"], }), - // kilocode_change start - expect.objectContaining({ - headers: expect.objectContaining({ - "x-anthropic-beta": anthropicBetaHeaderValue, - }), - }), - // kilocode_change end + { headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } }, ) }) @@ -239,16 +229,9 @@ describe("OpenRouterHandler", () => { await handler.createMessage("test", []).next() - expect(mockCreate).toHaveBeenCalledWith( - expect.objectContaining({ transforms: ["middle-out"] }), - // kilocode_change start - expect.objectContaining({ - headers: expect.objectContaining({ - "x-anthropic-beta": anthropicBetaHeaderValue, - }), - }), - // kilocode_change end - ) + expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ transforms: ["middle-out"] }), { + headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" }, + }) }) it("adds cache control for supported models", async () => { @@ -290,13 +273,7 @@ describe("OpenRouterHandler", () => { }), ]), }), - // kilocode_change start - expect.objectContaining({ - headers: expect.objectContaining({ - "x-anthropic-beta": anthropicBetaHeaderValue, - }), - }), - // kilocode_change end + { headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } }, ) }) @@ -537,13 +514,7 @@ describe("OpenRouterHandler", () => { messages: [{ role: "user", content: "test prompt" }], stream: false, }, - // kilocode_change start - expect.objectContaining({ - headers: expect.objectContaining({ - "x-anthropic-beta": anthropicBetaHeaderValue, - }), - }), - // kilocode_change end + { headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } }, ) }) diff --git a/src/api/providers/kilocode-openrouter.ts b/src/api/providers/kilocode-openrouter.ts index 6bcb55ffa27..a09e00e03ba 100644 --- a/src/api/providers/kilocode-openrouter.ts +++ b/src/api/providers/kilocode-openrouter.ts @@ -148,14 +148,6 @@ export class KilocodeOpenrouterHandler extends OpenRouterHandler { return modelId.includes("codestral") } - async completeFim(prefix: string, suffix: string, taskId?: string): Promise { - let result = "" - for await (const chunk of this.streamFim(prefix, suffix, taskId)) { - result += chunk - } - return result - } - async *streamFim( prefix: string, suffix: string, diff --git a/src/api/providers/kilocode/IFimProvider.ts b/src/api/providers/kilocode/IFimProvider.ts index 487301dc86b..dc964b79c48 100644 --- a/src/api/providers/kilocode/IFimProvider.ts +++ b/src/api/providers/kilocode/IFimProvider.ts @@ -13,15 +13,6 @@ export interface IFimProvider { */ supportsFim(): boolean - /** - * Complete code between a prefix and suffix (non-streaming) - * @param prefix - The code before the cursor/insertion point - * @param suffix - The code after the cursor/insertion point - * @param taskId - Optional task ID for tracking - * @returns The completed code string - */ - completeFim(prefix: string, suffix: string, taskId?: string): Promise - /** * Stream code completion between a prefix and suffix * @param prefix - The code before the cursor/insertion point diff --git a/src/api/providers/mistral.ts b/src/api/providers/mistral.ts index 96d2c332552..6ef99acd93f 100644 --- a/src/api/providers/mistral.ts +++ b/src/api/providers/mistral.ts @@ -11,6 +11,9 @@ import { ApiStream } from "../transform/stream" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, 
ApiHandlerCreateMessageMetadata } from "../index" +import { DEFAULT_HEADERS } from "./constants" // kilocode_change +import { streamSse } from "../../services/continuedev/core/fetch/stream" // kilocode_change +import type { CompletionUsage } from "./openrouter" // kilocode_change // Type helper to handle thinking chunks from Mistral API // The SDK includes ThinkChunk but TypeScript has trouble with the discriminated union @@ -209,4 +212,74 @@ export class MistralHandler extends BaseProvider implements SingleCompletionHand throw error } } + + // kilocode_change start + supportsFim(): boolean { + const modelId = this.options.apiModelId ?? mistralDefaultModelId + return modelId.startsWith("codestral-") + } + + async *streamFim( + prefix: string, + suffix: string, + _taskId?: string, + onUsage?: (usage: CompletionUsage) => void, + ): AsyncGenerator<string> { + const { id: model, maxTokens } = this.getModel() + + // Get the base URL for the model + // copy pasted from constructor, be sure to keep in sync + const baseUrl = model.startsWith("codestral-") + ? this.options.mistralCodestralUrl || "https://codestral.mistral.ai" + : "https://api.mistral.ai" + + const endpoint = new URL("v1/fim/completions", baseUrl) + + const headers: Record<string, string> = { + ...DEFAULT_HEADERS, + "Content-Type": "application/json", + Accept: "application/json", + Authorization: `Bearer ${this.options.mistralApiKey}`, + } + + // temperature: 0.2 is mentioned as a sane example in mistral's docs + const temperature = 0.2 + const requestMaxTokens = 256 + + const response = await fetch(endpoint, { + method: "POST", + body: JSON.stringify({ + model, + prompt: prefix, + suffix, + max_tokens: Math.min(requestMaxTokens, maxTokens ?? requestMaxTokens), + temperature, + stream: true, + }), + headers, + }) + + if (!response.ok) { + const errorText = await response.text() + throw new Error(`FIM streaming failed: ${response.status} ${response.statusText} - ${errorText}`) + } + + for await (const data of streamSse(response)) { + const content = data.choices?.[0]?.delta?.content + if (content) { + yield content + } + + // Call usage callback when available + // Note: Mistral FIM API returns usage in the final chunk with prompt_tokens and completion_tokens + if (data.usage && onUsage) { + onUsage({ + prompt_tokens: data.usage.prompt_tokens, + completion_tokens: data.usage.completion_tokens, + total_tokens: data.usage.total_tokens, + }) + } + } + } + // kilocode_change end } diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index 97a7fef3ac4..5fb60bde6df 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -20,7 +20,7 @@ import { resolveToolProtocol } from "../../utils/resolveToolProtocol" import { TOOL_PROTOCOL } from "@roo-code/types" import { ApiStreamChunk } from "../transform/stream" import { convertToR1Format } from "../transform/r1-format" -import { addCacheBreakpoints as addAnthropicCacheBreakpoints } from "../transform/caching/anthropic" +import { addAnthropicCacheBreakpoints } from "../transform/caching/kilocode" // kilocode_change: own implementation that supports tool results import { addCacheBreakpoints as addGeminiCacheBreakpoints } from "../transform/caching/gemini" import type { OpenRouterReasoningParams } from "../transform/reasoning" import { getModelParams } from "../transform/model-params" @@ -315,8 +315,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH // kilocode_change start const requestOptions = this.customRequestOptions(metadata) ?? 
{ headers: {} } if (modelId.startsWith("anthropic/")) { - requestOptions.headers["x-anthropic-beta"] = - "fine-grained-tool-streaming-2025-05-14,structured-outputs-2025-11-13" + requestOptions.headers["x-anthropic-beta"] = "fine-grained-tool-streaming-2025-05-14" } // kilocode_change end @@ -566,8 +565,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH // kilocode_change start const requestOptions = this.customRequestOptions() ?? { headers: {} } if (modelId.startsWith("anthropic/")) { - requestOptions.headers["x-anthropic-beta"] = - "fine-grained-tool-streaming-2025-05-14,structured-outputs-2025-11-13" + requestOptions.headers["x-anthropic-beta"] = "fine-grained-tool-streaming-2025-05-14" } // kilocode_change end diff --git a/src/api/transform/caching/__tests__/kilocode.spec.ts b/src/api/transform/caching/__tests__/kilocode.spec.ts new file mode 100644 index 00000000000..e4d1ddbde62 --- /dev/null +++ b/src/api/transform/caching/__tests__/kilocode.spec.ts @@ -0,0 +1,245 @@ +// npx vitest run src/api/transform/caching/__tests__/kilocode.spec.ts + +import OpenAI from "openai" + +import { addAnthropicCacheBreakpoints } from "../kilocode" + +describe("addAnthropicCacheBreakpoints (Kilocode)", () => { + const systemPrompt = "You are a helpful assistant." + + it("should add a cache breakpoint to the system prompt", () => { + const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ + { role: "system", content: systemPrompt }, + { role: "user", content: "Hello" }, + ] + + addAnthropicCacheBreakpoints(systemPrompt, messages) + + expect(messages[0].content).toEqual([ + { type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }, + ]) + }) + + it("should add a breakpoint to the only user message if only one exists", () => { + const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ + { role: "system", content: systemPrompt }, + { role: "user", content: "User message 1" }, + ] + + addAnthropicCacheBreakpoints(systemPrompt, messages) + + // System prompt gets cache control + expect(messages[0].content).toEqual([ + { type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }, + ]) + + // Last user message gets cache control + expect(messages[1].content).toEqual([ + { type: "text", text: "User message 1", cache_control: { type: "ephemeral" } }, + ]) + }) + + it("should add breakpoints to system, last user, and user before last assistant", () => { + const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ + { role: "system", content: systemPrompt }, + { role: "user", content: "User message 1" }, + { role: "assistant", content: "Assistant response 1" }, + { role: "user", content: "User message 2" }, + ] + + addAnthropicCacheBreakpoints(systemPrompt, messages) + + // System prompt gets cache control + expect(messages[0].content).toEqual([ + { type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }, + ]) + + // User message before last assistant gets cache control + expect(messages[1].content).toEqual([ + { type: "text", text: "User message 1", cache_control: { type: "ephemeral" } }, + ]) + + // Assistant message should not be modified + expect(messages[2].content).toBe("Assistant response 1") + + // Last user message gets cache control + expect(messages[3].content).toEqual([ + { type: "text", text: "User message 2", cache_control: { type: "ephemeral" } }, + ]) + }) + + it("should handle multiple assistant messages and find the user before the last one", () => { + const messages: 
OpenAI.Chat.ChatCompletionMessageParam[] = [ + { role: "system", content: systemPrompt }, + { role: "user", content: "User message 1" }, + { role: "assistant", content: "Assistant response 1" }, + { role: "user", content: "User message 2" }, + { role: "assistant", content: "Assistant response 2" }, + { role: "user", content: "User message 3" }, + ] + + addAnthropicCacheBreakpoints(systemPrompt, messages) + + // System prompt gets cache control + expect(messages[0].content).toEqual([ + { type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }, + ]) + + // First user message should NOT get cache control (not before last assistant) + expect(messages[1].content).toBe("User message 1") + + // User message before last assistant (index 4) gets cache control + expect(messages[3].content).toEqual([ + { type: "text", text: "User message 2", cache_control: { type: "ephemeral" } }, + ]) + + // Last user message gets cache control + expect(messages[5].content).toEqual([ + { type: "text", text: "User message 3", cache_control: { type: "ephemeral" } }, + ]) + }) + + it("should handle tool messages the same as user messages", () => { + const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ + { role: "system", content: systemPrompt }, + { role: "user", content: "User message 1" }, + { role: "assistant", content: "Let me use a tool" }, + { role: "tool", content: "Tool result", tool_call_id: "call_123" }, + ] + + addAnthropicCacheBreakpoints(systemPrompt, messages) + + // System prompt gets cache control + expect(messages[0].content).toEqual([ + { type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }, + ]) + + // User message before last assistant gets cache control + expect(messages[1].content).toEqual([ + { type: "text", text: "User message 1", cache_control: { type: "ephemeral" } }, + ]) + + // Tool message (last user/tool) gets cache control + expect(messages[3].content).toEqual([ + { type: "text", text: "Tool result", cache_control: { type: "ephemeral" } }, + ]) + }) + + it("should handle array content and add cache control to last item", () => { + const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ + { role: "system", content: systemPrompt }, + { + role: "user", + content: [ + { type: "text", text: "First part" }, + { type: "image_url", image_url: { url: "data:image/png;base64,..." } }, + { type: "text", text: "Last part" }, + ], + }, + ] + + addAnthropicCacheBreakpoints(systemPrompt, messages) + + expect(messages[1].content).toEqual([ + { type: "text", text: "First part" }, + { type: "image_url", image_url: { url: "data:image/png;base64,..." } }, + { type: "text", text: "Last part", cache_control: { type: "ephemeral" } }, + ]) + }) + + it("should add cache control to last item of array when it's an image", () => { + const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ + { role: "system", content: systemPrompt }, + { + role: "user", + content: [ + { type: "text", text: "Some text" }, + { type: "image_url", image_url: { url: "data:image/png;base64,..." } }, + ], + }, + ] + + addAnthropicCacheBreakpoints(systemPrompt, messages) + + // Cache control should be on the last item (the image) + expect(messages[1].content).toEqual([ + { type: "text", text: "Some text" }, + { + type: "image_url", + image_url: { url: "data:image/png;base64,..." 
}, + cache_control: { type: "ephemeral" }, + }, + ]) + }) + + it("should not add breakpoints when there are no user or tool messages", () => { + const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ + { role: "system", content: systemPrompt }, + { role: "assistant", content: "Hello" }, + ] + + addAnthropicCacheBreakpoints(systemPrompt, messages) + + // Only system prompt should get cache control + expect(messages[0].content).toEqual([ + { type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }, + ]) + + // Assistant message should not be modified + expect(messages[1].content).toBe("Hello") + }) + + it("should handle case when system prompt is found in messages array", () => { + const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ + { role: "system", content: "Different system prompt in array" }, + { role: "user", content: "Hello" }, + ] + + addAnthropicCacheBreakpoints(systemPrompt, messages) + + // Should use the system prompt found in messages, not the passed parameter + expect(messages[0].content).toEqual([ + { type: "text", text: "Different system prompt in array", cache_control: { type: "ephemeral" } }, + ]) + }) + + it("should handle when last user message is also user before last assistant (same message)", () => { + const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ + { role: "system", content: systemPrompt }, + { role: "user", content: "User message 1" }, + { role: "assistant", content: "Assistant response" }, + ] + + addAnthropicCacheBreakpoints(systemPrompt, messages) + + // System prompt gets cache control + expect(messages[0].content).toEqual([ + { type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }, + ]) + + // User message 1 is both before last assistant and is the last user message + // It should have cache control set (the function calls setCacheControl twice on same message) + expect(messages[1].content).toEqual([ + { type: "text", text: "User message 1", cache_control: { type: "ephemeral" } }, + ]) + }) + + it("should handle empty messages array gracefully", () => { + const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [] + + // Should not throw + expect(() => addAnthropicCacheBreakpoints(systemPrompt, messages)).not.toThrow() + }) + + it("should handle empty array content", () => { + const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [ + { role: "system", content: systemPrompt }, + { role: "user", content: [] }, + ] + + addAnthropicCacheBreakpoints(systemPrompt, messages) + + // Empty array should remain empty (no last item to add cache control to) + expect(messages[1].content).toEqual([]) + }) +}) diff --git a/src/api/transform/caching/kilocode.ts b/src/api/transform/caching/kilocode.ts new file mode 100644 index 00000000000..b955ef620a8 --- /dev/null +++ b/src/api/transform/caching/kilocode.ts @@ -0,0 +1,47 @@ +import OpenAI from "openai" +import { findLast, findLastIndex } from "../../../shared/array" + +function setCacheControl(message: OpenAI.ChatCompletionMessageParam) { + if (typeof message.content === "string") { + message.content = [ + { + type: "text", + text: message.content, + // @ts-ignore-next-line + cache_control: { type: "ephemeral" }, + }, + ] + } else if (Array.isArray(message.content)) { + const lastItem = message.content.at(-1) + if (lastItem) { + // @ts-ignore-next-line + lastItem.cache_control = { type: "ephemeral" } + } + } +} + +export function addAnthropicCacheBreakpoints( + _systemPrompt: string, + messages: OpenAI.Chat.ChatCompletionMessageParam[], +) { + const 
systemPrompt = messages.find((msg) => msg.role === "system") + if (systemPrompt) { + setCacheControl(systemPrompt) + } + + const lastUserMessage = findLast(messages, (msg) => msg.role === "user" || msg.role === "tool") + if (lastUserMessage) { + setCacheControl(lastUserMessage) + } + + const lastAssistantIndex = findLastIndex(messages, (msg) => msg.role === "assistant") + if (lastAssistantIndex >= 0) { + const previousUserMessage = findLast( + messages.slice(0, lastAssistantIndex), + (msg) => msg.role === "user" || msg.role === "tool", + ) + if (previousUserMessage) { + setCacheControl(previousUserMessage) + } + } +} diff --git a/src/package.json b/src/package.json index 28d26e7f29b..9c03f733942 100644 --- a/src/package.json +++ b/src/package.json @@ -3,7 +3,7 @@ "displayName": "%extension.displayName%", "description": "%extension.description%", "publisher": "kilocode", - "version": "4.143.0", + "version": "4.143.1", "icon": "assets/icons/logo-outline-black.png", "galleryBanner": { "color": "#FFFFFF", diff --git a/src/services/ghost/GhostModel.ts b/src/services/ghost/GhostModel.ts index 50019360dd2..3e9a74ffea4 100644 --- a/src/services/ghost/GhostModel.ts +++ b/src/services/ghost/GhostModel.ts @@ -1,3 +1,4 @@ +// kilocode_change new file import { modelIdKeysByProvider, ProviderName } from "@roo-code/types" import { ApiHandler, buildApiHandler } from "../../api" import { ProviderSettingsManager } from "../../core/config/ProviderSettingsManager" @@ -9,6 +10,33 @@ import { KilocodeOpenrouterHandler } from "../../api/providers/kilocode-openrout import { PROVIDERS } from "../../../webview-ui/src/components/settings/constants" import { ResponseMetaData } from "./types" +/** + * Interface for handlers that support FIM (Fill-in-the-Middle) completions. + * Uses duck typing - any handler implementing these methods can be used for FIM. + */ +interface FimCapableHandler { + supportsFim(): boolean + streamFim( + prefix: string, + suffix: string, + taskId?: string, + onUsage?: (usage: CompletionUsage) => void, + ): AsyncGenerator + getModel(): { id: string; info: any; maxTokens?: number } + getTotalCost?(usage: CompletionUsage): number +} + +/** + * Type guard to check if a handler supports FIM operations using duck typing. + */ +function isFimCapable(handler: ApiHandler): handler is ApiHandler & FimCapableHandler { + return ( + typeof (handler as any).supportsFim === "function" && + typeof (handler as any).streamFim === "function" && + (handler as any).supportsFim() === true + ) +} + // Convert PROVIDERS array to a lookup map for display names const PROVIDER_DISPLAY_NAMES = Object.fromEntries(PROVIDERS.map(({ value, label }) => [value, label])) as Record< ProviderName, @@ -92,15 +120,13 @@ export class GhostModel { return false } - if (this.apiHandler instanceof KilocodeOpenrouterHandler) { - return this.apiHandler.supportsFim() - } - - return false + // Use duck typing to check if the handler supports FIM + return isFimCapable(this.apiHandler) } /** - * Generate FIM completion using the FIM API endpoint + * Generate FIM completion using the FIM API endpoint. + * Uses duck typing to support any handler that implements supportsFim() and streamFim(). */ public async generateFimResponse( prefix: string, @@ -113,12 +139,8 @@ export class GhostModel { throw new Error("API handler is not initialized. 
Please check your configuration.") } - if (!(this.apiHandler instanceof KilocodeOpenrouterHandler)) { - throw new Error("FIM is only supported for KiloCode provider") - } - - if (!this.apiHandler.supportsFim()) { - throw new Error("Current model does not support FIM completions") + if (!isFimCapable(this.apiHandler)) { + throw new Error("Current provider/model does not support FIM completions") } console.log("USED MODEL (FIM)", this.apiHandler.getModel()) @@ -131,7 +153,9 @@ export class GhostModel { onChunk(chunk) } - const cost = usage ? this.apiHandler.getTotalCost(usage) : 0 + // Calculate cost if the handler supports it (duck typing) + const cost = + usage && typeof this.apiHandler.getTotalCost === "function" ? this.apiHandler.getTotalCost(usage) : 0 const inputTokens = usage?.prompt_tokens ?? 0 const outputTokens = usage?.completion_tokens ?? 0 const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens ?? 0 diff --git a/src/services/ghost/chat-autocomplete/ChatTextAreaAutocomplete.ts b/src/services/ghost/chat-autocomplete/ChatTextAreaAutocomplete.ts index 334c426392f..4e961d959b8 100644 --- a/src/services/ghost/chat-autocomplete/ChatTextAreaAutocomplete.ts +++ b/src/services/ghost/chat-autocomplete/ChatTextAreaAutocomplete.ts @@ -2,13 +2,10 @@ import * as vscode from "vscode" import { GhostModel } from "../GhostModel" import { ProviderSettingsManager } from "../../../core/config/ProviderSettingsManager" import { AutocompleteContext, VisibleCodeContext } from "../types" -import { ApiStreamChunk } from "../../../api/transform/stream" import { removePrefixOverlap } from "../../continuedev/core/autocomplete/postprocessing/removePrefixOverlap.js" import { AutocompleteTelemetry } from "../classic-auto-complete/AutocompleteTelemetry" +import { postprocessGhostSuggestion } from "../classic-auto-complete/uselessSuggestionFilter" -/** - * Service for providing FIM-based autocomplete suggestions in ChatTextArea - */ export class ChatTextAreaAutocomplete { private model: GhostModel private providerSettingsManager: ProviderSettingsManager @@ -24,14 +21,6 @@ export class ChatTextAreaAutocomplete { return this.model.reload(this.providerSettingsManager) } - /** - * Check if we can successfully make a FIM request. - * Validates that model is loaded, has valid API handler, and supports FIM. - */ - isFimAvailable(): boolean { - return this.model.hasValidCredentials() && this.model.supportsFim() - } - async getCompletion(userText: string, visibleCodeContext?: VisibleCodeContext): Promise<{ suggestion: string }> { const startTime = Date.now() @@ -147,9 +136,6 @@ TASK: Complete the user's message naturally. - Return ONLY the completion text (what comes next), no explanations.` } - /** - * Build the prefix for FIM completion with visible code context and additional sources - */ private async buildPrefix(userText: string, visibleCodeContext?: VisibleCodeContext): Promise { const contextParts: string[] = [] @@ -179,9 +165,6 @@ TASK: Complete the user's message naturally. return contextParts.join("\n") } - /** - * Get clipboard content for context - */ private async getClipboardContext(): Promise { try { const text = await vscode.env.clipboard.readText() @@ -195,51 +178,30 @@ TASK: Complete the user's message naturally. 
return null } - /** - * Clean the suggestion by removing any leading repetition of user text - * and filtering out unwanted patterns like comments - */ - private cleanSuggestion(suggestion: string, userText: string): string { - let cleaned = suggestion - - cleaned = removePrefixOverlap(cleaned, userText) - - const firstNewline = cleaned.indexOf("\n") - if (firstNewline !== -1) { - cleaned = cleaned.substring(0, firstNewline) - } - cleaned = cleaned.trimEnd() // Do NOT trim the end of the suggestion + public cleanSuggestion(suggestion: string, userText: string): string { + let cleaned = postprocessGhostSuggestion({ + suggestion: removePrefixOverlap(suggestion, userText), + prefix: userText, + suffix: "", // Chat textarea has no suffix + model: this.model.getModelName() ?? "unknown", + }) - // Filter out suggestions that start with comment patterns - // This happens because the context uses // prefixes for labels - if (this.isUnwantedSuggestion(cleaned)) { + if (cleaned === undefined) { return "" } - return cleaned - } - - /** - * Check if suggestion should be filtered out - */ - public isUnwantedSuggestion(suggestion: string): boolean { - // Filter comment-starting suggestions - if (suggestion.startsWith("//") || suggestion.startsWith("/*") || suggestion.startsWith("*")) { - return true - } - // Filter suggestions that look like code rather than natural language - // This includes preprocessor directives (#include) and markdown headers - // Chat is for natural language, not formatted documents - if (suggestion.startsWith("#")) { - return true + if (cleaned.match(/^(\/\/|\/\*|\*|#)/)) { + return "" } - // Filter suggestions that are just punctuation or whitespace - if (suggestion.length < 2 || /^[\s\p{P}]+$/u.test(suggestion)) { - return true + // Chat-specific: truncate at first newline for single-line suggestions + const firstNewline = cleaned.indexOf("\n") + if (firstNewline !== -1) { + cleaned = cleaned.substring(0, firstNewline) } + cleaned = cleaned.trimEnd() - return false + return cleaned } } diff --git a/src/services/ghost/chat-autocomplete/__tests__/ChatTextAreaAutocomplete.spec.ts b/src/services/ghost/chat-autocomplete/__tests__/ChatTextAreaAutocomplete.spec.ts index 26d1a26dff4..0d6bd41d254 100644 --- a/src/services/ghost/chat-autocomplete/__tests__/ChatTextAreaAutocomplete.spec.ts +++ b/src/services/ghost/chat-autocomplete/__tests__/ChatTextAreaAutocomplete.spec.ts @@ -45,46 +45,41 @@ describe("ChatTextAreaAutocomplete", () => { }) }) - describe("isFimAvailable", () => { - it("should return false when model is not loaded", () => { - const result = autocomplete.isFimAvailable() - expect(result).toBe(false) - }) - }) - - describe("isUnwantedSuggestion", () => { - it("should filter code patterns (comments, preprocessor, short/empty)", () => { - const filter = autocomplete.isUnwantedSuggestion.bind(autocomplete) - - // Comments - expect(filter("// comment")).toBe(true) - expect(filter("/* comment")).toBe(true) - expect(filter("*")).toBe(true) + describe("cleanSuggestion", () => { + it("should filter code patterns (comments, preprocessor)", () => { + // Comments - filtered by the regex check in cleanSuggestion + expect(autocomplete.cleanSuggestion("// comment", "")).toBe("") + expect(autocomplete.cleanSuggestion("/* comment", "")).toBe("") + expect(autocomplete.cleanSuggestion("* something", "")).toBe("") // Code patterns - expect(filter("#include")).toBe(true) - expect(filter("# Header")).toBe(true) + expect(autocomplete.cleanSuggestion("#include", "")).toBe("") + 
expect(autocomplete.cleanSuggestion("# Header", "")).toBe("") + }) - // Meaningless content - expect(filter("")).toBe(true) - expect(filter("a")).toBe(true) - expect(filter("...")).toBe(true) + it("should filter empty content", () => { + // Empty content is filtered by postprocessGhostSuggestion + expect(autocomplete.cleanSuggestion("", "")).toBe("") }) it("should accept natural language suggestions", () => { - const filter = autocomplete.isUnwantedSuggestion.bind(autocomplete) - - expect(filter("Hello world")).toBe(false) - expect(filter("Can you help me")).toBe(false) - expect(filter("test123")).toBe(false) - expect(filter("What's up?")).toBe(false) + expect(autocomplete.cleanSuggestion("Hello world", "")).toBe("Hello world") + expect(autocomplete.cleanSuggestion("Can you help me", "")).toBe("Can you help me") + expect(autocomplete.cleanSuggestion("test123", "")).toBe("test123") + expect(autocomplete.cleanSuggestion("What's up?", "")).toBe("What's up?") }) it("should accept symbols in middle of text", () => { - const filter = autocomplete.isUnwantedSuggestion.bind(autocomplete) + expect(autocomplete.cleanSuggestion("Text with # in middle", "")).toBe("Text with # in middle") + expect(autocomplete.cleanSuggestion("Hello // but not a comment", "")).toBe("Hello // but not a comment") + }) + + it("should truncate at first newline", () => { + expect(autocomplete.cleanSuggestion("First line\nSecond line", "")).toBe("First line") + }) - expect(filter("Text with # in middle")).toBe(false) - expect(filter("Hello // but not a comment")).toBe(false) + it("should remove prefix overlap", () => { + expect(autocomplete.cleanSuggestion("Hello world", "Hello ")).toBe("world") }) }) }) diff --git a/src/services/ghost/classic-auto-complete/__tests__/uselessSuggestionFilter.test.ts b/src/services/ghost/classic-auto-complete/__tests__/uselessSuggestionFilter.test.ts index d6b888b3cf3..b0ee68f4092 100644 --- a/src/services/ghost/classic-auto-complete/__tests__/uselessSuggestionFilter.test.ts +++ b/src/services/ghost/classic-auto-complete/__tests__/uselessSuggestionFilter.test.ts @@ -250,4 +250,22 @@ return 1 `), ).toBe(true) }) + + it("treats as duplication when suggestion repeats the same phrase from the prefix", () => { + // User types "We are going to start from" and suggestion repeats "the beginning. We are going to start from the beginning..." + expect( + isDuplication( + `We are going to start from <<>>`, + ), + ).toBe(true) + }) + + it("treats as duplication when suggestion ends with non-word characters but still has repetitive phrases", () => { + // Suggestion ends with "..." 
but the repeating phrase should still be detected + expect( + isDuplication( + `<<>>`, + ), + ).toBe(true) + }) }) diff --git a/src/services/ghost/classic-auto-complete/uselessSuggestionFilter.ts b/src/services/ghost/classic-auto-complete/uselessSuggestionFilter.ts index a32334e12ba..114b8d7a2b7 100644 --- a/src/services/ghost/classic-auto-complete/uselessSuggestionFilter.ts +++ b/src/services/ghost/classic-auto-complete/uselessSuggestionFilter.ts @@ -18,6 +18,11 @@ export function suggestionConsideredDuplication(params: AutocompleteSuggestion): return true } + // Check if the suggestion contains repetitive phrases that continue from the prefix + if (containsRepetitivePhraseFromPrefix(params)) { + return true + } + // When the suggestion isn't a full line or set of lines, normalize by including // the rest of the line in the prefix/suffix and check with the completed line(s) const normalized = normalizeToCompleteLine(params) @@ -58,6 +63,43 @@ function DuplicatesFromEdgeLines(params: AutocompleteSuggestion): boolean { return false } +/** + * Detects when a suggestion's tail is repeating itself - a common LLM failure mode. + * For example: "the beginning. We are going to start from the beginning. We are going to start from the beginning..." + * The suggestion gets stuck in a loop repeating the same phrase. + */ +function containsRepetitivePhraseFromPrefix(params: AutocompleteSuggestion): boolean { + const suggestion = params.suggestion + const phraseLength = 30 // Phrase length to check for repetition + const minRepetitions = 3 // Minimum number of repetitions to consider it repetitive + + // Only check suggestions that are long enough to contain repetition + if (suggestion.length < phraseLength * minRepetitions) { + return false + } + + // Strip non-word characters from the right before selecting the tail + // This handles cases like "...the beginning..." where trailing punctuation would break detection + const strippedSuggestion = suggestion.replace(/\W+$/, "") + + if (strippedSuggestion.length < phraseLength) { + return false + } + + // Extract a phrase from the end of the stripped suggestion + const phrase = strippedSuggestion.slice(-phraseLength) + + // Count how many times this phrase appears in the original suggestion + let count = 0 + let pos = 0 + while ((pos = suggestion.indexOf(phrase, pos)) !== -1) { + count++ + pos += phrase.length + } + + return count >= minRepetitions +} + /** * Normalizes partial-line suggestions by expanding them to the full current line: * (prefix line tail) + (suggestion first line) + (suffix line head).