jasonkneen · pull · Jan 7, 2026 · Nov 18, 2025 · Jan 7, 2026 · Jan 7, 2026
diff --git a/.changeset/add-zai-glm-4-7-cerebras-model.md b/.changeset/add-zai-glm-4-7-cerebras-model.md
@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Add `zai-glm-4.7` to Cerebras models
diff --git a/.changeset/cmdv-image-paste-macos.md b/.changeset/cmdv-image-paste-macos.md
diff --git a/.changeset/cute-flies-dance.md b/.changeset/cute-flies-dance.md
@@ -0,0 +1,5 @@
+---
+"kilo-code": patch
+---
+
+Improved prompt caching when using Anthropic models on OpenRouter with native tool calling
diff --git a/.changeset/enable-jetbrains-autocomplete.md b/.changeset/enable-jetbrains-autocomplete.md
diff --git a/.changeset/fix-vscode-paste-truncation.md b/.changeset/fix-vscode-paste-truncation.md
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,29 @@
 # kilo-code
 
+## 4.143.1
+
+### Patch Changes
+
+- [#4832](https://github.com/Kilo-Org/kilocode/pull/4832) [`22a4ebf`](https://github.com/Kilo-Org/kilocode/commit/22a4ebfcd9f885b6ef9979dc6830226db9a4f397) Thanks [@Drilmo](https://github.com/Drilmo)! - Support Cmd+V for pasting images on macOS in VSCode terminal
+
+    - Detect empty bracketed paste (when clipboard contains image instead of text)
+    - Trigger clipboard image check on empty paste or paste timeout
+    - Add Cmd+V (meta key) support alongside Ctrl+V for image paste
+
+- [#3856](https://github.com/Kilo-Org/kilocode/pull/3856) [`91e0a17`](https://github.com/Kilo-Org/kilocode/commit/91e0a1788963b8be50c58881f11ded96516ab163) Thanks [@markijbema](https://github.com/markijbema)! - Faster autocomplete when using the Mistral provider
+
+- [#4839](https://github.com/Kilo-Org/kilocode/pull/4839) [`abaada6`](https://github.com/Kilo-Org/kilocode/commit/abaada6b7ced6d3f4e37e69441e722e453289b81) Thanks [@markijbema](https://github.com/markijbema)! - Enable autocomplete by default in the JetBrains extension
+
+- [#4831](https://github.com/Kilo-Org/kilocode/pull/4831) [`a9cbb2c`](https://github.com/Kilo-Org/kilocode/commit/a9cbb2cebd75e0c675dc3b55e7a1653ccb93921b) Thanks [@Drilmo](https://github.com/Drilmo)! - Fix paste truncation in VSCode terminal
+
+    - Prevent React StrictMode cleanup from interrupting paste operations
+    - Remove `completePaste()` and `clearBuffers()` from useEffect cleanup
+    - Paste buffer refs now persist across React re-mounts and flush properly when the paste end marker is received
+
+- [#4847](https://github.com/Kilo-Org/kilocode/pull/4847) [`8ee812a`](https://github.com/Kilo-Org/kilocode/commit/8ee812a18da5da691bf76ee5c5d9d94cfb678f25) Thanks [@chrarnoldus](https://github.com/chrarnoldus)! - Disable structured outputs for Anthropic models, because the tool schema doesn't yet support them
+
+- [#4843](https://github.com/Kilo-Org/kilocode/pull/4843) [`0e3520a`](https://github.com/Kilo-Org/kilocode/commit/0e3520a0aa9a74f7a28af1f820558d2343fd4fba) Thanks [@markijbema](https://github.com/markijbema)! - Filter unhelpful suggestions in chat autocomplete
+
 ## 4.143.0
 
 ### Minor Changes

diff --git a/apps/kilocode-docs/docs/providers/cerebras.md b/apps/kilocode-docs/docs/providers/cerebras.md
@@ -20,7 +20,8 @@ Cerebras is known for their ultra-fast AI inference powered by the Cerebras CS-3
 Kilo Code supports the following Cerebras models:
 
 - `gpt-oss-120b` (Default) – High-performance open-source model optimized for fast inference
-- `zai-glm-4.6` – Advanced GLM model with enhanced reasoning capabilities
+- `zai-glm-4.6` – Fast general-purpose model on Cerebras (up to 1,000 tokens/s). To be deprecated soon.
+- `zai-glm-4.7` – Highly capable general-purpose model on Cerebras (up to 1,000 tokens/s), competitive with leading proprietary models on coding tasks.
 
 Refer to the [Cerebras documentation](https://docs.cerebras.ai/) for detailed information on model capabilities and performance characteristics.
 

diff --git a/packages/types/src/providers/cerebras.ts b/packages/types/src/providers/cerebras.ts
@@ -14,7 +14,18 @@ export const cerebrasModels = {
 		supportsNativeTools: true,
 		inputPrice: 0,
 		outputPrice: 0,
-		description: "Highly intelligent general purpose model with up to 1,000 tokens/s",
+		description: "Fast general-purpose model on Cerebras (up to 1,000 tokens/s). To be deprecated soon.",
+	},
+	"zai-glm-4.7": {
+		maxTokens: 16384, // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront)
+		contextWindow: 131072,
+		supportsImages: false,
+		supportsPromptCache: false,
+		supportsNativeTools: true,
+		inputPrice: 0,
+		outputPrice: 0,
+		description:
+			"Highly capable general-purpose model on Cerebras (up to 1,000 tokens/s), competitive with leading proprietary models on coding tasks.",
 	},
 	"qwen-3-235b-a22b-instruct-2507": {
 		maxTokens: 16384, // Conservative default to avoid premature rate limiting

diff --git a/src/api/providers/__tests__/kilocode-openrouter.spec.ts b/src/api/providers/__tests__/kilocode-openrouter.spec.ts
@@ -259,26 +259,6 @@ describe("KilocodeOpenrouterHandler", () => {
 			expect(handler.supportsFim()).toBe(false)
 		})
 
-		it("completeFim handles errors correctly", async () => {
-			const handler = new KilocodeOpenrouterHandler({
-				...mockOptions,
-				kilocodeModel: "mistral/codestral-latest",
-			})
-
-			const mockResponse = {
-				ok: false,
-				status: 500,
-				statusText: "Internal Server Error",
-				text: vitest.fn().mockResolvedValue("Error details"),
-			}
-
-			global.fetch = vitest.fn().mockResolvedValue(mockResponse)
-
-			await expect(handler.completeFim("prefix", "suffix")).rejects.toThrow(
-				"FIM streaming failed: 500 Internal Server Error - Error details",
-			)
-		})
-
 		it("streamFim yields chunks correctly", async () => {
 			const handler = new KilocodeOpenrouterHandler({
 				...mockOptions,

diff --git a/src/api/providers/__tests__/mistral-fim.spec.ts b/src/api/providers/__tests__/mistral-fim.spec.ts
@@ -0,0 +1,180 @@
+// kilocode_change - new file
+// npx vitest run src/api/providers/__tests__/mistral-fim.spec.ts
+
+// Mock vscode first to avoid import errors
+vitest.mock("vscode", () => ({}))
+
+import { MistralHandler } from "../mistral"
+import { ApiHandlerOptions } from "../../../shared/api"
+import { streamSse } from "../../../services/continuedev/core/fetch/stream"
+
+// Mock the stream module
+vitest.mock("../../../services/continuedev/core/fetch/stream", () => ({
+	streamSse: vitest.fn(),
+}))
+
+// Mock delay
+vitest.mock("delay", () => ({ default: vitest.fn(() => Promise.resolve()) }))
+
+describe("MistralHandler FIM support", () => {
+	const mockOptions: ApiHandlerOptions = {
+		mistralApiKey: "test-api-key",
+		apiModelId: "codestral-latest",
+	}
+
+	beforeEach(() => vitest.clearAllMocks())
+
+	describe("supportsFim", () => { // each case builds a handler for one model id and checks supportsFim()
+		it("returns true for codestral models", () => {
+			const handler = new MistralHandler({
+				...mockOptions,
+				apiModelId: "codestral-latest", // same id as in mockOptions, repeated to make the case explicit
+			})
+
+			expect(handler.supportsFim()).toBe(true)
+		})
+
+		it("returns true for codestral-2405", () => {
+			const handler = new MistralHandler({
+				...mockOptions,
+				apiModelId: "codestral-2405", // another id containing "codestral"
+			})
+
+			expect(handler.supportsFim()).toBe(true)
+		})
+
+		it("returns false for non-codestral models", () => {
+			const handler = new MistralHandler({
+				...mockOptions,
+				apiModelId: "mistral-large-latest", // id that does not contain "codestral"
+			})
+
+			expect(handler.supportsFim()).toBe(false)
+		})
+
+		it("returns true when no model is specified (defaults to codestral-latest)", () => {
+			const handler = new MistralHandler({
+				mistralApiKey: "test-api-key", // apiModelId intentionally left out
+			})
+
+			// Relies on the handler falling back to codestral-latest when apiModelId is unset (per the test title; the fallback lives in MistralHandler, not shown here)
+			expect(handler.supportsFim()).toBe(true)
+		})
+	})
+
+	describe("streamFim", () => { // fetch and streamSse are both stubbed, so no network traffic happens in these cases
+		it("yields chunks correctly", async () => {
+			const handler = new MistralHandler({
+				...mockOptions,
+				apiModelId: "codestral-latest",
+			})
+
+			// Stub streamSse to yield three payloads, each carrying its text at choices[0].delta.content
+			;(streamSse as any).mockImplementation(async function* () { // NOTE(review): `as any` could likely be vitest.mocked(streamSse) — confirm typings
+				yield { choices: [{ delta: { content: "chunk1" } }] }
+				yield { choices: [{ delta: { content: "chunk2" } }] }
+				yield { choices: [{ delta: { content: "chunk3" } }] }
+			})
+
+			const mockResponse = {
+				ok: true,
+				status: 200,
+				statusText: "OK",
+			} as Response // partial stub: only ok/status/statusText are provided
+
+			global.fetch = vitest.fn().mockResolvedValue(mockResponse) // NOTE(review): global.fetch is replaced here and never restored in this file
+
+			const chunks: string[] = []
+
+			for await (const chunk of handler.streamFim("prefix", "suffix")) {
+				chunks.push(chunk)
+			}
+
+			expect(chunks).toEqual(["chunk1", "chunk2", "chunk3"])
+			expect(streamSse).toHaveBeenCalledWith(mockResponse) // asserts the fetch result is passed to streamSse unchanged
+		})
+
+		it("handles errors correctly", async () => {
+			const handler = new MistralHandler({
+				...mockOptions,
+				apiModelId: "codestral-latest",
+			})
+
+			const mockResponse = {
+				ok: false,
+				status: 400,
+				statusText: "Bad Request",
+				text: vitest.fn().mockResolvedValue("Invalid request"), // body text expected at the end of the error message
+			}
+
+			global.fetch = vitest.fn().mockResolvedValue(mockResponse)
+
+			const generator = handler.streamFim("prefix", "suffix") // the failure is asserted on the first next() call below
+			await expect(generator.next()).rejects.toThrow("FIM streaming failed: 400 Bad Request - Invalid request")
+		})
+
+		it("uses correct endpoint for codestral models", async () => {
+			const handler = new MistralHandler({
+				...mockOptions,
+				apiModelId: "codestral-latest",
+			})
+
+			;(streamSse as any).mockImplementation(async function* () {
+				yield { choices: [{ delta: { content: "test" } }] }
+			})
+
+			const mockResponse = {
+				ok: true,
+				status: 200,
+				statusText: "OK",
+			} as Response
+
+			global.fetch = vitest.fn().mockResolvedValue(mockResponse)
+
+			const generator = handler.streamFim("prefix", "suffix")
+			await generator.next() // advance once before asserting on the fetch call
+
+			expect(global.fetch).toHaveBeenCalledWith(
+				expect.objectContaining({ // first fetch argument is matched by its href property
+					href: "https://codestral.mistral.ai/v1/fim/completions",
+				}),
+				expect.objectContaining({
+					method: "POST",
+					headers: expect.objectContaining({
+						Authorization: "Bearer test-api-key", // key comes from mockOptions.mistralApiKey
+					}),
+				}),
+			)
+		})
+
+		it("uses custom codestral URL when provided", async () => {
+			const handler = new MistralHandler({
+				...mockOptions,
+				apiModelId: "codestral-latest",
+				mistralCodestralUrl: "https://custom.codestral.url", // base URL override; the expected href below appends /v1/fim/completions
+			})
+
+			;(streamSse as any).mockImplementation(async function* () {
+				yield { choices: [{ delta: { content: "test" } }] }
+			})
+
+			const mockResponse = {
+				ok: true,
+				status: 200,
+				statusText: "OK",
+			} as Response
+
+			global.fetch = vitest.fn().mockResolvedValue(mockResponse)
+
+			const generator = handler.streamFim("prefix", "suffix")
+			await generator.next() // advance once before asserting on the fetch call
+
+			expect(global.fetch).toHaveBeenCalledWith(
+				expect.objectContaining({
+					href: "https://custom.codestral.url/v1/fim/completions",
+				}),
+				expect.any(Object), // request init is not inspected in this test
+			)
+		})
+	})
+})
diff --git a/src/api/providers/__tests__/openrouter.spec.ts b/src/api/providers/__tests__/openrouter.spec.ts
@@ -71,10 +71,6 @@ describe("OpenRouterHandler", () => {
 		openRouterModelId: "anthropic/claude-sonnet-4",
 	}
 
-	// kilocode_change start
-	const anthropicBetaHeaderValue = "fine-grained-tool-streaming-2025-05-14,structured-outputs-2025-11-13"
-	// kilocode_change end
-
 	beforeEach(() => vitest.clearAllMocks())
 
 	it("initializes with correct options", () => {
@@ -208,13 +204,7 @@ describe("OpenRouterHandler", () => {
 					top_p: undefined,
 					transforms: ["middle-out"],
 				}),
-				// kilocode_change start
-				expect.objectContaining({
-					headers: expect.objectContaining({
-						"x-anthropic-beta": anthropicBetaHeaderValue,
-					}),
-				}),
-				// kilocode_change end
+				{ headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } },
 			)
 		})
 
@@ -239,16 +229,9 @@ describe("OpenRouterHandler", () => {
 
 			await handler.createMessage("test", []).next()
 
-			expect(mockCreate).toHaveBeenCalledWith(
-				expect.objectContaining({ transforms: ["middle-out"] }),
-				// kilocode_change start
-				expect.objectContaining({
-					headers: expect.objectContaining({
-						"x-anthropic-beta": anthropicBetaHeaderValue,
-					}),
-				}),
-				// kilocode_change end
-			)
+			expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ transforms: ["middle-out"] }), {
+				headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" },
+			})
 		})
 
 		it("adds cache control for supported models", async () => {
@@ -290,13 +273,7 @@ describe("OpenRouterHandler", () => {
 						}),
 					]),
 				}),
-				// kilocode_change start
-				expect.objectContaining({
-					headers: expect.objectContaining({
-						"x-anthropic-beta": anthropicBetaHeaderValue,
-					}),
-				}),
-				// kilocode_change end
+				{ headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } },
 			)
 		})
 
@@ -537,13 +514,7 @@ describe("OpenRouterHandler", () => {
 					messages: [{ role: "user", content: "test prompt" }],
 					stream: false,
 				},
-				// kilocode_change start
-				expect.objectContaining({
-					headers: expect.objectContaining({
-						"x-anthropic-beta": anthropicBetaHeaderValue,
-					}),
-				}),
-				// kilocode_change end
+				{ headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } },
 			)
 		})
 

diff --git a/src/api/providers/kilocode-openrouter.ts b/src/api/providers/kilocode-openrouter.ts
@@ -148,14 +148,6 @@ export class KilocodeOpenrouterHandler extends OpenRouterHandler {
 		return modelId.includes("codestral")
 	}
 
-	async completeFim(prefix: string, suffix: string, taskId?: string): Promise<string> {
-		let result = ""
-		for await (const chunk of this.streamFim(prefix, suffix, taskId)) {
-			result += chunk
-		}
-		return result
-	}
-
 	async *streamFim(
 		prefix: string,
 		suffix: string,