4 changes: 2 additions & 2 deletions packages/agent/test/e2e.test.ts
@@ -302,8 +302,8 @@ describe("Agent E2E Tests", () => {
});
});

-describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air)", () => {
-const model = getModel("zai", "glm-4.5-air");
+describe.skipIf(!process.env.ZAI_API_KEY)("zAI Coding Plan Provider (glm-4.5-air)", () => {
+const model = getModel("zai-coding-plan", "glm-4.5-air");

it("should handle basic text prompt", async () => {
await basicPrompt(model);
3 changes: 3 additions & 0 deletions packages/ai/CHANGELOG.md
@@ -2,6 +2,9 @@

## [Unreleased]

+### Breaking Changes
+
+- Renamed the `zai` provider to `zai-coding-plan` to align with the naming in models.dev and to be explicit about which endpoint we use
## [0.50.1] - 2026-01-26

### Fixed
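A minimal migration sketch for callers affected by the rename; the import path is illustrative, but the provider and model ids match the tests in this PR:

```ts
// Hypothetical import path; adjust to however the ai package is consumed.
import { getModel } from "@yourorg/ai";

// Before (0.50.x):
// const model = getModel("zai", "glm-4.5-air");

// After this change:
const model = getModel("zai-coding-plan", "glm-4.5-air");
```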
3 changes: 2 additions & 1 deletion packages/ai/README.md
@@ -56,6 +56,7 @@ Unified LLM API with automatic model discovery, provider configuration, token an
- **Groq**
- **Cerebras**
- **xAI**
+- **zAI Coding Plan**
- **OpenRouter**
- **Vercel AI Gateway**
- **MiniMax**
@@ -892,7 +893,7 @@ In Node.js environments, you can set environment variables to avoid passing API
| xAI | `XAI_API_KEY` |
| OpenRouter | `OPENROUTER_API_KEY` |
| Vercel AI Gateway | `AI_GATEWAY_API_KEY` |
-| zAI | `ZAI_API_KEY` |
+| zAI Coding Plan | `ZAI_API_KEY` |
| MiniMax | `MINIMAX_API_KEY` |
| GitHub Copilot | `COPILOT_GITHUB_TOKEN` or `GH_TOKEN` or `GITHUB_TOKEN` |

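The env-var table above implies no key change for the renamed provider; a quick Node.js sketch, with an illustrative import path:

```ts
// The zAI Coding Plan provider still reads ZAI_API_KEY from the environment,
// so existing shells need no changes after the rename.
import { getModel } from "@yourorg/ai"; // illustrative import path

if (!process.env.ZAI_API_KEY) {
  throw new Error("Set ZAI_API_KEY in your shell before running this");
}

const model = getModel("zai-coding-plan", "glm-4.5-air");
```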
44 changes: 22 additions & 22 deletions packages/ai/scripts/generate-models.ts
@@ -417,33 +417,33 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
}
}

-// Process zAi models
-if (data.zai?.models) {
-for (const [modelId, model] of Object.entries(data.zai.models)) {
+// Process zAi Coding Plan models
+if (data["zai-coding-plan"]?.models) {
+for (const [modelId, model] of Object.entries(data["zai-coding-plan"].models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
const supportsImage = m.modalities?.input?.includes("image")

models.push({
-id: modelId,
-name: m.name || modelId,
-api: "openai-completions",
-provider: "zai",
-baseUrl: "https://api.z.ai/api/coding/paas/v4",
-reasoning: m.reasoning === true,
-input: supportsImage ? ["text", "image"] : ["text"],
-cost: {
-input: m.cost?.input || 0,
-output: m.cost?.output || 0,
-cacheRead: m.cost?.cache_read || 0,
-cacheWrite: m.cost?.cache_write || 0,
-},
-compat: {
-supportsDeveloperRole: false,
-thinkingFormat: "zai",
-},
-contextWindow: m.limit?.context || 4096,
-maxTokens: m.limit?.output || 4096,
+id: modelId,
+name: m.name || modelId,
+api: "openai-completions",
+provider: "zai-coding-plan",
+baseUrl: "https://api.z.ai/api/coding/paas/v4",
+reasoning: m.reasoning === true,
+input: supportsImage ? ["text", "image"] : ["text"],
+cost: {
+input: m.cost?.input || 0,
+output: m.cost?.output || 0,
+cacheRead: m.cost?.cache_read || 0,
+cacheWrite: m.cost?.cache_write || 0,
+},
+compat: {
+supportsDeveloperRole: false,
+thinkingFormat: "zai",
+},
+contextWindow: m.limit?.context || 4096,
+maxTokens: m.limit?.output || 4096,
});
}
}
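For reference, a sketch of the models.dev payload shape this loop consumes; the field names (`tool_call`, `modalities`, `cost`, `limit`) come from the accesses above, while the concrete values are hypothetical:

```ts
// Hypothetical models.dev entry for the zai-coding-plan provider.
const data = {
  "zai-coding-plan": {
    models: {
      "glm-4.5-air": {
        name: "GLM-4.5-Air",
        tool_call: true, // entries without tool calling are skipped by the loop
        reasoning: true,
        modalities: { input: ["text"] },
        cost: { input: 0, output: 0, cache_read: 0, cache_write: 0 },
        limit: { context: 131072, output: 98304 },
      },
    },
  },
};
```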
1 change: 1 addition & 0 deletions packages/ai/src/env-api-keys.ts
@@ -102,6 +102,7 @@ export function getEnvApiKey(provider: any): string | undefined {
openrouter: "OPENROUTER_API_KEY",
"vercel-ai-gateway": "AI_GATEWAY_API_KEY",
zai: "ZAI_API_KEY",
"zai-coding-plan": "ZAI_API_KEY",
mistral: "MISTRAL_API_KEY",
minimax: "MINIMAX_API_KEY",
"minimax-cn": "MINIMAX_CN_API_KEY",
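Note that the legacy `zai` id stays in the map, so both ids resolve to the same key; a quick sketch of the lookup behavior:

```ts
import { getEnvApiKey } from "./env-api-keys"; // path assumed from within src/

getEnvApiKey("zai");             // reads process.env.ZAI_API_KEY
getEnvApiKey("zai-coding-plan"); // reads process.env.ZAI_API_KEY
```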
68 changes: 43 additions & 25 deletions packages/ai/src/models.generated.ts
@@ -4886,7 +4886,7 @@ export const MODELS = {
input: ["text"],
cost: {
input: 0.09,
-output: 0.39999999999999997,
+output: 0.44999999999999996,
cacheRead: 0,
cacheWrite: 0,
},
@@ -11171,20 +11171,20 @@ export const MODELS = {
maxTokens: 4096,
} satisfies Model<"openai-completions">,
},
"zai": {
"zai-coding-plan": {
"glm-4.5": {
id: "glm-4.5",
name: "GLM-4.5",
api: "openai-completions",
provider: "zai",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
input: ["text"],
cost: {
-input: 0.6,
-output: 2.2,
-cacheRead: 0.11,
+input: 0,
+output: 0,
+cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
@@ -11194,15 +11194,15 @@
id: "glm-4.5-air",
name: "GLM-4.5-Air",
api: "openai-completions",
provider: "zai",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
input: ["text"],
cost: {
-input: 0.2,
-output: 1.1,
-cacheRead: 0.03,
+input: 0,
+output: 0,
+cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
@@ -11212,7 +11212,7 @@
id: "glm-4.5-flash",
name: "GLM-4.5-Flash",
api: "openai-completions",
provider: "zai",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
@@ -11230,14 +11230,14 @@
id: "glm-4.5v",
name: "GLM-4.5V",
api: "openai-completions",
provider: "zai",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
input: ["text", "image"],
cost: {
-input: 0.6,
-output: 1.8,
+input: 0,
+output: 0,
cacheRead: 0,
cacheWrite: 0,
},
@@ -11248,15 +11248,15 @@
id: "glm-4.6",
name: "GLM-4.6",
api: "openai-completions",
provider: "zai",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
input: ["text"],
cost: {
-input: 0.6,
-output: 2.2,
-cacheRead: 0.11,
+input: 0,
+output: 0,
+cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 204800,
@@ -11266,14 +11266,14 @@
id: "glm-4.6v",
name: "GLM-4.6V",
api: "openai-completions",
provider: "zai",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
input: ["text", "image"],
cost: {
-input: 0.3,
-output: 0.9,
+input: 0,
+output: 0,
cacheRead: 0,
cacheWrite: 0,
},
@@ -11284,19 +11284,37 @@
id: "glm-4.7",
name: "GLM-4.7",
api: "openai-completions",
provider: "zai",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
input: ["text"],
cost: {
-input: 0.6,
-output: 2.2,
-cacheRead: 0.11,
+input: 0,
+output: 0,
+cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 204800,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"glm-4.7-flash": {
id: "glm-4.7-flash",
name: "GLM-4.7-Flash",
api: "openai-completions",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
},
} as const;
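A usage sketch against the regenerated table; the relative import path is assumed. The coding-plan entries now carry zero costs, presumably because the plan bills by subscription rather than per token:

```ts
import { MODELS } from "./models.generated"; // path assumed from within src/

const m = MODELS["zai-coding-plan"]["glm-4.7-flash"];
console.log(m.baseUrl);       // "https://api.z.ai/api/coding/paas/v4"
console.log(m.contextWindow); // 200000
console.log(m.cost.input);    // 0
```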
36 changes: 29 additions & 7 deletions packages/ai/src/providers/openai-completions.ts
@@ -105,7 +105,9 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
const client = createClient(model, context, apiKey, options?.headers);
const params = buildParams(model, context, options);
options?.onPayload?.(params);
-const openaiStream = await client.chat.completions.create(params, { signal: options?.signal });
+const openaiStream = await client.chat.completions.create(params, {
+signal: options?.signal,
+});
stream.push({ type: "start", partial: output });

let currentBlock: TextContent | ThinkingContent | (ToolCall & { partialArgs?: string }) | null = null;
@@ -183,7 +185,11 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
finishCurrentBlock(currentBlock);
currentBlock = { type: "text", text: "" };
output.content.push(currentBlock);
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
stream.push({
type: "text_start",
contentIndex: blockIndex(),
partial: output,
});
}

if (currentBlock.type === "text") {
@@ -225,7 +231,11 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
thinkingSignature: foundReasoningField,
};
output.content.push(currentBlock);
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
stream.push({
type: "thinking_start",
contentIndex: blockIndex(),
partial: output,
});
}

if (currentBlock.type === "thinking") {
@@ -256,7 +266,11 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
partialArgs: "",
};
output.content.push(currentBlock);
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
stream.push({
type: "toolcall_start",
contentIndex: blockIndex(),
partial: output,
});
}

if (currentBlock.type === "toolCall") {
@@ -439,7 +453,9 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
if (compat.thinkingFormat === "zai" && model.reasoning) {
// Z.ai uses binary thinking: { type: "enabled" | "disabled" }
// Must explicitly disable since z.ai defaults to thinking enabled
-(params as any).thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
+(params as any).thinking = {
+type: options?.reasoningEffort ? "enabled" : "disabled",
+};
} else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
// OpenAI-style reasoning_effort
params.reasoning_effort = options.reasoningEffort;
@@ -508,7 +524,10 @@ export function convertMessages(
if (context.systemPrompt) {
const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
const role = useDeveloperRole ? "developer" : "system";
-params.push({ role: role, content: sanitizeSurrogates(context.systemPrompt) });
+params.push({
+role: role,
+content: sanitizeSurrogates(context.systemPrompt),
+});
}

let lastRole: string | null = null;
@@ -585,7 +604,10 @@ export function convertMessages(
if (compat.requiresThinkingAsText) {
// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
const thinkingText = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n\n");
-const textContent = assistantMsg.content as Array<{ type: "text"; text: string }> | null;
+const textContent = assistantMsg.content as Array<{
+type: "text";
+text: string;
+}> | null;
if (textContent) {
textContent.unshift({ type: "text", text: thinkingText });
} else {
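To make the `thinkingFormat: "zai"` branch above concrete, here is a sketch of the two request shapes it chooses between; model ids and values are illustrative:

```ts
// Z.ai thinking is a binary toggle: any reasoningEffort value enables it,
// and it must be explicitly disabled because z.ai defaults to enabled.
const zaiParams = {
  model: "glm-4.5-air",
  messages: [{ role: "user", content: "Hello" }],
  thinking: { type: "enabled" }, // or "disabled" when no reasoningEffort is set
};

// OpenAI-style providers take a graded reasoning_effort instead.
const openaiParams = {
  model: "o4-mini", // illustrative model id
  messages: [{ role: "user", content: "Hello" }],
  reasoning_effort: "medium",
};
```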
2 changes: 1 addition & 1 deletion packages/ai/src/types.ts
@@ -31,7 +31,7 @@ export type KnownProvider =
| "cerebras"
| "openrouter"
| "vercel-ai-gateway"
| "zai"
| "zai-coding-plan"
| "mistral"
| "minimax"
| "minimax-cn"
2 changes: 1 addition & 1 deletion packages/ai/test/context-overflow.test.ts
@@ -374,7 +374,7 @@

describe.skipIf(!process.env.ZAI_API_KEY)("z.ai", () => {
it("glm-4.5-flash - should detect overflow via isContextOverflow (silent overflow or rate limit)", async () => {
const model = getModel("zai", "glm-4.5-flash");
const model = getModel("zai-coding-plan", "glm-4.5-flash");
const result = await testContextOverflow(model, process.env.ZAI_API_KEY!);
logResult(result);

@@ -450,7 +450,7 @@
describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter", () => {
// Anthropic backend
it("anthropic/claude-sonnet-4 via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getModel("openrouter", "anthropic/claude-sonnet-4");

Check failure on line 453 in packages/ai/test/context-overflow.test.ts (GitHub Actions / build-check-test): Argument of type '"anthropic/claude-sonnet-4"' is not assignable to parameter of type '"openrouter/auto"'.
const result = await testContextOverflow(model, process.env.OPENROUTER_API_KEY!);
logResult(result);

@@ -461,7 +461,7 @@

// DeepSeek backend
it("deepseek/deepseek-v3.2 via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getModel("openrouter", "deepseek/deepseek-v3.2");

Check failure on line 464 in packages/ai/test/context-overflow.test.ts (GitHub Actions / build-check-test): Argument of type '"deepseek/deepseek-v3.2"' is not assignable to parameter of type '"openrouter/auto"'.
const result = await testContextOverflow(model, process.env.OPENROUTER_API_KEY!);
logResult(result);

@@ -472,7 +472,7 @@

// Mistral backend
it("mistralai/mistral-large-2512 via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getModel("openrouter", "mistralai/mistral-large-2512");

Check failure on line 475 in packages/ai/test/context-overflow.test.ts (GitHub Actions / build-check-test): Argument of type '"mistralai/mistral-large-2512"' is not assignable to parameter of type '"openrouter/auto"'.
const result = await testContextOverflow(model, process.env.OPENROUTER_API_KEY!);
logResult(result);

@@ -483,7 +483,7 @@

// Google backend
it("google/gemini-2.5-flash via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getModel("openrouter", "google/gemini-2.5-flash");

Check failure on line 486 in packages/ai/test/context-overflow.test.ts (GitHub Actions / build-check-test): Argument of type '"google/gemini-2.5-flash"' is not assignable to parameter of type '"openrouter/auto"'.
const result = await testContextOverflow(model, process.env.OPENROUTER_API_KEY!);
logResult(result);

@@ -494,7 +494,7 @@

// Meta/Llama backend
it("meta-llama/llama-4-maverick via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getModel("openrouter", "meta-llama/llama-4-maverick");

Check failure on line 497 in packages/ai/test/context-overflow.test.ts (GitHub Actions / build-check-test): Argument of type '"meta-llama/llama-4-maverick"' is not assignable to parameter of type '"openrouter/auto"'.
const result = await testContextOverflow(model, process.env.OPENROUTER_API_KEY!);
logResult(result);

2 changes: 1 addition & 1 deletion packages/ai/test/empty.test.ts
@@ -309,7 +309,7 @@ describe("AI Providers Empty Message Tests", () => {
});

describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider Empty Messages", () => {
const llm = getModel("zai", "glm-4.5-air");
const llm = getModel("zai-coding-plan", "glm-4.5-air");

it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);