4 changes: 2 additions & 2 deletions packages/agent/test/e2e.test.ts
@@ -302,8 +302,8 @@ describe("Agent E2E Tests", () => {
});
});

-describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air)", () => {
-const model = getModel("zai", "glm-4.5-air");
+describe.skipIf(!process.env.ZAI_API_KEY)("zAI Coding Plan Provider (glm-4.5-air)", () => {
+const model = getModel("zai-coding-plan", "glm-4.5-air");

it("should handle basic text prompt", async () => {
await basicPrompt(model);
3 changes: 3 additions & 0 deletions packages/ai/CHANGELOG.md
@@ -2,6 +2,9 @@

## [Unreleased]

+### Breaking Changes
+
+- Renamed the `zai` provider to `zai-coding-plan` to align with the naming in models.dev and to be explicit about which endpoint we use
## [0.50.1] - 2026-01-26

### Fixed
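A minimal migration sketch for callers affected by the rename; the import path is illustrative, but the provider and model ids match the tests in this PR:

```ts
// Hypothetical import path; adjust to however the ai package is consumed.
import { getModel } from "@yourorg/ai";

// Before (0.50.x):
// const model = getModel("zai", "glm-4.5-air");

// After this change:
const model = getModel("zai-coding-plan", "glm-4.5-air");
```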
3 changes: 2 additions & 1 deletion packages/ai/README.md
@@ -56,6 +56,7 @@ Unified LLM API with automatic model discovery, provider configuration, token an
- **Groq**
- **Cerebras**
- **xAI**
+- **zAI Coding Plan**
- **OpenRouter**
- **Vercel AI Gateway**
- **MiniMax**
@@ -892,7 +893,7 @@ In Node.js environments, you can set environment variables to avoid passing API
| xAI | `XAI_API_KEY` |
| OpenRouter | `OPENROUTER_API_KEY` |
| Vercel AI Gateway | `AI_GATEWAY_API_KEY` |
-| zAI | `ZAI_API_KEY` |
+| zAI Coding Plan | `ZAI_API_KEY` |
| MiniMax | `MINIMAX_API_KEY` |
| GitHub Copilot | `COPILOT_GITHUB_TOKEN` or `GH_TOKEN` or `GITHUB_TOKEN` |

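The env-var table above implies no key change for the renamed provider; a quick Node.js sketch, with an illustrative import path:

```ts
// The zAI Coding Plan provider still reads ZAI_API_KEY from the environment,
// so existing shells need no changes after the rename.
import { getModel } from "@yourorg/ai"; // illustrative import path

if (!process.env.ZAI_API_KEY) {
  throw new Error("Set ZAI_API_KEY in your shell before running this");
}

const model = getModel("zai-coding-plan", "glm-4.5-air");
```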
44 changes: 22 additions & 22 deletions packages/ai/scripts/generate-models.ts
@@ -417,33 +417,33 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
}
}

-// Process zAi models
-if (data.zai?.models) {
-for (const [modelId, model] of Object.entries(data.zai.models)) {
+// Process zAi Coding Plan models
+if (data["zai-coding-plan"]?.models) {
+for (const [modelId, model] of Object.entries(data["zai-coding-plan"].models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
const supportsImage = m.modalities?.input?.includes("image")

models.push({
-id: modelId,
-name: m.name || modelId,
-api: "openai-completions",
-provider: "zai",
-baseUrl: "https://api.z.ai/api/coding/paas/v4",
-reasoning: m.reasoning === true,
-input: supportsImage ? ["text", "image"] : ["text"],
-cost: {
-input: m.cost?.input || 0,
-output: m.cost?.output || 0,
-cacheRead: m.cost?.cache_read || 0,
-cacheWrite: m.cost?.cache_write || 0,
-},
-compat: {
-supportsDeveloperRole: false,
-thinkingFormat: "zai",
-},
-contextWindow: m.limit?.context || 4096,
-maxTokens: m.limit?.output || 4096,
+id: modelId,
+name: m.name || modelId,
+api: "openai-completions",
+provider: "zai-coding-plan",
+baseUrl: "https://api.z.ai/api/coding/paas/v4",
+reasoning: m.reasoning === true,
+input: supportsImage ? ["text", "image"] : ["text"],
+cost: {
+input: m.cost?.input || 0,
+output: m.cost?.output || 0,
+cacheRead: m.cost?.cache_read || 0,
+cacheWrite: m.cost?.cache_write || 0,
+},
+compat: {
+supportsDeveloperRole: false,
+thinkingFormat: "zai",
+},
+contextWindow: m.limit?.context || 4096,
+maxTokens: m.limit?.output || 4096,
});
}
}
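For reference, a sketch of the models.dev payload shape this loop consumes; the field names (`tool_call`, `modalities`, `cost`, `limit`) come from the accesses above, while the concrete values are hypothetical:

```ts
// Hypothetical models.dev entry for the zai-coding-plan provider.
const data = {
  "zai-coding-plan": {
    models: {
      "glm-4.5-air": {
        name: "GLM-4.5-Air",
        tool_call: true, // entries without tool calling are skipped by the loop
        reasoning: true,
        modalities: { input: ["text"] },
        cost: { input: 0, output: 0, cache_read: 0, cache_write: 0 },
        limit: { context: 131072, output: 98304 },
      },
    },
  },
};
```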
1 change: 1 addition & 0 deletions packages/ai/src/env-api-keys.ts
@@ -102,6 +102,7 @@ export function getEnvApiKey(provider: any): string | undefined {
openrouter: "OPENROUTER_API_KEY",
"vercel-ai-gateway": "AI_GATEWAY_API_KEY",
zai: "ZAI_API_KEY",
"zai-coding-plan": "ZAI_API_KEY",
mistral: "MISTRAL_API_KEY",
minimax: "MINIMAX_API_KEY",
"minimax-cn": "MINIMAX_CN_API_KEY",
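Note that the legacy `zai` id stays in the map, so both ids resolve to the same key; a quick sketch of the lookup behavior:

```ts
import { getEnvApiKey } from "./env-api-keys"; // path assumed from within src/

getEnvApiKey("zai");             // reads process.env.ZAI_API_KEY
getEnvApiKey("zai-coding-plan"); // reads process.env.ZAI_API_KEY
```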
68 changes: 43 additions & 25 deletions packages/ai/src/models.generated.ts
@@ -4886,7 +4886,7 @@ export const MODELS = {
input: ["text"],
cost: {
input: 0.09,
-output: 0.39999999999999997,
+output: 0.44999999999999996,
cacheRead: 0,
cacheWrite: 0,
},
@@ -11171,20 +11171,20 @@ export const MODELS = {
maxTokens: 4096,
} satisfies Model<"openai-completions">,
},
"zai": {
"zai-coding-plan": {
"glm-4.5": {
id: "glm-4.5",
name: "GLM-4.5",
api: "openai-completions",
provider: "zai",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
input: ["text"],
cost: {
-input: 0.6,
-output: 2.2,
-cacheRead: 0.11,
+input: 0,
+output: 0,
+cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
@@ -11194,15 +11194,15 @@
id: "glm-4.5-air",
name: "GLM-4.5-Air",
api: "openai-completions",
provider: "zai",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
input: ["text"],
cost: {
-input: 0.2,
-output: 1.1,
-cacheRead: 0.03,
+input: 0,
+output: 0,
+cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
@@ -11212,7 +11212,7 @@
id: "glm-4.5-flash",
name: "GLM-4.5-Flash",
api: "openai-completions",
provider: "zai",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
@@ -11230,14 +11230,14 @@
id: "glm-4.5v",
name: "GLM-4.5V",
api: "openai-completions",
provider: "zai",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
input: ["text", "image"],
cost: {
-input: 0.6,
-output: 1.8,
+input: 0,
+output: 0,
cacheRead: 0,
cacheWrite: 0,
},
@@ -11248,15 +11248,15 @@
id: "glm-4.6",
name: "GLM-4.6",
api: "openai-completions",
provider: "zai",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
input: ["text"],
cost: {
-input: 0.6,
-output: 2.2,
-cacheRead: 0.11,
+input: 0,
+output: 0,
+cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 204800,
@@ -11266,14 +11266,14 @@
id: "glm-4.6v",
name: "GLM-4.6V",
api: "openai-completions",
provider: "zai",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
input: ["text", "image"],
cost: {
-input: 0.3,
-output: 0.9,
+input: 0,
+output: 0,
cacheRead: 0,
cacheWrite: 0,
},
@@ -11284,19 +11284,37 @@
id: "glm-4.7",
name: "GLM-4.7",
api: "openai-completions",
provider: "zai",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
input: ["text"],
cost: {
-input: 0.6,
-output: 2.2,
-cacheRead: 0.11,
+input: 0,
+output: 0,
+cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 204800,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"glm-4.7-flash": {
id: "glm-4.7-flash",
name: "GLM-4.7-Flash",
api: "openai-completions",
provider: "zai-coding-plan",
baseUrl: "https://api.z.ai/api/coding/paas/v4",
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
},
} as const;
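A usage sketch against the regenerated table; the relative import path is assumed. The coding-plan entries now carry zero costs, presumably because the plan bills by subscription rather than per token:

```ts
import { MODELS } from "./models.generated"; // path assumed from within src/

const m = MODELS["zai-coding-plan"]["glm-4.7-flash"];
console.log(m.baseUrl);       // "https://api.z.ai/api/coding/paas/v4"
console.log(m.contextWindow); // 200000
console.log(m.cost.input);    // 0
```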
36 changes: 29 additions & 7 deletions packages/ai/src/providers/openai-completions.ts
@@ -105,7 +105,9 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
const client = createClient(model, context, apiKey, options?.headers);
const params = buildParams(model, context, options);
options?.onPayload?.(params);
-const openaiStream = await client.chat.completions.create(params, { signal: options?.signal });
+const openaiStream = await client.chat.completions.create(params, {
+signal: options?.signal,
+});
stream.push({ type: "start", partial: output });

let currentBlock: TextContent | ThinkingContent | (ToolCall & { partialArgs?: string }) | null = null;
@@ -183,7 +185,11 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
finishCurrentBlock(currentBlock);
currentBlock = { type: "text", text: "" };
output.content.push(currentBlock);
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
stream.push({
type: "text_start",
contentIndex: blockIndex(),
partial: output,
});
}

if (currentBlock.type === "text") {
@@ -225,7 +231,11 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
thinkingSignature: foundReasoningField,
};
output.content.push(currentBlock);
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
stream.push({
type: "thinking_start",
contentIndex: blockIndex(),
partial: output,
});
}

if (currentBlock.type === "thinking") {
@@ -256,7 +266,11 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions", OpenA
partialArgs: "",
};
output.content.push(currentBlock);
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
stream.push({
type: "toolcall_start",
contentIndex: blockIndex(),
partial: output,
});
}

if (currentBlock.type === "toolCall") {
@@ -439,7 +453,9 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
if (compat.thinkingFormat === "zai" && model.reasoning) {
// Z.ai uses binary thinking: { type: "enabled" | "disabled" }
// Must explicitly disable since z.ai defaults to thinking enabled
-(params as any).thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
+(params as any).thinking = {
+type: options?.reasoningEffort ? "enabled" : "disabled",
+};
} else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
// OpenAI-style reasoning_effort
params.reasoning_effort = options.reasoningEffort;
@@ -508,7 +524,10 @@ export function convertMessages(
if (context.systemPrompt) {
const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
const role = useDeveloperRole ? "developer" : "system";
-params.push({ role: role, content: sanitizeSurrogates(context.systemPrompt) });
+params.push({
+role: role,
+content: sanitizeSurrogates(context.systemPrompt),
+});
}

let lastRole: string | null = null;
@@ -585,7 +604,10 @@ export function convertMessages(
if (compat.requiresThinkingAsText) {
// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
const thinkingText = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n\n");
-const textContent = assistantMsg.content as Array<{ type: "text"; text: string }> | null;
+const textContent = assistantMsg.content as Array<{
+type: "text";
+text: string;
+}> | null;
if (textContent) {
textContent.unshift({ type: "text", text: thinkingText });
} else {
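To make the `thinkingFormat: "zai"` branch above concrete, here is a sketch of the two request shapes it chooses between; model ids and values are illustrative:

```ts
// Z.ai thinking is a binary toggle: any reasoningEffort value enables it,
// and it must be explicitly disabled because z.ai defaults to enabled.
const zaiParams = {
  model: "glm-4.5-air",
  messages: [{ role: "user", content: "Hello" }],
  thinking: { type: "enabled" }, // or "disabled" when no reasoningEffort is set
};

// OpenAI-style providers take a graded reasoning_effort instead.
const openaiParams = {
  model: "o4-mini", // illustrative model id
  messages: [{ role: "user", content: "Hello" }],
  reasoning_effort: "medium",
};
```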
2 changes: 1 addition & 1 deletion packages/ai/src/types.ts
@@ -31,7 +31,7 @@ export type KnownProvider =
| "cerebras"
| "openrouter"
| "vercel-ai-gateway"
| "zai"
| "zai-coding-plan"
| "mistral"
| "minimax"
| "minimax-cn"
2 changes: 1 addition & 1 deletion packages/ai/test/context-overflow.test.ts
@@ -374,7 +374,7 @@

describe.skipIf(!process.env.ZAI_API_KEY)("z.ai", () => {
it("glm-4.5-flash - should detect overflow via isContextOverflow (silent overflow or rate limit)", async () => {
const model = getModel("zai", "glm-4.5-flash");
const model = getModel("zai-coding-plan", "glm-4.5-flash");
const result = await testContextOverflow(model, process.env.ZAI_API_KEY!);
logResult(result);

@@ -450,7 +450,7 @@
describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter", () => {
// Anthropic backend
it("anthropic/claude-sonnet-4 via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getModel("openrouter", "anthropic/claude-sonnet-4");

Check failure on line 453 in packages/ai/test/context-overflow.test.ts (GitHub Actions / build-check-test): Argument of type '"anthropic/claude-sonnet-4"' is not assignable to parameter of type '"openrouter/auto"'.
const result = await testContextOverflow(model, process.env.OPENROUTER_API_KEY!);
logResult(result);

@@ -461,7 +461,7 @@

// DeepSeek backend
it("deepseek/deepseek-v3.2 via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getModel("openrouter", "deepseek/deepseek-v3.2");

Check failure on line 464 in packages/ai/test/context-overflow.test.ts (GitHub Actions / build-check-test): Argument of type '"deepseek/deepseek-v3.2"' is not assignable to parameter of type '"openrouter/auto"'.
const result = await testContextOverflow(model, process.env.OPENROUTER_API_KEY!);
logResult(result);

@@ -472,7 +472,7 @@

// Mistral backend
it("mistralai/mistral-large-2512 via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getModel("openrouter", "mistralai/mistral-large-2512");

Check failure on line 475 in packages/ai/test/context-overflow.test.ts (GitHub Actions / build-check-test): Argument of type '"mistralai/mistral-large-2512"' is not assignable to parameter of type '"openrouter/auto"'.
const result = await testContextOverflow(model, process.env.OPENROUTER_API_KEY!);
logResult(result);

@@ -483,7 +483,7 @@

// Google backend
it("google/gemini-2.5-flash via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getModel("openrouter", "google/gemini-2.5-flash");

Check failure on line 486 in packages/ai/test/context-overflow.test.ts (GitHub Actions / build-check-test): Argument of type '"google/gemini-2.5-flash"' is not assignable to parameter of type '"openrouter/auto"'.
const result = await testContextOverflow(model, process.env.OPENROUTER_API_KEY!);
logResult(result);

@@ -494,7 +494,7 @@

// Meta/Llama backend
it("meta-llama/llama-4-maverick via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getModel("openrouter", "meta-llama/llama-4-maverick");

Check failure on line 497 in packages/ai/test/context-overflow.test.ts (GitHub Actions / build-check-test): Argument of type '"meta-llama/llama-4-maverick"' is not assignable to parameter of type '"openrouter/auto"'.
const result = await testContextOverflow(model, process.env.OPENROUTER_API_KEY!);
logResult(result);

2 changes: 1 addition & 1 deletion packages/ai/test/empty.test.ts
@@ -309,7 +309,7 @@ describe("AI Providers Empty Message Tests", () => {
});

describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider Empty Messages", () => {
const llm = getModel("zai", "glm-4.5-air");
const llm = getModel("zai-coding-plan", "glm-4.5-air");

it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);