diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts index 1c400d52f..28cf73e50 100644 --- a/packages/core/src/core/coreToolScheduler.test.ts +++ b/packages/core/src/core/coreToolScheduler.test.ts @@ -326,16 +326,13 @@ describe('convertToFunctionResponse', () => { { text: 'Another text part' }, ]; const result = convertToFunctionResponse(toolName, callId, llmContent); - expect(result).toEqual([ - { - functionResponse: { - name: toolName, - id: callId, - response: { output: 'Tool execution succeeded.' }, - }, + expect(result).toEqual({ + functionResponse: { + name: toolName, + id: callId, + response: { output: 'Some textual descriptionAnother text part' }, }, - ...llmContent, - ]); + }); }); it('should handle llmContent as an array with a single inlineData Part', () => { @@ -384,15 +381,13 @@ describe('convertToFunctionResponse', () => { it('should handle llmContent as an empty array', () => { const llmContent: PartListUnion = []; const result = convertToFunctionResponse(toolName, callId, llmContent); - expect(result).toEqual([ - { - functionResponse: { - name: toolName, - id: callId, - response: { output: 'Tool execution succeeded.' 
}, - }, + expect(result).toEqual({ + functionResponse: { + name: toolName, + id: callId, + response: { output: '' }, }, - ]); + }); }); it('should handle llmContent as a Part with undefined inlineData/fileData/text', () => { diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts index 5a2bb85da..8962a32db 100644 --- a/packages/core/src/core/coreToolScheduler.ts +++ b/packages/core/src/core/coreToolScheduler.ts @@ -24,6 +24,7 @@ import { } from '../index.js'; import { Part, PartListUnion } from '@google/genai'; import { getResponseTextFromParts } from '../utils/generateContentResponseUtilities.js'; +import { partToString } from '../utils/partUtils.js'; import { isModifiableDeclarativeTool, ModifyContext, @@ -161,12 +162,11 @@ export function convertToFunctionResponse( } if (Array.isArray(contentToProcess)) { - const functionResponse = createFunctionResponsePart( + return createFunctionResponsePart( callId, toolName, - 'Tool execution succeeded.', + partToString(contentToProcess) ); - return [functionResponse, ...contentToProcess]; } // After this point, contentToProcess is a single Part object. 
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

// Unit tests for the model-name normalizer and the per-model context-window
// lookup in tokenLimits.ts.
import { describe, it, expect } from 'vitest';
import { normalize, tokenLimit, DEFAULT_TOKEN_LIMIT } from './tokenLimits.js';

describe('normalize', () => {
  it('should lowercase and trim the model string', () => {
    expect(normalize(' GEMINI-1.5-PRO ')).toBe('gemini-1.5-pro');
  });

  it('should strip provider prefixes', () => {
    expect(normalize('google/gemini-1.5-pro')).toBe('gemini-1.5-pro');
    expect(normalize('anthropic/claude-3.5-sonnet')).toBe('claude-3.5-sonnet');
  });

  it('should handle pipe and colon separators', () => {
    expect(normalize('qwen|qwen2.5:qwen2.5-1m')).toBe('qwen2.5-1m');
  });

  it('should collapse whitespace to a single hyphen', () => {
    expect(normalize('claude 3.5 sonnet')).toBe('claude-3.5-sonnet');
  });

  it('should remove date and version suffixes', () => {
    expect(normalize('gemini-1.5-pro-20250219')).toBe('gemini-1.5-pro');
    expect(normalize('gpt-4o-mini-v1')).toBe('gpt-4o-mini');
    expect(normalize('claude-3.7-sonnet-20240715')).toBe('claude-3.7-sonnet');
    expect(normalize('gpt-4.1-latest')).toBe('gpt-4.1');
    expect(normalize('gemini-2.0-flash-preview-20250520')).toBe('gemini-2.0-flash');
  });

  it('should remove quantization and numeric suffixes', () => {
    expect(normalize('qwen3-coder-7b-4bit')).toBe('qwen3-coder-7b');
    expect(normalize('llama-4-scout-int8')).toBe('llama-4-scout');
    expect(normalize('mistral-large-2-bf16')).toBe('mistral-large-2');
    expect(normalize('deepseek-v3.1-q4')).toBe('deepseek-v3.1');
    expect(normalize('qwen2.5-quantized')).toBe('qwen2.5');
  });

  it('should handle a combination of normalization rules', () => {
    expect(normalize(' Google/GEMINI-2.5-PRO:gemini-2.5-pro-20250605 ')).toBe('gemini-2.5-pro');
  });

  // Deliberately feeds null/undefined through the string signature to pin the
  // runtime `(model ?? '')` guard in normalize().
  it('should handle empty or null input', () => {
    expect(normalize('')).toBe('');
    expect(normalize(undefined as unknown as string)).toBe('');
    expect(normalize(null as unknown as string)).toBe('');
  });

  it('should remove preview suffixes', () => {
    expect(normalize('gemini-2.0-flash-preview')).toBe('gemini-2.0-flash');
  });

  it('should remove version numbers with dots when they are at the end', () => {
    expect(normalize('gpt-4.1.1-latest')).toBe('gpt-4.1.1');
    expect(normalize('gpt-4.1-latest')).toBe('gpt-4.1');
  });
});

describe('tokenLimit', () => {
  // Test cases for each model family
  describe('Google Gemini', () => {
    it('should return the correct limit for Gemini 1.5 Pro', () => {
      expect(tokenLimit('gemini-1.5-pro')).toBe(2097152);
    });
    it('should return the correct limit for Gemini 1.5 Flash', () => {
      expect(tokenLimit('gemini-1.5-flash')).toBe(1048576);
    });
    it('should return the correct limit for Gemini 2.5 Pro', () => {
      expect(tokenLimit('gemini-2.5-pro')).toBe(1048576);
    });
    it('should return the correct limit for Gemini 2.5 Flash', () => {
      expect(tokenLimit('gemini-2.5-flash')).toBe(1048576);
    });
    it('should return the correct limit for Gemini 2.0 Flash with image generation', () => {
      expect(tokenLimit('gemini-2.0-flash-image-generation')).toBe(32768);
    });
    it('should return the correct limit for Gemini 2.0 Flash', () => {
      expect(tokenLimit('gemini-2.0-flash')).toBe(1048576);
    });
  });

  describe('OpenAI', () => {
    it('should return the correct limit for o3-mini', () => {
      expect(tokenLimit('o3-mini')).toBe(200000);
    });
    it('should return the correct limit for o3 models', () => {
      expect(tokenLimit('o3')).toBe(200000);
    });
    it('should return the correct limit for o4-mini', () => {
      expect(tokenLimit('o4-mini')).toBe(200000);
    });
    it('should return the correct limit for gpt-4o-mini', () => {
      expect(tokenLimit('gpt-4o-mini')).toBe(131072);
    });
    it('should return the correct limit for gpt-4o', () => {
      expect(tokenLimit('gpt-4o')).toBe(131072);
    });
    it('should return the correct limit for gpt-4.1-mini', () => {
      expect(tokenLimit('gpt-4.1-mini')).toBe(1048576);
    });
    it('should return the correct limit for gpt-4.1 models', () => {
      expect(tokenLimit('gpt-4.1')).toBe(1048576);
    });
    it('should return the correct limit for gpt-4', () => {
      expect(tokenLimit('gpt-4')).toBe(131072);
    });
  });

  describe('Anthropic Claude', () => {
    it('should return the correct limit for Claude 3.5 Sonnet', () => {
      expect(tokenLimit('claude-3.5-sonnet')).toBe(200000);
    });
    it('should return the correct limit for Claude 3.7 Sonnet', () => {
      expect(tokenLimit('claude-3.7-sonnet')).toBe(1048576);
    });
    it('should return the correct limit for Claude Sonnet 4', () => {
      expect(tokenLimit('claude-sonnet-4')).toBe(1048576);
    });
    it('should return the correct limit for Claude Opus 4', () => {
      expect(tokenLimit('claude-opus-4')).toBe(1048576);
    });
  });

  describe('Alibaba Qwen', () => {
    it('should return the correct limit for qwen3-coder commercial models', () => {
      expect(tokenLimit('qwen3-coder-plus')).toBe(1048576);
      expect(tokenLimit('qwen3-coder-plus-20250601')).toBe(1048576);
      expect(tokenLimit('qwen3-coder-flash')).toBe(1048576);
      expect(tokenLimit('qwen3-coder-flash-20250601')).toBe(1048576);
    });

    it('should return the correct limit for qwen3-coder open source models', () => {
      expect(tokenLimit('qwen3-coder-7b')).toBe(262144);
      expect(tokenLimit('qwen3-coder-480b-a35b-instruct')).toBe(262144);
      expect(tokenLimit('qwen3-coder-30b-a3b-instruct')).toBe(262144);
    });

    it('should return the correct limit for qwen3 2507 variants', () => {
      expect(tokenLimit('qwen3-some-model-2507-instruct')).toBe(262144);
    });

    it('should return the correct limit for qwen2.5-1m', () => {
      expect(tokenLimit('qwen2.5-1m')).toBe(1048576);
      expect(tokenLimit('qwen2.5-1m-instruct')).toBe(1048576);
    });

    it('should return the correct limit for qwen2.5', () => {
      expect(tokenLimit('qwen2.5')).toBe(131072);
      expect(tokenLimit('qwen2.5-instruct')).toBe(131072);
    });

    it('should return the correct limit for qwen-plus', () => {
      expect(tokenLimit('qwen-plus-latest')).toBe(1048576);
      expect(tokenLimit('qwen-plus')).toBe(131072);
    });

    it('should return the correct limit for qwen-flash', () => {
      expect(tokenLimit('qwen-flash-latest')).toBe(1048576);
    });

    it('should return the correct limit for qwen-turbo', () => {
      expect(tokenLimit('qwen-turbo')).toBe(131072);
      expect(tokenLimit('qwen-turbo-latest')).toBe(131072);
    });
  });

  describe('ByteDance Seed-OSS', () => {
    it('should return the correct limit for seed-oss', () => {
      expect(tokenLimit('seed-oss')).toBe(524288);
    });
  });

  describe('Zhipu GLM', () => {
    it('should return the correct limit for glm-4.5v', () => {
      expect(tokenLimit('glm-4.5v')).toBe(65536);
    });
    it('should return the correct limit for glm-4.5-air', () => {
      expect(tokenLimit('glm-4.5-air')).toBe(131072);
    });
    it('should return the correct limit for glm-4.5', () => {
      expect(tokenLimit('glm-4.5')).toBe(131072);
    });
  });

  describe('Other models', () => {
    it('should return the correct limit for deepseek-r1', () => {
      expect(tokenLimit('deepseek-r1')).toBe(131072);
    });
    it('should return the correct limit for deepseek-v3', () => {
      expect(tokenLimit('deepseek-v3')).toBe(131072);
    });
    it('should return the correct limit for deepseek-v3.1', () => {
      expect(tokenLimit('deepseek-v3.1')).toBe(131072);
    });
    it('should return the correct limit for kimi-k2-instruct', () => {
      expect(tokenLimit('kimi-k2-instruct')).toBe(131072);
    });
    it('should return the correct limit for gpt-oss', () => {
      expect(tokenLimit('gpt-oss')).toBe(131072);
    });
    it('should return the correct limit for llama-4-scout', () => {
      expect(tokenLimit('llama-4-scout')).toBe(10485760);
    });
    it('should return the correct limit for mistral-large-2', () => {
      expect(tokenLimit('mistral-large-2')).toBe(131072);
    });
  });

  // Test for default limit
  it('should return the default token limit for an unknown model', () => {
    expect(tokenLimit('unknown-model-v1.0')).toBe(DEFAULT_TOKEN_LIMIT);
  });

  // Test with complex model string
  it('should return the correct limit for a complex model string', () => {
    expect(tokenLimit(' a/b/c|GPT-4o:gpt-4o-2024-05-13-q4 ')).toBe(131072);
  });

  // Test case-insensitive matching
  it('should handle case-insensitive model names', () => {
    expect(tokenLimit('GPT-4O')).toBe(131072);
    expect(tokenLimit('CLAUDE-3.5-SONNET')).toBe(200000);
  });
});
expect(tokenLimit('mistral-large-2')).toBe(131072); + }); + }); + + // Test for default limit + it('should return the default token limit for an unknown model', () => { + expect(tokenLimit('unknown-model-v1.0')).toBe(DEFAULT_TOKEN_LIMIT); + }); + + // Test with complex model string + it('should return the correct limit for a complex model string', () => { + expect(tokenLimit(' a/b/c|GPT-4o:gpt-4o-2024-05-13-q4 ')).toBe(131072); + }); + + // Test case-insensitive matching + it('should handle case-insensitive model names', () => { + expect(tokenLimit('GPT-4O')).toBe(131072); + expect(tokenLimit('CLAUDE-3.5-SONNET')).toBe(200000); + }); +}); diff --git a/packages/core/src/core/tokenLimits.ts b/packages/core/src/core/tokenLimits.ts index d238cdb3a..3b0953c2b 100644 --- a/packages/core/src/core/tokenLimits.ts +++ b/packages/core/src/core/tokenLimits.ts @@ -1,32 +1,151 @@ +type Model = string; +type TokenCount = number; + +export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072; // 128K (power-of-two) + /** - * @license - * Copyright 2025 Google LLC - * SPDX-License-Identifier: Apache-2.0 + * Accurate numeric limits: + * - power-of-two approximations (128K -> 131072, 256K -> 262144, etc.) + * - vendor-declared exact values (e.g., 200k -> 200000) are used as stated in docs. */ +const LIMITS = { + "32k": 32_768, + "64k": 65_536, + "128k": 131_072, + "200k": 200_000, // vendor-declared decimal (OpenAI / Anthropic use 200k) + "256k": 262_144, + "512k": 524_288, + "1m": 1_048_576, + "2m": 2_097_152, + "10m": 10_485_760, // 10 million tokens +} as const; -type Model = string; -type TokenCount = number; +/** Robust normalizer: strips provider prefixes, pipes/colons, date/version suffixes, etc. */ +export function normalize(model: string): string { + let s = (model ?? "").toLowerCase().trim(); + + // keep final path segment (strip provider prefixes), handle pipe/colon + s = s.replace(/^.*\//, ""); + s = s.split("|").pop() ?? s; + s = s.split(":").pop() ?? 
s; + + // collapse whitespace to single hyphen + s = s.replace(/\s+/g, "-"); + + // remove trailing build / date / revision suffixes: + // - dates (e.g., -20250219), -v1, version numbers, 'latest', 'preview' etc. + s = s.replace(/-preview/g, ""); + // Special handling for Qwen model names that include "-latest" as part of the model name + if (!s.match(/^qwen-(?:plus|flash)-latest$/)) { + // \d{6,} - Match 6 or more digits (dates) like -20250219 (6+ digit dates) + // \d+x\d+b - Match patterns like 4x8b, -7b, -70b + // v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3 + // -\d+(?:\.\d+)+ - Match version numbers with dots (that are preceded by a dash), + // like -1.1, -2.0.1 but only when they're suffixes, Example: model-test-1.1 → model-test; + // Note: this does NOT match 4.1 in gpt-4.1 because there's no dash before 4.1 in that context. + // latest - Match the literal string "latest" + s = s.replace(/-(?:\d{6,}|\d+x\d+b|v\d+(?:\.\d+)*|-\d+(?:\.\d+)+|latest)$/g, ""); + } + + // remove quantization / numeric / precision suffixes common in local/community models + s = s.replace(/-(?:\d?bit|int[48]|bf16|fp16|q[45]|quantized)$/g, ""); + + return s; +} -export const DEFAULT_TOKEN_LIMIT = 1_048_576; +/** Ordered regex patterns: most specific -> most general (first match wins). */ +const PATTERNS: Array<[RegExp, TokenCount]> = [ + // ------------------- + // Google Gemini + // ------------------- + [/^gemini-1\.5-pro$/, LIMITS["2m"]], + [/^gemini-1\.5-flash$/, LIMITS["1m"]], + [/^gemini-2\.5-pro.*$/, LIMITS["1m"]], + [/^gemini-2\.5-flash.*$/, LIMITS["1m"]], + [/^gemini-2\.0-flash-image-generation$/, LIMITS["32k"]], + [/^gemini-2\.0-flash.*$/, LIMITS["1m"]], + // ------------------- + // OpenAI (o3 / o4-mini / gpt-4.1 / gpt-4o family) + // o3 and o4-mini document a 200,000-token context window (decimal). + // Note: GPT-4.1 models typically report 1_048_576 (1M) context in OpenAI announcements. 
+ [/^o3(?:-mini|$).*$/, LIMITS["200k"]], + [/^o3.*$/, LIMITS["200k"]], + [/^o4-mini.*$/, LIMITS["200k"]], + [/^gpt-4\.1-mini.*$/, LIMITS["1m"]], + [/^gpt-4\.1.*$/, LIMITS["1m"]], + [/^gpt-4o-mini.*$/, LIMITS["128k"]], + [/^gpt-4o.*$/, LIMITS["128k"]], + [/^gpt-4.*$/, LIMITS["128k"]], + + // ------------------- + // Anthropic Claude + // - Claude Sonnet / Sonnet 3.5 and related Sonnet variants: 200,000 tokens documented. + // - Some Sonnet/Opus models offer 1M in beta/enterprise tiers (handled separately if needed). + [/^claude-3\.5-sonnet.*$/, LIMITS["200k"]], + [/^claude-3\.7-sonnet.*$/, LIMITS["1m"]], // some Sonnet 3.7/Opus variants advertise 1M beta in docs + [/^claude-sonnet-4.*$/, LIMITS["1m"]], + [/^claude-opus-4.*$/, LIMITS["1m"]], + + // ------------------- + // Alibaba / Qwen + // ------------------- + // Commercial Qwen3-Coder-Plus: 1M token context + [/^qwen3-coder-plus(-.*)?$/, LIMITS["1m"]], // catches "qwen3-coder-plus" and date variants + + // Commercial Qwen3-Coder-Flash: 1M token context + [/^qwen3-coder-flash(-.*)?$/, LIMITS["1m"]], // catches "qwen3-coder-flash" and date variants + + // Open-source Qwen3-Coder variants: 256K native + [/^qwen3-coder-.*$/, LIMITS["256k"]], + // Open-source Qwen3 2507 variants: 256K native + [/^qwen3-.*-2507-.*$/, LIMITS["256k"]], + + // Open-source long-context Qwen2.5-1M + [/^qwen2\.5-1m.*$/, LIMITS["1m"]], + + // Standard Qwen2.5: 128K + [/^qwen2\.5.*$/, LIMITS["128k"]], + + // Studio commercial Qwen-Plus / Qwen-Flash / Qwen-Turbo + [/^qwen-plus-latest$/, LIMITS["1m"]], // Commercial latest: 1M + [/^qwen-plus.*$/, LIMITS["128k"]], // Standard: 128K + [/^qwen-flash-latest$/, LIMITS["1m"]], + [/^qwen-turbo.*$/, LIMITS["128k"]], + + // ------------------- + // ByteDance Seed-OSS (512K) + // ------------------- + [/^seed-oss.*$/, LIMITS["512k"]], + + // ------------------- + // Zhipu GLM + // ------------------- + [/^glm-4\.5v.*$/, LIMITS["64k"]], + [/^glm-4\.5-air.*$/, LIMITS["128k"]], + [/^glm-4\.5.*$/, 
LIMITS["128k"]], + + // ------------------- + // DeepSeek / GPT-OSS / Kimi / Llama & Mistral examples + // ------------------- + [/^deepseek-r1.*$/, LIMITS["128k"]], + [/^deepseek-v3(?:\.1)?.*$/, LIMITS["128k"]], + [/^kimi-k2-instruct.*$/, LIMITS["128k"]], + [/^gpt-oss.*$/, LIMITS["128k"]], + [/^llama-4-scout.*$/, LIMITS["10m"] as unknown as TokenCount], // ultra-long variants - handle carefully + [/^mistral-large-2.*$/, LIMITS["128k"]], +]; + +/** Return the token limit for a model string (uses normalize + ordered regex list). */ export function tokenLimit(model: Model): TokenCount { - // Add other models as they become relevant or if specified by config - // Pulled from https://ai.google.dev/gemini-api/docs/models - switch (model) { - case 'gemini-1.5-pro': - return 2_097_152; - case 'gemini-1.5-flash': - case 'gemini-2.5-pro-preview-05-06': - case 'gemini-2.5-pro-preview-06-05': - case 'gemini-2.5-pro': - case 'gemini-2.5-flash-preview-05-20': - case 'gemini-2.5-flash': - case 'gemini-2.5-flash-lite': - case 'gemini-2.0-flash': - return 1_048_576; - case 'gemini-2.0-flash-preview-image-generation': - return 32_000; - default: - return DEFAULT_TOKEN_LIMIT; + const norm = normalize(model); + + for (const [regex, limit] of PATTERNS) { + if (regex.test(norm)) { + return limit; + } } + + // final fallback: DEFAULT_TOKEN_LIMIT (power-of-two 128K) + return DEFAULT_TOKEN_LIMIT; }