From c2f7be5245d93fa6a6d31945a3dd1486318f1d2f Mon Sep 17 00:00:00 2001
From: Luca Steeb
Date: Mon, 22 Sep 2025 23:59:38 +0000
Subject: [PATCH 1/3] refactor(chat): refine token estimation logic in
 estimateTokens function

- Removed redundant estimation of tokens when promptTokens and
  completionTokens are provided by the API.
- Now only estimate prompt tokens if promptTokens is missing and
  messages are available.
- Only estimate completion tokens if completionTokens is missing and
  content is available.
- Improved code clarity by separating conditions for prompt and
  completion token estimation.
- Maintained fallback logic with error logging for encoding failures.

Co-authored-by: terragon-labs[bot]
---
 .../gateway/src/chat/tools/estimate-tokens.ts | 73 +++++++++----------
 1 file changed, 34 insertions(+), 39 deletions(-)

diff --git a/apps/gateway/src/chat/tools/estimate-tokens.ts b/apps/gateway/src/chat/tools/estimate-tokens.ts
index c62343d54..a6367686b 100644
--- a/apps/gateway/src/chat/tools/estimate-tokens.ts
+++ b/apps/gateway/src/chat/tools/estimate-tokens.ts
@@ -19,47 +19,42 @@ export function estimateTokens(
 	let calculatedPromptTokens = promptTokens;
 	let calculatedCompletionTokens = completionTokens;
 
-	// Always estimate missing tokens for any provider
-	if (!promptTokens || !completionTokens) {
-		// Estimate prompt tokens using encodeChat for better accuracy
-		if (!promptTokens && messages && messages.length > 0) {
-			try {
-				// Convert messages to the format expected by gpt-tokenizer
-				const chatMessages: ChatMessage[] = messages.map((m) => ({
-					role: m.role,
-					content:
-						typeof m.content === "string"
-							? m.content
-							: JSON.stringify(m.content),
-					name: m.name,
-				}));
-				calculatedPromptTokens = encodeChat(
-					chatMessages,
-					DEFAULT_TOKENIZER_MODEL,
-				).length;
-			} catch (error) {
-				// Fallback to simple estimation if encoding fails
-				logger.error(
-					"Failed to encode chat messages in estimate tokens",
-					error instanceof Error ? error : new Error(String(error)),
-				);
-				calculatedPromptTokens =
-					messages.reduce((acc, m) => acc + (m.content?.length || 0), 0) / 4;
-			}
+	// Estimate prompt tokens only if not provided by the API
+	if (!promptTokens && messages && messages.length > 0) {
+		try {
+			// Convert messages to the format expected by gpt-tokenizer
+			const chatMessages: ChatMessage[] = messages.map((m) => ({
+				role: m.role,
+				content:
+					typeof m.content === "string" ? m.content : JSON.stringify(m.content),
+				name: m.name,
+			}));
+			calculatedPromptTokens = encodeChat(
+				chatMessages,
+				DEFAULT_TOKENIZER_MODEL,
+			).length;
+		} catch (error) {
+			// Fallback to simple estimation if encoding fails
+			logger.error(
+				"Failed to encode chat messages in estimate tokens",
+				error instanceof Error ? error : new Error(String(error)),
+			);
+			calculatedPromptTokens =
+				messages.reduce((acc, m) => acc + (m.content?.length || 0), 0) / 4;
 		}
 	}
 
-		// Estimate completion tokens using encode for better accuracy
-		if (!completionTokens && content) {
-			try {
-				calculatedCompletionTokens = encode(JSON.stringify(content)).length;
-			} catch (error) {
-				// Fallback to simple estimation if encoding fails
-				logger.error(
-					"Failed to encode completion text",
-					error instanceof Error ? error : new Error(String(error)),
-				);
-				calculatedCompletionTokens = content.length / 4;
-			}
+	// Estimate completion tokens only if not provided by the API
+	if (!completionTokens && content) {
+		try {
+			calculatedCompletionTokens = encode(JSON.stringify(content)).length;
+		} catch (error) {
+			// Fallback to simple estimation if encoding fails
+			logger.error(
+				"Failed to encode completion text",
+				error instanceof Error ? error : new Error(String(error)),
+			);
+			calculatedCompletionTokens = content.length / 4;
 		}
 	}

From 3e265ffc859b41ceedcab19f2bd0046c3e8e26be Mon Sep 17 00:00:00 2001
From: Luca Steeb
Date: Tue, 23 Sep 2025 01:17:47 +0100
Subject: [PATCH 2/3] fix(chat): round token estimates to nearest integer

Rounded the fallback token estimates in `estimateTokens` so that both
prompt and completion values are always integers. Also rounded the
estimated byte size for base64 data in `process-image-url`.
---
 apps/gateway/src/chat/tools/estimate-tokens.ts | 7 ++++---
 packages/models/src/process-image-url.ts       | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/apps/gateway/src/chat/tools/estimate-tokens.ts b/apps/gateway/src/chat/tools/estimate-tokens.ts
index a6367686b..128142827 100644
--- a/apps/gateway/src/chat/tools/estimate-tokens.ts
+++ b/apps/gateway/src/chat/tools/estimate-tokens.ts
@@ -39,8 +39,9 @@ export function estimateTokens(
 				"Failed to encode chat messages in estimate tokens",
 				error instanceof Error ? error : new Error(String(error)),
 			);
-			calculatedPromptTokens =
-				messages.reduce((acc, m) => acc + (m.content?.length || 0), 0) / 4;
+			calculatedPromptTokens = Math.round(
+				messages.reduce((acc, m) => acc + (m.content?.length || 0), 0) / 4,
+			);
 		}
 	}
 
@@ -54,7 +55,7 @@ export function estimateTokens(
 				"Failed to encode completion text",
 				error instanceof Error ? error : new Error(String(error)),
 			);
-			calculatedCompletionTokens = content.length / 4;
+			calculatedCompletionTokens = Math.round(content.length / 4);
 		}
 	}
 
diff --git a/packages/models/src/process-image-url.ts b/packages/models/src/process-image-url.ts
index 55be3ed7d..d2a0d04e6 100644
--- a/packages/models/src/process-image-url.ts
+++ b/packages/models/src/process-image-url.ts
@@ -28,7 +28,7 @@ export async function processImageUrl(
 	const base64Data = isBase64 ? data : btoa(data);
 
 	// Validate size (estimate: base64 adds ~33% overhead)
-	const estimatedSize = (base64Data.length * 3) / 4;
+	const estimatedSize = Math.round((base64Data.length * 3) / 4);
 	if (estimatedSize > 20 * 1024 * 1024) {
 		logger.warn("Data URL image size exceeds limit", { estimatedSize });
 		throw new Error("Image size exceeds 20MB limit");

From 200850923cd53dd749339bf72b8f69af85aac174 Mon Sep 17 00:00:00 2001
From: Luca Steeb
Date: Tue, 23 Sep 2025 01:21:13 +0100
Subject: [PATCH 3/3] refactor(chat): streamline token and cost logic

Removed redundant token estimation when `promptTokens` and
`completionTokens` are provided. Simplified token-related calculations
and ensured consistent handling across all relevant functions.
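
With estimation removed from this path, the totals are derived directly
from the provider-reported counts, with missing values treated as zero.
For reference, the inline fallback reduces to this (sketch reusing the
field names from the diff below):

	const totalTokens =
		(promptTokens || 0) + (completionTokens || 0) + (reasoningTokens || 0);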
---
 apps/gateway/src/chat/chat.ts | 35 ++++++++---------------------
 1 file changed, 8 insertions(+), 27 deletions(-)

diff --git a/apps/gateway/src/chat/chat.ts b/apps/gateway/src/chat/chat.ts
index 005b2244b..3658173be 100644
--- a/apps/gateway/src/chat/chat.ts
+++ b/apps/gateway/src/chat/chat.ts
@@ -3011,25 +3011,11 @@ chat.openapi(completions, async (c) => {
 		images,
 	} = parseProviderResponse(usedProvider, json, messages);
 
-	// Debug: Log images found in response
-	logger.debug("Gateway - parseProviderResponse extracted images", { images });
-	logger.debug("Gateway - Used provider", { usedProvider });
-	logger.debug("Gateway - Used model", { usedModel });
-
-	// Estimate tokens if not provided by the API
-	const { calculatedPromptTokens, calculatedCompletionTokens } = estimateTokens(
-		usedProvider,
-		messages,
-		content,
-		promptTokens,
-		completionTokens,
-	);
-
 	const costs = calculateCosts(
 		usedModel,
 		usedProvider,
-		calculatedPromptTokens,
-		calculatedCompletionTokens,
+		promptTokens,
+		completionTokens,
 		cachedTokens,
 		{
 			prompt: messages.map((m) => m.content).join("\n"),
@@ -3046,11 +3032,9 @@ chat.openapi(completions, async (c) => {
 		content,
 		reasoningContent,
 		finishReason,
-		calculatedPromptTokens,
-		calculatedCompletionTokens,
-		(calculatedPromptTokens || 0) +
-			(calculatedCompletionTokens || 0) +
-			(reasoningTokens || 0),
+		promptTokens,
+		completionTokens,
+		(promptTokens || 0) + (completionTokens || 0) + (reasoningTokens || 0),
 		reasoningTokens,
 		cachedTokens,
 		toolResults,
@@ -3097,13 +3081,10 @@ chat.openapi(completions, async (c) => {
 		content: content,
 		reasoningContent: reasoningContent,
 		finishReason: finishReason,
-		promptTokens: calculatedPromptTokens?.toString() || null,
-		completionTokens: calculatedCompletionTokens?.toString() || null,
+		promptTokens: promptTokens?.toString() || null,
+		completionTokens: completionTokens?.toString() || null,
 		totalTokens:
-			totalTokens ||
-			(
-				(calculatedPromptTokens || 0) + (calculatedCompletionTokens || 0)
-			).toString(),
+			totalTokens || ((promptTokens || 0) + (completionTokens || 0)).toString(),
 		reasoningTokens: reasoningTokens,
 		cachedTokens: cachedTokens?.toString() || null,
 		hasError: false,
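
Note on the fallback heuristics (illustrative sketch, not part of the
patches; these helper names do not exist in the gateway code): both
estimates are plain arithmetic and easy to sanity-check in isolation.

	// Rough fallback used when the tokenizer throws: ~4 characters per
	// token, rounded so downstream consumers always receive integers
	// (mirrors the catch-block fallbacks in estimate-tokens.ts).
	function roughTokenEstimate(text: string): number {
		return Math.round(text.length / 4);
	}

	// Base64 packs 3 bytes into 4 characters, so the decoded payload is
	// roughly 3/4 of the string length (mirrors process-image-url.ts).
	function estimatedByteSize(base64Data: string): number {
		return Math.round((base64Data.length * 3) / 4);
	}

	// roughTokenEstimate("hello world, how are you?") === 6  (25 chars / 4)
	// estimatedByteSize("QUJD") === 3  ("QUJD" decodes to "ABC")

Against the 20 * 1024 * 1024 byte limit in processImageUrl, this puts the
practical ceiling at roughly 28 million base64 characters per image.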