From c2f7be5245d93fa6a6d31945a3dd1486318f1d2f Mon Sep 17 00:00:00 2001
From: Luca Steeb
Date: Mon, 22 Sep 2025 23:59:38 +0000
Subject: [PATCH 1/3] refactor(chat): refine token estimation logic in
 estimateTokens function

- Removed redundant estimation of tokens when promptTokens and
  completionTokens are provided by the API.
- Now only estimate prompt tokens if promptTokens is missing and
  messages are available.
- Only estimate completion tokens if completionTokens is missing and
  content is available.
- Improved code clarity by separating conditions for prompt and
  completion token estimation.
- Maintained fallback logic with error logging for encoding failures.

Co-authored-by: terragon-labs[bot]
---
 .../gateway/src/chat/tools/estimate-tokens.ts | 73 +++++++++----------
 1 file changed, 34 insertions(+), 39 deletions(-)

diff --git a/apps/gateway/src/chat/tools/estimate-tokens.ts b/apps/gateway/src/chat/tools/estimate-tokens.ts
index c62343d54..a6367686b 100644
--- a/apps/gateway/src/chat/tools/estimate-tokens.ts
+++ b/apps/gateway/src/chat/tools/estimate-tokens.ts
@@ -19,47 +19,42 @@ export function estimateTokens(
 	let calculatedPromptTokens = promptTokens;
 	let calculatedCompletionTokens = completionTokens;
 
-	// Always estimate missing tokens for any provider
-	if (!promptTokens || !completionTokens) {
-		// Estimate prompt tokens using encodeChat for better accuracy
-		if (!promptTokens && messages && messages.length > 0) {
-			try {
-				// Convert messages to the format expected by gpt-tokenizer
-				const chatMessages: ChatMessage[] = messages.map((m) => ({
-					role: m.role,
-					content:
-						typeof m.content === "string"
-							? m.content
-							: JSON.stringify(m.content),
-					name: m.name,
-				}));
-				calculatedPromptTokens = encodeChat(
-					chatMessages,
-					DEFAULT_TOKENIZER_MODEL,
-				).length;
-			} catch (error) {
-				// Fallback to simple estimation if encoding fails
-				logger.error(
-					"Failed to encode chat messages in estimate tokens",
-					error instanceof Error ? error : new Error(String(error)),
-				);
-				calculatedPromptTokens =
-					messages.reduce((acc, m) => acc + (m.content?.length || 0), 0) / 4;
-			}
+	// Estimate prompt tokens only if not provided by the API
+	if (!promptTokens && messages && messages.length > 0) {
+		try {
+			// Convert messages to the format expected by gpt-tokenizer
+			const chatMessages: ChatMessage[] = messages.map((m) => ({
+				role: m.role,
+				content:
+					typeof m.content === "string" ? m.content : JSON.stringify(m.content),
+				name: m.name,
+			}));
+			calculatedPromptTokens = encodeChat(
+				chatMessages,
+				DEFAULT_TOKENIZER_MODEL,
+			).length;
+		} catch (error) {
+			// Fallback to simple estimation if encoding fails
+			logger.error(
+				"Failed to encode chat messages in estimate tokens",
+				error instanceof Error ? error : new Error(String(error)),
+			);
+			calculatedPromptTokens =
+				messages.reduce((acc, m) => acc + (m.content?.length || 0), 0) / 4;
 		}
 	}
 
-		// Estimate completion tokens using encode for better accuracy
-		if (!completionTokens && content) {
-			try {
-				calculatedCompletionTokens = encode(JSON.stringify(content)).length;
-			} catch (error) {
-				// Fallback to simple estimation if encoding fails
-				logger.error(
-					"Failed to encode completion text",
-					error instanceof Error ? error : new Error(String(error)),
-				);
-				calculatedCompletionTokens = content.length / 4;
-			}
+	// Estimate completion tokens only if not provided by the API
+	if (!completionTokens && content) {
+		try {
+			calculatedCompletionTokens = encode(JSON.stringify(content)).length;
+		} catch (error) {
+			// Fallback to simple estimation if encoding fails
+			logger.error(
+				"Failed to encode completion text",
+				error instanceof Error ? error : new Error(String(error)),
+			);
+			calculatedCompletionTokens = content.length / 4;
 		}
 	}

From 3e265ffc859b41ceedcab19f2bd0046c3e8e26be Mon Sep 17 00:00:00 2001
From: Luca Steeb
Date: Tue, 23 Sep 2025 01:17:47 +0100
Subject: [PATCH 2/3] fix(chat): round token estimates to nearest integer

Rounded the fallback token estimates in `estimateTokens` so that both
prompt and completion values are always integers. Also rounded the
estimated byte size for base64 data in `process-image-url`.
---
 apps/gateway/src/chat/tools/estimate-tokens.ts | 7 ++++---
 packages/models/src/process-image-url.ts       | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/apps/gateway/src/chat/tools/estimate-tokens.ts b/apps/gateway/src/chat/tools/estimate-tokens.ts
index a6367686b..128142827 100644
--- a/apps/gateway/src/chat/tools/estimate-tokens.ts
+++ b/apps/gateway/src/chat/tools/estimate-tokens.ts
@@ -39,8 +39,9 @@ export function estimateTokens(
 				"Failed to encode chat messages in estimate tokens",
 				error instanceof Error ? error : new Error(String(error)),
 			);
-			calculatedPromptTokens =
-				messages.reduce((acc, m) => acc + (m.content?.length || 0), 0) / 4;
+			calculatedPromptTokens = Math.round(
+				messages.reduce((acc, m) => acc + (m.content?.length || 0), 0) / 4,
+			);
 		}
 	}
 
@@ -54,7 +55,7 @@ export function estimateTokens(
 				"Failed to encode completion text",
 				error instanceof Error ? error : new Error(String(error)),
 			);
-			calculatedCompletionTokens = content.length / 4;
+			calculatedCompletionTokens = Math.round(content.length / 4);
 		}
 	}
 
diff --git a/packages/models/src/process-image-url.ts b/packages/models/src/process-image-url.ts
index 55be3ed7d..d2a0d04e6 100644
--- a/packages/models/src/process-image-url.ts
+++ b/packages/models/src/process-image-url.ts
@@ -28,7 +28,7 @@ export async function processImageUrl(
 	const base64Data = isBase64 ? data : btoa(data);
 
 	// Validate size (estimate: base64 adds ~33% overhead)
-	const estimatedSize = (base64Data.length * 3) / 4;
+	const estimatedSize = Math.round((base64Data.length * 3) / 4);
 	if (estimatedSize > 20 * 1024 * 1024) {
 		logger.warn("Data URL image size exceeds limit", { estimatedSize });
 		throw new Error("Image size exceeds 20MB limit");

From 200850923cd53dd749339bf72b8f69af85aac174 Mon Sep 17 00:00:00 2001
From: Luca Steeb
Date: Tue, 23 Sep 2025 01:21:13 +0100
Subject: [PATCH 3/3] refactor(chat): streamline token and cost logic

Removed redundant token estimation when `promptTokens` and
`completionTokens` are provided. Simplified token-related calculations
and ensured consistent handling across all relevant functions.
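
With estimation removed from this path, the totals are derived directly
from the provider-reported counts, with missing values treated as zero.
For reference, the inline fallback reduces to this (sketch reusing the
field names from the diff below):

	const totalTokens =
		(promptTokens || 0) + (completionTokens || 0) + (reasoningTokens || 0);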
---
 apps/gateway/src/chat/chat.ts | 35 ++++++++---------------------
 1 file changed, 8 insertions(+), 27 deletions(-)

diff --git a/apps/gateway/src/chat/chat.ts b/apps/gateway/src/chat/chat.ts
index 005b2244b..3658173be 100644
--- a/apps/gateway/src/chat/chat.ts
+++ b/apps/gateway/src/chat/chat.ts
@@ -3011,25 +3011,11 @@ chat.openapi(completions, async (c) => {
 		images,
 	} = parseProviderResponse(usedProvider, json, messages);
 
-	// Debug: Log images found in response
-	logger.debug("Gateway - parseProviderResponse extracted images", { images });
-	logger.debug("Gateway - Used provider", { usedProvider });
-	logger.debug("Gateway - Used model", { usedModel });
-
-	// Estimate tokens if not provided by the API
-	const { calculatedPromptTokens, calculatedCompletionTokens } = estimateTokens(
-		usedProvider,
-		messages,
-		content,
-		promptTokens,
-		completionTokens,
-	);
-
 	const costs = calculateCosts(
 		usedModel,
 		usedProvider,
-		calculatedPromptTokens,
-		calculatedCompletionTokens,
+		promptTokens,
+		completionTokens,
 		cachedTokens,
 		{
 			prompt: messages.map((m) => m.content).join("\n"),
@@ -3046,11 +3032,9 @@ chat.openapi(completions, async (c) => {
 		content,
 		reasoningContent,
 		finishReason,
-		calculatedPromptTokens,
-		calculatedCompletionTokens,
-		(calculatedPromptTokens || 0) +
-			(calculatedCompletionTokens || 0) +
-			(reasoningTokens || 0),
+		promptTokens,
+		completionTokens,
+		(promptTokens || 0) + (completionTokens || 0) + (reasoningTokens || 0),
 		reasoningTokens,
 		cachedTokens,
 		toolResults,
@@ -3097,13 +3081,10 @@ chat.openapi(completions, async (c) => {
 		content: content,
 		reasoningContent: reasoningContent,
 		finishReason: finishReason,
-		promptTokens: calculatedPromptTokens?.toString() || null,
-		completionTokens: calculatedCompletionTokens?.toString() || null,
+		promptTokens: promptTokens?.toString() || null,
+		completionTokens: completionTokens?.toString() || null,
 		totalTokens:
-			totalTokens ||
-			(
-				(calculatedPromptTokens || 0) + (calculatedCompletionTokens || 0)
-			).toString(),
+			totalTokens || ((promptTokens || 0) + (completionTokens || 0)).toString(),
 		reasoningTokens: reasoningTokens,
 		cachedTokens: cachedTokens?.toString() || null,
 		hasError: false,
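
Note on the fallback heuristics (illustrative sketch, not part of the
patches; these helper names do not exist in the gateway code): both
estimates are plain arithmetic and easy to sanity-check in isolation.

	// Rough fallback used when the tokenizer throws: ~4 characters per
	// token, rounded so downstream consumers always receive integers
	// (mirrors the catch-block fallbacks in estimate-tokens.ts).
	function roughTokenEstimate(text: string): number {
		return Math.round(text.length / 4);
	}

	// Base64 packs 3 bytes into 4 characters, so the decoded payload is
	// roughly 3/4 of the string length (mirrors process-image-url.ts).
	function estimatedByteSize(base64Data: string): number {
		return Math.round((base64Data.length * 3) / 4);
	}

	// roughTokenEstimate("hello world, how are you?") === 6  (25 chars / 4)
	// estimatedByteSize("QUJD") === 3  ("QUJD" decodes to "ABC")

Against the 20 * 1024 * 1024 byte limit in processImageUrl, this puts the
practical ceiling at roughly 28 million base64 characters per image.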