Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions plugin/scripts/context-generator.cjs

Large diffs are not rendered by default.

48 changes: 24 additions & 24 deletions plugin/scripts/mcp-server.cjs

Large diffs are not rendered by default.

499 changes: 251 additions & 248 deletions plugin/scripts/worker-service.cjs

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions plugin/ui/viewer-bundle.js

Large diffs are not rendered by default.

66 changes: 61 additions & 5 deletions src/services/worker/GeminiAgent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import { logger } from '../../utils/logger.js';
import { buildInitPrompt, buildObservationPrompt, buildSummaryPrompt, buildContinuationPrompt } from '../../sdk/prompts.js';
import { SettingsDefaultsManager } from '../../shared/SettingsDefaultsManager.js';
import { getCredential } from '../../shared/EnvManager.js';
import { USER_SETTINGS_PATH } from '../../shared/paths.js';
import { estimateTokens } from '../../shared/timeline-formatting.js';
import type { ActiveSession, ConversationMessage } from '../worker-types.js';
import { ModeManager } from '../domain/ModeManager.js';
import {
Expand Down Expand Up @@ -56,6 +58,10 @@ const GEMINI_RPM_LIMITS: Record<GeminiModel, number> = {
// Track last request time for rate limiting
let lastRequestTime = 0;

// Context window limits (prevents O(N²) token cost growth)
const DEFAULT_MAX_CONTEXT_MESSAGES = 20; // Maximum messages to keep in conversation history
const DEFAULT_MAX_ESTIMATED_TOKENS = 100000; // ~100k tokens max context (safety limit)

/**
* Enforce RPM rate limit for Gemini free tier.
* Waits the required time between requests based on model's RPM limit + 100ms safety buffer.
Expand Down Expand Up @@ -342,6 +348,54 @@ export class GeminiAgent {
}
}

/**
* Truncate conversation history to prevent runaway context costs.
* Keeps most recent messages within both message count and token budget.
* Returns a new array — never mutates the original history.
*/
/**
 * Truncate conversation history to prevent runaway context costs.
 *
 * Applies a sliding window over the most recent messages, bounded by both a
 * message-count limit and an estimated-token budget (both user-configurable
 * via settings, with safe defaults). Keeps most recent messages within both
 * limits. Returns a new array — never mutates the original history.
 *
 * @param history - Full conversation history, oldest message first.
 * @returns The history, possibly truncated to the most recent messages.
 */
private truncateHistory(history: ConversationMessage[]): ConversationMessage[] {
  const settings = SettingsDefaultsManager.loadFromFile(USER_SETTINGS_PATH);

  // parseInt(…, 10): explicit radix, consistent with SettingsRoutes validation.
  // `|| DEFAULT` falls back on NaN (unset/garbage) and on 0; Math.max(1, …)
  // guards against negative configured values, which would otherwise collapse
  // the sliding window below to a single message.
  const MAX_CONTEXT_MESSAGES = Math.max(1, parseInt(settings.CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES, 10) || DEFAULT_MAX_CONTEXT_MESSAGES);
  const MAX_ESTIMATED_TOKENS = Math.max(1, parseInt(settings.CLAUDE_MEM_GEMINI_MAX_TOKENS, 10) || DEFAULT_MAX_ESTIMATED_TOKENS);

  // Fast path: within the message limit — but still verify the token budget,
  // since a few huge messages can exceed it on their own.
  if (history.length <= MAX_CONTEXT_MESSAGES) {
    const totalTokens = history.reduce((sum, m) => sum + estimateTokens(m.content), 0);
    if (totalTokens <= MAX_ESTIMATED_TOKENS) {
      return history;
    }
  }

  // Sliding window: walk backwards (most recent first), keeping messages
  // until either limit would be exceeded.
  const truncated: ConversationMessage[] = [];
  let tokenCount = 0;

  for (let i = history.length - 1; i >= 0; i--) {
    const msg = history[i];
    const msgTokens = estimateTokens(msg.content);

    // Always include at least the newest message — an empty contents array
    // would cause a hard Gemini API error, which is worse than an oversized request.
    if (truncated.length > 0 && (truncated.length >= MAX_CONTEXT_MESSAGES || tokenCount + msgTokens > MAX_ESTIMATED_TOKENS)) {
      logger.warn('SDK', 'Context window truncated to prevent runaway costs', {
        originalMessages: history.length,
        keptMessages: truncated.length,
        droppedMessages: i + 1,
        estimatedTokens: tokenCount,
        tokenLimit: MAX_ESTIMATED_TOKENS
      });
      break;
    }

    truncated.unshift(msg); // Prepend so chronological order is preserved
    tokenCount += msgTokens;
  }

  return truncated;
}

/**
* Convert shared ConversationMessage array to Gemini's contents format
* Maps 'assistant' role to 'model' for Gemini API compatibility
Expand All @@ -354,20 +408,22 @@ export class GeminiAgent {
}

/**
* Query Gemini via REST API with full conversation history (multi-turn)
* Sends the entire conversation context for coherent responses
* Query Gemini via REST API with truncated conversation history (multi-turn)
* Truncates history to prevent O(N²) token cost growth, then sends for coherent responses
*/
private async queryGeminiMultiTurn(
history: ConversationMessage[],
apiKey: string,
model: GeminiModel,
rateLimitingEnabled: boolean
): Promise<{ content: string; tokensUsed?: number }> {
const contents = this.conversationToGeminiContents(history);
const totalChars = history.reduce((sum, m) => sum + m.content.length, 0);
const truncatedHistory = this.truncateHistory(history);
const contents = this.conversationToGeminiContents(truncatedHistory);
const totalChars = truncatedHistory.reduce((sum, m) => sum + m.content.length, 0);

logger.debug('SDK', `Querying Gemini multi-turn (${model})`, {
turns: history.length,
turns: truncatedHistory.length,
totalTurns: history.length,
totalChars
});

Expand Down
18 changes: 18 additions & 0 deletions src/services/worker/http/routes/SettingsRoutes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ export class SettingsRoutes extends BaseRouteHandler {
'CLAUDE_MEM_GEMINI_API_KEY',
'CLAUDE_MEM_GEMINI_MODEL',
'CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED',
'CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES',
'CLAUDE_MEM_GEMINI_MAX_TOKENS',
// OpenRouter Configuration
'CLAUDE_MEM_OPENROUTER_API_KEY',
'CLAUDE_MEM_OPENROUTER_MODEL',
Expand Down Expand Up @@ -248,6 +250,22 @@ export class SettingsRoutes extends BaseRouteHandler {
}
}

// Validate CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES: positive integer, capped at 100.
// (These limits feed the sliding-window truncation in GeminiAgent.truncateHistory.)
if (settings.CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES) {
  const count = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES, 10);
  // Number.isNaN avoids the global isNaN's implicit coercion semantics.
  if (Number.isNaN(count) || count < 1 || count > 100) {
    return { valid: false, error: 'CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES must be between 1 and 100' };
  }
}

// Validate CLAUDE_MEM_GEMINI_MAX_TOKENS: estimated-token budget, 1k–1M.
if (settings.CLAUDE_MEM_GEMINI_MAX_TOKENS) {
  const tokens = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_TOKENS, 10);
  if (Number.isNaN(tokens) || tokens < 1000 || tokens > 1000000) {
    return { valid: false, error: 'CLAUDE_MEM_GEMINI_MAX_TOKENS must be between 1000 and 1000000' };
  }
}

// Validate CLAUDE_MEM_CONTEXT_OBSERVATIONS
if (settings.CLAUDE_MEM_CONTEXT_OBSERVATIONS) {
const obsCount = parseInt(settings.CLAUDE_MEM_CONTEXT_OBSERVATIONS, 10);
Expand Down
4 changes: 4 additions & 0 deletions src/shared/SettingsDefaultsManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ export interface SettingsDefaults {
CLAUDE_MEM_GEMINI_API_KEY: string;
CLAUDE_MEM_GEMINI_MODEL: string; // 'gemini-2.5-flash-lite' | 'gemini-2.5-flash' | 'gemini-3-flash-preview'
CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED: string; // 'true' | 'false' - enable rate limiting for free tier
CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES: string; // Max messages in Gemini context window (prevents O(N²) cost growth)
CLAUDE_MEM_GEMINI_MAX_TOKENS: string; // Max estimated tokens for Gemini context (~100k safety limit)
CLAUDE_MEM_OPENROUTER_API_KEY: string;
CLAUDE_MEM_OPENROUTER_MODEL: string;
CLAUDE_MEM_OPENROUTER_SITE_URL: string;
Expand Down Expand Up @@ -82,6 +84,8 @@ export class SettingsDefaultsManager {
CLAUDE_MEM_GEMINI_API_KEY: '', // Empty by default, can be set via UI or env
CLAUDE_MEM_GEMINI_MODEL: 'gemini-2.5-flash-lite', // Default Gemini model (highest free tier RPM)
CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED: 'true', // Rate limiting ON by default for free tier users
CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES: '20', // Max messages in Gemini context window
CLAUDE_MEM_GEMINI_MAX_TOKENS: '100000', // Max estimated tokens (~100k safety limit)
CLAUDE_MEM_OPENROUTER_API_KEY: '', // Empty by default, can be set via UI or env
CLAUDE_MEM_OPENROUTER_MODEL: 'xiaomi/mimo-v2-flash:free', // Default OpenRouter model (free tier)
CLAUDE_MEM_OPENROUTER_SITE_URL: '', // Optional: for OpenRouter analytics
Expand Down
84 changes: 84 additions & 0 deletions tests/gemini_agent.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,90 @@ describe('GeminiAgent', () => {
}
});

describe('conversation history truncation', () => {
  // Shared ActiveSession scaffolding — individual tests override only the
  // fields they exercise. Built fresh per call so each test gets its own
  // AbortController and timestamp.
  const makeSession = (overrides: Record<string, unknown> = {}) =>
    ({
      sessionDbId: 1,
      contentSessionId: 'test-session',
      memorySessionId: 'mem-session-123',
      project: 'test-project',
      userPrompt: 'test prompt',
      conversationHistory: [],
      lastPromptNumber: 1,
      cumulativeInputTokens: 0,
      cumulativeOutputTokens: 0,
      pendingMessages: [],
      abortController: new AbortController(),
      generatorPromise: null,
      earliestPendingTimestamp: null,
      currentProvider: null,
      startTime: Date.now(),
      processingMessageIds: [],
      ...overrides
    } as any);

  // Replace global fetch with a minimal successful Gemini response.
  const stubGeminiResponse = () => {
    global.fetch = mock(() => Promise.resolve(new Response(JSON.stringify({
      candidates: [{ content: { parts: [{ text: 'response' }] } }]
    }))));
  };

  // Parse the JSON body of the first request the agent issued.
  const firstRequestBody = () => JSON.parse((global.fetch as any).mock.calls[0][1].body);

  it('should truncate history when message count exceeds limit', async () => {
    // 25 small alternating user/assistant messages — above the default limit of 20.
    const history = Array.from({ length: 25 }, (_, i) => ({
      role: i % 2 === 0 ? 'user' : 'assistant',
      content: `message ${i}`
    }));

    const session = makeSession({ conversationHistory: history, lastPromptNumber: 2 });

    stubGeminiResponse();
    await agent.startSession(session);

    // The request body should have truncated contents (init adds 1 more, so 26 total → truncated to 20)
    expect(firstRequestBody().contents.length).toBeLessThanOrEqual(20);
  });

  it('should always keep at least the newest message even if it exceeds token limit', async () => {
    // Override settings to have a very low token limit
    loadFromFileSpy.mockImplementation(() => ({
      ...SettingsDefaultsManager.getAllDefaults(),
      CLAUDE_MEM_GEMINI_API_KEY: 'test-api-key',
      CLAUDE_MEM_GEMINI_MODEL: 'gemini-2.5-flash-lite',
      CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED: 'false',
      CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES: '20',
      CLAUDE_MEM_GEMINI_MAX_TOKENS: '1000', // Very low: ~250 chars
      CLAUDE_MEM_DATA_DIR: '/tmp/claude-mem-test',
    }));

    // A single prompt of ~2000 estimated tokens — well above the 1000 limit.
    const session = makeSession({ userPrompt: 'x'.repeat(8000) });

    stubGeminiResponse();
    await agent.startSession(session);

    // Should still send at least 1 message (the newest), not empty contents
    expect(firstRequestBody().contents.length).toBeGreaterThanOrEqual(1);
  });
});

describe('gemini-3-flash-preview model support', () => {
it('should accept gemini-3-flash-preview as a valid model', async () => {
// The GeminiModel type includes gemini-3-flash-preview - compile-time check
Expand Down
Loading