Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions plugin/scripts/context-generator.cjs

Large diffs are not rendered by default.

48 changes: 24 additions & 24 deletions plugin/scripts/mcp-server.cjs

Large diffs are not rendered by default.

499 changes: 251 additions & 248 deletions plugin/scripts/worker-service.cjs

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions plugin/ui/viewer-bundle.js

Large diffs are not rendered by default.

66 changes: 61 additions & 5 deletions src/services/worker/GeminiAgent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import { logger } from '../../utils/logger.js';
import { buildInitPrompt, buildObservationPrompt, buildSummaryPrompt, buildContinuationPrompt } from '../../sdk/prompts.js';
import { SettingsDefaultsManager } from '../../shared/SettingsDefaultsManager.js';
import { getCredential } from '../../shared/EnvManager.js';
import { USER_SETTINGS_PATH } from '../../shared/paths.js';
import { estimateTokens } from '../../shared/timeline-formatting.js';
import type { ActiveSession, ConversationMessage } from '../worker-types.js';
import { ModeManager } from '../domain/ModeManager.js';
import {
Expand Down Expand Up @@ -56,6 +58,10 @@ const GEMINI_RPM_LIMITS: Record<GeminiModel, number> = {
// Track last request time for rate limiting
let lastRequestTime = 0;

// Context window limits (prevents O(N²) token cost growth)
const DEFAULT_MAX_CONTEXT_MESSAGES = 20; // Maximum messages to keep in conversation history
const DEFAULT_MAX_ESTIMATED_TOKENS = 100000; // ~100k tokens max context (safety limit)

/**
* Enforce RPM rate limit for Gemini free tier.
* Waits the required time between requests based on model's RPM limit + 100ms safety buffer.
Expand Down Expand Up @@ -342,6 +348,54 @@ export class GeminiAgent {
}
}

/**
* Truncate conversation history to prevent runaway context costs.
* Keeps most recent messages within both message count and token budget.
* Returns a new array — never mutates the original history.
*/
/**
 * Truncate conversation history to prevent runaway context costs.
 *
 * Applies a sliding window over the most recent messages, bounded by both a
 * message-count limit and an estimated-token budget (both user-configurable
 * via settings, with safe defaults). Keeps most recent messages within both
 * limits. Returns a new array — never mutates the original history.
 *
 * @param history - Full conversation history, oldest message first.
 * @returns The history, possibly truncated to the most recent messages.
 */
private truncateHistory(history: ConversationMessage[]): ConversationMessage[] {
  const settings = SettingsDefaultsManager.loadFromFile(USER_SETTINGS_PATH);

  // parseInt(…, 10): explicit radix, consistent with SettingsRoutes validation.
  // `|| DEFAULT` falls back on NaN (unset/garbage) and on 0; Math.max(1, …)
  // guards against negative configured values, which would otherwise collapse
  // the sliding window below to a single message.
  const MAX_CONTEXT_MESSAGES = Math.max(1, parseInt(settings.CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES, 10) || DEFAULT_MAX_CONTEXT_MESSAGES);
  const MAX_ESTIMATED_TOKENS = Math.max(1, parseInt(settings.CLAUDE_MEM_GEMINI_MAX_TOKENS, 10) || DEFAULT_MAX_ESTIMATED_TOKENS);

  // Fast path: within the message limit — but still verify the token budget,
  // since a few huge messages can exceed it on their own.
  if (history.length <= MAX_CONTEXT_MESSAGES) {
    const totalTokens = history.reduce((sum, m) => sum + estimateTokens(m.content), 0);
    if (totalTokens <= MAX_ESTIMATED_TOKENS) {
      return history;
    }
  }

  // Sliding window: walk backwards (most recent first), keeping messages
  // until either limit would be exceeded.
  const truncated: ConversationMessage[] = [];
  let tokenCount = 0;

  for (let i = history.length - 1; i >= 0; i--) {
    const msg = history[i];
    const msgTokens = estimateTokens(msg.content);

    // Always include at least the newest message — an empty contents array
    // would cause a hard Gemini API error, which is worse than an oversized request.
    if (truncated.length > 0 && (truncated.length >= MAX_CONTEXT_MESSAGES || tokenCount + msgTokens > MAX_ESTIMATED_TOKENS)) {
      logger.warn('SDK', 'Context window truncated to prevent runaway costs', {
        originalMessages: history.length,
        keptMessages: truncated.length,
        droppedMessages: i + 1,
        estimatedTokens: tokenCount,
        tokenLimit: MAX_ESTIMATED_TOKENS
      });
      break;
    }

    truncated.unshift(msg); // Prepend so chronological order is preserved
    tokenCount += msgTokens;
  }

  return truncated;
}

/**
* Convert shared ConversationMessage array to Gemini's contents format
* Maps 'assistant' role to 'model' for Gemini API compatibility
Expand All @@ -354,20 +408,22 @@ export class GeminiAgent {
}

/**
* Query Gemini via REST API with full conversation history (multi-turn)
* Sends the entire conversation context for coherent responses
* Query Gemini via REST API with truncated conversation history (multi-turn)
* Truncates history to prevent O(N²) token cost growth, then sends for coherent responses
*/
private async queryGeminiMultiTurn(
history: ConversationMessage[],
apiKey: string,
model: GeminiModel,
rateLimitingEnabled: boolean
): Promise<{ content: string; tokensUsed?: number }> {
const contents = this.conversationToGeminiContents(history);
const totalChars = history.reduce((sum, m) => sum + m.content.length, 0);
const truncatedHistory = this.truncateHistory(history);
const contents = this.conversationToGeminiContents(truncatedHistory);
const totalChars = truncatedHistory.reduce((sum, m) => sum + m.content.length, 0);

logger.debug('SDK', `Querying Gemini multi-turn (${model})`, {
turns: history.length,
turns: truncatedHistory.length,
totalTurns: history.length,
totalChars
});

Expand Down
18 changes: 18 additions & 0 deletions src/services/worker/http/routes/SettingsRoutes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ export class SettingsRoutes extends BaseRouteHandler {
'CLAUDE_MEM_GEMINI_API_KEY',
'CLAUDE_MEM_GEMINI_MODEL',
'CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED',
'CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES',
'CLAUDE_MEM_GEMINI_MAX_TOKENS',
// OpenRouter Configuration
'CLAUDE_MEM_OPENROUTER_API_KEY',
'CLAUDE_MEM_OPENROUTER_MODEL',
Expand Down Expand Up @@ -248,6 +250,22 @@ export class SettingsRoutes extends BaseRouteHandler {
}
}

// Validate CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES: positive integer, capped at 100.
// (These limits feed the sliding-window truncation in GeminiAgent.truncateHistory.)
if (settings.CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES) {
  const count = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES, 10);
  // Number.isNaN avoids the global isNaN's implicit coercion semantics.
  if (Number.isNaN(count) || count < 1 || count > 100) {
    return { valid: false, error: 'CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES must be between 1 and 100' };
  }
}

// Validate CLAUDE_MEM_GEMINI_MAX_TOKENS: estimated-token budget, 1k–1M.
if (settings.CLAUDE_MEM_GEMINI_MAX_TOKENS) {
  const tokens = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_TOKENS, 10);
  if (Number.isNaN(tokens) || tokens < 1000 || tokens > 1000000) {
    return { valid: false, error: 'CLAUDE_MEM_GEMINI_MAX_TOKENS must be between 1000 and 1000000' };
  }
}

// Validate CLAUDE_MEM_CONTEXT_OBSERVATIONS
if (settings.CLAUDE_MEM_CONTEXT_OBSERVATIONS) {
const obsCount = parseInt(settings.CLAUDE_MEM_CONTEXT_OBSERVATIONS, 10);
Expand Down
4 changes: 4 additions & 0 deletions src/shared/SettingsDefaultsManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ export interface SettingsDefaults {
CLAUDE_MEM_GEMINI_API_KEY: string;
CLAUDE_MEM_GEMINI_MODEL: string; // 'gemini-2.5-flash-lite' | 'gemini-2.5-flash' | 'gemini-3-flash-preview'
CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED: string; // 'true' | 'false' - enable rate limiting for free tier
CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES: string; // Max messages in Gemini context window (prevents O(N²) cost growth)
CLAUDE_MEM_GEMINI_MAX_TOKENS: string; // Max estimated tokens for Gemini context (~100k safety limit)
CLAUDE_MEM_OPENROUTER_API_KEY: string;
CLAUDE_MEM_OPENROUTER_MODEL: string;
CLAUDE_MEM_OPENROUTER_SITE_URL: string;
Expand Down Expand Up @@ -82,6 +84,8 @@ export class SettingsDefaultsManager {
CLAUDE_MEM_GEMINI_API_KEY: '', // Empty by default, can be set via UI or env
CLAUDE_MEM_GEMINI_MODEL: 'gemini-2.5-flash-lite', // Default Gemini model (highest free tier RPM)
CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED: 'true', // Rate limiting ON by default for free tier users
CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES: '20', // Max messages in Gemini context window
CLAUDE_MEM_GEMINI_MAX_TOKENS: '100000', // Max estimated tokens (~100k safety limit)
CLAUDE_MEM_OPENROUTER_API_KEY: '', // Empty by default, can be set via UI or env
CLAUDE_MEM_OPENROUTER_MODEL: 'xiaomi/mimo-v2-flash:free', // Default OpenRouter model (free tier)
CLAUDE_MEM_OPENROUTER_SITE_URL: '', // Optional: for OpenRouter analytics
Expand Down
84 changes: 84 additions & 0 deletions tests/gemini_agent.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,90 @@ describe('GeminiAgent', () => {
}
});

describe('conversation history truncation', () => {
  // Shared ActiveSession scaffolding — individual tests override only the
  // fields they exercise. Built fresh per call so each test gets its own
  // AbortController and timestamp.
  const makeSession = (overrides: Record<string, unknown> = {}) =>
    ({
      sessionDbId: 1,
      contentSessionId: 'test-session',
      memorySessionId: 'mem-session-123',
      project: 'test-project',
      userPrompt: 'test prompt',
      conversationHistory: [],
      lastPromptNumber: 1,
      cumulativeInputTokens: 0,
      cumulativeOutputTokens: 0,
      pendingMessages: [],
      abortController: new AbortController(),
      generatorPromise: null,
      earliestPendingTimestamp: null,
      currentProvider: null,
      startTime: Date.now(),
      processingMessageIds: [],
      ...overrides
    } as any);

  // Replace global fetch with a minimal successful Gemini response.
  const stubGeminiResponse = () => {
    global.fetch = mock(() => Promise.resolve(new Response(JSON.stringify({
      candidates: [{ content: { parts: [{ text: 'response' }] } }]
    }))));
  };

  // Parse the JSON body of the first request the agent issued.
  const firstRequestBody = () => JSON.parse((global.fetch as any).mock.calls[0][1].body);

  it('should truncate history when message count exceeds limit', async () => {
    // 25 small alternating user/assistant messages — above the default limit of 20.
    const history = Array.from({ length: 25 }, (_, i) => ({
      role: i % 2 === 0 ? 'user' : 'assistant',
      content: `message ${i}`
    }));

    const session = makeSession({ conversationHistory: history, lastPromptNumber: 2 });

    stubGeminiResponse();
    await agent.startSession(session);

    // The request body should have truncated contents (init adds 1 more, so 26 total → truncated to 20)
    expect(firstRequestBody().contents.length).toBeLessThanOrEqual(20);
  });

  it('should always keep at least the newest message even if it exceeds token limit', async () => {
    // Override settings to have a very low token limit
    loadFromFileSpy.mockImplementation(() => ({
      ...SettingsDefaultsManager.getAllDefaults(),
      CLAUDE_MEM_GEMINI_API_KEY: 'test-api-key',
      CLAUDE_MEM_GEMINI_MODEL: 'gemini-2.5-flash-lite',
      CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED: 'false',
      CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES: '20',
      CLAUDE_MEM_GEMINI_MAX_TOKENS: '1000', // Very low: ~250 chars
      CLAUDE_MEM_DATA_DIR: '/tmp/claude-mem-test',
    }));

    // A single prompt of ~2000 estimated tokens — well above the 1000 limit.
    const session = makeSession({ userPrompt: 'x'.repeat(8000) });

    stubGeminiResponse();
    await agent.startSession(session);

    // Should still send at least 1 message (the newest), not empty contents
    expect(firstRequestBody().contents.length).toBeGreaterThanOrEqual(1);
  });
});

describe('gemini-3-flash-preview model support', () => {
it('should accept gemini-3-flash-preview as a valid model', async () => {
// The GeminiModel type includes gemini-3-flash-preview - compile-time check
Expand Down
Loading