diff --git a/src/__tests__/integration/issue-token-limit-channel-query.test.ts b/src/__tests__/integration/issue-token-limit-channel-query.test.ts new file mode 100644 index 0000000..0054e67 --- /dev/null +++ b/src/__tests__/integration/issue-token-limit-channel-query.test.ts @@ -0,0 +1,146 @@ +/** + * Test for token limit issue with channel queries + * + * Reproduces the bug where context_get with limit: 50 still exceeds token limits + * when querying a channel with large items. + */ + +import { describe, it, expect, beforeEach, afterEach } from '@jest/globals'; +import { DatabaseManager } from '../../utils/database.js'; +import { RepositoryManager } from '../../repositories/RepositoryManager.js'; +import { checkTokenLimit, getTokenConfig } from '../../utils/token-limits.js'; + +describe('Token Limit with Channel Query Bug', () => { + let dbManager: DatabaseManager; + let repositories: RepositoryManager; + let sessionId: string; + + beforeEach(() => { + dbManager = new DatabaseManager({ filename: ':memory:' }); + repositories = new RepositoryManager(dbManager); + const session = repositories.sessions.create({ + name: 'Test Session', + defaultChannel: 'test-channel', + }); + sessionId = session.id; + }); + + afterEach(() => { + dbManager.close(); + }); + + it('should enforce token limits even when limit parameter is provided', () => { + // Create 50 large context items (each ~1000 chars) + const largeValue = 'x'.repeat(1000); + for (let i = 0; i < 50; i++) { + repositories.contexts.save(sessionId, { + key: `large-item-${i}`, + value: largeValue, + channel: 'outbound-call-center', + priority: 'normal', + }); + } + + // Query with limit: 50 + const result = repositories.contexts.queryEnhanced({ + sessionId, + channel: 'outbound-call-center', + limit: 50, + sort: 'updated_desc', + includeMetadata: false, + }); + + expect(result.items.length).toBe(50); + + // Check if response would exceed token limit + const tokenConfig = getTokenConfig(); + const { exceedsLimit, 
safeItemCount } = checkTokenLimit(result.items, false, tokenConfig); + + // Build the actual response structure + const response = { + items: result.items, + pagination: { + total: result.totalCount, + returned: result.items.length, + offset: 0, + hasMore: false, + nextOffset: null, + truncated: false, + truncatedCount: 0, + }, + }; + + const responseJson = JSON.stringify(response, null, 2); + const actualTokens = Math.ceil(responseJson.length / tokenConfig.charsPerToken); + + // The bug: even with limit: 50, the response can exceed token limits. NOTE(review): every assertion below is guarded by this condition, so when the response fits within mcpMaxTokens this test only checks the item count + if (actualTokens > tokenConfig.mcpMaxTokens) { + // BUG REPRODUCED: The response exceeds token limits + // Verify that checkTokenLimit correctly detected the issue + expect(exceedsLimit).toBe(true); + expect(safeItemCount).toBeLessThan(50); + expect(actualTokens).toBeGreaterThan(tokenConfig.mcpMaxTokens); + } + }); + + it('should respect token limits over user-provided limit parameter', () => { + // Create 100 large context items (each ~800 chars) + const largeValue = 'y'.repeat(800); + for (let i = 0; i < 100; i++) { + repositories.contexts.save(sessionId, { + key: `item-${i}`, + value: largeValue, + channel: 'test-channel', + priority: 'normal', + }); + } + + // Query with limit: 50 (user expectation) + const result = repositories.contexts.queryEnhanced({ + sessionId, + channel: 'test-channel', + limit: 50, + sort: 'created_desc', + includeMetadata: false, + }); + + // Simulate the context_get handler logic + const tokenConfig = getTokenConfig(); + const { exceedsLimit, safeItemCount } = checkTokenLimit(result.items, false, tokenConfig); + + let actualItems = result.items; + let wasTruncated = false; + + if (exceedsLimit && safeItemCount < result.items.length) { + actualItems = result.items.slice(0, safeItemCount); + wasTruncated = true; + } + + // Build response + const response = { + items: actualItems, + pagination: { + total: result.totalCount, + returned: actualItems.length, + offset: 0, + hasMore: wasTruncated || 
actualItems.length < result.totalCount, + nextOffset: wasTruncated ? actualItems.length : null, + truncated: wasTruncated, + truncatedCount: wasTruncated ? result.items.length - actualItems.length : 0, + }, + }; + + const responseJson = JSON.stringify(response, null, 2); + const actualTokens = Math.ceil(responseJson.length / tokenConfig.charsPerToken); + + // Verify token limit is not exceeded + expect(actualTokens).toBeLessThanOrEqual(tokenConfig.mcpMaxTokens); + + // Verify truncation occurred if needed + if (exceedsLimit) { + expect(wasTruncated).toBe(true); + expect(actualItems.length).toBeLessThan(50); + expect(actualItems.length).toBe(safeItemCount); + } + }); +}); diff --git a/src/index.ts b/src/index.ts index 33af232..91dcd0a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -711,6 +711,10 @@ server.setRequestHandler(CallToolRequestSchema, async request => { let wasTruncated = false; let truncatedCount = 0; + debugLog( + `Token limit check: exceedsLimit=${exceedsLimit}, estimatedTokens=${estimatedTokens}, safeItemCount=${safeItemCount}, items=${result.items.length}` + ); + if (exceedsLimit) { // Truncate to safe item count if (safeItemCount < result.items.length) { @@ -841,11 +845,18 @@ server.setRequestHandler(CallToolRequestSchema, async request => { 'Large result set approaching token limits. 
Consider using smaller limit or more specific filters.'; } + const responseJson = JSON.stringify(response, null, 2); + const actualResponseTokens = Math.ceil(responseJson.length / tokenConfig.charsPerToken); + + debugLog( + `Final response: actualTokens=${actualResponseTokens}, maxTokens=${tokenConfig.mcpMaxTokens}, length=${responseJson.length}` + ); + return { content: [ { type: 'text', - text: JSON.stringify(response, null, 2), + text: responseJson, }, ], }; diff --git a/src/utils/token-limits.ts b/src/utils/token-limits.ts index 2bbfda6..ef9b467 100644 --- a/src/utils/token-limits.ts +++ b/src/utils/token-limits.ts @@ -213,6 +213,7 @@ export function calculateSafeItemLimit( // Log calculation details for debugging if (process.env.MCP_DEBUG_LOGGING) { + // eslint-disable-next-line no-console console.log('[Token Calculation]', { safeTokenLimit, responseOverhead, @@ -301,7 +302,9 @@ export function checkTokenLimit( } }; - // Transform items if needed + // Transform items to match what will actually be returned + // NOTE: Items are only re-mapped when includeMetadata is true; otherwise they are measured as-is + // The response includes all database fields, so we need to account for them const itemsForCalculation = includeMetadata ? 
items.map(item => ({ key: item.key, @@ -314,7 +317,7 @@ export function checkTokenLimit( created_at: item.created_at, updated_at: item.updated_at, })) - : items; + : items; // When includeMetadata is false, items still contain all DB fields // Build full response structure matching actual handler response const response = { @@ -333,8 +336,11 @@ export function checkTokenLimit( const responseJson = JSON.stringify(response, null, JSON_INDENT_SPACES); const estimatedTokens = estimateTokens(responseJson, config.charsPerToken); - const safeLimit = Math.floor(config.mcpMaxTokens * config.safetyBuffer); - const exceedsLimit = estimatedTokens > safeLimit; + + // IMPORTANT: Compare against actual MCP limit, not safe limit + // The safety buffer is used when calculating safe item counts, but for detection + // we need to check against the hard MCP protocol limit to prevent rejections + const exceedsLimit = estimatedTokens > config.mcpMaxTokens; const safeItemCount = exceedsLimit ? calculateSafeItemLimit(items, includeMetadata, config)