Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions src/lib/agents/search/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { classify } from './classifier';
import Researcher from './researcher';
import { getWriterPrompt } from '@/lib/prompts/search/writer';
import { WidgetExecutor } from './widgets';
import { buildSearchResultsContext } from './context';

class APISearchAgent {
async searchAsync(session: SessionManager, input: SearchAgentInput) {
Expand Down Expand Up @@ -52,13 +53,9 @@ class APISearchAgent {
type: 'researchComplete',
});

const finalContext =
searchResults?.searchFindings
.map(
(f, index) =>
`<result index=${index + 1} title=${f.metadata.title}>${f.content}</result>`,
)
.join('\n') || '';
const finalContext = buildSearchResultsContext(
searchResults?.searchFindings || [],
);

const widgetContext = widgetOutputs
.map((o) => {
Expand Down
60 changes: 60 additions & 0 deletions src/lib/agents/search/context.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import { Chunk } from '@/lib/types';
import { getTokenCount, truncateTextByTokens } from '@/lib/utils/splitText';

const MAX_TOTAL_SEARCH_CONTEXT_TOKENS = 20000;
const MAX_RESULT_CONTEXT_TOKENS = 2500;
const TRUNCATION_NOTE =
'\n[Result content truncated to fit the model context window.]';

/**
 * Prepares a string for use inside a double-quoted attribute value.
 *
 * Angle brackets are removed outright and every double quote is replaced
 * with the `&quot;` entity; all other characters pass through unchanged.
 *
 * @param value - Raw text to sanitise.
 * @returns The sanitised text.
 */
const escapeAttribute = (value: string) =>
  // One pass over the three characters of interest: quotes become an entity,
  // angle brackets are dropped.
  value.replace(/[<>"]/g, (char) => (char === '"' ? '&quot;' : ''));

export const buildSearchResultsContext = (searchFindings: Chunk[] = []) => {
let remainingTokens = MAX_TOTAL_SEARCH_CONTEXT_TOKENS;
const contextParts: string[] = [];

for (const [index, finding] of searchFindings.entries()) {
if (remainingTokens <= 0) {
break;
}

const title = escapeAttribute(
String(finding.metadata?.title || `Result ${index + 1}`),
);
const prefix = `<result index=${index + 1} title="${title}">`;
const suffix = `</result>`;
const wrapperTokens = getTokenCount(prefix) + getTokenCount(suffix);
const availableContentTokens = Math.min(
MAX_RESULT_CONTEXT_TOKENS,
remainingTokens - wrapperTokens,
);

if (availableContentTokens <= 0) {
break;
}

const fullContent = String(finding.content || '');
const fullContentTokens = getTokenCount(fullContent);
let content = truncateTextByTokens(fullContent, availableContentTokens);

if (fullContentTokens > availableContentTokens) {
const noteBudget = Math.max(
0,
availableContentTokens - getTokenCount(TRUNCATION_NOTE),
);
content = `${truncateTextByTokens(fullContent, noteBudget)}${TRUNCATION_NOTE}`;
}

const entry = `${prefix}${content}${suffix}`;
Copy link
Contributor

@cubic-dev-ai cubic-dev-ai bot Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Unescaped search result content is inserted into <result> blocks, so injected </result> or <result> text can break prompt boundaries and corrupt citations.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At src/lib/agents/search/context.ts, line 48:

<comment>Unescaped search result content is inserted into `<result>` blocks, so injected `</result>` or `<result>` text can break prompt boundaries and corrupt citations.</comment>

<file context>
@@ -0,0 +1,60 @@
+      content = `${truncateTextByTokens(fullContent, noteBudget)}${TRUNCATION_NOTE}`;
+    }
+
+    const entry = `${prefix}${content}${suffix}`;
+    const entryTokens = getTokenCount(entry);
+
</file context>
Fix with Cubic

const entryTokens = getTokenCount(entry);

if (entryTokens > remainingTokens) {
break;
}

contextParts.push(entry);
remainingTokens -= entryTokens;
}

return contextParts.join('\n');
};
11 changes: 4 additions & 7 deletions src/lib/agents/search/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import db from '@/lib/db';
import { chats, messages } from '@/lib/db/schema';
import { and, eq, gt } from 'drizzle-orm';
import { TextBlock } from '@/lib/types';
import { buildSearchResultsContext } from './context';

class SearchAgent {
async searchAsync(session: SessionManager, input: SearchAgentInput) {
Expand Down Expand Up @@ -98,13 +99,9 @@ class SearchAgent {
type: 'researchComplete',
});

const finalContext =
searchResults?.searchFindings
.map(
(f, index) =>
`<result index=${index + 1} title=${f.metadata.title}>${f.content}</result>`,
)
.join('\n') || '';
const finalContext = buildSearchResultsContext(
searchResults?.searchFindings || [],
);

const widgetContext = widgetOutputs
.map((o) => {
Expand Down
33 changes: 32 additions & 1 deletion src/lib/utils/splitText.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,45 @@ const splitRegex = /(?<=\. |\n|! |\? |; |:\s|\d+\.\s|- |\* )/g;

const enc = getEncoding('cl100k_base');

const getTokenCount = (text: string): number => {
/**
 * Counts the tokens in `text` using the module-level encoder `enc`.
 *
 * If encoding throws for any reason, the count falls back to a rough
 * estimate of one token per four characters, rounded up.
 *
 * @param text - The text to measure.
 * @returns The encoder's token count, or the length-based estimate on failure.
 */
export const getTokenCount = (text: string): number => {
  let tokenCount: number;

  try {
    tokenCount = enc.encode(text).length;
  } catch {
    // Encoder failure: approximate instead of propagating the error.
    tokenCount = Math.ceil(text.length / 4);
  }

  return tokenCount;
};

/**
 * Shortens `text` to a prefix that fits within `maxTokens` tokens, as
 * measured by `getTokenCount`.
 *
 * The prefix length is found by binary search over UTF-16 code-unit
 * positions. The search assumes the token count does not shrink as the
 * prefix grows.
 *
 * @param text - The text to shorten.
 * @param maxTokens - Token budget for the returned string.
 * @returns `''` when the budget is non-positive or `text` is empty; `text`
 *   unchanged when it already fits; otherwise a fitting prefix with trailing
 *   whitespace removed and no dangling high surrogate at its end.
 */
export const truncateTextByTokens = (
  text: string,
  maxTokens: number,
): string => {
  if (maxTokens <= 0 || text.length === 0) {
    return '';
  }

  if (getTokenCount(text) <= maxTokens) {
    return text;
  }

  let low = 0;
  let high = text.length;
  let best = '';

  while (low <= high) {
    const mid = Math.floor((low + high) / 2);
    const candidate = text.slice(0, mid);

    if (getTokenCount(candidate) <= maxTokens) {
      // Candidate fits: remember it and try a longer prefix.
      best = candidate;
      low = mid + 1;
    } else {
      high = mid - 1;
    }
  }

  // `slice` cuts by UTF-16 code unit, so the chosen prefix may end between
  // the two halves of a surrogate pair. A trailing high surrogate is never a
  // complete character, so drop it rather than return malformed text.
  // (`charCodeAt` yields NaN for an empty string, which fails both tests.)
  const lastUnit = best.charCodeAt(best.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    best = best.slice(0, -1);
  }

  return best.trimEnd();
};

export const splitText = (
text: string,
maxTokens = 512,
Expand Down