diff --git a/.changeset/four-toys-relax.md b/.changeset/four-toys-relax.md
new file mode 100644
index 000000000000..771b7e3dc54b
--- /dev/null
+++ b/.changeset/four-toys-relax.md
@@ -0,0 +1,6 @@
+---
+"@langchain/core": patch
+"langchain": patch
+---
+
+fix(core): use getBufferString for message summarization
diff --git a/libs/langchain-core/src/language_models/tests/chat_models.test.ts b/libs/langchain-core/src/language_models/tests/chat_models.test.ts
index 60eb54ba2a39..d3f4d86703fb 100644
--- a/libs/langchain-core/src/language_models/tests/chat_models.test.ts
+++ b/libs/langchain-core/src/language_models/tests/chat_models.test.ts
@@ -268,6 +268,10 @@ test("Test ChatModel can cache complex messages", async () => {
   });
   const prompt = getBufferString([humanMessage]);
+  // getBufferString now uses the `text` property which extracts only text content
+  // from content blocks, producing compact output to avoid token inflation
+  expect(prompt).toBe("Human: Hello there!");
+
   const llmKey = model._getSerializedCacheKeyParametersForCall({});

   // Invoke model to trigger cache update
@@ -277,12 +281,14 @@
   expect(value).toBeDefined();
   if (!value) return;

-  expect(value[0].text).toEqual(JSON.stringify(contentToCache, null, 2));
+  // FakeChatModel returns m.text for text content (extracts text from blocks)
+  // This is consistent with using the text property for compact representation
+  expect(value[0].text).toEqual("Hello there!");
   expect("message" in value[0]).toBeTruthy();
   if (!("message" in value[0])) return;

   const cachedMsg = value[0].message as AIMessage;
-  expect(cachedMsg.content).toEqual(JSON.stringify(contentToCache, null, 2));
+  expect(cachedMsg.content).toEqual("Hello there!");
 });

 test("Test ChatModel with cache does not start multiple chat model runs", async () => {
diff --git a/libs/langchain-core/src/messages/tests/message_utils.test.ts b/libs/langchain-core/src/messages/tests/message_utils.test.ts
index b068ced1730c..7fd90b6ee674 100644
--- a/libs/langchain-core/src/messages/tests/message_utils.test.ts
+++ b/libs/langchain-core/src/messages/tests/message_utils.test.ts
@@ -619,38 +619,85 @@ test("getBufferString can handle complex messages", () => {
   expect(bufferString1).toBe("Human: Hello there!");

   const bufferString2 = getBufferString(messageArr2);
-  expect(bufferString2).toBe(
-    `AI: ${JSON.stringify(
-      [
-        {
-          type: "text",
-          text: "Hello there!",
-        },
-      ],
-      null,
-      2
-    )}`
-  );
+  // getBufferString now uses the `text` property which extracts only text content
+  // from content blocks, producing compact output to avoid token inflation
+  expect(bufferString2).toBe("AI: Hello there!");

   const bufferString3 = getBufferString(messageArr3);
-  expect(bufferString3).toBe(
-    `Human: ${JSON.stringify(
-      [
-        {
-          type: "image_url",
-          image_url: {
-            url: "https://example.com/image.jpg",
-          },
-        },
-        {
-          type: "image_url",
-          image_url: "https://example.com/image.jpg",
-        },
-      ],
-      null,
-      2
-    )}`
+  // Image content should return empty string for text property
+  expect(bufferString3).toBe("Human: ");
+});
+
+test("getBufferString includes tool_calls for AI messages", () => {
+  const toolCalls = [
+    { name: "get_weather", args: { city: "NYC" }, id: "call_123" },
+  ];
+
+  const messageWithToolCalls = new AIMessage({
+    content: "Let me check the weather for you.",
+    tool_calls: toolCalls,
+  });
+
+  const messageWithFunctionCall = new AIMessage({
+    content: "Let me check the weather.",
+    additional_kwargs: {
+      function_call: { name: "get_weather", arguments: '{"city": "NYC"}' },
"get_weather", arguments: '{"city": "NYC"}' }, + }, + }); + + const messageWithoutTools = new AIMessage({ + content: "The weather is sunny!", + }); + + // AI message with tool_calls should include them in output + const bufferWithToolCalls = getBufferString([messageWithToolCalls]); + expect(bufferWithToolCalls).toBe( + `AI: Let me check the weather for you.${JSON.stringify(toolCalls)}` ); + + // AI message with legacy function_call should include it + const bufferWithFunctionCall = getBufferString([messageWithFunctionCall]); + expect(bufferWithFunctionCall).toContain("AI: Let me check the weather."); + expect(bufferWithFunctionCall).toContain("get_weather"); + + // AI message without tools should not have tool info + const bufferWithoutTools = getBufferString([messageWithoutTools]); + expect(bufferWithoutTools).toBe("AI: The weather is sunny!"); +}); + +test("getBufferString uses text property to avoid metadata inflation", () => { + // Create messages with metadata that would inflate str() representation + const messages = [ + new HumanMessage("What is the weather in NYC?"), + new AIMessage({ + content: "Let me check the weather for you.", + tool_calls: [ + { name: "get_weather", args: { city: "NYC" }, id: "call_123" }, + ], + }), + new ToolMessage({ + content: "72F and sunny", + tool_call_id: "call_123", + name: "get_weather", + }), + new AIMessage({ + content: "It is 72F and sunny in NYC!", + }), + ]; + + const bufferString = getBufferString(messages); + + // Should produce compact output + expect(bufferString).toContain("Human: What is the weather in NYC?"); + expect(bufferString).toContain("AI: Let me check the weather for you."); + expect(bufferString).toContain("get_weather"); + expect(bufferString).toContain("Tool: get_weather, 72F and sunny"); + expect(bufferString).toContain("AI: It is 72F and sunny in NYC!"); + + // Should NOT contain metadata fields that would be in JSON.stringify + expect(bufferString).not.toContain("usage_metadata"); + expect(bufferString).not.toContain("response_metadata"); + expect(bufferString).not.toContain("additional_kwargs"); }); describe("chat message conversions", () => { diff --git a/libs/langchain-core/src/messages/utils.ts b/libs/langchain-core/src/messages/utils.ts index 023eec8db668..ffcc971d379f 100644 --- a/libs/langchain-core/src/messages/utils.ts +++ b/libs/langchain-core/src/messages/utils.ts @@ -307,6 +307,15 @@ export function coerceMessageLikeToMessage( /** * This function is used by memory classes to get a string representation * of the chat message history, based on the message content and role. + * + * Produces compact output like: + * ``` + * Human: What's the weather? + * AI: Let me check...[tool_calls] + * Tool: 72°F and sunny + * ``` + * + * This avoids token inflation from metadata when stringifying message objects directly. 
 */
 export function getBufferString(
   messages: BaseMessage[],
@@ -316,25 +325,42 @@
   const string_messages: string[] = [];
   for (const m of messages) {
     let role: string;
-    if (m._getType() === "human") {
+    if (m.type === "human") {
       role = humanPrefix;
-    } else if (m._getType() === "ai") {
+    } else if (m.type === "ai") {
       role = aiPrefix;
-    } else if (m._getType() === "system") {
+    } else if (m.type === "system") {
       role = "System";
-    } else if (m._getType() === "tool") {
+    } else if (m.type === "tool") {
       role = "Tool";
-    } else if (m._getType() === "generic") {
+    } else if (m.type === "generic") {
       role = (m as ChatMessage).role;
     } else {
-      throw new Error(`Got unsupported message type: ${m._getType()}`);
+      throw new Error(`Got unsupported message type: ${m.type}`);
     }
     const nameStr = m.name ? `${m.name}, ` : "";
-    const readableContent =
-      typeof m.content === "string"
-        ? m.content
-        : JSON.stringify(m.content, null, 2);
-    string_messages.push(`${role}: ${nameStr}${readableContent}`);
+
+    // Use the m.text property, which extracts only text content and skips metadata;
+    // for non-string content (e.g., content blocks), it pulls out just the text blocks
+    const readableContent = m.text;
+
+    let message = `${role}: ${nameStr}${readableContent}`;
+
+    // Include tool calls for AI messages (matching Python's get_buffer_string behavior)
+    if (m.type === "ai") {
+      const aiMessage = m as AIMessage;
+      if (aiMessage.tool_calls && aiMessage.tool_calls.length > 0) {
+        message += JSON.stringify(aiMessage.tool_calls);
+      } else if (
+        aiMessage.additional_kwargs &&
+        "function_call" in aiMessage.additional_kwargs
+      ) {
+        // Legacy behavior assumes only one function call per message
+        message += JSON.stringify(aiMessage.additional_kwargs.function_call);
+      }
+    }
+
+    string_messages.push(message);
   }
   return string_messages.join("\n");
 }
diff --git a/libs/langchain/src/agents/middleware/summarization.ts b/libs/langchain/src/agents/middleware/summarization.ts
index dc9820ec4792..d1179de42206 100644
--- a/libs/langchain/src/agents/middleware/summarization.ts
+++ b/libs/langchain/src/agents/middleware/summarization.ts
@@ -9,6 +9,7 @@ import {
   RemoveMessage,
   trimMessages,
   HumanMessage,
+  getBufferString,
 } from "@langchain/core/messages";
 import {
   BaseLanguageModel,
@@ -905,10 +906,22 @@ async function createSummary(
     return "Previous conversation was too long to summarize.";
   }

+  /**
+   * Format messages using getBufferString to avoid token inflation from metadata
+   * when JSON.stringify is called on whole message objects.
+   * This produces compact output like:
+   * ```
+   * Human: What's the weather?
+   * AI: Let me check...[tool_calls]
+   * Tool: 72°F and sunny
+   * ```
+   */
+  const formattedMessages = getBufferString(trimmedMessages);
+
   try {
     const formattedPrompt = summaryPrompt.replace(
       "{messages}",
-      JSON.stringify(trimmedMessages, null, 2)
+      formattedMessages
     );
     /**
      * Invoke the model with an empty callbacks array to prevent the internal
diff --git a/libs/langchain/src/agents/middleware/tests/summarization.test.ts b/libs/langchain/src/agents/middleware/tests/summarization.test.ts
index 4a66c5da0a3d..286e428e1916 100644
--- a/libs/langchain/src/agents/middleware/tests/summarization.test.ts
+++ b/libs/langchain/src/agents/middleware/tests/summarization.test.ts
@@ -492,13 +492,10 @@
     expect(summarizationModel.invoke).toHaveBeenCalledTimes(1);
     const summaryPrompt = summarizationModel.invoke.mock.calls[0][0];
     expect(summaryPrompt).toContain("Messages to summarize:");
-    expect(summaryPrompt).not.toContain(
-      '"content": "Message 1: xxxxxxxxxxxxxxx'
-    );
-    expect(summaryPrompt).toContain('"content": "Response 2: xxxxxxxxxxxxxxx');
-    expect(summaryPrompt).not.toContain(
-      '"content": "Message 3: xxxxxxxxxxxxxxx'
-    );
+    // Uses getBufferString format (Human:, AI:) instead of JSON format
+    expect(summaryPrompt).not.toContain("Human: Message 1: xxxxxxxxxxxxxxx");
+    expect(summaryPrompt).toContain("AI: Response 2: xxxxxxxxxxxxxxx");
+    expect(summaryPrompt).not.toContain("Human: Message 3: xxxxxxxxxxxxxxx");

     // Should trigger summarization
     expect(result.messages.length).toBe(5);
@@ -558,12 +555,11 @@
     expect(summarizationModel.invoke).toHaveBeenCalledTimes(1);
     const summaryPrompt = summarizationModel.invoke.mock.calls[0][0];
     expect(summaryPrompt).toContain("Messages to summarize:");
-    expect(summaryPrompt).toContain('"content": "Message 1: xxxxxxxxxxxxxxx');
-    expect(summaryPrompt).toContain('"content": "Response 2: xxxxxxxxxxxxxxx');
-    expect(summaryPrompt).toContain('"content": "Message 3: xxxxxxxxxxxxxxx');
-    expect(summaryPrompt).not.toContain(
-      '"content": "Response 3: xxxxxxxxxxxxxxx'
-    );
+    // Uses getBufferString format (Human:, AI:) instead of JSON format
+    expect(summaryPrompt).toContain("Human: Message 1: xxxxxxxxxxxxxxx");
+    expect(summaryPrompt).toContain("AI: Response 2: xxxxxxxxxxxxxxx");
+    expect(summaryPrompt).toContain("Human: Message 3: xxxxxxxxxxxxxxx");
+    expect(summaryPrompt).not.toContain("AI: Response 3: xxxxxxxxxxxxxxx");

     // Should trigger summarization
     expect(result.messages.length).toBe(4);
@@ -1187,4 +1183,74 @@
       expect(matchingToolMessages.length).toBe(preservedAI.tool_calls?.length);
     }
   });
+
+  it("should use getBufferString format to avoid token inflation from message metadata", async () => {
+    // Track the actual prompt sent to the summarization model
+    let capturedPrompt = "";
+    const summarizationModel = {
+      invoke: vi.fn().mockImplementation(async (prompt: string) => {
+        capturedPrompt = prompt;
+        return { content: "Summary of the conversation." };
+      }),
+      getName: () => "mock-summarizer",
+      _modelType: "mock",
+      lc_runnable: true,
+      profile: {},
+    };
+
+    const model = createMockMainModel();
+
+    const middleware = summarizationMiddleware({
+      model: summarizationModel as any,
+      trigger: { tokens: 50 },
+      keep: { messages: 1 },
+    });
+
+    const agent = createAgent({
+      model,
+      middleware: [middleware],
+    });
+
+    // Create messages with metadata that would inflate the JSON.stringify representation
+    const inputMessages = [
+      new HumanMessage("What is the weather in NYC?"),
+      new AIMessage({
+        content: "Let me check the weather for you.",
+        tool_calls: [
+          { name: "get_weather", args: { city: "NYC" }, id: "call_123" },
+        ],
+      }),
+      new ToolMessage({
+        content: "72F and sunny",
+        tool_call_id: "call_123",
+        name: "get_weather",
+      }),
+      new AIMessage({
+        content: `It is 72F and sunny in NYC! ${"x".repeat(200)}`, // Add enough chars to trigger summarization
+      }),
+      new HumanMessage("Thanks!"),
+    ];
+
+    await agent.invoke({ messages: inputMessages });
+
+    // Verify summarization was triggered
+    expect(summarizationModel.invoke).toHaveBeenCalled();
+
+    // Verify the prompt uses getBufferString format (compact) instead of JSON.stringify
+    // The prompt should contain role prefixes like "Human:", "AI:", "Tool:" instead of
+    // full JSON with all metadata fields
+    expect(capturedPrompt).toContain("Human:");
+    expect(capturedPrompt).toContain("AI:");
+    expect(capturedPrompt).toContain("Tool:");
+
+    // Verify the prompt does NOT contain verbose metadata that would be in JSON.stringify
+    // These fields would appear if we used JSON.stringify(messages, null, 2)
+    expect(capturedPrompt).not.toContain('"type": "human"');
+    expect(capturedPrompt).not.toContain('"type": "ai"');
+    expect(capturedPrompt).not.toContain('"additional_kwargs"');
+    expect(capturedPrompt).not.toContain('"response_metadata"');
+
+    // The tool calls should still be included (as JSON appended to the AI message)
+    expect(capturedPrompt).toContain("get_weather");
+  });
 });
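For reference, a minimal usage sketch of the behavior the new tests pin down (illustrative, not part of the patch; it uses only the APIs shown in the diff, and the expected output follows the new unit tests above):

```typescript
import {
  AIMessage,
  HumanMessage,
  ToolMessage,
  getBufferString,
} from "@langchain/core/messages";

const messages = [
  new HumanMessage("What is the weather in NYC?"),
  new AIMessage({
    content: "Let me check the weather for you.",
    tool_calls: [{ name: "get_weather", args: { city: "NYC" }, id: "call_123" }],
  }),
  new ToolMessage({
    content: "72F and sunny",
    tool_call_id: "call_123",
    name: "get_weather",
  }),
];

// Previously the summarization prompt embedded JSON.stringify(messages, null, 2),
// which serializes every field (additional_kwargs, response_metadata, usage_metadata, ...).
// With this change the prompt gets compact role-prefixed lines; AI tool calls are
// appended as JSON via JSON.stringify(aiMessage.tool_calls).
console.log(getBufferString(messages));
// Human: What is the weather in NYC?
// AI: Let me check the weather for you.[{"name":"get_weather","args":{"city":"NYC"},"id":"call_123"}]
// Tool: get_weather, 72F and sunny
```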