diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py
index 841df41c11bb0..af5d2ee98bb14 100644
--- a/libs/core/langchain_core/messages/base.py
+++ b/libs/core/langchain_core/messages/base.py
@@ -266,6 +266,9 @@ def text(self) -> TextAccessor:
         Can be used as both property (`message.text`) and method (`message.text()`).
 
+        Handles both string and list content (e.g., a list of content blocks). Only
+        blocks with `type: 'text'` are extracted; other block types are ignored.
+
         !!! deprecated
             As of `langchain-core` 1.0.0, calling `.text()` as a method is deprecated.
             Use `.text` as a property instead. This method will be removed in 2.0.0.
@@ -277,7 +280,7 @@ def text(self) -> TextAccessor:
         if isinstance(self.content, str):
             text_value = self.content
         else:
-            # must be a list
+            # Must be a list
             blocks = [
                 block
                 for block in self.content
diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py
index 6fe390753627a..274be99b2ec2a 100644
--- a/libs/core/langchain_core/messages/utils.py
+++ b/libs/core/langchain_core/messages/utils.py
@@ -148,13 +148,16 @@ def get_buffer_string(
         else:
             msg = f"Got unsupported message type: {m}"
             raise ValueError(msg)  # noqa: TRY004
+
         message = f"{role}: {m.text}"
+
         if isinstance(m, AIMessage):
             if m.tool_calls:
                 message += f"{m.tool_calls}"
             elif "function_call" in m.additional_kwargs:
                 # Legacy behavior assumes only one function call per message
                 message += f"{m.additional_kwargs['function_call']}"
+
         string_messages.append(message)
 
     return "\n".join(string_messages)
@@ -1845,26 +1848,29 @@ def count_tokens_approximately(
     """Approximate the total number of tokens in messages.
 
     The token count includes stringified message content, role, and (optionally) name.
+
     - For AI messages, the token count also includes stringified tool calls.
     - For tool messages, the token count also includes the tool call ID.
 
     Args:
         messages: List of messages to count tokens for.
         chars_per_token: Number of characters per token to use for the approximation.
+            One token corresponds to ~4 chars for common English text.
+            You can also specify `float` values for more fine-grained control.
             [See more here](https://platform.openai.com/tokenizer).
         extra_tokens_per_message: Number of extra tokens to add per message,
             e.g. special tokens, including beginning/end of message.
+            You can also specify `float` values for more fine-grained control.
             [See more here](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb).
         count_name: Whether to include message names in the count.
-            Enabled by default.
 
     Returns:
         Approximate number of tokens in the messages.
 
-    !!! note
+    Note:
         This is a simple approximation that may not match the exact token count used by
         specific models. For accurate counts, use model-specific tokenizers.
@@ -1872,7 +1878,6 @@ def count_tokens_approximately(
     This function does not currently support counting image tokens.
 
     !!! version-added "Added in `langchain-core` 0.3.46"
version-added "Added in `langchain-core` 0.3.46" - """ token_count = 0.0 for message in convert_to_messages(messages): diff --git a/libs/langchain_v1/langchain/agents/middleware/summarization.py b/libs/langchain_v1/langchain/agents/middleware/summarization.py index 10baf724662c0..37c782842b8c1 100644 --- a/libs/langchain_v1/langchain/agents/middleware/summarization.py +++ b/libs/langchain_v1/langchain/agents/middleware/summarization.py @@ -13,7 +13,11 @@ ToolMessage, ) from langchain_core.messages.human import HumanMessage -from langchain_core.messages.utils import count_tokens_approximately, trim_messages +from langchain_core.messages.utils import ( + count_tokens_approximately, + get_buffer_string, + trim_messages, +) from langgraph.graph.message import ( REMOVE_ALL_MESSAGES, ) @@ -491,8 +495,12 @@ def _create_summary(self, messages_to_summarize: list[AnyMessage]) -> str: if not trimmed_messages: return "Previous conversation was too long to summarize." + # Format messages to avoid token inflation from metadata when str() is called on + # message objects + formatted_messages = get_buffer_string(trimmed_messages) + try: - response = self.model.invoke(self.summary_prompt.format(messages=trimmed_messages)) + response = self.model.invoke(self.summary_prompt.format(messages=formatted_messages)) return response.text.strip() except Exception as e: return f"Error generating summary: {e!s}" @@ -506,9 +514,13 @@ async def _acreate_summary(self, messages_to_summarize: list[AnyMessage]) -> str if not trimmed_messages: return "Previous conversation was too long to summarize." + # Format messages to avoid token inflation from metadata when str() is called on + # message objects + formatted_messages = get_buffer_string(trimmed_messages) + try: response = await self.model.ainvoke( - self.summary_prompt.format(messages=trimmed_messages) + self.summary_prompt.format(messages=formatted_messages) ) return response.text.strip() except Exception as e: diff --git a/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_summarization.py b/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_summarization.py index 2507f6f3e5d61..7c2995061a60a 100644 --- a/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_summarization.py +++ b/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_summarization.py @@ -4,6 +4,7 @@ from langchain_core.language_models import ModelProfile from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.messages import AIMessage, AnyMessage, HumanMessage, RemoveMessage, ToolMessage +from langchain_core.messages.utils import count_tokens_approximately, get_buffer_string from langchain_core.outputs import ChatGeneration, ChatResult from langgraph.graph.message import REMOVE_ALL_MESSAGES @@ -891,3 +892,58 @@ def test_summarization_middleware_cutoff_at_start_of_tool_sequence() -> None: # Index 2 is an AIMessage (safe cutoff point), so no adjustment needed cutoff = middleware._find_safe_cutoff(messages, messages_to_keep=4) assert cutoff == 2 + + +def test_create_summary_uses_get_buffer_string_format() -> None: + """Test that `_create_summary` formats messages using `get_buffer_string`. + + Ensures that messages are formatted efficiently for the summary prompt, avoiding + token inflation from metadata when `str()` is called on message objects. + + This ensures the token count of the formatted prompt stays below what + `count_tokens_approximately` estimates for the raw messages. 
+ """ + # Create messages with metadata that would inflate str() representation + messages: list[AnyMessage] = [ + HumanMessage(content="What is the weather in NYC?"), + AIMessage( + content="Let me check the weather for you.", + tool_calls=[{"name": "get_weather", "args": {"city": "NYC"}, "id": "call_123"}], + usage_metadata={"input_tokens": 50, "output_tokens": 30, "total_tokens": 80}, + response_metadata={"model": "gpt-4", "finish_reason": "tool_calls"}, + ), + ToolMessage( + content="72F and sunny", + tool_call_id="call_123", + name="get_weather", + ), + AIMessage( + content="It is 72F and sunny in NYC!", + usage_metadata={ + "input_tokens": 100, + "output_tokens": 25, + "total_tokens": 125, + }, + response_metadata={"model": "gpt-4", "finish_reason": "stop"}, + ), + ] + + # Verify the token ratio is favorable (get_buffer_string < str) + approx_tokens = count_tokens_approximately(messages) + buffer_string = get_buffer_string(messages) + buffer_tokens_estimate = len(buffer_string) / 4 # ~4 chars per token + + # The ratio should be less than 1.0 (buffer_string uses fewer tokens than counted) + ratio = buffer_tokens_estimate / approx_tokens + assert ratio < 1.0, ( + f"get_buffer_string should produce fewer tokens than count_tokens_approximately. " + f"Got ratio {ratio:.2f}x (expected < 1.0)" + ) + + # Verify str() would have been worse + str_tokens_estimate = len(str(messages)) / 4 + str_ratio = str_tokens_estimate / approx_tokens + assert str_ratio > 1.5, ( + f"str(messages) should produce significantly more tokens. " + f"Got ratio {str_ratio:.2f}x (expected > 1.5)" + ) diff --git a/libs/langchain_v1/uv.lock b/libs/langchain_v1/uv.lock index c8765dca4c88b..4380778de8e0d 100644 --- a/libs/langchain_v1/uv.lock +++ b/libs/langchain_v1/uv.lock @@ -2102,7 +2102,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "1.2.5" +version = "1.2.6" source = { editable = "../core" } dependencies = [ { name = "jsonpatch" }, @@ -2380,12 +2380,12 @@ requires-dist = [ ] [package.metadata.requires-dev] -lint = [{ name = "ruff", specifier = ">=0.13.1,<0.14.0" }] +lint = [{ name = "ruff", specifier = ">=0.14.10,<0.15.0" }] test = [{ name = "langchain-core", editable = "../core" }] test-integration = [] typing = [ { name = "langchain-core", editable = "../core" }, - { name = "mypy", specifier = ">=1.18.1,<1.19.0" }, + { name = "mypy", specifier = ">=1.19.1,<1.20.0" }, { name = "types-pyyaml", specifier = ">=6.0.12.2,<7.0.0.0" }, ] @@ -2407,7 +2407,7 @@ dev = [ ] lint = [ { name = "langchain-core", editable = "../core" }, - { name = "ruff", specifier = ">=0.13.1,<0.14.0" }, + { name = "ruff", specifier = ">=0.14.10,<0.15.0" }, ] test = [ { name = "freezegun", specifier = ">=1.2.2,<2.0.0" }, @@ -2433,7 +2433,7 @@ test-integration = [ typing = [ { name = "beautifulsoup4", specifier = ">=4.13.5,<5.0.0" }, { name = "lxml-stubs", specifier = ">=0.5.1,<1.0.0" }, - { name = "mypy", specifier = ">=1.18.1,<1.19.0" }, + { name = "mypy", specifier = ">=1.19.1,<1.20.0" }, { name = "tiktoken", specifier = ">=0.8.0,<1.0.0" }, { name = "types-requests", specifier = ">=2.31.0.20240218,<3.0.0.0" }, ]