Merged
5 changes: 4 additions & 1 deletion libs/core/langchain_core/messages/base.py
@@ -266,6 +266,9 @@ def text(self) -> TextAccessor:

Can be used as both property (`message.text`) and method (`message.text()`).

Handles both string and list content types (e.g. for content blocks). Only
extracts blocks with `type: 'text'`; other block types are ignored.

!!! deprecated
As of `langchain-core` 1.0.0, calling `.text()` as a method is deprecated.
Use `.text` as a property instead. This method will be removed in 2.0.0.
@@ -277,7 +280,7 @@ def text(self) -> TextAccessor:
if isinstance(self.content, str):
text_value = self.content
else:
# must be a list
# Must be a list
blocks = [
block
for block in self.content
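For context, a minimal sketch of the behavior the updated docstring describes (illustrative only; assumes `langchain-core` >= 1.0 and Anthropic-style content blocks):

from langchain_core.messages import AIMessage

# String content: `.text` returns it as-is. (Calling `.text()` as a method
# still works but is deprecated as of langchain-core 1.0.0.)
msg = AIMessage(content="Hello!")
assert msg.text == "Hello!"

# List content: only blocks with `type: 'text'` contribute; the tool-use
# block is ignored and the text blocks are concatenated.
msg = AIMessage(
    content=[
        {"type": "text", "text": "The answer is "},
        {"type": "tool_use", "id": "call_1", "name": "calculator", "input": {}},
        {"type": "text", "text": "42."},
    ]
)
assert msg.text == "The answer is 42."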
11 changes: 8 additions & 3 deletions libs/core/langchain_core/messages/utils.py
@@ -148,13 +148,16 @@ def get_buffer_string(
else:
msg = f"Got unsupported message type: {m}"
raise ValueError(msg) # noqa: TRY004

message = f"{role}: {m.text}"

if isinstance(m, AIMessage):
if m.tool_calls:
message += f"{m.tool_calls}"
elif "function_call" in m.additional_kwargs:
# Legacy behavior assumes only one function call per message
message += f"{m.additional_kwargs['function_call']}"

string_messages.append(message)

return "\n".join(string_messages)
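A quick usage sketch of `get_buffer_string` as it appears above (the default role prefixes are "Human" and "AI"; per the hunk above, an AI message's stringified tool calls are appended to its line):

from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.messages.utils import get_buffer_string

buffer = get_buffer_string(
    [
        HumanMessage(content="What is 2 + 2?"),
        AIMessage(content="2 + 2 = 4."),
    ]
)
print(buffer)
# Human: What is 2 + 2?
# AI: 2 + 2 = 4.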
@@ -1845,34 +1848,36 @@ def count_tokens_approximately(
"""Approximate the total number of tokens in messages.

The token count includes stringified message content, role, and (optionally) name.

- For AI messages, the token count also includes stringified tool calls.
- For tool messages, the token count also includes the tool call ID.

Args:
messages: List of messages to count tokens for.
chars_per_token: Number of characters per token to use for the approximation.

One token corresponds to ~4 chars for common English text.

You can also specify `float` values for more fine-grained control.
[See more here](https://platform.openai.com/tokenizer).
extra_tokens_per_message: Number of extra tokens to add per message, e.g.
special tokens, including beginning/end of message.

You can also specify `float` values for more fine-grained control.
[See more here](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb).
count_name: Whether to include message names in the count.
Enabled by default.

Returns:
Approximate number of tokens in the messages.

!!! note
Note:
This is a simple approximation that may not match the exact token count used by
specific models. For accurate counts, use model-specific tokenizers.

Warning:
This function does not currently support counting image tokens.

!!! version-added "Added in `langchain-core` 0.3.46"

"""
token_count = 0.0
for message in convert_to_messages(messages):
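A short sketch of the approximation documented above (parameter names are taken from the docstring; the values passed here mirror its stated heuristics, and exact defaults may differ by version):

from langchain_core.messages import HumanMessage
from langchain_core.messages.utils import count_tokens_approximately

messages = [HumanMessage(content="Hello, how are you doing today?")]

# Roughly len(text) / chars_per_token plus a fixed per-message overhead;
# a rough estimate, not a model-exact token count.
approx = count_tokens_approximately(
    messages,
    chars_per_token=4.0,
    extra_tokens_per_message=3.0,
    count_name=True,
)
print(approx)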
18 changes: 15 additions & 3 deletions libs/langchain_v1/langchain/agents/middleware/summarization.py
@@ -13,7 +13,11 @@
ToolMessage,
)
from langchain_core.messages.human import HumanMessage
from langchain_core.messages.utils import count_tokens_approximately, trim_messages
from langchain_core.messages.utils import (
count_tokens_approximately,
get_buffer_string,
trim_messages,
)
from langgraph.graph.message import (
REMOVE_ALL_MESSAGES,
)
@@ -491,8 +495,12 @@ def _create_summary(self, messages_to_summarize: list[AnyMessage]) -> str:
if not trimmed_messages:
return "Previous conversation was too long to summarize."

# Format messages to avoid token inflation from metadata when str() is called on
# message objects
formatted_messages = get_buffer_string(trimmed_messages)

try:
response = self.model.invoke(self.summary_prompt.format(messages=trimmed_messages))
response = self.model.invoke(self.summary_prompt.format(messages=formatted_messages))
return response.text.strip()
except Exception as e:
return f"Error generating summary: {e!s}"
@@ -506,9 +514,13 @@ async def _acreate_summary(self, messages_to_summarize: list[AnyMessage]) -> str
if not trimmed_messages:
return "Previous conversation was too long to summarize."

# Format messages to avoid token inflation from metadata when str() is called on
# message objects
formatted_messages = get_buffer_string(trimmed_messages)

try:
response = await self.model.ainvoke(
self.summary_prompt.format(messages=trimmed_messages)
self.summary_prompt.format(messages=formatted_messages)
)
return response.text.strip()
except Exception as e:
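To make the motivation concrete, here is a hedged comparison in the spirit of the PR's test (values illustrative): interpolating raw message objects into the prompt calls `str()` on them, which drags in metadata and field names, while `get_buffer_string` keeps only roles and text:

from langchain_core.messages import AIMessage
from langchain_core.messages.utils import get_buffer_string

msg = AIMessage(
    content="It is 72F and sunny in NYC!",
    usage_metadata={"input_tokens": 100, "output_tokens": 25, "total_tokens": 125},
    response_metadata={"model": "gpt-4", "finish_reason": "stop"},
)

raw = str([msg])                    # repr: includes metadata, IDs, field names
compact = get_buffer_string([msg])  # "AI: It is 72F and sunny in NYC!"
assert len(compact) < len(raw)

The test added below asserts this ratio quantitatively.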
@@ -4,6 +4,7 @@
from langchain_core.language_models import ModelProfile
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import AIMessage, AnyMessage, HumanMessage, RemoveMessage, ToolMessage
from langchain_core.messages.utils import count_tokens_approximately, get_buffer_string
from langchain_core.outputs import ChatGeneration, ChatResult
from langgraph.graph.message import REMOVE_ALL_MESSAGES

@@ -891,3 +892,58 @@ def test_summarization_middleware_cutoff_at_start_of_tool_sequence() -> None:
# Index 2 is an AIMessage (safe cutoff point), so no adjustment needed
cutoff = middleware._find_safe_cutoff(messages, messages_to_keep=4)
assert cutoff == 2


def test_create_summary_uses_get_buffer_string_format() -> None:
"""Test that `_create_summary` formats messages using `get_buffer_string`.

Ensures that messages are formatted efficiently for the summary prompt, avoiding
token inflation from metadata when `str()` is called on message objects.

This ensures the token count of the formatted prompt stays below what
`count_tokens_approximately` estimates for the raw messages.
"""
# Create messages with metadata that would inflate str() representation
messages: list[AnyMessage] = [
HumanMessage(content="What is the weather in NYC?"),
AIMessage(
content="Let me check the weather for you.",
tool_calls=[{"name": "get_weather", "args": {"city": "NYC"}, "id": "call_123"}],
usage_metadata={"input_tokens": 50, "output_tokens": 30, "total_tokens": 80},
response_metadata={"model": "gpt-4", "finish_reason": "tool_calls"},
),
ToolMessage(
content="72F and sunny",
tool_call_id="call_123",
name="get_weather",
),
AIMessage(
content="It is 72F and sunny in NYC!",
usage_metadata={
"input_tokens": 100,
"output_tokens": 25,
"total_tokens": 125,
},
response_metadata={"model": "gpt-4", "finish_reason": "stop"},
),
]

# Verify the token ratio is favorable (get_buffer_string < str)
approx_tokens = count_tokens_approximately(messages)
buffer_string = get_buffer_string(messages)
buffer_tokens_estimate = len(buffer_string) / 4 # ~4 chars per token

# The ratio should be less than 1.0 (buffer_string uses fewer tokens than counted)
ratio = buffer_tokens_estimate / approx_tokens
assert ratio < 1.0, (
f"get_buffer_string should produce fewer tokens than count_tokens_approximately. "
f"Got ratio {ratio:.2f}x (expected < 1.0)"
)

# Verify str() would have been worse
str_tokens_estimate = len(str(messages)) / 4
str_ratio = str_tokens_estimate / approx_tokens
assert str_ratio > 1.5, (
f"str(messages) should produce significantly more tokens. "
f"Got ratio {str_ratio:.2f}x (expected > 1.5)"
)
10 changes: 5 additions & 5 deletions libs/langchain_v1/uv.lock

Some generated files are not rendered by default.