Merged
5 changes: 4 additions & 1 deletion libs/core/langchain_core/messages/base.py
@@ -266,6 +266,9 @@ def text(self) -> TextAccessor:

Can be used as both property (`message.text`) and method (`message.text()`).

Handles both string and list content types (e.g. for content blocks). Only
extracts blocks with `type: 'text'`; other block types are ignored.

!!! deprecated
As of `langchain-core` 1.0.0, calling `.text()` as a method is deprecated.
Use `.text` as a property instead. This method will be removed in 2.0.0.
@@ -277,7 +280,7 @@ def text(self) -> TextAccessor:
if isinstance(self.content, str):
text_value = self.content
else:
# must be a list
# Must be a list
blocks = [
block
for block in self.content
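For context, a minimal sketch of the behavior the updated docstring describes (illustrative only; assumes `langchain-core` >= 1.0 and Anthropic-style content blocks):

from langchain_core.messages import AIMessage

# String content: `.text` returns it as-is. (Calling `.text()` as a method
# still works but is deprecated as of langchain-core 1.0.0.)
msg = AIMessage(content="Hello!")
assert msg.text == "Hello!"

# List content: only blocks with `type: 'text'` contribute; the tool-use
# block is ignored and the text blocks are concatenated.
msg = AIMessage(
    content=[
        {"type": "text", "text": "The answer is "},
        {"type": "tool_use", "id": "call_1", "name": "calculator", "input": {}},
        {"type": "text", "text": "42."},
    ]
)
assert msg.text == "The answer is 42."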
11 changes: 8 additions & 3 deletions libs/core/langchain_core/messages/utils.py
@@ -148,13 +148,16 @@ def get_buffer_string(
else:
msg = f"Got unsupported message type: {m}"
raise ValueError(msg) # noqa: TRY004

message = f"{role}: {m.text}"

if isinstance(m, AIMessage):
if m.tool_calls:
message += f"{m.tool_calls}"
elif "function_call" in m.additional_kwargs:
# Legacy behavior assumes only one function call per message
message += f"{m.additional_kwargs['function_call']}"

string_messages.append(message)

return "\n".join(string_messages)
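A quick usage sketch of `get_buffer_string` as it appears above (the default role prefixes are "Human" and "AI"; per the hunk above, an AI message's stringified tool calls are appended to its line):

from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.messages.utils import get_buffer_string

buffer = get_buffer_string(
    [
        HumanMessage(content="What is 2 + 2?"),
        AIMessage(content="2 + 2 = 4."),
    ]
)
print(buffer)
# Human: What is 2 + 2?
# AI: 2 + 2 = 4.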
@@ -1845,34 +1848,36 @@ def count_tokens_approximately(
"""Approximate the total number of tokens in messages.

The token count includes stringified message content, role, and (optionally) name.

- For AI messages, the token count also includes stringified tool calls.
- For tool messages, the token count also includes the tool call ID.

Args:
messages: List of messages to count tokens for.
chars_per_token: Number of characters per token to use for the approximation.

One token corresponds to ~4 chars for common English text.

You can also specify `float` values for more fine-grained control.
[See more here](https://platform.openai.com/tokenizer).
extra_tokens_per_message: Number of extra tokens to add per message, e.g.
special tokens, including beginning/end of message.

You can also specify `float` values for more fine-grained control.
[See more here](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb).
count_name: Whether to include message names in the count.
Enabled by default.

Returns:
Approximate number of tokens in the messages.

!!! note
Note:
This is a simple approximation that may not match the exact token count used by
specific models. For accurate counts, use model-specific tokenizers.

Warning:
This function does not currently support counting image tokens.

!!! version-added "Added in `langchain-core` 0.3.46"

"""
token_count = 0.0
for message in convert_to_messages(messages):
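A short sketch of the approximation documented above (parameter names are taken from the docstring; the values passed here mirror its stated heuristics, and exact defaults may differ by version):

from langchain_core.messages import HumanMessage
from langchain_core.messages.utils import count_tokens_approximately

messages = [HumanMessage(content="Hello, how are you doing today?")]

# Roughly len(text) / chars_per_token plus a fixed per-message overhead;
# a rough estimate, not a model-exact token count.
approx = count_tokens_approximately(
    messages,
    chars_per_token=4.0,
    extra_tokens_per_message=3.0,
    count_name=True,
)
print(approx)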
18 changes: 15 additions & 3 deletions libs/langchain_v1/langchain/agents/middleware/summarization.py
@@ -13,7 +13,11 @@
ToolMessage,
)
from langchain_core.messages.human import HumanMessage
from langchain_core.messages.utils import count_tokens_approximately, trim_messages
from langchain_core.messages.utils import (
count_tokens_approximately,
get_buffer_string,
trim_messages,
)
from langgraph.graph.message import (
REMOVE_ALL_MESSAGES,
)
@@ -491,8 +495,12 @@ def _create_summary(self, messages_to_summarize: list[AnyMessage]) -> str:
if not trimmed_messages:
return "Previous conversation was too long to summarize."

# Format messages to avoid token inflation from metadata when str() is called on
# message objects
formatted_messages = get_buffer_string(trimmed_messages)

try:
response = self.model.invoke(self.summary_prompt.format(messages=trimmed_messages))
response = self.model.invoke(self.summary_prompt.format(messages=formatted_messages))
return response.text.strip()
except Exception as e:
return f"Error generating summary: {e!s}"
@@ -506,9 +514,13 @@ async def _acreate_summary(self, messages_to_summarize: list[AnyMessage]) -> str
if not trimmed_messages:
return "Previous conversation was too long to summarize."

# Format messages to avoid token inflation from metadata when str() is called on
# message objects
formatted_messages = get_buffer_string(trimmed_messages)

try:
response = await self.model.ainvoke(
self.summary_prompt.format(messages=trimmed_messages)
self.summary_prompt.format(messages=formatted_messages)
)
return response.text.strip()
except Exception as e:
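To make the motivation concrete, here is a hedged comparison in the spirit of the PR's test (values illustrative): interpolating raw message objects into the prompt calls `str()` on them, which drags in metadata and field names, while `get_buffer_string` keeps only roles and text:

from langchain_core.messages import AIMessage
from langchain_core.messages.utils import get_buffer_string

msg = AIMessage(
    content="It is 72F and sunny in NYC!",
    usage_metadata={"input_tokens": 100, "output_tokens": 25, "total_tokens": 125},
    response_metadata={"model": "gpt-4", "finish_reason": "stop"},
)

raw = str([msg])                    # repr: includes metadata, IDs, field names
compact = get_buffer_string([msg])  # "AI: It is 72F and sunny in NYC!"
assert len(compact) < len(raw)

The test added below asserts this ratio quantitatively.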
@@ -4,6 +4,7 @@
from langchain_core.language_models import ModelProfile
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import AIMessage, AnyMessage, HumanMessage, RemoveMessage, ToolMessage
from langchain_core.messages.utils import count_tokens_approximately, get_buffer_string
from langchain_core.outputs import ChatGeneration, ChatResult
from langgraph.graph.message import REMOVE_ALL_MESSAGES

@@ -891,3 +892,58 @@ def test_summarization_middleware_cutoff_at_start_of_tool_sequence() -> None:
# Index 2 is an AIMessage (safe cutoff point), so no adjustment needed
cutoff = middleware._find_safe_cutoff(messages, messages_to_keep=4)
assert cutoff == 2


def test_create_summary_uses_get_buffer_string_format() -> None:
"""Test that `_create_summary` formats messages using `get_buffer_string`.

Ensures that messages are formatted efficiently for the summary prompt, avoiding
token inflation from metadata when `str()` is called on message objects.

This ensures the token count of the formatted prompt stays below what
`count_tokens_approximately` estimates for the raw messages.
"""
# Create messages with metadata that would inflate str() representation
messages: list[AnyMessage] = [
HumanMessage(content="What is the weather in NYC?"),
AIMessage(
content="Let me check the weather for you.",
tool_calls=[{"name": "get_weather", "args": {"city": "NYC"}, "id": "call_123"}],
usage_metadata={"input_tokens": 50, "output_tokens": 30, "total_tokens": 80},
response_metadata={"model": "gpt-4", "finish_reason": "tool_calls"},
),
ToolMessage(
content="72F and sunny",
tool_call_id="call_123",
name="get_weather",
),
AIMessage(
content="It is 72F and sunny in NYC!",
usage_metadata={
"input_tokens": 100,
"output_tokens": 25,
"total_tokens": 125,
},
response_metadata={"model": "gpt-4", "finish_reason": "stop"},
),
]

# Verify the token ratio is favorable (get_buffer_string < str)
approx_tokens = count_tokens_approximately(messages)
buffer_string = get_buffer_string(messages)
buffer_tokens_estimate = len(buffer_string) / 4 # ~4 chars per token

# The ratio should be less than 1.0 (buffer_string uses fewer tokens than counted)
ratio = buffer_tokens_estimate / approx_tokens
assert ratio < 1.0, (
f"get_buffer_string should produce fewer tokens than count_tokens_approximately. "
f"Got ratio {ratio:.2f}x (expected < 1.0)"
)

# Verify str() would have been worse
str_tokens_estimate = len(str(messages)) / 4
str_ratio = str_tokens_estimate / approx_tokens
assert str_ratio > 1.5, (
f"str(messages) should produce significantly more tokens. "
f"Got ratio {str_ratio:.2f}x (expected > 1.5)"
)
10 changes: 5 additions & 5 deletions libs/langchain_v1/uv.lock

Some generated files are not rendered by default.