Skip to content

Commit a58dd47

Browse files
ajac-zero and DouweM authored
Add Agent.run_stream_sync method and sync convenience methods on StreamedRunResult (#3146)
Co-authored-by: Douwe Maan <[email protected]>
1 parent 063278e commit a58dd47

File tree

5 files changed

+526
-5
lines changed

5 files changed

+526
-5
lines changed

docs/agents.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ There are five ways to run an agent:
6565

6666
1. [`agent.run()`][pydantic_ai.agent.AbstractAgent.run] — an async function which returns a [`RunResult`][pydantic_ai.agent.AgentRunResult] containing a completed response.
6767
2. [`agent.run_sync()`][pydantic_ai.agent.AbstractAgent.run_sync] — a plain, synchronous function which returns a [`RunResult`][pydantic_ai.agent.AgentRunResult] containing a completed response (internally, this just calls `loop.run_until_complete(self.run())`).
68-
3. [`agent.run_stream()`][pydantic_ai.agent.AbstractAgent.run_stream] — an async context manager which returns a [`StreamedRunResult`][pydantic_ai.result.StreamedRunResult], which contains methods to stream text and structured output as an async iterable.
68+
3. [`agent.run_stream()`][pydantic_ai.agent.AbstractAgent.run_stream] — an async context manager which returns a [`StreamedRunResult`][pydantic_ai.result.StreamedRunResult], which contains methods to stream text and structured output as an async iterable. [`agent.run_stream_sync()`][pydantic_ai.agent.AbstractAgent.run_stream_sync] is a synchronous variation that returns a [`StreamedRunResultSync`][pydantic_ai.result.StreamedRunResultSync] with synchronous versions of the same methods.
6969
4. [`agent.run_stream_events()`][pydantic_ai.agent.AbstractAgent.run_stream_events] — a function which returns an async iterable of [`AgentStreamEvent`s][pydantic_ai.messages.AgentStreamEvent] and a [`AgentRunResultEvent`][pydantic_ai.run.AgentRunResultEvent] containing the final run result.
7070
5. [`agent.iter()`][pydantic_ai.Agent.iter] — a context manager which returns an [`AgentRun`][pydantic_ai.agent.AgentRun], an async iterable over the nodes of the agent's underlying [`Graph`][pydantic_graph.graph.Graph].
7171

pydantic_ai_slim/pydantic_ai/_utils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,15 @@ def sync_anext(iterator: Iterator[T]) -> T:
234234
raise StopAsyncIteration() from e
235235

236236

237+
def sync_async_iterator(async_iter: AsyncIterator[T]) -> Iterator[T]:
238+
loop = get_event_loop()
239+
while True:
240+
try:
241+
yield loop.run_until_complete(anext(async_iter))
242+
except StopAsyncIteration:
243+
break
244+
245+
237246
def now_utc() -> datetime:
238247
return datetime.now(tz=timezone.utc)
239248

@@ -489,3 +498,12 @@ def get_union_args(tp: Any) -> tuple[Any, ...]:
489498
return tuple(_unwrap_annotated(arg) for arg in get_args(tp))
490499
else:
491500
return ()
501+
502+
503+
def get_event_loop():
504+
try:
505+
event_loop = asyncio.get_event_loop()
506+
except RuntimeError: # pragma: lax no cover
507+
event_loop = asyncio.new_event_loop()
508+
asyncio.set_event_loop(event_loop)
509+
return event_loop

pydantic_ai_slim/pydantic_ai/agent/abstract.py

Lines changed: 129 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from typing_extensions import Self, TypeIs, TypeVar
1313

1414
from pydantic_graph import End
15-
from pydantic_graph._utils import get_event_loop
1615

1716
from .. import (
1817
_agent_graph,
@@ -335,7 +334,7 @@ def run_sync(
335334
if infer_name and self.name is None:
336335
self._infer_name(inspect.currentframe())
337336

338-
return get_event_loop().run_until_complete(
337+
return _utils.get_event_loop().run_until_complete(
339338
self.run(
340339
user_prompt,
341340
output_type=output_type,
@@ -581,6 +580,133 @@ async def on_complete() -> None:
581580
if not yielded:
582581
raise exceptions.AgentRunError('Agent run finished without producing a final result') # pragma: no cover
583582

583+
@overload
584+
def run_stream_sync(
585+
self,
586+
user_prompt: str | Sequence[_messages.UserContent] | None = None,
587+
*,
588+
output_type: None = None,
589+
message_history: Sequence[_messages.ModelMessage] | None = None,
590+
deferred_tool_results: DeferredToolResults | None = None,
591+
model: models.Model | models.KnownModelName | str | None = None,
592+
deps: AgentDepsT = None,
593+
model_settings: ModelSettings | None = None,
594+
usage_limits: _usage.UsageLimits | None = None,
595+
usage: _usage.RunUsage | None = None,
596+
infer_name: bool = True,
597+
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
598+
builtin_tools: Sequence[AbstractBuiltinTool] | None = None,
599+
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
600+
) -> result.StreamedRunResultSync[AgentDepsT, OutputDataT]: ...
601+
602+
@overload
603+
def run_stream_sync(
604+
self,
605+
user_prompt: str | Sequence[_messages.UserContent] | None = None,
606+
*,
607+
output_type: OutputSpec[RunOutputDataT],
608+
message_history: Sequence[_messages.ModelMessage] | None = None,
609+
deferred_tool_results: DeferredToolResults | None = None,
610+
model: models.Model | models.KnownModelName | str | None = None,
611+
deps: AgentDepsT = None,
612+
model_settings: ModelSettings | None = None,
613+
usage_limits: _usage.UsageLimits | None = None,
614+
usage: _usage.RunUsage | None = None,
615+
infer_name: bool = True,
616+
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
617+
builtin_tools: Sequence[AbstractBuiltinTool] | None = None,
618+
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
619+
) -> result.StreamedRunResultSync[AgentDepsT, RunOutputDataT]: ...
620+
621+
def run_stream_sync(
622+
self,
623+
user_prompt: str | Sequence[_messages.UserContent] | None = None,
624+
*,
625+
output_type: OutputSpec[RunOutputDataT] | None = None,
626+
message_history: Sequence[_messages.ModelMessage] | None = None,
627+
deferred_tool_results: DeferredToolResults | None = None,
628+
model: models.Model | models.KnownModelName | str | None = None,
629+
deps: AgentDepsT = None,
630+
model_settings: ModelSettings | None = None,
631+
usage_limits: _usage.UsageLimits | None = None,
632+
usage: _usage.RunUsage | None = None,
633+
infer_name: bool = True,
634+
toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
635+
builtin_tools: Sequence[AbstractBuiltinTool] | None = None,
636+
event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
637+
) -> result.StreamedRunResultSync[AgentDepsT, Any]:
638+
"""Run the agent with a user prompt in sync streaming mode.
639+
640+
This is a convenience method that wraps [`run_stream()`][pydantic_ai.agent.AbstractAgent.run_stream] with `loop.run_until_complete(...)`.
641+
You therefore can't use this method inside async code or if there's an active event loop.
642+
643+
This method builds an internal agent graph (using system prompts, tools and output schemas) and then
644+
runs the graph until the model produces output matching the `output_type`, for example text or structured data.
645+
At this point, a streaming run result object is yielded from which you can stream the output as it comes in,
646+
and -- once this output has completed streaming -- get the complete output, message history, and usage.
647+
648+
As this method will consider the first output matching the `output_type` to be the final output,
649+
it will stop running the agent graph and will not execute any tool calls made by the model after this "final" output.
650+
If you want to always run the agent graph to completion and stream events and output at the same time,
651+
use [`agent.run()`][pydantic_ai.agent.AbstractAgent.run] with an `event_stream_handler` or [`agent.iter()`][pydantic_ai.agent.AbstractAgent.iter] instead.
652+
653+
Example:
654+
```python
655+
from pydantic_ai import Agent
656+
657+
agent = Agent('openai:gpt-4o')
658+
659+
def main():
660+
response = agent.run_stream_sync('What is the capital of the UK?')
661+
print(response.get_output())
662+
#> The capital of the UK is London.
663+
```
664+
665+
Args:
666+
user_prompt: User input to start/continue the conversation.
667+
output_type: Custom output type to use for this run, `output_type` may only be used if the agent has no
668+
output validators since output validators would expect an argument that matches the agent's output type.
669+
message_history: History of the conversation so far.
670+
deferred_tool_results: Optional results for deferred tool calls in the message history.
671+
model: Optional model to use for this run, required if `model` was not set when creating the agent.
672+
deps: Optional dependencies to use for this run.
673+
model_settings: Optional settings to use for this model's request.
674+
usage_limits: Optional limits on model request count or token usage.
675+
usage: Optional usage to start with, useful for resuming a conversation or agents used in tools.
676+
infer_name: Whether to try to infer the agent name from the call frame if it's not set.
677+
toolsets: Optional additional toolsets for this run.
678+
builtin_tools: Optional additional builtin tools for this run.
679+
event_stream_handler: Optional handler for events from the model's streaming response and the agent's execution of tools to use for this run.
680+
It will receive all the events up until the final result is found, which you can then read or stream from inside the context manager.
681+
Note that it does _not_ receive any events after the final result is found.
682+
683+
Returns:
684+
The result of the run.
685+
"""
686+
if infer_name and self.name is None:
687+
self._infer_name(inspect.currentframe())
688+
689+
async def _consume_stream():
690+
async with self.run_stream(
691+
user_prompt,
692+
output_type=output_type,
693+
message_history=message_history,
694+
deferred_tool_results=deferred_tool_results,
695+
model=model,
696+
deps=deps,
697+
model_settings=model_settings,
698+
usage_limits=usage_limits,
699+
usage=usage,
700+
infer_name=infer_name,
701+
toolsets=toolsets,
702+
builtin_tools=builtin_tools,
703+
event_stream_handler=event_stream_handler,
704+
) as stream_result:
705+
yield stream_result
706+
707+
async_result = _utils.get_event_loop().run_until_complete(anext(_consume_stream()))
708+
return result.StreamedRunResultSync(async_result)
709+
584710
@overload
585711
def run_stream_events(
586712
self,
@@ -1217,6 +1343,6 @@ def to_cli_sync(
12171343
agent.to_cli_sync(prog_name='assistant')
12181344
```
12191345
"""
1220-
return get_event_loop().run_until_complete(
1346+
return _utils.get_event_loop().run_until_complete(
12211347
self.to_cli(deps=deps, prog_name=prog_name, message_history=message_history)
12221348
)

pydantic_ai_slim/pydantic_ai/result.py

Lines changed: 154 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations as _annotations
22

3-
from collections.abc import AsyncIterator, Awaitable, Callable, Iterable
3+
from collections.abc import AsyncIterator, Awaitable, Callable, Iterable, Iterator
44
from copy import deepcopy
55
from dataclasses import dataclass, field
66
from datetime import datetime
@@ -35,6 +35,7 @@
3535
'OutputDataT_inv',
3636
'ToolOutput',
3737
'OutputValidatorFunc',
38+
'StreamedRunResultSync',
3839
)
3940

4041

@@ -555,6 +556,158 @@ async def _marked_completed(self, message: _messages.ModelResponse | None = None
555556
await self._on_complete()
556557

557558

559+
@dataclass(init=False)
560+
class StreamedRunResultSync(Generic[AgentDepsT, OutputDataT]):
561+
"""Synchronous wrapper for [`StreamedRunResult`][pydantic_ai.result.StreamedRunResult] that only exposes sync methods."""
562+
563+
_streamed_run_result: StreamedRunResult[AgentDepsT, OutputDataT]
564+
565+
def __init__(self, streamed_run_result: StreamedRunResult[AgentDepsT, OutputDataT]) -> None:
566+
self._streamed_run_result = streamed_run_result
567+
568+
def all_messages(self, *, output_tool_return_content: str | None = None) -> list[_messages.ModelMessage]:
569+
"""Return the history of messages.
570+
571+
Args:
572+
output_tool_return_content: The return content of the tool call to set in the last message.
573+
This provides a convenient way to modify the content of the output tool call if you want to continue
574+
the conversation and want to set the response to the output tool call. If `None`, the last message will
575+
not be modified.
576+
577+
Returns:
578+
List of messages.
579+
"""
580+
return self._streamed_run_result.all_messages(output_tool_return_content=output_tool_return_content)
581+
582+
def all_messages_json(self, *, output_tool_return_content: str | None = None) -> bytes: # pragma: no cover
583+
"""Return all messages from [`all_messages`][pydantic_ai.result.StreamedRunResultSync.all_messages] as JSON bytes.
584+
585+
Args:
586+
output_tool_return_content: The return content of the tool call to set in the last message.
587+
This provides a convenient way to modify the content of the output tool call if you want to continue
588+
the conversation and want to set the response to the output tool call. If `None`, the last message will
589+
not be modified.
590+
591+
Returns:
592+
JSON bytes representing the messages.
593+
"""
594+
return self._streamed_run_result.all_messages_json(output_tool_return_content=output_tool_return_content)
595+
596+
def new_messages(self, *, output_tool_return_content: str | None = None) -> list[_messages.ModelMessage]:
597+
"""Return new messages associated with this run.
598+
599+
Messages from older runs are excluded.
600+
601+
Args:
602+
output_tool_return_content: The return content of the tool call to set in the last message.
603+
This provides a convenient way to modify the content of the output tool call if you want to continue
604+
the conversation and want to set the response to the output tool call. If `None`, the last message will
605+
not be modified.
606+
607+
Returns:
608+
List of new messages.
609+
"""
610+
return self._streamed_run_result.new_messages(output_tool_return_content=output_tool_return_content)
611+
612+
def new_messages_json(self, *, output_tool_return_content: str | None = None) -> bytes: # pragma: no cover
613+
"""Return new messages from [`new_messages`][pydantic_ai.result.StreamedRunResultSync.new_messages] as JSON bytes.
614+
615+
Args:
616+
output_tool_return_content: The return content of the tool call to set in the last message.
617+
This provides a convenient way to modify the content of the output tool call if you want to continue
618+
the conversation and want to set the response to the output tool call. If `None`, the last message will
619+
not be modified.
620+
621+
Returns:
622+
JSON bytes representing the new messages.
623+
"""
624+
return self._streamed_run_result.new_messages_json(output_tool_return_content=output_tool_return_content)
625+
626+
def stream_output(self, *, debounce_by: float | None = 0.1) -> Iterator[OutputDataT]:
627+
"""Stream the output as an iterable.
628+
629+
The pydantic validator for structured data will be called in
630+
[partial mode](https://docs.pydantic.dev/dev/concepts/experimental/#partial-validation)
631+
on each iteration.
632+
633+
Args:
634+
debounce_by: by how much (if at all) to debounce/group the output chunks by. `None` means no debouncing.
635+
Debouncing is particularly important for long structured outputs to reduce the overhead of
636+
performing validation as each token is received.
637+
638+
Returns:
639+
An iterable of the response data.
640+
"""
641+
return _utils.sync_async_iterator(self._streamed_run_result.stream_output(debounce_by=debounce_by))
642+
643+
def stream_text(self, *, delta: bool = False, debounce_by: float | None = 0.1) -> Iterator[str]:
644+
"""Stream the text result as an iterable.
645+
646+
!!! note
647+
Result validators will NOT be called on the text result if `delta=True`.
648+
649+
Args:
650+
delta: if `True`, yield each chunk of text as it is received, if `False` (default), yield the full text
651+
up to the current point.
652+
debounce_by: by how much (if at all) to debounce/group the response chunks by. `None` means no debouncing.
653+
Debouncing is particularly important for long structured responses to reduce the overhead of
654+
performing validation as each token is received.
655+
"""
656+
return _utils.sync_async_iterator(self._streamed_run_result.stream_text(delta=delta, debounce_by=debounce_by))
657+
658+
def stream_responses(self, *, debounce_by: float | None = 0.1) -> Iterator[tuple[_messages.ModelResponse, bool]]:
659+
"""Stream the response as an iterable of Structured LLM Messages.
660+
661+
Args:
662+
debounce_by: by how much (if at all) to debounce/group the response chunks by. `None` means no debouncing.
663+
Debouncing is particularly important for long structured responses to reduce the overhead of
664+
performing validation as each token is received.
665+
666+
Returns:
667+
An iterable of the structured response message and whether that is the last message.
668+
"""
669+
return _utils.sync_async_iterator(self._streamed_run_result.stream_responses(debounce_by=debounce_by))
670+
671+
def get_output(self) -> OutputDataT:
672+
"""Stream the whole response, validate and return it."""
673+
return _utils.get_event_loop().run_until_complete(self._streamed_run_result.get_output())
674+
675+
@property
676+
def response(self) -> _messages.ModelResponse:
677+
"""Return the current state of the response."""
678+
return self._streamed_run_result.response
679+
680+
def usage(self) -> RunUsage:
681+
"""Return the usage of the whole run.
682+
683+
!!! note
684+
This won't return the full usage until the stream is finished.
685+
"""
686+
return self._streamed_run_result.usage()
687+
688+
def timestamp(self) -> datetime:
689+
"""Get the timestamp of the response."""
690+
return self._streamed_run_result.timestamp()
691+
692+
def validate_response_output(self, message: _messages.ModelResponse, *, allow_partial: bool = False) -> OutputDataT:
693+
"""Validate a structured result message."""
694+
return _utils.get_event_loop().run_until_complete(
695+
self._streamed_run_result.validate_response_output(message, allow_partial=allow_partial)
696+
)
697+
698+
@property
699+
def is_complete(self) -> bool:
700+
"""Whether the stream has all been received.
701+
702+
This is set to `True` when one of
703+
[`stream_output`][pydantic_ai.result.StreamedRunResultSync.stream_output],
704+
[`stream_text`][pydantic_ai.result.StreamedRunResultSync.stream_text],
705+
[`stream_responses`][pydantic_ai.result.StreamedRunResultSync.stream_responses] or
706+
[`get_output`][pydantic_ai.result.StreamedRunResultSync.get_output] completes.
707+
"""
708+
return self._streamed_run_result.is_complete
709+
710+
558711
@dataclass(repr=False)
559712
class FinalResult(Generic[OutputDataT]):
560713
"""Marker class storing the final output of an agent run and associated metadata."""

0 commit comments

Comments (0)