Merged

29 commits
01f7d1b
async models
pgrayy Jun 27, 2025
0fd2671
Merge branch 'main' of https://github.com/strands-agents/sdk-python i…
pgrayy Jun 30, 2025
ab2c088
lint
pgrayy Jun 30, 2025
3fd243e
tests
pgrayy Jun 30, 2025
d9deb93
agent - asyncio.run stream_async in call
pgrayy Jun 30, 2025
b864b65
tests - agenerator helper
pgrayy Jun 30, 2025
bd2adff
tests - agent - stream async result
pgrayy Jun 30, 2025
7ea90e9
lint
pgrayy Jun 30, 2025
3462e1d
agent - stream async - result
pgrayy Jun 30, 2025
8063c08
typing
pgrayy Jun 30, 2025
f8e58a1
tests - anext
pgrayy Jun 30, 2025
5bb0620
tests - alist
pgrayy Jun 30, 2025
760fcfb
lint
pgrayy Jun 30, 2025
61bb44d
tests - async utilities - scope session
pgrayy Jun 30, 2025
e47567f
tests integ - conftest
pgrayy Jun 30, 2025
90aaa47
Merge branch 'main' of https://github.com/strands-agents/sdk-python i…
pgrayy Jul 1, 2025
ccc44d9
lint
pgrayy Jul 1, 2025
e8c7bda
tests - async mock model provider
pgrayy Jul 1, 2025
17b24b3
lint
pgrayy Jul 1, 2025
6f46740
Merge branch 'main' of https://github.com/strands-agents/sdk-python i…
pgrayy Jul 2, 2025
1c667d9
async invoke and structured output
pgrayy Jul 2, 2025
21f80cc
thread asyncio run
pgrayy Jul 2, 2025
885f98d
test async threading
pgrayy Jul 2, 2025
773bef1
lint
pgrayy Jul 2, 2025
eb1ffd5
move invoke_async up for clarity
pgrayy Jul 2, 2025
2ce4581
Merge branch 'main' of https://github.com/strands-agents/sdk-python i…
pgrayy Jul 3, 2025
3a58ef4
lint
pgrayy Jul 3, 2025
780b13e
tests
pgrayy Jul 3, 2025
5761545
Merge branch 'main' of https://github.com/strands-agents/sdk-python i…
pgrayy Jul 3, 2025
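
The commit titles "agent - asyncio.run stream_async in call" and "thread asyncio run" name the bridging pattern this PR lands on: synchronous entry points drive the new async internals by running them to completion on an event loop. A minimal sketch of that idea, using hypothetical names (run_sync, coro_factory) that are not the SDK's actual helpers:

import asyncio
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Awaitable, Callable


def run_sync(coro_factory: Callable[[], Awaitable[Any]]) -> Any:
    """Run an async callable from sync code, even under a running loop."""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop in this thread: run the coroutine right here.
        return asyncio.run(coro_factory())
    # A loop is already running, so asyncio.run would raise; run the
    # coroutine on a fresh loop in a worker thread and wait for it.
    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(lambda: asyncio.run(coro_factory())).result()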
61 changes: 35 additions & 26 deletions src/strands/agent/agent.py
@@ -9,12 +9,13 @@
 2. Method-style for direct tool access: `agent.tool.tool_name(param1="value")`
 """

+import asyncio
 import json
 import logging
 import os
 import random
 from concurrent.futures import ThreadPoolExecutor
-from typing import Any, AsyncIterator, Callable, Generator, Mapping, Optional, Type, TypeVar, Union, cast
+from typing import Any, AsyncGenerator, AsyncIterator, Callable, Mapping, Optional, Type, TypeVar, Union, cast

 from opentelemetry import trace
 from pydantic import BaseModel
@@ -381,21 +382,22 @@ def __call__(self, prompt: str, **kwargs: Any) -> AgentResult:
             - metrics: Performance metrics from the event loop
             - state: The final state of the event loop
         """
-        callback_handler = kwargs.get("callback_handler", self.callback_handler)
-
-        self._start_agent_trace_span(prompt)
+        async def acall() -> AgentResult:
+            callback_handler = kwargs.get("callback_handler", self.callback_handler)

-        try:
             events = self._run_loop(callback_handler, prompt, kwargs)
-            for event in events:
+            async for event in events:
                 if "callback" in event:
                     callback_handler(**event["callback"])

-            stop_reason, message, metrics, state = event["stop"]
-            result = AgentResult(stop_reason, message, metrics, state)
+            return AgentResult(*event["stop"])

-            self._end_agent_trace_span(response=result)
+        self._start_agent_trace_span(prompt)
+
+        try:
+            result = asyncio.run(acall())
+            self._end_agent_trace_span(response=result)
             return result

         except Exception as e:
@@ -417,6 +419,15 @@ def structured_output(self, output_model: Type[T], prompt: Optional[str] = None)
             that the agent will use when responding.
             prompt: The prompt to use for the agent.
         """
+
+        async def acall(messages: Messages) -> T:
+            events = self.model.structured_output(output_model, messages)
+            async for event in events:
+                if "callback" in event:
+                    self.callback_handler(**cast(dict, event["callback"]))
+
+            return event["output"]
+
         messages = self.messages
         if not messages and not prompt:
             raise ValueError("No conversation history or prompt provided")
@@ -425,13 +436,7 @@ def structured_output(self, output_model: Type[T], prompt: Optional[str] = None)
         if prompt:
             messages.append({"role": "user", "content": [{"text": prompt}]})

-        # get the structured output from the model
-        events = self.model.structured_output(output_model, messages)
-        for event in events:
-            if "callback" in event:
-                self.callback_handler(**cast(dict, event["callback"]))
-
-        return event["output"]
+        return asyncio.run(acall(messages))

     async def stream_async(self, prompt: str, **kwargs: Any) -> AsyncIterator[Any]:
         """Process a natural language prompt and yield events as an async iterator.
@@ -469,23 +474,21 @@ async def stream_async(self, prompt: str, **kwargs: Any) -> AsyncIterator[Any]:

         try:
             events = self._run_loop(callback_handler, prompt, kwargs)
-            for event in events:
+            async for event in events:
                 if "callback" in event:
                     callback_handler(**event["callback"])
                     yield event["callback"]

-            stop_reason, message, metrics, state = event["stop"]
-            result = AgentResult(stop_reason, message, metrics, state)
-
+            result = AgentResult(*event["stop"])
             self._end_agent_trace_span(response=result)

         except Exception as e:
             self._end_agent_trace_span(error=e)
             raise

-    def _run_loop(
+    async def _run_loop(
         self, callback_handler: Callable[..., Any], prompt: str, kwargs: dict[str, Any]
-    ) -> Generator[dict[str, Any], None, None]:
+    ) -> AsyncGenerator[dict[str, Any], None]:
         """Execute the agent's event loop with the given prompt and parameters."""
         try:
             # Extract key parameters
@@ -497,14 +500,16 @@ def _run_loop(
             self.messages.append(new_message)

             # Execute the event loop cycle with retry logic for context limits
-            yield from self._execute_event_loop_cycle(callback_handler, kwargs)
+            events = self._execute_event_loop_cycle(callback_handler, kwargs)
+            async for event in events:
+                yield event

         finally:
             self.conversation_manager.apply_management(self)

-    def _execute_event_loop_cycle(
+    async def _execute_event_loop_cycle(
         self, callback_handler: Callable[..., Any], kwargs: dict[str, Any]
-    ) -> Generator[dict[str, Any], None, None]:
+    ) -> AsyncGenerator[dict[str, Any], None]:
         """Execute the event loop cycle with retry logic for context window limits.

         This internal method handles the execution of the event loop cycle and implements
@@ -527,7 +532,7 @@ def _execute_event_loop_cycle(

         try:
             # Execute the main event loop cycle
-            yield from event_loop_cycle(
+            events = event_loop_cycle(
                 model=model,
                 system_prompt=system_prompt,
                 messages=messages,  # will be modified by event_loop_cycle
@@ -540,11 +545,15 @@ def _execute_event_loop_cycle(
                 event_loop_parent_span=self.trace_span,
                 **kwargs,
             )
+            async for event in events:
+                yield event

         except ContextWindowOverflowException as e:
             # Try reducing the context size and retrying
             self.conversation_manager.reduce_context(self, e=e)
-            yield from self._execute_event_loop_cycle(callback_handler_override, kwargs)
+            events = self._execute_event_loop_cycle(callback_handler_override, kwargs)
+            async for event in events:
+                yield event

     def _record_tool_execution(
         self,
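The diff above keeps __call__ and structured_output synchronous by wrapping the new async code paths in asyncio.run, while stream_async hands events to the caller directly. A usage sketch, assuming Agent is importable from the package root and a default model plus credentials are configured:

import asyncio

from strands import Agent  # assumed import path

agent = Agent()

# Synchronous call: __call__ now drives the async event loop to
# completion internally via asyncio.run.
result = agent("What is 2 + 2?")
print(result)


# Async streaming: stream_async yields callback events as they arrive.
async def main() -> None:
    async for event in agent.stream_async("What is 2 + 2?"):
        print(event)


asyncio.run(main())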
29 changes: 18 additions & 11 deletions src/strands/event_loop/event_loop.py
@@ -12,7 +12,7 @@
 import time
 import uuid
 from functools import partial
-from typing import Any, Callable, Generator, Optional, cast
+from typing import Any, AsyncGenerator, Callable, Optional, cast

 from ..telemetry.metrics import EventLoopMetrics, Trace
 from ..telemetry.tracer import get_tracer
@@ -33,7 +33,7 @@
 MAX_DELAY = 240  # 4 minutes


-def event_loop_cycle(
+async def event_loop_cycle(
     model: Model,
     system_prompt: Optional[str],
     messages: Messages,
@@ -42,7 +42,7 @@ def event_loop_cycle(
     tool_handler: Optional[ToolHandler],
     tool_execution_handler: Optional[ParallelToolExecutorInterface] = None,
     **kwargs: Any,
-) -> Generator[dict[str, Any], None, None]:
+) -> AsyncGenerator[dict[str, Any], None]:
     """Execute a single cycle of the event loop.

     This core function processes a single conversation turn, handling model inference, tool execution, and error
@@ -132,7 +132,7 @@ def event_loop_cycle(
     try:
         # TODO: To maintain backwards compatability, we need to combine the stream event with kwargs before yielding
         # to the callback handler. This will be revisited when migrating to strongly typed events.
-        for event in stream_messages(model, system_prompt, messages, tool_config):
+        async for event in stream_messages(model, system_prompt, messages, tool_config):
             if "callback" in event:
                 yield {"callback": {**event["callback"], **(kwargs if "delta" in event["callback"] else {})}}

@@ -202,7 +202,7 @@ def event_loop_cycle(
             )

             # Handle tool execution
-            yield from _handle_tool_execution(
+            events = _handle_tool_execution(
                 stop_reason,
                 message,
                 model,
@@ -218,6 +218,9 @@ def event_loop_cycle(
                 cycle_start_time,
                 kwargs,
             )
+            async for event in events:
+                yield event
+
             return

         # End the cycle and return results
@@ -250,9 +253,9 @@ def event_loop_cycle(
    yield {"stop": (stop_reason, message, event_loop_metrics, kwargs["request_state"])}


-def recurse_event_loop(
+async def recurse_event_loop(
     **kwargs: Any,
-) -> Generator[dict[str, Any], None, None]:
+) -> AsyncGenerator[dict[str, Any], None]:
     """Make a recursive call to event_loop_cycle with the current state.

     This function is used when the event loop needs to continue processing after tool execution.
@@ -284,12 +287,14 @@ def recurse_event_loop(
     cycle_trace.add_child(recursive_trace)

     yield {"callback": {"start": True}}
-    yield from event_loop_cycle(**kwargs)
+    events = event_loop_cycle(**kwargs)
+    async for event in events:
+        yield event

     recursive_trace.end()


-def _handle_tool_execution(
+async def _handle_tool_execution(
     stop_reason: StopReason,
     message: Message,
     model: Model,
@@ -304,7 +309,7 @@ def _handle_tool_execution(
     cycle_span: Any,
     cycle_start_time: float,
     kwargs: dict[str, Any],
-) -> Generator[dict[str, Any], None, None]:
+) -> AsyncGenerator[dict[str, Any], None]:
     tool_uses: list[ToolUse] = []
     tool_results: list[ToolResult] = []
     invalid_tool_use_ids: list[str] = []
@@ -385,7 +390,7 @@ def _handle_tool_execution(
         yield {"stop": (stop_reason, message, event_loop_metrics, kwargs["request_state"])}
         return

-    yield from recurse_event_loop(
+    events = recurse_event_loop(
         model=model,
         system_prompt=system_prompt,
         messages=messages,
@@ -394,3 +399,5 @@ def _handle_tool_execution(
         tool_handler=tool_handler,
         **kwargs,
     )
+    async for event in events:
+        yield event
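
Every yield from in this file becomes an explicit async for loop that re-yields, because yield from is a syntax error inside an async generator; delegation has to be spelled out. A standalone sketch of the pattern:

import asyncio
from typing import AsyncGenerator


async def inner() -> AsyncGenerator[int, None]:
    yield 1
    yield 2


async def outer() -> AsyncGenerator[int, None]:
    # "yield from inner()" would be a SyntaxError here, so the
    # delegation is written as an explicit loop:
    async for item in inner():
        yield item


async def main() -> None:
    async for item in outer():
        print(item)  # 1, then 2


asyncio.run(main())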
17 changes: 9 additions & 8 deletions src/strands/event_loop/streaming.py
@@ -2,7 +2,7 @@

 import json
 import logging
-from typing import Any, Generator, Iterable, Optional
+from typing import Any, AsyncGenerator, AsyncIterable, Optional

 from ..types.content import ContentBlock, Message, Messages
 from ..types.models import Model
@@ -251,10 +251,10 @@ def extract_usage_metrics(event: MetadataEvent) -> tuple[Usage, Metrics]:
     return usage, metrics


-def process_stream(
-    chunks: Iterable[StreamEvent],
+async def process_stream(
+    chunks: AsyncIterable[StreamEvent],
     messages: Messages,
-) -> Generator[dict[str, Any], None, None]:
+) -> AsyncGenerator[dict[str, Any], None]:
     """Processes the response stream from the API, constructing the final message and extracting usage metrics.

     Args:
@@ -278,7 +278,7 @@ def process_stream(
     usage: Usage = Usage(inputTokens=0, outputTokens=0, totalTokens=0)
     metrics: Metrics = Metrics(latencyMs=0)

-    for chunk in chunks:
+    async for chunk in chunks:
         yield {"callback": {"event": chunk}}

         if "messageStart" in chunk:
@@ -300,12 +300,12 @@ def process_stream(
     yield {"stop": (stop_reason, state["message"], usage, metrics)}


-def stream_messages(
+async def stream_messages(
     model: Model,
     system_prompt: Optional[str],
     messages: Messages,
     tool_config: Optional[ToolConfig],
-) -> Generator[dict[str, Any], None, None]:
+) -> AsyncGenerator[dict[str, Any], None]:
     """Streams messages to the model and processes the response.

     Args:
@@ -323,4 +323,5 @@ def stream_messages(
     tool_specs = [tool["toolSpec"] for tool in tool_config.get("tools", [])] or None if tool_config else None

     chunks = model.converse(messages, tool_specs, system_prompt)
-    yield from process_stream(chunks, messages)
+    async for event in process_stream(chunks, messages):
+        yield event
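
process_stream now accepts an AsyncIterable of chunks, so any async source, including a hand-rolled async generator in a test, can drive it. A toy sketch of that shape; the chunk dictionaries are illustrative stand-ins, not the full event schema:

import asyncio
from typing import Any, AsyncGenerator, AsyncIterable


async def fake_chunks() -> AsyncGenerator[dict[str, Any], None]:
    # Stand-in for the chunks returned by model.converse(...).
    yield {"messageStart": {"role": "assistant"}}
    yield {"messageStop": {"stopReason": "end_turn"}}


async def consume(chunks: AsyncIterable[dict[str, Any]]) -> None:
    async for chunk in chunks:
        print(chunk)


asyncio.run(consume(fake_chunks()))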
10 changes: 5 additions & 5 deletions src/strands/models/anthropic.py
@@ -7,7 +7,7 @@
 import json
 import logging
 import mimetypes
-from typing import Any, Generator, Iterable, Optional, Type, TypedDict, TypeVar, Union, cast
+from typing import Any, AsyncGenerator, Optional, Type, TypedDict, TypeVar, Union, cast

 import anthropic
 from pydantic import BaseModel
@@ -344,7 +344,7 @@ def format_chunk(self, event: dict[str, Any]) -> StreamEvent:
         raise RuntimeError(f"event_type=<{event['type']} | unknown type")

     @override
-    def stream(self, request: dict[str, Any]) -> Iterable[dict[str, Any]]:
+    async def stream(self, request: dict[str, Any]) -> AsyncGenerator[dict[str, Any], None]:
         """Send the request to the Anthropic model and get the streaming response.

         Args:
@@ -376,9 +376,9 @@ def stream(self, request: dict[str, Any]) -> Iterable[dict[str, Any]]:
         raise error

     @override
-    def structured_output(
+    async def structured_output(
         self, output_model: Type[T], prompt: Messages
-    ) -> Generator[dict[str, Union[T, Any]], None, None]:
+    ) -> AsyncGenerator[dict[str, Union[T, Any]], None]:
         """Get structured output from the model.

         Args:
@@ -391,7 +391,7 @@ def structured_output(
         tool_spec = convert_pydantic_to_tool_spec(output_model)

         response = self.converse(messages=prompt, tool_specs=[tool_spec])
-        for event in process_stream(response, prompt):
+        async for event in process_stream(response, prompt):
             yield event

         stop_reason, messages, _, _ = event["stop"]
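
With stream now an async def, the Anthropic provider can sit on the SDK's async client. A minimal sketch of async streaming with anthropic.AsyncAnthropic, shown for orientation rather than as this PR's internal implementation; the model id is illustrative:

import asyncio

import anthropic


async def demo() -> None:
    client = anthropic.AsyncAnthropic()  # reads ANTHROPIC_API_KEY from the environment
    async with client.messages.stream(
        model="claude-3-5-sonnet-latest",  # illustrative model id
        max_tokens=256,
        messages=[{"role": "user", "content": "Say hello."}],
    ) as stream:
        async for text in stream.text_stream:
            print(text, end="", flush=True)


asyncio.run(demo())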