agents-core/vision_agents/core/agents/agents.py (2 additions, 0 deletions)
@@ -537,6 +537,8 @@ async def join(self, call: Call, wait_for_participant=True) -> "AgentSessionCont

        # wait for conversation creation coro at the very end of the join flow
        self.conversation = await create_conversation_coro
        # Provide conversation to the LLM so it can access the chat history.
        self.llm.set_conversation(self.conversation)

        if wait_for_participant:
            self.logger.info("Agent is ready, waiting for participant to join")
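For context, this join-time wiring is what lets the LLM read prior turns later on. A minimal usage sketch; the session handling and response call are assumed shapes for illustration, not code from this PR:

    # Hypothetical usage (names and shapes assumed):
    session = await agent.join(call)   # join() now calls llm.set_conversation(...)
    await agent.llm.simple_response("Summarize the conversation so far")
    # The LLM can consult its stored conversation for chat history.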
agents-core/vision_agents/core/llm/llm.py (16 additions, 3 deletions)
@@ -26,7 +26,7 @@
from getstream.video.rtc.pb.stream.video.sfu.models.models_pb2 import Participant
from getstream.video.rtc import AudioStreamTrack, PcmData
from vision_agents.core.processors import Processor
from vision_agents.core.utils.utils import parse_instructions
from vision_agents.core.utils.utils import Instructions, parse_instructions
from vision_agents.core.events.manager import EventManager
from .function_registry import FunctionRegistry
from .llm_types import ToolSchema, NormalizedToolCallItem
@@ -50,7 +50,6 @@ class LLM(abc.ABC):
    before_response_listener: BeforeCb
    after_response_listener: AfterCb
    agent: Optional["Agent"]
    _conversation: Optional["Conversation"]
    function_registry: FunctionRegistry

    def __init__(self):
@@ -59,6 +58,9 @@ def __init__(self):
        self.events = EventManager()
        self.events.register_events_from_module(events)
        self.function_registry = FunctionRegistry()
        self.instructions: Optional[str] = None
        self.parsed_instructions: Optional[Instructions] = None
        self._conversation: Optional[Conversation] = None

    async def warmup(self) -> None:
        """
@@ -187,9 +189,20 @@ def _attach_agent(self, agent: Agent):
        Attach agent to the llm
        """
        self.agent = agent
        self._conversation = agent.conversation
        self._set_instructions(agent.instructions)

    def set_conversation(self, conversation: Conversation):
        """
        Provide the Conversation object to the LLM so it can access the chat history.

        Called by the Agent after it joins the call.

        Args:
            conversation: a Conversation object.
        """
        self._conversation = conversation

    def _set_instructions(self, instructions: str):
        self.instructions = instructions
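A sketch of how a concrete LLM subclass might consume the injected conversation; the messages attribute and its role/content fields are assumptions for illustration, not an API this diff defines:

    # Hypothetical subclass (message shape assumed):
    class MyLLM(LLM):
        async def simple_response(self, text: str):
            history = self._conversation.messages if self._conversation else []
            payload = [{"role": m.role, "content": m.content} for m in history]
            payload.append({"role": "user", "content": text})
            # ...forward payload to the provider client here...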

agents-core/vision_agents/core/utils/video_utils.py (41 additions, 5 deletions)
@@ -1,27 +1,63 @@
"""Video frame utilities."""

import io

import av
from PIL.Image import Resampling


def ensure_even_dimensions(frame: av.VideoFrame) -> av.VideoFrame:
    """
    Ensure the frame has even dimensions, as required for H.264 yuv420p encoding.

    Scales the frame down by 1 pixel if the width or height is odd
    (`reformat` rescales; it does not crop).
    """
    needs_width_adjust = frame.width % 2 != 0
    needs_height_adjust = frame.height % 2 != 0

    if not needs_width_adjust and not needs_height_adjust:
        return frame

    new_width = frame.width - (1 if needs_width_adjust else 0)
    new_height = frame.height - (1 if needs_height_adjust else 0)

    adjusted = frame.reformat(width=new_width, height=new_height)
    # Preserve timing information on the new frame.
    adjusted.pts = frame.pts
    if frame.time_base is not None:
        adjusted.time_base = frame.time_base

    return adjusted
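The even-dimension requirement comes from 4:2:0 chroma subsampling: yuv420p stores one chroma sample per 2x2 block of luma pixels, so an odd width or height leaves a dangling half block that encoders such as x264 reject. A quick usage sketch:

    import av

    frame = av.VideoFrame(641, 361, "rgb24")   # odd width and height
    safe = ensure_even_dimensions(frame)
    assert (safe.width, safe.height) == (640, 360)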


def frame_to_jpeg_bytes(
    frame: av.VideoFrame, target_width: int, target_height: int, quality: int = 85
) -> bytes:
    """
    Convert a video frame to JPEG bytes, resizing it to fit within the target
    dimensions while preserving the aspect ratio.

    Args:
        frame: an instance of `av.VideoFrame`.
        target_width: target width in pixels.
        target_height: target height in pixels.
        quality: JPEG quality. Default is 85.

    Returns:
        The frame encoded as JPEG bytes.
    """
    # Convert the frame to a PIL image
    img = frame.to_image()

    # Scale factor that fits the image within the target dimensions
    # while maintaining its aspect ratio
    src_width, src_height = img.size
    scale = min(target_width / src_width, target_height / src_height)
    new_width = int(src_width * scale)
    new_height = int(src_height * scale)

    # Resize with the aspect ratio maintained
    resized = img.resize((new_width, new_height), Resampling.LANCZOS)

    # Encode as JPEG with quality control
    buf = io.BytesIO()
    resized.save(buf, "JPEG", quality=quality, optimize=True)
    return buf.getvalue()
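A usage sketch; the 768x768 budget is an arbitrary example value, not something this PR defines:

    frame = av.VideoFrame(1920, 1080, "rgb24")
    jpeg = frame_to_jpeg_bytes(frame, target_width=768, target_height=768, quality=80)
    # scale = min(768/1920, 768/1080) = 0.4, so the output is 768x432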
plugins/anthropic/tests/test_anthropic_llm.py (2 additions, 3 deletions)
@@ -18,7 +18,7 @@ class TestClaudeLLM:
    async def llm(self) -> ClaudeLLM:
        """Test ClaudeLLM initialization with a provided client."""
        llm = ClaudeLLM(model="claude-sonnet-4-20250514")
        llm._conversation = InMemoryConversation("be friendly", [])
        llm.set_conversation(InMemoryConversation("be friendly", []))
        return llm

    @pytest.mark.asyncio

@@ -58,7 +58,7 @@ async def test_native_api(self, llm: ClaudeLLM):
    @pytest.mark.integration
    async def test_stream(self, llm: ClaudeLLM):
        streamingWorks = False

        @llm.events.subscribe
        async def passed(event: LLMResponseChunkEvent):
            nonlocal streamingWorks
@@ -70,7 +70,6 @@ async def passed(event: LLMResponseChunkEvent):

        assert streamingWorks


    @pytest.mark.integration
    async def test_memory(self, llm: ClaudeLLM):
        await llm.simple_response(
plugins/aws/tests/test_aws.py (1 addition, 1 deletion)
@@ -35,7 +35,7 @@ def assert_response_successful(self, response):
    async def llm(self) -> BedrockLLM:
        """Test BedrockLLM initialization with a provided client."""
        llm = BedrockLLM(model="qwen.qwen3-32b-v1:0", region_name="us-east-1")
        llm._conversation = InMemoryConversation("be friendly", [])
        llm.set_conversation(InMemoryConversation("be friendly", []))
        return llm

    @pytest.mark.asyncio
plugins/gemini/tests/test_gemini_llm.py (10 additions, 10 deletions)
@@ -14,9 +14,7 @@
load_dotenv()



class TestGeminiLLM:

    def test_message(self):
        messages = GeminiLLM._normalize_message("say hi")
        assert isinstance(messages[0], Message)
@@ -32,7 +30,7 @@ def test_advanced_message(self):
    @pytest.fixture
    async def llm(self) -> GeminiLLM:
        llm = GeminiLLM(model="gemini-2.0-flash-exp")
        llm._conversation = InMemoryConversation("be friendly", [])
        llm.set_conversation(InMemoryConversation("be friendly", []))
        return llm

    @pytest.mark.integration
@@ -51,14 +49,14 @@ async def test_native_api(self, llm: GeminiLLM):
    @pytest.mark.integration
    async def test_stream(self, llm: GeminiLLM):
        streamingWorks = False

        @llm.events.subscribe
        async def passed(event: LLMResponseChunkEvent):
            nonlocal streamingWorks
            streamingWorks = True

        await llm.simple_response("Explain magma to a 5 year old")

        # Wait for all events in queue to be processed
        await llm.events.wait()

@@ -67,7 +65,9 @@ async def passed(event: LLMResponseChunkEvent):
    @pytest.mark.integration
    async def test_memory(self, llm: GeminiLLM):
        await llm.simple_response(text="There are 2 dogs in the room")
        response = await llm.simple_response(text="How many paws are there in the room?")
        response = await llm.simple_response(
            text="How many paws are there in the room?"
        )

        assert "8" in response.text or "eight" in response.text

@@ -82,7 +82,7 @@ async def test_native_memory(self, llm: GeminiLLM):
    @pytest.mark.integration
    async def test_instruction_following(self):
        llm = GeminiLLM(model="gemini-2.0-flash-exp")
        llm._conversation = InMemoryConversation("be friendly", [])
        llm.set_conversation(InMemoryConversation("be friendly", []))

        llm._set_instructions("only reply in 2 letter country shortcuts")

@@ -165,11 +165,11 @@ async def handle_error_event(event: events.GeminiErrorEvent):
            )
            chunk_item_ids.add(chunk_event.item_id)
            total_delta_text += chunk_event.delta

            # Validate content_index: should be sequential (0, 1, 2, ...) or None
            if chunk_event.content_index is not None:
                content_indices.append(chunk_event.content_index)

        # Verify content_index sequencing if any are provided
        if content_indices:
            # Should be sequential starting from 0