agents-core/vision_agents/core/agents/agents.py (2 additions, 0 deletions)
@@ -537,6 +537,8 @@ async def join(self, call: Call, wait_for_participant=True) -> "AgentSessionCont

        # wait for conversation creation coro at the very end of the join flow
        self.conversation = await create_conversation_coro
        # Provide conversation to the LLM so it can access the chat history.
        self.llm.set_conversation(self.conversation)

        if wait_for_participant:
            self.logger.info("Agent is ready, waiting for participant to join")
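For context, this join-time wiring is what lets the LLM read prior turns later on. A minimal usage sketch; the session handling and response call are assumed shapes for illustration, not code from this PR:

    # Hypothetical usage (names and shapes assumed):
    session = await agent.join(call)   # join() now calls llm.set_conversation(...)
    await agent.llm.simple_response("Summarize the conversation so far")
    # The LLM can consult its stored conversation for chat history.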
agents-core/vision_agents/core/llm/llm.py (16 additions, 3 deletions)
@@ -26,7 +26,7 @@
from getstream.video.rtc.pb.stream.video.sfu.models.models_pb2 import Participant
from getstream.video.rtc import AudioStreamTrack, PcmData
from vision_agents.core.processors import Processor
from vision_agents.core.utils.utils import parse_instructions
from vision_agents.core.utils.utils import Instructions, parse_instructions
from vision_agents.core.events.manager import EventManager
from .function_registry import FunctionRegistry
from .llm_types import ToolSchema, NormalizedToolCallItem
@@ -50,7 +50,6 @@ class LLM(abc.ABC):
    before_response_listener: BeforeCb
    after_response_listener: AfterCb
    agent: Optional["Agent"]
    _conversation: Optional["Conversation"]
    function_registry: FunctionRegistry

    def __init__(self):
@@ -59,6 +58,9 @@ def __init__(self):
        self.events = EventManager()
        self.events.register_events_from_module(events)
        self.function_registry = FunctionRegistry()
        self.instructions: Optional[str] = None
        self.parsed_instructions: Optional[Instructions] = None
        self._conversation: Optional[Conversation] = None

    async def warmup(self) -> None:
        """
@@ -187,9 +189,20 @@ def _attach_agent(self, agent: Agent):
        Attach agent to the llm
        """
        self.agent = agent
        self._conversation = agent.conversation
        self._set_instructions(agent.instructions)

    def set_conversation(self, conversation: Conversation):
        """
        Provide the Conversation object to the LLM so it can access the chat history.

        Called by the Agent after it joins the call.

        Args:
            conversation: a Conversation object.
        """
        self._conversation = conversation

    def _set_instructions(self, instructions: str):
        self.instructions = instructions
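A sketch of how a concrete LLM subclass might consume the injected conversation; the messages attribute and its role/content fields are assumptions for illustration, not an API this diff defines:

    # Hypothetical subclass (message shape assumed):
    class MyLLM(LLM):
        async def simple_response(self, text: str):
            history = self._conversation.messages if self._conversation else []
            payload = [{"role": m.role, "content": m.content} for m in history]
            payload.append({"role": "user", "content": text})
            # ...forward payload to the provider client here...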

agents-core/vision_agents/core/utils/video_utils.py (41 additions, 5 deletions)
@@ -1,27 +1,63 @@
"""Video frame utilities."""

import io

import av
from PIL.Image import Resampling


def ensure_even_dimensions(frame: av.VideoFrame) -> av.VideoFrame:
    """
    Ensure the frame has even dimensions, as required for H.264 yuv420p encoding.

    Scales the frame down by 1 pixel if the width or height is odd
    (`reformat` rescales; it does not crop).
    """
    needs_width_adjust = frame.width % 2 != 0
    needs_height_adjust = frame.height % 2 != 0

    if not needs_width_adjust and not needs_height_adjust:
        return frame

    new_width = frame.width - (1 if needs_width_adjust else 0)
    new_height = frame.height - (1 if needs_height_adjust else 0)

    adjusted = frame.reformat(width=new_width, height=new_height)
    # Preserve timing information on the new frame.
    adjusted.pts = frame.pts
    if frame.time_base is not None:
        adjusted.time_base = frame.time_base

    return adjusted
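The even-dimension requirement comes from 4:2:0 chroma subsampling: yuv420p stores one chroma sample per 2x2 block of luma pixels, so an odd width or height leaves a dangling half block that encoders such as x264 reject. A quick usage sketch:

    import av

    frame = av.VideoFrame(641, 361, "rgb24")   # odd width and height
    safe = ensure_even_dimensions(frame)
    assert (safe.width, safe.height) == (640, 360)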


def frame_to_jpeg_bytes(
    frame: av.VideoFrame, target_width: int, target_height: int, quality: int = 85
) -> bytes:
    """
    Convert a video frame to JPEG bytes, resizing it to fit within the target
    dimensions while preserving the aspect ratio.

    Args:
        frame: an instance of `av.VideoFrame`.
        target_width: target width in pixels.
        target_height: target height in pixels.
        quality: JPEG quality. Default is 85.

    Returns:
        The frame encoded as JPEG bytes.
    """
    # Convert the frame to a PIL image
    img = frame.to_image()

    # Scale factor that fits the image within the target dimensions
    # while maintaining its aspect ratio
    src_width, src_height = img.size
    scale = min(target_width / src_width, target_height / src_height)
    new_width = int(src_width * scale)
    new_height = int(src_height * scale)

    # Resize with the aspect ratio maintained
    resized = img.resize((new_width, new_height), Resampling.LANCZOS)

    # Encode as JPEG with quality control
    buf = io.BytesIO()
    resized.save(buf, "JPEG", quality=quality, optimize=True)
    return buf.getvalue()
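A usage sketch; the 768x768 budget is an arbitrary example value, not something this PR defines:

    frame = av.VideoFrame(1920, 1080, "rgb24")
    jpeg = frame_to_jpeg_bytes(frame, target_width=768, target_height=768, quality=80)
    # scale = min(768/1920, 768/1080) = 0.4, so the output is 768x432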
plugins/anthropic/tests/test_anthropic_llm.py (2 additions, 3 deletions)
@@ -18,7 +18,7 @@ class TestClaudeLLM:
    async def llm(self) -> ClaudeLLM:
        """Test ClaudeLLM initialization with a provided client."""
        llm = ClaudeLLM(model="claude-sonnet-4-20250514")
        llm._conversation = InMemoryConversation("be friendly", [])
        llm.set_conversation(InMemoryConversation("be friendly", []))
        return llm

    @pytest.mark.asyncio

@@ -58,7 +58,7 @@ async def test_native_api(self, llm: ClaudeLLM):
    @pytest.mark.integration
    async def test_stream(self, llm: ClaudeLLM):
        streamingWorks = False

        @llm.events.subscribe
        async def passed(event: LLMResponseChunkEvent):
            nonlocal streamingWorks
@@ -70,7 +70,6 @@ async def passed(event: LLMResponseChunkEvent):

        assert streamingWorks


    @pytest.mark.integration
    async def test_memory(self, llm: ClaudeLLM):
        await llm.simple_response(
plugins/aws/tests/test_aws.py (1 addition, 1 deletion)
@@ -35,7 +35,7 @@ def assert_response_successful(self, response):
    async def llm(self) -> BedrockLLM:
        """Test BedrockLLM initialization with a provided client."""
        llm = BedrockLLM(model="qwen.qwen3-32b-v1:0", region_name="us-east-1")
        llm._conversation = InMemoryConversation("be friendly", [])
        llm.set_conversation(InMemoryConversation("be friendly", []))
        return llm

    @pytest.mark.asyncio
plugins/gemini/tests/test_gemini_llm.py (10 additions, 10 deletions)
@@ -14,9 +14,7 @@
load_dotenv()



class TestGeminiLLM:

    def test_message(self):
        messages = GeminiLLM._normalize_message("say hi")
        assert isinstance(messages[0], Message)
@@ -32,7 +30,7 @@ def test_advanced_message(self):
    @pytest.fixture
    async def llm(self) -> GeminiLLM:
        llm = GeminiLLM(model="gemini-2.0-flash-exp")
        llm._conversation = InMemoryConversation("be friendly", [])
        llm.set_conversation(InMemoryConversation("be friendly", []))
        return llm

    @pytest.mark.integration
@@ -51,14 +49,14 @@ async def test_native_api(self, llm: GeminiLLM):
    @pytest.mark.integration
    async def test_stream(self, llm: GeminiLLM):
        streamingWorks = False

        @llm.events.subscribe
        async def passed(event: LLMResponseChunkEvent):
            nonlocal streamingWorks
            streamingWorks = True

        await llm.simple_response("Explain magma to a 5 year old")

        # Wait for all events in queue to be processed
        await llm.events.wait()

@@ -67,7 +65,9 @@ async def passed(event: LLMResponseChunkEvent):
    @pytest.mark.integration
    async def test_memory(self, llm: GeminiLLM):
        await llm.simple_response(text="There are 2 dogs in the room")
        response = await llm.simple_response(text="How many paws are there in the room?")
        response = await llm.simple_response(
            text="How many paws are there in the room?"
        )

        assert "8" in response.text or "eight" in response.text

@@ -82,7 +82,7 @@ async def test_native_memory(self, llm: GeminiLLM):
    @pytest.mark.integration
    async def test_instruction_following(self):
        llm = GeminiLLM(model="gemini-2.0-flash-exp")
        llm._conversation = InMemoryConversation("be friendly", [])
        llm.set_conversation(InMemoryConversation("be friendly", []))

        llm._set_instructions("only reply in 2 letter country shortcuts")

@@ -165,11 +165,11 @@ async def handle_error_event(event: events.GeminiErrorEvent):
            )
            chunk_item_ids.add(chunk_event.item_id)
            total_delta_text += chunk_event.delta

            # Validate content_index: should be sequential (0, 1, 2, ...) or None
            if chunk_event.content_index is not None:
                content_indices.append(chunk_event.content_index)

        # Verify content_index sequencing if any are provided
        if content_indices:
            # Should be sequential starting from 0