GetStream · Nash0x7E2 · Nov 7, 2025 · Nov 5, 2025 · Nov 5, 2025 · Nov 5, 2025
diff --git a/plugins/moondream/example/README.md b/plugins/moondream/example/README.md
diff --git a/plugins/moondream/example/__init__.py b/plugins/moondream/example/__init__.py
diff --git a/plugins/moondream/example/moondream_vlm_example.py b/plugins/moondream/example/moondream_vlm_example.py
@@ -0,0 +1,48 @@
+import asyncio
+from uuid import uuid4
+from dotenv import load_dotenv
+
+from vision_agents.core import User, Agent
+from vision_agents.plugins import deepgram, getstream, vogent, elevenlabs, moondream
+from vision_agents.core.events import CallSessionParticipantJoinedEvent
+import os
+
+load_dotenv()  # load variables from a .env file into the environment; MOONDREAM_API_KEY is read via os.getenv below
+
+async def start_agent() -> None:  # Build a Moondream-backed agent, join a new call, and run until agent.finish() returns
+    llm = moondream.CloudVLM(
+        api_key=os.getenv("MOONDREAM_API_KEY"),
+        conf_threshold=0.3,  # NOTE(review): confirm CloudVLM accepts conf_threshold; its signature is not visible here
+    )
+    # Create an agent that runs on Stream's edge and uses the Moondream VLM built above as its llm
+    agent = Agent(
+        edge=getstream.Edge(),  # low latency edge. clients for React, iOS, Android, RN, Flutter etc.
+        agent_user=User(
+            name="My happy AI friend", id="agent"
+        ),
+        llm=llm,
+        tts=elevenlabs.TTS(),
+        stt=deepgram.STT(),
+        turn_detection=vogent.TurnDetection(),
+    )
+
+    # Create a call; a fresh uuid4 string is passed so every run uses a new value
+    call = agent.edge.client.video.call("default", str(uuid4()))
+
+    @agent.events.subscribe
+    async def on_participant_joined(event: CallSessionParticipantJoinedEvent):
+        if event.participant.user.id != "agent":  # skip the agent's own join (its user id is "agent", set above)
+            await asyncio.sleep(2)  # NOTE(review): presumably gives the participant's video time to arrive — confirm
+            await agent.simple_response("Describe what you currently see")
+
+
+    # Have the agent join the call/room
+    with await agent.join(call):
+        # Open the demo UI
+        await agent.edge.open_demo(call)
+        # Run until the call ends
+        await agent.finish()
+
+if __name__ == "__main__":
+    # setup_telemetry()  -- NOTE(review): left disabled; setup_telemetry is not imported in this file
+    asyncio.run(start_agent())
diff --git a/plugins/moondream/example/pyproject.toml b/plugins/moondream/example/pyproject.toml
@@ -0,0 +1,22 @@
+[project]
+name = "moondream-example"
+version = "0.1.0"
+description = "Example using Moondream Detect and VLM with Vision Agents"
+requires-python = ">=3.10"
+dependencies = [
+    "vision-agents",
+    "vision-agents-plugins-moondream",
+    "vision-agents-plugins-getstream",
+    "vision-agents-plugins-deepgram",
+    "vision-agents-plugins-elevenlabs",
+    "vision-agents-plugins-vogent",
+    "python-dotenv",
+]
+
+[tool.uv.sources]
+vision-agents = { workspace = true }
+vision-agents-plugins-moondream = { workspace = true }
+vision-agents-plugins-getstream = { workspace = true }
+vision-agents-plugins-deepgram = { workspace = true }
+vision-agents-plugins-elevenlabs = { workspace = true }
+vision-agents-plugins-vogent = { workspace = true }
diff --git a/plugins/moondream/tests/test_moondream_vlm.py b/plugins/moondream/tests/test_moondream_vlm.py
@@ -0,0 +1,105 @@
+"""
+Tests for the Moondream CloudVLM plugin.
+
+Integration tests require MOONDREAM_API_KEY environment variable:
+
+    export MOONDREAM_API_KEY="your-key-here"
+    uv run pytest plugins/moondream/tests/test_moondream_vlm.py -m integration -v
+
+To run only unit tests (no API key needed):
+
+    uv run pytest plugins/moondream/tests/test_moondream_vlm.py -m "not integration" -v
+"""
+import os
+from pathlib import Path
+from typing import AsyncIterator, Iterator
+
+import av
+import pytest
+from PIL import Image
+
+from vision_agents.plugins.moondream import CloudVLM
+
+
+@pytest.fixture(scope="session")
+def golf_image(assets_dir) -> Iterator[Image.Image]:
+    """Yield golf_swing.png from the assets_dir fixture as an RGB image, once per test session."""
+    asset_path = Path(assets_dir) / "golf_swing.png"
+    with Image.open(asset_path) as img:
+        yield img.convert("RGB")  # yielded inside the with-block, so the source file is closed only at fixture teardown
+
+
+@pytest.fixture
+def golf_frame(golf_image: Image.Image) -> av.VideoFrame:
+    """Wrap the session-wide golf image in an av.VideoFrame (a new frame per test)."""
+    return av.VideoFrame.from_image(golf_image)
+
+
+@pytest.fixture
+async def vlm_vqa() -> AsyncIterator[CloudVLM]:
+    """Yield a CloudVLM in "vqa" mode and close it on teardown; skip when MOONDREAM_API_KEY is unset."""
+    api_key = os.getenv("MOONDREAM_API_KEY")
+    if not api_key:
+        pytest.skip("MOONDREAM_API_KEY not set")
+
+    vlm = CloudVLM(api_key=api_key, mode="vqa")
+    try:
+        yield vlm
+    finally:
+        vlm.close()  # NOTE(review): called without await — confirm CloudVLM.close() is synchronous
+
+
+@pytest.fixture
+async def vlm_caption() -> AsyncIterator[CloudVLM]:
+    """Yield a CloudVLM in "caption" mode and close it on teardown; skip when MOONDREAM_API_KEY is unset."""
+    api_key = os.getenv("MOONDREAM_API_KEY")
+    if not api_key:
+        pytest.skip("MOONDREAM_API_KEY not set")
+
+    vlm = CloudVLM(api_key=api_key, mode="caption")
+    try:
+        yield vlm
+    finally:
+        vlm.close()  # NOTE(review): called without await — confirm CloudVLM.close() is synchronous
+
+
+@pytest.mark.integration
+@pytest.mark.skipif(not os.getenv("MOONDREAM_API_KEY"), reason="MOONDREAM_API_KEY not set")
+async def test_vqa_mode(golf_frame: av.VideoFrame, vlm_vqa: CloudVLM):
+    """VQA mode returns a non-empty, error-free answer about the golf frame that mentions 'golf'."""
+    # Inject the frame through the private _latest_frame attribute (presumably what simple_response reads — confirm)
+    vlm_vqa._latest_frame = golf_frame
+
+    # Ask a question about the image
+    question = "What sport is being played in this image?"
+    response = await vlm_vqa.simple_response(question)
+
+    # Verify we got a non-empty response and that no exception was recorded on it
+    assert response is not None
+    assert response.text is not None
+    assert len(response.text) > 0
+    assert response.exception is None
+
+    # Verify the answer mentions golf (case-insensitive), matching the test image
+    assert "golf" in response.text.lower()
+
+
+@pytest.mark.integration
+@pytest.mark.skipif(not os.getenv("MOONDREAM_API_KEY"), reason="MOONDREAM_API_KEY not set")
+async def test_caption_mode(golf_frame: av.VideoFrame, vlm_caption: CloudVLM):
+    """Caption mode returns a non-empty, error-free description of the golf frame."""
+    # Inject the frame through the private _latest_frame attribute (presumably what simple_response reads — confirm)
+    vlm_caption._latest_frame = golf_frame
+
+    # Generate a caption; an empty prompt is passed (presumably ignored in caption mode — confirm)
+    response = await vlm_caption.simple_response("")
+
+    # Verify we got a non-empty response and that no exception was recorded on it
+    assert response is not None
+    assert response.text is not None
+    assert len(response.text) > 0
+    assert response.exception is None
+
+    # Verify the caption is not made up of whitespace only
+    assert len(response.text.strip()) > 0
+
diff --git a/plugins/moondream/vision_agents/plugins/moondream/__init__.py b/plugins/moondream/vision_agents/plugins/moondream/__init__.py
@@ -15,10 +15,15 @@
     MoondreamVideoTrack,
 )
 
+from .moondream_vlm import (
+    CloudVLM,
+)
+
 __path__ = __import__("pkgutil").extend_path(__path__, __name__)
 
 __all__ = [
     "CloudDetectionProcessor",
+    "CloudVLM",
     "LocalDetectionProcessor",
     "MoondreamVideoTrack",
 ]