diff --git a/agents-core/pyproject.toml b/agents-core/pyproject.toml index 1ecde9e6..d24e8e16 100644 --- a/agents-core/pyproject.toml +++ b/agents-core/pyproject.toml @@ -23,7 +23,7 @@ requires-python = ">=3.10" dependencies = [ "getstream[webrtc,telemetry]>=2.5.7", "python-dotenv>=1.1.1", - "pillow>=11.3.0", + "pillow>=10.4.0", # Compatible with moondream SDK (<11.0.0) "numpy>=1.24.0", "mcp>=1.16.0", "colorlog>=6.10.1", diff --git a/plugins/moondream/README.md b/plugins/moondream/README.md new file mode 100644 index 00000000..7457f6f4 --- /dev/null +++ b/plugins/moondream/README.md @@ -0,0 +1,172 @@ +# Moondream Plugin + +This plugin provides Moondream 3 detection capabilities for vision-agents, enabling real-time zero-shot object detection on video streams. Choose between cloud-hosted or local processing depending on your needs. + +## Installation + +```bash +uv add vision-agents-plugins-moondream +``` + +## Choosing the Right Processor + +### CloudDetectionProcessor (Recommended for Most Users) +- **Use when:** You want a simple setup with no infrastructure management +- **Pros:** No model download, no GPU required, automatic updates +- **Cons:** Requires API key, 2 RPS rate limit by default (can be increased) +- **Best for:** Development, testing, low-to-medium volume applications + +### LocalDetectionProcessor (For Advanced Users) +- **Use when:** You need higher throughput, have your own GPU infrastructure, or want to avoid rate limits +- **Pros:** No rate limits, no API costs, full control over hardware +- **Cons:** Requires GPU for best performance, model download on first use, infrastructure management +- **Best for:** Production deployments, high-volume applications, Digital Ocean Gradient AI GPUs, or custom infrastructure + +## Quick Start + +### Using CloudDetectionProcessor (Hosted) + +The `CloudDetectionProcessor` uses Moondream's hosted API. By default it has a 2 RPS (requests per second) rate limit and requires an API key. 
The rate limit can be adjusted by contacting the Moondream team to request a higher limit. + +```python +from vision_agents.plugins import moondream +from vision_agents.core import Agent + +# Create a cloud processor with detection +processor = moondream.CloudDetectionProcessor( + api_key="your-api-key", # or set MOONDREAM_API_KEY env var + detect_objects="person", # or ["person", "car", "dog"] for multiple + fps=30 +) + +# Use in an agent +agent = Agent( + processors=[processor], + llm=your_llm, + # ... other components +) +``` + +### Using LocalDetectionProcessor (On-Device) + +If you are running on your own infrastructure or using a service like Digital Ocean's Gradient AI GPUs, you can use the `LocalDetectionProcessor`, which downloads the model from HuggingFace and runs it on-device. When no device is specified, it uses CUDA if available and falls back to CPU otherwise. Performance will vary depending on your specific hardware configuration. + +**Note:** The moondream3-preview model is gated and requires HuggingFace authentication: +- Request access at https://huggingface.co/moondream/moondream3-preview +- Set `HF_TOKEN` environment variable: `export HF_TOKEN=your_token_here` +- Or run: `huggingface-cli login` + +```python +from vision_agents.plugins import moondream +from vision_agents.core import Agent + +# Create a local processor (no API key needed) +processor = moondream.LocalDetectionProcessor( + detect_objects=["person", "car", "dog"], + conf_threshold=0.3, + device="cuda", # omit to auto-detect (CUDA if available, otherwise CPU) + fps=30 +) + +# Use in an agent +agent = Agent( + processors=[processor], + llm=your_llm, + # ... 
other components +) +``` + +### Detect Multiple Objects + +```python +# Detect multiple object types with zero-shot detection +processor = moondream.CloudDetectionProcessor( + api_key="your-api-key", + detect_objects=["person", "car", "dog", "basketball"], + conf_threshold=0.3 +) + +# Access results for LLM +state = processor.state() +print(state["detections_summary"]) # "Detected: 2 persons, 1 car" +print(state["detections_count"]) # Total number of detections +print(state["last_image"]) # PIL Image for vision models +``` + +## Configuration + +### CloudDetectionProcessor Parameters + +- `api_key`: str - API key for Moondream Cloud API. If not provided, it is read from the `MOONDREAM_API_KEY` environment variable; a `ValueError` is raised if neither is set. +- `detect_objects`: str | List[str] - Object(s) to detect using zero-shot detection. Can be any object name like "person", "car", "basketball". Default: `"person"` +- `conf_threshold`: float - Confidence threshold for detections (default: 0.3) +- `fps`: int - Frame processing rate (default: 30) +- `interval`: int - Processing interval in seconds (default: 0) +- `max_workers`: int - Thread pool size for CPU-intensive operations (default: 10) + +**Rate Limits:** By default, the Moondream Cloud API has a 2 RPS (requests per second) rate limit. Contact the Moondream team to request a higher limit. + +### LocalDetectionProcessor Parameters + +- `detect_objects`: str | List[str] - Object(s) to detect using zero-shot detection. Can be any object name like "person", "car", "basketball". Default: `"person"` +- `conf_threshold`: float - Confidence threshold for detections (default: 0.3) +- `fps`: int - Frame processing rate (default: 30) +- `interval`: int - Processing interval in seconds (default: 0) +- `max_workers`: int - Thread pool size for CPU-intensive operations (default: 10) +- `device`: str - Device to run inference on ('cuda' or 'cpu'; 'mps' is accepted but converted to CPU because Moondream does not work with MPS). When omitted, uses CUDA if available and otherwise CPU. 
Default: `None` (auto-detect) +- `model_name`: str - Hugging Face model identifier (default: "moondream/moondream3-preview") +- `options`: AgentOptions - Model directory configuration. If not provided, the default options are used, whose model directory is `tempfile.gettempdir()` + +**Performance:** Performance will vary depending on your hardware configuration. CUDA is recommended for best performance on NVIDIA GPUs. The model will be downloaded from HuggingFace on first use. + +## Video Publishing + +The processor publishes annotated video frames with bounding boxes drawn on detected objects: + +```python +processor = moondream.CloudDetectionProcessor( + api_key="your-api-key", + detect_objects=["person", "car"] +) + +# The track will show: +# - Green bounding boxes around detected objects +# - Labels with confidence scores +# - Real-time annotation overlay +``` + +## Testing + +The plugin includes comprehensive tests: + +```bash +# Run all tests +pytest plugins/moondream/tests/ -v + +# Run specific test categories +pytest plugins/moondream/tests/ -k "inference" -v +pytest plugins/moondream/tests/ -k "annotation" -v +pytest plugins/moondream/tests/ -k "state" -v +``` + +## Dependencies + +### Required +- `vision-agents` - Core framework +- `moondream` - Moondream SDK for cloud API (CloudDetectionProcessor only) +- `numpy>=2.0.0` - Array operations +- `pillow>=10.4.0` - Image processing +- `opencv-python>=4.8.0` - Video annotation +- `aiortc` - WebRTC support + +### LocalDetectionProcessor Additional Dependencies +- `torch` - PyTorch for model inference +- `transformers` - HuggingFace transformers library for model loading + +## Links + +- [Moondream Documentation](https://docs.moondream.ai/) +- [Vision Agents Documentation](https://visionagents.ai/) +- [GitHub Repository](https://github.com/GetStream/Vision-Agents) + + diff --git a/plugins/moondream/py.typed b/plugins/moondream/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/plugins/moondream/pyproject.toml 
b/plugins/moondream/pyproject.toml new file mode 100644 index 00000000..da6f04a3 --- /dev/null +++ b/plugins/moondream/pyproject.toml @@ -0,0 +1,43 @@ +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +[project] +name = "vision-agents-plugins-moondream" +dynamic = ["version"] +description = "Moondream 3 vision processor plugin for Vision Agents" +readme = "README.md" +requires-python = ">=3.10" +license = "MIT" +dependencies = [ + "vision-agents", + "numpy>=2.0.0", + "pillow>=10.4.0", + "opencv-python>=4.8.0", + "moondream>=0.1.1", # Now compatible with vision-agents pillow>=10.4.0 + "transformers>=4.40.0", # For local model loading + "torch>=2.0.0", # PyTorch for model inference + "accelerate>=0.20.0", # Required for device_map and device management +] + +[project.urls] +Documentation = "https://visionagents.ai/" +Website = "https://visionagents.ai/" +Source = "https://github.com/GetStream/Vision-Agents" + +[tool.hatch.version] +source = "vcs" +raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" } + +[tool.hatch.build.targets.wheel] +packages = [".", "vision_agents"] + +[tool.uv.sources] +vision-agents = { workspace = true } + +[dependency-groups] +dev = [ + "pytest>=8.4.1", + "pytest-asyncio>=1.0.0", +] + diff --git a/plugins/moondream/tests/test_moondream.py b/plugins/moondream/tests/test_moondream.py new file mode 100644 index 00000000..55b66ae8 --- /dev/null +++ b/plugins/moondream/tests/test_moondream.py @@ -0,0 +1,466 @@ +""" +Moondream processor tests. + +Unit tests run without API keys. 
+Integration tests require MOONDREAM_API_KEY environment variable: + + export MOONDREAM_API_KEY="your-key-here" + uv run pytest plugins/moondream/tests/ -m integration -v + +To run only unit tests (no API key needed): + + uv run pytest plugins/moondream/tests/ -m "not integration" -v +""" +import os +import pytest +import av +import numpy as np +from typing import Dict, Any +from PIL import Image + +from vision_agents.plugins.moondream import ( + CloudDetectionProcessor, + MoondreamVideoTrack, +) +from vision_agents.plugins.moondream.moondream_utils import annotate_detections + + +@pytest.fixture +def sample_image(): + """Test image fixture for Moondream testing.""" + return Image.new("RGB", (640, 480), color="blue") + + +@pytest.fixture +def sample_frame(sample_image): + """Test av.VideoFrame fixture.""" + return av.VideoFrame.from_image(sample_image) + + +def test_processor_initialization(): + """Test that processor can be initialized with basic config.""" + processor = CloudDetectionProcessor(api_key="test_key") + assert processor is not None + processor.close() + + +@pytest.mark.asyncio +async def test_video_track_frame_queuing(sample_frame): + """Test that video track can queue and receive frames.""" + track = MoondreamVideoTrack() + await track.add_frame(sample_frame) + received_frame = await track.recv() + assert received_frame is not None + assert received_frame.width == 640 + assert received_frame.height == 480 + track.stop() + + +def test_processor_publishes_track(): + """Test that processor publishes a MoondreamVideoTrack.""" + processor = CloudDetectionProcessor(api_key="test_key") + track = processor.publish_video_track() + assert isinstance(track, MoondreamVideoTrack) + processor.close() + + +@pytest.mark.asyncio +async def test_cloud_inference_structure(sample_image): + """Test that cloud inference returns proper structure.""" + processor = CloudDetectionProcessor(api_key="test_key") + + # Mock the SDK detection call + def mock_detection_sync(image): 
+ return [{"label": "test", "bbox": [0.1, 0.1, 0.5, 0.5], "confidence": 0.9}] + + processor._run_detection_sync = mock_detection_sync + + frame_array = np.array(sample_image) + result = await processor._run_inference(frame_array) + + assert isinstance(result, dict) + assert "detections" in result + processor.close() + + +@pytest.mark.asyncio +async def test_run_inference(sample_image): + """Test that run_inference works correctly.""" + frame_array = np.array(sample_image) + + # Mock SDK detection + def mock_detection_sync(image): + return [] + + # Test inference + processor = CloudDetectionProcessor(api_key="test_key") + processor._run_detection_sync = mock_detection_sync + result = await processor._run_inference(frame_array) + assert isinstance(result, dict) + processor.close() + + +def test_annotate_detections_with_normalized_coords(sample_image): + """Test annotation with normalized coordinates.""" + processor = CloudDetectionProcessor(api_key="test_key") + + frame_array = np.array(sample_image) + + # Mock detection results with normalized coordinates + mock_results = { + "detections": [ + {"bbox": [0.1, 0.1, 0.5, 0.5], "label": "person", "confidence": 0.95} + ] + } + + # Call annotate_detections directly with styling parameters + annotated = annotate_detections( + frame_array, + mock_results, + font=processor._font, + font_scale=processor._font_scale, + font_thickness=processor._font_thickness, + bbox_color=processor._bbox_color, + text_color=processor._text_color, + ) + + # Verify frame was modified + assert not np.array_equal(frame_array, annotated) + assert annotated.shape == frame_array.shape + processor.close() + + +def test_annotate_detections_with_pixel_coords(sample_image): + """Test annotation with pixel coordinates.""" + processor = CloudDetectionProcessor(api_key="test_key") + + frame_array = np.array(sample_image) + + # Mock detection results with pixel coordinates + mock_results = { + "detections": [ + {"bbox": [10, 10, 100, 100], "label": "car", 
"confidence": 0.88} + ] + } + + # Call annotate_detections directly with styling parameters + annotated = annotate_detections( + frame_array, + mock_results, + font=processor._font, + font_scale=processor._font_scale, + font_thickness=processor._font_thickness, + bbox_color=processor._bbox_color, + text_color=processor._text_color, + ) + + # Verify frame was modified + assert not np.array_equal(frame_array, annotated) + assert annotated.shape == frame_array.shape + processor.close() + + +def test_annotate_detections_multiple_objects(sample_image): + """Test annotation with multiple detections.""" + processor = CloudDetectionProcessor(api_key="test_key") + + frame_array = np.array(sample_image) + + # Mock multiple detections + mock_results = { + "detections": [ + {"bbox": [0.1, 0.1, 0.3, 0.3], "label": "person", "confidence": 0.95}, + {"bbox": [0.5, 0.5, 0.9, 0.9], "label": "car", "confidence": 0.88}, + {"bbox": [100, 200, 300, 400], "label": "dog", "confidence": 0.92}, + ] + } + + # Call annotate_detections directly with styling parameters + annotated = annotate_detections( + frame_array, + mock_results, + font=processor._font, + font_scale=processor._font_scale, + font_thickness=processor._font_thickness, + bbox_color=processor._bbox_color, + text_color=processor._text_color, + ) + + # Verify frame was modified + assert not np.array_equal(frame_array, annotated) + processor.close() + + +def test_annotate_detections_empty_results(sample_image): + """Test annotation with no detections.""" + processor = CloudDetectionProcessor(api_key="test_key") + + frame_array = np.array(sample_image) + mock_results: Dict[str, Any] = {"detections": []} + + # Call annotate_detections directly with styling parameters + annotated = annotate_detections( + frame_array, + mock_results, + font=processor._font, + font_scale=processor._font_scale, + font_thickness=processor._font_thickness, + bbox_color=processor._bbox_color, + text_color=processor._text_color, + ) + + # Frame should be 
unchanged + assert np.array_equal(frame_array, annotated) + processor.close() + + +@pytest.mark.asyncio +async def test_process_and_add_frame(sample_frame): + """Test the full frame processing pipeline.""" + processor = CloudDetectionProcessor(api_key="test_key") + + # Mock the _run_inference method (the name used by every other test here) so no real API call is made + async def mock_inference(frame_array): + return {"detections": [{"bbox": [0.1, 0.1, 0.5, 0.5], "label": "test", "confidence": 0.9}]} + + processor._run_inference = mock_inference + + # Process a frame + await processor._process_and_add_frame(sample_frame) + + # Verify results were stored + assert hasattr(processor, "_last_results") + assert "detections" in processor._last_results + processor.close() + + +@pytest.mark.integration +@pytest.mark.skipif(not os.getenv("MOONDREAM_API_KEY"), reason="MOONDREAM_API_KEY not set") +@pytest.mark.asyncio +async def test_live_detection_api(): + """Test live detection API with real Moondream service.""" + processor = CloudDetectionProcessor( + api_key=os.getenv("MOONDREAM_API_KEY"), + conf_threshold=0.5 + ) + + # Use existing test image + from pathlib import Path + assets_dir = Path(__file__).parent.parent.parent.parent / "tests" / "test_assets" + image_path = assets_dir / "golf_swing.png" + + if image_path.exists(): + image = Image.open(image_path) + frame_array = np.array(image) + + # Run inference + result = await processor._run_inference(frame_array) + + # Verify we got real detections + assert "detections" in result + assert isinstance(result["detections"], list) + + # Log what we detected + if result["detections"]: + print(f"\n✅ Detected {len(result['detections'])} objects:") + for det in result["detections"]: + print(f" - {det.get('label')} ({det.get('confidence', 0):.2f})") + else: + print("\n⚠️ No objects detected (this might be expected)") + else: + pytest.skip(f"Test image not found: {image_path}") + + processor.close() + + +@pytest.mark.integration +@pytest.mark.skipif(not 
os.getenv("MOONDREAM_API_KEY"), reason="MOONDREAM_API_KEY not set") +@pytest.mark.asyncio +async def test_live_detection_with_annotation(): + """Test that detection results are properly annotated on frames.""" + processor = CloudDetectionProcessor( + api_key=os.getenv("MOONDREAM_API_KEY") + ) + + # Create a simple test image + test_image = Image.new("RGB", (640, 480), color="blue") + frame_array = np.array(test_image) + + # Run inference + result = await processor._run_inference(frame_array) + + # If we got detections, test annotation + if result.get("detections"): + # Call annotate_detections directly with styling parameters + annotated = annotate_detections( + frame_array, + result, + font=processor._font, + font_scale=processor._font_scale, + font_thickness=processor._font_thickness, + bbox_color=processor._bbox_color, + text_color=processor._text_color, + ) + + # Verify frame was modified + assert not np.array_equal(frame_array, annotated) + + # Optionally save for visual inspection + # Image.fromarray(annotated).save("/tmp/moondream_test_annotated.jpg") + + processor.close() + + +def test_missing_api_key(monkeypatch): + """Test that missing API key raises ValueError when env var is also missing.""" + # Remove the environment variable to test the error case + monkeypatch.delenv("MOONDREAM_API_KEY", raising=False) + + with pytest.raises(ValueError, match="api_key is required"): + CloudDetectionProcessor(api_key=None) + + +def test_api_key_from_env(monkeypatch): + """Test that API key is loaded from environment variable.""" + monkeypatch.setenv("MOONDREAM_API_KEY", "test_env_key") + + processor = CloudDetectionProcessor() + assert processor.api_key == "test_env_key" + processor.close() + + +def test_api_key_explicit_override(monkeypatch): + """Test that explicit API key overrides environment variable.""" + monkeypatch.setenv("MOONDREAM_API_KEY", "env_key") + + processor = CloudDetectionProcessor(api_key="explicit_key") + assert processor.api_key == "explicit_key" 
+ processor.close() + + +def test_detect_objects_default(): + """Test default detect_objects is 'person'.""" + processor = CloudDetectionProcessor(api_key="test_key") + assert processor.detect_objects == ["person"] + processor.close() + + +def test_detect_objects_single_string(): + """Test detect_objects with single string.""" + processor = CloudDetectionProcessor( + api_key="test_key", + detect_objects="car" + ) + assert processor.detect_objects == ["car"] + processor.close() + + +def test_detect_objects_list(): + """Test detect_objects with list.""" + processor = CloudDetectionProcessor( + api_key="test_key", + detect_objects=["person", "car", "dog"] + ) + assert processor.detect_objects == ["person", "car", "dog"] + processor.close() + + +def test_detect_objects_invalid_type(): + """Test detect_objects with invalid type raises error.""" + with pytest.raises(ValueError, match="detect_objects must be str or list"): + CloudDetectionProcessor( + api_key="test_key", + detect_objects=123 # Invalid: not a string or list + ) + + +def test_detect_objects_invalid_list_contents(): + """Test detect_objects with non-string list items raises error.""" + with pytest.raises(ValueError, match="detect_objects must be str or list"): + CloudDetectionProcessor( + api_key="test_key", + detect_objects=["person", 123, "car"] # Invalid: contains non-string + ) + + +@pytest.mark.integration +@pytest.mark.skipif(not os.getenv("MOONDREAM_API_KEY"), reason="MOONDREAM_API_KEY not set") +@pytest.mark.asyncio +async def test_custom_object_detection(): + """Test detection with custom object type (not 'person').""" + processor = CloudDetectionProcessor( + api_key=os.getenv("MOONDREAM_API_KEY"), + detect_objects="car" # Detect cars instead of persons + ) + + # Use golf_swing.png - might not have cars, but test should run + from pathlib import Path + assets_dir = Path(__file__).parent.parent.parent.parent / "tests" / "test_assets" + image_path = assets_dir / "golf_swing.png" + + if 
image_path.exists(): + image = Image.open(image_path) + frame_array = np.array(image) + + # Run inference - may return empty if no cars in image + result = await processor._run_inference(frame_array) + + # Verify structure is correct + assert "detections" in result + assert isinstance(result["detections"], list) + + # If any detections, verify label is "car" + for det in result.get("detections", []): + assert det["label"] == "car", f"Expected 'car' but got '{det['label']}'" + + print(f"\n🚗 Car detection test: Found {len(result.get('detections', []))} cars") + else: + pytest.skip(f"Test image not found: {image_path}") + + processor.close() + + +@pytest.mark.integration +@pytest.mark.skipif(not os.getenv("MOONDREAM_API_KEY"), reason="MOONDREAM_API_KEY not set") +@pytest.mark.asyncio +async def test_multiple_object_detection(): + """Test detection with multiple object types.""" + processor = CloudDetectionProcessor( + api_key=os.getenv("MOONDREAM_API_KEY"), + detect_objects=["person", "grass", "sky"] # Multiple types + ) + + # Use golf_swing.png - likely has person and grass + from pathlib import Path + assets_dir = Path(__file__).parent.parent.parent.parent / "tests" / "test_assets" + image_path = assets_dir / "golf_swing.png" + + if image_path.exists(): + image = Image.open(image_path) + frame_array = np.array(image) + + # Run inference + result = await processor._run_inference(frame_array) + + # Verify structure + assert "detections" in result + assert isinstance(result["detections"], list) + + # Log what was detected + detected_labels = [det["label"] for det in result.get("detections", [])] + unique_labels = set(detected_labels) + + print("\n🎯 Multiple object detection test:") + print(f" Searched for: {processor.detect_objects}") + print(f" Found {len(result.get('detections', []))} total detections") + print(f" Unique object types: {unique_labels}") + + # Verify all labels are from our configured list + for label in detected_labels: + assert label in 
processor.detect_objects, \ + f"Detected '{label}' but it's not in configured objects {processor.detect_objects}" + else: + pytest.skip(f"Test image not found: {image_path}") + + processor.close() + diff --git a/plugins/moondream/tests/test_moondream_local.py b/plugins/moondream/tests/test_moondream_local.py new file mode 100644 index 00000000..cbe42198 --- /dev/null +++ b/plugins/moondream/tests/test_moondream_local.py @@ -0,0 +1,290 @@ +""" +Tests for the Moondream local processor plugin. + +Integration tests require HF_TOKEN environment variable (for gated model access): + + export HF_TOKEN="your-token-here" + uv run pytest plugins/moondream/tests/test_moondream_local.py -m integration -v + +To run only unit tests (no model loading): + + uv run pytest plugins/moondream/tests/test_moondream_local.py -m "not integration" -v +""" +import asyncio +import os +from pathlib import Path +from typing import Iterator + +import numpy as np +import pytest +import torch +from PIL import Image +import av + +from vision_agents.plugins.moondream import LocalDetectionProcessor +from vision_agents.plugins.moondream.moondream_utils import annotate_detections +import logging + +logger = logging.getLogger(__name__) + + +class TestMoondreamLocalProcessor: + """Test cases for MoondreamLocalProcessor.""" + + @pytest.fixture(scope="session") + def golf_image(self, assets_dir) -> Iterator[Image.Image]: + """Load the local golf swing test image from tests/test_assets.""" + asset_path = Path(assets_dir) / "golf_swing.png" + with Image.open(asset_path) as img: + yield img.convert("RGB") + + @pytest.fixture + def moondream_processor(self) -> Iterator[LocalDetectionProcessor]: + """Create and manage MoondreamLocalProcessor lifecycle.""" + processor = LocalDetectionProcessor(device="cpu") + try: + yield processor + finally: + processor.close() + + @pytest.mark.integration + @pytest.mark.skipif( + not os.getenv("HF_TOKEN"), + reason="HF_TOKEN environment variable not set (required for model 
access)", + ) + async def test_model_loads_correctly(self, moondream_processor: LocalDetectionProcessor): + """Test that start() successfully loads the model.""" + # Model should be None initially + assert moondream_processor.model is None + + # Start the processor (loads the model) + await moondream_processor.warmup() + + # Verify model is loaded + assert moondream_processor.model is not None + # Verify model is in eval mode + assert moondream_processor.model.training is False + + @pytest.mark.integration + @pytest.mark.skipif( + not os.getenv("HF_TOKEN"), + reason="HF_TOKEN environment variable not set (required for model access)", + ) + async def test_run_inference_on_image( + self, golf_image: Image.Image, moondream_processor: LocalDetectionProcessor + ): + """Test _run_inference() with a test image.""" + # Ensure model is loaded + await moondream_processor.warmup() + + # Convert PIL image to numpy array + frame_array = np.array(golf_image) + + # Run inference + result = await moondream_processor._run_inference(frame_array) + + # Verify result structure + assert isinstance(result, dict) + assert "detections" in result + assert isinstance(result["detections"], list) + + @pytest.mark.integration + @pytest.mark.skipif( + not os.getenv("HF_TOKEN"), + reason="HF_TOKEN environment variable not set (required for model access)", + ) + async def test_run_detection_sync( + self, golf_image: Image.Image, moondream_processor: LocalDetectionProcessor + ): + """Test _run_detection_sync() directly with PIL Image.""" + # Ensure model is loaded + await moondream_processor.warmup() + + # Run detection in executor (simulating async context) + detections = await asyncio.get_event_loop().run_in_executor( + moondream_processor.executor, + moondream_processor._run_detection_sync, + golf_image, + ) + + # Verify return value + assert isinstance(detections, list) + + # If detections found, verify structure + if detections: + for detection in detections: + assert "label" in detection + 
assert "bbox" in detection + assert "confidence" in detection + assert isinstance(detection["bbox"], list) + assert len(detection["bbox"]) == 4 + + @pytest.mark.integration + @pytest.mark.skipif( + not os.getenv("HF_TOKEN"), + reason="HF_TOKEN environment variable not set (required for model access)", + ) + async def test_annotated_frame_output( + self, golf_image: Image.Image, moondream_processor: LocalDetectionProcessor + ): + """Test end-to-end frame processing with annotations.""" + # Ensure model is loaded + await moondream_processor.warmup() + + # Convert PIL Image to av.VideoFrame + frame = av.VideoFrame.from_image(golf_image) + + # Process the frame + await moondream_processor._process_and_add_frame(frame) + + # Verify results were stored + assert hasattr(moondream_processor, "_last_results") + assert "detections" in moondream_processor._last_results + + # Verify annotated frame was added to video track + # (We can't easily verify the exact frame without more complex setup, + # but we can verify the processing didn't fail) + + @pytest.mark.integration + async def test_annotate_detections_with_results( + self, golf_image: Image.Image, moondream_processor: LocalDetectionProcessor + ): + """Test annotation function directly with mock results.""" + frame_array = np.array(golf_image) + + # Create mock detection results + mock_results = { + "detections": [ + {"bbox": [0.1, 0.1, 0.5, 0.5], "label": "person", "confidence": 0.95}, + {"bbox": [100, 200, 300, 400], "label": "car", "confidence": 0.88}, + ] + } + + annotated = annotate_detections( + frame_array, + mock_results, + font=moondream_processor._font, + font_scale=moondream_processor._font_scale, + font_thickness=moondream_processor._font_thickness, + bbox_color=moondream_processor._bbox_color, + text_color=moondream_processor._text_color, + ) + + # Verify output shape matches input + assert annotated.shape == frame_array.shape + # Verify frame is modified (not array_equal) + assert not 
np.array_equal(frame_array, annotated) + + def test_device_auto_detection_cuda(self, monkeypatch): + """Test CUDA auto-detection.""" + # Mock CUDA available, MPS not available + monkeypatch.setattr(torch.cuda, "is_available", lambda: True) + + # Ensure torch.backends.mps exists for the test + if not hasattr(torch.backends, "mps"): + # Create a mock mps module + class MockMPS: + @staticmethod + def is_available(): + return False + + monkeypatch.setattr(torch.backends, "mps", MockMPS()) + else: + monkeypatch.setattr( + torch.backends.mps, + "is_available", + lambda: False, + ) + + # Initialize processor without device param + processor = LocalDetectionProcessor() + try: + assert processor.device == "cuda" + finally: + processor.close() + + def test_device_auto_detection_cpu(self, monkeypatch): + """Test CPU fallback when CUDA and MPS are unavailable.""" + # Mock both CUDA and MPS as unavailable + monkeypatch.setattr(torch.cuda, "is_available", lambda: False) + + # Ensure torch.backends.mps exists for the test + if not hasattr(torch.backends, "mps"): + # Create a mock mps module + class MockMPS: + @staticmethod + def is_available(): + return False + + monkeypatch.setattr(torch.backends, "mps", MockMPS()) + else: + monkeypatch.setattr( + torch.backends.mps, + "is_available", + lambda: False, + ) + + # Initialize processor without device param + processor = LocalDetectionProcessor() + try: + assert processor.device == "cpu" + finally: + processor.close() + + def test_device_mps_converted_to_cpu(self, monkeypatch): + """Test MPS override to CPU (moondream doesn't work with MPS).""" + # Mock CUDA not available + monkeypatch.setattr(torch.cuda, "is_available", lambda: False) + + # Ensure torch.backends.mps exists and mock it as available + if not hasattr(torch.backends, "mps"): + # Create a mock mps module with is_available returning True + class MockMPS: + @staticmethod + def is_available(): + return True + + monkeypatch.setattr(torch.backends, "mps", MockMPS()) + else: + 
monkeypatch.setattr( + torch.backends.mps, + "is_available", + lambda: True, + ) + + # Initialize processor - should auto-detect and convert MPS to CPU + processor = LocalDetectionProcessor() + try: + # Verify MPS is converted to CPU + assert processor.device == "cpu" + finally: + processor.close() + + # Also test explicit MPS parameter + processor2 = LocalDetectionProcessor(device="mps") + try: + # Verify explicit MPS is also converted to CPU + assert processor2.device == "cpu" + finally: + processor2.close() + + def test_device_explicit_cpu(self): + """Test explicit CPU device selection.""" + processor = LocalDetectionProcessor(device="cpu") + try: + assert processor.device == "cpu" + finally: + processor.close() + + @pytest.mark.skipif( + not torch.cuda.is_available(), + reason="CUDA not available on this system", + ) + def test_device_explicit_cuda(self): + """Test explicit CUDA device selection (only if CUDA available).""" + processor = LocalDetectionProcessor(device="cuda") + try: + assert processor.device == "cuda" + finally: + processor.close() + diff --git a/plugins/moondream/vision_agents/plugins/moondream/__init__.py b/plugins/moondream/vision_agents/plugins/moondream/__init__.py new file mode 100644 index 00000000..8e77720f --- /dev/null +++ b/plugins/moondream/vision_agents/plugins/moondream/__init__.py @@ -0,0 +1,25 @@ +""" +Moondream plugin for vision-agents. + +This plugin provides Moondream 3 vision capabilities including object detection, +visual question answering, counting, and captioning. 
class CloudDetectionProcessor(AudioVideoProcessor, VideoProcessorMixin, VideoPublisherMixin):
    """Performs real-time object detection on video streams using the Moondream Cloud API.

    By default the Moondream Cloud API is limited to 2 requests per second
    (RPS); the limit can be raised by contacting the Moondream team. If you are
    deploying to your own infrastructure, consider using
    LocalDetectionProcessor instead.

    Args:
        api_key: API key for Moondream Cloud API. If not provided, will attempt to read
            from MOONDREAM_API_KEY environment variable.
        conf_threshold: Confidence threshold for detections
        detect_objects: Object(s) to detect. Moondream uses zero-shot detection,
            so any object string works. Examples: "person", "car",
            "basketball", ["person", "car", "dog"]. Default: "person"
        fps: Frame processing rate
        interval: Processing interval in seconds
        max_workers: Number of worker threads

    Raises:
        ValueError: If detect_objects is empty or is not a str / list of str,
            or if no API key is available.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        conf_threshold: float = 0.3,
        detect_objects: Union[str, List[str]] = "person",
        fps: int = 30,
        interval: int = 0,
        max_workers: int = 10,
    ):
        super().__init__(interval=interval, receive_audio=False, receive_video=True)

        self.api_key = api_key or os.getenv("MOONDREAM_API_KEY")
        self.conf_threshold = conf_threshold
        self.fps = fps
        self.max_workers = max_workers
        self._shutdown = False

        # Initialize state tracking attributes
        self._last_results: Dict[str, Any] = {}
        self._last_frame_time: Optional[float] = None
        self._last_frame_pil: Optional[Image.Image] = None

        # Drawing configuration, resolved once instead of per frame
        self._font = cv2.FONT_HERSHEY_SIMPLEX
        self._font_scale = 0.5
        self._font_thickness = 2
        self._bbox_color = (0, 255, 0)
        self._text_color = (0, 0, 0)

        if not detect_objects:
            raise ValueError("detect_objects must not be empty")
        # Normalize detect_objects to list of strings
        if isinstance(detect_objects, str):
            self.detect_objects = [detect_objects]
        elif isinstance(detect_objects, list):
            if not all(isinstance(obj, str) for obj in detect_objects):
                raise ValueError("detect_objects must be str or list of strings")
            # Copy so later mutation of the caller's list cannot change
            # what this processor detects.
            self.detect_objects = list(detect_objects)
        else:
            raise ValueError("detect_objects must be str or list of strings")

        # Thread pool: the SDK's detect() call is synchronous
        self.executor = ThreadPoolExecutor(
            max_workers=max_workers, thread_name_prefix="moondream_processor"
        )

        # Video track for publishing (if used as video publisher)
        self._video_track: MoondreamVideoTrack = MoondreamVideoTrack()
        self._video_forwarder: Optional[VideoForwarder] = None

        # Initialize model (raises if the API key is missing)
        self._load_model()

        logger.info("🌙 Moondream Processor initialized")
        logger.info(f"🎯 Detection configured for objects: {self.detect_objects}")

    async def process_video(
        self,
        incoming_track: aiortc.mediastreams.MediaStreamTrack,
        participant: Any,
        shared_forwarder=None,
    ):
        """
        Process incoming video track.

        This method sets up the video processing pipeline:
        1. Uses shared VideoForwarder if provided, otherwise creates own
        2. Starts event consumer that calls _process_and_add_frame for each frame
        3. Frames are processed, annotated, and published via the video track
        """
        logger.info("✅ Moondream process_video starting")

        if shared_forwarder is not None:
            # Use the shared forwarder
            self._video_forwarder = shared_forwarder
            logger.info(
                f"🎥 Moondream subscribing to shared VideoForwarder at {self.fps} FPS"
            )
            await self._video_forwarder.start_event_consumer(
                self._process_and_add_frame,
                fps=float(self.fps),
                consumer_name="moondream"
            )
        else:
            # Create our own VideoForwarder
            self._video_forwarder = VideoForwarder(
                incoming_track,  # type: ignore[arg-type]
                max_buffer=30,  # 1 second at 30fps
                fps=self.fps,
                name="moondream_forwarder",
            )

            # Start the forwarder
            await self._video_forwarder.start()
            await self._video_forwarder.start_event_consumer(
                self._process_and_add_frame
            )

        logger.info("✅ Moondream video processing pipeline started")

    def publish_video_track(self):
        """Return the track on which annotated frames are published."""
        logger.info("📹 publish_video_track called")
        return self._video_track

    def _load_model(self):
        """Create the Moondream cloud client.

        Raises:
            ValueError: If no API key was passed or found in the environment.
        """
        try:
            # Validate API key
            if not self.api_key:
                raise ValueError("api_key is required for Moondream Cloud API")

            # Initialize cloud model
            self.model = md.vl(api_key=self.api_key)
            logger.info("✅ Moondream SDK initialized")

        except Exception as e:
            logger.exception(f"❌ Failed to load Moondream model: {e}")
            raise

    async def _run_inference(self, frame_array: np.ndarray) -> Dict[str, Any]:
        """Run detection for one RGB frame without blocking the event loop.

        Returns:
            ``{"detections": [...]}``; the list is empty if inference failed.
        """
        try:
            # The SDK's detect() is synchronous, so run it in the executor.
            # get_running_loop() is the supported way to obtain the loop
            # from inside a coroutine.
            loop = asyncio.get_running_loop()
            all_detections = await loop.run_in_executor(
                self.executor, self._run_detection_sync, frame_array
            )

            return {"detections": all_detections}
        except Exception as e:
            logger.exception(f"❌ Cloud inference failed: {e}")
            return {"detections": []}

    def _run_detection_sync(self, frame_array: np.ndarray) -> List[Dict]:
        """Call the SDK once per configured object type and collect detections."""
        # Check shutdown first so no image conversion is done for a frame
        # that will be discarded anyway.
        if self._shutdown:
            return []

        image = Image.fromarray(frame_array)
        all_detections = []

        # Call SDK for each object type
        for object_type in self.detect_objects:
            logger.debug(f"🔍 Detecting '{object_type}' via Moondream SDK")
            try:
                # Call SDK's detect method
                result = self.model.detect(image, object_type)
            except Exception as e:
                # One failing object type must not discard the others
                logger.warning(f"⚠️ Failed to detect '{object_type}': {e}")
                continue

            # Parse SDK response format
            # SDK returns: {"objects": [{"x_min": ..., "y_min": ..., "x_max": ..., "y_max": ...}, ...]}
            for obj in result.get("objects", []):
                detection = parse_detection_bbox(obj, object_type, self.conf_threshold)
                if detection:
                    all_detections.append(detection)

        logger.debug(f"🔍 SDK returned {len(all_detections)} objects across {len(self.detect_objects)} types")
        return all_detections

    async def _process_and_add_frame(self, frame: av.VideoFrame):
        """Detect objects in ``frame``, annotate it and publish the result.

        If anything fails, the original frame is published unchanged.
        """
        try:
            frame_array = frame.to_ndarray(format="rgb24")

            results = await self._run_inference(frame_array)

            self._last_results = results
            self._last_frame_time = asyncio.get_running_loop().time()
            self._last_frame_pil = Image.fromarray(frame_array)

            # Annotate frame with detections
            if results.get("detections"):
                frame_array = annotate_detections(
                    frame_array,
                    results,
                    font=self._font,
                    font_scale=self._font_scale,
                    font_thickness=self._font_thickness,
                    bbox_color=self._bbox_color,
                    text_color=self._text_color,
                )

            # Convert back to av.VideoFrame and publish
            processed_frame = av.VideoFrame.from_ndarray(frame_array, format="rgb24")
            await self._video_track.add_frame(processed_frame)

        except Exception as e:
            logger.exception(f"❌ Frame processing failed: {e}")
            # Pass through original frame on error
            await self._video_track.add_frame(frame)

    def close(self):
        """Clean up resources."""
        self._shutdown = True
        # __init__ may have raised before the executor was created
        if hasattr(self, "executor"):
            self.executor.shutdown(wait=False)
        logger.info("🛑 Moondream Processor closed")
class LocalDetectionProcessor(AudioVideoProcessor, VideoProcessorMixin, VideoPublisherMixin):
    """Performs real-time object detection on video streams using local Moondream 3 model.

    This processor downloads and runs the moondream3-preview model locally from Hugging Face,
    providing the same functionality as the cloud API version without requiring an API key.

    Note: The moondream3-preview model is gated and requires authentication:
    - Request access at https://huggingface.co/moondream/moondream3-preview
    - Once approved, authenticate using one of:
      - Set HF_TOKEN environment variable: export HF_TOKEN=your_token_here
      - Run: huggingface-cli login

    Args:
        conf_threshold: Confidence threshold for detections
        detect_objects: Object(s) to detect. Moondream uses zero-shot detection,
            so any object string works. Examples: "person", "car",
            "basketball", ["person", "car", "dog"]. Default: "person"
        fps: Frame processing rate
        interval: Processing interval in seconds
        max_workers: Number of worker threads
        device: Device to run inference on ('cuda' or 'cpu'). When omitted,
            CUDA is used if available, otherwise CPU. 'mps' (requested
            explicitly or auto-detected) is always replaced by 'cpu'.
        model_name: Hugging Face model identifier (default: "moondream/moondream3-preview")
        options: AgentOptions for model directory configuration. If not provided,
            default_agent_options() is used.
    """

    def __init__(
        self,
        conf_threshold: float = 0.3,
        detect_objects: Union[str, List[str]] = "person",
        fps: int = 30,
        interval: int = 0,
        max_workers: int = 10,
        device: Optional[str] = None,
        model_name: str = "moondream/moondream3-preview",
        options: Optional[AgentOptions] = None,
    ):
        super().__init__(interval=interval, receive_audio=False, receive_video=True)

        self.options = default_agent_options() if options is None else options
        self.model_name = model_name
        self.conf_threshold = conf_threshold
        self.fps = fps
        self.max_workers = max_workers
        self._shutdown = False
        self.device = self._resolve_device(device)

        # Initialize state tracking attributes
        self._last_results: Dict[str, Any] = {}
        self._last_frame_time: Optional[float] = None
        self._last_frame_pil: Optional[Image.Image] = None

        # Drawing configuration, resolved once instead of per frame
        self._font = cv2.FONT_HERSHEY_SIMPLEX
        self._font_scale = 0.5
        self._font_thickness = 2
        self._bbox_color = (0, 255, 0)
        self._text_color = (0, 0, 0)

        # Normalize detect_objects to list
        self.detect_objects = [detect_objects] if isinstance(detect_objects, str) else list(detect_objects)

        # Thread pool: the model's detect() call is synchronous
        self.executor = ThreadPoolExecutor(
            max_workers=max_workers, thread_name_prefix="moondream_local_processor"
        )

        # Video track for publishing (if used as video publisher)
        self._video_track: MoondreamVideoTrack = MoondreamVideoTrack()
        self._video_forwarder: Optional[VideoForwarder] = None

        # Model is loaded by warmup(), or lazily by process_video()
        self.model = None

        logger.info("🌙 Moondream Local Processor initialized")
        logger.info(f"🎯 Detection configured for objects: {self.detect_objects}")
        logger.info(f"🔧 Device: {self.device}")

    @staticmethod
    def _resolve_device(device: Optional[str]) -> str:
        """Pick the inference device, replacing MPS with CPU.

        Args:
            device: Requested device, or None to auto-detect.

        Returns:
            "cuda" when auto-detecting and CUDA is available, "cpu" for MPS or
            when nothing else is available, otherwise ``device`` unchanged.
        """
        if device is None:
            if torch.cuda.is_available():
                return "cuda"
            if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
                # Moondream model has CUDA dependencies that don't work on MPS
                logger.info("⚠️ MPS detected but using CPU (moondream model has CUDA dependencies incompatible with MPS)")
            return "cpu"

        if device == "mps":
            logger.warning("⚠️ MPS device requested but using CPU instead (moondream model has CUDA dependencies incompatible with MPS)")
            return "cpu"
        return device

    async def warmup(self):
        """Initialize and load the model."""
        # Ensure model directory exists
        os.makedirs(self.options.model_dir, exist_ok=True)

        # Prepare model asynchronously
        await self._prepare_moondream()

    async def _prepare_moondream(self):
        """Load the Moondream model from Hugging Face."""
        logger.info(f"Loading Moondream model: {self.model_name}")
        logger.info(f"Device: {self.device}")

        # Load model in a worker thread to avoid blocking the event loop
        self.model = await asyncio.to_thread(  # type: ignore[func-returns-value]
            self._load_model_sync
        )
        logger.info("✅ Moondream model loaded")

    def _load_model_sync(self):
        """Synchronous model loading function run in thread pool.

        Returns:
            The loaded model in eval mode.

        Raises:
            Exception: Whatever ``from_pretrained`` raises is logged and
                re-raised; authentication failures get an explanatory message.
        """
        try:
            # Check for Hugging Face token (required for gated models)
            hf_token = os.getenv("HF_TOKEN")
            if not hf_token:
                logger.warning(
                    "⚠️ HF_TOKEN environment variable not set. "
                    "This model requires authentication. "
                    "Set HF_TOKEN or run 'huggingface-cli login'"
                )

            on_cuda = self.device == "cuda"
            load_kwargs = {
                "trust_remote_code": True,
                "dtype": torch.bfloat16 if on_cuda else torch.float32,
                "cache_dir": self.options.model_dir,  # Use agent's model directory for caching
                # Explicit token when available; True asks the hub client to
                # use a cached login instead.
                "token": hf_token if hf_token else True,
                # MPS has already been converted to CPU in __init__
                "device_map": {"": "cuda"} if on_cuda else "cpu",
            }

            model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                **load_kwargs,
            )

            # Ensure model is in eval mode for inference
            model.eval()

            if on_cuda:
                logger.info("✅ Model loaded on CUDA device")
            else:
                logger.info("✅ Model loaded on CPU device")

            # Compilation is an optimization only; continue without it on failure
            try:
                model.compile()
            except Exception as compile_error:
                logger.warning(f"⚠️ Model compilation failed, continuing without compilation: {compile_error}")

            return model
        except Exception as e:
            error_msg = str(e)
            if "gated repo" in error_msg.lower() or "403" in error_msg or "authorized" in error_msg.lower():
                logger.exception(
                    "❌ Failed to load Moondream model: Model requires authentication.\n"
                    "This model is gated and requires access approval:\n"
                    f"1. Visit https://huggingface.co/{self.model_name} to request access\n"
                    "2. Once approved, authenticate using one of:\n"
                    "   - Set HF_TOKEN environment variable: export HF_TOKEN=your_token_here\n"
                    "   - Run: huggingface-cli login\n"
                    f"Original error: {e}"
                )
            else:
                logger.exception(f"❌ Failed to load Moondream model: {e}")
            raise

    async def process_video(
        self,
        incoming_track: aiortc.mediastreams.MediaStreamTrack,
        participant: Any,
        shared_forwarder=None,
    ):
        """
        Process incoming video track.

        This method sets up the video processing pipeline:
        1. Loads the model if warmup() has not been called yet
        2. Uses shared VideoForwarder if provided, otherwise creates own
        3. Starts event consumer that calls _process_and_add_frame for each frame
        4. Frames are processed, annotated, and published via the video track
        """
        logger.info("✅ Moondream process_video starting")

        # Ensure model is loaded
        if self.model is None:
            await self._prepare_moondream()

        if shared_forwarder is not None:
            # Use the shared forwarder
            self._video_forwarder = shared_forwarder
            logger.info(
                f"🎥 Moondream subscribing to shared VideoForwarder at {self.fps} FPS"
            )
            await self._video_forwarder.start_event_consumer(
                self._process_and_add_frame,
                fps=float(self.fps),
                consumer_name="moondream_local"
            )
        else:
            # Create our own VideoForwarder
            self._video_forwarder = VideoForwarder(
                incoming_track,  # type: ignore[arg-type]
                max_buffer=30,  # 1 second at 30fps
                fps=self.fps,
                name="moondream_local_forwarder",
            )

            # Start the forwarder
            await self._video_forwarder.start()
            await self._video_forwarder.start_event_consumer(
                self._process_and_add_frame
            )

        logger.info("✅ Moondream video processing pipeline started")

    def publish_video_track(self):
        """Return the track on which annotated frames are published."""
        logger.info("📹 publish_video_track called")
        return self._video_track

    async def _run_inference(self, frame_array: np.ndarray) -> Dict[str, Any]:
        """Run detection for one RGB frame without blocking the event loop.

        Returns:
            ``{"detections": [...]}``; the list is empty if inference failed,
            so callers always find the "detections" key (same contract as the
            cloud processor).
        """
        try:
            # Convert frame to PIL Image
            image = Image.fromarray(frame_array)

            # The model's detect() is synchronous, so run it in the executor.
            # get_running_loop() is the supported way to obtain the loop
            # from inside a coroutine.
            loop = asyncio.get_running_loop()
            all_detections = await loop.run_in_executor(
                self.executor, self._run_detection_sync, image
            )

            return {"detections": all_detections}
        except Exception as e:
            logger.exception(f"❌ Local inference failed: {e}")
            return {"detections": []}

    def _run_detection_sync(self, image: Image.Image) -> List[Dict]:
        """Call the model once per configured object type and collect detections."""
        if self._shutdown or self.model is None:
            return []

        all_detections = []

        # Call model for each object type
        for object_type in self.detect_objects:
            try:
                logger.debug(f"🔍 Detecting '{object_type}' via Moondream model")

                # Call model's detect method
                result = self.model.detect(image, object_type)

                # Parse model response format
                # Model returns: {"objects": [{"x_min": ..., "y_min": ..., "x_max": ..., "y_max": ...}, ...]}
                if "objects" in result:
                    for obj in result["objects"]:
                        detection = parse_detection_bbox(obj, object_type, self.conf_threshold)
                        if detection:
                            all_detections.append(detection)

            except Exception as e:
                # One failing object type must not discard the others
                logger.warning(f"⚠️ Failed to detect '{object_type}': {e}")
                continue

        logger.debug(f"🔍 Model returned {len(all_detections)} objects across {len(self.detect_objects)} types")
        return all_detections

    async def _process_and_add_frame(self, frame: av.VideoFrame):
        """Detect objects in ``frame``, annotate it and publish the result.

        If anything fails, the original frame is published unchanged.
        """
        try:
            # Convert to numpy array
            frame_array = frame.to_ndarray(format="rgb24")

            # Run inference
            results = await self._run_inference(frame_array)

            # Keep the latest results and frame on the instance
            self._last_results = results
            self._last_frame_time = asyncio.get_running_loop().time()
            self._last_frame_pil = Image.fromarray(frame_array)

            # Annotate frame with detections
            if results.get("detections"):
                frame_array = annotate_detections(
                    frame_array,
                    results,
                    font=self._font,
                    font_scale=self._font_scale,
                    font_thickness=self._font_thickness,
                    bbox_color=self._bbox_color,
                    text_color=self._text_color,
                )

            # Convert back to av.VideoFrame and publish
            processed_frame = av.VideoFrame.from_ndarray(frame_array, format="rgb24")
            await self._video_track.add_frame(processed_frame)

        except Exception as e:
            logger.exception(f"❌ Frame processing failed: {e}")
            # Pass through original frame on error
            await self._video_track.add_frame(frame)

    def close(self):
        """Clean up resources."""
        self._shutdown = True
        self.executor.shutdown(wait=False)
        # Drop the model reference so its memory can be reclaimed
        self.model = None
        logger.info("🛑 Moondream Local Processor closed")
def parse_detection_bbox(obj: Dict, object_type: str, conf_threshold: float) -> Optional[Dict]:
    """Convert one raw Moondream object into a detection record.

    Args:
        obj: Raw object with optional "x_min", "y_min", "x_max", "y_max" and
            "confidence" keys. A missing confidence counts as 1.0 and a
            missing coordinate as 0.
        object_type: Label to attach to the detection.
        conf_threshold: Detections scoring below this value are rejected.

    Returns:
        A dict with "label", "bbox" ([x_min, y_min, x_max, y_max]) and
        "confidence", or None when the confidence is below the threshold.
    """
    score = obj.get("confidence", 1.0)
    if score < conf_threshold:
        return None

    corners = [obj.get(key, 0) for key in ("x_min", "y_min", "x_max", "y_max")]
    return {"label": object_type, "bbox": corners, "confidence": score}


def normalize_bbox_coordinates(bbox: List[float], width: int, height: int) -> tuple:
    """Return ``bbox`` as integer pixel coordinates ``(x1, y1, x2, y2)``.

    A box whose four values are all <= 1.0 is treated as normalized and
    scaled by the frame size; any other box is taken to be in pixels already
    and is only truncated to ints. A box that does not have exactly four
    values yields ``(0, 0, 0, 0)``.
    """
    if len(bbox) != 4:
        return (0, 0, 0, 0)

    left, top, right, bottom = bbox
    corners = (left, top, right, bottom)

    if not all(value <= 1.0 for value in corners):
        # Already pixel coordinates
        return tuple(int(value) for value in corners)

    # Normalized coordinates: scale x by width and y by height
    return (
        int(left * width),
        int(top * height),
        int(right * width),
        int(bottom * height),
    )
def annotate_detections(
    frame_array: np.ndarray,
    results: Dict[str, Any],
    font: int = cv2.FONT_HERSHEY_SIMPLEX,
    font_scale: float = 0.5,
    font_thickness: int = 2,
    bbox_color: tuple = (0, 255, 0),
    text_color: tuple = (0, 0, 0),
) -> np.ndarray:
    """Draw bounding boxes and "label confidence" captions on a copy of a frame.

    Args:
        frame_array: Image array; its first two dimensions are read as
            (height, width). It is not modified.
        results: Dict whose "detections" entry is a list of dicts with
            "bbox", "label" and "confidence" keys.
        font: OpenCV font face used for the caption.
        font_scale: OpenCV font scale.
        font_thickness: Stroke thickness of the caption text.
        bbox_color: Color of the box outline and caption background.
        text_color: Color of the caption text.

    Returns:
        The annotated copy, or an unannotated copy when there are no detections.
    """
    annotated = frame_array.copy()

    detections = results.get("detections", [])
    if not detections:
        return annotated

    height, width = frame_array.shape[:2]

    # Measure the caption height once per frame using a representative text
    (_, text_height), baseline = cv2.getTextSize(
        "object 0.00", font, font_scale, font_thickness
    )
    label_height = text_height + baseline + 5

    for detection in detections:
        # Parse bounding box and normalize to pixel coordinates
        bbox = detection.get("bbox", [])
        x1, y1, x2, y2 = normalize_bbox_coordinates(bbox, width, height)

        # Skip invalid bounding boxes
        if x1 == 0 and y1 == 0 and x2 == 0 and y2 == 0:
            continue

        label = detection.get("label", "object")
        conf = detection.get("confidence", 0.0)

        cv2.rectangle(annotated, (x1, y1), (x2, y2), bbox_color, 2)

        label_text = f"{label} {conf:.2f}"
        # Text width depends on the label content, so measure it per detection
        (text_width, _), _ = cv2.getTextSize(
            label_text, font, font_scale, font_thickness
        )

        # Place the caption above the box; when the box touches the top of
        # the frame there is no room there, so draw it just inside the box
        # instead of letting it fall outside the image.
        label_top = y1 - label_height
        if label_top < 0:
            label_top = max(y1, 0)

        # Caption background
        cv2.rectangle(
            annotated,
            (x1, label_top),
            (x1 + text_width, label_top + label_height),
            bbox_color,
            -1
        )

        # Caption text; the origin passed to putText is the text's bottom-left corner
        cv2.putText(
            annotated,
            label_text,
            (x1, label_top + text_height),
            font,
            font_scale,
            text_color,
            font_thickness
        )

    return annotated
class MoondreamVideoTrack(VideoStreamTrack):
    """
    Video track for publishing Moondream-processed frames.

    Processed frames are buffered in a LatestNQueue. Each recv() call returns
    the newest queued frame, or repeats the previously returned frame (a
    solid blue placeholder before the first one arrives), stamped with the
    timestamp supplied by VideoStreamTrack.next_timestamp().
    """

    def __init__(self, width: int = DEFAULT_WIDTH, height: int = DEFAULT_HEIGHT):
        super().__init__()
        logger.info("MoondreamVideoTrack: initializing")
        self.frame_queue: LatestNQueue[av.VideoFrame] = LatestNQueue(maxlen=10)

        # Set video quality parameters
        self.width = width
        self.height = height
        # Placeholder shown until the first processed frame is queued
        empty_image = Image.new("RGB", (self.width, self.height), color="blue")
        self.empty_frame = av.VideoFrame.from_image(empty_image)
        self.last_frame: av.VideoFrame = self.empty_frame
        self._stopped = False

    async def add_frame(self, frame: av.VideoFrame):
        """Queue a processed frame for publishing; ignored once the track is stopped."""
        if self._stopped:
            return

        self.frame_queue.put_latest_nowait(frame)

    async def recv(self) -> av.frame.Frame:
        """
        Receive the next video frame for publishing.

        Returns:
            Video frame with proper PTS and time_base

        Raises:
            MediaStreamError: If the track has been stopped.
        """
        # Imported here so this block does not depend on the file's import list
        from aiortc.mediastreams import MediaStreamError

        if self._stopped:
            # MediaStreamError is aiortc's own error for tracks that are no
            # longer live; it subclasses Exception, so existing handlers
            # keep working.
            raise MediaStreamError("Track stopped")

        try:
            # Try to get a frame from queue with short timeout
            frame = await asyncio.wait_for(self.frame_queue.get(), timeout=0.02)
            if frame:
                self.last_frame = frame
                logger.debug("📥 Got new frame from queue")
        except asyncio.TimeoutError:
            # Nothing new yet: the previous frame is sent again below
            pass
        except Exception as e:
            logger.warning(f"⚠️ Error getting frame from queue: {e}")

        # Get timestamp for the frame
        pts, time_base = await self.next_timestamp()

        # Re-stamp the frame that is about to be sent
        av_frame = self.last_frame
        av_frame.pts = pts
        av_frame.time_base = time_base

        return av_frame

    def stop(self):
        """Stop the video track."""
        self._stopped = True
        # Let the base class end the track as well; without this the aiortc
        # track would remain live even though recv() refuses to serve frames.
        super().stop()
"packaging" }, + { name = "psutil" }, + { name = "pyyaml" }, + { name = "safetensors" }, + { name = "torch" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/23/60/2757c4f03a8705dbf80b1268b03881927878dca5ed07d74f733fb6c219e0/accelerate-1.11.0.tar.gz", hash = "sha256:bb1caf2597b4cd632b917b5000c591d10730bb024a79746f1ee205bba80bd229", size = 393715, upload-time = "2025-10-20T14:42:25.025Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/85/85951bc0f9843e2c10baaa1b6657227056095de08f4d1eea7d8b423a6832/accelerate-1.11.0-py3-none-any.whl", hash = "sha256:a628fa6beb069b8e549460fc449135d5bd8d73e7a11fd09f0bc9fc4ace7f06f1", size = 375777, upload-time = "2025-10-20T14:42:23.256Z" }, +] + [[package]] name = "addict" version = "2.4.0" @@ -2427,6 +2446,18 @@ en = [ { name = "spacy-curated-transformers" }, ] +[[package]] +name = "moondream" +version = "0.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pillow" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/97/e4/921c236172c1b8505a04889a579fc6843b331ad61c5e5a3d6772c83117ef/moondream-0.1.1.tar.gz", hash = "sha256:6da586f030eceaf8e74f8c5d8a84fe3db9dc5f6f8d5c2011db08837afef9f1f8", size = 96285, upload-time = "2025-06-21T01:11:14.786Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/6b/c9de9fdff76b2067eaedc627c2b7644512d0b432e2e32b0b883cf1d71354/moondream-0.1.1-py3-none-any.whl", hash = "sha256:d60b60b1689eead8bc0925456ff497f1dbff2118db7c7c28343f0467c284914a", size = 94862, upload-time = "2025-06-21T01:11:13.551Z" }, +] + [[package]] name = "more-itertools" version = "10.8.0" @@ -2789,7 +2820,7 @@ name = "nvidia-cudnn-cu12" version = "9.10.2.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform != 'win32'" }, ] wheels = [ { url = 
"https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, @@ -2800,7 +2831,7 @@ name = "nvidia-cufft-cu12" version = "11.3.3.83" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'win32'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, @@ -2827,9 +2858,9 @@ name = "nvidia-cusolver-cu12" version = "11.7.3.90" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform != 'win32'" }, + { name = "nvidia-cusparse-cu12", marker = "sys_platform != 'win32'" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'win32'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, @@ -2840,7 +2871,7 @@ name = "nvidia-cusparse-cu12" version = "12.5.8.93" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'win32'" }, ] wheels = 
[ { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, @@ -3239,71 +3270,32 @@ wheels = [ [[package]] name = "pillow" -version = "12.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/cace85a1b0c9775a9f8f5d5423c8261c858760e2466c79b2dd184638b056/pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353", size = 47008828, upload-time = "2025-10-15T18:24:14.008Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/90/4fcce2c22caf044e660a198d740e7fbc14395619e3cb1abad12192c0826c/pillow-12.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:53561a4ddc36facb432fae7a9d8afbfaf94795414f5cdc5fc52f28c1dca90371", size = 5249377, upload-time = "2025-10-15T18:22:05.993Z" }, - { url = "https://files.pythonhosted.org/packages/fd/e0/ed960067543d080691d47d6938ebccbf3976a931c9567ab2fbfab983a5dd/pillow-12.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:71db6b4c1653045dacc1585c1b0d184004f0d7e694c7b34ac165ca70c0838082", size = 4650343, upload-time = "2025-10-15T18:22:07.718Z" }, - { url = "https://files.pythonhosted.org/packages/e7/a1/f81fdeddcb99c044bf7d6faa47e12850f13cee0849537a7d27eeab5534d4/pillow-12.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2fa5f0b6716fc88f11380b88b31fe591a06c6315e955c096c35715788b339e3f", size = 6232981, upload-time = "2025-10-15T18:22:09.287Z" }, - { url = "https://files.pythonhosted.org/packages/88/e1/9098d3ce341a8750b55b0e00c03f1630d6178f38ac191c81c97a3b047b44/pillow-12.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:82240051c6ca513c616f7f9da06e871f61bfd7805f566275841af15015b8f98d", size = 8041399, upload-time = "2025-10-15T18:22:10.872Z" }, - { url = "https://files.pythonhosted.org/packages/a7/62/a22e8d3b602ae8cc01446d0c57a54e982737f44b6f2e1e019a925143771d/pillow-12.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55f818bd74fe2f11d4d7cbc65880a843c4075e0ac7226bc1a23261dbea531953", size = 6347740, upload-time = "2025-10-15T18:22:12.769Z" }, - { url = "https://files.pythonhosted.org/packages/4f/87/424511bdcd02c8d7acf9f65caa09f291a519b16bd83c3fb3374b3d4ae951/pillow-12.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b87843e225e74576437fd5b6a4c2205d422754f84a06942cfaf1dc32243e45a8", size = 7040201, upload-time = "2025-10-15T18:22:14.813Z" }, - { url = "https://files.pythonhosted.org/packages/dc/4d/435c8ac688c54d11755aedfdd9f29c9eeddf68d150fe42d1d3dbd2365149/pillow-12.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c607c90ba67533e1b2355b821fef6764d1dd2cbe26b8c1005ae84f7aea25ff79", size = 6462334, upload-time = "2025-10-15T18:22:16.375Z" }, - { url = "https://files.pythonhosted.org/packages/2b/f2/ad34167a8059a59b8ad10bc5c72d4d9b35acc6b7c0877af8ac885b5f2044/pillow-12.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:21f241bdd5080a15bc86d3466a9f6074a9c2c2b314100dd896ac81ee6db2f1ba", size = 7134162, upload-time = "2025-10-15T18:22:17.996Z" }, - { url = "https://files.pythonhosted.org/packages/0c/b1/a7391df6adacf0a5c2cf6ac1cf1fcc1369e7d439d28f637a847f8803beb3/pillow-12.0.0-cp312-cp312-win32.whl", hash = "sha256:dd333073e0cacdc3089525c7df7d39b211bcdf31fc2824e49d01c6b6187b07d0", size = 6298769, upload-time = "2025-10-15T18:22:19.923Z" }, - { url = "https://files.pythonhosted.org/packages/a2/0b/d87733741526541c909bbf159e338dcace4f982daac6e5a8d6be225ca32d/pillow-12.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe611163f6303d1619bbcb653540a4d60f9e55e622d60a3108be0d5b441017a", size = 7001107, 
upload-time = "2025-10-15T18:22:21.644Z" }, - { url = "https://files.pythonhosted.org/packages/bc/96/aaa61ce33cc98421fb6088af2a03be4157b1e7e0e87087c888e2370a7f45/pillow-12.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:7dfb439562f234f7d57b1ac6bc8fe7f838a4bd49c79230e0f6a1da93e82f1fad", size = 2436012, upload-time = "2025-10-15T18:22:23.621Z" }, - { url = "https://files.pythonhosted.org/packages/62/f2/de993bb2d21b33a98d031ecf6a978e4b61da207bef02f7b43093774c480d/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:0869154a2d0546545cde61d1789a6524319fc1897d9ee31218eae7a60ccc5643", size = 4045493, upload-time = "2025-10-15T18:22:25.758Z" }, - { url = "https://files.pythonhosted.org/packages/0e/b6/bc8d0c4c9f6f111a783d045310945deb769b806d7574764234ffd50bc5ea/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:a7921c5a6d31b3d756ec980f2f47c0cfdbce0fc48c22a39347a895f41f4a6ea4", size = 4120461, upload-time = "2025-10-15T18:22:27.286Z" }, - { url = "https://files.pythonhosted.org/packages/5d/57/d60d343709366a353dc56adb4ee1e7d8a2cc34e3fbc22905f4167cfec119/pillow-12.0.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:1ee80a59f6ce048ae13cda1abf7fbd2a34ab9ee7d401c46be3ca685d1999a399", size = 3576912, upload-time = "2025-10-15T18:22:28.751Z" }, - { url = "https://files.pythonhosted.org/packages/a4/a4/a0a31467e3f83b94d37568294b01d22b43ae3c5d85f2811769b9c66389dd/pillow-12.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c50f36a62a22d350c96e49ad02d0da41dbd17ddc2e29750dbdba4323f85eb4a5", size = 5249132, upload-time = "2025-10-15T18:22:30.641Z" }, - { url = "https://files.pythonhosted.org/packages/83/06/48eab21dd561de2914242711434c0c0eb992ed08ff3f6107a5f44527f5e9/pillow-12.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5193fde9a5f23c331ea26d0cf171fbf67e3f247585f50c08b3e205c7aeb4589b", size = 4650099, upload-time = "2025-10-15T18:22:32.73Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/bd/69ed99fd46a8dba7c1887156d3572fe4484e3f031405fcc5a92e31c04035/pillow-12.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bde737cff1a975b70652b62d626f7785e0480918dece11e8fef3c0cf057351c3", size = 6230808, upload-time = "2025-10-15T18:22:34.337Z" }, - { url = "https://files.pythonhosted.org/packages/ea/94/8fad659bcdbf86ed70099cb60ae40be6acca434bbc8c4c0d4ef356d7e0de/pillow-12.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6597ff2b61d121172f5844b53f21467f7082f5fb385a9a29c01414463f93b07", size = 8037804, upload-time = "2025-10-15T18:22:36.402Z" }, - { url = "https://files.pythonhosted.org/packages/20/39/c685d05c06deecfd4e2d1950e9a908aa2ca8bc4e6c3b12d93b9cafbd7837/pillow-12.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b817e7035ea7f6b942c13aa03bb554fc44fea70838ea21f8eb31c638326584e", size = 6345553, upload-time = "2025-10-15T18:22:38.066Z" }, - { url = "https://files.pythonhosted.org/packages/38/57/755dbd06530a27a5ed74f8cb0a7a44a21722ebf318edbe67ddbd7fb28f88/pillow-12.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4f1231b7dec408e8670264ce63e9c71409d9583dd21d32c163e25213ee2a344", size = 7037729, upload-time = "2025-10-15T18:22:39.769Z" }, - { url = "https://files.pythonhosted.org/packages/ca/b6/7e94f4c41d238615674d06ed677c14883103dce1c52e4af16f000338cfd7/pillow-12.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e51b71417049ad6ab14c49608b4a24d8fb3fe605e5dfabfe523b58064dc3d27", size = 6459789, upload-time = "2025-10-15T18:22:41.437Z" }, - { url = "https://files.pythonhosted.org/packages/9c/14/4448bb0b5e0f22dd865290536d20ec8a23b64e2d04280b89139f09a36bb6/pillow-12.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d120c38a42c234dc9a8c5de7ceaaf899cf33561956acb4941653f8bdc657aa79", size = 7130917, upload-time = "2025-10-15T18:22:43.152Z" }, - { url = 
"https://files.pythonhosted.org/packages/dd/ca/16c6926cc1c015845745d5c16c9358e24282f1e588237a4c36d2b30f182f/pillow-12.0.0-cp313-cp313-win32.whl", hash = "sha256:4cc6b3b2efff105c6a1656cfe59da4fdde2cda9af1c5e0b58529b24525d0a098", size = 6302391, upload-time = "2025-10-15T18:22:44.753Z" }, - { url = "https://files.pythonhosted.org/packages/6d/2a/dd43dcfd6dae9b6a49ee28a8eedb98c7d5ff2de94a5d834565164667b97b/pillow-12.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:4cf7fed4b4580601c4345ceb5d4cbf5a980d030fd5ad07c4d2ec589f95f09905", size = 7007477, upload-time = "2025-10-15T18:22:46.838Z" }, - { url = "https://files.pythonhosted.org/packages/77/f0/72ea067f4b5ae5ead653053212af05ce3705807906ba3f3e8f58ddf617e6/pillow-12.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:9f0b04c6b8584c2c193babcccc908b38ed29524b29dd464bc8801bf10d746a3a", size = 2435918, upload-time = "2025-10-15T18:22:48.399Z" }, - { url = "https://files.pythonhosted.org/packages/f5/5e/9046b423735c21f0487ea6cb5b10f89ea8f8dfbe32576fe052b5ba9d4e5b/pillow-12.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7fa22993bac7b77b78cae22bad1e2a987ddf0d9015c63358032f84a53f23cdc3", size = 5251406, upload-time = "2025-10-15T18:22:49.905Z" }, - { url = "https://files.pythonhosted.org/packages/12/66/982ceebcdb13c97270ef7a56c3969635b4ee7cd45227fa707c94719229c5/pillow-12.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f135c702ac42262573fe9714dfe99c944b4ba307af5eb507abef1667e2cbbced", size = 4653218, upload-time = "2025-10-15T18:22:51.587Z" }, - { url = "https://files.pythonhosted.org/packages/16/b3/81e625524688c31859450119bf12674619429cab3119eec0e30a7a1029cb/pillow-12.0.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c85de1136429c524e55cfa4e033b4a7940ac5c8ee4d9401cc2d1bf48154bbc7b", size = 6266564, upload-time = "2025-10-15T18:22:53.215Z" }, - { url = 
"https://files.pythonhosted.org/packages/98/59/dfb38f2a41240d2408096e1a76c671d0a105a4a8471b1871c6902719450c/pillow-12.0.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38df9b4bfd3db902c9c2bd369bcacaf9d935b2fff73709429d95cc41554f7b3d", size = 8069260, upload-time = "2025-10-15T18:22:54.933Z" }, - { url = "https://files.pythonhosted.org/packages/dc/3d/378dbea5cd1874b94c312425ca77b0f47776c78e0df2df751b820c8c1d6c/pillow-12.0.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d87ef5795da03d742bf49439f9ca4d027cde49c82c5371ba52464aee266699a", size = 6379248, upload-time = "2025-10-15T18:22:56.605Z" }, - { url = "https://files.pythonhosted.org/packages/84/b0/d525ef47d71590f1621510327acec75ae58c721dc071b17d8d652ca494d8/pillow-12.0.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aff9e4d82d082ff9513bdd6acd4f5bd359f5b2c870907d2b0a9c5e10d40c88fe", size = 7066043, upload-time = "2025-10-15T18:22:58.53Z" }, - { url = "https://files.pythonhosted.org/packages/61/2c/aced60e9cf9d0cde341d54bf7932c9ffc33ddb4a1595798b3a5150c7ec4e/pillow-12.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8d8ca2b210ada074d57fcee40c30446c9562e542fc46aedc19baf758a93532ee", size = 6490915, upload-time = "2025-10-15T18:23:00.582Z" }, - { url = "https://files.pythonhosted.org/packages/ef/26/69dcb9b91f4e59f8f34b2332a4a0a951b44f547c4ed39d3e4dcfcff48f89/pillow-12.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:99a7f72fb6249302aa62245680754862a44179b545ded638cf1fef59befb57ef", size = 7157998, upload-time = "2025-10-15T18:23:02.627Z" }, - { url = "https://files.pythonhosted.org/packages/61/2b/726235842220ca95fa441ddf55dd2382b52ab5b8d9c0596fe6b3f23dafe8/pillow-12.0.0-cp313-cp313t-win32.whl", hash = "sha256:4078242472387600b2ce8d93ade8899c12bf33fa89e55ec89fe126e9d6d5d9e9", size = 6306201, upload-time = "2025-10-15T18:23:04.709Z" }, - { url = 
"https://files.pythonhosted.org/packages/c0/3d/2afaf4e840b2df71344ababf2f8edd75a705ce500e5dc1e7227808312ae1/pillow-12.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2c54c1a783d6d60595d3514f0efe9b37c8808746a66920315bfd34a938d7994b", size = 7013165, upload-time = "2025-10-15T18:23:06.46Z" }, - { url = "https://files.pythonhosted.org/packages/6f/75/3fa09aa5cf6ed04bee3fa575798ddf1ce0bace8edb47249c798077a81f7f/pillow-12.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:26d9f7d2b604cd23aba3e9faf795787456ac25634d82cd060556998e39c6fa47", size = 2437834, upload-time = "2025-10-15T18:23:08.194Z" }, - { url = "https://files.pythonhosted.org/packages/54/2a/9a8c6ba2c2c07b71bec92cf63e03370ca5e5f5c5b119b742bcc0cde3f9c5/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:beeae3f27f62308f1ddbcfb0690bf44b10732f2ef43758f169d5e9303165d3f9", size = 4045531, upload-time = "2025-10-15T18:23:10.121Z" }, - { url = "https://files.pythonhosted.org/packages/84/54/836fdbf1bfb3d66a59f0189ff0b9f5f666cee09c6188309300df04ad71fa/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:d4827615da15cd59784ce39d3388275ec093ae3ee8d7f0c089b76fa87af756c2", size = 4120554, upload-time = "2025-10-15T18:23:12.14Z" }, - { url = "https://files.pythonhosted.org/packages/0d/cd/16aec9f0da4793e98e6b54778a5fbce4f375c6646fe662e80600b8797379/pillow-12.0.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:3e42edad50b6909089750e65c91aa09aaf1e0a71310d383f11321b27c224ed8a", size = 3576812, upload-time = "2025-10-15T18:23:13.962Z" }, - { url = "https://files.pythonhosted.org/packages/f6/b7/13957fda356dc46339298b351cae0d327704986337c3c69bb54628c88155/pillow-12.0.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e5d8efac84c9afcb40914ab49ba063d94f5dbdf5066db4482c66a992f47a3a3b", size = 5252689, upload-time = "2025-10-15T18:23:15.562Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/f5/eae31a306341d8f331f43edb2e9122c7661b975433de5e447939ae61c5da/pillow-12.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:266cd5f2b63ff316d5a1bba46268e603c9caf5606d44f38c2873c380950576ad", size = 4650186, upload-time = "2025-10-15T18:23:17.379Z" }, - { url = "https://files.pythonhosted.org/packages/86/62/2a88339aa40c4c77e79108facbd307d6091e2c0eb5b8d3cf4977cfca2fe6/pillow-12.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:58eea5ebe51504057dd95c5b77d21700b77615ab0243d8152793dc00eb4faf01", size = 6230308, upload-time = "2025-10-15T18:23:18.971Z" }, - { url = "https://files.pythonhosted.org/packages/c7/33/5425a8992bcb32d1cb9fa3dd39a89e613d09a22f2c8083b7bf43c455f760/pillow-12.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f13711b1a5ba512d647a0e4ba79280d3a9a045aaf7e0cc6fbe96b91d4cdf6b0c", size = 8039222, upload-time = "2025-10-15T18:23:20.909Z" }, - { url = "https://files.pythonhosted.org/packages/d8/61/3f5d3b35c5728f37953d3eec5b5f3e77111949523bd2dd7f31a851e50690/pillow-12.0.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6846bd2d116ff42cba6b646edf5bf61d37e5cbd256425fa089fee4ff5c07a99e", size = 6346657, upload-time = "2025-10-15T18:23:23.077Z" }, - { url = "https://files.pythonhosted.org/packages/3a/be/ee90a3d79271227e0f0a33c453531efd6ed14b2e708596ba5dd9be948da3/pillow-12.0.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c98fa880d695de164b4135a52fd2e9cd7b7c90a9d8ac5e9e443a24a95ef9248e", size = 7038482, upload-time = "2025-10-15T18:23:25.005Z" }, - { url = "https://files.pythonhosted.org/packages/44/34/a16b6a4d1ad727de390e9bd9f19f5f669e079e5826ec0f329010ddea492f/pillow-12.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fa3ed2a29a9e9d2d488b4da81dcb54720ac3104a20bf0bd273f1e4648aff5af9", size = 6461416, upload-time = "2025-10-15T18:23:27.009Z" }, - { url = 
"https://files.pythonhosted.org/packages/b6/39/1aa5850d2ade7d7ba9f54e4e4c17077244ff7a2d9e25998c38a29749eb3f/pillow-12.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d034140032870024e6b9892c692fe2968493790dd57208b2c37e3fb35f6df3ab", size = 7131584, upload-time = "2025-10-15T18:23:29.752Z" }, - { url = "https://files.pythonhosted.org/packages/bf/db/4fae862f8fad0167073a7733973bfa955f47e2cac3dc3e3e6257d10fab4a/pillow-12.0.0-cp314-cp314-win32.whl", hash = "sha256:1b1b133e6e16105f524a8dec491e0586d072948ce15c9b914e41cdadd209052b", size = 6400621, upload-time = "2025-10-15T18:23:32.06Z" }, - { url = "https://files.pythonhosted.org/packages/2b/24/b350c31543fb0107ab2599464d7e28e6f856027aadda995022e695313d94/pillow-12.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:8dc232e39d409036af549c86f24aed8273a40ffa459981146829a324e0848b4b", size = 7142916, upload-time = "2025-10-15T18:23:34.71Z" }, - { url = "https://files.pythonhosted.org/packages/0f/9b/0ba5a6fd9351793996ef7487c4fdbde8d3f5f75dbedc093bb598648fddf0/pillow-12.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:d52610d51e265a51518692045e372a4c363056130d922a7351429ac9f27e70b0", size = 2523836, upload-time = "2025-10-15T18:23:36.967Z" }, - { url = "https://files.pythonhosted.org/packages/f5/7a/ceee0840aebc579af529b523d530840338ecf63992395842e54edc805987/pillow-12.0.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1979f4566bb96c1e50a62d9831e2ea2d1211761e5662afc545fa766f996632f6", size = 5255092, upload-time = "2025-10-15T18:23:38.573Z" }, - { url = "https://files.pythonhosted.org/packages/44/76/20776057b4bfd1aef4eeca992ebde0f53a4dce874f3ae693d0ec90a4f79b/pillow-12.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b2e4b27a6e15b04832fe9bf292b94b5ca156016bbc1ea9c2c20098a0320d6cf6", size = 4653158, upload-time = "2025-10-15T18:23:40.238Z" }, - { url = 
"https://files.pythonhosted.org/packages/82/3f/d9ff92ace07be8836b4e7e87e6a4c7a8318d47c2f1463ffcf121fc57d9cb/pillow-12.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb3096c30df99fd01c7bf8e544f392103d0795b9f98ba71a8054bcbf56b255f1", size = 6267882, upload-time = "2025-10-15T18:23:42.434Z" }, - { url = "https://files.pythonhosted.org/packages/9f/7a/4f7ff87f00d3ad33ba21af78bfcd2f032107710baf8280e3722ceec28cda/pillow-12.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7438839e9e053ef79f7112c881cef684013855016f928b168b81ed5835f3e75e", size = 8071001, upload-time = "2025-10-15T18:23:44.29Z" }, - { url = "https://files.pythonhosted.org/packages/75/87/fcea108944a52dad8cca0715ae6247e271eb80459364a98518f1e4f480c1/pillow-12.0.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d5c411a8eaa2299322b647cd932586b1427367fd3184ffbb8f7a219ea2041ca", size = 6380146, upload-time = "2025-10-15T18:23:46.065Z" }, - { url = "https://files.pythonhosted.org/packages/91/52/0d31b5e571ef5fd111d2978b84603fce26aba1b6092f28e941cb46570745/pillow-12.0.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7e091d464ac59d2c7ad8e7e08105eaf9dafbc3883fd7265ffccc2baad6ac925", size = 7067344, upload-time = "2025-10-15T18:23:47.898Z" }, - { url = "https://files.pythonhosted.org/packages/7b/f4/2dd3d721f875f928d48e83bb30a434dee75a2531bca839bb996bb0aa5a91/pillow-12.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:792a2c0be4dcc18af9d4a2dfd8a11a17d5e25274a1062b0ec1c2d79c76f3e7f8", size = 6491864, upload-time = "2025-10-15T18:23:49.607Z" }, - { url = "https://files.pythonhosted.org/packages/30/4b/667dfcf3d61fc309ba5a15b141845cece5915e39b99c1ceab0f34bf1d124/pillow-12.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:afbefa430092f71a9593a99ab6a4e7538bc9eabbf7bf94f91510d3503943edc4", size = 7158911, upload-time = "2025-10-15T18:23:51.351Z" }, - { url = 
"https://files.pythonhosted.org/packages/a2/2f/16cabcc6426c32218ace36bf0d55955e813f2958afddbf1d391849fee9d1/pillow-12.0.0-cp314-cp314t-win32.whl", hash = "sha256:3830c769decf88f1289680a59d4f4c46c72573446352e2befec9a8512104fa52", size = 6408045, upload-time = "2025-10-15T18:23:53.177Z" }, - { url = "https://files.pythonhosted.org/packages/35/73/e29aa0c9c666cf787628d3f0dcf379f4791fba79f4936d02f8b37165bdf8/pillow-12.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:905b0365b210c73afb0ebe9101a32572152dfd1c144c7e28968a331b9217b94a", size = 7148282, upload-time = "2025-10-15T18:23:55.316Z" }, - { url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630, upload-time = "2025-10-15T18:23:57.149Z" }, +version = "10.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/74/ad3d526f3bf7b6d3f408b73fde271ec69dfac8b81341a318ce825f2b3812/pillow-10.4.0.tar.gz", hash = "sha256:166c1cd4d24309b30d61f79f4a9114b7b2313d7450912277855ff5dfd7cd4a06", size = 46555059, upload-time = "2024-07-01T09:48:43.583Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/cb/0353013dc30c02a8be34eb91d25e4e4cf594b59e5a55ea1128fde1e5f8ea/pillow-10.4.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:673655af3eadf4df6b5457033f086e90299fdd7a47983a13827acf7459c15d94", size = 3509350, upload-time = "2024-07-01T09:46:17.177Z" }, + { url = "https://files.pythonhosted.org/packages/e7/cf/5c558a0f247e0bf9cec92bff9b46ae6474dd736f6d906315e60e4075f737/pillow-10.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:866b6942a92f56300012f5fbac71f2d610312ee65e22f1aa2609e491284e5597", size = 3374980, upload-time = "2024-07-01T09:46:19.169Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/48/6e394b86369a4eb68b8a1382c78dc092245af517385c086c5094e3b34428/pillow-10.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29dbdc4207642ea6aad70fbde1a9338753d33fb23ed6956e706936706f52dd80", size = 4343799, upload-time = "2024-07-01T09:46:21.883Z" }, + { url = "https://files.pythonhosted.org/packages/3b/f3/a8c6c11fa84b59b9df0cd5694492da8c039a24cd159f0f6918690105c3be/pillow-10.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf2342ac639c4cf38799a44950bbc2dfcb685f052b9e262f446482afaf4bffca", size = 4459973, upload-time = "2024-07-01T09:46:24.321Z" }, + { url = "https://files.pythonhosted.org/packages/7d/1b/c14b4197b80150fb64453585247e6fb2e1d93761fa0fa9cf63b102fde822/pillow-10.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:f5b92f4d70791b4a67157321c4e8225d60b119c5cc9aee8ecf153aace4aad4ef", size = 4370054, upload-time = "2024-07-01T09:46:26.825Z" }, + { url = "https://files.pythonhosted.org/packages/55/77/40daddf677897a923d5d33329acd52a2144d54a9644f2a5422c028c6bf2d/pillow-10.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:86dcb5a1eb778d8b25659d5e4341269e8590ad6b4e8b44d9f4b07f8d136c414a", size = 4539484, upload-time = "2024-07-01T09:46:29.355Z" }, + { url = "https://files.pythonhosted.org/packages/40/54/90de3e4256b1207300fb2b1d7168dd912a2fb4b2401e439ba23c2b2cabde/pillow-10.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:780c072c2e11c9b2c7ca37f9a2ee8ba66f44367ac3e5c7832afcfe5104fd6d1b", size = 4477375, upload-time = "2024-07-01T09:46:31.756Z" }, + { url = "https://files.pythonhosted.org/packages/13/24/1bfba52f44193860918ff7c93d03d95e3f8748ca1de3ceaf11157a14cf16/pillow-10.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:37fb69d905be665f68f28a8bba3c6d3223c8efe1edf14cc4cfa06c241f8c81d9", size = 4608773, upload-time = "2024-07-01T09:46:33.73Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/04/5e6de6e6120451ec0c24516c41dbaf80cce1b6451f96561235ef2429da2e/pillow-10.4.0-cp312-cp312-win32.whl", hash = "sha256:7dfecdbad5c301d7b5bde160150b4db4c659cee2b69589705b6f8a0c509d9f42", size = 2235690, upload-time = "2024-07-01T09:46:36.587Z" }, + { url = "https://files.pythonhosted.org/packages/74/0a/d4ce3c44bca8635bd29a2eab5aa181b654a734a29b263ca8efe013beea98/pillow-10.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1d846aea995ad352d4bdcc847535bd56e0fd88d36829d2c90be880ef1ee4668a", size = 2554951, upload-time = "2024-07-01T09:46:38.777Z" }, + { url = "https://files.pythonhosted.org/packages/b5/ca/184349ee40f2e92439be9b3502ae6cfc43ac4b50bc4fc6b3de7957563894/pillow-10.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:e553cad5179a66ba15bb18b353a19020e73a7921296a7979c4a2b7f6a5cd57f9", size = 2243427, upload-time = "2024-07-01T09:46:43.15Z" }, + { url = "https://files.pythonhosted.org/packages/c3/00/706cebe7c2c12a6318aabe5d354836f54adff7156fd9e1bd6c89f4ba0e98/pillow-10.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8bc1a764ed8c957a2e9cacf97c8b2b053b70307cf2996aafd70e91a082e70df3", size = 3525685, upload-time = "2024-07-01T09:46:45.194Z" }, + { url = "https://files.pythonhosted.org/packages/cf/76/f658cbfa49405e5ecbfb9ba42d07074ad9792031267e782d409fd8fe7c69/pillow-10.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6209bb41dc692ddfee4942517c19ee81b86c864b626dbfca272ec0f7cff5d9fb", size = 3374883, upload-time = "2024-07-01T09:46:47.331Z" }, + { url = "https://files.pythonhosted.org/packages/46/2b/99c28c4379a85e65378211971c0b430d9c7234b1ec4d59b2668f6299e011/pillow-10.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bee197b30783295d2eb680b311af15a20a8b24024a19c3a26431ff83eb8d1f70", size = 4339837, upload-time = "2024-07-01T09:46:49.647Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/74/b1ec314f624c0c43711fdf0d8076f82d9d802afd58f1d62c2a86878e8615/pillow-10.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ef61f5dd14c300786318482456481463b9d6b91ebe5ef12f405afbba77ed0be", size = 4455562, upload-time = "2024-07-01T09:46:51.811Z" }, + { url = "https://files.pythonhosted.org/packages/4a/2a/4b04157cb7b9c74372fa867096a1607e6fedad93a44deeff553ccd307868/pillow-10.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:297e388da6e248c98bc4a02e018966af0c5f92dfacf5a5ca22fa01cb3179bca0", size = 4366761, upload-time = "2024-07-01T09:46:53.961Z" }, + { url = "https://files.pythonhosted.org/packages/ac/7b/8f1d815c1a6a268fe90481232c98dd0e5fa8c75e341a75f060037bd5ceae/pillow-10.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e4db64794ccdf6cb83a59d73405f63adbe2a1887012e308828596100a0b2f6cc", size = 4536767, upload-time = "2024-07-01T09:46:56.664Z" }, + { url = "https://files.pythonhosted.org/packages/e5/77/05fa64d1f45d12c22c314e7b97398ffb28ef2813a485465017b7978b3ce7/pillow-10.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd2880a07482090a3bcb01f4265f1936a903d70bc740bfcb1fd4e8a2ffe5cf5a", size = 4477989, upload-time = "2024-07-01T09:46:58.977Z" }, + { url = "https://files.pythonhosted.org/packages/12/63/b0397cfc2caae05c3fb2f4ed1b4fc4fc878f0243510a7a6034ca59726494/pillow-10.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b35b21b819ac1dbd1233317adeecd63495f6babf21b7b2512d244ff6c6ce309", size = 4610255, upload-time = "2024-07-01T09:47:01.189Z" }, + { url = "https://files.pythonhosted.org/packages/7b/f9/cfaa5082ca9bc4a6de66ffe1c12c2d90bf09c309a5f52b27759a596900e7/pillow-10.4.0-cp313-cp313-win32.whl", hash = "sha256:551d3fd6e9dc15e4c1eb6fc4ba2b39c0c7933fa113b220057a34f4bb3268a060", size = 2235603, upload-time = "2024-07-01T09:47:03.918Z" }, + { url = 
"https://files.pythonhosted.org/packages/01/6a/30ff0eef6e0c0e71e55ded56a38d4859bf9d3634a94a88743897b5f96936/pillow-10.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:030abdbe43ee02e0de642aee345efa443740aa4d828bfe8e2eb11922ea6a21ea", size = 2554972, upload-time = "2024-07-01T09:47:06.152Z" }, + { url = "https://files.pythonhosted.org/packages/48/2c/2e0a52890f269435eee38b21c8218e102c621fe8d8df8b9dd06fabf879ba/pillow-10.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b001114dd152cfd6b23befeb28d7aee43553e2402c9f159807bf55f33af8a8d", size = 2243375, upload-time = "2024-07-01T09:47:09.065Z" }, ] [[package]] @@ -5347,7 +5339,7 @@ requires-dist = [ { name = "mcp", specifier = ">=1.16.0" }, { name = "mypy", marker = "extra == 'dev'" }, { name = "numpy", specifier = ">=1.24.0" }, - { name = "pillow", specifier = ">=11.3.0" }, + { name = "pillow", specifier = ">=10.4.0" }, { name = "pytest", marker = "extra == 'dev'" }, { name = "python-dotenv", specifier = ">=1.1.1" }, { name = "ruff", marker = "extra == 'dev'" }, @@ -5660,6 +5652,44 @@ dev = [ { name = "pytest-asyncio", specifier = ">=1.0.0" }, ] +[[package]] +name = "vision-agents-plugins-moondream" +source = { editable = "plugins/moondream" } +dependencies = [ + { name = "accelerate" }, + { name = "moondream" }, + { name = "numpy" }, + { name = "opencv-python" }, + { name = "pillow" }, + { name = "torch" }, + { name = "transformers" }, + { name = "vision-agents" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pytest" }, + { name = "pytest-asyncio" }, +] + +[package.metadata] +requires-dist = [ + { name = "accelerate", specifier = ">=0.20.0" }, + { name = "moondream", specifier = ">=0.1.1" }, + { name = "numpy", specifier = ">=2.0.0" }, + { name = "opencv-python", specifier = ">=4.8.0" }, + { name = "pillow", specifier = ">=10.4.0" }, + { name = "torch", specifier = ">=2.0.0" }, + { name = "transformers", specifier = ">=4.40.0" }, + { name = "vision-agents", editable = "agents-core" }, +] + 
+[package.metadata.requires-dev] +dev = [ + { name = "pytest", specifier = ">=8.4.1" }, + { name = "pytest-asyncio", specifier = ">=1.0.0" }, +] + [[package]] name = "vision-agents-plugins-moonshine" version = "0.1.11"