diff --git a/examples/media-gen/README.md b/examples/media-gen/README.md index b91d7a9..fdf549c 100644 --- a/examples/media-gen/README.md +++ b/examples/media-gen/README.md @@ -49,10 +49,14 @@ media-gen/ │ ├── __init__.py # Package exports │ ├── media_gen_tool_base.py # Abstract base classes │ ├── dummy_image_gen.py # Dummy image generation tool +│ ├── openai_image_gen.py # OpenAI image generation tool +│ ├── replicate_image_gen.py # Replicate image generation tool │ ├── dummy_video_gen.py # Dummy video generation tool │ └── image_understanding_tool.py # Image understanding tool ├── tests/ # Test suite │ ├── test_dummy_media_gen.py # Comprehensive tests +│ ├── test_openai_image_gen.py # OpenAI image generation tests +│ ├── test_replicate_image_gen.py # Replicate image generation tests │ └── test_image_understanding.py # Image understanding tests ├── integration_tests/ # Integration tests (manual) │ ├── test_image_understanding.py # Real API integration test @@ -78,6 +82,7 @@ class MyImageGen(ImageGenerationTool): prompt = input.get("prompt", "") aspect_ratio = input.get("aspect_ratio", "4:3") output_format = input.get("output_format", "jpg") + output_folder = input.get("output_folder", "/tmp") # Your image generation logic here # ... 
@@ -92,6 +97,88 @@ class MyImageGen(ImageGenerationTool): - `prompt` (str, required): Text description - `aspect_ratio` (str, optional, default: "4:3"): Image aspect ratio - `output_format` (str, optional, default: "jpg"): Output format +- `output_folder` (str, optional, default: "~/Downloads"): Folder path where to save the generated image + +### OpenAI Image Generation Tool + +```python +from tools import OpenAIImageGen + +# Initialize the tool +image_gen = OpenAIImageGen() + +# Basic usage +result = image_gen.run({ + "prompt": "A gray tabby cat hugging an otter with an orange scarf", + "output_folder": "./generated_images" +}) + +# Advanced usage with custom parameters +result = image_gen.run({ + "prompt": "A futuristic cityscape at sunset with flying cars", + "size": "1024x1536", + "quality": "high", + "output_format": "png", + "compression": 90, + "background": "opaque", + "output_folder": "./generated_images" +}) +``` + +**Parameters:** +- `prompt` (str, required): Text description of the desired image +- `output_folder` (str, optional, default: "~/Downloads"): Folder path where to save the generated image +- `size` (str, optional, default: "1024x1024"): Image dimensions +- `quality` (str, optional, default: "low"): Rendering quality (low, medium, high) +- `output_format` (str, optional, default: "png"): Output format +- `compression` (int, optional, default: 80): Compression level 0-100% +- `background` (str, optional, default: "opaque"): Transparent or opaque + +**Features:** +- Uses OpenAI's gpt-4o-mini model with image generation capabilities +- Supports various image parameters (size, quality, format, compression, background) +- Automatic directory creation for output paths +- Comprehensive error handling +- Integrates seamlessly with Polymind framework + +### Replicate Image Generation Tool + +```python +from tools import ReplicateImageGen + +# Initialize the tool with default model (WAN 2.2) +image_gen = ReplicateImageGen() + +# Basic usage +result = 
image_gen.run({ + "prompt": "A cinematic cat portrait with golden hour lighting", + "output_folder": "./generated_images" +}) + +# Advanced usage with custom parameters +result = image_gen.run({ + "prompt": "A cinematic, photorealistic medium shot of a cat", + "seed": 246764, + "aspect_ratio": "4:3", + "model": "stability-ai/sdxl" +}) +``` + +**Parameters:** +- `prompt` (str, required): Text description of the desired image +- `output_folder` (str, optional, default: "~/Downloads"): Folder path where to save the generated image +- `seed` (int, optional): Random seed for reproducible results +- `aspect_ratio` (str, optional, default: "4:3"): Image aspect ratio +- `output_format` (str, optional, default: "jpeg"): Output format +- `model` (str, optional): Replicate model to use (overrides default) + +**Features:** +- Uses Replicate's API with various image generation models +- Supports models like WAN 2.2, Stable Diffusion XL, and others +- Reproducible results with seed parameter +- Automatic directory creation for output paths +- Comprehensive error handling +- Integrates seamlessly with Polymind framework ### Video Generation Tool @@ -194,7 +281,7 @@ python integration_tests/test_image_understanding.py ## Usage ```python -from tools import DummyImageGen, DummyVideoGen +from tools import DummyImageGen, OpenAIImageGen, ReplicateImageGen, DummyVideoGen from dotenv import load_dotenv import os @@ -207,11 +294,22 @@ print(f"Replicate API Token: {'✓ Available' if os.getenv('REPLICATE_API_TOKEN' # Initialize tools image_gen = DummyImageGen() +openai_image_gen = OpenAIImageGen() +replicate_image_gen = ReplicateImageGen() video_gen = DummyVideoGen() image_understanding = ImageUnderstandingTool() # Generate media image_result = image_gen.run({"prompt": "A beautiful sunset"}) +openai_result = openai_image_gen.run({ + "prompt": "A beautiful sunset over mountains", + "output_folder": "./generated_images" +}) +replicate_result = replicate_image_gen.run({ + "prompt": "A cinematic 
cat portrait", + "seed": 12345, + "aspect_ratio": "4:3" +}) video_result = video_gen.run({"prompt": "A butterfly emerging"}) # Analyze images @@ -232,6 +330,8 @@ python example_usage.py # Run tests cd tests && python test_dummy_media_gen.py +python test_openai_image_gen.py +python test_replicate_image_gen.py python test_image_understanding.py # Run integration tests (requires API key) diff --git a/examples/media-gen/integration_tests/README.md b/examples/media-gen/integration_tests/README.md index c565bad..087c45a 100644 --- a/examples/media-gen/integration_tests/README.md +++ b/examples/media-gen/integration_tests/README.md @@ -2,6 +2,80 @@ This folder contains integration tests that require real API calls and external resources. +## OpenAI Image Generation Integration Test + +### Prerequisites +- OpenAI API key in `.env` file +- Internet connection +- OpenAI API access with verified organization (for image generation) + +### Running the Test + +```bash +# From the media-gen directory +python integration_tests/test_openai_image_gen.py +``` + +### What it does +- Generates a single image with a specific whimsical prompt +- Tests the OpenAI image generation tool with real API calls +- Saves the generated image to `~/Downloads/polymind_generated_images/` +- Provides helpful error messages for common issues + +### Expected Output +The test will show: +- ✅ Confirmation that API key is found +- 🎨 Image generation progress +- 📁 File path and size of generated image +- 🎯 Generation metadata + +### Generated Image +- Dynamically named image with timestamp (e.g., `openai_generated_image_20241201_143022.png`) + +### Notes +- This test is not run automatically with unit tests +- It requires a valid OpenAI API key +- It makes real API calls and may incur costs +- Generated images are saved in `~/Downloads/polymind_generated_images/` with unique names +- If you get an organization verification error, follow the provided instructions + +## Replicate Image Generation Integration 
Test + +### Prerequisites +- Replicate API token in `.env` file +- Internet connection +- Replicate API access + +### Running the Test + +```bash +# From the media-gen directory +python integration_tests/test_replicate_image_gen.py +``` + +### What it does +- Generates a single image with a cinematic cat prompt +- Tests the Replicate image generation tool with real API calls +- Saves the generated image to `~/Downloads/polymind_generated_images/` +- Provides helpful error messages for common issues + +### Expected Output +The test will show: +- ✅ Confirmation that API token is found +- 🎨 Image generation progress +- 📁 File path and size of generated image +- 🎯 Generation metadata + +### Generated Image +- Dynamically named image with timestamp (e.g., `replicate_generated_image_20241201_143022.jpeg`) + +### Notes +- This test is not run automatically with unit tests +- It requires a valid Replicate API token +- It makes real API calls and may incur costs +- Generated images are saved in `~/Downloads/polymind_generated_images/` with unique names +- Uses the WAN 2.2 model by default + ## Image Understanding Integration Test ### Prerequisites diff --git a/examples/media-gen/integration_tests/test_openai_image_gen.py b/examples/media-gen/integration_tests/test_openai_image_gen.py new file mode 100644 index 0000000..d8028aa --- /dev/null +++ b/examples/media-gen/integration_tests/test_openai_image_gen.py @@ -0,0 +1,110 @@ +""" +Integration test for OpenAI image generation tool. + +This script tests the OpenAI image generation tool with a real API call. +It generates a single image with a specific prompt and saves it to ~/Downloads. 
+ +Requirements: +- Valid OpenAI API key in environment variables +- Internet connection +- OpenAI API access +""" + +import os +import sys + +from dotenv import load_dotenv +from pathlib import Path + +# Add parent directory to path for imports +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) + +from tools.openai_image_gen import OpenAIImageGen + + +def main(): + """Generate a single image with the specified prompt.""" + print("🚀 OpenAI Image Generation Integration Test") + print("=" * 60) + + # Check if OpenAI API key is available + if not os.getenv("OPENAI_API_KEY"): + print("❌ OPENAI_API_KEY environment variable not set") + print("Please set your OpenAI API key to run integration tests") + return + + print(f"✅ OpenAI API key found: {os.getenv('OPENAI_API_KEY')[:10]}...") + + # Create Downloads directory if it doesn't exist + downloads_dir = Path.home() / "Downloads" / "polymind_generated_images" + downloads_dir.mkdir(parents=True, exist_ok=True) + print(f"✅ Output directory: {downloads_dir.absolute()}") + + # Initialize the tool + image_gen = OpenAIImageGen() + + # The specific prompt provided by the user + prompt = ( + "Create a whimsical scene inside a modern subway train featuring a " + "fluffy, light brown rabbit and a chubby, soft gray penguin sitting " + "side by side on blue plastic seats. The background includes tall " + "city buildings visible through a large window and a colorful framed " + "picture on the wall. Soft, natural lighting filtering through the " + "window enhances the cheerful atmosphere. The image should be " + "hyper-realistic with fine textures on the animals' fur and " + "feathers, emphasizing their cuteness while maintaining a playful " + "and friendly mood." 
+ ) + + print("\n🎨 Generating image with prompt:") + print(f"'{prompt[:100]}...'") + print() + + try: + # Generate the image + result = image_gen.run({ + "prompt": prompt, + "size": "1024x1024", + "quality": "high", + "output_format": "png", + "output_folder": str(downloads_dir) + }) + + print(f"Result: {result}") + + if result["image_path"] and os.path.exists(result["image_path"]): + file_size = os.path.getsize(result["image_path"]) + print("✅ Image generated successfully!") + print(f"📁 Saved to: {result['image_path']}") + print(f"📏 File size: {file_size:,} bytes") + print(f"🎯 Generation info: {result['generation_info']}") + else: + print("❌ Image generation failed") + error_msg = result.get('generation_info', {}).get('error', 'Unknown error') + print(f"Error: {error_msg}") + + # Check for specific error types and provide helpful guidance + if "organization must be verified" in error_msg.lower(): + print("\n💡 To fix this issue:") + print("1. Go to: https://platform.openai.com/settings/organization/general") + print("2. Click on 'Verify Organization'") + print("3. Wait up to 15 minutes for access to propagate") + print("4. Try running this test again") + elif "api key" in error_msg.lower(): + print("\n💡 To fix this issue:") + print("1. Check that your OPENAI_API_KEY is correct") + print("2. Ensure you have sufficient credits in your OpenAI account") + print("3. 
Verify your account has access to image generation features") + + except Exception as e: + print(f"\n❌ Integration test failed with error: {e}") + import traceback + traceback.print_exc() + + +if __name__ == "__main__": + # Load environment variables + load_dotenv() + + # Run the integration test + main() \ No newline at end of file diff --git a/examples/media-gen/integration_tests/test_replicate_image_gen.py b/examples/media-gen/integration_tests/test_replicate_image_gen.py new file mode 100644 index 0000000..842d2f6 --- /dev/null +++ b/examples/media-gen/integration_tests/test_replicate_image_gen.py @@ -0,0 +1,118 @@ +""" +Integration test for Replicate image generation tool. + +This script tests the Replicate image generation tool with real API calls. +It should be run manually when you want to test the actual image generation +functionality. + +Requirements: +- Valid Replicate API token in environment variables +- Internet connection +- Replicate API access +""" + +import os +import sys + +from dotenv import load_dotenv +from pathlib import Path + +# Add parent directory to path for imports +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) + +try: + from tools.replicate_image_gen import ReplicateImageGen +except ImportError as e: + if "replicate" in str(e): + print("❌ Replicate package not installed. 
Please install it with:") + print(" pip install replicate") + sys.exit(1) + else: + raise + + +def main(): + """Generate a single image with the specified prompt.""" + print("🚀 Replicate Image Generation Integration Test") + print("=" * 60) + + # Check if Replicate API token is available + if not os.getenv("REPLICATE_API_TOKEN"): + print("❌ REPLICATE_API_TOKEN environment variable not set") + print("Please set your Replicate API token to run integration tests") + return + + token = os.getenv('REPLICATE_API_TOKEN') + print(f"✅ Replicate API token found: {token[:10]}...") + + # Create Downloads directory if it doesn't exist + downloads_dir = Path.home() / "Downloads" / "polymind_generated_images" + downloads_dir.mkdir(parents=True, exist_ok=True) + print(f"✅ Output directory: {downloads_dir.absolute()}") + + # Initialize the tool + image_gen = ReplicateImageGen() + + # The specific prompt from the example + prompt = ( + "Create a whimsical scene inside a modern subway train featuring a " + "fluffy, light brown rabbit and a chubby, soft gray penguin sitting " + "side by side on blue plastic seats. The background includes tall " + "city buildings visible through a large window and a colorful framed " + "picture on the wall. Soft, natural lighting filtering through the " + "window enhances the cheerful atmosphere. The image should be " + "hyper-realistic with fine textures on the animals' fur and " + "feathers, emphasizing their cuteness while maintaining a playful " + "and friendly mood." 
+ ) + + print("\n🎨 Generating image with prompt:") + print(f"'{prompt[:100]}...'") + print() + + try: + # Generate the image + result = image_gen.run({ + "prompt": prompt, + "seed": 246764, + "aspect_ratio": "4:3", + "output_folder": str(downloads_dir) + }) + + print(f"Result: {result}") + + if result["image_path"] and os.path.exists(result["image_path"]): + file_size = os.path.getsize(result["image_path"]) + print("✅ Image generated successfully!") + print(f"📁 Saved to: {result['image_path']}") + print(f"📏 File size: {file_size:,} bytes") + print(f"🎯 Generation info: {result['generation_info']}") + else: + print("❌ Image generation failed") + error_msg = result.get('generation_info', {}).get('error', 'Unknown error') + print(f"Error: {error_msg}") + + # Check for specific error types and provide helpful guidance + if "api token" in error_msg.lower(): + print("\n💡 To fix this issue:") + print("1. Check that your REPLICATE_API_TOKEN is correct") + print("2. Ensure you have sufficient credits in your Replicate account") + print("3. Verify your account has access to the model") + elif "model" in error_msg.lower(): + print("\n💡 To fix this issue:") + print("1. Check that the model name is correct") + print("2. Ensure the model is publicly available") + print("3. 
Verify your account has access to the model") + + except Exception as e: + print(f"\n❌ Integration test failed with error: {e}") + import traceback + traceback.print_exc() + + +if __name__ == "__main__": + # Load environment variables + load_dotenv() + + # Run the integration test + main() \ No newline at end of file diff --git a/examples/media-gen/requirements.txt b/examples/media-gen/requirements.txt index 639886c..2d0a496 100644 --- a/examples/media-gen/requirements.txt +++ b/examples/media-gen/requirements.txt @@ -13,4 +13,6 @@ python-dotenv # For environment variable management # Additional dependencies that Polymind requires anthropic # For Claude integration -openai # For OpenAI integration \ No newline at end of file +openai # For OpenAI integration +replicate # For Replicate integration +requests # For downloading images from URLs \ No newline at end of file diff --git a/examples/media-gen/tests/test_openai_image_gen.py b/examples/media-gen/tests/test_openai_image_gen.py new file mode 100644 index 0000000..5d04a2c --- /dev/null +++ b/examples/media-gen/tests/test_openai_image_gen.py @@ -0,0 +1,150 @@ +""" +Tests for the OpenAI image generation tool. 
+""" + +import os +import sys +from unittest.mock import Mock, patch + +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) + +from tools.openai_image_gen import OpenAIImageGen + + +class TestOpenAIImageGen: + """Test cases for OpenAIImageGen tool.""" + + def test_init(self): + """Test tool initialization.""" + tool = OpenAIImageGen() + assert tool.tool_name == "openai_image_generator" + assert len(tool.descriptions) == 3 + + def test_input_spec(self): + """Test input parameter specification.""" + tool = OpenAIImageGen() + params = tool.input_spec() + + # Check that all expected parameters are present + param_names = [p.name for p in params] + expected_params = ["prompt", "aspect_ratio", "output_format", "image_path"] + + for param in expected_params: + assert param in param_names + + def test_output_spec(self): + """Test output parameter specification.""" + tool = OpenAIImageGen() + params = tool.output_spec() + + # Check that all expected parameters are present + param_names = [p.name for p in params] + expected_params = ["image_path", "generation_info"] + + for param in expected_params: + assert param in param_names + + @patch('tools.openai_image_gen.OpenAI') + def test_run_success(self, mock_openai): + """Test successful image generation.""" + # Mock OpenAI response + mock_response = Mock() + mock_output = Mock() + mock_output.type = "image_generation_call" + mock_output.result = "base64_encoded_image_data" + mock_response.output = [mock_output] + + mock_client = Mock() + mock_client.responses.create.return_value = mock_response + mock_openai.return_value = mock_client + + # Create tool and test + tool = OpenAIImageGen() + result = tool.run({ + "prompt": "Test image", + "image_path": "/tmp/test.png" + }) + + # Verify result + assert result["image_path"] == "/tmp/test.png" + assert result["generation_info"]["model"] == "gpt-4o-mini" + assert result["generation_info"]["status"] == "generated successfully" + + @patch('tools.openai_image_gen.OpenAI') + 
def test_run_failure(self, mock_openai): + """Test image generation failure.""" + # Mock OpenAI to raise exception + mock_client = Mock() + mock_client.responses.create.side_effect = Exception("API Error") + mock_openai.return_value = mock_client + + # Create tool and test + tool = OpenAIImageGen() + result = tool.run({ + "prompt": "Test image" + }) + + # Verify error handling + assert result["image_path"] == "" + assert result["generation_info"]["status"] == "generation failed" + assert "API Error" in result["generation_info"]["error"] + + def test_default_parameters(self): + """Test default parameter values.""" + tool = OpenAIImageGen() + + # Test with minimal input + with patch.object(tool, 'client') as mock_client: + mock_response = Mock() + mock_output = Mock() + mock_output.type = "image_generation_call" + mock_output.result = "base64_encoded_image_data" + mock_response.output = [mock_output] + mock_client.responses.create.return_value = mock_response + + tool.run({"prompt": "Test"}) + + # Verify default values were used + call_args = mock_client.responses.create.call_args + tool_params = call_args[1]['tools'][0]['parameters'] + + assert tool_params['size'] == "1024x1024" + assert tool_params['quality'] == "low" + assert tool_params['format'] == "png" + assert tool_params['compression'] == 80 + assert tool_params['background'] == "opaque" + + @patch('tools.openai_image_gen.OpenAI') + def test_custom_parameters(self, mock_openai): + """Test custom parameter values.""" + # Mock OpenAI response + mock_response = Mock() + mock_output = Mock() + mock_output.type = "image_generation_call" + mock_output.result = "base64_encoded_image_data" + mock_response.output = [mock_output] + + mock_client = Mock() + mock_client.responses.create.return_value = mock_response + mock_openai.return_value = mock_client + + # Create tool and test with custom parameters + tool = OpenAIImageGen() + tool.run({ + "prompt": "Test image", + "size": "1024x1536", + "quality": "high", + 
"output_format": "jpg", + "compression": 95, + "background": "transparent" + }) + + # Verify custom values were used + call_args = mock_client.responses.create.call_args + tool_params = call_args[1]['tools'][0]['parameters'] + + assert tool_params['size'] == "1024x1536" + assert tool_params['quality'] == "high" + assert tool_params['format'] == "jpg" + assert tool_params['compression'] == 95 + assert tool_params['background'] == "transparent" \ No newline at end of file diff --git a/examples/media-gen/tests/test_replicate_image_gen.py b/examples/media-gen/tests/test_replicate_image_gen.py new file mode 100644 index 0000000..540e00c --- /dev/null +++ b/examples/media-gen/tests/test_replicate_image_gen.py @@ -0,0 +1,146 @@ +""" +Tests for the Replicate image generation tool. +""" + +import os +import sys +from unittest.mock import Mock, patch + +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) + +try: + from tools.replicate_image_gen import ReplicateImageGen +except ImportError as e: + if "replicate" in str(e): + print("❌ Replicate package not installed. 
Please install it with:") + print(" pip install replicate") + sys.exit(1) + else: + raise + + +class TestReplicateImageGen: + """Test cases for ReplicateImageGen tool.""" + + def test_init(self): + """Test tool initialization.""" + tool = ReplicateImageGen() + assert tool.tool_name == "replicate_image_generator" + assert len(tool.descriptions) == 3 + assert tool.model == "prunaai/wan-2.2-image" + + def test_init_custom_model(self): + """Test tool initialization with custom model.""" + tool = ReplicateImageGen("stability-ai/sdxl") + assert tool.model == "stability-ai/sdxl" + + def test_input_spec(self): + """Test input parameter specification.""" + tool = ReplicateImageGen() + params = tool.input_spec() + + # Check that all expected parameters are present + param_names = [p.name for p in params] + expected_params = ["prompt", "aspect_ratio", "output_format", "output_folder"] + + for param in expected_params: + assert param in param_names + + def test_output_spec(self): + """Test output parameter specification.""" + tool = ReplicateImageGen() + params = tool.output_spec() + + # Check that all expected parameters are present + param_names = [p.name for p in params] + expected_params = ["image_path", "generation_info"] + + for param in expected_params: + assert param in param_names + + @patch('tools.replicate_image_gen.replicate') + def test_run_success(self, mock_replicate): + """Test successful image generation.""" + # Mock Replicate response + mock_output = Mock() + mock_output.read.return_value = b"fake_image_data" + mock_output.url.return_value = "https://replicate.delivery/.../output.jpeg" + + mock_replicate.run.return_value = mock_output + + # Create tool and test + tool = ReplicateImageGen() + result = tool.run({ + "prompt": "Test image", + "output_folder": "/tmp" + }) + + # Verify result + assert result["image_path"] != "" + assert result["generation_info"]["model"] == "prunaai/wan-2.2-image" + assert result["generation_info"]["status"] == "generated 
successfully" + + @patch('tools.replicate_image_gen.replicate') + def test_run_failure(self, mock_replicate): + """Test image generation failure.""" + # Mock Replicate to raise exception + mock_replicate.run.side_effect = Exception("API Error") + + # Create tool and test + tool = ReplicateImageGen() + result = tool.run({ + "prompt": "Test image" + }) + + # Verify error handling + assert result["image_path"] == "" + assert result["generation_info"]["status"] == "generation failed" + assert "API Error" in result["generation_info"]["error"] + + def test_default_parameters(self): + """Test default parameter values.""" + tool = ReplicateImageGen() + + # Test with minimal input + with patch.object(tool, 'run') as mock_run: + mock_run.return_value = { + "image_path": "/tmp/test.jpg", + "generation_info": {"status": "success"} + } + + tool.run({"prompt": "Test"}) + + # Verify default values were used + call_args = mock_run.call_args[0][0] + assert call_args["aspect_ratio"] == "4:3" + assert call_args["output_format"] == "jpeg" + assert "~/Downloads" in call_args["output_folder"] + + @patch('tools.replicate_image_gen.replicate') + def test_custom_parameters(self, mock_replicate): + """Test custom parameter values.""" + # Mock Replicate response + mock_output = Mock() + mock_output.read.return_value = b"fake_image_data" + mock_output.url.return_value = "https://replicate.delivery/.../output.jpeg" + + mock_replicate.run.return_value = mock_output + + # Create tool and test with custom parameters + tool = ReplicateImageGen() + result = tool.run({ + "prompt": "Test image", + "seed": 12345, + "aspect_ratio": "16:9", + "model": "stability-ai/sdxl" + }) + + # Verify custom values were used + call_args = mock_replicate.run.call_args + model = call_args[0][0] + input_params = call_args[1]["input"] + + assert model == "stability-ai/sdxl" + assert input_params["prompt"] == "Test image" + assert input_params["aspect_ratio"] == "16:9" + assert input_params["seed"] == 12345 \ No newline 
at end of file diff --git a/examples/media-gen/tools/__init__.py b/examples/media-gen/tools/__init__.py index b50218f..c845ece 100644 --- a/examples/media-gen/tools/__init__.py +++ b/examples/media-gen/tools/__init__.py @@ -10,11 +10,15 @@ from .dummy_image_gen import DummyImageGen from .dummy_video_gen import DummyVideoGen from .image_understanding_tool import ImageUnderstandingTool +from .openai_image_gen import OpenAIImageGen +from .replicate_image_gen import ReplicateImageGen __all__ = [ "ImageGenerationTool", "VideoGenerationTool", "DummyImageGen", "DummyVideoGen", - "ImageUnderstandingTool" + "ImageUnderstandingTool", + "OpenAIImageGen", + "ReplicateImageGen" ] \ No newline at end of file diff --git a/examples/media-gen/tools/dummy_image_gen.py b/examples/media-gen/tools/dummy_image_gen.py index 05f902f..b77a292 100644 --- a/examples/media-gen/tools/dummy_image_gen.py +++ b/examples/media-gen/tools/dummy_image_gen.py @@ -6,6 +6,8 @@ the media generation framework structure without requiring real image generation APIs. 
""" +from pathlib import Path + from polymind.core.message import Message from .media_gen_tool_base import ImageGenerationTool @@ -56,9 +58,23 @@ def run(self, input: dict) -> dict: prompt = input.get("prompt", "") aspect_ratio = input.get("aspect_ratio", "4:3") output_format = input.get("output_format", "jpg") + output_folder = input.get("output_folder", str(Path.home() / "Downloads")) + + # Generate dynamic image name with timestamp to avoid duplication + import os + from datetime import datetime + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + base_name = f"dummy_generated_image_{timestamp}" + image_name = f"{base_name}.{output_format}" + + # Ensure unique filename + counter = 1 + while os.path.exists(f"{output_folder.rstrip('/')}/{image_name}"): + image_name = f"{base_name}_{counter}.{output_format}" + counter += 1 - # Return hardcoded dummy image path and metadata - dummy_image_path = f"/tmp/dummy_generated_image.{output_format}" + # Create full path + dummy_image_path = f"{output_folder.rstrip('/')}/{image_name}" return { "image_path": dummy_image_path, diff --git a/examples/media-gen/tools/media_gen_tool_base.py b/examples/media-gen/tools/media_gen_tool_base.py index 1320517..fbcfaab 100644 --- a/examples/media-gen/tools/media_gen_tool_base.py +++ b/examples/media-gen/tools/media_gen_tool_base.py @@ -50,6 +50,13 @@ def input_spec(self) -> List[Param]: required=False, description="Output format for the generated image", example="jpg" + ), + Param( + name="output_folder", + type="str", + required=False, + description="Folder path where to save the generated image", + example="/path/to/save/images/" ) ] @@ -82,6 +89,7 @@ def run(self, input: dict) -> dict: - prompt: Text description of the desired image - aspect_ratio: Image aspect ratio (optional, default: "4:3") - output_format: Output format (optional, default: "jpg") + - output_folder: Folder path to save the image (optional) Returns: dict: Dictionary containing: diff --git 
# File: examples/media-gen/tools/openai_image_gen.py
"""
OpenAI image generation tool using gpt-4o-mini.

This module provides a real implementation of an image generation tool using
OpenAI's gpt-4o-mini model with image generation capabilities. It integrates
with the Polymind framework and supports various image generation parameters.
"""

import base64
from datetime import datetime
from pathlib import Path

from openai import OpenAI
from polymind.core.message import Message

from .media_gen_tool_base import ImageGenerationTool

# Model used for every request; also echoed back in ``generation_info``.
_OPENAI_MODEL = "gpt-4o-mini"

# A compression level is only forwarded for these lossy output formats.
_OPENAI_COMPRESSIBLE_FORMATS = ("jpeg", "webp")


class OpenAIImageGen(ImageGenerationTool):
    """
    OpenAI image generation tool using gpt-4o-mini.

    The tool sends a text prompt to the OpenAI Responses API with the
    ``image_generation`` tool enabled, decodes the base64 image returned by
    the API and writes it to a uniquely named file in the output folder.

    The OpenAI client is created with no arguments, so the API key is
    expected to come from the environment.
    """

    def __init__(self, **kwargs):
        """
        Initialize the OpenAI image generation tool.

        Args:
            **kwargs: Additional arguments passed to the parent class.
        """
        super().__init__(
            tool_name="openai_image_generator",
            descriptions=[
                "OpenAI image generation using gpt-4o-mini model",
                "Generate high-quality images from text prompts",
                "Supports various image parameters (size, quality, format)",
            ],
            **kwargs,
        )

    def run(self, input: dict) -> dict:
        """
        Generate an image with OpenAI and save it to disk.

        Args:
            input (dict): Input parameters containing:
                - prompt: Text description of the desired image (required)
                - output_folder: Folder where the image is saved
                  (optional, default: "~/Downloads"); "~" is expanded
                - output_format: Output format, also used as the file
                  extension (optional, default: "png")
                - size: Image dimensions (optional, default: "1024x1024")
                - quality: Rendering quality (optional, default: "low")
                - compression: Compression level 0-100 (optional,
                  default: 80); only sent for jpeg/webp output
                - background: "transparent" or "opaque" (optional,
                  default: "opaque")

        Returns:
            dict: Dictionary containing:
                - image_path: Path to the generated image file, or "" when
                  generation failed
                - generation_info: Generation metadata; on failure it holds
                  "error" and a "generation failed" status instead of the
                  image parameters
        """
        prompt = input.get("prompt", "")
        output_format = input.get("output_format", "png")
        output_folder = input.get("output_folder", str(Path.home() / "Downloads"))
        size = input.get("size", "1024x1024")
        quality = input.get("quality", "low")
        compression = input.get("compression", 80)
        background = input.get("background", "opaque")

        # Fail fast instead of spending an API call on an empty prompt.
        if not isinstance(prompt, str) or not prompt.strip():
            return self._failure(prompt, "prompt must be a non-empty string")

        # This is the tool boundary: any failure (bad folder, API error,
        # malformed payload, disk error) is reported in the result dict
        # rather than raised to the caller.
        try:
            image_path = self._next_image_path(output_folder, output_format)

            client = OpenAI()
            response = client.responses.create(
                model=_OPENAI_MODEL,
                input=prompt,
                tools=[
                    self._image_tool_config(
                        size, quality, output_format, compression, background
                    )
                ],
            )

            # Each image generation call in the response carries one
            # base64-encoded image in ``result``.
            image_data = [
                output.result
                for output in response.output
                if output.type == "image_generation_call"
            ]
            if not image_data:
                raise RuntimeError("No image data received from OpenAI API")

            image_bytes = base64.b64decode(image_data[0])

            image_path.parent.mkdir(parents=True, exist_ok=True)
            image_path.write_bytes(image_bytes)
        except Exception as e:
            return self._failure(prompt, str(e))

        return {
            "image_path": str(image_path),
            "generation_info": {
                "model": _OPENAI_MODEL,
                "prompt": prompt,
                "size": size,
                "quality": quality,
                "format": output_format,
                "compression": compression,
                "background": background,
                "status": "generated successfully",
            },
        }

    def _image_tool_config(self, size, quality, output_format, compression, background) -> dict:
        """Build the ``image_generation`` tool entry from the caller's options."""
        # The file extension may be "jpg", but the API spells the format "jpeg".
        api_format = str(output_format).lower()
        if api_format == "jpg":
            api_format = "jpeg"

        tool = {
            "type": "image_generation",
            "size": size,
            "quality": quality,
            "output_format": api_format,
            "background": background,
        }
        # Compression applies to lossy formats only, so it is left out for png.
        if api_format in _OPENAI_COMPRESSIBLE_FORMATS:
            tool["output_compression"] = compression
        return tool

    def _next_image_path(self, output_folder, output_format) -> Path:
        """Return a timestamped path in ``output_folder`` that does not exist yet."""
        folder = Path(output_folder).expanduser()
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        base_name = f"openai_generated_image_{timestamp}"

        candidate = folder / f"{base_name}.{output_format}"
        counter = 1
        # Two images generated within the same second share a timestamp;
        # append a counter until the name is free.
        while candidate.exists():
            candidate = folder / f"{base_name}_{counter}.{output_format}"
            counter += 1
        return candidate

    def _failure(self, prompt, error: str) -> dict:
        """Build the result dictionary returned when generation fails."""
        return {
            "image_path": "",
            "generation_info": {
                "model": _OPENAI_MODEL,
                "prompt": prompt,
                "error": error,
                "status": "generation failed",
            },
        }

    async def _execute(self, input: Message) -> Message:
        """
        Execute the OpenAI image generation using the Polymind framework's
        Message system.

        Args:
            input (Message): Input message whose ``content`` holds the
                generation parameters accepted by ``run``.

        Returns:
            Message: Output message whose content is the dictionary
                returned by ``run``.
        """
        # ``run`` is called synchronously from this coroutine.
        result = self.run(input.content)
        return Message(content=result)
# File: examples/media-gen/tools/replicate_image_gen.py
"""
Replicate image generation tool using various models.

This module provides a real implementation of an image generation tool using
Replicate's API with various image generation models. It integrates with the
Polymind framework and supports various image generation parameters.
"""

from datetime import datetime
from pathlib import Path

import replicate
from polymind.core.message import Message

from .media_gen_tool_base import ImageGenerationTool

# Upper bound for downloading a generated image, so a stalled connection
# cannot block the tool forever.
_REPLICATE_DOWNLOAD_TIMEOUT_SECONDS = 60


class ReplicateImageGen(ImageGenerationTool):
    """
    Replicate image generation tool using various models.

    The tool runs a Replicate model with a text prompt, an aspect ratio and
    an optional seed, then writes the resulting image to a uniquely named
    file in the output folder.

    Requires Replicate API token to be set in environment variables.
    """

    def __init__(self, model: str = "prunaai/wan-2.2-image", **kwargs):
        """
        Initialize the Replicate image generation tool.

        Args:
            model (str): Replicate model identifier
                (default: "prunaai/wan-2.2-image")
            **kwargs: Additional arguments passed to parent class
        """
        super().__init__(
            tool_name="replicate_image_generator",
            descriptions=[
                f"Replicate image generation using {model}",
                "Generate high-quality images from text prompts",
                "Supports various models and parameters",
            ],
            **kwargs,
        )
        self._model = model

    def run(self, input: dict) -> dict:
        """
        Generate an image using the Replicate API and save it to disk.

        Args:
            input (dict): Input parameters containing:
                - prompt: Text description of the desired image (required)
                - output_folder: Folder where the image is saved
                  (optional, default: "~/Downloads"); "~" is expanded
                - seed: Random seed for reproducible results (optional)
                - aspect_ratio: Image aspect ratio (optional, default: "4:3")
                - output_format: File extension of the saved image
                  (optional, default: "jpeg"); it is not sent to the model
                - model: Replicate model to use (optional, overrides the
                  model given at construction time)

        Returns:
            dict: Dictionary containing:
                - image_path: Path to the generated image file, or "" when
                  generation failed
                - generation_info: Generation metadata; on failure it holds
                  "error" and a "generation failed" status instead of the
                  image parameters
        """
        prompt = input.get("prompt", "")
        output_folder = input.get("output_folder", str(Path.home() / "Downloads"))
        seed = input.get("seed")
        aspect_ratio = input.get("aspect_ratio", "4:3")
        output_format = input.get("output_format", "jpeg")
        model = input.get("model", self._model)

        # Fail fast instead of spending an API call on an empty prompt.
        if not isinstance(prompt, str) or not prompt.strip():
            return self._failure(model, prompt, "prompt must be a non-empty string")

        replicate_input = {"prompt": prompt, "aspect_ratio": aspect_ratio}
        # The seed is only forwarded when the caller supplied one.
        if seed is not None:
            replicate_input["seed"] = seed

        # This is the tool boundary: any failure (bad folder, API error,
        # download error, disk error) is reported in the result dict rather
        # than raised to the caller.
        try:
            image_path = self._next_image_path(output_folder, output_format)
            image_path.parent.mkdir(parents=True, exist_ok=True)

            output = replicate.run(model, input=replicate_input)
            image_bytes, image_url = self._read_output(output)

            image_path.write_bytes(image_bytes)
        except Exception as e:
            return self._failure(model, prompt, str(e))

        return {
            "image_path": str(image_path),
            "generation_info": {
                "model": model,
                "prompt": prompt,
                "seed": seed,
                "aspect_ratio": aspect_ratio,
                "format": output_format,
                "status": "generated successfully",
                "replicate_url": image_url,
            },
        }

    def _read_output(self, output):
        """
        Turn a Replicate result into ``(image_bytes, source_url)``.

        ``source_url`` is ``None`` when the bytes were read directly from a
        file-like output instead of being downloaded.

        Raises:
            ValueError: If the output is neither file-like, a URL string,
                nor a non-empty list starting with one of those.
        """
        # Models may return a single item or a list of items; only the
        # first image is saved.
        first = output[0] if isinstance(output, list) and output else output

        if hasattr(first, "read"):
            return first.read(), None

        if isinstance(first, str):
            # Imported here so ``requests`` is only needed when the model
            # returns URLs.
            import requests

            response = requests.get(first, timeout=_REPLICATE_DOWNLOAD_TIMEOUT_SECONDS)
            response.raise_for_status()
            return response.content, first

        raise ValueError(f"Unexpected output format from Replicate: {type(output)}")

    def _next_image_path(self, output_folder, output_format) -> Path:
        """Return a timestamped path in ``output_folder`` that does not exist yet."""
        folder = Path(output_folder).expanduser()
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        base_name = f"replicate_generated_image_{timestamp}"

        candidate = folder / f"{base_name}.{output_format}"
        counter = 1
        # Two images generated within the same second share a timestamp;
        # append a counter until the name is free.
        while candidate.exists():
            candidate = folder / f"{base_name}_{counter}.{output_format}"
            counter += 1
        return candidate

    def _failure(self, model, prompt, error: str) -> dict:
        """Build the result dictionary returned when generation fails."""
        return {
            "image_path": "",
            "generation_info": {
                "model": model,
                "prompt": prompt,
                "error": error,
                "status": "generation failed",
            },
        }

    async def _execute(self, input: Message) -> Message:
        """
        Execute the Replicate image generation using the Polymind framework's
        Message system.

        Args:
            input (Message): Input message whose ``content`` holds the
                generation parameters accepted by ``run``.

        Returns:
            Message: Output message whose content is the dictionary
                returned by ``run``.
        """
        # ``run`` is called synchronously from this coroutine.
        result = self.run(input.content)
        return Message(content=result)