small-thinking · yxjiang · Aug 6, 2025 · Aug 6, 2025 · Aug 6, 2025
diff --git a/examples/media-gen/README.md b/examples/media-gen/README.md
@@ -49,10 +49,14 @@ media-gen/
 │   ├── __init__.py                # Package exports
 │   ├── media_gen_tool_base.py     # Abstract base classes
 │   ├── dummy_image_gen.py         # Dummy image generation tool
+│   ├── openai_image_gen.py        # OpenAI image generation tool
+│   ├── replicate_image_gen.py     # Replicate image generation tool
 │   ├── dummy_video_gen.py         # Dummy video generation tool
 │   └── image_understanding_tool.py # Image understanding tool
 ├── tests/                         # Test suite
 │   ├── test_dummy_media_gen.py    # Comprehensive tests
+│   ├── test_openai_image_gen.py   # OpenAI image generation tests
+│   ├── test_replicate_image_gen.py # Replicate image generation tests
 │   └── test_image_understanding.py # Image understanding tests
 ├── integration_tests/             # Integration tests (manual)
 │   ├── test_image_understanding.py # Real API integration test
@@ -78,6 +82,7 @@ class MyImageGen(ImageGenerationTool):
         prompt = input.get("prompt", "")
         aspect_ratio = input.get("aspect_ratio", "4:3")
         output_format = input.get("output_format", "jpg")
+        output_folder = input.get("output_folder", "~/Downloads")
 
         # Your image generation logic here
         # ...
@@ -92,6 +97,88 @@ class MyImageGen(ImageGenerationTool):
 - `prompt` (str, required): Text description
 - `aspect_ratio` (str, optional, default: "4:3"): Image aspect ratio
 - `output_format` (str, optional, default: "jpg"): Output format
+- `output_folder` (str, optional, default: "~/Downloads"): Folder in which to save the generated image
+
+### OpenAI Image Generation Tool
+
+```python
+from tools import OpenAIImageGen
+
+# Initialize the tool
+image_gen = OpenAIImageGen()
+
+# Basic usage
+result = image_gen.run({
+    "prompt": "A gray tabby cat hugging an otter with an orange scarf",
+    "output_folder": "./generated_images"
+})
+
+# Advanced usage with custom parameters
+result = image_gen.run({
+    "prompt": "A futuristic cityscape at sunset with flying cars",
+    "size": "1024x1536",
+    "quality": "high",
+    "output_format": "png",
+    "compression": 90,
+    "background": "opaque",
+    "output_folder": "./generated_images"
+})
+```
+
+**Parameters:**
+- `prompt` (str, required): Text description of the desired image
+- `output_folder` (str, optional, default: "~/Downloads"): Folder in which to save the generated image
+- `size` (str, optional, default: "1024x1024"): Image dimensions
+- `quality` (str, optional, default: "low"): Rendering quality (low, medium, high)
+- `output_format` (str, optional, default: "png"): Output format
+- `compression` (int, optional, default: 80): Compression level 0-100%
+- `background` (str, optional, default: "opaque"): Background type, either "transparent" or "opaque"
+
+**Features:**
+- Uses OpenAI's gpt-4o-mini model with image generation capabilities
+- Supports various image parameters (size, quality, format, compression, background)
+- Automatic directory creation for output paths
+- Comprehensive error handling
+- Integrates seamlessly with Polymind framework
+
+### Replicate Image Generation Tool
+
+```python
+from tools import ReplicateImageGen
+
+# Initialize the tool with default model (WAN 2.2)
+image_gen = ReplicateImageGen()
+
+# Basic usage
+result = image_gen.run({
+    "prompt": "A cinematic cat portrait with golden hour lighting",
+    "output_folder": "./generated_images"
+})
+
+# Advanced usage with custom parameters
+result = image_gen.run({
+    "prompt": "A cinematic, photorealistic medium shot of a cat",
+    "seed": 246764,
+    "aspect_ratio": "4:3",
+    "model": "stability-ai/sdxl"
+})
+```
+
+**Parameters:**
+- `prompt` (str, required): Text description of the desired image
+- `output_folder` (str, optional, default: "~/Downloads"): Folder in which to save the generated image
+- `seed` (int, optional): Random seed for reproducible results
+- `aspect_ratio` (str, optional, default: "4:3"): Image aspect ratio
+- `output_format` (str, optional, default: "jpeg"): Output format
+- `model` (str, optional): Replicate model to use (overrides default)
+
+**Features:**
+- Uses Replicate's API with various image generation models
+- Supports models like WAN 2.2, Stable Diffusion XL, and others
+- Reproducible results with seed parameter
+- Automatic directory creation for output paths
+- Comprehensive error handling
+- Integrates seamlessly with Polymind framework
 
 ### Video Generation Tool
 
@@ -194,7 +281,7 @@ python integration_tests/test_image_understanding.py
 ## Usage
 
 ```python
-from tools import DummyImageGen, DummyVideoGen
+from tools import DummyImageGen, OpenAIImageGen, ReplicateImageGen, DummyVideoGen
 from dotenv import load_dotenv
 import os
 
@@ -207,11 +294,22 @@ print(f"Replicate API Token: {'✓ Available' if os.getenv('REPLICATE_API_TOKEN'
 
 # Initialize tools
 image_gen = DummyImageGen()
+openai_image_gen = OpenAIImageGen()
+replicate_image_gen = ReplicateImageGen()
 video_gen = DummyVideoGen()
 image_understanding = ImageUnderstandingTool()
 
 # Generate media
 image_result = image_gen.run({"prompt": "A beautiful sunset"})
+openai_result = openai_image_gen.run({
+    "prompt": "A beautiful sunset over mountains",
+    "output_folder": "./generated_images"
+})
+replicate_result = replicate_image_gen.run({
+    "prompt": "A cinematic cat portrait",
+    "seed": 12345,
+    "aspect_ratio": "4:3"
+})
 video_result = video_gen.run({"prompt": "A butterfly emerging"})
 
 # Analyze images
@@ -232,6 +330,8 @@ python example_usage.py
 
 # Run tests
 cd tests && python test_dummy_media_gen.py
+python test_openai_image_gen.py
+python test_replicate_image_gen.py
 python test_image_understanding.py
 
 # Run integration tests (requires API key)

diff --git a/examples/media-gen/integration_tests/README.md b/examples/media-gen/integration_tests/README.md
@@ -2,6 +2,80 @@
 
 This folder contains integration tests that require real API calls and external resources.
 
+## OpenAI Image Generation Integration Test
+
+### Prerequisites
+- OpenAI API key in `.env` file
+- Internet connection
+- OpenAI API access with verified organization (for image generation)
+
+### Running the Test
+
+```bash
+# From the media-gen directory
+python integration_tests/test_openai_image_gen.py
+```
+
+### What it does
+- Generates a single image with a specific whimsical prompt
+- Tests the OpenAI image generation tool with real API calls
+- Saves the generated image to `~/Downloads/polymind_generated_images/`
+- Provides helpful error messages for common issues
+
+### Expected Output
+The test will show:
+- ✅ Confirmation that API key is found
+- 🎨 Image generation progress
+- 📁 File path and size of generated image
+- 🎯 Generation metadata
+
+### Generated Image
+- Dynamically named image with timestamp (e.g., `openai_generated_image_20241201_143022.png`)
+
+### Notes
+- This test is not run automatically with unit tests
+- It requires a valid OpenAI API key
+- It makes real API calls and may incur costs
+- Generated images are saved in `~/Downloads/polymind_generated_images/` with unique names
+- If you get an organization verification error, follow the provided instructions
+
+## Replicate Image Generation Integration Test
+
+### Prerequisites
+- Replicate API token in `.env` file
+- Internet connection
+- Replicate API access
+
+### Running the Test
+
+```bash
+# From the media-gen directory
+python integration_tests/test_replicate_image_gen.py
+```
+
+### What it does
+- Generates a single image with a cinematic cat prompt
+- Tests the Replicate image generation tool with real API calls
+- Saves the generated image to `~/Downloads/polymind_generated_images/`
+- Provides helpful error messages for common issues
+
+### Expected Output
+The test will show:
+- ✅ Confirmation that API token is found
+- 🎨 Image generation progress
+- 📁 File path and size of generated image
+- 🎯 Generation metadata
+
+### Generated Image
+- Dynamically named image with timestamp (e.g., `replicate_generated_image_20241201_143022.jpeg`)
+
+### Notes
+- This test is not run automatically with unit tests
+- It requires a valid Replicate API token
+- It makes real API calls and may incur costs
+- Generated images are saved in `~/Downloads/polymind_generated_images/` with unique names
+- Uses the WAN 2.2 model by default
+
 ## Image Understanding Integration Test
 
 ### Prerequisites

diff --git a/examples/media-gen/integration_tests/test_openai_image_gen.py b/examples/media-gen/integration_tests/test_openai_image_gen.py
@@ -0,0 +1,110 @@
+"""
+Integration test for OpenAI image generation tool.
+
+This script tests the OpenAI image generation tool with a real API call.
+It generates one image and saves it to ~/Downloads/polymind_generated_images.
+
+Requirements:
+- Valid OpenAI API key in the OPENAI_API_KEY environment variable (a .env file works)
+- Internet connection
+- OpenAI API access
+"""
+
+import os
+import sys
+
+from dotenv import load_dotenv
+from pathlib import Path
+
+# Add parent directory to path for imports
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+
+from tools.openai_image_gen import OpenAIImageGen
+
+
+def main():
+    """Generate a single image with the specified prompt."""
+    print("🚀 OpenAI Image Generation Integration Test")
+    print("=" * 60)
+
+    # Check if OpenAI API key is available
+    if not os.getenv("OPENAI_API_KEY"):
+        print("❌ OPENAI_API_KEY environment variable not set")
+        print("Please set your OpenAI API key to run integration tests")
+        return
+
+    print(f"✅ OpenAI API key found: {os.getenv('OPENAI_API_KEY')[:10]}...")
+
+    # Create Downloads directory if it doesn't exist
+    downloads_dir = Path.home() / "Downloads" / "polymind_generated_images"
+    downloads_dir.mkdir(parents=True, exist_ok=True)
+    print(f"✅ Output directory: {downloads_dir.absolute()}")
+
+    # Initialize the tool
+    image_gen = OpenAIImageGen()
+
+    # The specific prompt provided by the user
+    prompt = (
+        "Create a whimsical scene inside a modern subway train featuring a "
+        "fluffy, light brown rabbit and a chubby, soft gray penguin sitting "
+        "side by side on blue plastic seats. The background includes tall "
+        "city buildings visible through a large window and a colorful framed "
+        "picture on the wall. Soft, natural lighting filtering through the "
+        "window enhances the cheerful atmosphere. The image should be "
+        "hyper-realistic with fine textures on the animals' fur and "
+        "feathers, emphasizing their cuteness while maintaining a playful "
+        "and friendly mood."
+    )
+
+    print("\n🎨 Generating image with prompt:")
+    print(f"'{prompt[:100]}...'")
+    print()
+
+    try:
+        # Generate the image
+        result = image_gen.run({
+            "prompt": prompt,
+            "size": "1024x1024",
+            "quality": "high",
+            "output_format": "png",
+            "output_folder": str(downloads_dir)
+        })
+
+        print(f"Result: {result}")
+
+        if result["image_path"] and os.path.exists(result["image_path"]):
+            file_size = os.path.getsize(result["image_path"])
+            print("✅ Image generated successfully!")
+            print(f"📁 Saved to: {result['image_path']}")
+            print(f"📏 File size: {file_size:,} bytes")
+            print(f"🎯 Generation info: {result['generation_info']}")
+        else:
+            print("❌ Image generation failed")
+            error_msg = result.get('generation_info', {}).get('error', 'Unknown error')
+            print(f"Error: {error_msg}")
+
+            # Check for specific error types and provide helpful guidance
+            if "organization must be verified" in error_msg.lower():
+                print("\n💡 To fix this issue:")
+                print("1. Go to: https://platform.openai.com/settings/organization/general")
+                print("2. Click on 'Verify Organization'")
+                print("3. Wait up to 15 minutes for access to propagate")
+                print("4. Try running this test again")
+            elif "api key" in error_msg.lower():
+                print("\n💡 To fix this issue:")
+                print("1. Check that your OPENAI_API_KEY is correct")
+                print("2. Ensure you have sufficient credits in your OpenAI account")
+                print("3. Verify your account has access to image generation features")
+
+    except Exception as e:
+        print(f"\n❌ Integration test failed with error: {e}")
+        import traceback
+        traceback.print_exc()
+
+
+if __name__ == "__main__":
+    # Load variables from a .env file first: main() reads OPENAI_API_KEY
+    load_dotenv()
+
+    # Run the integration test only when this file is executed as a script
+    main()