Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 101 additions & 1 deletion examples/media-gen/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,14 @@ media-gen/
│ ├── __init__.py # Package exports
│ ├── media_gen_tool_base.py # Abstract base classes
│ ├── dummy_image_gen.py # Dummy image generation tool
│ ├── openai_image_gen.py # OpenAI image generation tool
│ ├── replicate_image_gen.py # Replicate image generation tool
│ ├── dummy_video_gen.py # Dummy video generation tool
│ └── image_understanding_tool.py # Image understanding tool
├── tests/ # Test suite
│ ├── test_dummy_media_gen.py # Comprehensive tests
│ ├── test_openai_image_gen.py # OpenAI image generation tests
│ ├── test_replicate_image_gen.py # Replicate image generation tests
│ └── test_image_understanding.py # Image understanding tests
├── integration_tests/ # Integration tests (manual)
│ ├── test_image_understanding.py # Real API integration test
Expand All @@ -78,6 +82,7 @@ class MyImageGen(ImageGenerationTool):
prompt = input.get("prompt", "")
aspect_ratio = input.get("aspect_ratio", "4:3")
output_format = input.get("output_format", "jpg")
output_folder = input.get("output_folder", "~/Downloads")

# Your image generation logic here
# ...
Expand All @@ -92,6 +97,88 @@ class MyImageGen(ImageGenerationTool):
- `prompt` (str, required): Text description
- `aspect_ratio` (str, optional, default: "4:3"): Image aspect ratio
- `output_format` (str, optional, default: "jpg"): Output format
- `output_folder` (str, optional, default: "~/Downloads"): Folder path where to save the generated image

### OpenAI Image Generation Tool

```python
from tools import OpenAIImageGen

# Initialize the tool
image_gen = OpenAIImageGen()

# Basic usage
result = image_gen.run({
"prompt": "A gray tabby cat hugging an otter with an orange scarf",
"output_folder": "./generated_images"
})

# Advanced usage with custom parameters
result = image_gen.run({
"prompt": "A futuristic cityscape at sunset with flying cars",
"size": "1024x1536",
"quality": "high",
"output_format": "png",
"compression": 90,
"background": "opaque",
"output_folder": "./generated_images"
})
```

**Parameters:**
- `prompt` (str, required): Text description of the desired image
- `output_folder` (str, optional, default: "~/Downloads"): Folder path where to save the generated image
- `size` (str, optional, default: "1024x1024"): Image dimensions
- `quality` (str, optional, default: "low"): Rendering quality (low, medium, high)
- `output_format` (str, optional, default: "png"): Output format
- `compression` (int, optional, default: 80): Compression level 0-100%
- `background` (str, optional, default: "opaque"): Transparent or opaque

**Features:**
- Uses OpenAI's gpt-4o-mini model with image generation capabilities
- Supports various image parameters (size, quality, format, compression, background)
- Automatic directory creation for output paths
- Comprehensive error handling
- Integrates seamlessly with Polymind framework

### Replicate Image Generation Tool

```python
from tools import ReplicateImageGen

# Initialize the tool with default model (WAN 2.2)
image_gen = ReplicateImageGen()

# Basic usage
result = image_gen.run({
"prompt": "A cinematic cat portrait with golden hour lighting",
"output_folder": "./generated_images"
})

# Advanced usage with custom parameters
result = image_gen.run({
"prompt": "A cinematic, photorealistic medium shot of a cat",
"seed": 246764,
"aspect_ratio": "4:3",
"model": "stability-ai/sdxl"
})
```

**Parameters:**
- `prompt` (str, required): Text description of the desired image
- `output_folder` (str, optional, default: "~/Downloads"): Folder path where to save the generated image
- `seed` (int, optional): Random seed for reproducible results
- `aspect_ratio` (str, optional, default: "4:3"): Image aspect ratio
- `output_format` (str, optional, default: "jpeg"): Output format
- `model` (str, optional): Replicate model to use (overrides default)

**Features:**
- Uses Replicate's API with various image generation models
- Supports models like WAN 2.2, Stable Diffusion XL, and others
- Reproducible results with seed parameter
- Automatic directory creation for output paths
- Comprehensive error handling
- Integrates seamlessly with Polymind framework

### Video Generation Tool

Expand Down Expand Up @@ -194,7 +281,7 @@ python integration_tests/test_image_understanding.py
## Usage

```python
from tools import DummyImageGen, DummyVideoGen
from tools import DummyImageGen, OpenAIImageGen, ReplicateImageGen, DummyVideoGen
from dotenv import load_dotenv
import os

Expand All @@ -207,11 +294,22 @@ print(f"Replicate API Token: {'✓ Available' if os.getenv('REPLICATE_API_TOKEN'

# Initialize tools
image_gen = DummyImageGen()
openai_image_gen = OpenAIImageGen()
replicate_image_gen = ReplicateImageGen()
video_gen = DummyVideoGen()
image_understanding = ImageUnderstandingTool()

# Generate media
image_result = image_gen.run({"prompt": "A beautiful sunset"})
openai_result = openai_image_gen.run({
"prompt": "A beautiful sunset over mountains",
"output_folder": "./generated_images"
})
replicate_result = replicate_image_gen.run({
"prompt": "A cinematic cat portrait",
"seed": 12345,
"aspect_ratio": "4:3"
})
video_result = video_gen.run({"prompt": "A butterfly emerging"})

# Analyze images
Expand All @@ -232,6 +330,8 @@ python example_usage.py

# Run tests
cd tests && python test_dummy_media_gen.py
python test_openai_image_gen.py
python test_replicate_image_gen.py
python test_image_understanding.py

# Run integration tests (requires API key)
Expand Down
74 changes: 74 additions & 0 deletions examples/media-gen/integration_tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,80 @@

This folder contains integration tests that require real API calls and external resources.

## OpenAI Image Generation Integration Test

### Prerequisites
- OpenAI API key in `.env` file
- Internet connection
- OpenAI API access with verified organization (for image generation)

### Running the Test

```bash
# From the media-gen directory
python integration_tests/test_openai_image_gen.py
```

### What it does
- Generates a single image with a specific whimsical prompt
- Tests the OpenAI image generation tool with real API calls
- Saves the generated image to `~/Downloads/polymind_generated_images/`
- Provides helpful error messages for common issues

### Expected Output
The test will show:
- ✅ Confirmation that API key is found
- 🎨 Image generation progress
- 📁 File path and size of generated image
- 🎯 Generation metadata

### Generated Image
- Dynamically named image with timestamp (e.g., `openai_generated_image_20241201_143022.png`)

### Notes
- This test is not run automatically with unit tests
- It requires a valid OpenAI API key
- It makes real API calls and may incur costs
- Generated images are saved in `~/Downloads/polymind_generated_images/` with unique names
- If you get an organization verification error, follow the provided instructions

## Replicate Image Generation Integration Test

### Prerequisites
- Replicate API token in `.env` file
- Internet connection
- Replicate API access

### Running the Test

```bash
# From the media-gen directory
python integration_tests/test_replicate_image_gen.py
```

### What it does
- Generates a single image with a cinematic cat prompt
- Tests the Replicate image generation tool with real API calls
- Saves the generated image to `~/Downloads/polymind_generated_images/`
- Provides helpful error messages for common issues

### Expected Output
The test will show:
- ✅ Confirmation that API token is found
- 🎨 Image generation progress
- 📁 File path and size of generated image
- 🎯 Generation metadata

### Generated Image
- Dynamically named image with timestamp (e.g., `replicate_generated_image_20241201_143022.jpeg`)

### Notes
- This test is not run automatically with unit tests
- It requires a valid Replicate API token
- It makes real API calls and may incur costs
- Generated images are saved in `~/Downloads/polymind_generated_images/` with unique names
- Uses the WAN 2.2 model by default

## Image Understanding Integration Test

### Prerequisites
Expand Down
110 changes: 110 additions & 0 deletions examples/media-gen/integration_tests/test_openai_image_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""
Integration test for OpenAI image generation tool.

This script tests the OpenAI image generation tool with a real API call.
It generates a single image with a specific prompt and saves it to
~/Downloads/polymind_generated_images.

Requirements:
- Valid OpenAI API key in environment variables
- Internet connection
- OpenAI API access
"""

import os
import sys

from dotenv import load_dotenv
from pathlib import Path

# Add parent directory to path for imports
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

from tools.openai_image_gen import OpenAIImageGen


def main():
    """Run the OpenAI image generation integration test end to end.

    Steps:
      1. Verify that ``OPENAI_API_KEY`` is present in the environment.
      2. Ensure ``~/Downloads/polymind_generated_images`` exists.
      3. Ask ``OpenAIImageGen`` for a single 1024x1024, high-quality PNG.
      4. Print the saved file's path and size, or troubleshooting hints
         derived from the error message reported by the tool.

    Returns:
        None. Everything is reported on stdout. Exceptions raised while
        generating the image are caught and printed with a traceback.
    """
    print("🚀 OpenAI Image Generation Integration Test")
    print("=" * 60)

    # Bail out early: without a key the real API call cannot succeed.
    if not os.getenv("OPENAI_API_KEY"):
        print("❌ OPENAI_API_KEY environment variable not set")
        print("Please set your OpenAI API key to run integration tests")
        return

    # Deliberately do not echo any part of the key: stdout of a test run is
    # routinely captured in terminals and CI logs.
    print("✅ OpenAI API key found")

    # Create Downloads directory if it doesn't exist
    downloads_dir = Path.home() / "Downloads" / "polymind_generated_images"
    downloads_dir.mkdir(parents=True, exist_ok=True)
    print(f"✅ Output directory: {downloads_dir.absolute()}")

    # Initialize the tool
    image_gen = OpenAIImageGen()

    # Fixed prompt used by this integration test
    prompt = (
        "Create a whimsical scene inside a modern subway train featuring a "
        "fluffy, light brown rabbit and a chubby, soft gray penguin sitting "
        "side by side on blue plastic seats. The background includes tall "
        "city buildings visible through a large window and a colorful framed "
        "picture on the wall. Soft, natural lighting filtering through the "
        "window enhances the cheerful atmosphere. The image should be "
        "hyper-realistic with fine textures on the animals' fur and "
        "feathers, emphasizing their cuteness while maintaining a playful "
        "and friendly mood."
    )

    print("\n🎨 Generating image with prompt:")
    print(f"'{prompt[:100]}...'")
    print()

    try:
        # Generate the image
        result = image_gen.run({
            "prompt": prompt,
            "size": "1024x1024",
            "quality": "high",
            "output_format": "png",
            "output_folder": str(downloads_dir),
        })

        print(f"Result: {result}")

        # Use .get() throughout so that a result missing a key is reported
        # as a failed generation instead of surfacing as a KeyError.
        image_path = result.get("image_path")
        generation_info = result.get("generation_info") or {}

        if image_path and os.path.exists(image_path):
            file_size = os.path.getsize(image_path)
            print("✅ Image generated successfully!")
            print(f"📁 Saved to: {image_path}")
            print(f"📏 File size: {file_size:,} bytes")
            print(f"🎯 Generation info: {generation_info}")
            return

        print("❌ Image generation failed")
        # Coerce to str: the error entry may be an exception object or None,
        # and the checks below need .lower().
        error_msg = str(generation_info.get("error", "Unknown error"))
        print(f"Error: {error_msg}")

        # Check for specific error types and provide helpful guidance
        lowered = error_msg.lower()
        if "organization must be verified" in lowered:
            print("\n💡 To fix this issue:")
            print("1. Go to: https://platform.openai.com/settings/organization/general")
            print("2. Click on 'Verify Organization'")
            print("3. Wait up to 15 minutes for access to propagate")
            print("4. Try running this test again")
        elif "api key" in lowered:
            print("\n💡 To fix this issue:")
            print("1. Check that your OPENAI_API_KEY is correct")
            print("2. Ensure you have sufficient credits in your OpenAI account")
            print("3. Verify your account has access to image generation features")

    except Exception as e:
        # Top-level boundary of a manual test script: report and keep the
        # traceback visible rather than crash without context.
        print(f"\n❌ Integration test failed with error: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    # Populate the environment from a local .env file first, so that main()
    # can find OPENAI_API_KEY, then run the integration test.
    load_dotenv()
    main()
Loading
Loading