Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 46 additions & 2 deletions examples/media-gen/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,48 @@ The pipeline uses a modular design with two main components:
- Simple two-parameter API
- Path expansion support

## Video Generation

The media generation framework now includes video generation capabilities using the Replicate WAN 2.2 i2v fast model. This allows you to generate videos from images and text prompts.

### Video Generation Example

```python
from tools.replicate_video_gen import ReplicateVideoGen

# Initialize the video generation tool
video_gen = ReplicateVideoGen()

# Generate video from image and text prompt
result = video_gen.run({
"image": "path/to/your/image.jpg",
"prompt": "A serene landscape with gentle movement and natural lighting",
"output_folder": "~/Downloads/polymind_videos",
"output_format": "mp4"
})

print(f"Video saved to: {result['video_path']}")
```

### Video Generation Parameters

- **image**: Image path, URL, or data URI (required)
- **prompt**: Text description of the desired video (required)
- **output_folder**: Folder in which to save the generated video (optional, default: "~/Downloads")
- **output_format**: Output format (optional, default: "mp4")
- **model**: Replicate model to use (optional, overrides default)

### Testing Video Generation

Run the video generation integration test:

```bash
python integration_tests/test_replicate_video_gen.py
```

## Future Extensions

The modular design allows easy extension to other media types:
- **Image to Video**: Add video generation step
- **Video Understanding**: Add video analysis capabilities
- **Multi-modal**: Support for text, audio, and other media

Expand All @@ -184,9 +222,15 @@ media-gen/
├── tools/ # Media generation tools
│ ├── image_understanding_tool.py
│ ├── openai_image_gen.py
│ ├── replicate_image_gen.py
│ ├── replicate_video_gen.py
│ ├── dummy_image_gen.py
│ ├── dummy_video_gen.py
│ └── media_gen_tool_base.py
├── integration_tests/ # Test files and examples
├── tests/ # Test files
│ └── test_replicate_video_gen.py
├── integration_tests/ # Integration test files and examples
│ └── test_replicate_video_gen.py
└── ~/Downloads/ # Default output location
```

Expand Down
127 changes: 127 additions & 0 deletions examples/media-gen/integration_tests/test_replicate_video_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#!/usr/bin/env python3
"""
Simple script to generate videos using Replicate's WAN 2.2 i2v fast model.

Usage:
python integration_tests/test_replicate_video_gen.py [image_path] [prompt] \
[--timeout SECONDS] [--progress-interval SECONDS]

Examples:
python integration_tests/test_replicate_video_gen.py
python integration_tests/test_replicate_video_gen.py test_image.png \
"animals playing football"
python integration_tests/test_replicate_video_gen.py /path/to/image.jpg \
"a magical forest scene"
python integration_tests/test_replicate_video_gen.py test_image.png \
"magical scene" --timeout 300 --progress-interval 10

Requirements:
- REPLICATE_API_TOKEN environment variable set
- Default test image: integration_tests/test_image.png
"""

import os
import sys

from dotenv import load_dotenv
from pathlib import Path

# Add parent directory to path for imports
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

# Load environment variables from .env file
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env'))

from tools.replicate_video_gen import ReplicateVideoGen


def main():
    """Generate a video from an image and prompt using Replicate.

    Command-line arguments (all optional)::

        [image_path] [prompt] [--timeout SECONDS] [--progress-interval SECONDS]

    ``image_path`` defaults to ``test_image.png``. A relative path is
    resolved against this script's directory, not the current working
    directory. The ``--timeout`` and ``--progress-interval`` options are
    accepted anywhere on the command line, with or without the positionals.

    Exits with status 1 when ``REPLICATE_API_TOKEN`` is unset, the image
    does not exist, or generation fails. Malformed arguments (for example
    a non-integer ``--timeout``) make argparse exit with status 2.
    """
    # Imported locally so this function is self-contained and the module's
    # import block does not need to change.
    import argparse

    # Check for the API token before anything else so the most common
    # misconfiguration is reported first.
    if not os.getenv("REPLICATE_API_TOKEN"):
        print("❌ REPLICATE_API_TOKEN not found in environment variables")
        print("Please set: export REPLICATE_API_TOKEN='your_token_here'")
        sys.exit(1)

    parser = argparse.ArgumentParser(
        description=(
            "Generate a video from an image and a text prompt using "
            "Replicate."
        ),
    )
    parser.add_argument(
        "image_path",
        nargs="?",
        default="test_image.png",
        help="input image; relative paths are resolved against the "
             "script directory (default: test_image.png)",
    )
    parser.add_argument(
        "prompt",
        nargs="?",
        default="the animals standup and start playing football",
        help="text prompt describing the desired video",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=600,
        metavar="SECONDS",
        help="timeout passed to the video tool (default: 600)",
    )
    parser.add_argument(
        "--progress-interval",
        type=int,
        default=5,
        metavar="SECONDS",
        help="progress interval passed to the video tool (default: 5)",
    )
    args = parser.parse_args()

    # Relative image paths are anchored at the script directory.
    image_path = Path(args.image_path)
    if not image_path.is_absolute():
        image_path = Path(__file__).parent / image_path

    # Validate the image once, up front.
    if not image_path.exists():
        print(f"❌ Image not found: {image_path}")
        sys.exit(1)

    output_dir = "~/Downloads/polymind_video_generation"

    print(f"🎬 Generating video from: {image_path}")
    print(f"📝 Prompt: {args.prompt}")
    print(f"📁 Output: {output_dir}/")
    print("-" * 60)

    size_mb = image_path.stat().st_size / (1024 * 1024)
    print(f"📏 Input image size: {size_mb:.2f} MB")

    # Expand "~" here instead of relying on the tool to do it.
    output_folder = os.path.expanduser(output_dir)

    try:
        # Constructed inside the try so a constructor failure is reported
        # through the same error path as a failed run.
        video_gen = ReplicateVideoGen()
        result = video_gen.run({
            "image": str(image_path),
            "prompt": args.prompt,
            "output_folder": output_folder,
            "output_format": "mp4",
            "timeout": args.timeout,
            "progress_interval": args.progress_interval,
        })

        if result["video_path"]:
            print("✅ Video generated successfully!")
            print(f"📁 Saved to: {result['video_path']}")

            # Show file size if available
            video_path = Path(result["video_path"])
            if video_path.exists():
                size_mb = video_path.stat().st_size / (1024 * 1024)
                print(f"📏 File size: {size_mb:.1f} MB")
        else:
            print(f"❌ Generation failed: {result['generation_info']}")
            sys.exit(1)

    except Exception as e:
        # Top-level boundary of a CLI script: report and exit non-zero.
        # sys.exit() above raises SystemExit, which is not caught here.
        print(f"❌ Error: {e}")
        sys.exit(1)


# Run the generator only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()
174 changes: 174 additions & 0 deletions examples/media-gen/tests/test_replicate_video_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#!/usr/bin/env python3
"""
Test script for Replicate video generation tool using WAN 2.2 i2v fast model.

This script demonstrates how to use the ReplicateVideoGen tool to generate
videos from images and text prompts using the WAN 2.2 i2v fast model.

Usage:
python tests/test_replicate_video_gen.py

Requirements:
- Replicate API token set in environment variables
- Test image file: tests/test_image.png
"""

import base64
import os
import sys

from pathlib import Path

# Add parent directory to path for imports
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

from tools.replicate_video_gen import ReplicateVideoGen


def test_replicate_video_generation():
    """Generate a video from the bundled test image and a text prompt.

    Returns:
        True when the tool reports a video path, False when the API token
        or the test image is missing, the tool reports no video path, or
        any exception is raised while constructing or running the tool.
    """
    print("🎬 Replicate Video Generation Test")
    print("=" * 50)

    # Check for Replicate API token
    if not os.getenv("REPLICATE_API_TOKEN"):
        print("❌ REPLICATE_API_TOKEN not found in environment variables")
        print("Please set your Replicate API token:")
        print("export REPLICATE_API_TOKEN='your_token_here'")
        return False

    # The test image lives next to this script.
    test_image_path = Path(__file__).parent / "test_image.png"

    if not test_image_path.exists():
        print(f"❌ Test image not found at: {test_image_path}")
        print("Please ensure test_image.png exists in the tests directory")
        return False

    print(f"✅ Test image found: {test_image_path}")
    print(f"📏 File size: {test_image_path.stat().st_size:,} bytes")

    test_prompt = (
        "Close-up shot of an elderly sailor wearing a yellow raincoat, "
        "seated on the deck of a catamaran, slowly puffing on a pipe. "
        "His cat lies quietly beside him with eyes closed, enjoying the "
        "calm. The warm glow of the setting sun bathes the scene, with "
        "gentle waves lapping against the hull and a few seabirds "
        "circling slowly above. The camera slowly pushes in, capturing "
        "this peaceful and harmonious moment."
    )

    print(f"📝 Test prompt: {test_prompt[:100]}...")
    print()

    print("🔄 Generating video from image and text prompt...")
    print("-" * 50)

    # Expand "~" explicitly, as the integration script does, instead of
    # relying on the tool to expand it.
    output_folder = os.path.expanduser("~/Downloads/polymind_video_generation")

    try:
        # Constructed inside the try so a constructor failure is reported
        # as a failed test rather than an uncaught exception.
        video_gen = ReplicateVideoGen()
        result = video_gen.run({
            "image": str(test_image_path),
            "prompt": test_prompt,
            "output_folder": output_folder,
            "output_format": "mp4",
        })

        if not result["video_path"]:
            print(f"❌ Video generation failed: {result['generation_info']}")
            return False

        print("✅ Video generated successfully!")
        print(f"📁 Video saved to: {result['video_path']}")
        print(f"📊 Generation info: {result['generation_info']}")

        # Check if file exists and get size
        video_path = Path(result["video_path"])
        if video_path.exists():
            print(f"📏 Video file size: {video_path.stat().st_size:,} bytes")
        else:
            print("⚠️ Video file not found at expected location")

        return True

    except Exception as e:
        print(f"❌ Video generation failed with exception: {e}")
        return False


def test_with_data_uri():
    """Generate a video with the input image passed as a base64 data URI.

    Returns:
        True when the tool reports a video path, False when the API token
        or the test image is missing, the tool reports no video path, or
        any exception is raised while constructing or running the tool.
    """
    print("\n🔄 Testing with data URI image input...")
    print("-" * 50)

    # Same token guard as the basic test, so a missing token is reported
    # as a failed test instead of an error from the tool.
    if not os.getenv("REPLICATE_API_TOKEN"):
        print("❌ REPLICATE_API_TOKEN not found in environment variables")
        return False

    # The test image lives next to this script.
    test_image_path = Path(__file__).parent / "test_image.png"

    if not test_image_path.exists():
        print("❌ Test image not found for data URI test")
        return False

    # Convert image to data URI
    with open(test_image_path, 'rb') as file:
        data = base64.b64encode(file.read()).decode('utf-8')
    data_uri = f"data:application/octet-stream;base64,{data}"

    print(f"✅ Converted image to data URI ({len(data_uri)} chars)")

    test_prompt = "A serene landscape with gentle movement and natural lighting"

    # Expand "~" explicitly, as the integration script does, instead of
    # relying on the tool to expand it.
    output_folder = os.path.expanduser("~/Downloads/polymind_video_generation")

    try:
        # Constructed inside the try so a constructor failure is reported
        # as a failed test rather than an uncaught exception.
        video_gen = ReplicateVideoGen()
        result = video_gen.run({
            "image": data_uri,
            "prompt": test_prompt,
            "output_folder": output_folder,
            "output_format": "mp4",
        })

        if not result["video_path"]:
            print(f"❌ Video generation failed: {result['generation_info']}")
            return False

        print("✅ Video generated successfully with data URI!")
        print(f"📁 Video saved to: {result['video_path']}")
        return True

    except Exception as e:
        print(f"❌ Video generation failed with exception: {e}")
        return False


def main():
    """Run all video generation tests and print a summary.

    Both tests always run, even if the first one fails. Each summary line
    is prefixed with a marker matching its result, and the process exits
    with status 1 when any test failed so shells and CI can detect it.
    """
    print("🎬 Replicate Video Generation Tool Tests")
    print("=" * 60)

    # Test 1: Basic video generation
    success1 = test_replicate_video_generation()

    # Test 2: Data URI input
    success2 = test_with_data_uri()

    results = (
        ("Basic video generation", success1),
        ("Data URI input", success2),
    )

    # Summary
    print("\n📊 Test Summary")
    print("=" * 60)
    for label, passed in results:
        # The marker reflects the actual outcome instead of always
        # showing a check mark.
        marker = "✅" if passed else "❌"
        print(f"{marker} {label}: {'PASS' if passed else 'FAIL'}")

    all_passed = all(passed for _, passed in results)
    if all_passed:
        print("\n🎉 All tests passed!")
    else:
        print("\n⚠️ Some tests failed. Check the output above for details.")

    print("\n💡 Generated videos are saved to ~/Downloads/polymind_video_generation/")

    if not all_passed:
        sys.exit(1)


# Run the test suite only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()
4 changes: 3 additions & 1 deletion examples/media-gen/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from .image_understanding_tool import ImageUnderstandingTool
from .openai_image_gen import OpenAIImageGen
from .replicate_image_gen import ReplicateImageGen
from .replicate_video_gen import ReplicateVideoGen

__all__ = [
"ImageGenerationTool",
Expand All @@ -20,5 +21,6 @@
"DummyVideoGen",
"ImageUnderstandingTool",
"OpenAIImageGen",
"ReplicateImageGen"
"ReplicateImageGen",
"ReplicateVideoGen"
]
Loading
Loading