small-thinking · yxjiang · Aug 7, 2025 · Aug 7, 2025 · Aug 7, 2025
diff --git a/examples/media-gen/README.md b/examples/media-gen/README.md
@@ -166,10 +166,48 @@ The pipeline uses a modular design with two main components:
 - Simple two-parameter API
 - Path expansion support
 
+## Video Generation
+
+The media generation framework now includes video generation using Replicate's WAN 2.2 i2v (image-to-video) fast model. It generates a video from an input image and a text prompt.
+
+### Video Generation Example
+
+```python
+from tools.replicate_video_gen import ReplicateVideoGen
+
+# Initialize the video generation tool
+video_gen = ReplicateVideoGen()
+
+# Generate video from image and text prompt
+result = video_gen.run({
+    "image": "path/to/your/image.jpg",
+    "prompt": "A serene landscape with gentle movement and natural lighting",
+    "output_folder": "~/Downloads/polymind_videos",
+    "output_format": "mp4"
+})
+
+print(f"Video saved to: {result['video_path']}")
+```
+
+### Video Generation Parameters
+
+- **image**: Image path, URL, or data URI (required)
+- **prompt**: Text description of the desired video (required)
+- **output_folder**: Folder where the generated video is saved (optional, default: "~/Downloads")
+- **output_format**: Output format (optional, default: "mp4")
+- **model**: Replicate model to use (optional, overrides default)
+
+### Testing Video Generation
+
+Run the video generation integration test (requires the `REPLICATE_API_TOKEN` environment variable):
+
+```bash
+python integration_tests/test_replicate_video_gen.py
+```
+
 ## Future Extensions
 
 The modular design allows easy extension to other media types:
-- **Image to Video**: Add video generation step
 - **Video Understanding**: Add video analysis capabilities
 - **Multi-modal**: Support for text, audio, and other media
 
@@ -184,9 +222,15 @@ media-gen/
 ├── tools/                   # Media generation tools
 │   ├── image_understanding_tool.py
 │   ├── openai_image_gen.py
+│   ├── replicate_image_gen.py
+│   ├── replicate_video_gen.py
 │   ├── dummy_image_gen.py
+│   ├── dummy_video_gen.py
 │   └── media_gen_tool_base.py
-├── integration_tests/       # Test files and examples
+├── tests/                   # Test files
+│   └── test_replicate_video_gen.py
+├── integration_tests/       # Integration test files and examples
+│   └── test_replicate_video_gen.py
 └── ~/Downloads/            # Default output location
 ```
 

diff --git a/examples/media-gen/integration_tests/test_replicate_video_gen.py b/examples/media-gen/integration_tests/test_replicate_video_gen.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+"""
+Simple script to generate videos using Replicate's WAN 2.2 i2v fast model.
+
+Usage:
+    python integration_tests/test_replicate_video_gen.py [image_path] [prompt] \
+        [--timeout SECONDS] [--progress-interval SECONDS]
+
+Examples:
+    python integration_tests/test_replicate_video_gen.py
+    python integration_tests/test_replicate_video_gen.py test_image.png \
+        "animals playing football"
+    python integration_tests/test_replicate_video_gen.py /path/to/image.jpg \
+        "a magical forest scene"
+    python integration_tests/test_replicate_video_gen.py test_image.png \
+        "magical scene" --timeout 300 --progress-interval 10
+
+Requirements:
+- REPLICATE_API_TOKEN environment variable set
+- Default test image: integration_tests/test_image.png
+"""
+
+import os
+import sys
+
+from dotenv import load_dotenv
+from pathlib import Path
+
+# Add parent directory to path for imports
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+
+# Load environment variables from .env file
+load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env'))
+
+from tools.replicate_video_gen import ReplicateVideoGen
+
+
+def main():
+    """Generate a video from an image and prompt using Replicate.
+
+    Usage: [image_path] [prompt] [--timeout SECONDS]
+           [--progress-interval SECONDS]
+    Options are only recognised after the two positional arguments.
+
+    Exits with status 1 on a missing token or image, a non-integer option
+    value, or a failed generation.
+    """
+    # Check for API token
+    if not os.getenv("REPLICATE_API_TOKEN"):
+        print("❌ REPLICATE_API_TOKEN not found in environment variables")
+        print("Please set: export REPLICATE_API_TOKEN='your_token_here'")
+        sys.exit(1)
+
+    # Relative image paths resolve against the script directory; joining an
+    # absolute path onto it yields that absolute path unchanged.
+    image_arg = sys.argv[1] if len(sys.argv) > 1 else "test_image.png"
+    image_path = Path(__file__).parent / image_arg
+
+    default_prompt = "the animals standup and start playing football"
+    prompt = sys.argv[2] if len(sys.argv) > 2 else default_prompt
+
+    # Optional settings: 10 minute timeout, progress report every 5 seconds
+    options = {"--timeout": 600, "--progress-interval": 5}
+    extra = sys.argv[3:]
+    # Pair each trailing argument with its successor: (flag, value)
+    for flag, value in zip(extra, extra[1:]):
+        if flag not in options:
+            continue
+        try:
+            options[flag] = int(value)
+        except ValueError:
+            # Report a bad value as a usage error instead of a traceback
+            print(f"❌ {flag} expects an integer, got: {value!r}")
+            sys.exit(1)
+    timeout = options["--timeout"]
+    progress_interval = options["--progress-interval"]
+
+    # Validate image path
+    if not image_path.exists():
+        print(f"❌ Image not found: {image_path}")
+        sys.exit(1)
+
+    print(f"🎬 Generating video from: {image_path}")
+    print(f"📝 Prompt: {prompt}")
+    print("📁 Output: ~/Downloads/polymind_video_generation/")
+    print("-" * 60)
+
+    # Initialize and run video generation
+    video_gen = ReplicateVideoGen()
+
+    # The image was validated above, so its size can be read directly
+    size_mb = image_path.stat().st_size / (1024 * 1024)
+    print(f"📏 Input image size: {size_mb:.2f} MB")
+
+    # Expand the output folder path
+    output_folder = os.path.expanduser("~/Downloads/polymind_video_generation")
+
+    try:
+        result = video_gen.run({
+            "image": str(image_path),
+            "prompt": prompt,
+            "output_folder": output_folder,
+            "output_format": "mp4",
+            "timeout": timeout,
+            "progress_interval": progress_interval
+        })
+
+        if result["video_path"]:
+            print("✅ Video generated successfully!")
+            print(f"📁 Saved to: {result['video_path']}")
+
+            # Show file size if available
+            video_path = Path(result["video_path"])
+            if video_path.exists():
+                size_mb = video_path.stat().st_size / (1024 * 1024)
+                print(f"📏 File size: {size_mb:.1f} MB")
+        else:
+            print(f"❌ Generation failed: {result['generation_info']}")
+            sys.exit(1)
+
+    except Exception as e:
+        print(f"❌ Error: {e}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/media-gen/tests/test_replicate_video_gen.py b/examples/media-gen/tests/test_replicate_video_gen.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+"""
+Test script for Replicate video generation tool using WAN 2.2 i2v fast model.
+
+This script demonstrates how to use the ReplicateVideoGen tool to generate
+videos from images and text prompts using the WAN 2.2 i2v fast model.
+
+Usage:
+    python tests/test_replicate_video_gen.py
+
+Requirements:
+- Replicate API token set in environment variables
+- Test image file: tests/test_image.png
+"""
+
+import base64
+import os
+import sys
+
+from pathlib import Path
+
+# Add parent directory to path for imports
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+
+from tools.replicate_video_gen import ReplicateVideoGen
+
+
+def test_replicate_video_generation():
+    """Run an image-to-video generation and report whether it succeeded.
+
+    Returns False if the token/image is missing or generation fails or raises.
+    """
+    print("🎬 Replicate Video Generation Test")
+    print("=" * 50)
+
+    # Check for Replicate API token
+    if not os.getenv("REPLICATE_API_TOKEN"):
+        print("❌ REPLICATE_API_TOKEN not found in environment variables")
+        print("Please set your Replicate API token:")
+        print("export REPLICATE_API_TOKEN='your_token_here'")
+        return False
+
+    test_image_path = Path(__file__).parent / "test_image.png"
+
+    if not test_image_path.exists():
+        print(f"❌ Test image not found at: {test_image_path}")
+        print("Please ensure test_image.png exists in the tests directory")
+        return False
+
+    print(f"✅ Test image found: {test_image_path}")
+    print(f"📏 File size: {test_image_path.stat().st_size:,} bytes")
+
+    video_gen = ReplicateVideoGen()
+
+    test_prompt = (
+        "Close-up shot of an elderly sailor wearing a yellow raincoat, "
+        "seated on the deck of a catamaran, slowly puffing on a pipe. "
+        "His cat lies quietly beside him with eyes closed, enjoying the "
+        "calm. The warm glow of the setting sun bathes the scene, with "
+        "gentle waves lapping against the hull and a few seabirds "
+        "circling slowly above. The camera slowly pushes in, capturing "
+        "this peaceful and harmonious moment."
+    )
+
+    print(f"📝 Test prompt: {test_prompt[:100]}...")
+    print()
+
+    # Generate video
+    print("🔄 Generating video from image and text prompt...")
+    print("-" * 50)
+
+    try:
+        result = video_gen.run({
+            "image": str(test_image_path),
+            "prompt": test_prompt,
+            "output_folder": "~/Downloads/polymind_video_generation",
+            "output_format": "mp4"
+        })
+
+        if result["video_path"]:
+            print("✅ Video generated successfully!")
+            print(f"📁 Video saved to: {result['video_path']}")
+            print(f"📊 Generation info: {result['generation_info']}")
+
+            # Check if file exists and get size
+            video_path = Path(result["video_path"])
+            if video_path.exists():
+                print(f"📏 Video file size: {video_path.stat().st_size:,} bytes")
+            else:
+                print("⚠️  Video file not found at expected location")
+
+            return True
+        else:
+            print(f"❌ Video generation failed: {result['generation_info']}")
+            return False
+
+    except Exception as e:
+        print(f"❌ Video generation failed with exception: {e}")
+        return False
+
+
+def test_with_data_uri():
+    """Run video generation with the test image passed as a base64 data URI.
+
+    Returns False if the token/image is missing or generation fails or raises.
+    """
+    print("\n🔄 Testing with data URI image input...")
+    print("-" * 50)
+
+    # Same token guard as the basic test, so both report a missing token alike
+    if not os.getenv("REPLICATE_API_TOKEN"):
+        print("❌ REPLICATE_API_TOKEN not found in environment variables")
+        return False
+
+    test_image_path = Path(__file__).parent / "test_image.png"
+    if not test_image_path.exists():
+        print("❌ Test image not found for data URI test")
+        return False
+
+    data = base64.b64encode(test_image_path.read_bytes()).decode('utf-8')
+    data_uri = f"data:application/octet-stream;base64,{data}"
+    print(f"✅ Converted image to data URI ({len(data_uri)} chars)")
+
+    video_gen = ReplicateVideoGen()
+    test_prompt = "A serene landscape with gentle movement and natural lighting"
+
+    try:
+        result = video_gen.run({
+            "image": data_uri,
+            "prompt": test_prompt,
+            "output_folder": "~/Downloads/polymind_video_generation",
+            "output_format": "mp4"
+        })
+
+        if result["video_path"]:
+            print("✅ Video generated successfully with data URI!")
+            print(f"📁 Video saved to: {result['video_path']}")
+            return True
+        else:
+            print(f"❌ Video generation failed: {result['generation_info']}")
+            return False
+
+    except Exception as e:
+        print(f"❌ Video generation failed with exception: {e}")
+        return False
+
+
+def main():
+    """Run all video generation tests; exit with status 1 if any failed."""
+    print("🎬 Replicate Video Generation Tool Tests")
+    print("=" * 60)
+
+    results = {
+        "Basic video generation": test_replicate_video_generation(),
+        "Data URI input": test_with_data_uri(),
+    }
+
+    print("\n📊 Test Summary")
+    print("=" * 60)
+    for label, passed in results.items():
+        icon, verdict = ("✅", "PASS") if passed else ("❌", "FAIL")
+        print(f"{icon} {label}: {verdict}")
+
+    if all(results.values()):
+        print("\n🎉 All tests passed!")
+    else:
+        print("\n⚠️  Some tests failed. Check the output above for details.")
+    print("\n💡 Generated videos are saved to ~/Downloads/polymind_video_generation/")
+    if not all(results.values()):
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/media-gen/tools/__init__.py b/examples/media-gen/tools/__init__.py
@@ -12,6 +12,7 @@
 from .image_understanding_tool import ImageUnderstandingTool
 from .openai_image_gen import OpenAIImageGen
 from .replicate_image_gen import ReplicateImageGen
+from .replicate_video_gen import ReplicateVideoGen
 
 __all__ = [
     "ImageGenerationTool",
@@ -20,5 +21,6 @@
     "DummyVideoGen",
     "ImageUnderstandingTool",
     "OpenAIImageGen",
-    "ReplicateImageGen"
+    "ReplicateImageGen",
+    "ReplicateVideoGen"
 ]