RichardAtCT · haripatel07 · Mar 7, 2026 · Mar 8, 2026 · Mar 8, 2026
diff --git a/src/bot/features/image_handler.py b/src/bot/features/image_handler.py
@@ -9,23 +9,27 @@
 """
 
 import base64
-from dataclasses import dataclass
+import uuid
+from dataclasses import dataclass, field
+from pathlib import Path
 from typing import Dict, Optional
 
 from telegram import PhotoSize
 
 from src.config import Settings
 
+_TEMP_DIR = Path("/tmp/claude_bot_files")
+
 
 @dataclass
 class ProcessedImage:
     """Processed image result"""
 
     prompt: str
     image_type: str
-    base64_data: str
     size: int
-    metadata: Dict[str, any] = None
+    metadata: Dict[str, object] = field(default_factory=dict)
+    base64_data: str = ""
 
 
 class ImageHandler:
@@ -38,37 +42,51 @@ def __init__(self, config: Settings):
     async def process_image(
         self, photo: PhotoSize, caption: Optional[str] = None
     ) -> ProcessedImage:
-        """Process uploaded image"""
-
-        # Download image
+        """Process uploaded image — save to temp file and build a path-based prompt."""
+        # Download image bytes
         file = await photo.get_file()
-        image_bytes = await file.download_as_bytearray()
-
-        # Detect image type
+        image_bytes = bytes(await file.download_as_bytearray())
+
+        # Detect format and save to temp file so Claude CLI can read it
+        fmt = self._detect_format(image_bytes)
+        if fmt == "unknown":
+            raise ValueError(
+                "Unsupported image format. Please upload a PNG, JPEG, GIF, or WebP image."
+            )
+        _TEMP_DIR.mkdir(mode=0o700, exist_ok=True)
+        image_path = _TEMP_DIR / f"image_{uuid.uuid4()}.{fmt}"
+        try:
+            image_path.write_bytes(image_bytes)
+            image_path.chmod(0o600)
+        except Exception:
+            image_path.unlink(missing_ok=True)
+            raise
+
+        # Detect image type for prompt tailoring
         image_type = self._detect_image_type(image_bytes)
 
-        # Create appropriate prompt
+        # Build prompt with actual file path so Claude CLI can see the image
         if image_type == "screenshot":
-            prompt = self._create_screenshot_prompt(caption)
+            prompt = self._create_screenshot_prompt(caption, image_path)
         elif image_type == "diagram":
-            prompt = self._create_diagram_prompt(caption)
+            prompt = self._create_diagram_prompt(caption, image_path)
         elif image_type == "ui_mockup":
-            prompt = self._create_ui_prompt(caption)
+            prompt = self._create_ui_prompt(caption, image_path)
         else:
-            prompt = self._create_generic_prompt(caption)
+            prompt = self._create_generic_prompt(caption, image_path)
 
-        # Convert to base64 for Claude (if supported in future)
+        # Retained for future multimodal SDK support — not currently used
         base64_image = base64.b64encode(image_bytes).decode("utf-8")
-
         return ProcessedImage(
             prompt=prompt,
             image_type=image_type,
-            base64_data=base64_image,
             size=len(image_bytes),
             metadata={
-                "format": self._detect_format(image_bytes),
+                "format": fmt,
                 "has_caption": caption is not None,
+                "temp_path": str(image_path),
             },
+            base64_data=base64_image,
         )
 
     def _detect_image_type(self, image_bytes: bytes) -> str:
@@ -93,61 +111,35 @@ def _detect_format(self, image_bytes: bytes) -> str:
         else:
             return "unknown"
 
-    def _create_screenshot_prompt(self, caption: Optional[str]) -> str:
+    def _create_screenshot_prompt(
+        self, caption: Optional[str], image_path: Path
+    ) -> str:
         """Create prompt for screenshot analysis"""
-        base_prompt = """I'm sharing a screenshot with you. Please analyze it and help me with:
-
-1. Identifying what application or website this is from
-2. Understanding the UI elements and their purpose
-3. Any issues or improvements you notice
-4. Answering any specific questions I have
-
-"""
+        base = f"I'm sharing a screenshot with you. Please read the image file at this path using your Read tool: {image_path}\n\nPlease analyze it and help me with:\n1. Identifying what application or website this is from\n2. Understanding the UI elements and their purpose\n3. Any issues or improvements you notice\n4. Answering any specific questions I have\n"
         if caption:
-            base_prompt += f"Specific request: {caption}"
+            base += f"\nSpecific request: {caption}"
+        return base
 
-        return base_prompt
-
-    def _create_diagram_prompt(self, caption: Optional[str]) -> str:
+    def _create_diagram_prompt(self, caption: Optional[str], image_path: Path) -> str:
         """Create prompt for diagram analysis"""
-        base_prompt = """I'm sharing a diagram with you. Please help me:
-
-1. Understand the components and their relationships
-2. Identify the type of diagram (flowchart, architecture, etc.)
-3. Explain any technical concepts shown
-4. Suggest improvements or clarifications
-
-"""
+        base = f"I'm sharing a diagram with you. Please read the image file at this path using your Read tool: {image_path}\n\nPlease help me:\n1. Understand the components and their relationships\n2. Identify the type of diagram\n3. Explain any technical concepts shown\n4. Suggest improvements or clarifications\n"
         if caption:
-            base_prompt += f"Specific request: {caption}"
-
-        return base_prompt
+            base += f"\nSpecific request: {caption}"
+        return base
 
-    def _create_ui_prompt(self, caption: Optional[str]) -> str:
+    def _create_ui_prompt(self, caption: Optional[str], image_path: Path) -> str:
         """Create prompt for UI mockup analysis"""
-        base_prompt = """I'm sharing a UI mockup with you. Please analyze:
-
-1. The layout and visual hierarchy
-2. User experience considerations
-3. Accessibility aspects
-4. Implementation suggestions
-5. Any potential improvements
-
-"""
+        base = f"I'm sharing a UI mockup with you. Please read the image file at this path using your Read tool: {image_path}\n\nPlease analyze:\n1. The layout and visual hierarchy\n2. UX improvements\n3. Accessibility concerns\n"
         if caption:
-            base_prompt += f"Specific request: {caption}"
-
-        return base_prompt
+            base += f"\nSpecific request: {caption}"
+        return base
 
-    def _create_generic_prompt(self, caption: Optional[str]) -> str:
+    def _create_generic_prompt(self, caption: Optional[str], image_path: Path) -> str:
         """Create generic image analysis prompt"""
-        base_prompt = """I'm sharing an image with you. Please analyze it and provide relevant insights.
-
-"""
+        base = f"I'm sharing an image with you. Please read the image file at this path using your Read tool: {image_path}\n\nPlease analyze and describe what you see.\n"
         if caption:
-            base_prompt += f"Context: {caption}"
-
-        return base_prompt
+            base += f"\nSpecific request: {caption}"
+        return base
 
     def supports_format(self, filename: str) -> bool:
         """Check if image format is supported"""

diff --git a/src/bot/orchestrator.py b/src/bot/orchestrator.py
@@ -1276,6 +1276,7 @@ async def agentic_photo(
         await chat.send_action("typing")
         progress_msg = await update.message.reply_text("Working...")
 
+        processed_image = None
         try:
             photo = update.message.photo[-1]
             processed_image = await image_handler.process_image(
@@ -1297,6 +1298,11 @@ async def agentic_photo(
             logger.error(
                 "Claude photo processing failed", error=str(e), user_id=user_id
             )
+        finally:
+            if processed_image is not None:
+                temp_path = processed_image.metadata.get("temp_path")
+                if temp_path:
+                    Path(temp_path).unlink(missing_ok=True)
 
     async def agentic_voice(
         self, update: Update, context: ContextTypes.DEFAULT_TYPE