Bump LLM Vision integration version.
dannytsang committed Jan 19, 2025
1 parent 6d154e2 commit 11d5424
Showing 8 changed files with 1,131 additions and 915 deletions.
325 changes: 194 additions & 131 deletions custom_components/llmvision/__init__.py

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions custom_components/llmvision/calendar.py
@@ -37,10 +37,12 @@ def __init__(self, hass: HomeAssistant, config_entry: ConfigEntry):
self._attr_supported_features = (CalendarEntityFeature.DELETE_EVENT)
# Path to the JSON file where events are stored
self._file_path = os.path.join(
self.hass.config.path("custom_components/llmvision"), "events.json"
self.hass.config.path("llmvision"), "events.json"
)
# Ensure the directory exists
os.makedirs(os.path.dirname(self._file_path), exist_ok=True)
self.hass.loop.create_task(self.async_update())

def _ensure_datetime(self, dt):
"""Ensure the input is a datetime.datetime object."""
if isinstance(dt, datetime.date) and not isinstance(dt, datetime.datetime):
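
A note on the calendar change above: the events.json store moves from the integration's own folder under custom_components to a dedicated llmvision directory directly beneath the Home Assistant configuration path, and the folder is created if it does not yet exist. A minimal sketch of how that path resolves on a standard /config install; the helper name is illustrative, not part of the integration:

    import os

    def events_file_path(config_dir="/config"):
        """Resolve the new storage location: <config>/llmvision/events.json."""
        path = os.path.join(config_dir, "llmvision", "events.json")
        # Same guard as in the diff: make sure the directory exists first.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        return path

    # events_file_path() -> "/config/llmvision/events.json"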
304 changes: 87 additions & 217 deletions custom_components/llmvision/config_flow.py

Large diffs are not rendered by default.

20 changes: 11 additions & 9 deletions custom_components/llmvision/const.py
@@ -5,6 +5,10 @@

# Configuration values from setup
CONF_OPENAI_API_KEY = 'openai_api_key'
CONF_AZURE_API_KEY = 'azure_api_key'
CONF_AZURE_BASE_URL = 'azure_base_url'
CONF_AZURE_DEPLOYMENT = 'azure_deployment'
CONF_AZURE_VERSION = 'azure_version'
CONF_ANTHROPIC_API_KEY = 'anthropic_api_key'
CONF_GOOGLE_API_KEY = 'google_api_key'
CONF_GROQ_API_KEY = 'groq_api_key'
@@ -31,22 +35,19 @@
EVENT_ID = 'event_id'
INTERVAL = 'interval'
DURATION = 'duration'
FRIGATE_RETRY_ATTEMPTS = 'frigate_retry_attempts'
FRIGATE_RETRY_SECONDS = 'frigate_retry_seconds'
MAX_FRAMES = 'max_frames'
DETAIL = 'detail'
TEMPERATURE = 'temperature'
INCLUDE_FILENAME = 'include_filename'
EXPOSE_IMAGES = 'expose_images'
EXPOSE_IMAGES_PERSIST = 'expose_images_persist'
GENERATE_TITLE = 'generate_title'
SENSOR_ENTITY = 'sensor_entity'

# Error messages
ERROR_OPENAI_NOT_CONFIGURED = "OpenAI is not configured"
ERROR_ANTHROPIC_NOT_CONFIGURED = "Anthropic is not configured"
ERROR_GOOGLE_NOT_CONFIGURED = "Google is not configured"
ERROR_GROQ_NOT_CONFIGURED = "Groq is not configured"
ERROR_NOT_CONFIGURED = "{provider} is not configured"
ERROR_GROQ_MULTIPLE_IMAGES = "Groq does not support videos or streams"
ERROR_LOCALAI_NOT_CONFIGURED = "LocalAI is not configured"
ERROR_OLLAMA_NOT_CONFIGURED = "Ollama is not configured"
ERROR_CUSTOM_OPENAI_NOT_CONFIGURED = "Custom OpenAI provider is not configured"
ERROR_NO_IMAGE_INPUT = "No image input provided"
ERROR_HANDSHAKE_FAILED = "Connection could not be established"

@@ -60,4 +61,5 @@
ENDPOINT_GOOGLE = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
ENDPOINT_GROQ = "https://api.groq.com/openai/v1/chat/completions"
ENDPOINT_LOCALAI = "{protocol}://{ip_address}:{port}/v1/chat/completions"
ENDPOINT_OLLAMA = "{protocol}://{ip_address}:{port}/api/chat"
ENDPOINT_AZURE = "{base_url}openai/deployments/{deployment}/chat/completions?api-version={api_version}"
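
For reference, the per-provider "… is not configured" messages collapse into the single ERROR_NOT_CONFIGURED template, and the new Azure endpoint is likewise a plain format string. A quick sketch of how both would be filled in; the resource name, deployment and API version below are placeholders, not values taken from the commit:

    ERROR_NOT_CONFIGURED = "{provider} is not configured"
    ENDPOINT_AZURE = "{base_url}openai/deployments/{deployment}/chat/completions?api-version={api_version}"

    print(ERROR_NOT_CONFIGURED.format(provider="Azure OpenAI"))
    # Azure OpenAI is not configured

    print(ENDPOINT_AZURE.format(
        base_url="https://my-resource.openai.azure.com/",  # placeholder; the template expects a trailing slash
        deployment="gpt-4o",                               # placeholder deployment name
        api_version="2024-02-01",                          # placeholder API version
    ))
    # https://my-resource.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01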
52 changes: 42 additions & 10 deletions custom_components/llmvision/media_handlers.py
@@ -1,10 +1,12 @@
import base64
import io
import os
import uuid
import shutil
import logging
import time
import asyncio
from homeassistant.helpers.aiohttp_client import async_get_clientsession
from functools import partial
from PIL import Image, UnidentifiedImageError
import numpy as np
@@ -19,6 +21,7 @@
class MediaProcessor:
def __init__(self, hass, client):
self.hass = hass
self.session = async_get_clientsession(self.hass)
self.client = client
self.base64_images = []
self.filenames = []
@@ -135,6 +138,28 @@ async def resize_image(self, target_width, image_path=None, image_data=None, img
base64_image = await self._encode_image(img)

return base64_image

async def _fetch(self, url, max_retries=2, retry_delay=1):
"""Fetch image from url and return image data"""
retries = 0
while retries < max_retries:
_LOGGER.info(
f"Fetching {url} (attempt {retries + 1}/{max_retries})")
try:
response = await self.session.get(url)
if response.status != 200:
_LOGGER.warning(
f"Couldn't fetch frame (status code: {response.status})")
retries += 1
await asyncio.sleep(retry_delay)
continue
data = await response.read()
return data
except Exception as e:
_LOGGER.error(f"Fetch failed: {e}")
retries += 1
await asyncio.sleep(retry_delay)
_LOGGER.warning(f"Failed to fetch {url} after {max_retries} retries")

async def record(self, image_entities, duration, max_frames, target_width, include_filename, expose_images):
"""Wrapper for client.add_frame with integrated recorder
@@ -162,7 +187,7 @@ async def record_camera(image_entity, camera_number):
frame_url = base_url + \
self.hass.states.get(image_entity).attributes.get(
'entity_picture')
frame_data = await self.client._fetch(frame_url)
frame_data = await self._fetch(frame_url)

# Skip frame if fetch failed
if not frame_data:
@@ -251,7 +276,7 @@ async def add_images(self, image_entities, image_paths, target_width, include_fi
image_url = base_url + \
self.hass.states.get(image_entity).attributes.get(
'entity_picture')
image_data = await self.client._fetch(image_url)
image_data = await self._fetch(image_url)

# Skip frame if fetch failed
if not image_data:
@@ -294,10 +319,11 @@ async def add_images(self, image_entities, image_paths, target_width, include_fi
raise ServiceValidationError(f"Error: {e}")
return self.client

async def add_videos(self, video_paths, event_ids, max_frames, target_width, include_filename, expose_images):
async def add_videos(self, video_paths, event_ids, max_frames, target_width, include_filename, expose_images, expose_images_persist, frigate_retry_attempts, frigate_retry_seconds):
"""Wrapper for client.add_frame for videos"""
tmp_clips_dir = f"/config/custom_components/{DOMAIN}/tmp_clips"
tmp_frames_dir = f"/config/custom_components/{DOMAIN}/tmp_frames"
processed_event_ids = []

if not video_paths:
video_paths = []
@@ -306,8 +332,9 @@ async def add_videos(self, video_paths, event_ids, max_frames, target_width, inc
try:
base_url = get_url(self.hass)
frigate_url = base_url + "/api/frigate/notifications/" + event_id + "/clip.mp4"
clip_data = await self.client._fetch(frigate_url)

clip_data = await self._fetch(frigate_url, max_retries=frigate_retry_attempts, retry_delay=frigate_retry_seconds)

if not clip_data:
raise ServiceValidationError(
f"Failed to fetch frigate clip {event_id}")
@@ -323,6 +350,7 @@ async def add_videos(self, video_paths, event_ids, max_frames, target_width, inc
f"Saved frigate clip to {clip_path} (temporarily)")
# append to video_paths
video_paths.append(clip_path)
processed_event_ids.append(event_id)

except AttributeError as e:
raise ServiceValidationError(
@@ -331,6 +359,8 @@ async def add_videos(self, video_paths, event_ids, max_frames, target_width, inc
_LOGGER.debug(f"Processing videos: {video_paths}")
for video_path in video_paths:
try:
current_event_id = str(uuid.uuid4())
processed_event_ids.append(current_event_id)
video_path = video_path.strip()
if os.path.exists(video_path):
# create tmp dir to store extracted frames
@@ -347,8 +377,9 @@ async def add_videos(self, video_paths, event_ids, max_frames, target_width, inc
ffmpeg_cmd = [
"ffmpeg",
"-i", video_path,
"-vf", f"fps=1/{interval},select='eq(n\\,0)+not(mod(n\\,{interval}))'", os.path.join(
tmp_frames_dir, "frame%04d.jpg")
"-vf", f"fps=fps='source_fps',select='eq(n\\,0)+not(mod(n\\,{interval}))'",
"-fps_mode", "passthrough",
os.path.join(tmp_frames_dir, "frame%04d.jpg")
]
# Run ffmpeg command
await self.hass.loop.run_in_executor(None, os.system, " ".join(ffmpeg_cmd))
@@ -391,16 +422,17 @@ async def add_videos(self, video_paths, event_ids, max_frames, target_width, inc
sorted_frames.append(frames[0])

# Add frames to client
counter = 1
for frame_path, _ in sorted_frames:
for counter, (frame_path, _) in enumerate(sorted_frames, start=1):
resized_image = await self.resize_image(image_path=frame_path, target_width=target_width)
if expose_images:
await self._save_clip(image_path="/config/www/llmvision/" + frame_path.split("/")[-1], image_data=resized_image)
persist_filename = f"/config/www/llmvision/" + frame_path.split("/")[-1]
if expose_images_persist:
persist_filename = f"/config/www/llmvision/{current_event_id}-" + frame_path.split("/")[-1]
await self._save_clip(image_data=resized_image, image_path=persist_filename)
self.client.add_frame(
base64_image=resized_image,
filename=video_path.split('/')[-1].split('.')[-2] + " (frame " + str(counter) + ")" if include_filename else "Video frame " + str(counter)
)
counter += 1

else:
raise ServiceValidationError(
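
On the frame-extraction change above: the previous filter sampled at fps=1/{interval}, while the new command keeps the source frame rate, selects frame 0 plus every {interval}-th frame, and adds -fps_mode passthrough so the selected frames are written out without being re-timed. A minimal sketch of an equivalent invocation, run without a shell (the diff assembles a single shell string for os.system); the function name is illustrative and ffmpeg is assumed to be on the PATH:

    import asyncio

    async def extract_frames(video_path, interval, tmp_frames_dir):
        """Extract frame 0 and every `interval`-th frame from a clip."""
        video_filter = (
            "fps=fps=source_fps,"                            # keep the clip's own frame rate
            f"select='eq(n\\,0)+not(mod(n\\,{interval}))'"   # frame 0 + every interval-th frame
        )
        cmd = [
            "ffmpeg", "-i", video_path,
            "-vf", video_filter,
            "-fps_mode", "passthrough",                      # emit selected frames as-is
            f"{tmp_frames_dir}/frame%04d.jpg",
        ]
        process = await asyncio.create_subprocess_exec(*cmd)
        return await process.wait()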