Bump LLM Vision integration version.
dannytsang committed Jan 19, 2025
1 parent 6d154e2 commit 11d5424
Showing 8 changed files with 1,131 additions and 915 deletions.
325 changes: 194 additions & 131 deletions custom_components/llmvision/__init__.py

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions custom_components/llmvision/calendar.py
@@ -37,10 +37,12 @@ def __init__(self, hass: HomeAssistant, config_entry: ConfigEntry):
self._attr_supported_features = (CalendarEntityFeature.DELETE_EVENT)
# Path to the JSON file where events are stored
self._file_path = os.path.join(
self.hass.config.path("custom_components/llmvision"), "events.json"
self.hass.config.path("llmvision"), "events.json"
)
# Ensure the directory exists
os.makedirs(os.path.dirname(self._file_path), exist_ok=True)
self.hass.loop.create_task(self.async_update())

def _ensure_datetime(self, dt):
"""Ensure the input is a datetime.datetime object."""
if isinstance(dt, datetime.date) and not isinstance(dt, datetime.datetime):
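
A note on the calendar change above: the events.json store moves from the integration's own folder under custom_components to a dedicated llmvision directory directly beneath the Home Assistant configuration path, and the folder is created if it does not yet exist. A minimal sketch of how that path resolves on a standard /config install; the helper name is illustrative, not part of the integration:

    import os

    def events_file_path(config_dir="/config"):
        """Resolve the new storage location: <config>/llmvision/events.json."""
        path = os.path.join(config_dir, "llmvision", "events.json")
        # Same guard as in the diff: make sure the directory exists first.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        return path

    # events_file_path() -> "/config/llmvision/events.json"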
304 changes: 87 additions & 217 deletions custom_components/llmvision/config_flow.py

Large diffs are not rendered by default.

20 changes: 11 additions & 9 deletions custom_components/llmvision/const.py
@@ -5,6 +5,10 @@

# Configuration values from setup
CONF_OPENAI_API_KEY = 'openai_api_key'
CONF_AZURE_API_KEY = 'azure_api_key'
CONF_AZURE_BASE_URL = 'azure_base_url'
CONF_AZURE_DEPLOYMENT = 'azure_deployment'
CONF_AZURE_VERSION = 'azure_version'
CONF_ANTHROPIC_API_KEY = 'anthropic_api_key'
CONF_GOOGLE_API_KEY = 'google_api_key'
CONF_GROQ_API_KEY = 'groq_api_key'
@@ -31,22 +35,19 @@
EVENT_ID = 'event_id'
INTERVAL = 'interval'
DURATION = 'duration'
FRIGATE_RETRY_ATTEMPTS = 'frigate_retry_attempts'
FRIGATE_RETRY_SECONDS = 'frigate_retry_seconds'
MAX_FRAMES = 'max_frames'
DETAIL = 'detail'
TEMPERATURE = 'temperature'
INCLUDE_FILENAME = 'include_filename'
EXPOSE_IMAGES = 'expose_images'
EXPOSE_IMAGES_PERSIST = 'expose_images_persist'
GENERATE_TITLE = 'generate_title'
SENSOR_ENTITY = 'sensor_entity'

# Error messages
ERROR_OPENAI_NOT_CONFIGURED = "OpenAI is not configured"
ERROR_ANTHROPIC_NOT_CONFIGURED = "Anthropic is not configured"
ERROR_GOOGLE_NOT_CONFIGURED = "Google is not configured"
ERROR_GROQ_NOT_CONFIGURED = "Groq is not configured"
ERROR_NOT_CONFIGURED = "{provider} is not configured"
ERROR_GROQ_MULTIPLE_IMAGES = "Groq does not support videos or streams"
ERROR_LOCALAI_NOT_CONFIGURED = "LocalAI is not configured"
ERROR_OLLAMA_NOT_CONFIGURED = "Ollama is not configured"
ERROR_CUSTOM_OPENAI_NOT_CONFIGURED = "Custom OpenAI provider is not configured"
ERROR_NO_IMAGE_INPUT = "No image input provided"
ERROR_HANDSHAKE_FAILED = "Connection could not be established"

@@ -60,4 +61,5 @@
ENDPOINT_GOOGLE = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
ENDPOINT_GROQ = "https://api.groq.com/openai/v1/chat/completions"
ENDPOINT_LOCALAI = "{protocol}://{ip_address}:{port}/v1/chat/completions"
ENDPOINT_OLLAMA = "{protocol}://{ip_address}:{port}/api/chat"
ENDPOINT_AZURE = "{base_url}openai/deployments/{deployment}/chat/completions?api-version={api_version}"
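
For reference, the per-provider "… is not configured" messages collapse into the single ERROR_NOT_CONFIGURED template, and the new Azure endpoint is likewise a plain format string. A quick sketch of how both would be filled in; the resource name, deployment and API version below are placeholders, not values taken from the commit:

    ERROR_NOT_CONFIGURED = "{provider} is not configured"
    ENDPOINT_AZURE = "{base_url}openai/deployments/{deployment}/chat/completions?api-version={api_version}"

    print(ERROR_NOT_CONFIGURED.format(provider="Azure OpenAI"))
    # Azure OpenAI is not configured

    print(ENDPOINT_AZURE.format(
        base_url="https://my-resource.openai.azure.com/",  # placeholder; the template expects a trailing slash
        deployment="gpt-4o",                               # placeholder deployment name
        api_version="2024-02-01",                          # placeholder API version
    ))
    # https://my-resource.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01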
52 changes: 42 additions & 10 deletions custom_components/llmvision/media_handlers.py
@@ -1,10 +1,12 @@
import base64
import io
import os
import uuid
import shutil
import logging
import time
import asyncio
from homeassistant.helpers.aiohttp_client import async_get_clientsession
from functools import partial
from PIL import Image, UnidentifiedImageError
import numpy as np
@@ -19,6 +21,7 @@
class MediaProcessor:
def __init__(self, hass, client):
self.hass = hass
self.session = async_get_clientsession(self.hass)
self.client = client
self.base64_images = []
self.filenames = []
@@ -135,6 +138,28 @@ async def resize_image(self, target_width, image_path=None, image_data=None, img
base64_image = await self._encode_image(img)

return base64_image

async def _fetch(self, url, max_retries=2, retry_delay=1):
"""Fetch image from url and return image data"""
retries = 0
while retries < max_retries:
_LOGGER.info(
f"Fetching {url} (attempt {retries + 1}/{max_retries})")
try:
response = await self.session.get(url)
if response.status != 200:
_LOGGER.warning(
f"Couldn't fetch frame (status code: {response.status})")
retries += 1
await asyncio.sleep(retry_delay)
continue
data = await response.read()
return data
except Exception as e:
_LOGGER.error(f"Fetch failed: {e}")
retries += 1
await asyncio.sleep(retry_delay)
_LOGGER.warning(f"Failed to fetch {url} after {max_retries} retries")

async def record(self, image_entities, duration, max_frames, target_width, include_filename, expose_images):
"""Wrapper for client.add_frame with integrated recorder
@@ -162,7 +187,7 @@ async def record_camera(image_entity, camera_number):
frame_url = base_url + \
self.hass.states.get(image_entity).attributes.get(
'entity_picture')
frame_data = await self.client._fetch(frame_url)
frame_data = await self._fetch(frame_url)

# Skip frame if fetch failed
if not frame_data:
@@ -251,7 +276,7 @@ async def add_images(self, image_entities, image_paths, target_width, include_fi
image_url = base_url + \
self.hass.states.get(image_entity).attributes.get(
'entity_picture')
image_data = await self.client._fetch(image_url)
image_data = await self._fetch(image_url)

# Skip frame if fetch failed
if not image_data:
@@ -294,10 +319,11 @@ async def add_images(self, image_entities, image_paths, target_width, include_fi
raise ServiceValidationError(f"Error: {e}")
return self.client

async def add_videos(self, video_paths, event_ids, max_frames, target_width, include_filename, expose_images):
async def add_videos(self, video_paths, event_ids, max_frames, target_width, include_filename, expose_images, expose_images_persist, frigate_retry_attempts, frigate_retry_seconds):
"""Wrapper for client.add_frame for videos"""
tmp_clips_dir = f"/config/custom_components/{DOMAIN}/tmp_clips"
tmp_frames_dir = f"/config/custom_components/{DOMAIN}/tmp_frames"
processed_event_ids = []

if not video_paths:
video_paths = []
@@ -306,8 +332,9 @@ async def add_videos(self, video_paths, event_ids, max_frames, target_width, inc
try:
base_url = get_url(self.hass)
frigate_url = base_url + "/api/frigate/notifications/" + event_id + "/clip.mp4"
clip_data = await self.client._fetch(frigate_url)

clip_data = await self._fetch(frigate_url, max_retries=frigate_retry_attempts, retry_delay=frigate_retry_seconds)

if not clip_data:
raise ServiceValidationError(
f"Failed to fetch frigate clip {event_id}")
@@ -323,6 +350,7 @@ async def add_videos(self, video_paths, event_ids, max_frames, target_width, inc
f"Saved frigate clip to {clip_path} (temporarily)")
# append to video_paths
video_paths.append(clip_path)
processed_event_ids.append(event_id)

except AttributeError as e:
raise ServiceValidationError(
@@ -331,6 +359,8 @@ async def add_videos(self, video_paths, event_ids, max_frames, target_width, inc
_LOGGER.debug(f"Processing videos: {video_paths}")
for video_path in video_paths:
try:
current_event_id = str(uuid.uuid4())
processed_event_ids.append(current_event_id)
video_path = video_path.strip()
if os.path.exists(video_path):
# create tmp dir to store extracted frames
@@ -347,8 +377,9 @@ async def add_videos(self, video_paths, event_ids, max_frames, target_width, inc
ffmpeg_cmd = [
"ffmpeg",
"-i", video_path,
"-vf", f"fps=1/{interval},select='eq(n\\,0)+not(mod(n\\,{interval}))'", os.path.join(
tmp_frames_dir, "frame%04d.jpg")
"-vf", f"fps=fps='source_fps',select='eq(n\\,0)+not(mod(n\\,{interval}))'",
"-fps_mode", "passthrough",
os.path.join(tmp_frames_dir, "frame%04d.jpg")
]
# Run ffmpeg command
await self.hass.loop.run_in_executor(None, os.system, " ".join(ffmpeg_cmd))
@@ -391,16 +422,17 @@ async def add_videos(self, video_paths, event_ids, max_frames, target_width, inc
sorted_frames.append(frames[0])

# Add frames to client
counter = 1
for frame_path, _ in sorted_frames:
for counter, (frame_path, _) in enumerate(sorted_frames, start=1):
resized_image = await self.resize_image(image_path=frame_path, target_width=target_width)
if expose_images:
await self._save_clip(image_path="/config/www/llmvision/" + frame_path.split("/")[-1], image_data=resized_image)
persist_filename = f"/config/www/llmvision/" + frame_path.split("/")[-1]
if expose_images_persist:
persist_filename = f"/config/www/llmvision/{current_event_id}-" + frame_path.split("/")[-1]
await self._save_clip(image_data=resized_image, image_path=persist_filename)
self.client.add_frame(
base64_image=resized_image,
filename=video_path.split('/')[-1].split('.')[-2] + " (frame " + str(counter) + ")" if include_filename else "Video frame " + str(counter)
)
counter += 1

else:
raise ServiceValidationError(
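
On the frame-extraction change above: the previous filter sampled at fps=1/{interval}, while the new command keeps the source frame rate, selects frame 0 plus every {interval}-th frame, and adds -fps_mode passthrough so the selected frames are written out without being re-timed. A minimal sketch of an equivalent invocation, run without a shell (the diff assembles a single shell string for os.system); the function name is illustrative and ffmpeg is assumed to be on the PATH:

    import asyncio

    async def extract_frames(video_path, interval, tmp_frames_dir):
        """Extract frame 0 and every `interval`-th frame from a clip."""
        video_filter = (
            "fps=fps=source_fps,"                            # keep the clip's own frame rate
            f"select='eq(n\\,0)+not(mod(n\\,{interval}))'"   # frame 0 + every interval-th frame
        )
        cmd = [
            "ffmpeg", "-i", video_path,
            "-vf", video_filter,
            "-fps_mode", "passthrough",                      # emit selected frames as-is
            f"{tmp_frames_dir}/frame%04d.jpg",
        ]
        process = await asyncio.create_subprocess_exec(*cmd)
        return await process.wait()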