Merge pull request #15 from jhakulin/jhakulin/reconnect

Add reconnect configuration
jhakulin · Dec 1, 2024 · 65fc36e · 65fc36e
2 parents f31c2ec + bdedff0
commit 65fc36e
Show file tree

Hide file tree

Showing 11 changed files with 557 additions and 10 deletions.
diff --git a/README.md b/README.md
@@ -50,7 +50,9 @@ options = RealtimeAIOptions(
    tools=functions.definitions,
    tool_choice="auto",
    temperature=0.8,
-   max_output_tokens=None
+   max_output_tokens=None,
+   voice="sage",
+   enable_auto_reconnect=True,
 )
 
 # Define AudioStreamOptions (currently only 16bit PCM 24kHz mono is supported)
@@ -131,6 +133,20 @@ For example, the Lenovo ThinkPad P16S has been tested and provides a reliable co
    - Click **OK** to exit the Microphone Properties dialog.
    - Click **OK** in the Sound settings window to close it.
 
+### Audio Configuration on Mac
+
+1. **Install the PyAudio**:
+
+    If you encounter installation problems in Mac, ensure you have installed portaudio by `brew install portaudio` first.
+
+2. **Install the SSL certificates**:
+
+   If you encounter SSL certification problems when running the samples, install certificates via `/Applications/Python 3.x/Install Certificates.command`
+
+3. **Audio Echo Cancellation**:
+
+   If your Mac do not have integrated audio echo cancellation, using e.g. AirPods is recommended to prevent assistant voice leaking into microphone input.
+
 ### Alternative Audio Options
 
 If you encounter issues with audio echo that cannot be resolved through configuration changes, consider using a headset with an integrated microphone and speakers. This setup naturally avoids problems with echo, as the audio output from the speakers is isolated from the microphone input. This can provide a more seamless audio experience without relying on device-based audio echo cancellation.

diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,7 @@
-https://github.com/jhakulin/realtime-ai/releases/download/v0.1.3/realtime_ai-0.1.3-py3-none-any.whl
+https://github.com/jhakulin/realtime-ai/releases/download/v0.1.4/realtime_ai-0.1.4-py3-none-any.whl
 pyaudio
 numpy
 websockets
 websocket-client
 azure-cognitiveservices-speech
+scipy
diff --git a/samples/async/sample_realtime_ai_with_keyword_and_vad.py b/samples/async/sample_realtime_ai_with_keyword_and_vad.py
diff --git a/samples/sample_realtime_ai_with_keyword_and_vad.py b/samples/sample_realtime_ai_with_keyword_and_vad.py
@@ -171,6 +171,12 @@ def on_error(self, event: ErrorEvent):
     def on_input_audio_buffer_speech_stopped(self, event: InputAudioBufferSpeechStopped):
         logger.info(f"Server VAD: Speech stopped at {event.audio_end_ms}ms, Item ID: {event.item_id}")
 
+    def on_input_audio_buffer_cleared(self, event: InputAudioBufferCleared):
+        logger.info("Input audio buffer cleared.")
+
+    def on_reconnected(self, event: ReconnectedEvent):
+        logger.info("Reconnected...")
+
     def on_input_audio_buffer_committed(self, event: InputAudioBufferCommitted):
         logger.debug(f"Audio Buffer Committed: {event.item_id}")
 
@@ -336,6 +342,7 @@ def main():
             temperature=0.8,
             max_output_tokens=None,
             voice="sage",
+            enable_auto_reconnect=True,
         )
 
         # Define AudioStreamOptions

diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name="realtime-ai",
-    version="0.1.3",
+    version="0.1.4",
     description="Python SDK for real-time audio processing with OpenAI's Realtime REST API.",
     long_description=open("README.md").read(),
     long_description_content_type="text/markdown",

diff --git a/src/realtime_ai/aio/realtime_ai_service_manager.py b/src/realtime_ai/aio/realtime_ai_service_manager.py
@@ -29,7 +29,9 @@
     InputAudioBufferSpeechStarted,
     ResponseOutputItemAdded,
     ResponseFunctionCallArgumentsDelta,
-    ResponseFunctionCallArgumentsDone
+    ResponseFunctionCallArgumentsDone,
+    InputAudioBufferCleared,
+    ReconnectedEvent
 )
 
 logger = logging.getLogger(__name__)
@@ -90,10 +92,18 @@ async def send_event(self, event: dict):
         except Exception as e:
             logger.error(f"RealtimeAIServiceManager: Failed to send event {event.get('type')}: {e}")
 
-    async def on_connected(self):
+    async def on_connected(self, reconnection: bool = False):
         self.is_connected = True
         logger.info("RealtimeAIServiceManager: Connected to WebSocket.")
         await self.send_event(self.session_update_event)
+        if reconnection:
+            # If it's a reconnection, trigger a ReconnectedEvent
+            reconnect_event = ReconnectedEvent(
+                event_id=self._generate_event_id(), 
+                type="reconnect",
+            )
+            await self.on_message_received(json.dumps(reconnect_event.__dict__))  # Sending ReconnectedEvent as JSON string
+            logger.debug("RealtimeAIServiceManager: ReconnectedEvent sent.")
         logger.debug("RealtimeAIServiceManager: session.update event sent.")
 
     async def on_disconnected(self, status_code: int, reason: str):
@@ -179,6 +189,8 @@ def _get_event_class(self, event_type: str) -> Optional[Type[EventBase]]:
             "response.output_item.added": ResponseOutputItemAdded,
             "response.function_call_arguments.delta": ResponseFunctionCallArgumentsDelta,
             "response.function_call_arguments.done": ResponseFunctionCallArgumentsDone,
+            "input_audio_buffer.cleared": InputAudioBufferCleared,
+            "reconnected": ReconnectedEvent
         }
         return event_mapping.get(event_type)
 

diff --git a/src/realtime_ai/aio/web_socket_manager.py b/src/realtime_ai/aio/web_socket_manager.py
@@ -22,15 +22,21 @@ def __init__(self, options: RealtimeAIOptions, service_manager):
             "OpenAI-Beta": "realtime=v1",
         }
 
-    async def connect(self):
+        self.reconnect_delay = 5 # Time to wait before attempting to reconnect, in seconds
+
+    async def connect(self, reconnection=False):
         """
         Establishes a WebSocket connection.
         """
         try:
+            if self.websocket and self.websocket.open:
+                logger.info("WebSocketManager: Already connected.")
+                return
+
             logger.info(f"WebSocketManager: Connecting to {self.url}")
             self.websocket = await websockets.connect(self.url, extra_headers=self.headers)
             logger.info("WebSocketManager: WebSocket connection established.")
-            await self.service_manager.on_connected()
+            await self.service_manager.on_connected(reconnection=reconnection)
 
             asyncio.create_task(self._receive_messages())  # Begin listening as a separate task
         except Exception as e:
@@ -43,6 +49,11 @@ async def _receive_messages(self):
         try:
             async for message in self.websocket:
                 await self.service_manager.on_message_received(message)
+                logger.debug(f"WebSocketManager: Received message: {message}")
+                if "session_expired" in message and "maximum duration of 15 minutes" in message:
+                    logger.info("WebSocketManager: Reconnecting due to maximum duration reached.")
+                    await asyncio.sleep(self.reconnect_delay)
+                    await self.connect(reconnection=True)
         except websockets.exceptions.ConnectionClosed as e:
             logger.warning(f"WebSocketManager: Connection closed during receive: {e.code} - {e.reason}")
             await self.service_manager.on_disconnected(e.code, e.reason)

diff --git a/src/realtime_ai/models/realtime_ai_events.py b/src/realtime_ai/models/realtime_ai_events.py
@@ -150,3 +150,11 @@ class ResponseFunctionCallArgumentsDone(EventBase):
     output_index: int
     call_id: str
     arguments: str
+
+@dataclass
+class InputAudioBufferCleared(EventBase):
+    pass
+
+@dataclass
+class ReconnectedEvent(EventBase):
+    pass
diff --git a/src/realtime_ai/models/realtime_ai_options.py b/src/realtime_ai/models/realtime_ai_options.py
@@ -24,6 +24,7 @@ class RealtimeAIOptions:
     tool_choice: str = "auto"
     temperature: float = 0.8
     max_output_tokens: Optional[int] = None
+    enable_auto_reconnect: bool = False
 
     def __post_init__(self):
         self.validate_options()

diff --git a/src/realtime_ai/realtime_ai_service_manager.py b/src/realtime_ai/realtime_ai_service_manager.py
@@ -31,7 +31,9 @@
     InputAudioBufferSpeechStarted,
     ResponseOutputItemAdded,
     ResponseFunctionCallArgumentsDelta,
-    ResponseFunctionCallArgumentsDone
+    ResponseFunctionCallArgumentsDone,
+    InputAudioBufferCleared,
+    ReconnectedEvent
 )
 
 logger = logging.getLogger(__name__)
@@ -94,9 +96,17 @@ def send_event(self, event: dict):
         except Exception as e:
             logger.error(f"RealtimeAIServiceManager: Failed to send event {event.get('type')}: {e}")
 
-    def on_connected(self):
+    def on_connected(self, reconnection: bool = False):
         logger.info("RealtimeAIServiceManager: WebSocket connected.")
         self.send_event(self.session_update_event)
+        if reconnection:
+            # If it's a reconnection, trigger a ReconnectedEvent
+            reconnect_event = ReconnectedEvent(
+                event_id=self._generate_event_id(), 
+                type="reconnect",
+            )
+            self.on_message_received(json.dumps(reconnect_event.__dict__))  # Sending ReconnectedEvent as JSON string
+            logger.debug("RealtimeAIServiceManager: ReconnectedEvent sent.")
         logger.debug("RealtimeAIServiceManager: session.update event sent.")
 
     def on_disconnected(self, status_code: int, reason: str):
@@ -179,6 +189,8 @@ def _get_event_class(self, event_type: str) -> Optional[Type[EventBase]]:
             "response.output_item.added": ResponseOutputItemAdded,
             "response.function_call_arguments.delta": ResponseFunctionCallArgumentsDelta,
             "response.function_call_arguments.done": ResponseFunctionCallArgumentsDone,
+            "input_audio_buffer.cleared": InputAudioBufferCleared,
+            "reconnected": ReconnectedEvent
         }
         return event_mapping.get(event_type)
 

diff --git a/src/realtime_ai/web_socket_manager.py b/src/realtime_ai/web_socket_manager.py
@@ -1,6 +1,7 @@
 import json
 import logging
 import threading
+import time
 import websocket  # pip install websocket-client
 from realtime_ai.models.realtime_ai_options import RealtimeAIOptions
 
@@ -23,12 +24,18 @@ def __init__(self, options : RealtimeAIOptions, service_manager):
 
         self.ws = None
         self._receive_thread = None
+        self.reconnect_delay = 5 # Time to wait before attempting to reconnect, in seconds
+        self.is_reconnection = False
 
     def connect(self):
         """
         Establishes a WebSocket connection.
         """
         try:
+            if self.ws and self.ws.sock and self.ws.sock.connected:
+                logger.info("WebSocketManager: Already connected.")
+                return
+
             logger.info(f"WebSocketManager: Connecting to {self.url}")
             self.ws = websocket.WebSocketApp(
                 self.url,
@@ -69,7 +76,15 @@ def send(self, message: dict):
 
     def _on_open(self, ws):
         logger.info("WebSocketManager: WebSocket connection opened.")
-        self.service_manager.on_connected()
+        if self.is_reconnection:
+            logger.info("WebSocketManager: Connection reopened (Reconnection).")
+            self.service_manager.on_connected(reconnection=True)
+            self.is_reconnection = False
+        else:
+            logger.info("WebSocketManager: Connection opened (Initial).")
+            self.service_manager.on_connected()
+
+        self.is_reconnection = False 
 
     def _on_message(self, ws, message):
         logger.debug(f"WebSocketManager: Received message: {message}")
@@ -83,3 +98,14 @@ def _on_close(self, ws, close_status_code, close_msg):
         logger.warning(f"WebSocketManager: WebSocket connection closed: {close_status_code} - {close_msg}")
         self.service_manager.on_disconnected(close_status_code, close_msg)
 
+        # If the session ended due to maximum duration, attempt to reconnect
+        if close_status_code == 1001 and "maximum duration of 15 minutes" in close_msg:
+            logger.debug("WebSocketManager: Session ended due to maximum duration. Reconnecting...")
+            if self.options.enable_auto_reconnect:
+                self._schedule_reconnect()
+
+    def _schedule_reconnect(self):
+        logger.info("WebSocketManager: Scheduling reconnection...")
+        time.sleep(self.reconnect_delay)
+        self.is_reconnection = True
+        self.connect()