Skip to content

Commit

Permalink
Merge pull request #15 from jhakulin/jhakulin/reconnect
Browse files Browse the repository at this point in the history
Add reconnect configuration
  • Loading branch information
jhakulin authored Dec 1, 2024
2 parents f31c2ec + bdedff0 commit 65fc36e
Show file tree
Hide file tree
Showing 11 changed files with 557 additions and 10 deletions.
18 changes: 17 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ options = RealtimeAIOptions(
tools=functions.definitions,
tool_choice="auto",
temperature=0.8,
max_output_tokens=None
max_output_tokens=None,
voice="sage",
enable_auto_reconnect=True,
)

# Define AudioStreamOptions (currently only 16bit PCM 24kHz mono is supported)
Expand Down Expand Up @@ -131,6 +133,20 @@ For example, the Lenovo ThinkPad P16S has been tested and provides a reliable co
- Click **OK** to exit the Microphone Properties dialog.
- Click **OK** in the Sound settings window to close it.

### Audio Configuration on Mac

1. **Install the PyAudio**:

If you encounter installation problems in Mac, ensure you have installed portaudio by `brew install portaudio` first.

2. **Install the SSL certificates**:

If you encounter SSL certification problems when running the samples, install certificates via `/Applications/Python 3.x/Install Certificates.command`

3. **Audio Echo Cancellation**:

If your Mac do not have integrated audio echo cancellation, using e.g. AirPods is recommended to prevent assistant voice leaking into microphone input.

### Alternative Audio Options

If you encounter issues with audio echo that cannot be resolved through configuration changes, consider using a headset with an integrated microphone and speakers. This setup naturally avoids problems with echo, as the audio output from the speakers is isolated from the microphone input. This can provide a more seamless audio experience without relying on device-based audio echo cancellation.
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
https://github.com/jhakulin/realtime-ai/releases/download/v0.1.3/realtime_ai-0.1.3-py3-none-any.whl
https://github.com/jhakulin/realtime-ai/releases/download/v0.1.4/realtime_ai-0.1.4-py3-none-any.whl
pyaudio
numpy
websockets
websocket-client
azure-cognitiveservices-speech
scipy
453 changes: 453 additions & 0 deletions samples/async/sample_realtime_ai_with_keyword_and_vad.py

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions samples/sample_realtime_ai_with_keyword_and_vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,12 @@ def on_error(self, event: ErrorEvent):
def on_input_audio_buffer_speech_stopped(self, event: InputAudioBufferSpeechStopped):
logger.info(f"Server VAD: Speech stopped at {event.audio_end_ms}ms, Item ID: {event.item_id}")

def on_input_audio_buffer_cleared(self, event: InputAudioBufferCleared):
logger.info("Input audio buffer cleared.")

def on_reconnected(self, event: ReconnectedEvent):
logger.info("Reconnected...")

def on_input_audio_buffer_committed(self, event: InputAudioBufferCommitted):
logger.debug(f"Audio Buffer Committed: {event.item_id}")

Expand Down Expand Up @@ -336,6 +342,7 @@ def main():
temperature=0.8,
max_output_tokens=None,
voice="sage",
enable_auto_reconnect=True,
)

# Define AudioStreamOptions
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="realtime-ai",
version="0.1.3",
version="0.1.4",
description="Python SDK for real-time audio processing with OpenAI's Realtime REST API.",
long_description=open("README.md").read(),
long_description_content_type="text/markdown",
Expand Down
16 changes: 14 additions & 2 deletions src/realtime_ai/aio/realtime_ai_service_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@
InputAudioBufferSpeechStarted,
ResponseOutputItemAdded,
ResponseFunctionCallArgumentsDelta,
ResponseFunctionCallArgumentsDone
ResponseFunctionCallArgumentsDone,
InputAudioBufferCleared,
ReconnectedEvent
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -90,10 +92,18 @@ async def send_event(self, event: dict):
except Exception as e:
logger.error(f"RealtimeAIServiceManager: Failed to send event {event.get('type')}: {e}")

async def on_connected(self):
async def on_connected(self, reconnection: bool = False):
self.is_connected = True
logger.info("RealtimeAIServiceManager: Connected to WebSocket.")
await self.send_event(self.session_update_event)
if reconnection:
# If it's a reconnection, trigger a ReconnectedEvent
reconnect_event = ReconnectedEvent(
event_id=self._generate_event_id(),
type="reconnect",
)
await self.on_message_received(json.dumps(reconnect_event.__dict__)) # Sending ReconnectedEvent as JSON string
logger.debug("RealtimeAIServiceManager: ReconnectedEvent sent.")
logger.debug("RealtimeAIServiceManager: session.update event sent.")

async def on_disconnected(self, status_code: int, reason: str):
Expand Down Expand Up @@ -179,6 +189,8 @@ def _get_event_class(self, event_type: str) -> Optional[Type[EventBase]]:
"response.output_item.added": ResponseOutputItemAdded,
"response.function_call_arguments.delta": ResponseFunctionCallArgumentsDelta,
"response.function_call_arguments.done": ResponseFunctionCallArgumentsDone,
"input_audio_buffer.cleared": InputAudioBufferCleared,
"reconnected": ReconnectedEvent
}
return event_mapping.get(event_type)

Expand Down
15 changes: 13 additions & 2 deletions src/realtime_ai/aio/web_socket_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,21 @@ def __init__(self, options: RealtimeAIOptions, service_manager):
"OpenAI-Beta": "realtime=v1",
}

async def connect(self):
self.reconnect_delay = 5 # Time to wait before attempting to reconnect, in seconds

async def connect(self, reconnection=False):
"""
Establishes a WebSocket connection.
"""
try:
if self.websocket and self.websocket.open:
logger.info("WebSocketManager: Already connected.")
return

logger.info(f"WebSocketManager: Connecting to {self.url}")
self.websocket = await websockets.connect(self.url, extra_headers=self.headers)
logger.info("WebSocketManager: WebSocket connection established.")
await self.service_manager.on_connected()
await self.service_manager.on_connected(reconnection=reconnection)

asyncio.create_task(self._receive_messages()) # Begin listening as a separate task
except Exception as e:
Expand All @@ -43,6 +49,11 @@ async def _receive_messages(self):
try:
async for message in self.websocket:
await self.service_manager.on_message_received(message)
logger.debug(f"WebSocketManager: Received message: {message}")
if "session_expired" in message and "maximum duration of 15 minutes" in message:
logger.info("WebSocketManager: Reconnecting due to maximum duration reached.")
await asyncio.sleep(self.reconnect_delay)
await self.connect(reconnection=True)
except websockets.exceptions.ConnectionClosed as e:
logger.warning(f"WebSocketManager: Connection closed during receive: {e.code} - {e.reason}")
await self.service_manager.on_disconnected(e.code, e.reason)
Expand Down
8 changes: 8 additions & 0 deletions src/realtime_ai/models/realtime_ai_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,11 @@ class ResponseFunctionCallArgumentsDone(EventBase):
output_index: int
call_id: str
arguments: str

@dataclass
class InputAudioBufferCleared(EventBase):
pass

@dataclass
class ReconnectedEvent(EventBase):
pass
1 change: 1 addition & 0 deletions src/realtime_ai/models/realtime_ai_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class RealtimeAIOptions:
tool_choice: str = "auto"
temperature: float = 0.8
max_output_tokens: Optional[int] = None
enable_auto_reconnect: bool = False

def __post_init__(self):
self.validate_options()
Expand Down
16 changes: 14 additions & 2 deletions src/realtime_ai/realtime_ai_service_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@
InputAudioBufferSpeechStarted,
ResponseOutputItemAdded,
ResponseFunctionCallArgumentsDelta,
ResponseFunctionCallArgumentsDone
ResponseFunctionCallArgumentsDone,
InputAudioBufferCleared,
ReconnectedEvent
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -94,9 +96,17 @@ def send_event(self, event: dict):
except Exception as e:
logger.error(f"RealtimeAIServiceManager: Failed to send event {event.get('type')}: {e}")

def on_connected(self):
def on_connected(self, reconnection: bool = False):
logger.info("RealtimeAIServiceManager: WebSocket connected.")
self.send_event(self.session_update_event)
if reconnection:
# If it's a reconnection, trigger a ReconnectedEvent
reconnect_event = ReconnectedEvent(
event_id=self._generate_event_id(),
type="reconnect",
)
self.on_message_received(json.dumps(reconnect_event.__dict__)) # Sending ReconnectedEvent as JSON string
logger.debug("RealtimeAIServiceManager: ReconnectedEvent sent.")
logger.debug("RealtimeAIServiceManager: session.update event sent.")

def on_disconnected(self, status_code: int, reason: str):
Expand Down Expand Up @@ -179,6 +189,8 @@ def _get_event_class(self, event_type: str) -> Optional[Type[EventBase]]:
"response.output_item.added": ResponseOutputItemAdded,
"response.function_call_arguments.delta": ResponseFunctionCallArgumentsDelta,
"response.function_call_arguments.done": ResponseFunctionCallArgumentsDone,
"input_audio_buffer.cleared": InputAudioBufferCleared,
"reconnected": ReconnectedEvent
}
return event_mapping.get(event_type)

Expand Down
28 changes: 27 additions & 1 deletion src/realtime_ai/web_socket_manager.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import logging
import threading
import time
import websocket # pip install websocket-client
from realtime_ai.models.realtime_ai_options import RealtimeAIOptions

Expand All @@ -23,12 +24,18 @@ def __init__(self, options : RealtimeAIOptions, service_manager):

self.ws = None
self._receive_thread = None
self.reconnect_delay = 5 # Time to wait before attempting to reconnect, in seconds
self.is_reconnection = False

def connect(self):
"""
Establishes a WebSocket connection.
"""
try:
if self.ws and self.ws.sock and self.ws.sock.connected:
logger.info("WebSocketManager: Already connected.")
return

logger.info(f"WebSocketManager: Connecting to {self.url}")
self.ws = websocket.WebSocketApp(
self.url,
Expand Down Expand Up @@ -69,7 +76,15 @@ def send(self, message: dict):

def _on_open(self, ws):
logger.info("WebSocketManager: WebSocket connection opened.")
self.service_manager.on_connected()
if self.is_reconnection:
logger.info("WebSocketManager: Connection reopened (Reconnection).")
self.service_manager.on_connected(reconnection=True)
self.is_reconnection = False
else:
logger.info("WebSocketManager: Connection opened (Initial).")
self.service_manager.on_connected()

self.is_reconnection = False

def _on_message(self, ws, message):
logger.debug(f"WebSocketManager: Received message: {message}")
Expand All @@ -83,3 +98,14 @@ def _on_close(self, ws, close_status_code, close_msg):
logger.warning(f"WebSocketManager: WebSocket connection closed: {close_status_code} - {close_msg}")
self.service_manager.on_disconnected(close_status_code, close_msg)

# If the session ended due to maximum duration, attempt to reconnect
if close_status_code == 1001 and "maximum duration of 15 minutes" in close_msg:
logger.debug("WebSocketManager: Session ended due to maximum duration. Reconnecting...")
if self.options.enable_auto_reconnect:
self._schedule_reconnect()

def _schedule_reconnect(self):
logger.info("WebSocketManager: Scheduling reconnection...")
time.sleep(self.reconnect_delay)
self.is_reconnection = True
self.connect()

0 comments on commit 65fc36e

Please sign in to comment.