Skip to content

Commit

Permalink
Merge pull request #16 from jhakulin/jhakulin/azure-support
Browse files Browse the repository at this point in the history
Jhakulin/azure support
  • Loading branch information
jhakulin authored Dec 6, 2024
2 parents 65fc36e + 17f833e commit 099e140
Show file tree
Hide file tree
Showing 9 changed files with 145 additions and 25 deletions.
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,16 @@ audio_capture.start()
- After that, go to the generated `dist` folder and install the generated wheel using the following command: `pip install --force-reinstall realtime_ai-0.1.0-py3-none-any.whl`

2. **Setup**:
- Replace placeholders like `"OPENAI_API_KEY"` in the sample script with real information.
- Check system microphone access and settings to align with the project's audio requirements (e.g., 16bit PCM 24kHz mono).
- You need to set up the following environment variables in order to use the service.

- **OPEN_AI**
- export OPENAI_API_KEY="Your OpenAI Key"
- Check system microphone access and settings to align with the project's audio requirements (e.g., 16bit PCM 24kHz mono).

- **AZURE_OPEN_AI**
- export AZURE_OPENAI_API_KEY="Your Azure OpenAI Key"
- export AZURE_OPENAI_ENDPOINT="Your Azure OpenAI endpoint, which must be in the format: `wss://<service-name>.openai.azure.com/openai/realtime`"
- export AZURE_OPENAI_API_VERSION="Azure OpenAI API version, e.g. 2024-10-01-preview"

3. **Execution**:
- Run the script via command-line or an IDE:
Expand Down
37 changes: 30 additions & 7 deletions samples/async/sample_realtime_ai_with_keyword_and_vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
logging.getLogger("utils.audio_playback").setLevel(logging.ERROR)
logging.getLogger("utils.audio_capture").setLevel(logging.ERROR)
logging.getLogger("utils.vad").setLevel(logging.ERROR)
logging.getLogger("realtime_ai").setLevel(logging.INFO)
logging.getLogger("realtime_ai").setLevel(logging.ERROR)

# Root logger for general logging
logger = logging.getLogger()
Expand Down Expand Up @@ -336,6 +336,28 @@ def get_vad_configuration(use_server_vad=False):
return None # Local VAD typically requires no special configuration


def get_openai_configuration():
    """
    Resolve the realtime service connection settings from environment variables.

    Azure OpenAI is selected when AZURE_OPENAI_ENDPOINT is set to a non-empty value;
    otherwise the OpenAI service is used. The Azure endpoint shall be in the format:
    "wss://<service-name>.openai.azure.com/openai/realtime"

    :return: Tuple ``(azure_endpoint, api_key, azure_api_version)``. For the OpenAI
        service the endpoint and the API version are ``None``. When a required
        variable is missing, an error is logged and ``(None, None, None)`` is returned.
    """
    # An empty variable is treated like an unset one, so callers never receive ""
    # as the endpoint when the OpenAI service is selected.
    azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") or None

    if azure_endpoint is None:
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            logger.error("OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.")
            return None, None, None
        return None, api_key, None

    api_key = os.getenv("AZURE_OPENAI_API_KEY")
    # The default only applies when the variable is unset; an explicitly empty
    # value is still rejected below.
    azure_api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2024-10-01-preview")

    # The endpoint is known to be non-empty here, so only the key and the version
    # need to be validated.
    if not api_key or not azure_api_version:
        logger.error("Please set the AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and AZURE_OPENAI_API_VERSION environment variables.")
        return None, None, None

    return azure_endpoint, api_key, azure_api_version


async def main():
"""
Main function to initialize and run the audio processing and realtime client asynchronously.
Expand All @@ -345,27 +367,28 @@ async def main():
audio_capture = None

try:
# Retrieve OpenAI API key from environment variables
api_key = os.getenv("OPENAI_API_KEY")

azure_openai_endpoint, api_key, azure_api_version = get_openai_configuration()
if not api_key:
logger.error("OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.")
return

functions = FunctionTool(functions=user_functions)

# Define RealtimeOptions
options = RealtimeAIOptions(
api_key=api_key,
model="gpt-4o-realtime-preview-2024-10-01",
model="gpt-4o-realtime-preview",
modalities=["audio", "text"],
instructions="You are a helpful assistant. Respond concisely. You have access to a variety of tools to analyze, translate and review text and code.",
turn_detection=get_vad_configuration(use_server_vad=False),
tools=functions.definitions,
tool_choice="auto",
temperature=0.8,
max_output_tokens=None,
voice="sage",
enable_auto_reconnect=True
voice="echo",
enable_auto_reconnect=True,
azure_openai_endpoint=azure_openai_endpoint,
azure_openai_api_version=azure_api_version
)

# Define AudioStreamOptions
Expand Down
33 changes: 28 additions & 5 deletions samples/sample_realtime_ai_with_keyword_and_vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,28 @@ def get_vad_configuration(use_server_vad=False):
return None # Local VAD typically requires no special configuration


def get_openai_configuration():
    """
    Resolve the realtime service connection settings from environment variables.

    Azure OpenAI is selected when AZURE_OPENAI_ENDPOINT is set to a non-empty value;
    otherwise the OpenAI service is used. The Azure endpoint shall be in the format:
    "wss://<service-name>.openai.azure.com/openai/realtime"

    :return: Tuple ``(azure_endpoint, api_key, azure_api_version)``. For the OpenAI
        service the endpoint and the API version are ``None``. When a required
        variable is missing, an error is logged and ``(None, None, None)`` is returned.
    """
    # An empty variable is treated like an unset one, so callers never receive ""
    # as the endpoint when the OpenAI service is selected.
    azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") or None

    if azure_endpoint is None:
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            logger.error("OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.")
            return None, None, None
        return None, api_key, None

    api_key = os.getenv("AZURE_OPENAI_API_KEY")
    # The default only applies when the variable is unset; an explicitly empty
    # value is still rejected below.
    azure_api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2024-10-01-preview")

    # The endpoint is known to be non-empty here, so only the key and the version
    # need to be validated.
    if not api_key or not azure_api_version:
        logger.error("Please set the AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and AZURE_OPENAI_API_VERSION environment variables.")
        return None, None, None

    return azure_endpoint, api_key, azure_api_version


def main():
"""
Main function to initialize and run the audio processing and realtime client asynchronously.
Expand All @@ -322,27 +344,28 @@ def main():
audio_capture = None

try:
# Retrieve OpenAI API key from environment variables
api_key = os.getenv("OPENAI_API_KEY")

azure_openai_endpoint, api_key, azure_api_version = get_openai_configuration()
if not api_key:
logger.error("OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.")
return

functions = FunctionTool(functions=user_functions)

# Define RealtimeOptions
options = RealtimeAIOptions(
api_key=api_key,
model="gpt-4o-realtime-preview-2024-10-01",
model="gpt-4o-realtime-preview",
modalities=["audio", "text"],
instructions="You are a helpful assistant. Respond concisely. You have access to a variety of tools to analyze, translate and review text and code.",
turn_detection=get_vad_configuration(use_server_vad=False),
tools=functions.definitions,
tool_choice="auto",
temperature=0.8,
max_output_tokens=None,
voice="sage",
voice="echo",
enable_auto_reconnect=True,
azure_openai_endpoint=azure_openai_endpoint,
azure_openai_api_version=azure_api_version
)

# Define AudioStreamOptions
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="realtime-ai",
version="0.1.4",
version="0.1.5",
description="Python SDK for real-time audio processing with OpenAI's Realtime REST API.",
long_description=open("README.md").read(),
long_description_content_type="text/markdown",
Expand Down
22 changes: 22 additions & 0 deletions src/realtime_ai/aio/realtime_ai_service_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,28 @@ def parse_realtime_event(self, json_object: dict) -> Optional[EventBase]:
rate_limits_data = json_object['rate_limits']
rate_limits = [RateLimit(**rate) for rate in rate_limits_data]
return RateLimitsUpdated(event_id=json_object['event_id'], type=event_type, rate_limits=rate_limits)
elif event_type == "response.content_part.done":
# Ensure only relevant fields are passed
return ResponseContentPartDone(
event_id=json_object['event_id'],
type=event_type,
response_id=json_object.get('response_id'),
item_id=json_object.get('item_id'),
output_index=json_object.get('output_index'),
content_index=json_object.get('content_index'),
part=json_object.get('part')
)
elif event_type == "response.content_part.added":
# Ensure only relevant fields are passed
return ResponseContentPartAdded(
event_id=json_object['event_id'],
type=event_type,
response_id=json_object.get('response_id'),
item_id=json_object.get('item_id'),
output_index=json_object.get('output_index'),
content_index=json_object.get('content_index'),
part=json_object.get('part')
)
elif event_type == "response.function_call_arguments.done":
# Ensure only relevant fields are passed
return ResponseFunctionCallArgumentsDone(
Expand Down
20 changes: 15 additions & 5 deletions src/realtime_ai/aio/web_socket_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import logging
import websockets
import uuid
from realtime_ai.models.realtime_ai_options import RealtimeAIOptions

logger = logging.getLogger(__name__)
Expand All @@ -16,11 +17,20 @@ def __init__(self, options: RealtimeAIOptions, service_manager):
self.options = options
self.service_manager = service_manager
self.websocket = None
self.url = f"wss://api.openai.com/v1/realtime?model={self.options.model}"
self.headers = {
"Authorization": f"Bearer {self.options.api_key}",
"OpenAI-Beta": "realtime=v1",
}

if self.options.azure_openai_endpoint:
self.request_id = uuid.uuid4()
self.url = self.options.azure_openai_endpoint + f"?api-version={self.options.azure_openai_api_version}" + f"&deployment={self.options.model}"
self.headers = {
"x-ms-client-request-id": str(self.request_id),
"api-key": self.options.api_key,
}
else:
self.url = f"wss://api.openai.com/v1/realtime?model={self.options.model}"
self.headers = {
"Authorization": f"Bearer {self.options.api_key}",
"openai-beta": "realtime=v1",
}

self.reconnect_delay = 5 # Time to wait before attempting to reconnect, in seconds

Expand Down
2 changes: 2 additions & 0 deletions src/realtime_ai/models/realtime_ai_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ class RealtimeAIOptions:
model: str
modalities: List[str]
instructions: str
azure_openai_endpoint: Optional[str] = None
azure_openai_api_version: Optional[str] = None
voice: str = "alloy"
input_audio_format: str = "pcm16"
output_audio_format: str = "pcm16"
Expand Down
22 changes: 22 additions & 0 deletions src/realtime_ai/realtime_ai_service_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,28 @@ def parse_realtime_event(self, json_object: dict) -> Optional[EventBase]:
rate_limits_data = json_object['rate_limits']
rate_limits = [RateLimit(**rate) for rate in rate_limits_data]
return RateLimitsUpdated(event_id=json_object['event_id'], type=event_type, rate_limits=rate_limits)
elif event_type == "response.content_part.done":
# Ensure only relevant fields are passed
return ResponseContentPartDone(
event_id=json_object['event_id'],
type=event_type,
response_id=json_object.get('response_id'),
item_id=json_object.get('item_id'),
output_index=json_object.get('output_index'),
content_index=json_object.get('content_index'),
part=json_object.get('part')
)
elif event_type == "response.content_part.added":
# Ensure only relevant fields are passed
return ResponseContentPartAdded(
event_id=json_object['event_id'],
type=event_type,
response_id=json_object.get('response_id'),
item_id=json_object.get('item_id'),
output_index=json_object.get('output_index'),
content_index=json_object.get('content_index'),
part=json_object.get('part')
)
elif event_type == "response.function_call_arguments.done":
# Ensure only relevant fields are passed
return ResponseFunctionCallArgumentsDone(
Expand Down
20 changes: 15 additions & 5 deletions src/realtime_ai/web_socket_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import threading
import time
import uuid
import websocket # pip install websocket-client
from realtime_ai.models.realtime_ai_options import RealtimeAIOptions

Expand All @@ -16,11 +17,20 @@ class WebSocketManager:
def __init__(self, options : RealtimeAIOptions, service_manager):
self.options = options
self.service_manager = service_manager
self.url = f"wss://api.openai.com/v1/realtime?model={self.options.model}"
self.headers = [
f"Authorization: Bearer {self.options.api_key}",
"OpenAI-Beta: realtime=v1"
]

if self.options.azure_openai_endpoint:
self.request_id = uuid.uuid4()
self.url = self.options.azure_openai_endpoint + f"?api-version={self.options.azure_openai_api_version}" + f"&deployment={self.options.model}"
self.headers = {
"x-ms-client-request-id": str(self.request_id),
"api-key": self.options.api_key,
}
else:
self.url = f"wss://api.openai.com/v1/realtime?model={self.options.model}"
self.headers = {
"Authorization": f"Bearer {self.options.api_key}",
"openai-beta": "realtime=v1",
}

self.ws = None
self._receive_thread = None
Expand Down

0 comments on commit 099e140

Please sign in to comment.