diff --git a/.env.sample b/.env.sample index 62e8d5bd..cacd4371 100644 --- a/.env.sample +++ b/.env.sample @@ -65,7 +65,6 @@ AZURE_OPENAI_STT_TTS_KEY=your-azure-speech-service-key # Optio # Azure Voice Live Integration (Optional - for Azure Voice Live API) # ============================================================================ AZURE_VOICE_LIVE_ENDPOINT=https://your-voice-live-endpoint.voice.azure.com/ # Optional: Azure Voice Live API endpoint -AZURE_VOICE_LIVE_KEY=optional-key # Optional: Azure Voice Live API key AZURE_VOICE_LIVE_API_KEY=your-voice-live-api-key # Optional: Alternative API key name AZURE_VOICE_LIVE_MODEL=gpt-4o # Optional: Voice Live model deployment (default: gpt-4o) AZURE_VOICE_LIVE_API_VERSION=2024-10-01-preview # Optional: Voice Live API version @@ -73,7 +72,7 @@ AZURE_VOICE_LIVE_API_VERSION=2024-10-01-preview # Optio # Azure AI Foundry Integration (Optional) AZURE_AI_FOUNDRY_ENDPOINT=https://your-foundry-endpoint.services.ai.azure.com/api/projects/your-project # Optional: AI Foundry project endpoint AI_FOUNDRY_PROJECT_NAME=your-ai-foundry-project # Optional: AI Foundry project name - +AI_FOUNDRY_AGENT_ID=your-ai-foundry-agent-id # Optional: AI Foundry agent ID # ============================================================================ # Base URL Configuration (Required for Webhooks) # ============================================================================ diff --git a/.github/workflows/README.md b/.github/workflows/README.md index a9b4c52e..8921b132 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -126,7 +126,7 @@ Customize where Terraform state is stored: - **Manual Approval**: Staging/prod require manual triggers - **Secret Management**: Azure Key Vault for application secrets -## � Monitoring & Troubleshooting +## Monitoring & Troubleshooting ### Workflow Monitoring - **GitHub Actions**: Check Actions tab for deployment status diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml 
index 792ebd75..25fe01d1 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -4,84 +4,90 @@ on: push: branches: - main - - feature/improve_docs # Deploy from your current branch too - pull_request: - branches: - - main - -# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages + paths: + - 'docs/**' + - 'mkdocs.yml' + - 'requirements-docs.txt' + - '.github/workflows/docs.yml' + workflow_dispatch: +# Permissions for GitHub Actions deployment permissions: contents: read pages: write id-token: write -# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. -# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. concurrency: group: "pages" cancel-in-progress: false jobs: - build: + # build: + # runs-on: ubuntu-latest + # steps: + # - name: Checkout + # uses: actions/checkout@v4 + # with: + # fetch-depth: 0 + + # - name: Setup Python + # uses: actions/setup-python@v5 + # with: + # python-version: '3.11' + + # - name: Install documentation dependencies + # run: | + # python -m pip install --upgrade pip + # pip install -r requirements-docs.txt + + # - name: Install minimal project dependencies + # run: | + # pip install fastapi pydantic uvicorn starlette + # continue-on-error: true + + # - name: Setup Pages + # id: pages + # uses: actions/configure-pages@v4 + + # - name: Build documentation + # run: | + # mkdocs build --clean --strict + # touch ./site/.nojekyll + # env: + # AZURE_SPEECH_KEY: "dummy-key-for-docs" + # AZURE_SPEECH_REGION: "eastus" + + # - name: Upload artifact + # uses: actions/upload-pages-artifact@v3 + # with: + # path: ./site + + deploy: + environment: + name: github-pages runs-on: ubuntu-latest + # needs: build + permissions: + pages: write + id-token: write steps: - name: Checkout uses: actions/checkout@v4 with: fetch-depth: 0 + persist-credentials: false # critical: disables GITHUB_TOKEN for git + - name: Setup 
Python uses: actions/setup-python@v5 with: python-version: '3.11' - - name: Cache dependencies - uses: actions/cache@v4 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements*.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - - - name: Install documentation dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements-docs.txt - - - name: Install project dependencies (for mkdocstrings) - run: | - pip install -r requirements.txt - # Install the project in editable mode so mkdocstrings can import it - pip install -e . - - - name: Setup Pages - id: pages - uses: actions/configure-pages@v4 - - - name: Build documentation - run: | - mkdocs build --clean --strict + - name: Deploy docs + uses: mhausenblas/mkdocs-deploy-gh-pages@1.26 env: - # Set a dummy Azure Speech key for documentation build + GITHUB_TOKEN: ${{ secrets.PERSONAL_TOKEN }} + CONFIG_FILE: mkdocs.yml + REQUIREMENTS: requirements-docs.txt + # Set dummy Azure env vars for build AZURE_SPEECH_KEY: "dummy-key-for-docs" - AZURE_SPEECH_REGION: "eastus" - - - name: Upload artifact - uses: actions/upload-pages-artifact@v3 - with: - path: ./site - -# deploy: -# if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/feature/improve_docs' -# environment: -# name: github-pages -# url: ${{ steps.deployment.outputs.page_url }} -# runs-on: ubuntu-latest -# needs: build -# permissions: -# pages: write -# id-token: write -# steps: -# - name: Deploy to GitHub Pages -# id: deployment -# uses: actions/deploy-pages@v3 + AZURE_SPEECH_REGION: "eastus" \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 060b3a4e..70d29c37 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,8 +22,8 @@ Create an issue for bugs, feature requests, or enhancements before starting work ### 2. 
Clone and Setup ```bash -git clone https://github.com/pablosalvador10/gbb-ai-audio-agent.git -cd gbb-ai-audio-agent +git clone https://github.com/Azure-Samples/art-voice-agent-accelerator.git +cd art-voice-agent-accelerator ``` ### 3. Environment Setup diff --git a/Makefile b/Makefile index 2bd6b29b..82fe4260 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ ############################################################ -# Makefile for gbb-ai-audio-agent +# Makefile for art-voice-agent-accelerator # Purpose: Manage code quality, environment, and app tasks # Each target is documented for clarity and maintainability ############################################################ @@ -124,14 +124,15 @@ run_load_test: $(eval CONCURRENT ?= 20) $(eval RECORD ?= ) $(eval RECORD_RATE ?= 0.2) - @python $(SCRIPTS_LOAD_DIR)/detailed_statistics_analyzer.py \ - --url $(URL) \ - --turns $(TURNS) \ - --conversations $(CONVERSATIONS) \ - --concurrent $(CONCURRENT) \ - $(if $(RECORD),--record) \ - $(if $(RECORD_RATE),--record-rate $(RECORD_RATE)) \ - $(EXTRA_ARGS) + @locust -f $(SCRIPTS_LOAD_DIR)/locustfile.py --headless -u $(CONVERSATIONS) -r $(CONCURRENT) --run-time 10m --host $(URL) --stop-timeout 60 --csv=locust_report --only-summary +# @python $(SCRIPTS_LOAD_DIR)/detailed_statistics_analyzer.py \ +# --url $(URL) \ +# --turns $(TURNS) \ +# --conversations $(CONVERSATIONS) \ +# --concurrent $(CONCURRENT) \ +# $(if $(RECORD),--record) \ +# $(if $(RECORD_RATE),--record-rate $(RECORD_RATE)) \ +# $(EXTRA_ARGS) # Conversation Analysis Targets list-conversations: @@ -142,484 +143,11 @@ FILE_TO_ANALYZE = tests\load\results\recorded_conversations_20250829_085350.json playback-conversations: python $(SCRIPTS_LOAD_DIR)/conversation_playback.py --conversation-file $(FILE_TO_ANALYZE) -# ACS call load testing (real phone calls - requires phone numbers) -run_acs_call_load_test: - @echo "⚠️ WARNING: This will initiate real ACS phone calls!" 
- @echo "⚠️ Make sure you have test numbers and sufficient credits!" - @echo "⚠️ Press Ctrl+C to cancel in the next 5 seconds..." - @sleep 5 - python $(SCRIPTS_LOAD_DIR)/acs_call_load_test.py - -# Development ACS testing (single call to specified phone) -run_acs_dev_test: - python $(SCRIPTS_LOAD_DIR)/acs_call_load_test_dev.py --environment dev --target-phones $(PHONE) - -# Staging ACS testing (5 calls) -run_acs_staging_test: - python $(SCRIPTS_LOAD_DIR)/acs_call_load_test_dev.py --environment staging --target-phones $(PHONE) - -############################################################ -# Azure App Service Deployment Artifacts -# Purpose: Generate build artifacts and deployment packages -############################################################ - -# Directories and files to include in backend deployment -BACKEND_DIRS = src utils apps/rtagent/backend -BACKEND_FILES = requirements.txt .deploy/runtime.txt .deploy/.python-version -EXCLUDE_PATTERNS = __pycache__ *.pyc .pytest_cache *.log .coverage htmlcov .DS_Store .git node_modules *.tmp *.temp dist .env -DEPLOY_DIR = .deploy/backend -TIMESTAMP = $(shell date +%Y%m%d_%H%M%S) -GIT_HASH = $(shell git rev-parse --short HEAD 2>/dev/null || echo "unknown") -DEPLOY_ZIP = backend_deployment_$(GIT_HASH)_$(TIMESTAMP).zip - -# Generate frontend deployment artifacts -generate_frontend_deployment: - @echo "🏗️ Generating Frontend Deployment Artifacts" - @echo "==============================================" - @echo "" - - # Clean and create deployment directory - @echo "🧹 Cleaning previous frontend deployment artifacts..." - @rm -rf .deploy/frontend - @mkdir -p .deploy/frontend - - # Copy frontend directory - @echo "📦 Copying frontend directory..." 
- @if [ -d "apps/rtagent/frontend" ]; then \ - rsync -av --exclude='node_modules' --exclude='.env' --exclude='.DS_Store' apps/rtagent/frontend/ .deploy/frontend/; \ - else \ - echo " ❌ Error: apps/rtagent/frontend directory not found."; \ - exit 1; \ - fi - - # Create deployment zip - @FRONTEND_DEPLOY_ZIP=frontend_deployment_$(GIT_HASH)_$(TIMESTAMP).zip; \ - echo "📦 Creating deployment zip: $$FRONTEND_DEPLOY_ZIP"; \ - cd .deploy/frontend && zip -rq "../$$FRONTEND_DEPLOY_ZIP" .; \ - echo ""; \ - echo "✅ Frontend deployment artifacts generated successfully!"; \ - echo "📊 Deployment Summary:"; \ - echo " 📁 Artifacts directory: .deploy/frontend"; \ - echo " 📦 Deployment package: .deploy/$$FRONTEND_DEPLOY_ZIP"; \ - echo " 📏 Package size: $$(du -h .deploy/$$FRONTEND_DEPLOY_ZIP | cut -f1)"; \ - echo " 🔢 Git commit: $(GIT_HASH)"; \ - echo " 🕐 Timestamp: $(TIMESTAMP)"; \ - echo ""; \ - echo "🚀 Ready for Azure App Service deployment!" - -# Generate backend deployment artifacts -generate_backend_deployment: - @echo "🏗️ Generating Backend Deployment Artifacts" - @echo "==============================================" - @echo "" - # Clean and create deployment directory - @echo "🧹 Cleaning previous deployment artifacts..." - @rm -rf $(DEPLOY_DIR) - @mkdir -p $(DEPLOY_DIR) - - # Copy backend directories with exclusions - @echo "📦 Copying backend directories..." - @echo "$(EXCLUDE_PATTERNS)" | tr ' ' '\n' > .deploy-excludes.tmp - @for dir in $(BACKEND_DIRS); do \ - if [ -d "$$dir" ]; then \ - echo " Copying: $$dir"; \ - rsync -av --exclude-from=.deploy-excludes.tmp "$$dir/" "$(DEPLOY_DIR)/$$dir/"; \ - else \ - echo " ⚠️ Warning: Directory not found: $$dir"; \ - fi \ - done - @rm -f .deploy-excludes.tmp - - # Copy required files - @echo "📄 Copying required files..." 
- @for file in $(BACKEND_FILES); do \ - if [ -f "$$file" ]; then \ - echo " Copying: $$file"; \ - mkdir -p "$(DEPLOY_DIR)/$$(dirname "$$file")"; \ - cp "$$file" "$(DEPLOY_DIR)/$$file"; \ - else \ - echo " ❌ Error: Required file missing: $$file"; \ - exit 1; \ - fi \ - done - - # Copy runtime files to root for Oryx detection, create if missing - @echo "🐍 Setting up Python runtime configuration..." - @if [ -f ".deploy/runtime.txt" ]; then \ - cp ".deploy/runtime.txt" "$(DEPLOY_DIR)/runtime.txt"; \ - echo " ✅ Copied runtime.txt to deployment root"; \ - else \ - echo "python-3.11" > "$(DEPLOY_DIR)/runtime.txt"; \ - echo " ⚠️ .deploy/runtime.txt not found, created default runtime.txt"; \ - fi - @if [ -f ".deploy/.python-version" ]; then \ - cp ".deploy/.python-version" "$(DEPLOY_DIR)/.python-version"; \ - echo " ✅ Copied .python-version to deployment root"; \ - else \ - echo "3.11" > "$(DEPLOY_DIR)/.python-version"; \ - echo " ⚠️ .deploy/.python-version not found, created default .python-version"; \ - fi - - # Create deployment zip - @echo "📦 Creating deployment zip: $(DEPLOY_ZIP)" - @cd $(DEPLOY_DIR) && zip -rq "../$(DEPLOY_ZIP)" . \ - $(foreach pattern,$(EXCLUDE_PATTERNS),-x "$(pattern)") - - # Show deployment summary - @echo "" - @echo "✅ Backend deployment artifacts generated successfully!" - @echo "📊 Deployment Summary:" - @echo " 📁 Artifacts directory: $(DEPLOY_DIR)" - @echo " 📦 Deployment package: .deploy/$(DEPLOY_ZIP)" - @echo " 📏 Package size: $$(du -h .deploy/$(DEPLOY_ZIP) | cut -f1)" - @echo " 🔢 Git commit: $(GIT_HASH)" - @echo " 🕐 Timestamp: $(TIMESTAMP)" - @echo "" - @echo "🚀 Ready for Azure App Service deployment!" - - -# Clean deployment artifacts -clean_deployment_artifacts: - @echo "🧹 Cleaning deployment artifacts..." 
- @rm -rf .deploy/backend - @rm -f .deploy/backend_deployment_*.zip - @echo "✅ Deployment artifacts cleaned" - - -# Show deployment package info -show_deployment_info: - @echo "📊 Deployment Package Information" - @echo "=================================" - @echo "" - @if [ -d "$(DEPLOY_DIR)" ]; then \ - echo "📁 Artifacts directory: $(DEPLOY_DIR)"; \ - echo "📄 Directory contents:"; \ - find $(DEPLOY_DIR) -type f | head -20 | sed 's/^/ /'; \ - echo ""; \ - else \ - echo "❌ No deployment artifacts found. Run 'make generate_backend_deployment' first."; \ - fi - @echo "📦 Available deployment packages:" - @ls -la .deploy/backend_deployment_*.zip 2>/dev/null | sed 's/^/ /' || echo " No deployment packages found" - - # Run pylint on all Python files (excluding tests), output to report file run_pylint: @echo "Running linter" find . -type f -name "*.py" ! -path "./tests/*" | xargs pylint -disable=logging-fstring-interpolation > utils/pylint_report/pylint_report.txt - -############################################################ -# Terraform State to Environment File -# Purpose: Extract values from Terraform remote state and create local .env file -############################################################ - -# Environment variables for Terraform state extraction -AZURE_ENV_NAME ?= dev -# Automatically set AZURE_SUBSCRIPTION_ID from Azure CLI if not provided -AZURE_SUBSCRIPTION_ID ?= $(shell az account show --query id -o tsv 2>/dev/null) -TF_DIR = infra/terraform -ENV_FILE = .env.$(AZURE_ENV_NAME) - -# Generate environment file from Terraform remote state outputs -generate_env_from_terraform: - @echo "🔧 Generating Environment File from Terraform State" - @echo "============================================================" - @./devops/scripts/generate-env-from-terraform.sh $(AZURE_ENV_NAME) $(AZURE_SUBSCRIPTION_ID) generate - -# Check if Terraform is initialized (now handled by script) -check_terraform_initialized: - @echo "⚠️ Note: Terraform initialization check is now 
handled by the generation script" - -# Show current environment file (if it exists) -show_env_file: - @./devops/scripts/generate-env-from-terraform.sh $(AZURE_ENV_NAME) $(AZURE_SUBSCRIPTION_ID) show - -# Extract sensitive values from Azure Key Vault and update environment file -update_env_with_secrets: - @echo "🔧 Updating Environment File with Key Vault Secrets" - @echo "============================================================" - @./devops/scripts/generate-env-from-terraform.sh $(AZURE_ENV_NAME) $(AZURE_SUBSCRIPTION_ID) update-secrets - -# Generate environment file from Terraform remote state outputs (PowerShell) -generate_env_from_terraform_ps: - @echo "🔧 Generating Environment File from Terraform State (PowerShell)" - @echo "============================================================" - @powershell -ExecutionPolicy Bypass -File devops/scripts/Generate-EnvFromTerraform.ps1 -EnvironmentName $(AZURE_ENV_NAME) -SubscriptionId $(AZURE_SUBSCRIPTION_ID) -Action generate - -# Show current environment file (PowerShell) -show_env_file_ps: - @powershell -ExecutionPolicy Bypass -File devops/scripts/Generate-EnvFromTerraform.ps1 -EnvironmentName $(AZURE_ENV_NAME) -SubscriptionId $(AZURE_SUBSCRIPTION_ID) -Action show - -# Update environment file with Key Vault secrets (PowerShell) -update_env_with_secrets_ps: - @echo "🔧 Updating Environment File with Key Vault Secrets (PowerShell)" - @echo "============================================================" - @powershell -ExecutionPolicy Bypass -File devops/scripts/Generate-EnvFromTerraform.ps1 -EnvironmentName $(AZURE_ENV_NAME) -SubscriptionId $(AZURE_SUBSCRIPTION_ID) -Action update-secrets - -# Deploy a user-provided directory to Azure Web App using Azure CLI -# Usage: make deploy_to_webapp WEBAPP_NAME= DEPLOY_DIR= -deploy_to_webapp: - @if [ -z "$(WEBAPP_NAME)" ]; then \ - if [ -f ".env" ]; then \ - WEBAPP_NAME_ENV=$$(grep '^BACKEND_APP_SERVICE_URL=' .env | cut -d'=' -f2 | sed 's|https\?://||;s|/.*||'); \ - if [ -n 
"$$WEBAPP_NAME_ENV" ]; then \ - echo "ℹ️ Using BACKEND_APP_SERVICE_URL from .env: $$WEBAPP_NAME_ENV"; \ - WEBAPP_NAME=$$WEBAPP_NAME_ENV; \ - else \ - echo "❌ WEBAPP_NAME not set and BACKEND_APP_SERVICE_URL not found in .env"; \ - exit 1; \ - fi \ - else \ - echo "❌ WEBAPP_NAME not set and .env file not found"; \ - exit 1; \ - fi \ - fi - @if [ -z "$(DEPLOY_DIR)" ]; then \ - echo "❌ Usage: make deploy_to_webapp WEBAPP_NAME= DEPLOY_DIR= AZURE_RESOURCE_GROUP="; \ - exit 1; \ - fi - @if [ -z "$(AZURE_RESOURCE_GROUP)" ]; then \ - if [ -f ".env" ]; then \ - RESOURCE_GROUP_ENV=$$(grep '^AZURE_RESOURCE_GROUP=' .env | cut -d'=' -f2); \ - if [ -n "$$RESOURCE_GROUP_ENV" ]; then \ - echo "ℹ️ Using AZURE_RESOURCE_GROUP from .env: $$RESOURCE_GROUP_ENV"; \ - AZURE_RESOURCE_GROUP=$$RESOURCE_GROUP_ENV; \ - else \ - echo "❌ AZURE_RESOURCE_GROUP not set and not found in .env"; \ - exit 1; \ - fi \ - else \ - echo "❌ AZURE_RESOURCE_GROUP not set and .env file not found"; \ - exit 1; \ - fi \ - fi - @echo "🚀 Deploying '$(DEPLOY_DIR)' to Azure Web App '$(WEBAPP_NAME)' in resource group '$(AZURE_RESOURCE_GROUP)'" - @echo "⏳ Note: Large deployments may take 10+ minutes. Please be patient..." 
- @echo "" - @echo "📊 Deployment Progress Monitor:" - @echo " 🌐 Azure Portal: https://portal.azure.com/#@/resource/subscriptions/$(shell az account show --query id -o tsv 2>/dev/null)/resourceGroups/$(AZURE_RESOURCE_GROUP)/providers/Microsoft.Web/sites/$(WEBAPP_NAME)/deploymentCenter" - @echo " 📋 Deployment Logs: https://$(WEBAPP_NAME).scm.azurewebsites.net/api/deployments/latest/log" - @echo "" - @set -e; \ - if az webapp deploy --resource-group $(AZURE_RESOURCE_GROUP) --name $(WEBAPP_NAME) --src-path $(DEPLOY_DIR) --type zip; then \ - DEPLOY_EXIT_CODE=$$?; \ - echo ""; \ - echo "⚠️ Deployment command timed out or encountered an issue (exit code: $$DEPLOY_EXIT_CODE)"; \ - echo ""; \ - if [ "$$DEPLOY_EXIT_CODE" = "124" ]; then \ - echo "🔄 TIMEOUT NOTICE: The deployment is likely still in progress in the background."; \ - echo " The 15-minute timeout is a safety measure to prevent hanging builds."; \ - echo ""; \ - fi; \ - echo "📋 Next Steps:"; \ - echo " 1. 🔍 Check deployment status in Azure Portal:"; \ - echo " https://portal.azure.com/#@/resource/subscriptions/$(shell az account show --query id -o tsv 2>/dev/null)/resourceGroups/$(AZURE_RESOURCE_GROUP)/providers/Microsoft.Web/sites/$(WEBAPP_NAME)/deploymentCenter"; \ - echo ""; \ - echo " 2. 📊 Monitor real-time logs via VS Code:"; \ - echo " • Install 'Azure App Service' extension"; \ - echo " • Right-click on '$(WEBAPP_NAME)' → 'Start Streaming Logs'"; \ - echo " • Or use Command Palette: 'Azure App Service: Start Streaming Logs'"; \ - echo ""; \ - echo " 3. 🖥️ Monitor logs via Azure CLI:"; \ - echo " az webapp log tail --resource-group $(AZURE_RESOURCE_GROUP) --name $(WEBAPP_NAME)"; \ - echo ""; \ - echo " 4. 🌐 Check deployment logs directly:"; \ - echo " https://$(WEBAPP_NAME).scm.azurewebsites.net/api/deployments/latest/log"; \ - echo ""; \ - echo " 5. 
🔄 If deployment fails, retry with:"; \ - echo " make deploy_to_webapp WEBAPP_NAME=$(WEBAPP_NAME) DEPLOY_DIR=$(DEPLOY_DIR) AZURE_RESOURCE_GROUP=$(AZURE_RESOURCE_GROUP)"; \ - echo ""; \ - echo "💡 Pro Tip: Large Node.js builds (like Vite) typically take 5-15 minutes."; \ - echo " The site may show 'Application Error' until build completes."; \ - exit $$DEPLOY_EXIT_CODE; \ - else \ - echo ""; \ - echo "✅ Deployment command completed successfully!"; \ - echo "🌐 Your app should be available at: https://$(WEBAPP_NAME).azurewebsites.net"; \ - echo ""; \ - echo "📋 Post-Deployment Verification:"; \ - echo " • Wait 2-3 minutes for app startup"; \ - echo " • Check app health: https://$(WEBAPP_NAME).azurewebsites.net/health (if available)"; \ - echo " • Monitor logs for any startup issues"; \ - echo ""; \ - fi - -# Deploy frontend to Azure Web App using Terraform outputs and deployment artifacts -deploy_frontend: - @echo "🚀 Deploying frontend to Azure Web App using Terraform outputs" - $(MAKE) generate_frontend_deployment - $(eval WEBAPP_NAME := $(shell terraform -chdir=$(TF_DIR) output -raw FRONTEND_APP_SERVICE_NAME 2>/dev/null)) - $(eval AZURE_RESOURCE_GROUP := $(shell terraform -chdir=$(TF_DIR) output -raw AZURE_RESOURCE_GROUP 2>/dev/null)) - $(eval DEPLOY_ZIP := $(shell ls -1t .deploy/frontend_deployment_*.zip 2>/dev/null | head -n1)) - @if [ -z "$(WEBAPP_NAME)" ]; then \ - echo "❌ Could not determine frontend web app name from Terraform outputs."; \ - exit 1; \ - fi - @if [ -z "$(AZURE_RESOURCE_GROUP)" ]; then \ - echo "❌ Could not determine resource group name from Terraform outputs."; \ - exit 1; \ - fi - @if [ -z "$(DEPLOY_ZIP)" ]; then \ - echo "❌ No frontend deployment zip found. 
Run 'make generate_frontend_deployment' first."; \ - exit 1; \ - fi - $(MAKE) deploy_to_webapp WEBAPP_NAME=$(WEBAPP_NAME) DEPLOY_DIR=$(DEPLOY_ZIP) AZURE_RESOURCE_GROUP=$(AZURE_RESOURCE_GROUP) - -# Deploy backend to Azure Web App using Terraform outputs and deployment artifacts -deploy_backend: - @echo "🚀 Deploying backend to Azure Web App using Terraform outputs" - $(MAKE) generate_backend_deployment - $(eval WEBAPP_NAME := $(shell terraform -chdir=$(TF_DIR) output -raw BACKEND_APP_SERVICE_NAME 2>/dev/null)) - $(eval AZURE_RESOURCE_GROUP := $(shell terraform -chdir=$(TF_DIR) output -raw AZURE_RESOURCE_GROUP 2>/dev/null)) - $(eval DEPLOY_ZIP := $(shell ls -1t .deploy/backend_deployment_*.zip 2>/dev/null | head -n1)) - @if [ -z "$(WEBAPP_NAME)" ]; then \ - echo "❌ Could not determine backend web app name from Terraform outputs."; \ - exit 1; \ - fi - @if [ -z "$(AZURE_RESOURCE_GROUP)" ]; then \ - echo "❌ Could not determine resource group name from Terraform outputs."; \ - exit 1; \ - fi - @if [ -z "$(DEPLOY_ZIP)" ]; then \ - echo "❌ No backend deployment zip found. 
Run 'make generate_backend_deployment' first."; \ - exit 1; \ - fi - $(MAKE) deploy_to_webapp WEBAPP_NAME=$(WEBAPP_NAME) DEPLOY_DIR=$(DEPLOY_ZIP) AZURE_RESOURCE_GROUP=$(AZURE_RESOURCE_GROUP) - -# Monitor deployment status and logs for any webapp -# Usage: make monitor_deployment WEBAPP_NAME= AZURE_RESOURCE_GROUP= -monitor_deployment: - @if [ -z "$(WEBAPP_NAME)" ]; then \ - echo "❌ Usage: make monitor_deployment WEBAPP_NAME= AZURE_RESOURCE_GROUP="; \ - exit 1; \ - fi - @if [ -z "$(AZURE_RESOURCE_GROUP)" ]; then \ - if [ -f ".env" ]; then \ - RESOURCE_GROUP_ENV=$$(grep '^AZURE_RESOURCE_GROUP=' .env | cut -d'=' -f2); \ - if [ -n "$$RESOURCE_GROUP_ENV" ]; then \ - echo "ℹ️ Using AZURE_RESOURCE_GROUP from .env: $$RESOURCE_GROUP_ENV"; \ - AZURE_RESOURCE_GROUP=$$RESOURCE_GROUP_ENV; \ - else \ - echo "❌ AZURE_RESOURCE_GROUP not set and not found in .env"; \ - exit 1; \ - fi \ - else \ - echo "❌ AZURE_RESOURCE_GROUP not set and .env file not found"; \ - exit 1; \ - fi \ - fi - @echo "📊 Monitoring Azure Web App: $(WEBAPP_NAME)" - @echo "=============================================" - @echo "" - @echo "🔍 App Service Status:" - @az webapp show --resource-group $(AZURE_RESOURCE_GROUP) --name $(WEBAPP_NAME) --query "{name:name,state:state,defaultHostName:defaultHostName,lastModifiedTime:lastModifiedTime}" --output table 2>/dev/null || echo "❌ Could not retrieve app status" - @echo "" - @echo "📋 Recent Deployment Status:" - @az webapp deployment list --resource-group $(AZURE_RESOURCE_GROUP) --name $(WEBAPP_NAME) --query "[0].{status:status,deploymentId:id,startTime:startTime,endTime:endTime,message:message}" --output table 2>/dev/null || echo "❌ Could not retrieve deployment status" - @echo "" - @echo "🌐 Useful Links:" - @echo " • App URL: https://$(WEBAPP_NAME).azurewebsites.net" - @echo " • Azure Portal: https://portal.azure.com/#@/resource/subscriptions/$(shell az account show --query id -o tsv 
2>/dev/null)/resourceGroups/$(AZURE_RESOURCE_GROUP)/providers/Microsoft.Web/sites/$(WEBAPP_NAME)" - @echo " • Deployment Center: https://portal.azure.com/#@/resource/subscriptions/$(shell az account show --query id -o tsv 2>/dev/null)/resourceGroups/$(AZURE_RESOURCE_GROUP)/providers/Microsoft.Web/sites/$(WEBAPP_NAME)/deploymentCenter" - @echo " • Kudu Console: https://$(WEBAPP_NAME).scm.azurewebsites.net" - @echo " • Deployment Logs: https://$(WEBAPP_NAME).scm.azurewebsites.net/api/deployments/latest/log" - @echo "" - @echo "📊 VS Code Log Streaming:" - @echo " 1. Install 'Azure App Service' extension" - @echo " 2. Sign in to Azure account" - @echo " 3. Right-click '$(WEBAPP_NAME)' → 'Start Streaming Logs'" - @echo " 4. Or use Command Palette: 'Azure App Service: Start Streaming Logs'" - @echo "" - @echo "🖥️ CLI Log Streaming (run in separate terminal):" - @echo " az webapp log tail --resource-group $(AZURE_RESOURCE_GROUP) --name $(WEBAPP_NAME)" - @echo "" - -# Stream logs for backend app service -monitor_backend_deployment: - @echo "📊 Monitoring Backend Deployment" - $(eval WEBAPP_NAME := $(shell terraform -chdir=$(TF_DIR) output -raw BACKEND_APP_SERVICE_NAME 2>/dev/null)) - $(eval AZURE_RESOURCE_GROUP := $(shell terraform -chdir=$(TF_DIR) output -raw AZURE_RESOURCE_GROUP 2>/dev/null)) - @if [ -z "$(WEBAPP_NAME)" ]; then \ - echo "❌ Could not determine backend web app name from Terraform outputs."; \ - exit 1; \ - fi - @if [ -z "$(AZURE_RESOURCE_GROUP)" ]; then \ - echo "❌ Could not determine resource group name from Terraform outputs."; \ - exit 1; \ - fi - $(MAKE) monitor_deployment WEBAPP_NAME=$(WEBAPP_NAME) AZURE_RESOURCE_GROUP=$(AZURE_RESOURCE_GROUP) - -# Stream logs for frontend app service -monitor_frontend_deployment: - @echo "📊 Monitoring Frontend Deployment" - $(eval WEBAPP_NAME := $(shell terraform -chdir=$(TF_DIR) output -raw FRONTEND_APP_SERVICE_NAME 2>/dev/null)) - $(eval AZURE_RESOURCE_GROUP := $(shell terraform -chdir=$(TF_DIR) output -raw 
AZURE_RESOURCE_GROUP 2>/dev/null)) - @if [ -z "$(WEBAPP_NAME)" ]; then \ - echo "❌ Could not determine frontend web app name from Terraform outputs."; \ - exit 1; \ - fi - @if [ -z "$(AZURE_RESOURCE_GROUP)" ]; then \ - echo "❌ Could not determine resource group name from Terraform outputs."; \ - exit 1; \ - fi - $(MAKE) monitor_deployment WEBAPP_NAME=$(WEBAPP_NAME) AZURE_RESOURCE_GROUP=$(AZURE_RESOURCE_GROUP) - -.PHONY: generate_env_from_terraform check_terraform_initialized show_env_file update_env_with_secrets generate_env_from_terraform_ps show_env_file_ps update_env_with_secrets_ps monitor_deployment monitor_backend_deployment monitor_frontend_deployment - - -############################################################ -# Azure Communication Services Phone Number Management -# Purpose: Purchase and manage ACS phone numbers -############################################################ - -# Purchase ACS phone number and store in environment file -# Usage: make purchase_acs_phone_number [ENV_FILE=custom.env] [COUNTRY_CODE=US] [AREA_CODE=833] [PHONE_TYPE=TOLL_FREE] -purchase_acs_phone_number: - @echo "📞 Azure Communication Services - Phone Number Purchase" - @echo "======================================================" - @echo "" - # Set default parameters - $(eval ENV_FILE ?= .env.$(AZURE_ENV_NAME)) - $(eval COUNTRY_CODE ?= US) - $(eval AREA_CODE ?= 866) - $(eval PHONE_TYPE ?= TOLL_FREE) - - # Extract ACS endpoint from environment file - @echo "🔍 Extracting ACS endpoint from $(ENV_FILE)" - $(eval ACS_ENDPOINT := $(shell grep '^ACS_ENDPOINT=' $(ENV_FILE) | cut -d'=' -f2)) - - @if [ -z "$(ACS_ENDPOINT)" ]; then \ - echo "❌ ACS_ENDPOINT not found in $(ENV_FILE). Please ensure the environment file contains ACS_ENDPOINT."; \ - exit 1; \ - fi - - @echo "📞 Creating a new ACS phone number using Python script..." 
- python3 devops/scripts/azd/helpers/acs_phone_number_manager.py --endpoint $(ACS_ENDPOINT) purchase --country $(COUNTRY_CODE) --area $(AREA_CODE) --phone-number-type $(PHONE_TYPE) - -# Purchase ACS phone number using PowerShell (Windows) -# Usage: make purchase_acs_phone_number_ps [ENV_FILE=custom.env] [COUNTRY_CODE=US] [AREA_CODE=833] [PHONE_TYPE=TOLL_FREE] -purchase_acs_phone_number_ps: - @echo "📞 Azure Communication Services - Phone Number Purchase (PowerShell)" - @echo "==================================================================" - @echo "" - - # Set default parameters - $(eval ENV_FILE ?= .env.$(AZURE_ENV_NAME)) - $(eval COUNTRY_CODE ?= US) - $(eval AREA_CODE ?= 866) - $(eval PHONE_TYPE ?= TOLL_FREE) - - # Execute the PowerShell script with parameters - @powershell -ExecutionPolicy Bypass -File devops/scripts/Purchase-AcsPhoneNumber.ps1 \ - -EnvFile "$(ENV_FILE)" \ - -AzureEnvName "$(AZURE_ENV_NAME)" \ - -CountryCode "$(COUNTRY_CODE)" \ - -AreaCode "$(AREA_CODE)" \ - -PhoneType "$(PHONE_TYPE)" \ - -TerraformDir "$(TF_DIR)" - - ############################################################ # Azure Redis Management # Purpose: Connect to Azure Redis using Azure AD authentication @@ -686,10 +214,10 @@ connect_redis: @echo " Username: $(USER_OBJECT_ID)" @echo " Password: [Azure Access Token]" @echo "" - @echo "� Debug: Using command:" + @echo " Debug: Using command:" @echo " redis-cli -h $(REDIS_HOST) -p $(REDIS_PORT) --tls -u $(USER_OBJECT_ID) -a [ACCESS_TOKEN]" @echo "" - @echo "�📝 Note: You are now connected to Redis. Use Redis commands as needed." + @echo "📝 Note: You are now connected to Redis. Use Redis commands as needed." @echo " Example commands: PING, INFO, KEYS *, GET , SET " @echo " Type 'quit' or 'exit' to disconnect." 
@echo "" @@ -779,7 +307,7 @@ test_redis_connection: # Show help information help: @echo "" - @echo "🛠️ gbb-ai-audio-agent Makefile" + @echo "🛠️ art-voice-agent-accelerator Makefile" @echo "==============================" @echo "" @echo "📋 Code Quality:" @@ -804,37 +332,6 @@ help: @echo "⚡ Load Testing:" @echo " generate_audio Generate PCM audio files for load testing" @echo " run_load_test Run WebSocket endpoint load testing (safe)" - @echo " run_acs_dev_test Run 1 ACS call to +8165019907 (development)" - @echo " run_acs_staging_test Run 5 ACS calls (staging environment)" - @echo " run_acs_prod_test Run 20 ACS calls (production testing)" - @echo " show_acs_test_config Show ACS test configurations without running" - @echo "" - @echo "📦 Deployment Artifacts:" - @echo " generate_backend_deployment Generate backend deployment artifacts and zip" - @echo " generate_frontend_deployment Generate frontend deployment artifacts and zip" - @echo " clean_deployment_artifacts Clean deployment artifacts" - @echo " show_deployment_info Show deployment package information" - @echo "" - @echo "🌐 Azure Web App Deployment:" - @echo " deploy_backend Deploy backend to Azure App Service (using Terraform outputs)" - @echo " deploy_frontend Deploy frontend to Azure App Service (using Terraform outputs)" - @echo " deploy_to_webapp Generic Web App deployment (manual parameters)" - @echo " monitor_deployment Monitor any webapp deployment status and logs" - @echo " monitor_backend_deployment Monitor backend deployment (using Terraform outputs)" - @echo " monitor_frontend_deployment Monitor frontend deployment (using Terraform outputs)" - @echo "" - @echo "🏗️ Terraform Environment Management:" - @echo " generate_env_from_terraform Generate .env file from Terraform state (Bash)" - @echo " generate_env_from_terraform_ps Generate .env file from Terraform state (PowerShell)" - @echo " show_env_file Display current environment file info (Bash)" - @echo " show_env_file_ps Display current environment 
file info (PowerShell)" - @echo " update_env_with_secrets Update .env file with Key Vault secrets (Bash)" - @echo " update_env_with_secrets_ps Update .env file with Key Vault secrets (PowerShell)" - @echo " check_terraform_initialized Check if Terraform is properly initialized" - @echo "" - @echo "📞 Azure Communication Services:" - @echo " purchase_acs_phone_number Purchase ACS phone number and store in env file" - @echo " purchase_acs_phone_number_ps Purchase ACS phone number (PowerShell version)" @echo "" @echo "🔴 Azure Redis Management:" @echo " connect_redis Connect to Azure Redis using Azure AD authentication" @@ -850,17 +347,6 @@ help: @echo " make generate_env_from_terraform" @echo " make update_env_with_secrets" @echo "" - @echo "💡 Quick Start for Full Terraform Deployment (Alternative to azd):" - @echo " 1. cd infra/terraform && terraform init && terraform apply" - @echo " 2. cd ../.. && make generate_env_from_terraform" - @echo " 3. make update_env_with_secrets" - @echo " 4. make generate_backend_deployment && make deploy_backend" - @echo " 5. make generate_frontend_deployment && make deploy_frontend" - @echo "" - @echo "💡 Quick Start for ACS Phone Number Purchase:" - @echo " make purchase_acs_phone_number # Bash/Python version" - @echo " make purchase_acs_phone_number_ps # PowerShell version" - @echo "" @echo "💡 Deployment Monitoring Tips:" @echo " • Large deployments may timeout after 15 minutes but continue in background" @echo " • Use monitor_deployment targets to check status during/after deployment" diff --git a/README.md b/README.md index 3904f158..f61a6e89 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,24 @@ # **ARTVoice Accelerator Framework** +
+
+
📚 COMPLETE DOCUMENTATION PORTAL
+
Everything you need: tutorials, API references, deployment guides & more
+ + 🚀 EXPLORE FULL DOCS → + +
!
+
+
+ > **TL;DR**: Build real-time voice agents on Azure—one hyperscale stack, omnichannel (ACS), code-first, modular, ops-friendly & extensible. -ARTAgent Logo + +ARTAgent Logo You own the agentic design; this repo handles the end-to-end voice plumbing. We keep a clean separation of concerns—telephony (ACS), app middleware, AI inference loop (STT → LLM → TTS), and orchestration—so you can swap parts without starting from zero. We know, shipping voice agents is more than “voice-to-voice.” You need predictable latency budgets, media handoffs, error paths, channel fan-out, barge-in, noise cancellation, and more. This framework gives you the e2e working spine so you can focus on what differentiates you— your tools, agentic design, and orchestration logic (multi-agent ready). @@ -43,7 +58,7 @@ We ship the scaffolding to make that last mile fast: structured logging, metrics
- Demo Video - ARTAgent in Action + Demo Video - ARTAgent in Action

Click the image to watch the ARTAgent Demo. @@ -63,7 +78,7 @@ Pick one of three ways to run the voice inference layer—the rest of the framew

Build the AI voice pipeline from scratch (maximum control)
-ARTAgent Arch +ARTAgent Arch - **Own the event loop**: STT → LLM/Tools → TTS, with granular hooks. - **Swap services per stage**: Azure Speech, Azure OpenAI, etc. @@ -82,7 +97,7 @@ Pick one of three ways to run the voice inference layer—the rest of the framew
-LIVEVOICEApi +LIVEVOICEApi - **Enterprise Managed voice-to-voice**: barge-in, noise suppression, elastic scale. - **Agent runtime**: connect to Azure AI Foundry Agents for built-in tool/function calling and orchestration. diff --git a/apps/rtagent/backend/api/v1/events/demo.py b/apps/rtagent/backend/api/v1/events/demo.py deleted file mode 100644 index 140d73de..00000000 --- a/apps/rtagent/backend/api/v1/events/demo.py +++ /dev/null @@ -1,168 +0,0 @@ -""" -V1 Event Processor Demo -====================== - -Demonstrates how to use the new V1 Event Processor inspired by Azure's Event Processor pattern. -This shows integration with legacy handlers and simplified event processing. -""" - -import asyncio -import json -from azure.core.messaging import CloudEvent -from fastapi import FastAPI, Request -from fastapi.responses import JSONResponse - -# Import the V1 event system -from apps.rtagent.backend.api.v1.events import ( - get_call_event_processor, - register_default_handlers, - get_processor_stats, - get_active_calls, - ACSEventTypes, -) - - -async def demo_v1_event_processing(): - """ - Demo showing how to use the V1 Event Processor. - """ - print("🚀 V1 Event Processor Demo") - print("=" * 50) - - # 1. Register default handlers (adapted from legacy) - print("📋 Registering default handlers...") - register_default_handlers() - - # 2. Get processor instance - processor = get_call_event_processor() - - # 3. Show initial stats - print("📊 Initial processor stats:") - stats = get_processor_stats() - print(json.dumps(stats, indent=2)) - - # 4. 
Create sample CloudEvents (like from ACS webhook) - sample_events = [ - CloudEvent( - source="azure.communication.callautomation", - type=ACSEventTypes.CALL_CONNECTED, - data={ - "callConnectionId": "demo-call-123", - "callConnectionProperties": {"connectedTime": "2025-08-11T10:30:00Z"}, - }, - ), - CloudEvent( - source="azure.communication.callautomation", - type=ACSEventTypes.PARTICIPANTS_UPDATED, - data={ - "callConnectionId": "demo-call-123", - "participants": [ - { - "identifier": { - "phoneNumber": {"value": "+1234567890"}, - "rawId": "4:+1234567890", - } - } - ], - }, - ), - CloudEvent( - source="azure.communication.callautomation", - type=ACSEventTypes.DTMF_TONE_RECEIVED, - data={"callConnectionId": "demo-call-123", "tone": "1", "sequenceId": 1}, - ), - ] - - # 5. Process events through V1 processor - print("🔄 Processing sample events...") - - # Mock request state (normally from FastAPI request.app.state) - class MockRequestState: - def __init__(self): - self.redis = None - self.acs_caller = None - self.clients = [] - - mock_state = MockRequestState() - - # Process the events - result = await processor.process_events(sample_events, mock_state) - - print("✅ Processing result:") - print(json.dumps(result, indent=2)) - - # 6. Show updated stats - print("📊 Updated processor stats:") - final_stats = get_processor_stats() - print(json.dumps(final_stats, indent=2)) - - # 7. Show active calls - print("📞 Active calls:") - active_calls = get_active_calls() - print(list(active_calls)) - - print("✅ Demo completed!") - - -def create_webhook_handler_example(): - """ - Example of how to integrate V1 Event Processor with FastAPI webhook endpoint. - """ - app = FastAPI() - - @app.post("/webhook/acs-events") - async def handle_acs_webhook(request: Request): - """ - Example webhook handler using V1 Event Processor. - - This replaces the complex event registry with simple, direct processing. 
- """ - try: - # Parse CloudEvents from webhook - events_data = await request.json() - - # Convert to CloudEvent objects - cloud_events = [] - for event_data in events_data: - cloud_event = CloudEvent( - source="azure.communication.callautomation", - type=event_data.get("eventType", "Unknown"), - data=event_data.get("data", event_data), - ) - cloud_events.append(cloud_event) - - # Ensure handlers are registered - register_default_handlers() - - # Process through V1 Event Processor - processor = get_call_event_processor() - result = await processor.process_events(cloud_events, request.app.state) - - return JSONResponse( - { - "status": "success", - "processed": result.get("processed", 0), - "api_version": "v1", - "processor_type": "v1_event_processor", - } - ) - - except Exception as e: - return JSONResponse({"error": str(e), "api_version": "v1"}, status_code=500) - - return app - - -if __name__ == "__main__": - # Run the demo - asyncio.run(demo_v1_event_processing()) - - print("\n" + "=" * 50) - print("📖 Integration Example:") - print("See create_webhook_handler_example() for FastAPI integration") - print("Key benefits of V1 Event Processor:") - print("- ✅ Simple handler registration") - print("- ✅ Call correlation by callConnectionId") - print("- ✅ Direct integration with legacy handlers") - print("- ✅ No complex middleware or retry logic") - print("- ✅ Inspired by Azure's Event Processor pattern") diff --git a/apps/rtagent/backend/api/v1/handlers/acs_media_lifecycle.py b/apps/rtagent/backend/api/v1/handlers/acs_media_lifecycle.py index 40bf6e4b..0948c0ad 100644 --- a/apps/rtagent/backend/api/v1/handlers/acs_media_lifecycle.py +++ b/apps/rtagent/backend/api/v1/handlers/acs_media_lifecycle.py @@ -90,7 +90,7 @@ class ThreadBridge: Implements the non-blocking patterns described in the documentation. """ - def __init__(self): + def __init__(self, call_connection_id: Optional[str] = None): """ Initialize cross-thread communication bridge. 
@@ -98,8 +98,9 @@ def __init__(self): :type main_loop: Optional[asyncio.AbstractEventLoop] """ self.main_loop: Optional[asyncio.AbstractEventLoop] = None + self.call_connection_id = call_connection_id # Create shorthand for call connection ID (last 8 chars) - self.call_id_short = "unknown" + self.call_id_short = call_connection_id[-8:] if call_connection_id else "unknown" def set_main_loop( self, loop: asyncio.AbstractEventLoop, call_connection_id: str = None @@ -454,6 +455,7 @@ class RouteTurnThread: def __init__( self, + call_connection_id: Optional[str], speech_queue: asyncio.Queue, orchestrator_func: Callable, memory_manager: Optional[MemoManager], @@ -468,11 +470,8 @@ def __init__( self.running = False self._stopped = False # Get call ID shorthand from websocket if available - self.call_id_short = ( - getattr(websocket, "_call_connection_id", "unknown")[-8:] - if hasattr(websocket, "_call_connection_id") - else "unknown" - ) + self.call_connection_id = call_connection_id + self.call_id_short = call_connection_id[-8:] if call_connection_id else "unknown" async def start(self): """Start the route turn processing loop.""" @@ -917,10 +916,11 @@ def __init__( # Cross-thread communication self.speech_queue = asyncio.Queue(maxsize=10) - self.thread_bridge = ThreadBridge() + self.thread_bridge = ThreadBridge(call_connection_id=self.call_connection_id) # Initialize threads self.route_turn_thread = RouteTurnThread( + call_connection_id=self.call_connection_id, speech_queue=self.speech_queue, orchestrator_func=orchestrator_func, memory_manager=memory_manager, diff --git a/apps/rtagent/backend/config/app_settings_new.py b/apps/rtagent/backend/config/app_settings_new.py deleted file mode 100644 index c5579387..00000000 --- a/apps/rtagent/backend/config/app_settings_new.py +++ /dev/null @@ -1,95 +0,0 @@ -""" -Application Settings -=================== - -Main configuration module that consolidates all settings from specialized -configuration modules for easy access 
throughout the application. -""" - -# Import all settings from specialized modules -from .voice_config import * -from .connection_config import * -from .feature_flags import * -from .ai_config import * -from .security_config import * - -# ============================================================================== -# VALIDATION FUNCTIONS -# ============================================================================== - - -def validate_app_settings(): - """ - Validate current application settings and return validation results. - - Returns: - Dict containing validation status, issues, warnings, and settings count - """ - issues = [] - warnings = [] - - # Check critical pool settings - if POOL_SIZE_TTS < 1: - issues.append("POOL_SIZE_TTS must be at least 1") - elif POOL_SIZE_TTS < 10: - warnings.append(f"POOL_SIZE_TTS ({POOL_SIZE_TTS}) is quite low for production") - - if POOL_SIZE_STT < 1: - issues.append("POOL_SIZE_STT must be at least 1") - elif POOL_SIZE_STT < 10: - warnings.append(f"POOL_SIZE_STT ({POOL_SIZE_STT}) is quite low for production") - - # Check connection settings - if MAX_WEBSOCKET_CONNECTIONS < 1: - issues.append("MAX_WEBSOCKET_CONNECTIONS must be at least 1") - elif MAX_WEBSOCKET_CONNECTIONS > 1000: - warnings.append( - f"MAX_WEBSOCKET_CONNECTIONS ({MAX_WEBSOCKET_CONNECTIONS}) is very high" - ) - - # Check timeout settings - if CONNECTION_TIMEOUT_SECONDS < 60: - warnings.append( - f"CONNECTION_TIMEOUT_SECONDS ({CONNECTION_TIMEOUT_SECONDS}) is quite short" - ) - - # Check voice settings - if not GREETING_VOICE_TTS: - issues.append("GREETING_VOICE_TTS is empty") - - # Count all settings from current module - import sys - - current_module = sys.modules[__name__] - settings_count = len( - [ - name - for name in dir(current_module) - if name.isupper() and not name.startswith("_") - ] - ) - - return { - "valid": len(issues) == 0, - "issues": issues, - "warnings": warnings, - "settings_count": settings_count, - } - - -if __name__ == "__main__": - # 
Quick validation check - result = validate_app_settings() - print(f"App Settings Validation: {'✅ VALID' if result['valid'] else '❌ INVALID'}") - - if result["issues"]: - print("Issues:") - for issue in result["issues"]: - print(f" ❌ {issue}") - - if result["warnings"]: - print("Warnings:") - for warning in result["warnings"]: - print(f" ⚠️ {warning}") - - print(f"Total settings: {result['settings_count']}") diff --git a/apps/rtagent/backend/config/infrastructure.py b/apps/rtagent/backend/config/infrastructure.py index 123ddfd9..c6e23156 100644 --- a/apps/rtagent/backend/config/infrastructure.py +++ b/apps/rtagent/backend/config/infrastructure.py @@ -64,7 +64,8 @@ def __str__(self): ACS_ENDPOINT: str = os.getenv("ACS_ENDPOINT", "") ACS_CONNECTION_STRING: str = os.getenv("ACS_CONNECTION_STRING", "") ACS_SOURCE_PHONE_NUMBER: str = os.getenv("ACS_SOURCE_PHONE_NUMBER", "") -BASE_URL: str = os.getenv("BASE_URL", "") +# Base application URL (ensure no trailing slash) +BASE_URL: str = os.getenv("BASE_URL", "").rstrip("/") # ACS Streaming configuration ACS_STREAMING_MODE: StreamMode = StreamMode( diff --git a/apps/rtagent/backend/config/voice_config.py b/apps/rtagent/backend/config/voice_config.py index 35172f25..dacff91c 100644 --- a/apps/rtagent/backend/config/voice_config.py +++ b/apps/rtagent/backend/config/voice_config.py @@ -93,6 +93,9 @@ def get_agent_voice(agent_config_path: str) -> str: # AZURE VOICE LIVE SETTINGS # ============================================================================== -AZURE_VOICE_LIVE_ENDPOINT = os.getenv("AZURE_VOICE_LIVE_ENDPOINT", "") -AZURE_VOICE_API_KEY = os.getenv("AZURE_VOICE_API_KEY", "") +AZURE_VOICE_LIVE_ENDPOINT = os.getenv("AZURE_SPEECH_ENDPOINT", "") +AZURE_VOICE_API_KEY = os.getenv("AZURE_SPEECH_KEY", "") AZURE_VOICE_LIVE_MODEL = os.getenv("AZURE_VOICE_LIVE_MODEL", "gpt-4o") +# AZURE_VOICE_LIVE_ENDPOINT = os.getenv("AZURE_VOICE_LIVE_ENDPOINT", "") +# AZURE_VOICE_API_KEY = os.getenv("AZURE_VOICE_API_KEY", "") +# 
AZURE_VOICE_LIVE_MODEL = os.getenv("AZURE_VOICE_LIVE_MODEL", "gpt-4o") diff --git a/apps/rtagent/backend/main.py b/apps/rtagent/backend/main.py index 8922cecd..4d85cd40 100644 --- a/apps/rtagent/backend/main.py +++ b/apps/rtagent/backend/main.py @@ -141,7 +141,7 @@ async def lifespan(app: FastAPI): try: app.state.redis = AzureRedisManager() await app.state.redis.initialize() - logger.info("Redis initialized successfully") + logger.info("Redis initialized successfully with cluster support and retry logic") except Exception as e: logger.error(f"Redis initialization failed: {e}") raise RuntimeError(f"Redis initialization failed: {e}") @@ -244,10 +244,10 @@ async def make_stt() -> StreamingSpeechRecognizerFromBytes: if os.getenv("AOAI_POOL_ENABLED", "true").lower() == "true": logger.info("Initializing AOAI client pool during startup...") - start_time = time.time() + aoai_start = time.perf_counter() aoai_pool = await get_aoai_pool() if aoai_pool: - init_time = time.time() - start_time + init_time = time.perf_counter() - aoai_start logger.info( f"AOAI client pool pre-initialized in {init_time:.2f}s with {len(aoai_pool.clients)} clients" ) @@ -256,16 +256,6 @@ async def make_stt() -> StreamingSpeechRecognizerFromBytes: else: logger.info("AOAI pool disabled, skipping startup initialization") - # if ACS_STREAMING_MODE == StreamMode.VOICE_LIVE: - # # Initialize Voice Live warm pool (pre-connect agents) - # span.set_attribute("startup.stage", "voice_live_pool") - # try: - # # Use background prewarm to avoid blocking startup time - # app.state.voice_live_pool = await get_voice_live_pool(background_prewarm=True) - # logger.info("Voice Live pool initialization scheduled (background prewarm)") - # except Exception as e: - # logger.error(f"Voice Live pool initialization failed: {e}") - # ------------------------ Other singletons --------------------------- span.set_attribute("startup.stage", "cosmos_db") app.state.cosmos = CosmosDBMongoCoreManager( diff --git a/azure.yaml 
b/azure.yaml index 91d3d05d..c814411a 100644 --- a/azure.yaml +++ b/azure.yaml @@ -1,5 +1,5 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/Azure/azure-dev/main/schemas/v1.0/azure.yaml.json -name: gbb-ai-audio-agent +name: art-voice-agent-accelerator metadata: template: azd-init@1.15.1 diff --git a/devops/azure-bicep.yaml b/devops/azure-bicep.yaml index 2db9f17a..51ec797f 100644 --- a/devops/azure-bicep.yaml +++ b/devops/azure-bicep.yaml @@ -1,5 +1,5 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/Azure/azure-dev/main/schemas/v1.0/azure.yaml.json -name: gbb-ai-audio-agent +name: art-voice-agent-accelerator metadata: template: azd-init@1.15.1 diff --git a/devops/scripts/Generate-EnvFromTerraform.ps1 b/devops/scripts/Generate-EnvFromTerraform.ps1 deleted file mode 100644 index 233b4192..00000000 --- a/devops/scripts/Generate-EnvFromTerraform.ps1 +++ /dev/null @@ -1,444 +0,0 @@ -# ============================================================================== -# Environment File Generation from Terraform Remote State (PowerShell) -# ============================================================================== -# This script extracts values from Terraform remote state and creates a local -# .env file matching the project's expected format. 
-# -# Usage: -# .\Generate-EnvFromTerraform.ps1 [-EnvironmentName ] [-SubscriptionId ] [-Action ] -# -# Parameters: -# -EnvironmentName Environment name (default: dev) -# -SubscriptionId Azure subscription ID (auto-detected if not provided) -# -Action Action to perform: generate, update-secrets, show (default: generate) -# -# Requirements: -# - Terraform CLI installed and configured -# - Azure CLI installed and authenticated -# - Terraform state properly initialized with remote backend -# ============================================================================== - -[CmdletBinding()] -param( - [Parameter(Position = 0)] - [string]$EnvironmentName = $env:AZURE_ENV_NAME, - - [Parameter(Position = 1)] - [string]$SubscriptionId = $env:AZURE_SUBSCRIPTION_ID, - - [Parameter(Position = 2)] - [ValidateSet("generate", "update-secrets", "show")] - [string]$Action = "generate" -) - -# Set error action preference -$ErrorActionPreference = "Stop" - -# Configuration -$ScriptRoot = Split-Path -Parent $MyInvocation.MyCommand.Definition -$ProjectRoot = Resolve-Path (Join-Path $ScriptRoot "../..") -$TerraformDir = Join-Path $ProjectRoot "infra/terraform" - -# Default values -if (-not $EnvironmentName) { $EnvironmentName = "dev" } -if (-not $SubscriptionId) { - try { - $SubscriptionId = (az account show --query id -o tsv 2>$null) - } - catch { - $SubscriptionId = "" - } -} - -$EnvFile = Join-Path $ProjectRoot ".env.$EnvironmentName" - -# Logging functions -function Write-LogInfo { - param([string]$Message) - Write-Host "ℹ️ $Message" -ForegroundColor Blue -} - -function Write-LogSuccess { - param([string]$Message) - Write-Host "✅ $Message" -ForegroundColor Green -} - -function Write-LogWarning { - param([string]$Message) - Write-Host "⚠️ $Message" -ForegroundColor Yellow -} - -function Write-LogError { - param([string]$Message) - Write-Host "❌ $Message" -ForegroundColor Red -} - -function Write-LogSection { - param([string]$Message) - Write-Host "" - Write-Host "🔧 $Message" 
-ForegroundColor Cyan - Write-Host "============================================================" -ForegroundColor Cyan -} - -# Validation functions -function Test-Prerequisites { - Write-LogSection "Checking Prerequisites" - - # Check Terraform CLI - try { - $terraformVersion = terraform version 2>$null | Select-Object -First 1 - Write-LogInfo "Terraform CLI: $terraformVersion" - } - catch { - Write-LogError "Terraform CLI is not installed or not in PATH" - exit 1 - } - - # Check Azure CLI - try { - $azVersion = (az version --query '"azure-cli"' -o tsv 2>$null) - Write-LogInfo "Azure CLI: $azVersion" - } - catch { - Write-LogError "Azure CLI is not installed or not in PATH" - exit 1 - } - - # Check Azure CLI authentication - try { - az account show 2>$null | Out-Null - } - catch { - Write-LogError "Azure CLI is not authenticated. Run 'az login' first" - exit 1 - } - - # Validate subscription ID - if (-not $SubscriptionId) { - Write-LogError "AZURE_SUBSCRIPTION_ID is not set" - Write-LogError "Please provide it as a parameter or set the environment variable" - exit 1 - } - Write-LogInfo "Azure Subscription: $SubscriptionId" - - # Check Terraform directory - if (-not (Test-Path $TerraformDir)) { - Write-LogError "Terraform directory not found: $TerraformDir" - exit 1 - } - - # Check Terraform initialization - $terraformStateFile = Join-Path $TerraformDir ".terraform/terraform.tfstate" - if (-not (Test-Path $terraformStateFile)) { - Write-LogError "Terraform is not initialized in $TerraformDir" - Write-LogError "Run 'terraform init' in the terraform directory first" - exit 1 - } - - Write-LogSuccess "All prerequisites satisfied" -} - -# Get Terraform output value with error handling (fallback function) -function Get-TerraformOutput { - param( - [string]$OutputName, - [string]$DefaultValue = "" - ) - - Push-Location $TerraformDir - try { - $value = terraform output -raw $OutputName 2>$null - if ($LASTEXITCODE -ne 0) { - return $DefaultValue - } - return $value - } - 
catch { - return $DefaultValue - } - finally { - Pop-Location - } -} - -# Get all Terraform outputs in one operation for efficiency -function Get-AllTerraformOutputs { - Push-Location $TerraformDir - try { - $outputsJson = terraform output -json 2>$null - if ($LASTEXITCODE -ne 0) { - return "{}" - } - return $outputsJson - } - catch { - return "{}" - } - finally { - Pop-Location - } -} - -# Extract specific output value from JSON with error handling -function Get-OutputValue { - param( - [string]$OutputsJson, - [string]$OutputName, - [string]$DefaultValue = "" - ) - - try { - # Convert JSON string to PowerShell object - $outputs = $OutputsJson | ConvertFrom-Json - - # Check if the output exists and has a value property - if ($outputs.PSObject.Properties.Name -contains $OutputName) { - $value = $outputs.$OutputName.value - if ($null -ne $value -and $value -ne "") { - return $value - } - } - - # Return default value if not found or empty - return $DefaultValue - } - catch { - # Fallback: use individual terraform output calls if JSON parsing fails - Write-LogWarning "JSON parsing failed, falling back to individual terraform output calls" - return Get-TerraformOutput $OutputName $DefaultValue - } -} - -# Generate environment file -function New-EnvironmentFile { - Write-LogSection "Generating Environment File from Terraform State" - - Write-LogInfo "Extracting values from Terraform remote state..." - Write-LogInfo "Target file: $EnvFile" - - # Get all Terraform outputs in one operation - Write-LogInfo "Fetching all Terraform outputs..." 
- $terraformOutputs = Get-AllTerraformOutputs - - # Create the environment file content - $content = @" -# Generated automatically on $(Get-Date) -# Environment: $EnvironmentName -# Source: Terraform remote state -# Subscription: $SubscriptionId -# ================================================================= - -# Application Insights Configuration -APPLICATIONINSIGHTS_CONNECTION_STRING=$(Get-OutputValue $terraformOutputs "APPLICATIONINSIGHTS_CONNECTION_STRING") - -# Azure OpenAI Configuration -AZURE_OPENAI_KEY= -AZURE_OPENAI_ENDPOINT=$(Get-OutputValue $terraformOutputs "AZURE_OPENAI_ENDPOINT") -AZURE_OPENAI_DEPLOYMENT=gpt-4o -AZURE_OPENAI_API_VERSION=$(Get-OutputValue $terraformOutputs "AZURE_OPENAI_API_VERSION" "2025-01-01-preview") -AZURE_OPENAI_CHAT_DEPLOYMENT_ID=$(Get-OutputValue $terraformOutputs "AZURE_OPENAI_CHAT_DEPLOYMENT_ID" "gpt-4o") -AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION=2024-10-01-preview - -# Azure Speech Services Configuration -AZURE_SPEECH_ENDPOINT=$(Get-OutputValue $terraformOutputs "AZURE_SPEECH_ENDPOINT") -AZURE_SPEECH_KEY= -AZURE_SPEECH_RESOURCE_ID=$(Get-OutputValue $terraformOutputs "AZURE_SPEECH_RESOURCE_ID") -AZURE_SPEECH_REGION=$(Get-OutputValue $terraformOutputs "AZURE_SPEECH_REGION") -TTS_ENABLE_LOCAL_PLAYBACK=true - -# Base URL Configuration -# Prompt user for BASE_URL if not set in azd env -BASE_URL="" - -# Backend App Service URL (from Terraform output if available) -BACKEND_APP_SERVICE_URL=$(Get-OutputValue $terraformOutputs "BACKEND_APP_SERVICE_URL" "") - -# Azure Communication Services Configuration -ACS_CONNECTION_STRING= -ACS_SOURCE_PHONE_NUMBER= -ACS_ENDPOINT=$(Get-OutputValue $terraformOutputs "ACS_ENDPOINT") - -# Redis Configuration -REDIS_HOST=$(Get-OutputValue $terraformOutputs "REDIS_HOSTNAME") -REDIS_PORT=$(Get-OutputValue $terraformOutputs "REDIS_PORT" "10000") -REDIS_PASSWORD= - -# Azure Storage Configuration -AZURE_STORAGE_CONNECTION_STRING= -AZURE_STORAGE_CONTAINER_URL=$(Get-OutputValue $terraformOutputs 
"AZURE_STORAGE_CONTAINER_URL") -AZURE_STORAGE_ACCOUNT_NAME=$(Get-OutputValue $terraformOutputs "AZURE_STORAGE_ACCOUNT_NAME") - -# Azure Cosmos DB Configuration -AZURE_COSMOS_DATABASE_NAME=$(Get-OutputValue $terraformOutputs "AZURE_COSMOS_DATABASE_NAME" "audioagentdb") -AZURE_COSMOS_COLLECTION_NAME=$(Get-OutputValue $terraformOutputs "AZURE_COSMOS_COLLECTION_NAME" "audioagentcollection") -AZURE_COSMOS_CONNECTION_STRING=$(Get-OutputValue $terraformOutputs "AZURE_COSMOS_CONNECTION_STRING") - -# Azure Identity Configuration -AZURE_SUBSCRIPTION_ID=$SubscriptionId - -# Azure Resource Configuration -AZURE_RESOURCE_GROUP=$(Get-OutputValue $terraformOutputs "AZURE_RESOURCE_GROUP") -AZURE_LOCATION=$(Get-OutputValue $terraformOutputs "AZURE_LOCATION") - -# Application Configuration -ACS_STREAMING_MODE=media -ENVIRONMENT=$EnvironmentName - -# Logging Configuration -LOG_LEVEL=INFO -ENABLE_DEBUG=false -"@ - - # Write content to file - $content | Out-File -FilePath $EnvFile -Encoding UTF8 - - # Generate summary - $varCount = (Get-Content $EnvFile | Where-Object { $_ -match '^[A-Z]' }).Count - - Write-LogSuccess "Environment file generated successfully: $EnvFile" - Write-LogInfo "Configuration contains $varCount variables" - Write-Host "" - Write-LogWarning "Note: Some values like keys and connection strings may be empty" - Write-LogWarning "These sensitive values should be retrieved separately using Azure CLI or Key Vault" - Write-Host "" - Write-LogInfo "Next steps:" - Write-Host " 1. Review the generated file: Get-Content $EnvFile" - Write-Host " 2. Set missing sensitive values (keys, connection strings)" - Write-Host " 3. Update BASE_URL with your actual backend URL" - Write-Host " 4. 
Import the variables (see documentation for your shell)" -} - -# Update environment file with secrets from Key Vault -function Update-EnvironmentWithSecrets { - Write-LogSection "Updating Environment File with Secrets from Key Vault" - - if (-not (Test-Path $EnvFile)) { - Write-LogError "Environment file $EnvFile does not exist" - Write-LogError "Run this script first to generate the base file" - exit 1 - } - - Write-LogInfo "Retrieving secrets from Azure Key Vault..." - - # Get Key Vault name from Terraform (single operation) - $terraformOutputs = Get-AllTerraformOutputs - $kvName = Get-OutputValue $terraformOutputs "AZURE_KEY_VAULT_NAME" - - if ($kvName -and $kvName -ne "" -and $kvName -ne "null") { - Write-LogInfo "Using Key Vault: $kvName" - - # Helper function to update environment variable - function Update-EnvVar { - param( - [string]$VarName, - [string]$SecretName - ) - - Write-LogInfo "Updating $VarName..." - try { - $secretValue = az keyvault secret show --name $SecretName --vault-name $kvName --query value -o tsv 2>$null - if ($LASTEXITCODE -eq 0 -and $secretValue) { - # Read file content - $content = Get-Content $EnvFile - - # Update the specific line - $updatedContent = $content | ForEach-Object { - if ($_ -match "^$VarName=") { - "$VarName=$secretValue" - } else { - $_ - } - } - - # Write back to file - $updatedContent | Out-File -FilePath $EnvFile -Encoding UTF8 - Write-LogSuccess "$VarName updated" - } else { - Write-LogWarning "$VarName secret not found in Key Vault" - } - } - catch { - Write-LogWarning "Failed to retrieve $VarName from Key Vault: $($_.Exception.Message)" - } - } - - # Update secrets - Update-EnvVar "AZURE_OPENAI_KEY" "AZURE-OPENAI-KEY" - Update-EnvVar "AZURE_SPEECH_KEY" "AZURE-SPEECH-KEY" - Update-EnvVar "ACS_CONNECTION_STRING" "ACS-CONNECTION-STRING" - Update-EnvVar "REDIS_PASSWORD" "REDIS-PASSWORD" - Update-EnvVar "AZURE_STORAGE_CONNECTION_STRING" "AZURE-STORAGE-CONNECTION-STRING" - - Write-LogSuccess "Secrets updated 
successfully" - } else { - Write-LogWarning "Key Vault name not found in Terraform outputs" - Write-LogWarning "Secrets will need to be set manually" - } -} - -# Show environment file information -function Show-EnvironmentFile { - if (Test-Path $EnvFile) { - Write-LogInfo "Current environment file: $EnvFile" - - $content = Get-Content $EnvFile - $generationDate = ($content | Select-Object -First 1) -replace '# Generated automatically on ', '' - Write-LogInfo "Generated: $generationDate" - - $varCount = ($content | Where-Object { $_ -match '^[A-Z]' }).Count - Write-LogInfo "Variables: $varCount" - - Write-Host "" - Write-Host "Content preview:" - Write-Host "================" - $content | Select-Object -First 20 - Write-Host "... (truncated, use 'Get-Content $EnvFile' to see full content)" - } else { - Write-LogError "Environment file $EnvFile does not exist" - Write-LogError "Run this script to create it" - } -} - -# Main execution -function Invoke-Main { - switch ($Action) { - "generate" { - Test-Prerequisites - New-EnvironmentFile - } - "update-secrets" { - Test-Prerequisites - Update-EnvironmentWithSecrets - } - "show" { - Show-EnvironmentFile - } - } -} - -# Show usage if no parameters provided and script is run directly -if ($MyInvocation.InvocationName -eq $MyInvocation.MyCommand.Name) { - if (-not $PSBoundParameters.Count -and -not $args.Count) { - Write-Host "" - Write-Host "Environment File Generation from Terraform Remote State (PowerShell)" -ForegroundColor Cyan - Write-Host "====================================================================" -ForegroundColor Cyan - Write-Host "" - Write-Host "Usage:" - Write-Host " .\Generate-EnvFromTerraform.ps1 [-EnvironmentName ] [-SubscriptionId ] [-Action ]" - Write-Host "" - Write-Host "Parameters:" - Write-Host " -EnvironmentName Environment name (default: dev)" - Write-Host " -SubscriptionId Azure subscription ID (auto-detected if not provided)" - Write-Host " -Action Action to perform: generate, 
update-secrets, show (default: generate)" - Write-Host "" - Write-Host "Examples:" - Write-Host " .\Generate-EnvFromTerraform.ps1 -EnvironmentName dev" - Write-Host " .\Generate-EnvFromTerraform.ps1 -EnvironmentName prod -SubscriptionId `$env:AZURE_SUBSCRIPTION_ID" - Write-Host " .\Generate-EnvFromTerraform.ps1 -Action update-secrets" - Write-Host "" - exit 0 - } -} - -# Execute main function -Invoke-Main diff --git a/devops/scripts/README.md b/devops/scripts/README.md deleted file mode 100644 index 1fce8394..00000000 --- a/devops/scripts/README.md +++ /dev/null @@ -1,168 +0,0 @@ -# **DevOps Scripts** - -**Automation scripts** for Azure deployment pipeline setup and management for ARTVoice Accelerator. - -## **Quick Start** - -```bash -# Complete CI/CD setup for azd deployment -./setup-gha-config.sh --interactive -``` - -This configures: -- Azure App Registration for OIDC authentication -- GitHub Actions federated credentials -- Azure permissions and Terraform state storage -- Optional GitHub secrets/variables setup - -## **Scripts Overview** - -### **CI/CD Setup** -- **[`setup-gha-config.sh`](./setup-gha-config.sh)** - Complete CI/CD setup (start here) - -### **Azure Developer CLI Helpers** -- **[`azd/`](./azd/)** - AZD lifecycle hooks and utilities - - [`postprovision.sh`](./azd/postprovision.sh) - Post-deployment configuration - - [`preprovision.sh`](./azd/preprovision.sh) - Pre-deployment setup - -### **Infrastructure Management** -- **[`generate-env-from-terraform.sh`](./generate-env-from-terraform.sh)** - Generate .env from Terraform outputs -- **[`validate-terraform-backend.sh`](./validate-terraform-backend.sh)** - Validate Terraform backend -- **[`webapp-deploy.sh`](./webapp-deploy.sh)** - Direct webapp deployment - -## **Prerequisites** - -- **Azure CLI** (`az`) - [Install Guide](https://docs.microsoft.com/cli/azure/install-azure-cli) -- **Azure Developer CLI** (`azd`) - [Install 
Guide](https://learn.microsoft.com/azure/developer/azure-developer-cli/install-azd) -- **jq** - JSON processor -- **OpenSSL** - For generating random values - -### Optional Tools -- **GitHub CLI** (`gh`) - For automatic secret configuration -- **Terraform** - If using direct Terraform deployment - -### Permissions -- **Azure**: Contributor + User Access Administrator on target subscription -- **GitHub**: Admin access to repository for secrets/variables configuration - -## 🔐 Authentication Setup - -### Azure Authentication -```bash -# Login to Azure -az login - -# Set default subscription (if needed) -az account set --subscription "your-subscription-id" -``` - -### GitHub Authentication (Optional) -```bash -# Login to GitHub CLI (for automatic secret setup) -gh auth login -``` - -## 🎯 Usage Examples - -### Interactive Setup (Recommended for first-time users) -```bash -./setup-gha-config.sh --interactive -``` - -### Automated Setup with Environment Variables -```bash -export GITHUB_ORG="your-org" -export GITHUB_REPO="your-repo" -export AZURE_LOCATION="eastus" -export AZURE_ENV_NAME="dev" - -./setup-gha-config.sh -``` - -### Production Environment Setup -```bash -AZURE_ENV_NAME=prod AZURE_LOCATION=westus2 ./setup-gha-config.sh -``` - -## 📤 Output - -After running the setup script, you'll get: - -### 1. **Azure Resources Created** -- App Registration for OIDC authentication -- Service Principal with proper permissions -- Terraform remote state storage account -- Federated credentials for GitHub Actions - -### 2. **GitHub Configuration** -- Repository secrets for Azure authentication -- Repository variables for deployment configuration -- Ready-to-use workflows in `.github/workflows/` - -### 3. 
**Configuration Summary** -- Saved to `.azd-cicd-config.txt` in project root -- Contains all IDs, names, and next steps - -## 🔍 Troubleshooting - -### Common Issues - -**Permission Denied Errors:** -```bash -# Check your Azure permissions -az role assignment list --assignee $(az ad signed-in-user show --query id -o tsv) --output table -``` - -**GitHub CLI Not Authenticated:** -```bash -# Re-authenticate to GitHub -gh auth login --git-protocol https -``` - -**Storage Account Access Issues:** -```bash -# Test storage access -az storage container list --account-name YOUR_STORAGE_ACCOUNT --auth-mode login -``` - -### Debug Mode -```bash -# Run with debug output -bash -x ./setup-gha-config.sh --interactive -``` - -## 🔄 Updating Configuration - -To update existing configuration: - -1. **Add new environments**: Run script with `AZURE_ENV_NAME=newenv` -2. **Update permissions**: Re-run the script (it's idempotent) -3. **Rotate credentials**: Delete app registration and re-run - -## 📚 Related Documentation - -- [GitHub Secrets Configuration Guide](../../.github/SECRETS.md) -- [Azure Developer CLI Deployment](../../docs/AZD-DEPLOYMENT.md) -- [CI/CD Pipeline Guide](../../docs/CICDGuide.md) -- [Microsoft Docs: Container Apps GitHub Actions](https://learn.microsoft.com/azure/container-apps/github-actions-cli) - -## 💡 Best Practices - -1. **Start with dev environment** - Test thoroughly before production -2. **Use environment-specific configurations** - Separate dev/staging/prod -3. **Review permissions regularly** - Follow principle of least privilege -4. **Monitor deployment logs** - Use Azure Monitor and GitHub Actions logs -5. **Keep secrets up to date** - Rotate credentials periodically - -## 🆘 Support - -Need help? Check these resources: - -1. **Script help**: `./setup-gha-config.sh --help` -2. **Project documentation**: Check `docs/` directory -3. **Azure support**: [Azure Portal Support](https://portal.azure.com/#blade/Microsoft_Azure_Support/HelpAndSupportBlade) -4. 
**GitHub support**: [GitHub Actions documentation](https://docs.github.com/actions) - ---- - -**Happy Deploying! 🚀** diff --git a/devops/scripts/azd/helpers/appsvc-deploy.sh b/devops/scripts/azd/helpers/appsvc-deploy.sh deleted file mode 100755 index 6236cfc4..00000000 --- a/devops/scripts/azd/helpers/appsvc-deploy.sh +++ /dev/null @@ -1,433 +0,0 @@ -#!/bin/bash - - -# ================# Helper functions -log_info() { - echo -e "${BLUE}ℹ️ [INFO]${NC} $*" -} - -log_success() { - echo -e "${GREEN}✅ [SUCCESS]${NC} $*" -} - -log_warning() { - echo -e "${YELLOW}⚠️ [WARNING]${NC} $*" -} - -log_error() { - echo -e "${RED}❌ [ERROR]${NC} $*" >&2 -} - -set -euo pipefail - -# Colors for output -readonly RED='\033[0;31m' -readonly GREEN='\033[0;32m' -readonly YELLOW='\033[1;33m' -readonly BLUE='\033[0;34m' -readonly NC='\033[0m' # No Color - -# Constants -readonly SCRIPT_NAME="$(basename "$0")" -readonly REQUIRED_COMMANDS=("az" "azd" "rsync" "zip" "curl") - -# Configuration -readonly AGENT="${1:-ARTAgent}" -readonly AGENT_BACKEND="apps/$AGENT/backend" -readonly BACKEND_DIRS=("src" "utils") -readonly REQUIRED_FILES=("requirements.txt") -readonly EXCLUDE_PATTERNS=("__pycache__" "*.pyc" ".pytest_cache" "*.log" ".coverage" "htmlcov" ".DS_Store" ".git" "node_modules" "*.tmp" "*.temp") - -echo "🚀 Deploying $AGENT to App Service" - -# Get AZD variables and validate -RG=$(azd env get-value AZURE_RESOURCE_GROUP) -BACKEND_APP=$(azd env get-value BACKEND_APP_SERVICE_NAME) -AZD_ENV=$(azd env get-value AZURE_ENV_NAME) - -[[ -z "$RG" || -z "$BACKEND_APP" || -z "$AZD_ENV" ]] && { echo "❌ Missing AZD environment variables"; exit 1; } - -echo "✅ Validated: $BACKEND_APP in $RG (env: $AZD_ENV)" - -# Prepare deployment package -TEMP_DIR=".azure/$AZD_ENV/backend" -echo "� Preparing deployment in: $TEMP_DIR" -rm -rf "$TEMP_DIR" && mkdir -p "$TEMP_DIR" - - -# Check if required commands exist -check_dependencies() { - local missing_commands=() - - for cmd in "${REQUIRED_COMMANDS[@]}"; do - if ! 
command -v "$cmd" &> /dev/null; then - missing_commands+=("$cmd") - fi - done - - if [[ ${#missing_commands[@]} -gt 0 ]]; then - log_error "Missing required commands: ${missing_commands[*]}" - log_error "Please install missing dependencies and try again." - exit 1 - fi -} - -# Get azd environment variable value -get_azd_env_value() { - local key="$1" - local value - - if value=$(azd env get-value "$key" 2>/dev/null); then - echo "$value" - else - echo "" - fi -} - -# 🚀 Azure App Service Deployment Script -# ======================================================================== -# This script deploys backend applications to Azure App Service with -# configurable file inclusion/exclusion patterns and automatic validation. -# -# Usage: ./appsvc-deploy.sh [AGENT_NAME] -# -# ======================================================================== - - -# Copy files with exclusions -copy_with_excludes() { - local src="$1" - local dest="$2" - - if [[ ! -d "$src" ]]; then - log_warning "Source directory not found: $src" - return 1 - fi - - log_info "Copying: $src -> $dest" - - # Build exclude arguments - local exclude_args=() - for pattern in "${EXCLUDE_PATTERNS[@]}"; do - exclude_args+=(--exclude="$pattern") - done - - rsync -a "${exclude_args[@]}" "$src/" "$dest/" -} - -# Prepare deployment package -prepare_deployment_package() { - local temp_dir="$1" - - log_info "Preparing deployment package..." 
- - # Clean and create temp deployment directory - rm -rf "$temp_dir" && mkdir -p "$temp_dir" - - # Copy agent backend - mkdir -p "$temp_dir/$AGENT_BACKEND" - copy_with_excludes "$AGENT_BACKEND" "$temp_dir/$AGENT_BACKEND" - - # Copy shared directories - for dir in "${BACKEND_DIRS[@]}"; do - if [[ -d "$dir" ]]; then - copy_with_excludes "$dir" "$temp_dir/$dir" - else - log_warning "Configured directory not found: $dir" - fi - done - - # Copy required files - for file in "${REQUIRED_FILES[@]}"; do - if [[ -f "$file" ]]; then - log_info "Copying required file: $file" - cp "$file" "$temp_dir/" - else - log_error "Required file missing: $file" - exit 1 - fi - done - - log_success "Deployment package prepared successfully" -} - -# Create deployment zip -create_deployment_zip() { - local temp_dir="$1" - - log_info "Creating deployment zip..." - - cd "$temp_dir" - - # Build zip exclusion arguments - local zip_exclude_args=() - for pattern in "${EXCLUDE_PATTERNS[@]}"; do - zip_exclude_args+=(-x "$pattern") - done - - zip -rq backend.zip . "${zip_exclude_args[@]}" - - if [[ ! -f "backend.zip" ]]; then - log_error "Failed to create backend.zip" - exit 1 - fi - - local zip_size - zip_size=$(du -h backend.zip | cut -f1) - log_success "Deployment zip created successfully (size: $zip_size)" - - cd - > /dev/null -} - -# Configure App Service settings -configure_app_service() { - log_info "Configuring App Service settings..." 
- - # Get current backend app service URL from azd - BACKEND_APP_SERVICE_URL=$(azd env get-value BACKEND_APP_SERVICE_URL) - if [[ -z "$BACKEND_APP_SERVICE_URL" ]]; then - log_warning "BACKEND_APP_SERVICE_URL not set in azd environment" - BACKEND_APP_SERVICE_URL="https://$BACKEND_APP.azurewebsites.net" - fi - - # Set startup command - az webapp config set \ - --resource-group "$RG" \ - --name "$BACKEND_APP" \ - --startup-file "python -m uvicorn rtagents.${AGENT}.backend.main:app --host 0.0.0.0 --port 8000" \ - --output none - - # Set BASE_URL environment variable for the web app - az webapp config appsettings set \ - --resource-group "$RG" \ - --name "$BACKEND_APP" \ - --settings \ - "PYTHONPATH=/home/site/wwwroot" \ - "SCM_DO_BUILD_DURING_DEPLOYMENT=true" \ - "ENABLE_ORYX_BUILD=true" \ - "ORYX_APP_TYPE=webapps" \ - "WEBSITES_PORT=8000" \ - "BASE_URL=$BACKEND_APP_SERVICE_URL" \ - --output none - - log_success "App Service configured successfully" -} -deploy_to_app_service() { - local temp_dir="$1" - - log_info "Deploying to Azure App Service..." - - cd "$temp_dir" - - # Attempt deployment with timeout handling - local deployment_status=0 - local deployment_output - - if deployment_output=$(az webapp deploy \ - --resource-group "$RG" \ - --name "$BACKEND_APP" \ - --src-path "backend.zip" \ - --type zip 2>&1); then - log_success "Deployment command completed successfully" - deployment_status=0 - else - local exit_code=$? - log_warning "Deployment command returned exit code: $exit_code" - - # Check if it's likely a timeout or server-side issue - if echo "$deployment_output" | grep -qi -E "(timeout|timed out|request timeout|gateway timeout|502|503|504)"; then - log_warning "Deployment appears to have timed out on client side" - log_info "This doesn't necessarily mean the deployment failed on the server" - log_info "Will continue to check deployment status..." 
- deployment_status=2 # Timeout/uncertain status - elif echo "$deployment_output" | grep -qi -E "(conflict|409|deployment.*progress|another deployment)"; then - log_warning "Another deployment may be in progress" - log_info "Will continue to check deployment status..." - deployment_status=2 # Concurrent deployment - else - log_error "Deployment command failed with actual error:" - echo "$deployment_output" | head -10 # Show first 10 lines of error - deployment_status=1 # Actual failure - fi - fi - - cd - > /dev/null - return $deployment_status -} - -# Wait for app to be ready and verify deployment -wait_for_app_ready() { - log_info "Waiting for app to be ready..." - - local max_attempts=30 - local deployment_verified=false - - for i in $(seq 1 $max_attempts); do - local app_state - app_state=$(az webapp show --resource-group "$RG" --name "$BACKEND_APP" --query "state" -o tsv 2>/dev/null || echo "Unknown") - - if [[ "$app_state" == "Running" ]]; then - log_success "App is running and ready" - deployment_verified=true - break - elif [[ "$app_state" == "Stopped" ]]; then - log_warning "App is stopped, attempting to start..." - az webapp start --resource-group "$RG" --name "$BACKEND_APP" --output none 2>/dev/null || true - fi - - echo " App state: $app_state (attempt $i/$max_attempts)" - sleep 5 - done - - if [[ "$deployment_verified" == "true" ]]; then - return 0 - else - log_warning "App state verification timed out after $((max_attempts * 5)) seconds" - return 1 - fi -} - -# Perform health check -perform_health_check() { - local app_url="$1" - - log_info "Performing health check..." - - if curl -sf --max-time 5 "https://$app_url/health" >/dev/null 2>&1; then - log_success "Health endpoint is responding" - else - log_warning "Health endpoint not responding (application may be starting up)" - fi -} - -# Cleanup deployment artifacts -cleanup_deployment() { - local temp_dir="$1" - log_info "Cleaning up deployment artifacts..." 
- - if [[ -f "$temp_dir/backend.zip" ]]; then - rm "$temp_dir/backend.zip" - fi - - if [[ -d "$temp_dir" ]]; then - rm -rf "$temp_dir" - fi - - log_success "Cleanup completed" -} - -# Display deployment summary -show_deployment_summary() { - local app_url="$1" - local deployment_status="$2" - - echo "" - echo "=========================================================================" - - case "$deployment_status" in - "success") - log_success "Deployment completed successfully!" - ;; - "uncertain") - log_warning "Deployment completed with uncertain status" - echo " The deployment command timed out, but the app appears to be running." - echo " This is common with large deployments and usually indicates success." - ;; - *) - log_success "Deployment process completed!" - ;; - esac - - echo "" - echo "📊 Deployment Summary:" - echo " Agent: $AGENT" - echo " App Service: $BACKEND_APP" - echo " Resource Group: $RG" - echo " Environment: $AZD_ENV" - echo " App URL: https://$app_url" - echo " Status: $deployment_status" - echo "" - echo "🌐 Test your deployment at: https://$app_url" - echo "=========================================================================" -} - -# Main function -main() { - echo "=========================================================================" - echo "🚀 Azure App Service Deployment for $AGENT backend" - echo "=========================================================================" - # Prompt user for confirmation before deploying - read -p "Are you sure you want to deploy '$AGENT' to App Service '$BACKEND_APP' in resource group '$RG'? [y/N]: " confirm - if [[ ! "$confirm" =~ ^[Yy]$ ]]; then - log_warning "$AGENT backend deployment cancelled by user." 
- exit 0 - fi - # Check dependencies - check_dependencies - - # Set deployment directory - local temp_dir=".azure/$AZD_ENV/backend" - log_info "Using deployment directory: $temp_dir" - - # Prepare deployment - prepare_deployment_package "$temp_dir" - create_deployment_zip "$temp_dir" - - # Configure and deploy - configure_app_service - - # Attempt deployment with spinner for long-running operation - local deployment_result - local spinner_pid - - # Spinner function - spinner() { - local chars="/-\|" - local i=0 - while :; do - printf "\r⏳ Deploying... %c" "${chars:i++%${#chars}:1}" - sleep 0.2 - done - } - - spinner & - spinner_pid=$! - - if deploy_to_app_service "$temp_dir"; then - deployment_result="success" - kill "$spinner_pid" >/dev/null 2>&1 - printf "\r" - log_success "Deployment completed successfully" - else - local deploy_exit_code=$? - kill "$spinner_pid" >/dev/null 2>&1 - printf "\r" - if [[ $deploy_exit_code -eq 2 ]]; then - deployment_result="uncertain" - log_warning "Deployment status uncertain due to timeout/server issues" - log_info "Continuing with verification steps..." 
- else - deployment_result="failed" - log_error "Deployment failed with actual error" - log_error "Exiting due to deployment failure" - exit 1 - fi - fi - # Wait for app and get URL - wait_for_app_ready - local app_url - app_url=$(az webapp show --resource-group "$RG" --name "$BACKEND_APP" --query "defaultHostName" -o tsv) - - # Health check and cleanup - perform_health_check "$app_url" - cleanup_deployment "$temp_dir" - - # Show summary - show_deployment_summary "$app_url" "$deployment_result" -} - -# Handle script interruption -trap 'log_error "Script interrupted by user"; exit 130' INT - -# Run main function -main "$@" \ No newline at end of file diff --git a/devops/scripts/generate-env-from-terraform.sh b/devops/scripts/generate-env-from-terraform.sh deleted file mode 100755 index d81aa6f0..00000000 --- a/devops/scripts/generate-env-from-terraform.sh +++ /dev/null @@ -1,419 +0,0 @@ -#!/bin/bash - -# ============================================================================== -# Environment File Generation from Terraform Remote State -# ============================================================================== -# This script extracts values from Terraform remote state and creates a local -# .env file matching the project's expected format. -# -# Usage: -# ./generate-env-from-terraform.sh [environment_name] [subscription_id] -# -# Parameters: -# environment_name - Environment name (default: dev) -# subscription_id - Azure subscription ID (auto-detected if not provided) -# -# Requirements: -# - terraform CLI installed and configured -# - Azure CLI installed and authenticated -# - Terraform state properly initialized with remote backend -# ============================================================================== - -set -euo pipefail - -# Configuration -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" -TF_DIR="${PROJECT_ROOT}/infra/terraform" - -# Default values -AZURE_ENV_NAME="${1:-${AZURE_ENV_NAME:-dev}}" -AZURE_SUBSCRIPTION_ID="${2:-${AZURE_SUBSCRIPTION_ID:-$(az account show --query id -o tsv 2>/dev/null || echo "")}}" -ENV_FILE="${PROJECT_ROOT}/.env.${AZURE_ENV_NAME}" - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -CYAN='\033[0;36m' -NC='\033[0m' # No Color - -# Logging functions -log_info() { - echo -e "${BLUE}ℹ️ $1${NC}" -} - -log_success() { - echo -e "${GREEN}✅ $1${NC}" -} - -log_warning() { - echo -e "${YELLOW}⚠️ $1${NC}" -} - -log_error() { - echo -e "${RED}❌ $1${NC}" -} - -log_section() { - echo "" - echo -e "${CYAN}🔧 $1${NC}" - echo "============================================================" -} - -# Validation functions -check_prerequisites() { - log_section "Checking Prerequisites" - - # Check terraform CLI - if ! command -v terraform &> /dev/null; then - log_error "Terraform CLI is not installed or not in PATH" - exit 1 - fi - log_info "Terraform CLI: $(terraform version | head -1)" - - # Check Azure CLI - if ! command -v az &> /dev/null; then - log_error "Azure CLI is not installed or not in PATH" - exit 1 - fi - log_info "Azure CLI: $(az version --query '"azure-cli"' -o tsv)" - - # Check jq for JSON parsing (optional but recommended for performance) - if command -v jq &> /dev/null; then - log_info "jq: $(jq --version) (optimal performance mode)" - else - log_warning "jq not found - falling back to individual terraform output calls" - log_warning "For better performance, install jq:" - log_warning " macOS: brew install jq" - log_warning " Ubuntu/Debian: apt-get install jq" - log_warning " RHEL/CentOS: yum install jq" - fi - - # Check Azure CLI authentication - if ! az account show &> /dev/null; then - log_error "Azure CLI is not authenticated. 
Run 'az login' first" - exit 1 - fi - - # Validate subscription ID - if [[ -z "${AZURE_SUBSCRIPTION_ID}" ]]; then - log_error "AZURE_SUBSCRIPTION_ID is not set" - log_error "Please provide it as a parameter or set the environment variable" - exit 1 - fi - log_info "Azure Subscription: ${AZURE_SUBSCRIPTION_ID}" - - # Check Terraform directory - if [[ ! -d "${TF_DIR}" ]]; then - log_error "Terraform directory not found: ${TF_DIR}" - exit 1 - fi - - # Check Terraform initialization - if [[ ! -f "${TF_DIR}/.terraform/terraform.tfstate" ]]; then - log_error "Terraform is not initialized in ${TF_DIR}" - log_error "Run 'terraform init' in the terraform directory first" - exit 1 - fi - - log_success "All prerequisites satisfied" -} - -# Get all Terraform outputs in one operation for efficiency -get_all_terraform_outputs() { - local outputs_json - - # Change to terraform directory, get all outputs, then return - pushd "${TF_DIR}" > /dev/null - outputs_json=$(terraform output -json 2>/dev/null || echo "{}") - popd > /dev/null - - echo "${outputs_json}" -} - -# Extract specific output value from JSON with error handling -extract_output_value() { - local outputs_json="$1" - local output_name="$2" - local default_value="${3:-}" - - # Try jq first if available, otherwise fallback to terraform output -raw - if command -v jq &> /dev/null; then - local value - value=$(echo "${outputs_json}" | jq -r ".\"${output_name}\".value // \"${default_value}\"" 2>/dev/null || echo "${default_value}") - echo "${value}" - else - # Fallback: use terraform output -raw for individual values - pushd "${TF_DIR}" > /dev/null - local value - value=$(terraform output -raw "${output_name}" 2>/dev/null || echo "${default_value}") - popd > /dev/null - echo "${value}" - fi -} - -# Generate environment file -generate_env_file() { - log_section "Generating Environment File from Terraform State" - - log_info "Extracting values from Terraform remote state..." 
- log_info "Target file: ${ENV_FILE}" - - # Get all Terraform outputs in one operation - log_info "Fetching all Terraform outputs..." - local terraform_outputs - terraform_outputs=$(get_all_terraform_outputs) - - # Create the environment file with header - cat > "${ENV_FILE}" << EOF -# Generated automatically on $(date) -# Environment: ${AZURE_ENV_NAME} -# Source: Terraform remote state -# Subscription: ${AZURE_SUBSCRIPTION_ID} -# ================================================================= - -EOF - - # Application Insights Configuration - cat >> "${ENV_FILE}" << EOF -# Application Insights Configuration -APPLICATIONINSIGHTS_CONNECTION_STRING=$(extract_output_value "${terraform_outputs}" "APPLICATIONINSIGHTS_CONNECTION_STRING") - -EOF - - # Azure OpenAI Configuration - cat >> "${ENV_FILE}" << EOF -# Azure OpenAI Configuration -AZURE_OPENAI_KEY= -AZURE_OPENAI_ENDPOINT=$(extract_output_value "${terraform_outputs}" "AZURE_OPENAI_ENDPOINT") -AZURE_OPENAI_DEPLOYMENT=gpt-4o -AZURE_OPENAI_API_VERSION=$(extract_output_value "${terraform_outputs}" "AZURE_OPENAI_API_VERSION" "2025-01-01-preview") -AZURE_OPENAI_CHAT_DEPLOYMENT_ID=$(extract_output_value "${terraform_outputs}" "AZURE_OPENAI_CHAT_DEPLOYMENT_ID" "gpt-4o") -AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION=2024-10-01-preview - -EOF - - # Azure Speech Services Configuration - cat >> "${ENV_FILE}" << EOF -# Azure Speech Services Configuration -AZURE_SPEECH_ENDPOINT=$(extract_output_value "${terraform_outputs}" "AZURE_SPEECH_ENDPOINT") -AZURE_SPEECH_KEY= -AZURE_SPEECH_RESOURCE_ID=$(extract_output_value "${terraform_outputs}" "AZURE_SPEECH_RESOURCE_ID") -AZURE_SPEECH_REGION=$(extract_output_value "${terraform_outputs}" "AZURE_SPEECH_REGION") - -# For local development, you can enable local playback of TTS audio. 
-TTS_ENABLE_LOCAL_PLAYBACK=true -EOF - - # Base URL Configuration - cat >> "${ENV_FILE}" << EOF -# Base URL Configuration -# Prompt user for BASE_URL if not set in azd env -BASE_URL="" - -# Backend App Service/Container App URL (from Terraform output if available) -BACKEND_APP_SERVICE_URL=$(extract_output_value "${terraform_outputs}" "BACKEND_APP_SERVICE_URL" "") -BACKEND_CONTAINER_APP_URL=$(extract_output_value "${terraform_outputs}" "BACKEND_CONTAINER_APP_URL" "") -EOF - - # Azure Communication Services Configuration - cat >> "${ENV_FILE}" << EOF -# Azure Communication Services Configuration -ACS_CONNECTION_STRING=$(extract_output_value "${terraform_outputs}" "ACS_CONNECTION_STRING") -ACS_SOURCE_PHONE_NUMBER=$(extract_output_value "${terraform_outputs}" "ACS_SOURCE_PHONE_NUMBER") -ACS_ENDPOINT=$(extract_output_value "${terraform_outputs}" "ACS_ENDPOINT") - -EOF - - # Redis Configuration - cat >> "${ENV_FILE}" << EOF -# Redis Configuration -REDIS_HOST=$(extract_output_value "${terraform_outputs}" "REDIS_HOSTNAME") -REDIS_PORT=$(extract_output_value "${terraform_outputs}" "REDIS_PORT" "10000") -REDIS_PASSWORD= - -EOF - - # Azure Storage Configuration - cat >> "${ENV_FILE}" << EOF -# Azure Storage Configuration -AZURE_STORAGE_CONNECTION_STRING=$(extract_output_value "${terraform_outputs}" "AZURE_STORAGE_CONNECTION_STRING") -AZURE_STORAGE_CONTAINER_URL=$(extract_output_value "${terraform_outputs}" "AZURE_STORAGE_CONTAINER_URL") -AZURE_STORAGE_ACCOUNT_NAME=$(extract_output_value "${terraform_outputs}" "AZURE_STORAGE_ACCOUNT_NAME") - -EOF - - # Azure Cosmos DB Configuration - cat >> "${ENV_FILE}" << EOF -# Azure Cosmos DB Configuration -AZURE_COSMOS_DATABASE_NAME=$(extract_output_value "${terraform_outputs}" "AZURE_COSMOS_DATABASE_NAME" "audioagentdb") -AZURE_COSMOS_COLLECTION_NAME=$(extract_output_value "${terraform_outputs}" "AZURE_COSMOS_COLLECTION_NAME" "audioagentcollection") -AZURE_COSMOS_CONNECTION_STRING=$(extract_output_value "${terraform_outputs}" 
"AZURE_COSMOS_CONNECTION_STRING") - -EOF - - # Azure Identity Configuration - cat >> "${ENV_FILE}" << EOF -# Azure Identity Configuration -AZURE_SUBSCRIPTION_ID=${AZURE_SUBSCRIPTION_ID} - -EOF - - # Azure Resource Configuration - cat >> "${ENV_FILE}" << EOF -# Azure Resource Configuration -AZURE_RESOURCE_GROUP=$(extract_output_value "${terraform_outputs}" "AZURE_RESOURCE_GROUP") -AZURE_LOCATION=$(extract_output_value "${terraform_outputs}" "AZURE_LOCATION") - -EOF - - # Application Configuration - cat >> "${ENV_FILE}" << EOF -# Application Configuration -ACS_STREAMING_MODE=media # Options: media, transcription -ENVIRONMENT=${AZURE_ENV_NAME} - -EOF - -# # Logging Configuration -# cat >> "${ENV_FILE}" << EOF -# # Logging Configuration -# LOG_LEVEL=INFO -# ENABLE_DEBUG=false -# EOF - - # Generate summary - local var_count - var_count=$(grep -c '^[A-Z]' "${ENV_FILE}") - - log_success "Environment file generated successfully: ${ENV_FILE}" - log_info "Configuration contains ${var_count} variables" - echo "" - log_warning "Note: Some values like keys and connection strings may be empty" - log_warning "These sensitive values should be retrieved separately using Azure CLI or Key Vault" - echo "" - log_info "Next steps:" - echo " 1. Review the generated file: cat ${ENV_FILE}" - echo " 2. Set missing sensitive values (keys, connection strings)" - echo " 3. Update BASE_URL with your actual backend URL" - echo " 4. Source the file: source ${ENV_FILE}" -} - -# Update environment file with secrets from Key Vault -update_env_with_secrets() { - log_section "Updating Environment File with Secrets from Key Vault" - - if [[ ! -f "${ENV_FILE}" ]]; then - log_error "Environment file ${ENV_FILE} does not exist" - log_error "Run this script first to generate the base file" - exit 1 - fi - - log_info "Retrieving secrets from Azure Key Vault..." 
- - # Get Key Vault name from Terraform (single operation) - local terraform_outputs - terraform_outputs=$(get_all_terraform_outputs) - local kv_name - kv_name=$(extract_output_value "${terraform_outputs}" "AZURE_KEY_VAULT_NAME") - - if [[ -n "${kv_name}" && "${kv_name}" != "null" ]]; then - log_info "Using Key Vault: ${kv_name}" - - # Helper function to update environment variable - update_env_var() { - local var_name="$1" - local secret_name="$2" - local secret_value - - log_info "Updating ${var_name}..." - secret_value=$(az keyvault secret show --name "${secret_name}" --vault-name "${kv_name}" --query value -o tsv 2>/dev/null || echo "") - - if [[ -n "${secret_value}" ]]; then - # Use different sed syntax for different variable types - if [[ "${var_name}" == *"CONNECTION_STRING"* ]]; then - sed -i.bak "s|^${var_name}=.*|${var_name}=${secret_value}|" "${ENV_FILE}" - else - sed -i.bak "s/^${var_name}=.*/${var_name}=${secret_value}/" "${ENV_FILE}" - fi - log_success "${var_name} updated" - else - log_warning "${var_name} secret not found in Key Vault" - fi - } - - # Update secrets - update_env_var "AZURE_OPENAI_KEY" "AZURE-OPENAI-KEY" - update_env_var "AZURE_SPEECH_KEY" "AZURE-SPEECH-KEY" - update_env_var "ACS_CONNECTION_STRING" "ACS-CONNECTION-STRING" - update_env_var "REDIS_PASSWORD" "REDIS-PASSWORD" - update_env_var "AZURE_STORAGE_CONNECTION_STRING" "AZURE-STORAGE-CONNECTION-STRING" - - # Clean up backup file - rm -f "${ENV_FILE}.bak" - - log_success "Secrets updated successfully" - else - log_warning "Key Vault name not found in Terraform outputs" - log_warning "Secrets will need to be set manually" - fi -} - -# Show environment file information -show_env_file() { - if [[ -f "${ENV_FILE}" ]]; then - log_info "Current environment file: ${ENV_FILE}" - local generation_date - generation_date=$(head -1 "${ENV_FILE}" | sed 's/# Generated automatically on //') - log_info "Generated: ${generation_date}" - local var_count - var_count=$(grep -c '^[A-Z]' "${ENV_FILE}") - 
log_info "Variables: ${var_count}" - echo "" - echo "Content preview:" - echo "================" - head -20 "${ENV_FILE}" - echo "... (truncated, use 'cat ${ENV_FILE}' to see full content)" - else - log_error "Environment file ${ENV_FILE} does not exist" - log_error "Run this script to create it" - fi -} - -# Main execution -main() { - local action="${3:-generate}" - - case "${action}" in - "generate") - check_prerequisites - generate_env_file - ;; - "update-secrets") - check_prerequisites - update_env_with_secrets - ;; - "show") - show_env_file - ;; - *) - echo "Usage: $0 [environment_name] [subscription_id] [action]" - echo "" - echo "Actions:" - echo " generate Generate .env file from Terraform state (default)" - echo " update-secrets Update .env file with Key Vault secrets" - echo " show Show current .env file information" - echo "" - echo "Examples:" - echo " $0 dev" - echo " $0 prod \${AZURE_SUBSCRIPTION_ID}" - echo " $0 dev \${AZURE_SUBSCRIPTION_ID} update-secrets" - exit 1 - ;; - esac -} - -# Execute main function with all parameters -main "$@" diff --git a/devops/scripts/generate_test_audio.py b/devops/scripts/generate_test_audio.py deleted file mode 100644 index 1fe7619d..00000000 --- a/devops/scripts/generate_test_audio.py +++ /dev/null @@ -1,424 +0,0 @@ -#!/usr/bin/env python3 -""" -Standalone Audio File Generator for Load Testing - -This script generates realistic customer audio files using Azure Speech Services -for testing the various agent flows (Auth, FNOL, General Info). 
- -Usage: - python generate_test_audio.py - python generate_test_audio.py --output-dir ./test_audio --count 10 - python generate_test_audio.py --agent-type auth --voice "en-US-AriaNeural" -""" - -import argparse -import json -import os -import sys -from pathlib import Path -from typing import Dict, List, Optional -import logging - -# Add project root to path for imports -project_root = Path(__file__).parent -sys.path.insert(0, str(project_root)) - -from dotenv import load_dotenv - -# Load environment variables -env_path = project_root / ".env" -if env_path.exists(): - load_dotenv(env_path) - -# Import Azure Speech SDK and identity libraries -try: - import azure.cognitiveservices.speech as speechsdk - from azure.identity import DefaultAzureCredential -except ImportError: - logging.error("Required Azure libraries not installed. Please install:") - logging.error("pip install azure-cognitiveservices-speech azure-identity") - sys.exit(1) - -# Configure logging -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) - -# Customer conversation samples organized by agent type -CUSTOMER_CONVERSATIONS = { - "auth": [ - "Hi, my name is Alice Brown, my zip code is 60610, and my last four digits of social security are 1234", - "Hello, this is John Smith. My ZIP is 90210 and the last four of my SSN are 5678", - "This is Maria Garcia, ZIP code 33101, last four social security digits 9876", - "My name is David Wilson, I live in ZIP 10001, last four of my social are 4321", - "Hi there, I'm Sarah Johnson. My ZIP code is 78701 and my last four SSN digits are 8765", - ], - "fnol_new_claim": [ - "I need to file a claim. I was rear-ended on Highway 95 about an hour ago", - "I was in a car accident this morning. A truck hit my passenger side door", - "My car was damaged in a parking lot. Someone hit it and left a note with their information", - "I need to report a claim. 
My windshield was cracked by flying debris on the freeway", - "I was backing out of my driveway and hit my neighbor's fence. I need to file a claim", - "A tree fell on my car during the storm last night. I need to start a claim", - "Someone broke into my car and stole my laptop. I want to file a theft claim", - ], - "fnol_existing_claim": [ - "I'm calling about my claim. The adjuster hasn't called me back yet", - "I filed a claim last week and need an update on the status", - "My claim number is 12345 and I wanted to check if you received the photos I sent", - "I submitted my claim documents three days ago but haven't heard anything back", - "The repair shop is asking for authorization. When will my claim be approved?", - "I got an estimate but it's higher than what the adjuster quoted. What do I do?", - ], - "general_info": [ - "What does my comprehensive coverage include exactly?", - "I want to know what my deductible is for collision coverage", - "Can you explain what roadside assistance covers under my policy?", - "I'm moving to a new state. Do I need to update my policy?", - "How much does it cost to add a teenage driver to my policy?", - "What's the difference between liability and full coverage?", - "I want to increase my coverage limits. How much would that cost?", - "Can you help me understand what uninsured motorist coverage does?", - ], - "emergency": [ - "I've been in an accident and my passenger is bleeding. We need help immediately", - "There's smoke coming from my engine and I smell gas. What should I do?", - "My car went off the road and we're trapped inside. Please send help", - "I hit a pedestrian. They're not moving. I need emergency services now", - "My car is on fire after the accident. I got out but I need the fire department", - ], - "escalation": [ - "I've been trying to reach someone about my claim for three weeks. This is ridiculous", - "Your adjuster denied my claim but I think it should be covered. 
I want to speak to a supervisor", - "I'm not satisfied with the settlement offer. I want to escalate this to management", - "This is the fourth time I've called about the same issue. I need to speak to someone in charge", - "I'm considering hiring a lawyer if this isn't resolved today", - ], -} - -# Voice options for different conversation types -VOICE_OPTIONS = { - "default": "en-US-JennyMultilingualNeural", - "male": "en-US-BrianMultilingualNeural", - "female": "en-US-EmmaMultilingualNeural", - "calm": "en-US-AriaNeural", - "urgent": "en-US-DavisNeural", - "frustrated": "en-US-GuyNeural", -} - - -class TestAudioGenerator: - """Generate audio files for load testing customer conversations.""" - - def __init__(self, output_dir: str = "./test_audio"): - """Initialize the audio generator. - - Args: - output_dir: Directory to save generated audio files - """ - self.output_dir = Path(output_dir) - self.output_dir.mkdir(parents=True, exist_ok=True) - - # Initialize Azure Speech configuration with DefaultAzureCredential - speech_region = os.getenv("AZURE_SPEECH_REGION", "centralus") - speech_endpoint = os.getenv("AZURE_SPEECH_ENDPOINT") - - logger.info("Authenticating with DefaultAzureCredential...") - - try: - # Get access token using DefaultAzureCredential - credential = DefaultAzureCredential() - token = credential.get_token("https://cognitiveservices.azure.com/.default") - - # Create speech config with endpoint if available, otherwise use region - if speech_endpoint: - logger.info(f"Using Azure Speech endpoint: {speech_endpoint}") - self.speech_config = speechsdk.SpeechConfig(endpoint=speech_endpoint) - self.speech_config.authorization_token = token.token - else: - logger.info(f"Using Azure Speech region: {speech_region}") - self.speech_config = speechsdk.SpeechConfig(region=speech_region) - self.speech_config.authorization_token = token.token - - # Store credential for token refresh if needed - self._credential = credential - - logger.info("Successfully authenticated 
with DefaultAzureCredential") - - except Exception as e: - logger.error(f"Failed to authenticate with DefaultAzureCredential: {e}") - logger.error( - "Please ensure you are logged in with 'az login' or have appropriate credentials configured" - ) - raise - - # Set default voice - self.speech_config.speech_synthesis_voice_name = VOICE_OPTIONS["default"] - - logger.info( - f"Initialized audio generator with output directory: {self.output_dir}" - ) - logger.info(f"Using Azure Speech region: {speech_region}") - - def generate_audio_file( - self, - text: str, - filename: str, - voice: str = None, - style: str = "chat", - rate: str = "+5%", - ) -> Optional[str]: - """Generate a single audio file from text. - - Args: - text: Text to synthesize - filename: Output filename (without extension) - voice: Voice to use (defaults to current voice) - style: Speech style - rate: Speech rate - - Returns: - Path to generated file or None if failed - """ - try: - # Use the existing authenticated speech config - config = self.speech_config - - # Set voice for this synthesis - current_voice = voice or config.speech_synthesis_voice_name - config.speech_synthesis_voice_name = current_voice - - # Set output file - file_path = self.output_dir / f"{filename}.wav" - audio_config = speechsdk.audio.AudioOutputConfig(filename=str(file_path)) - - # Create synthesizer and synthesize - synthesizer = speechsdk.SpeechSynthesizer( - speech_config=config, audio_config=audio_config - ) - - # Use simple text synthesis first to test - result = synthesizer.speak_text_async(text).get() - - if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted: - logger.info(f"Generated audio file: {file_path}") - return str(file_path) - else: - logger.error(f"Speech synthesis failed with reason: {result.reason}") - # Try to get error details safely - try: - if hasattr(result, "error_details") and result.error_details: - logger.error(f"Error details: {result.error_details}") - except: - logger.error("Could 
not retrieve error details") - return None - - except Exception as e: - logger.error(f"Error generating audio for '{filename}': {e}") - return None - - def generate_agent_conversations( - self, agent_type: str, count: int = None, voice: str = None - ) -> List[str]: - """Generate audio files for a specific agent type. - - Args: - agent_type: Type of agent conversation (auth, fnol_new_claim, etc.) - count: Number of files to generate (None = all available) - voice: Voice to use for generation - - Returns: - List of generated file paths - """ - if agent_type not in CUSTOMER_CONVERSATIONS: - logger.error(f"Unknown agent type: {agent_type}") - return [] - - conversations = CUSTOMER_CONVERSATIONS[agent_type] - if count: - conversations = conversations[:count] - - generated_files = [] - - # Select appropriate voice based on agent type - if not voice: - if agent_type == "emergency": - voice = VOICE_OPTIONS["urgent"] - elif agent_type == "escalation": - voice = VOICE_OPTIONS["frustrated"] - else: - voice = VOICE_OPTIONS["default"] - - for i, text in enumerate(conversations, 1): - filename = f"{agent_type}_{i:02d}" - file_path = self.generate_audio_file(text, filename, voice=voice) - if file_path: - generated_files.append(file_path) - - logger.info(f"Generated {len(generated_files)} files for {agent_type}") - return generated_files - - def generate_all_conversations( - self, count_per_type: int = None - ) -> Dict[str, List[str]]: - """Generate audio files for all agent types. - - Args: - count_per_type: Max number of files per agent type - - Returns: - Dictionary mapping agent types to generated file paths - """ - all_generated = {} - - for agent_type in CUSTOMER_CONVERSATIONS.keys(): - generated_files = self.generate_agent_conversations( - agent_type, count_per_type - ) - all_generated[agent_type] = generated_files - - return all_generated - - def create_manifest(self, generated_files: Dict[str, List[str]]) -> str: - """Create a JSON manifest of generated audio files. 
- - Args: - generated_files: Dictionary of agent types to file paths - - Returns: - Path to manifest file - """ - manifest = { - "generated_at": "", - "total_files": sum(len(files) for files in generated_files.values()), - "agent_types": {}, - } - - import datetime - - manifest["generated_at"] = datetime.datetime.now().isoformat() - - for agent_type, file_paths in generated_files.items(): - manifest["agent_types"][agent_type] = { - "count": len(file_paths), - "files": [ - { - "filename": Path(fp).name, - "path": fp, - "text": self._get_text_for_file(agent_type, Path(fp).name), - } - for fp in file_paths - ], - } - - manifest_path = self.output_dir / "audio_manifest.json" - with open(manifest_path, "w") as f: - json.dump(manifest, f, indent=2) - - logger.info(f"Created manifest: {manifest_path}") - return str(manifest_path) - - def _get_text_for_file(self, agent_type: str, filename: str) -> str: - """Get the original text for a generated file.""" - try: - # Extract index from filename (e.g., "auth_01.wav" -> 0) - index = int(filename.split("_")[1].split(".")[0]) - 1 - return CUSTOMER_CONVERSATIONS[agent_type][index] - except (IndexError, ValueError): - return "Unknown text" - - -def main(): - """Main function with command-line interface.""" - parser = argparse.ArgumentParser( - description="Generate customer audio files for load testing" - ) - - parser.add_argument( - "--output-dir", - "-o", - default="./test_audio", - help="Output directory for audio files (default: ./test_audio)", - ) - - parser.add_argument( - "--agent-type", - "-a", - choices=list(CUSTOMER_CONVERSATIONS.keys()) + ["all"], - default="all", - help="Type of agent conversation to generate (default: all)", - ) - - parser.add_argument( - "--count", - "-c", - type=int, - help="Number of files to generate per agent type (default: all available)", - ) - - parser.add_argument( - "--voice", - "-v", - choices=list(VOICE_OPTIONS.values()), - help="Voice to use for synthesis", - ) - - parser.add_argument( - 
"--list-voices", action="store_true", help="List available voice options" - ) - - args = parser.parse_args() - - if args.list_voices: - print("Available voice options:") - for name, voice in VOICE_OPTIONS.items(): - print(f" {name}: {voice}") - return - - # Check for required environment variables for DefaultAzureCredential - speech_region = os.getenv("AZURE_SPEECH_REGION", "centralus") - - logger.info(f"Using Azure Speech Services in region: {speech_region}") - logger.info("Authenticating with DefaultAzureCredential (no API key required)") - logger.info( - "Make sure you're logged in with 'az login' or have appropriate Azure credentials configured" - ) - - # Initialize generator - generator = TestAudioGenerator(args.output_dir) - - try: - if args.agent_type == "all": - logger.info("Generating audio files for all agent types...") - generated_files = generator.generate_all_conversations(args.count) - else: - logger.info(f"Generating audio files for {args.agent_type} agent...") - files = generator.generate_agent_conversations( - args.agent_type, args.count, args.voice - ) - generated_files = {args.agent_type: files} - - # Create manifest - manifest_path = generator.create_manifest(generated_files) - - # Summary - total_files = sum(len(files) for files in generated_files.values()) - logger.info(f"\n=== Generation Complete ===") - logger.info(f"Total files generated: {total_files}") - logger.info(f"Output directory: {generator.output_dir}") - logger.info(f"Manifest file: {manifest_path}") - - for agent_type, files in generated_files.items(): - logger.info(f" {agent_type}: {len(files)} files") - - except KeyboardInterrupt: - logger.info("Generation interrupted by user") - sys.exit(1) - except Exception as e: - logger.error(f"Error during generation: {e}") - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/devops/scripts/EventGrid-EntraAppConfig.ps1 b/devops/scripts/misc/EventGrid-EntraAppConfig.ps1 similarity index 100% rename from 
devops/scripts/EventGrid-EntraAppConfig.ps1 rename to devops/scripts/misc/EventGrid-EntraAppConfig.ps1 diff --git a/devops/scripts/EventGrid-EntraAppConfig.sh b/devops/scripts/misc/EventGrid-EntraAppConfig.sh similarity index 100% rename from devops/scripts/EventGrid-EntraAppConfig.sh rename to devops/scripts/misc/EventGrid-EntraAppConfig.sh diff --git a/devops/scripts/misc/generate_openapi.py b/devops/scripts/misc/generate_openapi.py new file mode 100755 index 00000000..4f3b30ad --- /dev/null +++ b/devops/scripts/misc/generate_openapi.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +""" +Generate OpenAPI JSON schema from FastAPI application +==================================================== + +This script generates the OpenAPI/Swagger JSON schema from the FastAPI app +and saves it to the docs/api/ directory for inclusion in MkDocs. + +Usage: + python scripts/generate_openapi.py + python scripts/generate_openapi.py --output docs/api/openapi.json --pretty +""" + +import sys +import os +import json +import argparse +from pathlib import Path + +# Add the backend directory to Python path +backend_dir = Path(__file__).parent.parent / "apps" / "rtagent" / "backend" +sys.path.insert(0, str(backend_dir)) + +def generate_openapi_json(output_path: str = "docs/api/openapi.json", pretty: bool = True): + """ + Generate OpenAPI JSON from FastAPI application. 
+ + Args: + output_path: Path where to save the OpenAPI JSON file + pretty: Whether to format JSON with indentation for readability + """ + try: + # Import the FastAPI app + from main import app + + # Get the OpenAPI schema + openapi_schema = app.openapi() + + # Ensure output directory exists + output_file = Path(output_path) + output_file.parent.mkdir(parents=True, exist_ok=True) + + # Write the schema to file + with open(output_file, 'w', encoding='utf-8') as f: + if pretty: + json.dump(openapi_schema, f, indent=2, ensure_ascii=False) + else: + json.dump(openapi_schema, f, ensure_ascii=False) + + print(f"✅ OpenAPI schema generated successfully: {output_file}") + print(f"📊 Found {len(openapi_schema.get('paths', {}))} API paths") + print(f"🏷️ API Title: {openapi_schema.get('info', {}).get('title', 'N/A')}") + print(f"📝 API Version: {openapi_schema.get('info', {}).get('version', 'N/A')}") + + # Print summary of endpoints + paths = openapi_schema.get('paths', {}) + if paths: + print(f"\n📋 API Endpoints Summary:") + for path, methods in paths.items(): + method_list = [method.upper() for method in methods.keys() if method != 'parameters'] + if method_list: + print(f" {', '.join(method_list)} {path}") + + return output_file + + except ImportError as e: + print(f"❌ Failed to import FastAPI app: {e}") + print("💡 Make sure you're running this from the project root and all dependencies are installed") + sys.exit(1) + except Exception as e: + print(f"❌ Failed to generate OpenAPI schema: {e}") + sys.exit(1) + +def main(): + parser = argparse.ArgumentParser( + description="Generate OpenAPI JSON schema from FastAPI application" + ) + parser.add_argument( + "--output", + "-o", + default="docs/api/openapi.json", + help="Output path for the OpenAPI JSON file (default: docs/api/openapi.json)" + ) + parser.add_argument( + "--pretty", + action="store_true", + help="Format JSON with indentation for readability" + ) + parser.add_argument( + "--minify", + action="store_true", + 
help="Generate minified JSON (opposite of --pretty)" + ) + + args = parser.parse_args() + + # Determine pretty formatting + pretty = args.pretty if args.pretty else not args.minify + + print(f"🚀 Generating OpenAPI schema from FastAPI application...") + print(f"📁 Output file: {args.output}") + print(f"🎨 Pretty formatting: {'Yes' if pretty else 'No'}") + + generate_openapi_json(args.output, pretty) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/devops/scripts/validate-terraform-backend.sh b/devops/scripts/misc/validate-terraform-backend.sh similarity index 100% rename from devops/scripts/validate-terraform-backend.sh rename to devops/scripts/misc/validate-terraform-backend.sh diff --git a/devops/scripts/setup-gha-config.sh b/devops/scripts/setup-gha-config.sh deleted file mode 100755 index fb01ee35..00000000 --- a/devops/scripts/setup-gha-config.sh +++ /dev/null @@ -1,573 +0,0 @@ -#!/bin/bash - -# ======================================================================== -# 🚀 Setup CI/CD Configuration for Azure Developer CLI (AZD) Deployment -# ======================================================================== -# This script provisions GitHub Actions secrets and variables needed for -# automated deployment using Azure Developer CLI (azd) with OIDC authentication. -# -# Based on: https://learn.microsoft.com/en-us/azure/container-apps/github-actions-cli -# Usage: ./setup-cicd-config.sh [--interactive] [--help] - -set -euo pipefail - -# ======================================================================== -# CONFIGURATION & CONSTANTS -# ======================================================================== - -readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -readonly PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" -readonly APP_REGISTRATION_NAME="GitHub-Actions-RTAudio-AZD" - -# Colors for output -readonly RED='\033[0;31m' -readonly GREEN='\033[0;32m' -readonly YELLOW='\033[1;33m' -readonly BLUE='\033[0;34m' -readonly CYAN='\033[0;36m' -readonly NC='\033[0m' # No Color - -# Default values -INTERACTIVE_MODE=false -GITHUB_ORG="" -GITHUB_REPO="" -AZURE_LOCATION="eastus" -AZURE_ENV_NAME="dev" - -# ======================================================================== -# HELPER FUNCTIONS -# ======================================================================== - -log_info() { - echo -e "${BLUE}ℹ️ [INFO]${NC} $*" -} - -log_success() { - echo -e "${GREEN}✅ [SUCCESS]${NC} $*" -} - -log_warning() { - echo -e "${YELLOW}⚠️ [WARNING]${NC} $*" -} - -log_error() { - echo -e "${RED}❌ [ERROR]${NC} $*" >&2 -} - -log_section() { - echo "" - echo -e "${CYAN}🔧 $*${NC}" - echo "=========================================================================" -} - -show_help() { - cat << EOF -🚀 Setup CI/CD Configuration for Azure Developer CLI (AZD) Deployment - -USAGE: - $0 [OPTIONS] - -OPTIONS: - --interactive Run in interactive mode (prompts for all values) - --help Show this help message - -DESCRIPTION: - This script sets up GitHub Actions secrets and variables for automated - deployment using Azure Developer CLI (azd) with OIDC authentication. - - It will: - 1. Create Azure App Registration for OIDC authentication - 2. Configure federated credentials for GitHub Actions - 3. Assign necessary Azure permissions - 4. Set up Terraform remote state storage - 5. 
Display the secrets/variables to configure in GitHub - -PREREQUISITES: - - Azure CLI installed and authenticated - - GitHub CLI installed and authenticated (optional) - - Contributor permissions on Azure subscription - - GitHub repository already created - -ENVIRONMENT VARIABLES: - GITHUB_ORG GitHub organization/user name - GITHUB_REPO GitHub repository name - AZURE_LOCATION Azure region (default: eastus) - AZURE_ENV_NAME Environment name (default: dev) - -EXAMPLES: - # Interactive mode - $0 --interactive - - # Using environment variables - GITHUB_ORG=myorg GITHUB_REPO=myrepo $0 - - # With custom values - AZURE_LOCATION=westus2 AZURE_ENV_NAME=prod $0 - -EOF -} - -check_dependencies() { - log_info "Checking dependencies..." - - local deps=("az" "jq") - local missing=() - - for dep in "${deps[@]}"; do - if ! command -v "$dep" &> /dev/null; then - missing+=("$dep") - fi - done - - if [[ ${#missing[@]} -gt 0 ]]; then - log_error "Missing dependencies: ${missing[*]}" - log_error "Please install them and try again" - exit 1 - fi - - # Check if GitHub CLI is available (optional) - if command -v "gh" &> /dev/null; then - log_info "GitHub CLI detected (optional features available)" - else - log_warning "GitHub CLI not found (manual secret configuration required)" - fi - - log_success "Dependencies verified" -} - -check_azure_auth() { - log_info "Checking Azure authentication..." - - if ! 
az account show &> /dev/null; then - log_error "Azure CLI not authenticated" - log_error "Please run 'az login' first" - exit 1 - fi - - local subscription_name - subscription_name=$(az account show --query "name" -o tsv) - log_success "Authenticated to Azure subscription: $subscription_name" -} - -prompt_for_values() { - if [[ "$INTERACTIVE_MODE" == "true" ]] || [[ -z "$GITHUB_ORG" ]] || [[ -z "$GITHUB_REPO" ]]; then - echo "" - log_info "Please provide the following information:" - - if [[ -z "$GITHUB_ORG" ]]; then - read -p "GitHub organization/username: " GITHUB_ORG - fi - - if [[ -z "$GITHUB_REPO" ]]; then - read -p "GitHub repository name: " GITHUB_REPO - fi - - if [[ "$INTERACTIVE_MODE" == "true" ]]; then - read -p "Azure location [$AZURE_LOCATION]: " input_location - AZURE_LOCATION="${input_location:-$AZURE_LOCATION}" - - read -p "Environment name [$AZURE_ENV_NAME]: " input_env - AZURE_ENV_NAME="${input_env:-$AZURE_ENV_NAME}" - fi - fi - - # Validate required values - if [[ -z "$GITHUB_ORG" ]] || [[ -z "$GITHUB_REPO" ]]; then - log_error "GitHub organization and repository name are required" - log_error "Set GITHUB_ORG and GITHUB_REPO environment variables or use --interactive" - exit 1 - fi - - log_info "Configuration:" - log_info " GitHub: $GITHUB_ORG/$GITHUB_REPO" - log_info " Azure Location: $AZURE_LOCATION" - log_info " Environment: $AZURE_ENV_NAME" -} - -create_app_registration() { - log_section "Creating Azure App Registration for OIDC" - - # Check if app registration already exists - local existing_app_id - existing_app_id=$(az ad app list --display-name "$APP_REGISTRATION_NAME" --query "[0].appId" -o tsv 2>/dev/null || echo "") - - if [[ -n "$existing_app_id" && "$existing_app_id" != "null" ]]; then - log_warning "App registration '$APP_REGISTRATION_NAME' already exists" - APP_ID="$existing_app_id" - else - log_info "Creating app registration: $APP_REGISTRATION_NAME" - APP_ID=$(az ad app create --display-name "$APP_REGISTRATION_NAME" --query 
"appId" -o tsv) - - # Create service principal - log_info "Creating service principal..." - az ad sp create --id "$APP_ID" > /dev/null - - log_success "Created app registration: $APP_ID" - fi - - # Get tenant and subscription info - TENANT_ID=$(az account show --query "tenantId" -o tsv) - SUBSCRIPTION_ID=$(az account show --query "id" -o tsv) - SP_OBJECT_ID=$(az ad sp show --id "$APP_ID" --query "id" -o tsv) - - log_info "App Registration Details:" - log_info " Application ID: $APP_ID" - log_info " Tenant ID: $TENANT_ID" - log_info " Subscription ID: $SUBSCRIPTION_ID" - log_info " Service Principal Object ID: $SP_OBJECT_ID" -} - -configure_federated_credentials() { - log_section "Configuring OIDC Federated Credentials" - - local credentials=( - "main-branch:repo:$GITHUB_ORG/$GITHUB_REPO:ref:refs/heads/main:Main branch deployments" - "cleanup-deployment:repo:$GITHUB_ORG/$GITHUB_REPO:ref:refs/heads/cleanup/deployment:Cleanup deployment branch" - "pull-requests:repo:$GITHUB_ORG/$GITHUB_REPO:pull_request:Pull request validation" - "workflow-dispatch:repo:$GITHUB_ORG/$GITHUB_REPO:environment:$AZURE_ENV_NAME:Manual workflow triggers" - ) - - for credential in "${credentials[@]}"; do - IFS=':' read -r name subject_prefix org_repo subject_suffix description <<< "$credential" - local full_subject="${subject_prefix}:${org_repo}:${subject_suffix}" - - log_info "Creating federated credential: $name" - - # Check if credential already exists - local existing_cred - existing_cred=$(az ad app federated-credential list --id "$APP_ID" --query "[?name=='$name'].name" -o tsv 2>/dev/null || echo "") - - if [[ -n "$existing_cred" ]]; then - log_warning "Federated credential '$name' already exists, skipping..." 
- continue - fi - - # Create the federated credential - az ad app federated-credential create \ - --id "$APP_ID" \ - --parameters "{ - \"name\": \"$name\", - \"issuer\": \"https://token.actions.githubusercontent.com\", - \"subject\": \"$full_subject\", - \"description\": \"$description\", - \"audiences\": [\"api://AzureADTokenExchange\"] - }" > /dev/null - - log_success "Created federated credential: $name" - done -} - -assign_azure_permissions() { - log_section "Assigning Azure Permissions" - - local roles=("Contributor" "User Access Administrator") - - for role in "${roles[@]}"; do - log_info "Assigning '$role' role to service principal..." - - # Check if role assignment already exists - local existing_assignment - existing_assignment=$(az role assignment list \ - --assignee "$SP_OBJECT_ID" \ - --role "$role" \ - --scope "/subscriptions/$SUBSCRIPTION_ID" \ - --query "[0].id" -o tsv 2>/dev/null || echo "") - - if [[ -n "$existing_assignment" && "$existing_assignment" != "null" ]]; then - log_warning "Role '$role' already assigned, skipping..." 
- continue - fi - - az role assignment create \ - --assignee "$SP_OBJECT_ID" \ - --role "$role" \ - --scope "/subscriptions/$SUBSCRIPTION_ID" > /dev/null - - log_success "Assigned '$role' role" - done -} - -setup_terraform_state_storage() { - log_section "Setting up Terraform Remote State Storage" - - local resource_group="rg-terraform-state-${AZURE_ENV_NAME}" - local storage_account="tfstate${AZURE_ENV_NAME}$(openssl rand -hex 4)" - local container_name="tfstate" - - log_info "Creating resource group: $resource_group" - az group create \ - --name "$resource_group" \ - --location "$AZURE_LOCATION" \ - --tags "purpose=terraform-state" "environment=$AZURE_ENV_NAME" > /dev/null - - log_info "Creating storage account: $storage_account" - az storage account create \ - --name "$storage_account" \ - --resource-group "$resource_group" \ - --location "$AZURE_LOCATION" \ - --sku "Standard_LRS" \ - --encryption-services blob \ - --allow-blob-public-access false \ - --tags "purpose=terraform-state" "environment=$AZURE_ENV_NAME" > /dev/null - - log_info "Creating container: $container_name" - az storage container create \ - --name "$container_name" \ - --account-name "$storage_account" \ - --auth-mode login > /dev/null - - # Assign permissions to service principal for state storage - log_info "Assigning storage permissions to service principal..." 
- az role assignment create \ - --assignee "$SP_OBJECT_ID" \ - --role "Storage Blob Data Contributor" \ - --scope "/subscriptions/$SUBSCRIPTION_ID/resourceGroups/$resource_group/providers/Microsoft.Storage/storageAccounts/$storage_account" > /dev/null - - # Store values for later use - TF_RESOURCE_GROUP="$resource_group" - TF_STORAGE_ACCOUNT="$storage_account" - TF_CONTAINER_NAME="$container_name" - - log_success "Terraform state storage configured" - log_info " Resource Group: $TF_RESOURCE_GROUP" - log_info " Storage Account: $TF_STORAGE_ACCOUNT" - log_info " Container: $TF_CONTAINER_NAME" -} - -configure_github_secrets() { - log_section "GitHub Repository Configuration" - - if command -v "gh" &> /dev/null && gh auth status &> /dev/null; then - log_info "GitHub CLI detected and authenticated" - - # Ask if user wants to automatically configure secrets - if [[ "$INTERACTIVE_MODE" == "true" ]]; then - read -p "Automatically configure GitHub secrets and variables? (y/N): " configure_auto - if [[ "$configure_auto" =~ ^[Yy]$ ]]; then - setup_github_secrets_auto - return - fi - fi - fi - - # Manual configuration instructions - show_manual_configuration -} - -setup_github_secrets_auto() { - log_info "Configuring GitHub secrets and variables automatically..." 
- - # Set repository secrets - local secrets=( - "AZURE_CLIENT_ID:$APP_ID" - "AZURE_TENANT_ID:$TENANT_ID" - "AZURE_SUBSCRIPTION_ID:$SUBSCRIPTION_ID" - "AZURE_PRINCIPAL_ID:$SP_OBJECT_ID" - ) - - for secret in "${secrets[@]}"; do - IFS=':' read -r name value <<< "$secret" - log_info "Setting secret: $name" - echo "$value" | gh secret set "$name" --repo "$GITHUB_ORG/$GITHUB_REPO" - done - - # Set repository variables - local variables=( - "AZURE_LOCATION:$AZURE_LOCATION" - "AZURE_ENV_NAME:$AZURE_ENV_NAME" - "RS_RESOURCE_GROUP:$TF_RESOURCE_GROUP" - "RS_STORAGE_ACCOUNT:$TF_STORAGE_ACCOUNT" - "RS_CONTAINER_NAME:$TF_CONTAINER_NAME" - ) - - for variable in "${variables[@]}"; do - IFS=':' read -r name value <<< "$variable" - log_info "Setting variable: $name" - echo "$value" | gh variable set "$name" --repo "$GITHUB_ORG/$GITHUB_REPO" - done - - log_success "GitHub secrets and variables configured automatically!" -} - -show_manual_configuration() { - cat << EOF - -${CYAN}📝 Manual GitHub Configuration Required${NC} -======================================================================== - -Navigate to your GitHub repository: https://github.com/$GITHUB_ORG/$GITHUB_REPO -Go to Settings → Secrets and variables → Actions - -${YELLOW}Repository Secrets:${NC} -Add these under "Repository secrets": - -AZURE_CLIENT_ID: $APP_ID -AZURE_TENANT_ID: $TENANT_ID -AZURE_SUBSCRIPTION_ID: $SUBSCRIPTION_ID -AZURE_PRINCIPAL_ID: $SP_OBJECT_ID - -${YELLOW}Repository Variables:${NC} -Add these under "Repository variables": - -AZURE_LOCATION: $AZURE_LOCATION -AZURE_ENV_NAME: $AZURE_ENV_NAME -RS_RESOURCE_GROUP: $TF_RESOURCE_GROUP -RS_STORAGE_ACCOUNT: $TF_STORAGE_ACCOUNT -RS_CONTAINER_NAME: $TF_CONTAINER_NAME - -${YELLOW}Optional Secrets:${NC} -If you have an ACS phone number: - -ACS_SOURCE_PHONE_NUMBER: +1234567890 - -EOF -} - -create_summary_file() { - local summary_file="$PROJECT_ROOT/.azd-cicd-config.txt" - - cat > "$summary_file" << EOF -# Azure Developer CLI (AZD) CI/CD Configuration Summary -# 
Generated on: $(date) -# Script: $0 - -## Azure App Registration -Application ID: $APP_ID -Tenant ID: $TENANT_ID -Subscription ID: $SUBSCRIPTION_ID -Service Principal Object ID: $SP_OBJECT_ID - -## Terraform State Storage -Resource Group: $TF_RESOURCE_GROUP -Storage Account: $TF_STORAGE_ACCOUNT -Container: $TF_CONTAINER_NAME - -## GitHub Repository -Organization/User: $GITHUB_ORG -Repository: $GITHUB_REPO -URL: https://github.com/$GITHUB_ORG/$GITHUB_REPO - -## Next Steps -1. Configure GitHub secrets and variables (see output above) -2. Test the deployment workflow -3. Purchase ACS phone number if needed -4. Configure environment-specific settings - -## Useful Commands -# Test authentication -az login --service-principal --username $APP_ID --tenant $TENANT_ID - -# View role assignments -az role assignment list --assignee $SP_OBJECT_ID --output table - -# Test azd deployment -azd auth login --client-id $APP_ID --federated-credential-provider github --tenant-id $TENANT_ID -azd up - -EOF - - log_success "Configuration summary saved to: $summary_file" -} - -verify_configuration() { - log_section "Verifying Configuration" - - # Test service principal permissions - log_info "Testing service principal permissions..." - local test_result - test_result=$(az role assignment list --assignee "$SP_OBJECT_ID" --output table 2>/dev/null | wc -l) - - if [[ "$test_result" -gt 1 ]]; then - log_success "Service principal has role assignments" - else - log_warning "Service principal may not have proper permissions" - fi - - # Test federated credentials - log_info "Checking federated credentials..." - local cred_count - cred_count=$(az ad app federated-credential list --id "$APP_ID" --query "length(@)" -o tsv 2>/dev/null || echo "0") - - if [[ "$cred_count" -gt 0 ]]; then - log_success "Found $cred_count federated credential(s)" - else - log_warning "No federated credentials found" - fi - - # Test storage account access - log_info "Testing storage account access..." 
- if az storage container show --name "$TF_CONTAINER_NAME" --account-name "$TF_STORAGE_ACCOUNT" --auth-mode login &> /dev/null; then - log_success "Storage account accessible" - else - log_warning "Storage account access may be limited" - fi -} - -# ======================================================================== -# MAIN EXECUTION -# ======================================================================== - -main() { - # Parse command line arguments - while [[ $# -gt 0 ]]; do - case $1 in - --interactive) - INTERACTIVE_MODE=true - shift - ;; - --help) - show_help - exit 0 - ;; - *) - log_error "Unknown option: $1" - show_help - exit 1 - ;; - esac - done - - # Set values from environment variables if provided - GITHUB_ORG="${GITHUB_ORG:-}" - GITHUB_REPO="${GITHUB_REPO:-}" - AZURE_LOCATION="${AZURE_LOCATION:-eastus}" - AZURE_ENV_NAME="${AZURE_ENV_NAME:-dev}" - - # Display banner - echo -e "${CYAN}" - echo "🚀 Azure Developer CLI (AZD) CI/CD Configuration Setup" - echo "=======================================================" - echo -e "${NC}" - - # Run setup steps - check_dependencies - check_azure_auth - prompt_for_values - - log_info "Starting CI/CD configuration setup..." - - create_app_registration - configure_federated_credentials - assign_azure_permissions - setup_terraform_state_storage - configure_github_secrets - verify_configuration - create_summary_file - - echo "" - log_success "🎉 CI/CD configuration setup completed!" - log_info "Your azd deployment workflows should now be ready to run." - log_info "Test your setup by pushing to the main or cleanup/deployment branch." - - echo "" - echo -e "${YELLOW}💡 Next Steps:${NC}" - echo "1. Review the configuration summary file" - echo "2. Test the GitHub Actions workflow" - echo "3. Purchase an ACS phone number if needed" - echo "4. 
Configure any additional environment-specific settings" -} - -# Run main function if script is executed directly -if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then - main "$@" -fi diff --git a/devops/scripts/webapp-deploy.sh b/devops/scripts/webapp-deploy.sh deleted file mode 100644 index 0a640b9e..00000000 --- a/devops/scripts/webapp-deploy.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -# Simple script to deploy a zipped webapp to Azure using remote Oryx build - -set -e - -if [ "$#" -ne 3 ]; then - echo "Usage: $0 " - exit 1 -fi - -WEBAPP_NAME="$1" -RESOURCE_GROUP="$2" -ZIP_PATH="$3" - -if [ ! -f "$ZIP_PATH" ]; then - echo "Error: ZIP file '$ZIP_PATH' not found." - exit 1 -fi - -az webapp deploy \ - --resource-group "$RESOURCE_GROUP" \ - --name "$WEBAPP_NAME" \ - --src-path "$ZIP_PATH" \ - --type zip \ - --build-remote true - -echo "Deployment triggered for $WEBAPP_NAME in $RESOURCE_GROUP using $ZIP_PATH" \ No newline at end of file diff --git a/docs/ACSBargeInFlow.md b/docs/ACSBargeInFlow.md deleted file mode 100644 index 3974a568..00000000 --- a/docs/ACSBargeInFlow.md +++ /dev/null @@ -1,415 +0,0 @@ -# ACS Barge-In Flow - -This document describes the core event loop logic for handling barge-in interruptions in the ACS Media Handler. - -## Architecture Overview - -The barge-in system uses three threads to maintain low-latency interruption handling: - -1. **Speech SDK Thread**: Continuous audio recognition, never blocks -2. **Route Turn Thread**: AI processing and response generation -3. 
**Main Event Loop**: WebSocket handling and task coordination - -```mermaid -graph TB - subgraph SpeechSDK["Speech SDK Thread"] - A1["Audio Recognition"] - A2["on_partial → Barge-in"] - A3["on_final → Queue Result"] - end - - subgraph RouteLoop["Route Turn Thread"] - B1["await queue.get()"] - B2["AI Processing"] - B3["TTS Generation"] - end - - subgraph MainLoop["Main Event Loop"] - C1["WebSocket Handler"] - C2["Task Cancellation"] - C3["Stop Audio Commands"] - end - - %% Barge-in flag logic - A2 -->|"Set barge-in flag (on_partial)"| C2 - A2 -->|"Set barge-in flag (on_partial)"| C3 - A3 -->|"Clear barge-in flag (on_final)"| B1 - A3 --> B1 - B3 --> C1 -``` - ---- - -## 🔄➡️🧵 Architecture Evolution: From Parallel Overview to Thread Focus - -The **Parallel Thread Architecture** diagram above provides a comprehensive view of all physical threads and their interconnections. This bird's-eye view shows how three distinct threads collaborate through non-blocking communication patterns. - -### 🎯 Why Two Architectural Views? - -1. **🔄 Parallel Thread Architecture (Above)**: - - **Purpose**: Complete system overview showing all thread interactions - - **Focus**: Physical thread boundaries and cross-thread communication mechanisms - - **Audience**: System architects and developers debugging complex threading issues - -2. 
**🧵 Thread Architecture (Below)**: - - **Purpose**: Simplified view emphasizing thread responsibilities and performance characteristics - - **Focus**: Core design principles and operational flow - - **Audience**: Developers implementing features or optimizing performance - -### 🌉 Bridging the Views - -Both diagrams represent the **same underlying system** but with different levels of detail: - -- **Detailed Physical View** → Shows exact callback mechanisms (`on_partial`, `on_final`) and precise communication paths -- **Simplified Logical View** → Emphasizes thread roles, blocking behavior, and performance requirements - -The transition from detailed to simplified helps you understand: -- 🔧 **How** the system works (detailed view) -- 🎯 **Why** it's designed this way (simplified view) - ---- - -## 🧵 Thread Architecture & Non-Blocking Communication - -### 🏗️ Three-Thread Architecture Design - -The ACS Media Handler employs a **three-thread architecture** designed for **maximum responsiveness** and **clean separation of concerns**. Each thread has a specific role in ensuring uninterrupted voice interactions: - -```mermaid -graph TB - subgraph ThreadDesign["🖥️ Three-Thread Architecture"] - subgraph SpeechSDK["🎤 Speech SDK Thread
❌ Never Blocks"] - direction TB - S1["🔄 Continuous Audio Recognition"] - S2["⚡ on_partial → Immediate Barge-in"] - S3["✅ on_final → Queue Speech Result"] - - S1 --> S2 - S1 --> S3 - end - - subgraph RouteLoop["🔄 Route Turn Thread
✅ Blocks on Queue Only"] - direction TB - R1["📥 await queue.get()"] - R2["🤖 AI Processing (LLM + TTS)"] - R3["🎵 Create Playback Task (TTS through ACS)"] - - R1 --> R2 --> R3 - end - - subgraph MainEvent["🌐 Main Event Loop
❌ Never Blocks"] - direction TB - M1["📡 WebSocket Media Handler"] - M2["🚫 Barge-in Response"] - M3["🛑 Task Cancellation"] - - M1 --> M2 --> M3 - end - end - - %% Critical Communication Paths - S2 -.->|"⚡ < 10ms
run_coroutine_threadsafe"| M2 - S3 -.->|"📋 < 5ms
queue.put()"| R1 - R3 -.->|"🎵 Task Reference"| M1 - M2 -.->|"❌ cancel()"| R2 - - %% Performance indicators - S2 -.->|"🛑 Stop Audio"| M3 - - classDef speechStyle fill:#9B59B6,stroke:#6B3E99,stroke-width:3px,color:#FFFFFF - classDef routeStyle fill:#FF6B35,stroke:#E55100,stroke-width:3px,color:#FFFFFF - classDef mainStyle fill:#4A90E2,stroke:#2E5C8A,stroke-width:3px,color:#FFFFFF - - class S1,S2,S3 speechStyle - class R1,R2,R3 routeStyle - class M1,M2,M3 mainStyle -``` - -### 🎯 Design Principles - -#### 🎤 **Speech Recognition Isolation** -- **Never blocks** on AI processing or network operations -- **Immediate response** to user voice input (< 10ms) -- **Continuous operation** regardless of system load - -#### 🔄 **Dedicated AI Processing** -- **Isolated compute thread** for LLM and TTS generation -- **Safe cancellation** without affecting speech recognition -- **Controlled blocking** only on queue operations - -#### 🌐 **WebSocket Responsiveness** -- **Always available** for real-time commands -- **Instant task management** for barge-in scenarios -- **Non-blocking operations** for media streaming - -### 🎯 Thread Responsibility & Performance Matrix - -| Thread | Primary Role | Blocking? 
| Barge-in Role | Response Time | -|--------|--------------|-----------|---------------|---------------| -| **🎤 Speech SDK** | Real-time audio recognition | ❌ Never | ✅ Detection | ⚡ < 10ms | -| **🔄 Route Turn** | AI processing & response | ✅ Queue only | ❌ None | 🎯 < 5s | -| **🌐 Main Event** | WebSocket & cancellation | ❌ Never | ✅ Execution | ⚡ < 50ms | - -### 🚀 Key Non-Blocking Benefits - -- **🎤 Speech Recognition Isolation**: Never blocked by AI processing, enables immediate barge-in detection -- **🔄 AI Processing Isolation**: Dedicated thread prevents blocking speech recognition or WebSocket handling -- **🌐 WebSocket Responsiveness**: Always available for real-time commands and task cancellation -- **⚡ Cross-Thread Communication**: `run_coroutine_threadsafe()` and `asyncio.Queue` enable safe async bridging - -## 🔄 Asynchronous Task Architecture - -### 🎯 Three Core Processing Loops - -#### 1. **Main Event Loop** (`route_turn_loop`) -```python -async def route_turn_loop(): - """Background task that processes finalized speech""" - while True: - # Blocks until final speech is available - speech_result = await self.route_turn_queue.get() - - # Cancel any existing AI response - if self.playback_task and not self.playback_task.done(): - self.playback_task.cancel() - - # Create new AI processing task - self.playback_task = asyncio.create_task( - self.route_and_playback(speech_result) - ) -``` - -#### 2. **Speech Recognition Thread** (Azure SDK Background) -```python -# SDK callbacks bridge to main event loop -def on_partial(text, confidence, language): - """Immediate barge-in trigger - synchronous callback""" - if self.playback_task: - self.playback_task.cancel() # Immediate cancellation - self.send_stop_audio_command() - -def on_final(text, confidence, language): - """Queue final speech for AI processing""" - try: - self.route_turn_queue.put_nowait(speech_result) - except asyncio.QueueFull: - # Handle queue overflow gracefully -``` - -#### 3. 
**Playback Task** (`route_and_playback`) -```python -async def route_and_playback(speech_result): - """Individual task for each AI response - can be cancelled""" - try: - # Process with AI agent - response = await self.ai_agent.process(speech_result.text) - - # Generate and stream audio - async for audio_chunk in self.tts_service.generate(response): - await self.send_audio_to_acs(audio_chunk) - - except asyncio.CancelledError: - # Clean cancellation from barge-in - logger.info("🛑 Playback task cancelled by barge-in") - raise # Re-raise to complete cancellation -``` - -### ⚡ Barge-In Flow Interaction - -1. **User Speaks During AI Response** - - `on_partial()` callback fires immediately (< 10ms) - - Synchronous cancellation of `playback_task` - - Stop audio command sent to ACS - -2. **Task Cancellation Chain** - ``` - on_partial() → playback_task.cancel() → CancelledError raised - → Clean task cleanup - → ACS stops audio output - ``` - -3. **New Speech Processing** - - `on_final()` queues completed speech - - `route_turn_loop` picks up queued speech - - New `playback_task` created for fresh AI response - -### 🔄 Queue-Based Serialization - -The `route_turn_queue` ensures: -- **Sequential Processing**: Only one AI response generated at a time -- **Backpressure Handling**: Prevents memory overflow during rapid speech -- **Clean State Management**: Clear separation between speech input and AI processing - -This architecture provides **sub-50ms barge-in response time** while maintaining clean async task lifecycle management. - ---- - -## 🔄➡️⚙️ From Threading Model to Task Implementation - -The **Thread Architecture** section above establishes the **foundational design principles**, while the **Asynchronous Task Architecture** section that follows it (also above) dives into the **concrete implementation details**.
- -### 🌉 Implementation Bridge - -**Threading Model** focuses on: -- 🏗️ **Structural design** → Which threads handle what responsibilities -- ⚡ **Performance requirements** → Response time guarantees for each thread -- 🔗 **Communication patterns** → How threads safely exchange data - -**Task Implementation** focuses on: -- 🔧 **Code organization** → How async tasks are structured and managed -- 🔄 **Lifecycle management** → Task creation, cancellation, and cleanup -- 📋 **Queue mechanics** → How speech results flow through the system - -This transition helps you understand: -- 🎯 **What** each thread should accomplish (threading model) -- 🛠️ **How** to implement those goals in Python asyncio (task implementation) - ---- -## 🔄 Non-Blocking Thread Communication Sequence - -```mermaid -sequenceDiagram - participant SpeechSDK as 🧵 Speech SDK Thread - participant MainLoop as 🧵 Main Event Loop - participant RouteLoop as 🧵 Route Turn Thread - participant ACS as 🔊 Azure Communication Services - participant User as 👤 User - - Note over SpeechSDK,User: 🎵 AI Currently Playing Audio - MainLoop->>ACS: 🔊 Streaming TTS Audio Response - ACS->>User: 🎵 Audio Playback Active - - rect rgba(255, 149, 0, 0.15) - Note over SpeechSDK,User: 🚨 USER SPEAKS (BARGE-IN EVENT) - User->>SpeechSDK: 🗣️ Audio Input (Partial Recognition) - - Note right of SpeechSDK: ⚡ IMMEDIATE ACTION
🚫 NO BLOCKING - SpeechSDK->>SpeechSDK: 🔍 on_partial() callback triggered - end - - rect rgba(255, 59, 48, 0.2) - Note over SpeechSDK,MainLoop: 🔗 CROSS-THREAD COMMUNICATION - SpeechSDK-->>MainLoop: 🚀 run_coroutine_threadsafe(_handle_barge_in_async) - Note right of SpeechSDK: ✅ Speech thread continues
🚫 NOT BLOCKED - - Note over MainLoop: 🛑 BARGE-IN HANDLER EXECUTES - MainLoop->>MainLoop: ❌ playback_task.cancel() - MainLoop->>MainLoop: 🧹 Clear route_turn_queue - MainLoop->>ACS: 🛑 Send StopAudio command - end - - rect rgba(52, 199, 89, 0.15) - ACS-->>User: 🔇 Audio Playback STOPPED - Note right of MainLoop: ✅ Previous AI response
cancelled cleanly - end - - rect rgba(0, 122, 255, 0.1) - Note over SpeechSDK,RouteLoop: 📝 USER CONTINUES SPEAKING - User->>SpeechSDK: 🗣️ Continues Speaking - SpeechSDK->>SpeechSDK: ✅ on_final() callback triggered - - Note over SpeechSDK,MainLoop: 🔗 FINAL RESULT COMMUNICATION - SpeechSDK-->>MainLoop: 🚀 run_coroutine_threadsafe(_handle_final_async) - MainLoop->>MainLoop: 📋 route_turn_queue.put(final_text) - Note right of SpeechSDK: ✅ Speech thread continues
🚫 NOT BLOCKED - end - - rect rgba(102, 51, 153, 0.1) - Note over RouteLoop,ACS: 🤖 NEW AI PROCESSING - RouteLoop->>RouteLoop: 📥 queue.get() receives final_text - Note right of RouteLoop: ⏳ ONLY thread that blocks
🎯 Dedicated AI processing - - RouteLoop->>MainLoop: 🎵 Create new playback_task - MainLoop->>ACS: 🔊 Send New TTS Response - ACS->>User: 🎵 Play New AI Response - end - - Note over SpeechSDK,User: ✅ COMPLETE NON-BLOCKING CYCLE -``` - -### 🚀 Critical Non-Blocking Characteristics - -| Event | Thread Source | Target Thread | Blocking? | Communication Method | Response Time | -|-------|---------------|---------------|-----------|---------------------|---------------| -| **🚨 Barge-in Detection** | Speech SDK | Main Event Loop | ❌ NO | `run_coroutine_threadsafe` | < 10ms | -| **📋 Final Speech** | Speech SDK | Route Turn Thread | ❌ NO | `asyncio.Queue.put()` | < 5ms | -| **🎵 AI Processing** | Route Turn | Main Event Loop | ❌ NO | `asyncio.create_task` | < 1ms | -| **🛑 Task Cancellation** | Main Event Loop | Playback Task | ❌ NO | `task.cancel()` | < 1ms | - -> **🎯 Key Insight**: Only the **Route Turn Thread** blocks (on `queue.get()`), ensuring Speech SDK and Main Event Loop remain responsive for real-time barge-in detection. - ---- - -## Key Implementation Details - -### Barge-In Detection - - -### Thread Responsibility Matrix - -| Thread | Primary Responsibility | Can Block? | Handles Barge-in? | Performance Critical? | -|--------|------------------------|------------|-------------------|----------------------| -| **🎤 Speech SDK** | Real-time audio recognition | ❌ Never | ✅ Detection only | ⚡ Ultra-high (< 10ms) | -| **🔄 Route Turn** | AI processing & response generation | ✅ On queue.get() | ❌ No | 🎯 Medium (< 5s) | -| **🌐 Main Event** | WebSocket & task management | ❌ Never | ✅ Action execution | ⚡ High (< 50ms) | - -### 🚀 Non-Blocking Benefits - -1. **🎤 Speech Recognition Isolation** - - Runs independently of AI processing - - Never blocked by slow LLM responses - - Immediate barge-in detection capability - -2. 
**🔄 AI Processing Isolation** - - Dedicated thread for compute-heavy operations - - Can be safely cancelled without affecting speech - - Queue-based serialization prevents race conditions - -3. **🌐 WebSocket Responsiveness** - - Always available for real-time commands - - Immediate task cancellation capability - - No blocking on network or AI operations - -4. **⚡ Cross-Thread Communication** - - `run_coroutine_threadsafe()` enables safe async bridging - - `asyncio.Queue` provides thread-safe message passing - - Task cancellation works across thread boundaries - - -## 🔧 Key Implementation Details - -This section provides **concrete implementation specifics** for developers working with the ACS Media Handler threading architecture. - -### 🚨 Barge-In Detection -- **Trigger**: `on_partial` callback from Speech Recognizer detects user speech -- **Immediate Action**: Synchronous cancellation of `playback_task` using `asyncio.Task.cancel()` -- **Stop Signal**: Send `{"Kind": "StopAudio", "StopAudio": {}}` JSON command to ACS via WebSocket -- **Logging**: Comprehensive logging with emojis for real-time debugging - -### 🔄 Async Background Task Management -- **Route Turn Queue**: Serializes final speech processing using `asyncio.Queue()` -- **Playback Task**: Tracks current AI response generation/playback with `self.playback_task` -- **Task Lifecycle**: Clean creation, cancellation, and cleanup of background tasks -- **Cancellation Safety**: Proper `try/except asyncio.CancelledError` handling - -### 🛑 Stop Audio Signal Protocol -```json -{ - "Kind": "StopAudio", - "AudioData": null, - "StopAudio": {} -} -``` -This JSON message is sent to ACS to immediately halt any ongoing audio playback. 
- -### ⚡ Error Handling & Resilience -- **Event Loop Detection**: Graceful handling when no event loop is available -- **WebSocket Validation**: Connection state checks before sending messages -- **Task Cancellation**: Proper cleanup with `await task` after cancellation -- **Queue Management**: Full queue detection and message dropping strategies - -### 📊 Performance Optimizations -- **Immediate Cancellation**: Barge-in triggers instant playback stop (< 50ms) -- **Background Processing**: Non-blocking AI response generation -- **Memory Management**: Proper task cleanup prevents memory leaks -- **Concurrent Safety**: Thread-safe queue operations for speech processing diff --git a/docs/ApplicationInsights.md b/docs/ApplicationInsights.md deleted file mode 100644 index 6930becf..00000000 --- a/docs/ApplicationInsights.md +++ /dev/null @@ -1,257 +0,0 @@ -# Application Insights Integration Guide - -This guide explains how to configure and troubleshoot Azure Application Insights telemetry for the real-time audio agent application. 
- -## Overview - -The application uses Azure Monitor OpenTelemetry to send telemetry data to Application Insights, including: -- Structured logging -- Request tracing -- Performance metrics -- Live metrics (when permissions allow) - -## Quick Fix for Permission Errors - -If you're seeing "Forbidden" errors related to Application Insights telemetry, apply this immediate fix: - -```bash -# Set environment variable to disable live metrics -export AZURE_MONITOR_DISABLE_LIVE_METRICS=true - -# Set development environment -export ENVIRONMENT=dev -``` - -Or add to your `.env` file: -```bash -AZURE_MONITOR_DISABLE_LIVE_METRICS=true -ENVIRONMENT=dev -``` - -## Configuration - -### Environment Variables - -| Variable | Description | Default | Required | -|----------|-------------|---------|----------| -| `APPLICATIONINSIGHTS_CONNECTION_STRING` | Application Insights connection string | None | Yes | -| `AZURE_MONITOR_DISABLE_LIVE_METRICS` | Disable live metrics to reduce permission requirements | `false` | No | -| `ENVIRONMENT` | Environment type (dev/staging/prod) | None | No | -| `AZURE_MONITOR_LOGGER_NAME` | Custom logger name | `default` | No | - -### Connection String Format - -``` -InstrumentationKey=your-instrumentation-key;IngestionEndpoint=https://your-region.in.applicationinsights.azure.com/;LiveEndpoint=https://your-region.livediagnostics.monitor.azure.com/ -``` - -## Authentication - -The telemetry configuration uses Azure credential chain in this order: - -1. **Managed Identity** (for Azure-hosted applications) - - App Service: System-assigned or user-assigned managed identity - - Container Apps: System-assigned or user-assigned managed identity - -2. 
**DefaultAzureCredential** (for local development) - - Azure CLI credentials - - Visual Studio Code credentials - - Environment variables (if configured) - -## Permissions Required - -### Basic Telemetry (Logs, Traces, Metrics) -- `Microsoft.Insights/components/read` -- `Microsoft.Insights/telemetry/write` - -### Live Metrics (Real-time monitoring) -- `Microsoft.Insights/components/write` -- Additional live metrics API permissions - -### Recommended Roles - -1. **Application Insights Component Contributor** - ```bash - az role assignment create \ - --assignee \ - --role "Application Insights Component Contributor" \ - --scope "/subscriptions/{subscription}/resourceGroups/{rg}/providers/Microsoft.Insights/components/{name}" - ``` - -2. **Monitoring Contributor** (broader access) - ```bash - az role assignment create \ - --assignee \ - --role "Monitoring Contributor" \ - --scope "/subscriptions/{subscription}/resourceGroups/{rg}" - ``` - -## Troubleshooting - -### Common Error: "The Agent/SDK does not have permissions to send telemetry" - -**Symptoms:** -``` -azure.core.exceptions.HttpResponseError: Operation returned an invalid status 'Forbidden' -Content: {"Code":"InvalidOperation","Message":"The Agent/SDK does not have permissions to send telemetry to this resource."} -``` - -**Solutions:** - -1. **Immediate Fix (Disable Live Metrics):** - ```bash - export AZURE_MONITOR_DISABLE_LIVE_METRICS=true - ``` - -2. **Grant Permissions for Local Development:** - ```bash - # Get your user principal name - az account show --query user.name -o tsv - - # Grant Application Insights permissions - az role assignment create \ - --assignee $(az account show --query user.name -o tsv) \ - --role "Application Insights Component Contributor" \ - --scope - ``` - -3. 
**Configure Managed Identity for Production:** - ```bash - # Enable system-assigned managed identity (App Service example) - az webapp identity assign --resource-group --name - - # Grant permissions to the managed identity - az role assignment create \ - --assignee \ - --assignee-principal-type ServicePrincipal \ - --role "Application Insights Component Contributor" \ - --scope - ``` - -### Environment-Specific Behavior - -The telemetry configuration automatically adjusts based on environment: - -- **Development (`dev`, `development`, `local`)**: Live metrics disabled by default -- **Production (`prod`, `production`)**: Live metrics enabled if permissions allow -- **Azure-hosted**: Attempts to use managed identity credentials - -## Testing - -### Test Basic Telemetry -```python -import logging -from utils.telemetry_config import setup_azure_monitor - -# Configure telemetry -setup_azure_monitor("test-logger") - -# Test logging -logger = logging.getLogger("test-logger") -logger.info("Test message", extra={"custom_property": "test_value"}) -``` - -### Run Diagnostics -```bash -python utils/fix_appinsights.py -``` - -## Integration with FastAPI - -The telemetry is automatically configured when the application starts: - -```python -from utils.telemetry_config import setup_azure_monitor - -# In your main.py or startup code -setup_azure_monitor("audioagent") -``` - -OpenTelemetry will automatically instrument: -- FastAPI requests and responses -- Azure SDK calls -- HTTP client requests (aiohttp, requests) -- Custom logging - -## Production Deployment - -For production deployment with Azure Container Apps or App Service: - -1. **Enable Managed Identity:** - ```bicep - identity: { - type: 'SystemAssigned' - } - ``` - -2. 
**Grant Permissions in Bicep:** - ```bicep - resource appInsightsRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { - name: guid(appInsights.id, containerApp.id, 'ae349356-3a1b-4a5e-921d-050484c6347e') - scope: appInsights - properties: { - roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'ae349356-3a1b-4a5e-921d-050484c6347e') // Application Insights Component Contributor - principalId: containerApp.identity.principalId - principalType: 'ServicePrincipal' - } - } - ``` - -3. **Configure Environment Variables:** - ```bicep - environmentVariables: [ - { - name: 'APPLICATIONINSIGHTS_CONNECTION_STRING' - value: appInsights.properties.ConnectionString - } - { - name: 'ENVIRONMENT' - value: 'production' - } - ] - ``` - -## Monitoring and Alerting - -Once properly configured, you can monitor your application through: - -1. **Application Insights Portal** - - Live metrics (if enabled) - - Application map - - Performance counters - - Custom telemetry - -2. **Log Analytics Queries** - ```kusto - traces - | where timestamp > ago(1h) - | where severityLevel >= 2 - | order by timestamp desc - ``` - -3. 
**Custom Metrics** - ```python - from azure.monitor.opentelemetry import configure_azure_monitor - from opentelemetry import metrics - - # Get meter - meter = metrics.get_meter(__name__) - - # Create custom counter - request_counter = meter.create_counter("custom_requests_total") - request_counter.add(1, {"endpoint": "/api/health"}) - ``` - -## Security Considerations - -- Never hardcode connection strings in source code -- Use Key Vault to store connection strings in production -- Grant minimal required permissions -- Regularly audit role assignments -- Enable diagnostic settings for audit logging - -## Support and Resources - -- [Azure Monitor OpenTelemetry Documentation](https://docs.microsoft.com/en-us/azure/azure-monitor/app/opentelemetry-overview) -- [Application Insights Troubleshooting](https://docs.microsoft.com/en-us/azure/azure-monitor/app/troubleshoot) -- [Azure RBAC Documentation](https://docs.microsoft.com/en-us/azure/role-based-access-control/) diff --git a/docs/Architecture.md b/docs/Architecture.md deleted file mode 100644 index 5f2ead4d..00000000 --- a/docs/Architecture.md +++ /dev/null @@ -1,461 +0,0 @@ -# Azure Communication Services (ACS) Real-Time Audio Agent Architecture - -## Table of Contents -1. [Overview](#overview) -2. [Production Deployment Architecture](#production-deployment-architecture) -3. [Logical Architecture Overview](#logical-architecture-overview) -4. [Component Mapping & Integration](#component-mapping--integration) -5. [Event-Driven Architecture & Data Flow](#event-driven-architecture--data-flow) -6. [State Management & Data Persistence](#state-management--data-persistence) -7. [Key Architectural Advantages](#key-architectural-advantages) - -## Overview - -This document provides a comprehensive architectural overview of the Azure Communication Services (ACS) Real-Time Audio Agent system, designed for enterprise-scale voice AI applications. 
The architecture supports modular, industry-specific AI agents with real-time voice processing capabilities. - -**Architecture Perspectives:** -- **Production Deployment**: Enterprise Azure infrastructure with scalability, security, and reliability -- **Logical Components**: Application-layer architecture focusing on modular agent design -- **Event-Driven Flow**: Real-time data processing and state management patterns -- **Integration Patterns**: How production infrastructure maps to logical components - -The system enables plug-and-play AI agents for various industries (insurance, healthcare, legal, support) while maintaining a unified communication interface through Azure Communication Services. - -## Production Deployment Architecture - -![Production Deployment Diagram](assets/RTAudio.v0.png) - -The production deployment architecture illustrates the enterprise-ready Azure infrastructure hosting our real-time audio agent system. This deployment emphasizes **scalability**, **reliability**, and **security** across multiple Azure services. 
- -### Key Infrastructure Components - -#### 🔀 **Flexible Ingress Layer** -- **App Gateway (WAF)**: SSL termination, backend health probing, and Web Application Firewall protection -- **Azure Front Door**: Optional global entry point for geo-failover and CDN capabilities -- **Load Balancing**: Intelligent traffic distribution across healthy backend instances - -#### 🔐 **Secured API Exposure** -- **Azure API Management (Internal)**: Centralized API gateway behind App Gateway -- **WAF Protection**: Advanced threat protection and traffic filtering -- **Private VNet Integration**: Secured internal network connectivity - -#### 🧠 **Modular Agent Platform** -- **ARTAgent Orchestrator**: Supports multiple IVR engines and swappable LLM chains -- **Dynamic Routing**: Redis-based session metadata for intelligent agent selection -- **Multi-Agent Framework**: Industry-specific agents with standardized interfaces - -#### ⚙️ **Flexible Compute Options** -- **Primary**: Azure Container Apps with auto-scaling capabilities -- **Alternative**: Azure Functions, AKS, or VMs for specific SLA requirements -- **Microservices**: Each agent runs as an independent, scalable service - -#### 🧭 **Private Connectivity** -- **Private Endpoints**: All Azure services (Redis, Cosmos DB, OpenAI) use private networking -- **Private DNS Zones**: Linked across hub/spoke VNet topology -- **Network Security**: End-to-end private connectivity with no public internet exposure - -#### 🛡️ **Security & Compliance** -- **Security Layers**: Prompt shields, LLM safety checks, and API firewalls -- **Monitoring**: Microsoft Defender, Azure Sentinel for threat detection -- **Message Sanitization**: Input validation and content filtering - -#### 🧬 **Reliability & Health Monitoring** -- **Azure Service Health**: Multi-region availability monitoring -- **Health Probes**: Application Gateway backend health checks -- **Failover Support**: Active/passive fallback for RTAgents and Speech services - ---- - -## Logical 
Architecture Overview - -The logical architecture abstracts the underlying Azure infrastructure to focus on application components, data flow, and business logic. This view emphasizes the **modular agent design** and **real-time processing capabilities**. - -### Core Architecture Diagram - -```mermaid -flowchart TD - %% ─────────────────────────── User Interface Layer ───────────────────────────── - subgraph "👥 User Interface Layer" - Phone["📞 Phone Calls
Inbound / Outbound"] - WebMobile["📱💻 Web & Mobile Apps
Voice Interface"] - end - - %% ─────────────────────────── Communication Bridge ───────────────────────────── - subgraph "⚡ Azure Communication Bridge" - ACS["🔗 Azure Communication Services
Voice & Media Transport"] - Speech["🗣️ Azure Speech Services
STT ↔ TTS Processing"] - end - - %% ─────────────────────────── AI Processing Engine ───────────────────────────── - subgraph "🧠 Real-Time Processing" - WebSocket["⚡ Transcription Stream
via WebSocket"] - Orchestrator["🎯 Intent Orchestrator
Agent Routing + Registration"] - end - - %% ─────────────────────────── Core Agent Framework ───────────────────────────── - subgraph "🏗️ Modular Agent Network" - subgraph "🔌 Core Agent Framework" - AgentRegistry["📋 Agent Registry
Component Health + Hot Swap"] - AgentInterface["🧩 Standardized Agent Interface
Common API Contract"] - end - end - - %% ─────────────────────────── Agent Intelligence Hub ─────────────────────────── - subgraph "🧠 Agent Intelligence Hub" - AIHub["🧠 Agent Hub
Central Control Layer"] - - %% ── Subgraph: Model-Agnostic Reasoning - subgraph "🔄 Model-Agnostic Routing" - TaskRouter{🎯 Model Router} - TaskRouter -->|Complex Reasoning| O1["🔬 o1-preview
Advanced Analytics"] - TaskRouter -->|Speed Critical| GPT4o["⚡ GPT-4o
Real-Time Output"] - TaskRouter -->|Cost Sensitive| GPT4oMini["💰 GPT-4o-mini
Standard Tasks"] - end - - %% ── Subgraph: Industry-Specific Agents - subgraph "🏥 Industry-Specific Agents" - FNOL["📋 FNOL Agent"] - Healthcare["🏥 Healthcare Agent"] - Legal["⚖️ Legal Agent"] - Support["🛠️ Support Agent"] - NAgents["💬 Custom Agents"] - end - - %% ── Subgraph: Memory Store - subgraph "🧠 Memory Store" - ShortTermMemory["📊 Short-Term Memory
Real-Time Session"] - LongTermMemory["🗃️ Long-Term Memory
Historical Context"] - end - - %% ── Subgraph: Tool Store - subgraph "🧰 Tool Store" - FetchData["📄 Fetch Data
from Docs or APIs"] - SyncAction["🔁 Sync Action
External Services"] - CustomFunction["💻 Custom Function
Python Logic"] - end - end - - %% ─────────────────────────── Session Management ───────────────────────────── - subgraph "📦 Session Management" - Queue["📥 Agent-Agnostic Queue"] - Redis["💾 Redis State
Live Context"] - Manager["🧠 Conversation Manager
Dynamic Agent Assignment"] - end - - %% ─────────────────────────── Flow Connections ───────────────────────────── - Phone <--> ACS - WebMobile <--> ACS - ACS --> Speech - Speech --> WebSocket - WebSocket --> Orchestrator - - Orchestrator --> AgentRegistry - AgentRegistry --> AgentInterface - AgentInterface --> AIHub - - AIHub --> FNOL - AIHub --> Healthcare - AIHub --> Legal - AIHub --> Support - AIHub --> NAgents - AIHub --> TaskRouter - - TaskRouter --> Queue - Queue --> Speech - - Orchestrator --> Manager - Manager --> Redis - ShortTermMemory --> Redis - - %% Tools & Memory Used by Custom Agents - NAgents --> ShortTermMemory - NAgents --> LongTermMemory - NAgents --> FetchData - NAgents --> SyncAction - NAgents --> CustomFunction - - %% ─────────────────────────── Styles ───────────────────────────── - classDef user fill:#4CAF50,stroke:#2E7D32,stroke-width:3px,color:#FFFFFF - classDef bridge fill:#2196F3,stroke:#1565C0,stroke-width:3px,color:#FFFFFF - classDef process fill:#FF9800,stroke:#E65100,stroke-width:3px,color:#FFFFFF - classDef agent fill:#9C27B0,stroke:#6A1B9A,stroke-width:3px,color:#FFFFFF - classDef infra fill:#F44336,stroke:#C62828,stroke-width:3px,color:#FFFFFF - - class Phone,WebMobile user - class ACS,Speech bridge - class WebSocket,Orchestrator process - class FNOL,Healthcare,Legal,Support,NAgents,AIHub,O1,GPT4o,GPT4oMini,TaskRouter agent - class Queue,Redis,Manager infra -``` - ---- - -## Component Mapping & Integration - -This section maps the production Azure infrastructure to the logical application components, showing how the enterprise deployment supports the modular agent architecture. 
- -### Infrastructure-to-Logic Mapping - -| **Production Component** | **Logical Component** | **Integration Pattern** | **Scalability Model** | -|-------------------------|----------------------|------------------------|----------------------| -| **Azure App Gateway + WAF** | User Interface Layer | HTTP/HTTPS ingress, SSL termination | Horizontal scaling with health probes | -| **Azure Communication Services** | Communication Bridge | WebRTC, PSTN integration | Auto-scaling based on concurrent calls | -| **Container Apps (ARTAgent)** | Agent Network | Microservices with REST APIs | KEDA-based auto-scaling | -| **Azure OpenAI Service** | AI Model Hub | HTTP API with managed identity | Request-based throttling and quotas | -| **Azure Redis Cache** | Session Management | In-memory state with TTL | Cluster mode with read replicas | -| **Azure Cosmos DB** | Persistent Storage | NoSQL with global distribution | Provisioned or serverless throughput | -| **Azure Speech Services** | STT/TTS Processing | REST API with streaming | Concurrent request scaling | -| **Event Grid** | Event Processing | Pub/sub with guaranteed delivery | Topic-based fan-out scaling | - -### Integration Patterns - -#### **🔄 State Synchronization** -- **Redis**: Real-time session state shared across all agent instances -- **Cosmos DB**: Persistent conversation history and user profiles -- **Event Grid**: Asynchronous state change notifications - -#### **🎯 Dynamic Agent Routing** -- **Agent Registry**: Container Apps self-register with health endpoints -- **Load Balancer**: App Gateway routes based on agent availability -- **Session Affinity**: Redis maintains agent-to-session mapping - -#### **🔐 Security Integration** -- **Managed Identity**: All Azure services use credential-less authentication -- **Private Endpoints**: No public internet exposure for backend services -- **API Management**: Centralized authentication and rate limiting - ---- - -## Event-Driven Architecture & Data Flow - -The system 
operates on an event-driven architecture that enables real-time processing and seamless integration with external systems. - -### Real-Time Event Processing Flow - -```mermaid -flowchart TD - -%% ──────────────────────────────── User Interface ─────────────────────────── -subgraph "👥 User Interface Layer" - Phone["📞 Phone Calls
Inbound/Outbound"] - WebMobile["📱💻 Web & Mobile Apps
Real-time Voice Interface"] -end - -%% ─────────────────────────── Azure Communication Bridge ────────────────────── -subgraph "⚡ Azure Communication Bridge" - ACS["🔗 Azure Communication Services
Voice & Media Management"] - Speech["🗣️ Azure Speech Services
STT ↔ TTS Processing"] -end - -%% ────────────────────────────── AI Processing ────────────────────────────── -subgraph "🧠 AI Processing Engine" - WebSocket["⚡ Real-time WebSocket
Transcription Stream"] - Orchestrator["🎯 Intent Orchestrator
Smart Agent Routing
🔌 Dynamic Agent Registration"] -end - -%% ─────────────────────────── Modular Agent Network ───────────────────────── -subgraph "🤖 Modular Agent Network" - - %% ── Core Agent Framework - subgraph "🔌 Core Agent Framework" - AgentRegistry["📋 Agent Registry
🔄 Hot-Swappable Components
📊 Health Monitoring"] - AgentInterface["🔗 Standardized Agent Interface
🏗️ Common API Contract
⚙️ Plugin Architecture"] - AgentInterface --> AgentRegistry - end - - %% ── AI Model Hub (parent container for the two branches) - subgraph "🧠 AI Model Hub" - direction TB - - %% ── Branch 1: Industry-Specific Agents - subgraph "🏢 Industry-Specific Agents" - Auth["🔐 Auth Agent
🔌 Pluggable"] - FNOL["📋 FNOL Agent
🔌 Pluggable"] - Healthcare["🏥 Healthcare Agent
🔌 Pluggable"] - Legal["⚖️ Legal Agent
🔌 Pluggable"] - Support["🛠️ Support Agent
🔌 Pluggable"] - NAgents["💬 Custom Agents
🔌 Unlimited Extensibility"] - end - - %% ── Branch 2: Model-Specific Execution - subgraph "🧬 Model-Specific Execution" - TaskRouter{🎯 Model Router} - TaskRouter -->|Complex Reasoning| O1["🔬 o1-preview
Advanced Analytics"] - TaskRouter -->|Speed Critical| GPT4o["⚡ GPT-4o
Real-time Response"] - TaskRouter -->|Cost Efficient| GPT4oMini["💰 GPT-4o-mini
Standard Queries"] - end - end -end - -%% ───────────────────────────── Session Management ──────────────────────────── -subgraph "🏗️ Session Management" - Queue["📥 Message Queue
🔌 Agent-Agnostic Routing"] - Redis["💾 Redis State
📊 Real-time Session Data"] - Manager["🔄 Conversation Manager
🎯 Dynamic Agent Assignment"] -end - -%% ──────────────────────────── Flow Connections ───────────────────────────── -Phone <--> ACS -WebMobile <--> ACS -ACS <--> Speech -Speech <--> WebSocket -WebSocket <--> Orchestrator - -Orchestrator <--> AgentRegistry %% Orchestrator sees live registry status - -%% Industry agents to router -Auth --> TaskRouter -FNOL --> TaskRouter -Healthcare --> TaskRouter -Legal --> TaskRouter -Support --> TaskRouter -NAgents --> TaskRouter - -%% Router to queue → TTS chain -TaskRouter --> Queue -Queue --> Speech - -%% State management links -Orchestrator <--> Manager -Manager <--> Redis - -%% ────────────────────────────── Styling ──────────────────────────────────── -classDef user fill:#4CAF50,stroke:#2E7D32,stroke-width:3px,color:#FFFFFF -classDef bridge fill:#2196F3,stroke:#1565C0,stroke-width:3px,color:#FFFFFF -classDef proc fill:#FF9800,stroke:#E65100,stroke-width:3px,color:#FFFFFF -classDef agent fill:#9C27B0,stroke:#6A1B9A,stroke-width:3px,color:#FFFFFF -classDef infra fill:#F44336,stroke:#C62828,stroke-width:3px,color:#FFFFFF - -class Phone,WebMobile user -class ACS,Speech bridge -class WebSocket,Orchestrator proc -class Auth,FNOL,Healthcare,Legal,Support,NAgents,O1,GPT4o,GPT4oMini,TaskRouter agent -class Queue,Redis,Manager infra -``` - -### Key Event Types - -| **Event Type** | **Trigger** | **Payload** | **Subscribers** | -|---------------|-------------|-------------|-----------------| -| `CALL_STARTED` | ACS call connection | Session ID, caller info | Analytics, logging, resource allocation | -| `PARTICIPANT_JOINED` | New participant | Participant details | Authentication, notifications | -| `TRANSCRIPTION_RECEIVED` | STT completion | Text, confidence, timestamp | AI processing, storage | -| `AGENT_RESPONSE_GENERATED` | AI completion | Response text, agent type | TTS, analytics | -| `RECORDING_STARTED` | Call recording begins | Recording ID, metadata | Compliance, storage | -| `ERROR_OCCURRED` | System errors | Error details, 
context | Alerting, diagnostics | -| `CALL_ENDED` | Call termination | Duration, outcome | Billing, analytics, cleanup | - ---- - -## State Management & Data Persistence - -The architecture implements a multi-tiered data strategy optimized for real-time performance and long-term persistence. - -### Data Storage Strategy - -| **Data Type** | **Storage** | **Shared** | **Persistence** | **Use Case** | **TTL/Retention** | -|---------------|:-----------:|:----------:|:---------------:|--------------|:------------------:| -| **Session Metadata** | 🗄️ Cosmos DB | ✅ Yes | 📅 Long-term | Call history, compliance | 7 years | -| **Active Session State** | 🔄 Redis | ✅ Yes | ⏱️ Short-term | Real-time conversation | 24 hours | -| **Transcription History** | 🗄️ Cosmos DB | ✅ Yes | 📅 Long-term | Audit, training data | 7 years | -| **Live Transcription** | 🔄 Redis | ✅ Yes | ⏱️ Real-time | Active conversation | 1 hour | -| **Agent Performance** | 🗄️ Cosmos DB | ✅ Yes | 📅 Long-term | Analytics, optimization | 2 years | -| **User Profiles** | 🗄️ Cosmos DB | ✅ Yes | 📅 Long-term | Personalization | Indefinite | -| **WebSocket Connections** | 💾 In-Memory | ❌ No | ⚡ Process-bound | Real-time communication | Process lifetime | -| **Audio Streams** | 💾 In-Memory | ❌ No | ⚡ Process-bound | STT processing | Stream duration | -| **Conversation Context** | 🔄 Redis | ✅ Yes | ⏱️ Short-term | AI grounding | Session + 1 hour | - -### Cross-Worker State Management - -```mermaid -graph LR - subgraph "Worker A Process" - WA_Memory["🧠 In-Memory
• WebSocket
• Audio Stream
• AsyncIO Tasks"] - WA_Redis["🔄 Redis Access
• Session State
• Conversation History
• Agent Assignment"] - end - - subgraph "Worker B Process" - WB_Memory["🧠 In-Memory
• WebSocket
• Audio Stream
• AsyncIO Tasks"] - WB_Redis["🔄 Redis Access
• Session State
• Conversation History
• Agent Assignment"] - end - - subgraph "Shared State Layer" - Redis["💾 Redis Cluster
• Cross-worker coordination
• Session persistence
• Real-time updates"] - Cosmos["🗄️ Cosmos DB
• Long-term storage
• Analytics
• Compliance"] - end - - WA_Redis <--> Redis - WB_Redis <--> Redis - Redis --> Cosmos - - classDef worker fill:#e3f2fd,stroke:#1976d2,stroke-width:2px - classDef shared fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px - - class WA_Memory,WB_Memory,WA_Redis,WB_Redis worker - class Redis,Cosmos shared -``` - -### Key Design Principles - -#### **🔄 Eventual Consistency** -- Redis provides immediate consistency for active sessions -- Cosmos DB ensures eventual consistency for historical data -- Event Grid guarantees at-least-once delivery for state changes - -#### **⚡ Performance Optimization** -- Hot data (active sessions) stored in Redis for sub-millisecond access -- Cold data (historical records) stored in Cosmos DB with query optimization -- In-memory objects for process-bound, real-time operations - -#### **🛡️ Fault Tolerance** -- Redis cluster with read replicas for high availability -- Cosmos DB multi-region distribution with automatic failover -- Graceful degradation when shared state is temporarily unavailable - ---- - -## Key Architectural Advantages - -### 🔌 **Enterprise-Grade Modularity** - -#### **Plug-and-Play Agent Ecosystem** -- **🏗️ Standardized Interface**: Common API contract ensures seamless agent integration -- **📋 Dynamic Discovery**: Agents self-register with capability metadata for automatic routing -- **🔄 Hot-Swapping**: Deploy new agents without system downtime or session interruption -- **⚙️ Configuration-Driven**: Add industry-specific agents through YAML configuration changes - -#### **Channel-Agnostic Communication** -- **🔌 Unified Hub**: ACS abstracts communication protocols (voice, SMS, Teams, WhatsApp) -- **📡 Event-Driven**: New channels integrate through existing event processing pipeline -- **🌐 Protocol Independence**: Voice, text, and video flow through standardized interfaces - -### ⚡ **Zero-Friction Scalability** - -#### **Azure-Native Auto-Scaling** -- **🏗️ Container Apps**: Built-in KEDA scaling based on HTTP requests and queue depth 
-- **⚖️ Intelligent Load Balancing**: Application Gateway with health probes and session affinity -- **📈 Predictive Scaling**: Azure Monitor integration for pattern-based resource allocation - -#### **Microservices Excellence** -- **🔄 Service Mesh Ready**: Istio support for advanced traffic management and observability -- **🚀 Zero-Downtime Deployments**: Blue-green deployments with Container Apps revisions -- **🔐 Workload Identity**: Managed identities for credential-less Azure service access -- **📊 Distributed Telemetry**: End-to-end tracing with Application Insights - -### 🚀 **Future-Proof Design** - -#### **Technology-Agnostic Foundation** -- **🧠 Model Router**: Intelligent routing across GPT, Claude, Gemini based on performance and cost -- **🔌 Cloud-Agnostic**: Vendor-neutral design supporting hybrid and multi-cloud deployments -- **📈 Unlimited Extensibility**: Support for unlimited agent types with specialized knowledge domains - -> **Note**: Model routing capability uses simple YAML configurations mapping agents/tasks to specific models. Advanced routing features are planned for future implementation. - -#### **Adaptive Intelligence** -- **🎯 Context-Aware Routing**: Automatic agent selection based on detected intent and user context -- **📊 Performance Optimization**: Continuous optimization based on success rates and satisfaction metrics -- **🔄 Machine Learning Integration**: Agent performance data feeds back into routing algorithms - ---- - -This architecture documentation provides a comprehensive view from enterprise deployment to detailed implementation, enabling both infrastructure teams and developers to understand how the production Azure environment supports the modular, real-time AI agent system. 
diff --git a/docs/IntegrationPoints.md b/docs/IntegrationPoints.md deleted file mode 100644 index 6e2c2e06..00000000 --- a/docs/IntegrationPoints.md +++ /dev/null @@ -1,355 +0,0 @@ -# Integration Points: Adopting FastAPI Azure Voice Backend with AWS - -> **Disclaimer:** The service mappings, API flows, and integration patterns in this document are based on current AWS and Azure documentation as of June 2025. Actual compatibility and performance characteristics may vary. These integrations should be validated in a proof-of-concept environment and reviewed against the latest provider documentation for updates or breaking changes. - -## Table of Contents - -1. [Overview](#overview) -2. [Integration Scenarios](#integration-scenarios) - - [2.1 AWS Connect with Azure Voice Backend](#21-aws-connect-with-azure-voice-backend) - - [2.2 AWS Connect to Azure Communication Services](#22-aws-connect-to-azure-communication-services) - - [2.3 Cross-Cloud General Considerations](#23-cross-cloud-general-considerations) -3. [Service Mapping & Interchangeability](#service-mapping--interchangeability) -4. [LLM Integration Patterns](#llm-integration-patterns) -5. [Validation & Testing](#validation--testing) - ---- - -## 1. Overview - -This document outlines key integration points and scenarios for adopting the FastAPI-based Azure Voice Agent backend into existing AWS-based client environments, including AWS Connect and telephony backends. - ---- - -## 2. Integration Scenarios - -### 2.1 AWS Connect with Azure Voice Backend - -**Scenario:** Enterprise uses AWS Connect for contact center telephony and wants to leverage Azure's real-time voice agent backend for advanced speech, transcription, or LLM-powered workflows. 
- -**Integration Points:** - -- **API Gateway:** - - Expose FastAPI endpoints (e.g., `/api/call`, `/call/stream`) via public API Gateway (AWS API Gateway or Azure API Management) - - Secure endpoints with OAuth2/JWT or AWS IAM roles - -- **AWS Lambda / Step Functions:** - - Use Lambda functions to invoke FastAPI endpoints for call events, transcription, or agent actions - - Pass call metadata (session/correlation IDs) for traceability - -- **WebSocket Streaming:** - - AWS Connect streams audio to Lambda or Kinesis; adapt to forward PCM audio frames to FastAPI `/call/stream` WebSocket endpoint - - Use AWS SDKs or custom connectors to bridge audio streams - -- **State Management:** - - Use Redis for ephemeral session state, accessible from both AWS and Azure (via VPC peering or managed Redis) - -- **Event Handling:** - - Integrate AWS EventBridge or SNS/SQS with FastAPI's event endpoints for cross-cloud event propagation - -### 2.2 AWS Connect to Azure Communication Services - -**Scenario:** AWS Connect is the entry point for inbound calls, but calls need to be routed to Azure Communication Services (ACS) for advanced processing or agent handoff. - -#### 2.2.1 High-Level Support Matrix - -| Capability | Supported? 
| Where It Runs | Notes | -|------------|-----------|---------------|-------| -| SIP trunk hand-off | **Yes** | AWS Connect → SBC → ACS | External Voice Transfer Connector (Connect) ⟷ Direct Routing (ACS) | -| Keep existing IVR | **Yes** | AWS Connect | Only the live agent leg is transferred to ACS | -| Dual call recording | **Yes** | Both | Correlate Connect ContactId ↔ ACS serverCallId | -| WebSocket media stream | **Yes (ACS)** | ACS | Bidirectional PCM frames for ASR/TTS/Bot | -| Live media from Connect | **Yes (Kinesis)** | Amazon Connect | Use Live Media Streaming block → Kinesis | -| Chat / messaging federation | **No** | — | Separate SDKs, build your own bridge | -| Unified agent desktop | **Not OOTB** | — | Build/buy custom desktop for CCP + ACS | -| Cross-platform metrics | **Custom** | — | Pipe EventBridge + Azure Monitor → BI | - -#### 2.2.2 Reference Architecture - -```mermaid -flowchart TB - %% Define styles - classDef awsStyle fill:#FF9900,stroke:#232F3E,stroke-width:2px,color:#fff - classDef azureStyle fill:#0078D4,stroke:#0053A0,stroke-width:2px,color:#fff - classDef sbcStyle fill:#9C27B0,stroke:#4A148C,stroke-width:2px,color:#fff - classDef dataStyle fill:#4CAF50,stroke:#2E7D32,stroke-width:2px,color:#fff - classDef eventStyle fill:#FF5722,stroke:#BF360C,stroke-width:2px,color:#fff - - %% Caller - Caller["👤 Caller"] - - %% AWS Services - subgraph AWS ["🔶 Amazon Web Services"] - direction TB - Connect["📞 AWS Connect
Contact Center & IVR"] - EVTC["🔄 External Voice Transfer
Connector"] - EventBridge["📊 EventBridge
Event Routing"] - S3["🗄️ S3
Call Recording"] - - Connect --> EVTC - Connect --> EventBridge - Connect -.->|"Recording"| S3 - end - - %% Session Border Controller - subgraph SBCCloud ["🔐 Session Border Controller"] - SBC["🛡️ Certified SBC
Chime Voice Connector +
ACS Direct Routing
"] - end - - %% Azure Services - subgraph Azure ["☁️ Microsoft Azure"] - direction TB - ACS["📱 Communication Services
Call Automation & DID"] - BotService["🤖 Voice Agent Backend
ASR/TTS/LLM Processing"] - EventGrid["🔀 Event Grid
Event Fanout Hub"] - - %% Parallel Event Consumers - subgraph EventConsumers ["⚡ Parallel Event Processing"] - RecordingProcessor["🎙️ Recording Processor
Blob Storage Handler"] - AnalyticsProcessor["📊 Analytics Processor
Real-time Metrics"] - AuditProcessor["📋 Audit Processor
Compliance Logging"] - NotificationProcessor["🔔 Notification Processor
Alerts & Webhooks"] - end - - BlobStorage["🗄️ Blob Storage
Call Recording"] - Dashboard["📈 Dashboard
Analytics & BI"] - - ACS --> EventGrid - EventGrid --> RecordingProcessor - EventGrid --> AnalyticsProcessor - EventGrid --> AuditProcessor - EventGrid --> NotificationProcessor - - RecordingProcessor --> BlobStorage - AnalyticsProcessor --> Dashboard - end - - %% Call Flow - Caller -->|"📞 Inbound Call"| Connect - EVTC -->|"🔗 SIP/TLS + SRTP"| SBC - SBC -->|"📡 Direct Routing"| ACS - ACS -->|"🎵 Bidirectional
WebSocket PCM"| BotService - - %% Event Flow - EventBridge -->|"📤 HTTPS Target"| EventGrid - - %% Response Flow - BotService -.->|"🧠 AI Response"| ACS - ACS -.->|"📞 Audio Response"| SBC - SBC -.->|"🔊 Agent Output"| EVTC - EVTC -.->|"📢 To Caller"| Connect - - %% Apply styles - class Connect,EVTC,EventBridge,S3 awsStyle - class ACS,BotService,EventGrid,RecordingProcessor,AnalyticsProcessor,AuditProcessor,NotificationProcessor,BlobStorage,Dashboard azureStyle - class SBC sbcStyle - class Caller dataStyle - class EventGrid eventStyle -``` - -## Call Flow Documentation: AWS Connect to Azure Communication Services Integration - -This sequence diagram illustrates the complete call flow for escalating a customer interaction from an AI voice agent to a human agent across different cloud platforms. - -### Flow Overview - -The integration demonstrates a sophisticated call routing mechanism that seamlessly transitions between automated AI assistance and human agent support using multiple cloud services and telephony components. - -### Key Components - -- **AWS Connect**: Primary contact center platform handling initial customer calls and final agent queuing -- **SIP SBC (Session Border Controller)**: Optional intermediary component for SIP protocol translation and security -- **Azure Communication Services (ACS)**: Microsoft's communication platform hosting the AI voice agent -- **Bot Backend**: AI engine combining FastAPI web framework, speech processing, and GPT language models -- **AWS Connect Agent Queue**: Human agent pool for escalated interactions - -### Detailed Flow Steps - -1. **Initial Contact**: Customer initiates call to AWS Connect contact center -2. **Transfer Decision**: AWS Connect routes call to AI agent via SIP transfer to ACS endpoint -3. **Protocol Bridging**: SBC facilitates SIP communication between AWS and Azure ecosystems -4. **AI Engagement**: ACS establishes real-time media streaming with bot backend for voice processing -5. 
**Interactive AI Session**: Bot provides intelligent responses with interrupt handling capabilities -6. **Escalation Trigger**: AI determines need for human intervention based on conversation context -7. **Seamless Handoff**: ACS initiates SIP REFER/INVITE to route call back to AWS Connect -8. **Agent Connection**: Call successfully transferred to human agent queue for resolution - -### Technical Benefits - -- **Cross-cloud integration** enables leveraging best-of-breed services from multiple providers -- **Real-time media streaming** ensures natural conversation flow without noticeable delays -- **Intelligent escalation** preserves conversation context during human handoff -- **SIP protocol compatibility** ensures standard telephony interoperability -```mermaid - sequenceDiagram - participant Caller as 👤 Caller - participant Connect as 🔶 AWS Connect - participant SBC as 🛡️ SIP Bridge - participant ACS as ☁️ Azure ACS - participant Bot as 🤖 Voice Agent - participant Agent as 👥 Human Agent - - Note over Caller,Agent: Cross-Cloud Voice Integration - - Caller->>Connect: 📞 Call - Connect->>SBC: 🔄 Transfer to AI - SBC->>ACS: 📡 Route call - ACS->>Bot: 🎵 Stream audio - - loop AI Conversation - Bot->>ACS: 🗣️ Voice response - end - - Bot->>ACS: 🚨 Escalate to human - ACS->>SBC: 🔀 Transfer back - SBC->>Connect: 📞 Route to agent - Connect->>Agent: 👨‍💼 Connect agent - Agent-->>Caller: 🤝 Human takeover -``` - - -#### 2.2.3 Key Integration Steps - -**🔧 Infrastructure Setup** -1. Deploy certified SBC for both Amazon Chime Voice Connector and ACS Direct Routing -2. Configure AWS Connect External Voice Transfer Connector -3. Register ACS Direct Routing with the SBC - -**🎵 Real-Time Media Processing** -4. Initialize Voice Agent Processing with ACS Call Automation API -5. Establish bidirectional WebSocket media stream to voice agent backend -6. Configure PCM audio streaming for ASR/TTS processing - -**🎙️ Optional: Dual Recording Setup** -5. 
Configure parallel call recording: - - **AWS Connect**: `StartContactRecording` → S3 bucket - - **Azure ACS**: `StartRecording` API → Blob Storage - - Correlate recordings using shared session/contact identifiers - -**🔍 Validation Checklist** -- [ ] End-to-end SIP trunk connectivity between AWS Connect and ACS -- [ ] Bidirectional audio quality and latency testing (<500ms target) -- [ ] Event correlation and session state synchronization -- [ ] Recording file correlation and compliance validation -- [ ] Failover and error handling scenarios - -### 2.3 Cross-Cloud General Considerations - -- **Authentication & Security:** - - Use mutual TLS, signed JWTs, or cross-cloud identity federation - - Implement secure API/WebSocket access patterns - -- **Network Connectivity:** - - Ensure low-latency, secure connectivity between AWS and Azure - - Consider VPN, ExpressRoute, or VPC peering options - -- **Monitoring & Logging:** - - Centralize logs and metrics (Azure Monitor, AWS CloudWatch, or third-party SIEM) - - Implement unified observability across clouds - -- **Compliance:** - - Ensure data residency and compliance requirements are met - - Validate cross-cloud data transfer policies - ---- - -## 3. Service Mapping & Interchangeability - - -![AWS Service Mapping Architecture](assets/RTAudio_AWSMapped.png) - - -| Azure Service | AWS Equivalent | Interchangeable? | Notes | -|---------------|----------------|------------------|-------| -| Azure Communication Services (ACS) | Amazon Chime SDK, Connect | Partial | AWS Connect is managed contact center; Chime SDK offers programmable voice/video. SIP trunking can bridge both. | -| Azure Redis Cache | Amazon ElastiCache (Redis) | Yes | Both offer managed Redis. Session state and pub/sub patterns are portable. | -| Azure App Service / Container Apps | AWS Elastic Beanstalk, ECS | Yes | Both support containerized and web app hosting. 
| -| Azure API Management (APIM) | AWS API Gateway | Yes | Both provide API gateway, security, and throttling. | -| Azure Event Grid | Amazon EventBridge, SNS/SQS | Yes | Event-driven integration is possible with both. | -| Azure Monitor / Log Analytics | AWS CloudWatch, X-Ray | Yes | Centralized logging and monitoring. | -| Azure Cognitive Services (Speech) | Amazon Transcribe, Polly | Partial | Both offer STT/TTS, but APIs and models differ. Real-time streaming may require adaptation. | -| Azure Cosmos DB | Amazon DynamoDB, DocumentDB | Yes | Both are NoSQL/document DBs. | -| Azure Blob Storage | Amazon S3 | Yes | Both are object storage. | -| Azure Virtual Network (VNet) | Amazon VPC | Yes | Both provide private networking. | -| Azure ExpressRoute/VPN Gateway | AWS Direct Connect/VPN | Yes | For cross-cloud secure connectivity. | -| Azure Application Gateway | AWS Application Load Balancer | Yes | Both provide Layer 7 load balancing, SSL termination, and WAF. | -| Azure Private DNS Zone | AWS Route 53 Private Hosted Zone | Yes | Both provide private DNS resolution within virtual networks. | -| Azure AI Foundry | AWS SageMaker Studio | Partial | Both provide MLOps platforms. AI Foundry focuses on responsible AI and evaluation workflows. | -| Azure AI Studio | AWS Bedrock Studio | Partial | Both offer no-code/low-code AI model interaction interfaces. | -| Azure Key Vault | AWS Secrets Manager, Parameter Store | Yes | Both provide secure secret storage. Key Vault supports certificates; Secrets Manager focuses on rotation. | - -**General Guidance:** -- Most infrastructure and data services are interchangeable with configuration and API adaptation -- Voice/telephony and real-time media services may require significant integration work -- LLM services (Azure OpenAI vs. Amazon Bedrock) may need prompt/response pattern adaptation - ---- - -## 4. 
LLM Integration Patterns - -**Scenario:** FastAPI backend needs to invoke LLMs for chat intelligence, summarization, or agent decision-making. - -### 4.1 Provider Integration Points - -**Azure OpenAI Service:** -- Use `azure-ai` Python SDK or REST API for deployed models (e.g., `gpt-4o-mini`) -- Secure with managed identity or service principal from Azure Key Vault -- Endpoint: `POST https://{your-resource}.openai.azure.com/openai/deployments/{deployment}/completions` - -**AWS Bedrock / SageMaker:** -- Route LLM requests via AWS Lambda or direct SDK calls -- Secure with IAM roles and AWS SDK credentials - -**Other LLM Providers:** -- Support pluggable adapter pattern for OpenAI, Cohere, Anthropic, or self-hosted models -- Consider LangChain abstractions for provider flexibility - -### 4.2 LLM Evaluation & Fine-Tuning - -**Azure AI Foundry Integration:** -- Comprehensive evaluation metrics (safety, coherence, relevance, fluency) -- Built-in evaluation flows for A/B testing prompts, models, and configurations -- Use Azure AI Foundry's evaluation SDK: `from azure.ai.evaluation import evaluate_conversation` - -**AWS vs Azure Evaluation Support:** -- **Azure**: AI Foundry offers end-to-end evaluation pipelines with visual dashboards -- **AWS**: Bedrock provides model evaluation but requires more manual setup for conversation metrics -- Azure's evaluation tooling is more mature for chat AI scenarios - -**Iterative Improvement Workflow:** -- Implement evaluation checkpoints in FastAPI endpoints -- Use session-based evaluation data for prompt improvements -- Set up automated evaluation pipelines triggered by deployment - -### 4.3 Design Considerations - -- Abstract provider-specific logic behind common interface in `handlers/llm` module -- Manage prompt templates and rate limits -- Ensure chat context (session_id) is passed for continuity -- Implement retry and fallback strategies - ---- - -## 5. 
Validation & Testing - -**Proof-of-Concept Validation:** -- Validate prompt/response formats and latency for each provider under real-world load -- Benchmark throughput and error rates -- Test cross-cloud connectivity and security -- Validate compliance and data residency requirements - -**Performance Testing:** -- End-to-end latency testing (<500ms target for voice applications) -- Load testing for concurrent sessions -- Failover and error handling scenarios -- Audio quality validation for media streaming - -**Security Testing:** -- Cross-cloud authentication and authorization -- Network security and encryption validation -- API security and rate limiting -- Compliance audit trail verification - ---- - -For further details, see backend API documentation and architecture diagrams. diff --git a/docs/LoadTesting.md b/docs/LoadTesting.md deleted file mode 100644 index b834ce9b..00000000 --- a/docs/LoadTesting.md +++ /dev/null @@ -1,902 +0,0 @@ -# 🚀 Load Testing Strategy for Real-Time Call Center Automation - -> **WORK IN PROGRESS** - -## 📋 Overview - -This document outlines comprehensive load testing strategies for the real-time voice agent system, focusing on Azure-native solutions and testing frameworks that can validate performance at call center scale. The system aims to handle real-time voice processing with sub-100ms latency requirements across multiple Azure services. 
- ---- - -## 🎯 Testing Objectives - -### Performance Targets -| Metric | Target | Critical Threshold | -|--------|--------|--------------------| -| **Call Setup Latency** | <2 seconds | <5 seconds | -| **Voice-to-Response Latency** | <150ms (STT→LLM→TTS) | <300ms | -| **WebSocket Message Latency** | <50ms | <100ms | -| **Concurrent Calls** | 1,000+ | 500 minimum | -| **Call Success Rate** | >99.5% | >95% | -| **Audio Quality** | MOS >4.0 | MOS >3.5 | - -### Scale Requirements -- **Peak Load:** 10,000 concurrent calls -- **Daily Volume:** 100,000+ calls -- **Geographic Distribution:** Multi-region (eastus2, westus2, westeurope) -- **Agent Response Accuracy:** >95% intent recognition - ---- - -## 🏗️ Architecture Components to Test - -### Core Components -```mermaid -graph TB - subgraph "Load Testing Scope" - LB[Azure Load Balancer/Front Door] - API[FastAPI Backend] - WS[WebSocket Handlers] - ACS[Azure Communication Services] - Redis[Azure Redis Cache] - Speech[Azure Speech Services] - OpenAI[Azure OpenAI] - EventGrid[Azure Event Grid] - Storage[Azure Storage/Blob] - end - - LB --> API - API --> WS - API --> ACS - API --> Redis - WS --> Speech - Speech --> OpenAI - API --> EventGrid - EventGrid --> Storage -``` - ---- - -## 🛠️ Testing Approaches - -## 1. 
Azure-Native Load Testing Solutions - -### Azure Load Testing (Recommended) -**Best for:** End-to-end HTTP/WebSocket testing with Azure integration - -```yaml -# azure-load-test.yaml -testName: "RTMedAgent-Peak-Load" -engineInstances: 10 -testPlan: - - name: "call-initialization" - threadGroups: - - threads: 100 - rampUp: 60s - duration: 300s - scenarios: - - name: "start-call" - requests: - - url: "${API_BASE_URL}/api/v1/calls/start" - method: POST - headers: - Content-Type: application/json - body: | - { - "agent_type": "medical", - "caller_number": "+1${__Random(1000000000,9999999999)}", - "language": "en-US" - } - - name: "websocket-connection" - websocket: - url: "wss://${API_BASE_URL}/ws/call/{call_id}" - duration: 120s - messageRate: 10/s -``` - -**Implementation Steps:** -1. **Setup Azure Load Testing Resource** - ```bash - az load create \ - --name "rtmedagent-loadtest" \ - --resource-group "rg-rtmedagent-prod" \ - --location "eastus2" - ``` - -2. **Configure Test Parameters** - - VNet injection for private endpoint testing - - Managed identity for Azure service authentication - - Custom metrics collection for business KPIs - -3. 
**Integrate with CI/CD Pipeline** - ```yaml - # azure-pipelines.yml - - task: AzureLoadTest@1 - inputs: - azureSubscription: $(serviceConnection) - loadTestConfigFile: 'tests/load/azure-load-test.yaml' - loadTestResource: 'rtmedagent-loadtest' - resourceGroup: 'rg-rtmedagent-prod' - ``` - -### Azure Container Instances (ACI) + Custom Testing -**Best for:** Distributed testing with custom protocols - -```python -# distributed_load_test.py -import asyncio -import aiohttp -import websockets -from azure.identity import DefaultAzureCredential -from azure.communication.callautomation import CallAutomationClient - -class RTMedAgentLoadTester: - def __init__(self, base_url: str, acs_endpoint: str): - self.base_url = base_url - self.acs_client = CallAutomationClient( - acs_endpoint, - DefaultAzureCredential() - ) - self.session = None - - async def simulate_call_flow(self, call_id: str): - """Simulate complete call lifecycle""" - try: - # 1. Start call session - async with aiohttp.ClientSession() as session: - start_time = asyncio.get_event_loop().time() - - # Initialize call - async with session.post( - f"{self.base_url}/api/v1/calls/start", - json={ - "agent_type": "medical", - "caller_number": f"+1555{call_id:07d}", - "language": "en-US" - } - ) as resp: - call_data = await resp.json() - session_id = call_data["session_id"] - - # 2. WebSocket connection - ws_uri = f"wss://{self.base_url}/ws/call/{session_id}" - async with websockets.connect(ws_uri) as websocket: - # 3. Simulate audio streaming - await self.simulate_audio_interaction(websocket) - - # 4. 
Measure response latency - response_time = asyncio.get_event_loop().time() - start_time - - return { - "call_id": call_id, - "session_id": session_id, - "response_time": response_time, - "success": True - } - - except Exception as e: - return { - "call_id": call_id, - "error": str(e), - "success": False - } - - async def simulate_audio_interaction(self, websocket): - """Simulate realistic voice interaction patterns""" - # Send audio chunk simulation - await websocket.send(json.dumps({ - "type": "audio_chunk", - "data": self.generate_mock_audio_data(), - "timestamp": time.time() - })) - - # Wait for STT response - stt_response = await websocket.recv() - - # Wait for LLM + TTS response - tts_response = await websocket.recv() - - return json.loads(tts_response) -``` - ---- - -## 2. Specialized Testing Frameworks - -### Artillery.io for WebSocket & HTTP Load Testing -**Best for:** Real-time WebSocket performance testing - -```yaml -# artillery-test.yml -config: - target: 'https://rtmedagent-api.azurewebsites.net' - phases: - - duration: 300 - arrivalRate: 50 - name: "Ramp up" - - duration: 600 - arrivalRate: 100 - name: "Sustained load" - ws: - engine: ws - plugins: - azure-metrics: - endpoint: "{{ $environment.AZURE_MONITOR_ENDPOINT }}" - -scenarios: - - name: "Complete Call Flow" - weight: 80 - engine: http - flow: - - post: - url: "/api/v1/calls/start" - json: - agent_type: "medical" - caller_number: "+1{{ $randomInt(1000000000, 9999999999) }}" - language: "en-US" - capture: - - json: "$.session_id" - as: "sessionId" - - ws: - url: "/ws/call/{{ sessionId }}" - subprotocols: - - "call-protocol" - loop: - - send: - payload: | - { - "type": "audio_chunk", - "data": "{{ $randomString() }}", - "timestamp": {{ $timestamp() }} - } - - think: 2 - - send: - payload: | - { - "type": "end_speech" - } - - wait: - - for: "message" - match: - json: "$.type" - value: "agent_response" - - think: 3 - count: 10 -``` - -### Locust for Python-Native Testing -**Best for:** Complex 
business logic testing with Python ecosystem - -```python -# locust_test.py -from locust import HttpUser, task, between, events -from locust.contrib.fasthttp import FastHttpUser -import json -import asyncio -import websockets -import concurrent.futures - -class RTMedAgentUser(FastHttpUser): - wait_time = between(1, 3) - - def on_start(self): - """Initialize user session""" - response = self.client.post("/api/v1/calls/start", json={ - "agent_type": "medical", - "caller_number": f"+1555{self.environment.runner.user_count:07d}", - "language": "en-US" - }) - - if response.status_code == 200: - self.session_data = response.json() - self.session_id = self.session_data["session_id"] - else: - self.session_id = None - - @task(3) - def websocket_interaction(self): - """Test WebSocket voice interaction""" - if not self.session_id: - return - - # Use thread pool for WebSocket testing - with concurrent.futures.ThreadPoolExecutor() as executor: - future = executor.submit(self._ws_interaction) - try: - result = future.result(timeout=30) - events.request.fire( - request_type="WebSocket", - name="voice_interaction", - response_time=result["response_time"], - response_length=result["response_length"], - exception=None - ) - except Exception as e: - events.request.fire( - request_type="WebSocket", - name="voice_interaction", - response_time=0, - response_length=0, - exception=e - ) - - def _ws_interaction(self): - """Synchronous WebSocket interaction""" - import time - start_time = time.time() - - try: - # Simulate WebSocket interaction - # Note: Locust doesn't natively support WebSocket, - # so we use a separate connection - - return { - "response_time": (time.time() - start_time) * 1000, - "response_length": 256 - } - except Exception as e: - raise e - - @task(1) - def health_check(self): - """Basic health endpoint testing""" - self.client.get("/health") - - @task(1) - def metrics_endpoint(self): - """Test metrics collection""" - 
self.client.get(f"/api/v1/calls/{self.session_id}/metrics") -``` - ---- - -## 3. Azure Communication Services (ACS) Load Testing - -### ACS Call Automation Testing -**Focus:** Testing ACS calling capacity and voice quality - -```python -# acs_load_test.py -import asyncio -from azure.communication.callautomation import CallAutomationClient -from azure.communication.callautomation.models import ( - CallInvite, - CreateCallOptions, - RecognizeInputType, - DtmfOptions -) - -class ACSLoadTester: - def __init__(self, connection_string: str, callback_uri: str): - self.client = CallAutomationClient.from_connection_string( - connection_string - ) - self.callback_uri = callback_uri - - async def create_concurrent_calls(self, num_calls: int): - """Create multiple concurrent calls to test ACS capacity""" - tasks = [] - - for i in range(num_calls): - task = asyncio.create_task( - self.create_single_call(f"test-call-{i}") - ) - tasks.append(task) - - results = await asyncio.gather(*tasks, return_exceptions=True) - return results - - async def create_single_call(self, call_id: str): - """Create a single test call""" - try: - # Create outbound call - call_invite = CallInvite( - target=PhoneNumberIdentifier("+1555123456"), # Test number - source_caller_id_number=PhoneNumberIdentifier("+1555654321") - ) - - create_call_options = CreateCallOptions( - call_invites=[call_invite], - callback_uri=f"{self.callback_uri}?callId={call_id}" - ) - - call_result = self.client.create_call(create_call_options) - - # Simulate call interaction - await asyncio.sleep(2) # Wait for call establishment - - # Test recognition capabilities - recognize_options = RecognizeInputType.DTMF - dtmf_options = DtmfOptions( - max_tones_to_collect=1, - timeout_in_seconds=10 - ) - - # Hang up call - self.client.hang_up_call(call_result.call_connection_id) - - return { - "call_id": call_id, - "success": True, - "call_connection_id": call_result.call_connection_id - } - - except Exception as e: - return { - "call_id": 
call_id, - "success": False, - "error": str(e) - } -``` - ---- - -## 4. Component-Specific Testing - -### Redis Cache Performance Testing -```python -# redis_load_test.py -import asyncio -import aioredis -import time -from concurrent.futures import ThreadPoolExecutor - -class RedisLoadTester: - def __init__(self, redis_url: str): - self.redis_url = redis_url - - async def test_session_operations(self, num_sessions: int): - """Test Redis session storage under load""" - redis = aioredis.from_url(self.redis_url) - - tasks = [] - for i in range(num_sessions): - task = asyncio.create_task( - self.simulate_session_lifecycle(redis, f"session-{i}") - ) - tasks.append(task) - - results = await asyncio.gather(*tasks) - await redis.close() - return results - - async def simulate_session_lifecycle(self, redis, session_id: str): - """Simulate complete session data operations""" - start_time = time.time() - - try: - # Create session - session_data = { - "call_id": session_id, - "participant_count": 2, - "conversation_history": [], - "metadata": {"created_at": time.time()} - } - - await redis.hset(f"session:{session_id}", mapping=session_data) - - # Simulate conversation updates (20 messages) - for i in range(20): - conversation_update = { - f"message_{i}": f"User message {i}", - f"response_{i}": f"Agent response {i}" - } - await redis.hset( - f"session:{session_id}:conversation", - mapping=conversation_update - ) - - # Read session data - session = await redis.hgetall(f"session:{session_id}") - conversation = await redis.hgetall(f"session:{session_id}:conversation") - - # Cleanup - await redis.delete(f"session:{session_id}") - await redis.delete(f"session:{session_id}:conversation") - - return { - "session_id": session_id, - "duration": time.time() - start_time, - "success": True - } - - except Exception as e: - return { - "session_id": session_id, - "duration": time.time() - start_time, - "success": False, - "error": str(e) - } -``` - -### Azure OpenAI Load Testing -```python 
-# openai_load_test.py -import openai -import asyncio -import time -from azure.identity import DefaultAzureCredential - -class OpenAILoadTester: - def __init__(self, endpoint: str, deployment_name: str): - credential = DefaultAzureCredential() - token = credential.get_token("https://cognitiveservices.azure.com/.default") - - openai.api_type = "azure_ad" - openai.api_base = endpoint - openai.api_version = "2024-02-01" - openai.api_key = token.token - - self.deployment_name = deployment_name - - async def test_completion_throughput(self, num_requests: int): - """Test OpenAI completion throughput""" - tasks = [] - - for i in range(num_requests): - task = asyncio.create_task( - self.single_completion_test(f"request-{i}") - ) - tasks.append(task) - - results = await asyncio.gather(*tasks, return_exceptions=True) - return results - - async def single_completion_test(self, request_id: str): - """Single completion request test""" - start_time = time.time() - - try: - response = await openai.ChatCompletion.acreate( - engine=self.deployment_name, - messages=[ - {"role": "system", "content": "You are a helpful medical assistant."}, - {"role": "user", "content": "What are the symptoms of a common cold?"} - ], - max_tokens=150, - temperature=0.7 - ) - - return { - "request_id": request_id, - "response_time": time.time() - start_time, - "tokens_used": response.usage.total_tokens, - "success": True - } - - except Exception as e: - return { - "request_id": request_id, - "response_time": time.time() - start_time, - "success": False, - "error": str(e) - } -``` - ---- - -## 5. 
Monitoring & Observability During Load Testing - -### Azure Monitor Integration -```yaml -# monitoring-setup.bicep -param loadTestResourceGroup string -param applicationInsightsName string - -resource appInsights 'Microsoft.Insights/components@2020-02-02' = { - name: applicationInsightsName - location: resourceGroup().location - kind: 'web' - properties: { - Application_Type: 'web' - RetentionInDays: 30 - } -} - -resource loadTestAlerts 'Microsoft.Insights/metricAlerts@2018-03-01' = { - name: 'loadtest-performance-alert' - location: 'global' - properties: { - description: 'Alert when response time exceeds threshold during load test' - severity: 2 - enabled: true - scopes: [ - appInsights.id - ] - evaluationFrequency: 'PT1M' - windowSize: 'PT5M' - criteria: { - 'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria' - allOf: [ - { - name: 'ResponseTime' - metricName: 'requests/duration' - operator: 'GreaterThan' - threshold: 300 // 300ms threshold - timeAggregation: 'Average' - } - ] - } - } -} -``` - -### Custom Metrics Collection -```python -# metrics_collector.py -from azure.monitor.opentelemetry import configure_azure_monitor -from opentelemetry import trace, metrics -from opentelemetry.metrics import get_meter - -class LoadTestMetricsCollector: - def __init__(self, connection_string: str): - configure_azure_monitor(connection_string=connection_string) - - self.tracer = trace.get_tracer(__name__) - self.meter = get_meter(__name__) - - # Custom metrics - self.call_duration_histogram = self.meter.create_histogram( - name="rtmedagent.call.duration", - description="Call duration in seconds", - unit="s" - ) - - self.voice_latency_histogram = self.meter.create_histogram( - name="rtmedagent.voice.latency", - description="Voice processing latency in milliseconds", - unit="ms" - ) - - self.concurrent_calls_gauge = self.meter.create_up_down_counter( - name="rtmedagent.calls.concurrent", - description="Number of concurrent active calls" - ) - - def 
record_call_metrics(self, call_id: str, duration: float, - voice_latency: float, success: bool): - """Record call performance metrics""" - with self.tracer.start_as_current_span("call_metrics") as span: - span.set_attribute("call_id", call_id) - span.set_attribute("success", success) - - if success: - self.call_duration_histogram.record(duration) - self.voice_latency_histogram.record(voice_latency) - else: - span.set_attribute("error", True) -``` - ---- - -## 6. Test Execution & CI/CD Integration - -### GitHub Actions Workflow -```yaml -# .github/workflows/load-test.yml -name: Load Testing - -on: - workflow_dispatch: - inputs: - test_duration: - description: 'Test duration in minutes' - required: true - default: '10' - concurrent_users: - description: 'Number of concurrent users' - required: true - default: '100' - -jobs: - load-test: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: '3.11' - - - name: Install dependencies - run: | - pip install -r tests/load/requirements.txt - - - name: Azure Login - uses: azure/login@v1 - with: - creds: ${{ secrets.AZURE_CREDENTIALS }} - - - name: Run Load Test - run: | - python tests/load/run_load_test.py \ - --duration ${{ github.event.inputs.test_duration }} \ - --users ${{ github.event.inputs.concurrent_users }} \ - --endpoint ${{ secrets.API_ENDPOINT }} - - - name: Upload Results - uses: actions/upload-artifact@v3 - with: - name: load-test-results - path: tests/load/results/ - - - name: Publish Results - run: | - az monitor app-insights events show \ - --app ${{ secrets.APP_INSIGHTS_APP_ID }} \ - --event customMetrics \ - --filter "timestamp ge datetime'$(date -u -d '1 hour ago' +%Y-%m-%dT%H:%M:%SZ)'" -``` - ---- - -## 7. 
Performance Benchmarking & Baseline Establishment - -### Baseline Performance Tests -```python -# baseline_test.py -import json -import statistics -from datetime import datetime - -class PerformanceBaseline: - def __init__(self): - self.baseline_metrics = { - "call_setup_latency": {"p50": 1.2, "p95": 2.1, "p99": 3.5}, - "voice_response_latency": {"p50": 85, "p95": 140, "p99": 200}, - "websocket_latency": {"p50": 25, "p95": 45, "p99": 75}, - "concurrent_call_capacity": 1000, - "success_rate": 99.7 - } - - def validate_against_baseline(self, test_results: list) -> dict: - """Validate test results against performance baseline""" - validation_report = { - "timestamp": datetime.utcnow().isoformat(), - "baseline_version": "v1.0", - "test_summary": {}, - "regressions": [], - "improvements": [] - } - - # Calculate test metrics - response_times = [r["response_time"] for r in test_results if r["success"]] - success_rate = len([r for r in test_results if r["success"]]) / len(test_results) * 100 - - test_metrics = { - "p50": statistics.median(response_times), - "p95": statistics.quantiles(response_times, n=20)[18], # 95th percentile - "p99": statistics.quantiles(response_times, n=100)[98], # 99th percentile - "success_rate": success_rate - } - - validation_report["test_summary"] = test_metrics - - # Check for regressions - for metric, baseline_value in self.baseline_metrics.items(): - if metric in test_metrics: - if isinstance(baseline_value, dict): - for percentile, baseline_val in baseline_value.items(): - test_val = test_metrics.get(percentile) - if test_val and test_val > baseline_val * 1.1: # 10% regression threshold - validation_report["regressions"].append({ - "metric": f"{metric}_{percentile}", - "baseline": baseline_val, - "actual": test_val, - "regression_percent": ((test_val - baseline_val) / baseline_val) * 100 - }) - else: - test_val = test_metrics.get(metric) - if test_val and test_val < baseline_value * 0.9: # 10% degradation threshold - 
validation_report["regressions"].append({ - "metric": metric, - "baseline": baseline_value, - "actual": test_val, - "regression_percent": ((baseline_value - test_val) / baseline_value) * 100 - }) - - return validation_report -``` - ---- - -## 📊 Recommended Testing Schedule - -### Continuous Testing Strategy -| Test Type | Frequency | Duration | Scope | -|-----------|-----------|----------|-------| -| **Smoke Tests** | Every deployment | 2 minutes | Basic functionality | -| **Performance Regression** | Daily | 15 minutes | Key user flows | -| **Capacity Testing** | Weekly | 1 hour | Peak load simulation | -| **Stress Testing** | Monthly | 2 hours | Breaking point analysis | -| **Chaos Engineering** | Quarterly | 4 hours | Failure resilience | - -### Test Environment Strategy -```mermaid -graph LR - subgraph "Testing Environments" - Dev[Development
5-10 concurrent calls] - Staging[Staging
100-500 concurrent calls] - PreProd[Pre-Production
1000+ concurrent calls] - Prod[Production
Canary testing] - end - - Dev --> Staging - Staging --> PreProd - PreProd --> Prod -``` - ---- - -## 🎯 Success Criteria & KPIs - -### Performance KPIs -- **Response Time:** 95th percentile <150ms for voice processing -- **Throughput:** 1000+ concurrent calls sustained -- **Error Rate:** <0.5% for critical call flows -- **Resource Utilization:** <70% CPU/Memory at peak load -- **Cost Efficiency:** <$0.10 per successful call interaction - -### Quality Gates -```yaml -# quality-gates.yml -performance_gates: - response_time_p95: 150 # milliseconds - success_rate: 99.5 # percentage - concurrent_capacity: 1000 # calls - resource_cpu_max: 70 # percentage - resource_memory_max: 70 # percentage - -regression_thresholds: - response_time_degradation: 10 # percentage - throughput_degradation: 5 # percentage - error_rate_increase: 2 # percentage -``` - ---- - -## 🚀 Getting Started - -### Quick Start Checklist -1. **Setup Azure Load Testing Resource** - ```bash - az extension add --name load - az load create --name "rtmedagent-test" --resource-group "rg-loadtest" - ``` - -2. **Configure Monitoring** - - Enable Application Insights on all services - - Set up custom metrics collection - - Configure alerting rules - -3. **Create Test Data** - - Generate test phone numbers - - Prepare audio samples for testing - - Set up test user personas - -4. **Run Baseline Test** - ```bash - python tests/load/baseline_test.py --endpoint https://your-api.azurewebsites.net - ``` - -5. 
**Schedule Regular Testing** - - Configure GitHub Actions workflow - - Set up Azure DevOps pipeline integration - - Enable automated regression detection - ---- - -## 📈 Scaling Considerations - -### Azure Service Limits -| Service | Default Limit | Recommended Limit | Notes | -|---------|---------------|-------------------|-------| -| **Azure OpenAI** | 240K TPM | 1M+ TPM | Request quota increase for production | -| **Speech Services** | 20 concurrent | 200+ concurrent | Monitor STT/TTS usage | -| **Event Grid** | 5000 events/sec | 50K+ events/sec | Custom topics may need higher limits | -| **Redis Cache** | 250 connections | 2500+ connections | Use Premium tier for production | -| **ACS Calling** | 1000 concurrent | 10K+ concurrent | Contact Azure support for limits | - -This comprehensive load testing strategy ensures your real-time call center automation system can handle production-scale traffic while maintaining the sub-100ms latency requirements critical for voice interactions. diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 65e322ff..00000000 --- a/docs/README.md +++ /dev/null @@ -1,26 +0,0 @@ -# **ARTVoice Documentation** - -**Comprehensive guides and references** for deploying, configuring, and extending the ARTVoice Accelerator. - -## **Quick Navigation** - -| Document | Purpose | -|----------|---------| -| [`DeploymentGuide.md`](DeploymentGuide.md) | Complete Azure deployment with Terraform/azd | -| [`quickstart-local-development.md`](quickstart-local-development.md) | Local development setup and testing | -| [`repo-structure.md`](repo-structure.md) | Deep repository structure (5 levels) | -| [`Architecture.md`](Architecture.md) | System architecture and design decisions | -| [`Troubleshooting.md`](Troubleshooting.md) | Common issues and solutions | - -## **Getting Started** - -1. **New to ARTVoice?** Start with the main [README.md](../README.md) -2. **Want to deploy?** See [DeploymentGuide.md](DeploymentGuide.md) -3. 
**Developing locally?** Follow [quickstart-local-development.md](quickstart-local-development.md) -4. **Need to understand the codebase?** Check [repo-structure.md](repo-structure.md) - -## **Additional Resources** - -- **API Documentation**: Available at `/docs` when backend is running -- **Samples**: See [`../samples/`](../samples/) for hands-on tutorials -- **Load Testing**: Framework documentation in [`../tests/load/`](../tests/load/) diff --git a/docs/Troubleshooting.md b/docs/Troubleshooting.md deleted file mode 100644 index c57bad62..00000000 --- a/docs/Troubleshooting.md +++ /dev/null @@ -1,428 +0,0 @@ -# Troubleshooting Guide - -This guide provides quick solutions for common issues with the Real-Time Audio Agent application. - -## Table of Contents - -- [ACS (Azure Communication Services) Issues](#acs-azure-communication-services-issues) -- [WebSocket Connection Issues](#websocket-connection-issues) -- [Networking & Connectivity](#networking--connectivity) -- [Backend API Issues](#backend-api-issues) -- [Frontend Issues](#frontend-issues) -- [Azure AI Services Issues](#azure-ai-services-issues) -- [Redis Connection Issues](#redis-connection-issues) -- [Deployment Issues](#deployment-issues) -- [Performance Issues](#performance-issues) -- [Debugging Tools](#debugging-tools) - ---- - -## ACS (Azure Communication Services) Issues - -### Problem: ACS not making outbound calls - -**Symptoms:** -- Call fails to initiate -- No audio connection established -- ACS callback events not received - -**Solutions:** -1. **Check App Service Logs:** - ```bash - make monitor_backend_deployment - # Or check Azure Container Apps logs - az containerapp logs show --name --resource-group - ``` - -2. **Verify Webhook URL is publicly accessible:** - - Must use HTTPS (not HTTP) - - Use devtunnel for local development: - ```bash - devtunnel host -p 8010 --allow-anonymous - ``` - - Or use ngrok: - ```bash - ngrok http 8010 - ``` - -3. 
**Test WebSocket connectivity:** - ```bash - # Install wscat if not available - npm install -g wscat - - # Test WebSocket connection - wscat -c wss://your-domain.com/ws/call/{callConnectionId} - ``` - -4. **Check ACS Resource Configuration:** - - Verify ACS connection string in environment variables - - Ensure phone number is properly configured - - Check PSTN calling is enabled - -### Problem: Audio quality issues or dropouts - -**Solutions:** -1. Check network latency to Azure region -2. Verify TTS/STT service health -3. Monitor Redis connection stability -4. Check container resource limits - ---- - -## WebSocket Connection Issues - -### Problem: WebSocket connection fails or drops frequently - -**Symptoms:** -- `WebSocket connection failed` errors -- Frequent reconnections -- Missing real-time updates - -**Solutions:** -1. **Test WebSocket endpoint directly:** - ```bash - wscat -c wss://:8010/call/stream - ``` - -2. **Check CORS configuration:** - - Verify frontend origin is allowed - - Ensure WebSocket upgrade headers are supported - -3. **Monitor connection lifecycle:** - ```bash - # Check backend logs for WebSocket events - tail -f logs/app.log | grep -i websocket - ``` - -4. **Verify environment variables:** - ```bash - # Check if required vars are set - echo $AZURE_ACS_CONNECTION_STRING - echo $REDIS_URL - ``` - ---- - -## Networking & Connectivity - -### Problem: Cannot access application from external networks - -**Solutions:** -1. **For local development:** - ```bash - # Start devtunnel - devtunnel host -p 8010 --allow-anonymous - - # Or use ngrok - ngrok http 8010 - ``` - -2. **Check firewall rules:** - - Ensure ports 8010 (backend) and 5173 (frontend) are open - - Verify Azure NSG rules if deployed - -3. **Verify DNS resolution:** - ```bash - nslookup your-domain.com - dig your-domain.com - ``` - -### Problem: SSL/TLS certificate issues - -**Solutions:** -1. 
**For development with self-signed certs:** - ```bash - # Accept self-signed certificates in browser - # Or configure proper SSL certificates - ``` - -2. **Check certificate validity:** - ```bash - openssl s_client -connect your-domain.com:443 -servername your-domain.com - ``` - ---- - -## Backend API Issues - -### Problem: FastAPI server won't start - -**Symptoms:** -- Import errors -- Port already in use -- Environment variable errors - -**Solutions:** -1. **Check Python environment:** - ```bash - conda activate audioagent - pip install -r requirements.txt - ``` - -2. **Kill processes using port 8010:** - ```bash - lsof -ti:8010 | xargs kill -9 - ``` - -3. **Run with detailed logging:** - ```bash - uvicorn apps.rtagent.backend.main:app --reload --port 8010 --log-level debug - ``` - -4. **Check environment file:** - ```bash - # Ensure .env file exists and has required variables - cat .env | grep -E "(AZURE_|REDIS_|OPENAI_)" - ``` - -### Problem: API endpoints returning 500 errors - -**Solutions:** -1. **Check backend logs:** - ```bash - tail -f logs/app.log - ``` - -2. **Test individual endpoints:** - ```bash - curl -X GET http://localhost:8010/health - curl -X POST http://localhost:8010/api/v1/calls/start -H "Content-Type: application/json" -d '{}' - ``` - -3. **Verify database connections:** - ```bash - # Test Redis connection - redis-cli -u $REDIS_URL ping - ``` - ---- - -## Frontend Issues - -### Problem: React app won't start or compile errors - -**Solutions:** -1. **Clear node modules and reinstall:** - ```bash - cd apps/rtagent/frontend - rm -rf node_modules package-lock.json - npm install - ``` - -2. **Check Node.js version:** - ```bash - node --version # Should be >= 18 - npm --version - ``` - -3. **Start with verbose logging:** - ```bash - npm run dev -- --verbose - ``` - -### Problem: Frontend can't connect to backend - -**Solutions:** -1. **Check proxy configuration in vite.config.js** -2. 
**Verify backend is running:** - ```bash - curl http://localhost:8010/health - ``` - -3. **Check network tab in browser dev tools** -4. **Verify CORS settings in backend** - ---- - -## Azure AI Services Issues - -### Problem: Speech-to-Text not working - -**Solutions:** -1. **Check Azure Cognitive Services key:** - ```bash - echo $AZURE_COGNITIVE_SERVICES_KEY - echo $AZURE_COGNITIVE_SERVICES_REGION - ``` - -2. **Test STT service directly:** - ```bash - # Use curl to test Azure Speech API - curl -X POST "https://$AZURE_COGNITIVE_SERVICES_REGION.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1" \ - -H "Ocp-Apim-Subscription-Key: $AZURE_COGNITIVE_SERVICES_KEY" \ - -H "Content-Type: audio/wav" \ - --data-binary @test.wav - ``` - -3. **Check service quotas and limits** -4. **Verify region availability for your subscription** - -### Problem: OpenAI API errors - -**Solutions:** -1. **Check API key and endpoint:** - ```bash - echo $AZURE_OPENAI_ENDPOINT - echo $AZURE_OPENAI_API_KEY - ``` - -2. **Test API connectivity:** - ```bash - curl -X GET "$AZURE_OPENAI_ENDPOINT/openai/deployments?api-version=2023-12-01-preview" \ - -H "api-key: $AZURE_OPENAI_API_KEY" - ``` - -3. **Verify model deployment names match configuration** - ---- - -## Redis Connection Issues - -### Problem: Redis connection timeouts or failures - -**Solutions:** -1. **Test Redis connectivity:** - ```bash - redis-cli -u $REDIS_URL ping - redis-cli -u $REDIS_URL info server - ``` - -2. **Check Redis configuration:** - ```bash - # For local Redis - redis-server --version - - # Check if Redis is running - ps aux | grep redis - ``` - -3. **For Azure Redis Cache:** - - Verify connection string format - - Check firewall rules - - Ensure SSL is enabled if required - ---- - -## Deployment Issues - -### Problem: azd deployment fails - -**Solutions:** -1. **Check Azure authentication:** - ```bash - az account show - az account list-locations - ``` - -2. 
**Verify subscription and resource group:** - ```bash - azd env get-values - ``` - -3. **Check deployment logs:** - ```bash - azd logs - ``` - -4. **Common fixes:** - ```bash - # Clean and redeploy - azd down --force --purge - azd up - ``` - -### Problem: Container deployment issues - -**Solutions:** -1. **Check container logs:** - ```bash - az containerapp logs show --name --resource-group --follow - ``` - -2. **Verify container registry access:** - ```bash - az acr repository list --name - ``` - -3. **Check resource quotas:** - ```bash - az vm list-usage --location - ``` - ---- - -## Performance Issues - -### Problem: High latency in audio processing - -**Solutions:** -1. **Monitor resource usage:** - ```bash - # Check CPU and memory - top - htop # if available - ``` - -2. **Check Azure region proximity** -3. **Monitor Redis performance** -4. **Review container resource limits** - -### Problem: Memory leaks or high memory usage - -**Solutions:** -1. **Profile Python memory usage:** - ```python - # Add to your code for debugging - import psutil - process = psutil.Process() - print(f"Memory usage: {process.memory_info().rss / 1024 / 1024:.1f} MB") - ``` - -2. **Check for connection leaks** -3. **Monitor WebSocket connections** - ---- - -## Debugging Tools - -### Essential Commands - -```bash -# Check all services health -make health_check - -# Monitor backend deployment -make monitor_backend_deployment - -# View logs -tail -f logs/app.log - -# Test WebSocket connection -wscat -c ws://localhost:8010/ws/call/test-id - -# Check network connectivity -curl -v http://localhost:8010/health - -# Monitor system resources -htop -iotop # for disk I/O -``` - -### Log Locations - -- **Backend logs:** container logs -- **Frontend logs:** Browser console (F12) -- **Azure logs:** Azure Monitor / Application Insights -- **System logs:** `/var/log/` (Linux) or Console.app (macOS) - ---- - -## Getting Help - -If you're still experiencing issues: - -1. 
**Check GitHub Issues:** Look for similar problems in the repository -2. **Enable debug logging:** Set `LOG_LEVEL=DEBUG` in your environment -3. **Collect logs:** Gather relevant logs before reporting issues -4. **Test with minimal setup:** Try with basic configuration first -5. **Check Azure service health:** Visit Azure status page diff --git a/docs/WebsocketAuth.md b/docs/WebsocketAuth.md deleted file mode 100644 index d15c506c..00000000 --- a/docs/WebsocketAuth.md +++ /dev/null @@ -1,2 +0,0 @@ -https://learn.microsoft.com/en-us/azure/communication-services/how-tos/call-automation/secure-webhook-endpoint?pivots=programming-language-python - diff --git a/docs/api/README.md b/docs/api/README.md new file mode 100644 index 00000000..90fa4547 --- /dev/null +++ b/docs/api/README.md @@ -0,0 +1,85 @@ +# API Reference + +Comprehensive REST API and WebSocket documentation for the Real-Time Voice Agent backend built on **Python 3.11 + FastAPI**. + +## Quick Start + +The API provides comprehensive Azure integrations for voice-enabled applications: + +- **[Azure Communication Services](https://learn.microsoft.com/en-us/azure/communication-services/concepts/call-automation/audio-streaming-concept)** - Call automation and bidirectional media streaming +- **[Azure Speech Services](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-to-text)** - Neural text-to-speech and speech recognition +- **[Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/realtime-audio-websockets)** - Conversational AI and language processing + +## API Endpoints + +The V1 API provides REST and WebSocket endpoints for real-time voice processing: + +### REST Endpoints +- **`/api/v1/calls/`** - Phone call management (initiate, answer, callbacks) +- **`/api/v1/health/`** - Service health monitoring and validation + +### WebSocket Endpoints +- **`/api/v1/media/stream`** - ACS media streaming and session management +- **`/api/v1/realtime/conversation`** - 
Browser-based voice conversations + +## Interactive API Documentation + +**👉 [Complete API Reference](api-reference.md)** - Interactive OpenAPI documentation with all REST endpoints, WebSocket details, authentication, and configuration. + +### Key Features + +- **Call Management** - Phone call lifecycle through Azure Communication Services +- **Media Streaming** - Real-time audio processing for ACS calls +- **Real-time Communication** - Browser-based voice conversations +- **Health Monitoring** - Service validation and diagnostics + +## WebSocket Protocol + +Real-time **bidirectional audio streaming** following [Azure Communication Services WebSocket specifications](https://learn.microsoft.com/en-us/azure/communication-services/how-tos/call-automation/audio-streaming-quickstart#set-up-a-websocket-server): + +- **Audio Format**: PCM 16kHz mono (ACS) / PCM 24kHz mono (Azure OpenAI Realtime) +- **Transport**: WebSocket over TCP with full-duplex communication +- **Latency**: Sub-50ms for voice activity detection and response generation + +**👉 [WebSocket Details](api-reference.md#websocket-endpoints)** - Complete protocol documentation + +## Observability + +**OpenTelemetry Tracing** - Built-in distributed tracing for production monitoring with Azure Monitor integration: + +- Session-level spans for complete request lifecycle +- Service dependency mapping (Speech, Communication Services, Redis, OpenAI) +- Audio processing latency and error rate monitoring + +## Streaming Modes + +The API supports multiple streaming modes configured via `ACS_STREAMING_MODE`: + +- **MEDIA Mode (Default)** - Traditional STT/TTS with orchestrator processing +- **VOICE_LIVE Mode** - [Azure OpenAI Realtime API](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/realtime-audio-websockets) integration +- **TRANSCRIPTION Mode** - Real-time transcription without AI responses + +**👉 [Detailed Configuration](../reference/streaming-modes.md)** - Complete streaming mode documentation + 
+## Architecture + +**Three-Thread Design** - Optimized for real-time conversational AI with sub-10ms barge-in detection following [Azure Speech SDK best practices](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/how-to-recognize-speech). + +**👉 [Architecture Details](../architecture/acs-flows.md)** - Complete three-thread architecture documentation + +## Reliability + +**Graceful Degradation** - Following [Azure Communication Services reliability patterns](https://learn.microsoft.com/en-us/azure/communication-services/concepts/troubleshooting-info): + +- Connection pooling and retry logic with exponential backoff +- Headless environment support with memory-only audio synthesis +- [Managed identity authentication](https://learn.microsoft.com/en-us/azure/ai-services/authentication#authenticate-with-azure-active-directory) with automatic token refresh + +## Related Documentation + +- **[API Reference](api-reference.md)** - Complete OpenAPI specification with interactive testing +- **[Speech Synthesis](../reference/speech-synthesis.md)** - Comprehensive TTS implementation guide +- **[Speech Recognition](../reference/speech-recognition.md)** - Advanced STT capabilities and configuration +- **[Streaming Modes](../reference/streaming-modes.md)** - Audio processing pipeline configuration +- **[Utilities](../reference/utilities.md)** - Supporting services and infrastructure components +- **[Architecture Overview](../architecture/README.md)** - System architecture and deployment patterns diff --git a/docs/api/api-reference.md b/docs/api/api-reference.md new file mode 100644 index 00000000..09a72057 --- /dev/null +++ b/docs/api/api-reference.md @@ -0,0 +1,302 @@ +# API Reference + +**Interactive API documentation** generated from the OpenAPI schema. This provides the definitive reference for all REST endpoints, WebSocket connections, authentication, and configuration.
+ +## Interactive Documentation + +[OAD(docs/api/openapi.json)] + +## WebSocket Endpoints + +The following WebSocket endpoints provide real-time communication capabilities: + +### Media Streaming WebSocket +**URL**: `wss://api.domain.com/api/v1/media/stream` + +Real-time bidirectional audio streaming for Azure Communication Services calls following [ACS WebSocket protocol](https://learn.microsoft.com/en-us/azure/communication-services/how-tos/call-automation/audio-streaming-quickstart#set-up-a-websocket-server). + +**Query Parameters**: +- `call_connection_id` (required): ACS call connection identifier +- `session_id` (optional): Browser session ID for UI coordination + +**Audio Formats**: +- **MEDIA/TRANSCRIPTION Mode**: PCM 16kHz mono (16-bit) +- **VOICE_LIVE Mode**: PCM 24kHz mono (16-bit) for Azure OpenAI Realtime API + +**Message Types**: +```json +// Incoming audio data +{ + "kind": "AudioData", + "audioData": { + "timestamp": "2025-09-28T12:00:00Z", + "participantRawID": "8:acs:...", + "data": "base64EncodedPCMAudio", + "silent": false + } +} + +// Outgoing audio data (bidirectional streaming) +{ + "Kind": "AudioData", + "AudioData": { + "Data": "base64EncodedPCMAudio" + } +} +``` + +### Realtime Conversation WebSocket +**URL**: `wss://api.domain.com/api/v1/realtime/conversation` + +Browser-based voice conversations with session persistence and real-time transcription. + +**Query Parameters**: +- `session_id` (optional): Conversation session identifier for session restoration + +**Features**: +- Real-time speech-to-text transcription +- TTS audio streaming for responses +- Conversation context persistence +- Multi-language support + +### Dashboard Relay WebSocket +**URL**: `wss://api.domain.com/api/v1/realtime/dashboard/relay` + +Real-time updates for dashboard clients monitoring ongoing conversations.
+ +**Query Parameters**: +- `session_id` (optional): Filter updates for specific conversation sessions + +**Use Cases**: +- Live call monitoring and analytics +- Real-time transcript viewing +- Agent performance dashboards + +## Authentication & Security + +All endpoints support **Azure Entra ID** authentication using `DefaultAzureCredential` following [Azure best practices](https://learn.microsoft.com/en-us/azure/ai-services/authentication#authenticate-with-azure-active-directory). + +### Authentication Methods + +**Environment Variables** (Recommended for production): +```bash +# Service Principal Authentication +export AZURE_CLIENT_ID="your-client-id" +export AZURE_CLIENT_SECRET="your-client-secret" +export AZURE_TENANT_ID="your-tenant-id" +``` + +**Azure CLI** (Development): +```bash +az login +``` + +**Managed Identity** (Azure deployment): +- System-assigned or user-assigned managed identity +- No credential management required +- Automatic token refresh + +### Required RBAC Roles + +Grant these Azure roles to your service principal or managed identity: + +| Service | Required Role | Purpose | +|---------|---------------|---------| +| Azure Speech Services | **Cognitive Services User** | STT/TTS operations | +| Azure Cache for Redis | **Redis Cache Contributor** | Session state management | +| Azure Communication Services | **Communication Services Contributor** | Call automation and media streaming | +| Azure Storage | **Storage Blob Data Contributor** | Call recordings and artifacts | +| Azure OpenAI | **Cognitive Services OpenAI User** | AI model inference | + +### Security Features + +- **Credential-less authentication** with managed identity +- **Connection pooling** with automatic token refresh +- **TLS encryption** for all HTTP/WebSocket connections +- **Input validation** and request sanitization +- **Rate limiting** per [Azure service quotas](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-services-quotas-and-limits) + +## 
Configuration + +### Required Environment Variables + +**Azure Services Configuration**: +```bash +# Azure Speech Services +AZURE_SPEECH_REGION=eastus +AZURE_SPEECH_RESOURCE_ID=/subscriptions/{sub}/resourceGroups/{rg}/providers/Microsoft.CognitiveServices/accounts/{name} + +# Azure Cache for Redis +AZURE_REDIS_HOSTNAME=your-redis.redis.cache.windows.net +AZURE_REDIS_USERNAME=default + +# Azure Communication Services +ACS_ENDPOINT=https://your-acs.communication.azure.com +``` + +**Application Configuration**: +```bash +# Streaming Mode (affects audio processing pipeline) +ACS_STREAMING_MODE=MEDIA # MEDIA | VOICE_LIVE | TRANSCRIPTION + +# Optional Settings +AZURE_OPENAI_ENDPOINT=https://your-openai.openai.azure.com # For AI features +AZURE_STORAGE_CONNECTION_STRING=... # For call recordings +``` + +### Streaming Mode Configuration + +Controls the audio processing pipeline and determines handler selection: + +| Mode | Description | Audio Format | Use Case | +|------|-------------|--------------|----------| +| `MEDIA` | Default STT/TTS pipeline | PCM 16kHz mono | Traditional phone calls with AI orchestration | +| `VOICE_LIVE` | Azure OpenAI Realtime API | PCM 24kHz mono | Advanced conversational AI | +| `TRANSCRIPTION` | Real-time transcription only | PCM 16kHz mono | Call recording and analysis | + +**📖 Reference**: [Complete streaming modes documentation](../reference/streaming-modes.md) + +### Performance Tuning + +**Connection Pools** (optional): +```bash +# Speech service connection limits +MAX_STT_POOL_SIZE=4 +MAX_TTS_POOL_SIZE=4 + +# Redis connection pool +REDIS_MAX_CONNECTIONS=20 +REDIS_CONNECTION_TIMEOUT=5 +``` + +**Audio Processing**: +```bash +# Voice Activity Detection (VAD) settings +VAD_TIMEOUT_MS=2000 # Silence timeout +VAD_SENSITIVITY=medium # low | medium | high + +# Barge-in detection +BARGE_IN_ENABLED=true +BARGE_IN_THRESHOLD_MS=10 # Response time for interruption +``` + +## Error Handling + +### Standard Error Response Format + +All endpoints return 
consistent error responses following [RFC 7807](https://tools.ietf.org/html/rfc7807): + +```json +{ + "detail": "Human-readable error description", + "status_code": 400, + "timestamp": "2025-09-28T12:00:00Z", + "type": "validation_error", + "instance": "/api/v1/calls/initiate", + "errors": [ + { + "field": "target_number", + "message": "Invalid phone number format", + "code": "format_invalid" + } + ] +} +``` + +### HTTP Status Codes + +| Status | Description | Common Causes | +|--------|-------------|---------------| +| **200** | Success | Request completed successfully | +| **202** | Accepted | Async operation initiated | +| **400** | Bad Request | Invalid request format or parameters | +| **401** | Unauthorized | Missing or invalid authentication | +| **403** | Forbidden | Insufficient permissions or RBAC roles | +| **404** | Not Found | Resource not found | +| **422** | Validation Error | Request body schema validation failed | +| **429** | Rate Limited | Azure service quota exceeded | +| **500** | Internal Server Error | Unexpected server error | +| **502** | Bad Gateway | Azure service unavailable | +| **503** | Service Unavailable | Dependencies not ready | +| **504** | Gateway Timeout | Azure service timeout | + +### Service-Specific Errors + +**Azure Speech Services**: +- `speech_quota_exceeded` - API rate limit reached +- `speech_region_unavailable` - Speech service region down +- `audio_format_unsupported` - Invalid audio format specified + +**Azure Communication Services**: +- `call_not_found` - Call connection ID invalid +- `media_streaming_failed` - WebSocket streaming error +- `pstn_number_invalid` - Phone number format error + +**Azure Cache for Redis**: +- `redis_connection_failed` - Redis cluster unavailable +- `session_expired` - Session data TTL exceeded + +### Retry Strategy + +The API implements exponential backoff for transient errors: + +```bash +# Retry configuration +RETRY_MAX_ATTEMPTS=3 +RETRY_BACKOFF_FACTOR=2.0 +RETRY_JITTER=true + +#
Service-specific timeouts +SPEECH_REQUEST_TIMEOUT=30 +ACS_CALL_TIMEOUT=60 +REDIS_OPERATION_TIMEOUT=5 +``` + +**📖 Reference**: [Azure Service reliability patterns](https://learn.microsoft.com/en-us/azure/communication-services/concepts/troubleshooting-info) + +## Getting Started + +### Quick Setup + +1. **Configure Authentication**: + ```bash + export AZURE_TENANT_ID="your-tenant-id" + export AZURE_CLIENT_ID="your-client-id" + export AZURE_CLIENT_SECRET="your-client-secret" + ``` + +2. **Set Required Environment Variables**: + ```bash + export AZURE_SPEECH_REGION="eastus" + export ACS_ENDPOINT="https://your-acs.communication.azure.com" + export AZURE_REDIS_HOSTNAME="your-redis.redis.cache.windows.net" + ``` + +3. **Test Health Endpoint**: + ```bash + curl -X GET https://api.domain.com/api/v1/health + ``` + +4. **Initiate a Test Call**: + ```bash + curl -X POST https://api.domain.com/api/v1/calls/initiate \ + -H "Content-Type: application/json" \ + -d '{"target_number": "+1234567890"}' + ``` + +### Development Resources + +- **[Interactive API Explorer](#interactive-documentation)** - Test all endpoints directly in browser +- **[WebSocket Testing](../reference/streaming-modes.md)** - WebSocket connection examples +- **[Authentication Setup](../getting-started/configuration.md)** - Detailed auth configuration +- **[Architecture Overview](../architecture/README.md)** - System design and deployment patterns + +### Production Considerations + +- Use **managed identity** authentication in Azure deployments +- Configure **connection pooling** for high-throughput scenarios +- Enable **distributed tracing** with Azure Monitor integration +- Implement **health checks** for all dependent services +- Set up **monitoring and alerting** for service reliability + +**📖 Reference**: [Production deployment guide](../deployment/production.md) \ No newline at end of file diff --git a/docs/api/openapi.json b/docs/api/openapi.json new file mode 100644 index 00000000..1b9a7817 ---
/dev/null +++ b/docs/api/openapi.json @@ -0,0 +1,1555 @@ +{ + "openapi": "3.1.0", + "info": { + "title": "Real-Time Voice Agent API", + "description": "Real-Time Voice Agent API", + "contact": { + "name": "Real-Time Voice Agent Team", + "email": "support@example.com" + }, + "license": { + "name": "MIT License", + "url": "https://opensource.org/licenses/MIT" + }, + "version": "1.0.0" + }, + "paths": { + "/api/v1/health": { + "get": { + "tags": [ + "health", + "Health" + ], + "summary": "Basic Health Check", + "description": "Basic health check endpoint that returns 200 if the server is running. Used by load balancers for liveness checks.", + "operationId": "health_check_api_v1_health_get", + "responses": { + "200": { + "description": "Service is healthy and running", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HealthResponse" + }, + "example": { + "status": "healthy", + "version": "1.0.0", + "timestamp": 1691668800.0, + "message": "Real-Time Audio Agent API v1 is running", + "details": { + "api_version": "v1", + "service": "rtagent-backend" + } + } + } + } + } + } + } + }, + "/api/v1/readiness": { + "get": { + "tags": [ + "health", + "Health" + ], + "summary": "Comprehensive Readiness Check", + "description": "Comprehensive readiness probe that checks all critical dependencies with timeouts.\n \n This endpoint verifies:\n - Redis connectivity and performance\n - Azure OpenAI client health\n - Speech services (TTS/STT) availability\n - ACS caller configuration and connectivity\n - RT Agents initialization\n - Authentication configuration (when ENABLE_AUTH_VALIDATION=True)\n - Event system health\n \n When authentication validation is enabled, checks:\n - BACKEND_AUTH_CLIENT_ID is set and is a valid GUID\n - AZURE_TENANT_ID is set and is a valid GUID \n - ALLOWED_CLIENT_IDS contains at least one valid GUID\n \n Returns 503 if any critical services are unhealthy, 200 if all systems are ready.", + "operationId": 
"readiness_check_api_v1_readiness_get", + "responses": { + "200": { + "description": "All services are ready", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ReadinessResponse" + }, + "example": { + "status": "ready", + "timestamp": 1691668800.0, + "response_time_ms": 45.2, + "checks": [ + { + "component": "redis", + "status": "healthy", + "check_time_ms": 12.5, + "details": "Connected to Redis successfully" + }, + { + "component": "auth_configuration", + "status": "healthy", + "check_time_ms": 1.2, + "details": "Auth validation enabled with 2 allowed client(s)" + } + ], + "event_system": { + "is_healthy": true, + "handlers_count": 7, + "domains_count": 2 + } + } + } + } + }, + "503": { + "description": "One or more services are not ready", + "content": { + "application/json": { + "example": { + "status": "not_ready", + "timestamp": 1691668800.0, + "response_time_ms": 1250.0, + "checks": [ + { + "component": "redis", + "status": "unhealthy", + "check_time_ms": 1000.0, + "error": "Connection timeout" + }, + { + "component": "auth_configuration", + "status": "unhealthy", + "check_time_ms": 2.1, + "error": "BACKEND_AUTH_CLIENT_ID is not a valid GUID" + } + ] + } + } + } + } + } + } + }, + "/api/v1/agents": { + "get": { + "tags": [ + "health" + ], + "summary": "Get Agents Info", + "description": "Get information about loaded RT agents including their configuration,\nmodel settings, and voice settings that can be modified.", + "operationId": "get_agents_info_api_v1_agents_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + } + } + } + }, + "/api/v1/agents/{agent_name}": { + "put": { + "tags": [ + "health" + ], + "summary": "Update Agent Config", + "description": "Update configuration for a specific agent (model settings, voice, etc.).\nChanges are applied to the runtime instance but not persisted to YAML files.", + "operationId": 
"update_agent_config_api_v1_agents__agent_name__put", + "parameters": [ + { + "name": "agent_name", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Agent Name" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/AgentConfigUpdate" + } + } + } + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/api/v1/calls/initiate": { + "post": { + "tags": [ + "Call Management", + "Call Management" + ], + "summary": "Initiate Outbound Call", + "description": "Initiate a new outbound call to the specified phone number.\n \n This endpoint:\n - Validates the phone number format\n - Generates a unique call ID\n - Emits a call initiation event through the V1 event system\n - Returns immediately with call status\n \n The actual call establishment is handled asynchronously through Azure Communication Services.", + "operationId": "initiate_call_api_v1_calls_initiate_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CallInitiateRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Call initiation successful", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CallInitiateResponse" + }, + "example": { + "call_id": "call_abc12345", + "status": "initiating", + "target_number": "+1234567890", + "message": "Call initiation requested for +1234567890" + } + } + } + }, + "400": { + "description": "Invalid request (e.g., malformed phone number)", + "content": { + "application/json": { + "example": { + "detail": "Invalid phone number format. 
Must be in E.164 format (e.g., +1234567890)" + } + } + } + }, + "500": { + "description": "Internal server error during call initiation", + "content": { + "application/json": { + "example": { + "detail": "Failed to initiate call: Azure Communication Service unavailable" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/api/v1/calls/": { + "get": { + "tags": [ + "Call Management", + "Call Management" + ], + "summary": "List Calls", + "description": "Retrieve a paginated list of calls with optional filtering.\n \n Supports:\n - Pagination with page and limit parameters\n - Filtering by call status\n - Sorting by creation time (newest first)", + "operationId": "list_calls_api_v1_calls__get", + "parameters": [ + { + "name": "page", + "in": "query", + "required": false, + "schema": { + "type": "integer", + "minimum": 1, + "description": "Page number (1-based)", + "examples": { + "default": { + "summary": "page number", + "value": 1 + } + }, + "default": 1, + "title": "Page" + }, + "description": "Page number (1-based)" + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "type": "integer", + "maximum": 100, + "minimum": 1, + "description": "Number of items per page (1-100)", + "examples": { + "default": { + "summary": "items per page", + "value": 10 + } + }, + "default": 10, + "title": "Limit" + }, + "description": "Number of items per page (1-100)" + }, + { + "name": "status_filter", + "in": "query", + "required": false, + "schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Filter calls by status", + "examples": { + "default": { + "summary": "status filter", + "value": "connected" + } + }, + "enum": [ + "initiating", + "ringing", + "connected", + "on_hold", + "disconnected", + "failed" + ], + "title": "Status Filter" + }, + "description": 
"Filter calls by status" + } + ], + "responses": { + "200": { + "description": "Calls retrieved successfully", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CallListResponse" + }, + "example": { + "calls": [ + { + "call_id": "call_abc12345", + "status": "connected", + "duration": 120, + "participants": [], + "events": [] + } + ], + "total": 25, + "page": 1, + "limit": 10 + } + } + } + }, + "400": { + "description": "Invalid pagination parameters", + "content": { + "application/json": { + "example": { + "detail": "Page number must be positive" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/api/v1/calls/answer": { + "post": { + "tags": [ + "Call Management", + "Call Management" + ], + "summary": "Answer Inbound Call", + "description": "Handle inbound call events and Event Grid subscription validation.\n \n This endpoint:\n - Validates Event Grid subscription requests\n - Answers incoming calls automatically with orchestrator selection\n - Initializes conversation state with features\n - Supports pluggable conversation orchestrators\n - Provides advanced tracing and monitoring\n \n Enhanced V1 features:\n - Pluggable orchestrator injection for conversation handling\n - Enhanced state management with orchestrator metadata\n - Advanced observability and correlation\n - Production-ready error handling", + "operationId": "answer_inbound_call_api_v1_calls_answer_post", + "responses": { + "200": { + "description": "Inbound call processed successfully", + "content": { + "application/json": { + "schema": {}, + "example": { + "status": "call answered", + "orchestrator": "gpt_flow", + "acs_features": { + "orchestrator_support": true, + "advanced_tracing": true, + "api_version": "v1" + } + } + } + } + }, + "400": { + "description": "Invalid request body", + "content": { + 
"application/json": { + "example": { + "detail": "Invalid Event Grid request format" + } + } + } + }, + "503": { + "description": "Service dependencies not available", + "content": { + "application/json": { + "example": { + "detail": "ACS not initialised" + } + } + } + } + } + } + }, + "/api/v1/calls/callbacks": { + "post": { + "tags": [ + "Call Management", + "Call Events" + ], + "summary": "Handle ACS Callback Events", + "description": "Handle Azure Communication Services callback events.\n \n This endpoint receives webhooks from ACS when call events occur:\n - Call connected/disconnected\n - Participant joined/left\n - Media events (DTMF tones, play completed, etc.)\n - Transfer events\n \n The endpoint validates authentication, processes events through the \n V1 CallEventProcessor system, and returns processing results.", + "operationId": "handle_acs_callbacks_api_v1_calls_callbacks_post", + "responses": { + "200": { + "description": "Events processed successfully", + "content": { + "application/json": { + "schema": {}, + "example": { + "status": "success", + "processed_events": 1, + "call_connection_id": "abc123" + } + } + } + }, + "500": { + "description": "Event processing failed", + "content": { + "application/json": { + "example": { + "error": "Failed to process callback events" + } + } + } + }, + "503": { + "description": "Service dependencies not available", + "content": { + "application/json": { + "example": { + "error": "ACS not initialised" + } + } + } + } + } + } + }, + "/api/v1/media/status": { + "get": { + "tags": [ + "ACS Media Session", + "WebSocket" + ], + "summary": "Get Media Streaming Status", + "description": "Get the current status of media streaming configuration.\n\n:return: Current media streaming configuration and status\n:rtype: dict", + "operationId": "get_media_status_api_v1_media_status_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + 
"additionalProperties": true, + "type": "object", + "title": "Response Get Media Status Api V1 Media Status Get" + } + } + } + } + } + } + }, + "/api/v1/media/sessions": { + "post": { + "tags": [ + "ACS Media Session", + "WebSocket" + ], + "summary": "Create Media Session", + "description": "Create a new media streaming session for Azure Communication Services.\n\nInitializes a media session with specified audio configuration and returns\nWebSocket connection details for real-time audio streaming. This endpoint\nprepares the infrastructure for bidirectional media communication with\nconfigurable audio parameters.\n\nArgs:\n request: Media session configuration including call connection ID, \n audio format, sample rate, and streaming options.\n\nReturns:\n MediaSessionResponse: Session details containing unique session ID,\n WebSocket URL for streaming, status, and audio configuration.\n\nRaises:\n HTTPException: When session creation fails due to invalid configuration\n or system resource constraints.\n\nExample:\n >>> request = MediaSessionRequest(call_connection_id=\"call_123\")\n >>> response = await create_media_session(request)\n >>> print(response.websocket_url)", + "operationId": "create_media_session_api_v1_media_sessions_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MediaSessionRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MediaSessionResponse" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/api/v1/media/sessions/{session_id}": { + "get": { + "tags": [ + "ACS Media Session", + "WebSocket" + ], + "summary": "Get Media Session Status", + "description": "Retrieve status and metadata for a 
specific media session.\n\nQueries the current state of an active media session including connection\nstatus, WebSocket state, and session configuration details. Used for\nmonitoring and debugging media streaming sessions.\n\nArgs:\n session_id: Unique identifier for the media session to query.\n\nReturns:\n dict: Session information including status, connection state, creation\n timestamp, and API version details.\n\nExample:\n >>> session_info = await get_media_session(\"media_session_123\")\n >>> print(session_info[\"status\"])", + "operationId": "get_media_session_api_v1_media_sessions__session_id__get", + "parameters": [ + { + "name": "session_id", + "in": "path", + "required": true, + "schema": { + "type": "string", + "title": "Session Id" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "type": "object", + "additionalProperties": true, + "title": "Response Get Media Session Api V1 Media Sessions Session Id Get" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/api/v1/realtime/status": { + "get": { + "tags": [ + "Real-time Communication", + "WebSocket", + "Realtime Status" + ], + "summary": "Get Realtime Service Status", + "description": "Get the current status of the realtime communication service.\n \n Returns information about:\n - Service availability and health\n - Supported protocols and features\n - Active connection counts\n - WebSocket endpoint configurations", + "operationId": "get_realtime_status_api_v1_realtime_status_get", + "responses": { + "200": { + "description": "Service status retrieved successfully", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RealtimeStatusResponse" + }, + "example": { + "status": "available", + "websocket_endpoints": { + "dashboard_relay": 
"/api/v1/realtime/dashboard/relay", + "conversation": "/api/v1/realtime/conversation" + }, + "features": { + "dashboard_broadcasting": true, + "conversation_streaming": true, + "orchestrator_support": true, + "session_management": true + }, + "active_connections": { + "dashboard_clients": 0, + "conversation_sessions": 0 + }, + "version": "v1" + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "AgentConfigUpdate": { + "properties": { + "model": { + "anyOf": [ + { + "$ref": "#/components/schemas/AgentModelUpdate" + }, + { + "type": "null" + } + ] + }, + "voice": { + "anyOf": [ + { + "$ref": "#/components/schemas/AgentVoiceUpdate" + }, + { + "type": "null" + } + ] + } + }, + "type": "object", + "title": "AgentConfigUpdate" + }, + "AgentModelUpdate": { + "properties": { + "deployment_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Deployment Id" + }, + "temperature": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Temperature" + }, + "top_p": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Top P" + }, + "max_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Max Tokens" + } + }, + "type": "object", + "title": "AgentModelUpdate" + }, + "AgentVoiceUpdate": { + "properties": { + "voice_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Voice Name" + }, + "voice_style": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Voice Style" + } + }, + "type": "object", + "title": "AgentVoiceUpdate" + }, + "CallInitiateRequest": { + "properties": { + "target_number": { + "type": "string", + "pattern": "^\\+[1-9]\\d{1,14}$", + "title": "Target Number", + "description": "Phone number to call in E.164 format (e.g., +1234567890)", + "example": "+1234567890" + }, + "caller_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": 
"null" + } + ], + "title": "Caller Id", + "description": "Caller ID to display (optional, uses system default if not provided)", + "example": "+1987654321" + }, + "context": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Context", + "description": "Additional call context metadata", + "example": { + "customer_id": "cust_12345", + "department": "support", + "priority": "high", + "source": "web_portal" + } + } + }, + "type": "object", + "required": [ + "target_number" + ], + "title": "CallInitiateRequest", + "description": "Request model for initiating a call.", + "example": { + "caller_id": "+1987654321", + "context": { + "customer_id": "cust_12345", + "department": "support" + }, + "target_number": "+1234567890" + } + }, + "CallInitiateResponse": { + "properties": { + "call_id": { + "type": "string", + "title": "Call Id", + "description": "Unique call identifier", + "example": "call_abc12345" + }, + "status": { + "type": "string", + "title": "Status", + "description": "Current call status", + "example": "initiating" + }, + "target_number": { + "type": "string", + "title": "Target Number", + "description": "Target phone number", + "example": "+1234567890" + }, + "message": { + "type": "string", + "title": "Message", + "description": "Human-readable status message", + "example": "Call initiation requested" + } + }, + "type": "object", + "required": [ + "call_id", + "status", + "target_number", + "message" + ], + "title": "CallInitiateResponse", + "description": "Response model for call initiation.", + "example": { + "call_id": "call_abc12345", + "message": "Call initiation requested for +1234567890", + "status": "initiating", + "target_number": "+1234567890" + } + }, + "CallListResponse": { + "properties": { + "calls": { + "items": { + "$ref": "#/components/schemas/CallStatusResponse" + }, + "type": "array", + "title": "Calls", + "description": "List of calls" + }, + "total": { + "type": "integer", 
+ "title": "Total", + "description": "Total number of calls matching criteria", + "example": 25 + }, + "page": { + "type": "integer", + "title": "Page", + "description": "Current page number (1-based)", + "default": 1, + "example": 1 + }, + "limit": { + "type": "integer", + "title": "Limit", + "description": "Number of items per page", + "default": 10, + "example": 10 + } + }, + "type": "object", + "required": [ + "calls", + "total" + ], + "title": "CallListResponse", + "description": "Response model for listing calls.", + "example": { + "calls": [ + { + "call_id": "call_abc12345", + "duration": 120, + "events": [], + "participants": [], + "status": "connected" + } + ], + "limit": 10, + "page": 1, + "total": 25 + } + }, + "CallStatusResponse": { + "properties": { + "call_id": { + "type": "string", + "title": "Call Id", + "description": "Unique call identifier", + "example": "call_abc12345" + }, + "status": { + "type": "string", + "enum": [ + "initiating", + "ringing", + "connected", + "on_hold", + "disconnected", + "failed" + ], + "title": "Status", + "description": "Current call status", + "example": "connected" + }, + "duration": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Duration", + "description": "Call duration in seconds (null if not connected)", + "example": 120 + }, + "participants": { + "items": { + "additionalProperties": true, + "type": "object" + }, + "type": "array", + "title": "Participants", + "description": "List of call participants", + "example": [ + { + "id": "participant_1", + "phone_number": "+1234567890", + "role": "caller", + "status": "connected" + } + ] + }, + "events": { + "items": { + "additionalProperties": true, + "type": "object" + }, + "type": "array", + "title": "Events", + "description": "Recent call events", + "example": [ + { + "details": { + "connection_established": true + }, + "timestamp": "2025-08-10T13:45:30Z", + "type": "call_connected" + } + ] + } + }, + "type": "object", + 
"required": [ + "call_id", + "status" + ], + "title": "CallStatusResponse", + "description": "Response model for call status.", + "example": { + "call_id": "call_abc12345", + "duration": 120, + "events": [ + { + "details": { + "connection_established": true + }, + "timestamp": "2025-08-10T13:45:30Z", + "type": "call_connected" + } + ], + "participants": [ + { + "id": "participant_1", + "phone_number": "+1234567890", + "role": "caller", + "status": "connected" + } + ], + "status": "connected" + } + }, + "HTTPValidationError": { + "properties": { + "detail": { + "items": { + "$ref": "#/components/schemas/ValidationError" + }, + "type": "array", + "title": "Detail" + } + }, + "type": "object", + "title": "HTTPValidationError" + }, + "HealthResponse": { + "properties": { + "status": { + "type": "string", + "title": "Status", + "description": "Overall health status", + "example": "healthy" + }, + "version": { + "type": "string", + "title": "Version", + "description": "API version", + "default": "1.0.0", + "example": "1.0.0" + }, + "timestamp": { + "type": "number", + "title": "Timestamp", + "description": "Timestamp when check was performed", + "example": 1691668800.0 + }, + "message": { + "type": "string", + "title": "Message", + "description": "Human-readable status message", + "example": "Real-Time Audio Agent API v1 is running" + }, + "details": { + "additionalProperties": true, + "type": "object", + "title": "Details", + "description": "Additional health details", + "example": { + "api_version": "v1", + "service": "rtagent-backend" + } + }, + "active_sessions": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Active Sessions", + "description": "Current number of active realtime conversation sessions (None if unavailable)", + "example": 3 + }, + "session_metrics": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Session Metrics", + "description": "Optional 
granular session metrics (connected/disconnected, etc.)", + "example": { + "active": 3, + "connected": 5, + "disconnected": 2 + } + } + }, + "type": "object", + "required": [ + "status", + "timestamp", + "message" + ], + "title": "HealthResponse", + "description": "Health check response model.", + "example": { + "active_sessions": 3, + "details": { + "api_version": "v1", + "service": "rtagent-backend" + }, + "message": "Real-Time Audio Agent API v1 is running", + "session_metrics": { + "active": 3, + "connected": 5, + "disconnected": 2 + }, + "status": "healthy", + "timestamp": 1691668800.0, + "version": "1.0.0" + } + }, + "MediaSessionRequest": { + "properties": { + "call_connection_id": { + "type": "string", + "title": "Call Connection Id", + "description": "ACS call connection identifier", + "example": "call_12345" + }, + "sample_rate": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Sample Rate", + "description": "Audio sample rate in Hz", + "default": 16000, + "example": 16000 + }, + "channels": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Channels", + "description": "Number of audio channels", + "default": 1, + "example": 1 + }, + "audio_format": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Audio Format", + "description": "Audio format (pcm_16, pcm_24, opus, etc.)", + "default": "pcm_16", + "example": "pcm_16" + }, + "chunk_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Chunk Size", + "description": "Audio chunk size in bytes", + "default": 1024, + "example": 1024 + }, + "enable_transcription": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Enable Transcription", + "description": "Enable real-time transcription", + "default": true, + "example": true + }, + "enable_vad": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": 
"Enable Vad", + "description": "Enable voice activity detection", + "default": true, + "example": true + } + }, + "type": "object", + "required": [ + "call_connection_id" + ], + "title": "MediaSessionRequest", + "description": "Request schema for starting a media session.", + "example": { + "audio_format": "pcm_16", + "call_connection_id": "call_12345", + "channels": 1, + "chunk_size": 1024, + "enable_transcription": true, + "enable_vad": true, + "sample_rate": 16000 + } + }, + "MediaSessionResponse": { + "properties": { + "session_id": { + "type": "string", + "title": "Session Id", + "description": "Unique media session identifier", + "example": "media_session_123456" + }, + "websocket_url": { + "type": "string", + "title": "Websocket Url", + "description": "WebSocket URL for audio streaming", + "example": "wss://api.example.com/v1/media/stream/media_session_123456" + }, + "status": { + "type": "string", + "title": "Status", + "description": "Session status", + "example": "active" + }, + "created_at": { + "type": "string", + "title": "Created At", + "description": "Session creation timestamp", + "example": "2025-08-10T13:45:00Z" + }, + "configuration": { + "additionalProperties": true, + "type": "object", + "title": "Configuration", + "description": "Session configuration settings", + "example": { + "channels": 1, + "chunk_size": 1024, + "format": "pcm_16", + "sample_rate": 16000 + } + } + }, + "type": "object", + "required": [ + "session_id", + "websocket_url", + "status", + "created_at", + "configuration" + ], + "title": "MediaSessionResponse", + "description": "Response schema for media session creation.", + "example": { + "configuration": { + "channels": 1, + "chunk_size": 1024, + "format": "pcm_16", + "sample_rate": 16000 + }, + "created_at": "2025-08-10T13:45:00Z", + "session_id": "media_session_123456", + "status": "active", + "websocket_url": "wss://api.example.com/v1/media/stream/media_session_123456" + } + }, + "ReadinessResponse": { + "properties": { + 
"status": { + "type": "string", + "enum": [ + "ready", + "not_ready", + "degraded" + ], + "title": "Status", + "description": "Overall readiness status", + "example": "ready" + }, + "timestamp": { + "type": "number", + "title": "Timestamp", + "description": "Timestamp when check was performed", + "example": 1691668800.0 + }, + "response_time_ms": { + "type": "number", + "title": "Response Time Ms", + "description": "Total time taken for all checks in milliseconds", + "example": 45.2 + }, + "checks": { + "items": { + "$ref": "#/components/schemas/ServiceCheck" + }, + "type": "array", + "title": "Checks", + "description": "Individual component health checks" + }, + "event_system": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Event System", + "description": "Event system status information", + "example": { + "domains_count": 2, + "handlers_count": 7, + "is_healthy": true + } + } + }, + "type": "object", + "required": [ + "status", + "timestamp", + "response_time_ms", + "checks" + ], + "title": "ReadinessResponse", + "description": "Comprehensive readiness check response model.", + "example": { + "checks": [ + { + "check_time_ms": 12.5, + "component": "redis", + "details": "Connected to Redis successfully", + "status": "healthy" + }, + { + "check_time_ms": 8.3, + "component": "azure_openai", + "details": "Client initialized", + "status": "healthy" + } + ], + "event_system": { + "domains_count": 2, + "handlers_count": 7, + "is_healthy": true + }, + "response_time_ms": 45.2, + "status": "ready", + "timestamp": 1691668800.0 + } + }, + "RealtimeStatusResponse": { + "properties": { + "status": { + "type": "string", + "enum": [ + "available", + "degraded", + "unavailable" + ], + "title": "Status", + "description": "Current service status", + "example": "available" + }, + "websocket_endpoints": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "title": "Websocket Endpoints", + 
"description": "Available WebSocket endpoints", + "example": { + "conversation": "/api/v1/realtime/conversation", + "dashboard_relay": "/api/v1/realtime/dashboard/relay" + } + }, + "features": { + "additionalProperties": { + "type": "boolean" + }, + "type": "object", + "title": "Features", + "description": "Supported features and capabilities", + "example": { + "conversation_streaming": true, + "dashboard_broadcasting": true, + "orchestrator_support": true, + "session_management": true + } + }, + "active_connections": { + "additionalProperties": { + "type": "integer" + }, + "type": "object", + "title": "Active Connections", + "description": "Current active connection counts", + "example": { + "conversation_sessions": 0, + "dashboard_clients": 0 + } + }, + "protocols_supported": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Protocols Supported", + "description": "Supported communication protocols", + "default": [ + "WebSocket" + ], + "example": [ + "WebSocket" + ] + }, + "version": { + "type": "string", + "title": "Version", + "description": "API version", + "default": "v1", + "example": "v1" + } + }, + "type": "object", + "required": [ + "status", + "websocket_endpoints", + "features", + "active_connections" + ], + "title": "RealtimeStatusResponse", + "description": "Response schema for realtime service status endpoint.\n\nProvides comprehensive information about the realtime communication\nservice including availability, features, and active connections." 
+ }, + "ServiceCheck": { + "properties": { + "component": { + "type": "string", + "title": "Component", + "description": "Name of the component being checked", + "example": "redis" + }, + "status": { + "type": "string", + "enum": [ + "healthy", + "unhealthy", + "degraded" + ], + "title": "Status", + "description": "Health status of the component", + "example": "healthy" + }, + "check_time_ms": { + "type": "number", + "title": "Check Time Ms", + "description": "Time taken to perform the check in milliseconds", + "example": 12.5 + }, + "error": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Error", + "description": "Error message if check failed", + "example": "Connection timeout" + }, + "details": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Details", + "description": "Additional details about the check", + "example": "Connected to Redis successfully" + } + }, + "type": "object", + "required": [ + "component", + "status", + "check_time_ms" + ], + "title": "ServiceCheck", + "description": "Individual service check result.", + "example": { + "check_time_ms": 12.5, + "component": "redis", + "details": "Connected to Redis successfully", + "status": "healthy" + } + }, + "ValidationError": { + "properties": { + "loc": { + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "integer" + } + ] + }, + "type": "array", + "title": "Location" + }, + "msg": { + "type": "string", + "title": "Message" + }, + "type": { + "type": "string", + "title": "Error Type" + } + }, + "type": "object", + "required": [ + "loc", + "msg", + "type" + ], + "title": "ValidationError" + } + } + } +} \ No newline at end of file diff --git a/docs/api/overview.md b/docs/api/overview.md deleted file mode 100644 index fd203afa..00000000 --- a/docs/api/overview.md +++ /dev/null @@ -1,88 +0,0 @@ -# API Overview - -The Real-Time Voice Agent provides a comprehensive set of APIs for building voice-enabled applications 
with Azure Cognitive Services. - -## Core Components - -### Speech Synthesis -- **[SpeechSynthesizer](speech-synthesis.md)** - Text-to-speech engine with neural voices -- Multiple output formats: audio files, streaming frames, speaker playback -- Advanced voice control with SSML, styles, and prosody -- Intelligent environment detection for headless deployments - -### Speech Recognition -- **[StreamingSpeechRecognizer](speech-recognition.md)** - Real-time speech-to-text -- Continuous recognition with minimal latency -- Language detection and speaker diarization -- Neural audio processing for improved accuracy - -### Utilities -- **[Text Processing](utilities.md)** - Sentence splitting and language optimization -- **[SSML Generation](utilities.md)** - Advanced markup for voice control -- **[Authentication](utilities.md)** - Azure credential management - -## Authentication - -All components support flexible authentication: - -```python -# API Key (development/testing) -synthesizer = SpeechSynthesizer( - key="your-speech-key", - region="eastus" -) - -# Managed Identity (production) -synthesizer = SpeechSynthesizer( - region="eastus" # Uses DefaultAzureCredential -) -``` - -## Environment Variables - -Configure services using environment variables: - -```bash -# Required for API key authentication -AZURE_SPEECH_KEY=your-subscription-key -AZURE_SPEECH_REGION=eastus - -# Required for managed identity -AZURE_SPEECH_RESOURCE_ID=/subscriptions/.../resourceGroups/.../providers/Microsoft.CognitiveServices/accounts/... 
- -# Optional configuration -AZURE_SPEECH_ENDPOINT=https://custom-endpoint.cognitiveservices.azure.com -TTS_ENABLE_LOCAL_PLAYBACK=true -``` - -## Observability - -Built-in OpenTelemetry support for production monitoring: - -```python -# Enable distributed tracing -synthesizer = SpeechSynthesizer( - region="eastus", - enable_tracing=True, - call_connection_id="session-12345" -) - -# All operations automatically traced -audio = synthesizer.synthesize_speech("Hello world") -``` - -## Error Handling - -Robust error handling with graceful degradation: - -- Authentication failures with clear error messages -- Network timeouts with automatic retry logic -- Audio hardware unavailable (headless environments) -- Service quota limits and rate limiting - -## Performance Considerations - -- **Concurrent synthesis limiting** - Built-in semaphore prevents service overload -- **Credential caching** - Automatic token refresh and credential reuse -- **Lazy initialization** - Audio components created only when needed -- **Memory efficiency** - Streaming operations minimize memory usage diff --git a/docs/api/speech-synthesis.md b/docs/api/speech-synthesis.md deleted file mode 100644 index d3694a24..00000000 --- a/docs/api/speech-synthesis.md +++ /dev/null @@ -1,105 +0,0 @@ -# Speech Synthesis API - -The `SpeechSynthesizer` class provides comprehensive text-to-speech capabilities using Azure Cognitive Services. This page documents all public methods and their usage. 
- -## SpeechSynthesizer Class - -::: src.speech.text_to_speech.SpeechSynthesizer - options: - show_source: true - show_signature_annotations: true - separate_signature: true - merge_init_into_class: true - docstring_section_style: table - members_order: source - group_by_category: true - show_category_heading: true - filters: - - "!^_" # Hide private methods except __init__ - - "^__init__$" # But show __init__ - -## Utility Functions - -### Text Processing - -::: src.speech.text_to_speech.split_sentences - options: - show_source: true - show_signature_annotations: true - -::: src.speech.text_to_speech.auto_style - options: - show_source: true - show_signature_annotations: true - -### SSML Generation - -::: src.speech.text_to_speech.ssml_voice_wrap - options: - show_source: true - show_signature_annotations: true - -## Examples - -### Basic Usage - -```python -from src.speech.text_to_speech import SpeechSynthesizer - -# Initialize with API key -synthesizer = SpeechSynthesizer( - key="your-speech-key", - region="eastus", - voice="en-US-JennyMultilingualNeural" -) - -# Synthesize to memory -audio_data = synthesizer.synthesize_speech( - "Hello, welcome to our voice application!", - style="chat", - rate="+10%" -) -``` - -### Advanced Configuration - -```python -# Production configuration with managed identity -synthesizer = SpeechSynthesizer( - region="eastus", # Uses managed identity when key=None - language="en-US", - voice="en-US-AriaNeural", - playback="never", # Headless deployment - enable_tracing=True, - call_connection_id="session-abc123" -) - -# Generate streaming frames for real-time applications -frames = synthesizer.synthesize_to_base64_frames( - "This is real-time audio streaming", - sample_rate=16000, - style="chat" -) -``` - -### Speaker Playback - -```python -# Local audio playback (development/testing) -synthesizer = SpeechSynthesizer( - key="your-key", - region="eastus", - playback="auto" # Only plays if speakers available -) - -# Speak text directly 
through speakers -synthesizer.start_speaking_text( - "This will play through your speakers", - voice="en-US-JennyNeural", - rate="+15%", - style="excited" -) - -# Stop playback if needed -synthesizer.stop_speaking() -``` diff --git a/docs/architecture/README.md b/docs/architecture/README.md new file mode 100644 index 00000000..f98ff27f --- /dev/null +++ b/docs/architecture/README.md @@ -0,0 +1,137 @@ +# :material-cube-outline: Architecture Overview + +!!! abstract "Real-Time Voice AI Accelerator" + Azure Communication Services voice agent accelerator with modular AI agents, real-time audio processing, and enterprise deployment patterns. + +## :material-view-dashboard: Core Capabilities + +| Feature | What's Included | Purpose | +|---------|-----------------|---------| +| **Real-time Audio** | ACS + Speech Services integration | Voice conversation processing | +| **AI Agent Framework** | Modular, swappable agent system | Industry-specific implementations | +| **Intelligent Barge-in** | Voice activity detection patterns | Natural conversation flow | +| **Serverless Scaling** | Container Apps with auto-scaling | Cost-effective, elastic hosting | +| **Development Ready** | Public endpoints with managed identity | Quick deployment and testing | + +!!! abstract "Deployment Architecture Options" + **Current Terraform**: Container Apps with public endpoints for rapid development + + **Available Bicep**: Enterprise production architecture with API Management, and private networking. (Advanced, WIP) + +## :material-cloud-outline: Deployment Architecture +=== "🏗️ Simplified Azure Production" + **Streamlined deployment with Container Apps and public endpoints** + + ![Simplified Production Architecture](../assets/RTAudio.v0.png) + + *Current Terraform deployment with Container Apps, AI Foundry, and public endpoints. 
App Gateway, APIM, and private networking are intentionally excluded to maintain simplicity and flexibility for rapid development.* + +=== "🔧 Detailed Component View" + **Agent framework and processing pipeline architecture** + + ![Agent Architecture](../assets/ARTAgentarch.png) + + *Detailed view of the agent orchestration, processing components, and data flow patterns within the simplified production architecture.* + +=== "🎙️ Voice Live Orchestration" + **Real-time voice processing with live orchestration** + + ![Voice Live API](../assets/LIVEVOICEApi.png) + + *Voice live orchestration architecture showing real-time audio processing, conversation management, and agent coordination patterns.* + +!!! note "Infrastructure Deployment Approach" + The Terraform deployment intentionally excludes App Gateway, API Management, and private networking to provide a **malleable foundation** that consumers can extend based on their specific requirements. Production enterprise features are available through separate Bicep templates. + +*Azure infrastructure with Container Apps, AI Foundry, and public endpoints* + +!!! info "Microsoft Learn Resources" + - **[Azure Communication Services](https://learn.microsoft.com/en-us/azure/communication-services/overview)** - Core platform + - **[Audio Streaming Concepts](https://learn.microsoft.com/en-us/azure/communication-services/concepts/call-automation/audio-streaming-concept)** - Real-time media + - **[Container Apps](https://learn.microsoft.com/en-us/azure/container-apps/overview)** - Serverless hosting + +!!! warning "Current Terraform Deployment" + **Simplified Public Infrastructure** - The Terraform deployment creates a streamlined development-focused architecture with public endpoints and Container Apps hosting. Advanced features like API Management, AI Gateway, private networking, and Application Gateway are available in the Bicep templates for production scenarios. 
+ +## :material-network: Key Infrastructure Components + +=== "Core Services (Terraform Deployed)" + **Container Apps Environment:** + + - **Auto-scaling** - KEDA-based scaling for frontend and backend containers + - **Public Ingress** - External endpoints for development and testing + - **Managed Identity** - Azure AD authentication across all services + - **Application Insights** - Centralized logging and monitoring + + **AI Services:** + + - **Azure AI Foundry** - LLM Model hosting, unified resource for Speech/Cognitive Services + + + **Data Layer:** + + - **Cosmos DB (MongoDB API)** - Session and conversation storage + - **Redis Enterprise** - High-performance caching with RBAC + - **Storage Account** - Audio files and prompt storage + - **Key Vault** - Secure secret management + +=== "Production Extensions (Bicep Available)" + **Advanced Networking:** + + - Hub-spoke VNet topology with private endpoints + - Application Gateway with WAF protection + - NSG rules and traffic control + + **API Management & AI Gateway:** + + - Token management and PTU optimization + - Load balancing and cost analytics + - Content safety and multi-region routing + + !!! 
info "Deployment Comparison" + **Terraform**: Streamlined development infrastructure with public endpoints and Container Apps + + **Bicep**: Enterprise-grade production architecture with private networking, API Gateway, and Application Gateway + + **Microsoft Learn References:** + + - **[Container Apps Architecture](https://learn.microsoft.com/en-us/azure/container-apps/overview)** - Serverless hosting patterns + - **[AI Gateway Architecture](https://learn.microsoft.com/en-us/ai/playbook/technology-guidance/generative-ai/dev-starters/genai-gateway/reference-architectures/apim-based)** - Advanced API management (Bicep only) + - **[Private Endpoint Integration](https://learn.microsoft.com/en-us/azure/container-apps/networking)** - Network security patterns (Bicep only) + +## :material-compass: Architecture Deep Dives + +| Document | Focus | What You'll Learn | +|----------|-------|-------------------| +| **[LLM Orchestration](llm-orchestration.md)** | AI routing and conversation management | Multi-agent coordination, dependency injection patterns, orchestrator design | +| **[Speech Recognition](speech-recognition.md)** | Real-time STT processing | Azure Speech integration, WebSocket handling, and transcription accuracy | +| **[Speech Synthesis](speech-synthesis.md)** | Dynamic TTS generation | Low-latency audio synthesis, voice font customization, and output streaming | +| **[ACS Call Flows](acs-flows.md)** | Three-thread voice processing | Real-time audio handling, WebSocket patterns, media lifecycle | +| **[Data Flows](data-flows.md)** | Storage and caching patterns | State management, Redis coordination, Cosmos DB persistence | +| **[Integrations](integrations.md)** | Cross-cloud connectivity | External service patterns, authentication flows | + +## :material-rocket: Quick Start Paths + +=== "👩‍💻 Developers" + 1. **[Getting Started](../getting-started/README.md)** - Environment setup and prerequisites + 2. 
**[Local Development](../getting-started/local-development.md)** - Run the accelerator locally + 3. **[API Reference](../api/README.md)** - Endpoints and WebSocket protocols + +=== "🏗️ Architects" + 1. **[Data Flow Patterns](data-flows.md)** - Storage strategies and state management + 2. **[Production Deployment](../deployment/production.md)** - Infrastructure and scaling + 3. **[Integrations Overview](integrations.md)** - External service connectivity + +=== "🔧 Operations" + 1. **[Monitoring Guide](../operations/monitoring.md)** - Application insights and observability + 2. **[Load Testing](../operations/load-testing.md)** - Performance validation and capacity planning + 3. **[Troubleshooting](../operations/troubleshooting.md)** - Issue resolution and debugging + +!!! info "Additional Resources" + For more comprehensive guidance on development and operations: + + - **[Repository Structure](../guides/repository-structure.md)** - Understand the codebase layout + - **[Utilities & Services](../guides/utilities.md)** - Core infrastructure components + - **[Deployment Guide](../deployment/README.md)** - Deploy the accelerator to Azure + +--- diff --git a/docs/architecture/acs-flows.md b/docs/architecture/acs-flows.md new file mode 100644 index 00000000..94a58bb8 --- /dev/null +++ b/docs/architecture/acs-flows.md @@ -0,0 +1,265 @@ +# :material-phone-in-talk: ACS Call Automation & Media Flows + +!!! abstract "Three-Thread Voice Processing Architecture" + Comprehensive architecture for Azure Communication Services (ACS) media handling, specifically designed for **real-time voice processing** with integrated **barge-in detection** capabilities. + +## :material-microsoft-azure: Azure Communication Services Integration + +!!! success "Enterprise Voice Processing" + Azure Speech SDK provides continuous speech recognition optimized for real-time conversations with sub-10ms barge-in detection. 
+ +### :material-microphone: Speech Recognition Capabilities + +| :material-feature-search: Feature | :material-information: Description | :material-speedometer: Accelerator Focus | +|------------|-------------|-------------| +| **Real-time Processing** | Immediate partial and final result processing | Low-latency patterns | +| **Barge-in Detection** | Advanced voice activity detection for interruptions | Reference implementation | +| **Multiple Result Types** | Partial results for speed, final results for accuracy | Flexible processing modes | +| **Session Management** | Automatic session handling with connection recovery | Robust connection patterns | +| **Continuous Recognition** | Persistent speech-to-text processing | 24/7 operation templates | + +!!! info "Microsoft Learn Resources" + - **[Audio Streaming Quickstart](https://learn.microsoft.com/en-us/azure/communication-services/how-tos/call-automation/audio-streaming-quickstart)** - Server-side audio streaming implementation + - **[Call Automation SDK](https://learn.microsoft.com/en-us/azure/communication-services/quickstarts/call-automation/callflows-for-customer-interactions)** - Automated call routing solutions + - **[Media Access Overview](https://learn.microsoft.com/en-us/azure/communication-services/concepts/voice-video-calling/media-access)** - Real-time media stream processing patterns + - **[Speech to Text Service](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-to-text)** - Real-time speech recognition capabilities + - **[Real-time Speech Recognition](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/get-started-speech-to-text)** - Implementation patterns for continuous STT processing + - **[Bidirectional Audio Streaming](https://learn.microsoft.com/en-us/azure/communication-services/concepts/call-automation/audio-streaming-concept)** - Two-way media streaming architecture + - **[WebSocket Audio 
Processing](https://learn.microsoft.com/en-us/azure/communication-services/how-tos/call-automation/audio-streaming-quickstart#handling-audio-streams-in-your-websocket-server)** - Real-time audio stream handling patterns + +## :material-sitemap: Three-Thread Processing Architecture + +!!! tip "Thread Separation Strategy" + The architecture separates concerns across three dedicated threads for optimal performance and reliability. + +```mermaid +graph TB + subgraph SpeechSDK["🎤 Speech SDK Thread"] + A1["Continuous Audio Recognition"] + A2["on_partial → Barge-in Detection"] + A3["on_final → Queue Speech Result"] + A1 --> A2 + A1 --> A3 + end + + subgraph RouteLoop["🔄 Route Turn Thread"] + B1["await speech_queue.get()"] + B2["Orchestrator Processing"] + B3["TTS Generation & Playback"] + B1 --> B2 --> B3 + end + + subgraph MainLoop["🌐 Main Event Loop"] + C1["WebSocket Media Handler"] + C2["Barge-in Response"] + C3["Task Cancellation"] + C1 --> C2 --> C3 + end + + %% Cross-thread communication + A2 -.->|"run_coroutine_threadsafe"| C2 + A3 -.->|"queue.put_nowait"| B1 + B3 -.->|"Task Reference"| C1 + C2 -.->|"cancel()"| B2 + + classDef speechStyle fill:#9B59B6,stroke:#6B3E99,stroke-width:2px,color:#FFFFFF + classDef routeStyle fill:#FF6B35,stroke:#E55100,stroke-width:2px,color:#FFFFFF + classDef mainStyle fill:#4A90E2,stroke:#2E5C8A,stroke-width:2px,color:#FFFFFF + + class A1,A2,A3 speechStyle + class B1,B2,B3 routeStyle + class C1,C2,C3 mainStyle +``` + +## Thread Responsibilities & Communication + +### Core Design Principles + +The three-thread architecture follows these key principles: + +#### 🎤 **Speech SDK Thread** - Never Blocks +- **Continuous audio recognition** using Azure Speech SDK +- **Immediate barge-in detection** via `on_partial` callbacks +- **Cross-thread communication** via `run_coroutine_threadsafe` +- **Performance**: < 10ms response time for barge-in detection + +#### 🔄 **Route Turn Thread** - Blocks Only on Queue +- **AI processing and response 
generation** through orchestrator +- **Queue-based serialization** of conversation turns +- **Safe cancellation** without affecting speech recognition +- **Performance**: Processes one turn at a time, can be cancelled + +#### 🌐 **Main Event Loop** - Never Blocks +- **WebSocket handling** for real-time media streaming +- **Task cancellation** for barge-in scenarios +- **Non-blocking coordination** between threads +- **Performance**: < 50ms for task cancellation and stop commands + +### Thread Performance Matrix + +| Thread | Primary Role | Blocking Behavior | Barge-in Role | Response Time | +|--------|--------------|-------------------|---------------|--------------| +| **Speech SDK** | Audio recognition | ❌ Never blocks | ✅ Detection | < 10ms | +| **Route Turn** | AI processing | ✅ Queue operations only | ❌ None | Variable | +| **Main Event** | WebSocket & coordination | ❌ Never blocks | ✅ Execution | < 50ms | + +## Implementation Flow + +### Barge-in Detection and Handling + +1. **User speaks during AI response**: + - `on_partial()` callback fires immediately (< 10ms) + - `ThreadBridge.schedule_barge_in()` schedules handler on main event loop + - `MainEventLoop.handle_barge_in()` cancels current processing + +2. **Task cancellation chain**: + ``` + on_partial() → schedule_barge_in() → cancel_current_processing() → send_stop_audio() + ``` + +3. 
**Speech finalization**: + - `on_final()` callback queues completed speech via `ThreadBridge.queue_speech_result()` + - `RouteTurnThread` picks up speech from queue + - New AI processing task created for response generation + +### Key Components + +#### ThreadBridge +Provides thread-safe communication between Speech SDK Thread and Main Event Loop: +- `schedule_barge_in()` - Schedules barge-in handler execution +- `queue_speech_result()` - Queues final speech for processing +- Uses `run_coroutine_threadsafe` and `asyncio.Queue` for safe cross-thread communication + +#### SpeechSDKThread +Manages Speech SDK in dedicated background thread: +- Pre-initializes `push_stream` to prevent audio data loss +- Never blocks on AI processing or network operations +- Provides immediate callback execution for barge-in detection + +#### RouteTurnThread +Handles AI processing in isolated thread: +- Blocks only on `speech_queue.get()` operations +- Processes speech through orchestrator +- Creates and manages TTS playback tasks + +#### MainEventLoop +Coordinates WebSocket operations and task management: +- Handles incoming media messages and audio data +- Manages barge-in interruption and task cancellation +- Never blocks to ensure real-time responsiveness +## 🔄 Non-Blocking Thread Communication Sequence + +```mermaid +sequenceDiagram + participant SpeechSDK as 🧵 Speech SDK Thread + participant MainLoop as 🧵 Main Event Loop + participant RouteLoop as 🧵 Route Turn Thread + participant ACS as 🔊 Azure Communication Services + participant User as 👤 User + + Note over SpeechSDK,User: 🎵 AI Currently Playing Audio + MainLoop->>ACS: 🔊 Streaming TTS Audio Response + ACS->>User: 🎵 Audio Playback Active + + rect rgba(255, 149, 0, 0.15) + Note over SpeechSDK,User: 🚨 USER SPEAKS (BARGE-IN EVENT) + User->>SpeechSDK: 🗣️ Audio Input (Partial Recognition) + + Note right of SpeechSDK: ⚡ IMMEDIATE ACTION<br/>🚫 NO BLOCKING + SpeechSDK->>SpeechSDK: 🔍 on_partial() callback triggered + end + + rect rgba(255, 59, 48, 0.2) + Note over SpeechSDK,MainLoop: 🔗 CROSS-THREAD COMMUNICATION + SpeechSDK-->>MainLoop: 🚀 run_coroutine_threadsafe(_handle_barge_in_async) + Note right of SpeechSDK: ✅ Speech thread continues<br/>NOT BLOCKED + + Note over MainLoop: 🛑 BARGE-IN HANDLER EXECUTES + MainLoop->>MainLoop: ❌ playback_task.cancel() + MainLoop->>MainLoop: 🧹 Clear route_turn_queue + MainLoop->>ACS: 🛑 Send StopAudio command + end + + rect rgba(52, 199, 89, 0.15) + ACS-->>User: 🔇 Audio Playback STOPPED + Note right of MainLoop: ✅ Previous AI response<br/>cancelled cleanly + end + + rect rgba(0, 122, 255, 0.1) + Note over SpeechSDK,RouteLoop: 📝 USER CONTINUES SPEAKING + User->>SpeechSDK: 🗣️ Continues Speaking + SpeechSDK->>SpeechSDK: on_final() callback triggered + + Note over SpeechSDK,MainLoop: 🔗 FINAL RESULT COMMUNICATION + SpeechSDK-->>MainLoop: run_coroutine_threadsafe(_handle_final_async) + MainLoop->>MainLoop: route_turn_queue.put(final_text) + Note right of SpeechSDK: ✅ Speech thread continues<br/>🚫 NOT BLOCKED + end + + rect rgba(102, 51, 153, 0.1) + Note over RouteLoop,ACS: 🤖 NEW AI PROCESSING + RouteLoop->>RouteLoop: 📥 queue.get() receives final_text + Note right of RouteLoop: ⏳ ONLY thread that blocks<br/>🎯 Dedicated AI processing + + RouteLoop->>MainLoop: 🎵 Create new playback_task + MainLoop->>ACS: 🔊 Send New TTS Response + ACS->>User: 🎵 Play New AI Response + end + + Note over SpeechSDK,User: ✅ COMPLETE NON-BLOCKING CYCLE +``` + +### 🚀 Critical Non-Blocking Characteristics + +| Event | Thread Source | Target Thread | Blocking? | Communication Method | Response Time | +|-------|---------------|---------------|-----------|---------------------|---------------| +| **🚨 Barge-in Detection** | Speech SDK | Main Event Loop | ❌ NO | `run_coroutine_threadsafe` | < 10ms | +| **📋 Final Speech** | Speech SDK | Route Turn Thread | ❌ NO | `asyncio.Queue.put()` | < 5ms | +| **🎵 AI Processing** | Route Turn | Main Event Loop | ❌ NO | `asyncio.create_task` | < 1ms | +| **🛑 Task Cancellation** | Main Event Loop | Playback Task | ❌ NO | `task.cancel()` | < 1ms | + +> **🎯 Key Insight**: Only the **Route Turn Thread** blocks (on `queue.get()`), ensuring Speech SDK and Main Event Loop remain responsive for real-time barge-in detection. + +--- + +## Key Implementation Details + +This section provides **concrete implementation specifics** for developers working with the ACS Media Handler threading architecture.
+ +### 🚨 Barge-In Detection + +- **Trigger**: `on_partial` callback from Speech Recognizer detects user speech +- **Immediate Action**: Synchronous cancellation of `playback_task` using `asyncio.Task.cancel()` +- **Stop Signal**: Send `{"Kind": "StopAudio", "StopAudio": {}}` JSON command to ACS via WebSocket +- **Logging**: Comprehensive logging with emojis for real-time debugging + +### 🔄 Async Background Task Management + +- **Route Turn Queue**: Serializes final speech processing using `asyncio.Queue()` +- **Playback Task**: Tracks current AI response generation/playback with `self.playback_task` +- **Task Lifecycle**: Clean creation, cancellation, and cleanup of background tasks +- **Cancellation Safety**: Proper `try/except asyncio.CancelledError` handling + +### 🛑 Stop Audio Signal Protocol +```json +{ + "Kind": "StopAudio", + "AudioData": null, + "StopAudio": {} +} +``` +This JSON message is sent to ACS to immediately halt any ongoing audio playback. + +### ⚡ Error Handling & Resilience + +- **Event Loop Detection**: Graceful handling when no event loop is available +- **WebSocket Validation**: Connection state checks before sending messages +- **Task Cancellation**: Proper cleanup with `await task` after cancellation +- **Queue Management**: Full queue detection and message dropping strategies + +### 📊 Performance Optimizations + +- **Immediate Cancellation**: Barge-in triggers instant playback stop (< 50ms) +- **Background Processing**: Non-blocking AI response generation +- **Memory Management**: Proper task cleanup prevents memory leaks +- **Concurrent Safety**: Thread-safe queue operations for speech processing diff --git a/docs/DataArchitecture.md b/docs/architecture/data-flows.md similarity index 81% rename from docs/DataArchitecture.md rename to docs/architecture/data-flows.md index f7748824..e528ecdc 100644 --- a/docs/DataArchitecture.md +++ b/docs/architecture/data-flows.md @@ -1,12 +1,25 @@ -# ARTAgent Voice AI - Data Architecture & Redis 
Implementation +# :material-database-outline: Data Architecture & Flow Patterns -## Overview +!!! abstract "Three-Tier Data Architecture" + Sophisticated data architecture optimized for **real-time voice processing at scale** with hierarchical key organization, intelligent caching, and seamless data persistence for Azure Communication Services calls. -The ARTAgent Voice AI Backend employs a sophisticated three-tier data architecture optimized for real-time voice processing at scale. This system provides hierarchical key organization, intelligent caching, and seamless data persistence for Azure Communication Services (ACS) calls and conversation sessions. +## :material-chart-timeline-variant: Architecture Overview -## Three-Tier Data Architecture +!!! success "Performance-Optimized Storage Strategy" + The system employs a strategic **three-tier data storage hierarchy** optimized for different access patterns and performance requirements. -The system employs a strategic data storage hierarchy optimized for different access patterns and performance requirements: +### :material-layers: Storage Hierarchy + +| :material-speedometer: Tier | :material-timer: Access Time | :material-database: Use Cases | :material-chart-line: Capacity | +|------|-------------|-------------|------------| +| **🔥 Application Memory** | Microseconds | Active call state, audio buffers, real-time metrics | Limited by RAM | +| **⚡ Redis Enterprise** | Sub-second | Conversation context, session history, worker affinity | 10GB - 1TB | +| **📚 Cosmos DB** | 1-5 seconds | Persistent conversations, analytics, audit logs | Unlimited | + +!!! 
info "Microsoft Learn Resources" + - **[Azure Cache for Redis Overview](https://learn.microsoft.com/en-us/azure/azure-cache-for-redis/cache-overview)** - High-performance in-memory data store + - **[Azure Cosmos DB Use Cases](https://learn.microsoft.com/en-us/azure/cosmos-db/use-cases)** - NoSQL database for modern applications + - **[Azure Redis Key Scenarios](https://learn.microsoft.com/en-us/azure/azure-cache-for-redis/cache-overview#key-scenarios)** - Session store and caching patterns ```mermaid flowchart TD @@ -53,7 +66,7 @@ flowchart TD | Data Type | Memory | Redis | Cosmos | Access Pattern | Reasoning | |-----------|--------|-------|--------|----------------|-----------| -| **WebSocket Connections** | ✅ | ❌ | ❌ | 100+ ops/sec | Process-specific, ultra-low latency | +| **WebSocket Connections** | ✅ | ❌ | ❌ | High throughput | Process-specific, ultra-low latency | | **Audio Buffers** | ✅ | ❌ | ❌ | Real-time | High-frequency, temporary | | **Conversation Context** | ❌ | ✅ | ❌ | 10-50 ops/sec | Session persistence, shared workers | | **TTS Cache** | ❌ | ✅ | ❌ | Variable | Shared across calls, time-limited | @@ -78,6 +91,10 @@ flowchart TD - **Access**: 10-50 ops/second per call - **Recovery**: Critical, survives restarts +> **📚 Microsoft Learn Resources:** +> - [Azure Cache for Redis Reliability](https://learn.microsoft.com/en-us/azure/well-architected/service-guides/azure-cache-redis/reliability) - Best practices for enterprise Redis deployment +> - [Redis Session Store Pattern](https://learn.microsoft.com/en-us/azure/azure-cache-for-redis/cache-overview#key-scenarios) - Session management and caching strategies + #### 💾 Cosmos DB (Permanent) - **Purpose**: Long-term storage, analytics, compliance - **Examples**: Call transcripts, user profiles, audit logs @@ -85,6 +102,10 @@ flowchart TD - **Access**: 1-10 ops/minute for active calls - **Recovery**: Permanent system of record +> **📚 Microsoft Learn Resources:** +> - [Global Data Distribution with Cosmos 
DB](https://learn.microsoft.com/en-us/azure/cosmos-db/distribute-data-globally) - Multi-region replication and consistency +> - [Azure Cosmos DB for NoSQL](https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/overview) - Document storage with SQL-like queries + ## Redis Key Architecture ### Hierarchical Key Structure @@ -186,33 +207,20 @@ sequenceDiagram - **Monitor TTL expiration** for critical session data - **Use async operations** throughout for non-blocking performance -### Environment Configuration -```yaml -# Production -ENVIRONMENT: prod -REDIS_HOST: redis-cluster.region.cache.windows.net -REDIS_PORT: 6380 -REDIS_SSL: true -TTL_MULTIPLIER: 2.0 # Extended TTLs for production - -# Development -ENVIRONMENT: dev -REDIS_HOST: localhost -REDIS_PORT: 6379 -TTL_MULTIPLIER: 0.5 # Shorter TTLs for testing -``` ### Monitoring & Production Readiness #### Key Performance Metrics & Baselines **Latency Baselines (Production SLA)**: + - Redis operations: p99 < 5ms, p95 < 2ms - Memory-to-Redis persistence: < 10ms - Redis-to-Cosmos archival: < 100ms - End-to-end call setup: < 500ms **Operational Metrics**: + - Key creation rate: Monitor spikes (>1000 keys/min indicates issues) - TTL distribution: Alert on keys without TTL (potential memory leaks) - Memory usage: Alert at 70% Redis memory capacity @@ -232,7 +240,7 @@ TTL_MULTIPLIER: 0.5 # Shorter TTLs for testing ```yaml # P0 Critical Alert Thresholds redis_availability: - threshold: "availability < 99.9% for 30 seconds" + threshold: "availability degradation for 30 seconds" escalation: "page on-call immediately" memory_pressure: @@ -253,6 +261,7 @@ performance_degradation: #### Security & Compliance Framework **Infrastructure Security**: + - **Network**: Private endpoints, VNet integration, no public Redis access - **Authentication**: Azure Managed Identity for service-to-service auth - **Encryption**: TLS 1.3 in transit, customer-managed keys at rest @@ -301,12 +310,13 @@ async def store_conversation_context(context: 
dict): ``` 2. **Security Incident Response**: - - Immediate: Rotate Redis access keys - - Within 1 hour: Audit all active sessions - - Within 24 hours: Security assessment report - - Within 72 hours: Compliance notification if required + - Immediate: Rotate Redis access keys + - Within 1 hour: Audit all active sessions + - Within 24 hours: Security assessment report + - Within 72 hours: Compliance notification if required + +#### Capacity Planning -**Capacity Planning**: - **Memory**: Scale Redis cluster at 70% utilization - **Connections**: Monitor connection pool metrics, scale at 80% - **Throughput**: Baseline 10K ops/sec per shard, alert on degradation @@ -331,6 +341,7 @@ async def comprehensive_health_check(): ``` **Production Deployment Checklist**: + - [ ] Redis cluster provisioned with HA configuration - [ ] Private endpoints configured (no public access) - [ ] Monitoring dashboards deployed (Azure Monitor + custom metrics) diff --git a/docs/architecture/integrations.md b/docs/architecture/integrations.md new file mode 100644 index 00000000..3ac2eaec --- /dev/null +++ b/docs/architecture/integrations.md @@ -0,0 +1,139 @@ +# :material-phone-dial: Telephony Integration with Existing IVR Systems + +!!! abstract "Connecting Your Contact Center to Azure" + This guide provides a detailed architecture for integrating an existing on-premises or cloud-based telephony system (PBX, IVR, Contact Center) with this accelerator. By leveraging **Azure Communication Services (ACS) Direct Routing** and a certified **Session Border Controller (SBC)**, you can seamlessly route calls to the AI voice agent while preserving your existing carrier relationships and infrastructure. + +--- + +## :material-lan-connect: Core Concept: ACS Direct Routing + +**ACS Direct Routing** is the key technology that enables a "Bring Your Own Carrier" (BYOC) model. It allows you to connect your own telephony trunks to Azure Communication Services through a certified SBC. 
This is the ideal pattern for enterprises that want to augment their existing contact center with Azure's advanced AI and voice capabilities without replacing their entire telephony infrastructure. + +!!! info "Official Microsoft Documentation" + - **[Azure Communication Services Direct Routing](https://learn.microsoft.com/en-us/azure/communication-services/concepts/telephony/direct-routing-provisioning)** + - **[Infrastructure Requirements for Direct Routing](https://learn.microsoft.com/en-us/azure/communication-services/concepts/telephony/direct-routing-infrastructure)** + - **[List of Certified Session Border Controllers (SBCs)](https://learn.microsoft.com/en-us/azure/communication-services/concepts/telephony/certified-session-border-controllers)** + +--- + +## :material-sitemap: Integration Architecture & Call Flow + +The following diagram illustrates how a call is routed from an existing IVR system to the Azure-based voice agent and potentially back to a human agent queue. + +```mermaid +graph LR + subgraph YourInfra [Your Infrastructure] + PSTN[📞 PSTN] + Carrier[🏢 Telecom Carrier] + IVR[🤖 IVR / Contact Center] + PSTN --> Carrier + Carrier --> IVR + end + + subgraph DMZEdge [DMZ / Network Edge] + SBC[🛡️ Session Border Controller] + end + + subgraph AzureCloud [Azure Cloud] + ACS[🌐 ACS Direct Routing] + AgentBackend[📱 Voice Agent Backend] + ACS -->|3 Webhook Event| AgentBackend + AgentBackend -->|4 WebSocket Media| ACS + end + + IVR -->|1 SIP Transfer| SBC + SBC -->|2 SIP over TLS| ACS + AgentBackend -->|5 Initiate Transfer| ACS + ACS -->|6 Route back| SBC + SBC -->|7 Transfer to Queue| IVR + + classDef yourinfra fill:#f9f9f9,stroke:#333,stroke-width:2px + classDef azurecloud fill:#eaf5ff,stroke:#0078d4,stroke-width:2px + class IVR,Carrier,PSTN yourinfra + class ACS,AgentBackend azurecloud +``` + +### Call Flow Steps +1. 
**Initial Call & IVR Handling:** A customer calls a number that routes to your existing carrier and is answered by your current IVR or contact center platform. +2. **Transfer to AI Agent:** Based on a menu selection or business logic, the IVR decides to transfer the call to the AI voice agent. It initiates a SIP transfer (INVITE) to a pre-configured number that points to the SBC. Custom SIP headers can be added here to pass context (e.g., customer ID, reason for call). +3. **SBC to ACS Routing:** The SBC receives the SIP INVITE, validates it, and forwards it securely over TLS to the ACS Direct Routing interface. +4. **ACS to Voice Agent Backend:** ACS receives the call and triggers a webhook (`IncomingCall`) to the voice agent backend. The backend answers the call and establishes a real-time media stream over WebSockets. +5. **AI Conversation:** The voice agent backend processes the audio stream in real-time, using Azure Speech for transcription/synthesis and Azure OpenAI for responses. +6. **Escalation to Human:** If the AI agent determines a human is needed, the backend uses the ACS Call Automation SDK to initiate a transfer. This sends a SIP REFER message back to the original IVR/contact center via the SBC. +7. **Return to IVR/Agent Queue:** The SBC routes the call back to your contact center, placing the customer in a queue for a human agent, passing along any new context gathered by the AI. + +--- + +## :material-format-list-checks: Configuration Steps + +Integrating your existing telephony requires a few key configuration steps, primarily centered around the SBC and ACS. + +=== "Step 1: Prerequisites" + !!! warning "Before You Begin" + Ensure you have the following in place before attempting integration: + + - **A Certified SBC:** Your Session Border Controller must be on the [list of SBCs certified for ACS Direct Routing](https://learn.microsoft.com/en-us/azure/communication-services/concepts/telephony/certified-session-border-controllers). 
+ - **Public IP and FQDN for SBC:** The SBC must have a public IP address and a Fully Qualified Domain Name (FQDN). + - **Publicly Trusted Certificate:** The FQDN for the SBC must have a valid, publicly signed TLS certificate. Wildcard certificates are supported. + - **Verified Domain:** You must add and verify the SBC's domain name within your Azure Communication Services resource. See [Validate Domain Ownership](https://learn.microsoft.com/en-us/azure/communication-services/how-tos/telephony/domain-validation). + +=== "Step 2: Connect the SBC to ACS" + Once the prerequisites are met, you connect your SBC to ACS. This process pairs the SBC with your ACS resource, making it a valid gateway for SIP traffic. + + 1. **Add the SBC in Azure:** In the Azure portal, navigate to your Communication Services resource and select **Direct routing** under "Voice Calling - PSTN". Add your SBC's FQDN and signaling port. + 2. **Configure Voice Routes:** Create outbound voice routing rules that determine how calls are sent. For a simple setup, you can create a rule that sends all calls to your newly added SBC. + 3. **Verify Connection:** After configuration, the SBC status should appear as "Online" in the Azure portal. This is verified by a successful exchange of SIP OPTIONS messages between ACS and your SBC. + + !!! tip "Troubleshooting SBC Connectivity" + If the SBC does not come online, refer to the official [SBC Connectivity Issues Troubleshooting Guide](https://learn.microsoft.com/en-us/azure/communication-services/concepts/telephony/monitoring-troubleshooting-telephony/troubleshoot-tls-certificate-sip-options). Common issues relate to TLS certificates or firewall misconfigurations. + +=== "Step 3: Contextual Call Transfer (IVR to AI)" + To make the handoff from your IVR to the AI agent intelligent, you need to pass contextual data. This is typically done using custom SIP headers. 
+ + - **In your IVR/PBX:** When initiating the transfer to the SBC, add custom SIP headers to the INVITE message. A common practice is to use `X-` prefixed headers. + ```sip + INVITE sip:+18005551234@sbc.yourcompany.com SIP/2.0 + ... + X-Customer-ID: 12345 + X-Transfer-Reason: BillingInquiry + ... + ``` + - **In the Voice Agent Backend:** The ACS Call Automation SDK delivers these headers to your application as part of the `IncomingCall` event payload. You can access them to inform the agent's initial greeting or actions. + ```python + # Example in your event handler + if event.type == "Microsoft.Communication.IncomingCall": + call_connection_id = event.data.get("callConnectionId") + custom_headers = event.data.get("customHeaders", {}) + customer_id = custom_headers.get("X-Customer-ID") + + # Use customer_id to fetch customer data before answering + await answer_call_with_context(call_connection_id, customer_id) + ``` + +=== "Step 4: Escalation & Transfer (AI to Human)" + When the AI agent needs to escalate to a human, it uses the `transfer` action from the ACS Call Automation SDK. + + - The target of the transfer is a phone number corresponding to a human agent queue in your original contact center. + - You can again pass context, this time from the AI conversation, back to the contact center using custom SIP headers. + + ```python + # Example of transferring a call back to a human agent queue + from azure.communication.callautomation import CallAutomationClient, PhoneNumberIdentifier, SipHeaders + + # ... inside your agent logic + + target_pstn_number = "+18005559876" # Your human agent queue number + + custom_context = SipHeaders( + custom_headers={ + "X-AI-Summary": "Customer confirmed identity and wants to dispute a charge." 
+ } + ) + + await call_automation_client.transfer_call( + call_connection_id=call_connection_id, + target_participant=PhoneNumberIdentifier(target_pstn_number), + custom_context=custom_context + ) + ``` + This sends a SIP REFER message back through the SBC, instructing your telephony system to route the call to the specified number, with the AI-generated summary included in the SIP headers for your agent desktop to display. diff --git a/docs/architecture/llm-orchestration.md b/docs/architecture/llm-orchestration.md new file mode 100644 index 00000000..c1726c76 --- /dev/null +++ b/docs/architecture/llm-orchestration.md @@ -0,0 +1,297 @@ +# :material-brain: LLM Orchestration Architecture + +!!! abstract "Agent-Based Conversation Orchestration" + Two distinct orchestration approaches: **Custom Multi-Agent** with local dependency injection and **Voice Live API** with Azure AI Foundry-managed orchestration. + +## :material-select-group: Orchestration Approaches + +=== "🎯 Custom Multi-Agent (MEDIA/TRANSCRIPTION)" + **Local orchestration** with full developer control + + - **Orchestration**: Local dependency injection and agent registry + - **Configuration**: YAML-based agent definitions (ARTAgent + FoundryAgent) + - **Tools**: Custom function calling and business logic + - **Control**: Complete customization of conversation flow + - **Implementation**: Fully implemented with examples + +=== "⚡ Voice Live API (VOICE_LIVE)" + **Azure AI Foundry-managed orchestration** for simplified deployment + + !!! warning "Implementation Status" + Voice Live orchestration is **offloaded to Azure AI Foundry agents**. Local orchestration (dependency injection, agent registry) described in this document applies only to Custom Multi-Agent modes. + + **LVAgent integration** (see [`apps/rtagent/backend/src/agents/Lvagent/`](https://github.com/Azure-Samples/art-voice-agent-accelerator/tree/main/apps/rtagent/backend/src/agents/Lvagent) directory) is **pending full implementation**. 
+ + - **Orchestration**: Managed by Azure AI Foundry (not local) + - **Configuration**: Azure AI agent configurations + - **Tools**: Azure AI native capabilities + - **Control**: Configuration-driven through Azure portal + - **Implementation**: LVAgent framework in development + +## :material-sitemap: Dependency Injection Pattern + +!!! info "Scope: Custom Multi-Agent Orchestration Only" + The dependency injection, agent registry, and orchestration patterns described below apply **only to Custom Multi-Agent modes** (MEDIA/TRANSCRIPTION). + + **Voice Live API** orchestration is handled entirely by Azure AI Foundry agents - see [`apps/rtagent/backend/src/agents/Lvagent/`](https://github.com/Azure-Samples/art-voice-agent-accelerator/tree/main/apps/rtagent/backend/src/agents/Lvagent) for the integration layer. + +**Simple Function-Based Orchestration:** + +```python title="apps/rtagent/backend/api/v1/dependencies/orchestrator.py" +def get_orchestrator() -> callable: + """FastAPI dependency provider for conversation orchestrator.""" + return route_conversation_turn + +async def route_conversation_turn(cm, transcript, ws, **kwargs): + """Route conversation through agent registry with error handling.""" + await route_turn(cm=cm, transcript=transcript, ws=ws, is_acs=True) +``` + +**Usage in Endpoints:** + +```python title="apps/rtagent/backend/api/v1/endpoints/media.py" +@router.websocket("/stream") +async def acs_media_stream(websocket: WebSocket): + orchestrator = get_orchestrator() # Inject orchestrator function + + handler = await _create_media_handler( + orchestrator=orchestrator, # Pass to handler + # ... 
other params + ) +``` + +**Plug-and-Play Orchestration:** + +```python title="Swappable Orchestration Strategies" +def get_orchestrator() -> callable: + # return route_conversation_turn # Default ARTAgent routing + # return route_turn_for_fnol # Insurance-specific routing + # return custom_conversation_handler # Custom business logic + return route_conversation_turn +``` + +## :material-cogs: Agent Configuration System + +### ARTAgent Framework (YAML-Driven) + +!!! example "Authentication Agent Configuration" + ```yaml title="apps/rtagent/backend/src/agents/artagent/agent_store/auth_agent.yaml" + agent: + name: AuthAgent + description: Handles caller authentication and routing + + model: + deployment_id: gpt-4o + temperature: 1 + max_completion_tokens: 2040 + + voice: + name: en-US-Ava:DragonHDLatestNeural + style: chat + rate: "+5%" # Slightly faster than default; use a negative value (e.g. "-5%") to slow speech for clarity + + prompts: + path: voice_agent_authentication.jinja + + tools: + - authenticate_caller + - escalate_emergency + - escalate_human + ``` + +!!! example "Claims Intake Agent Configuration" + ```yaml title="apps/rtagent/backend/src/agents/artagent/agent_store/claim_intake_agent.yaml" + agent: + name: FNOLIntakeAgent + description: First Notice of Loss claim processing + + model: + deployment_id: gpt-4o + temperature: 0.60 + + voice: + name: en-US-Andrew2:DragonHDLatestNeural + rate: "+10%" # Faster for efficient data collection + + tools: + - record_fnol + - authenticate_caller + - escalate_emergency + - handoff_general_agent + ``` + +### FoundryAgent Framework (Instructions-Based) + +!!! example "Customer Service Agent Configuration" + ```yaml title="apps/rtagent/backend/src/agents/foundryagents/agent_store/customer_service_agent.yaml" + agent: + name: CustomerServiceAgent + instructions: | + Professional customer service agent for e-commerce company. + Help customers resolve inquiries quickly and accurately. 
+ + model: + deployment_id: gpt-4o + + tools: + - check_order_status + - search_knowledge_base + - create_support_ticket + - escalate_to_human + ``` + +## :material-database: Agent Registry System + +**Dynamic Agent Registration:** + +```python title="apps/rtagent/backend/src/orchestration/artagent/registry.py" +# Registry for pluggable agents +_REGISTRY: Dict[str, AgentHandler] = {} + +def register_specialist(name: str, handler: AgentHandler) -> None: + """Register an agent handler under a name.""" + _REGISTRY[name] = handler + +def get_specialist(name: str) -> Optional[AgentHandler]: + """Lookup a registered agent handler.""" + return _REGISTRY.get(name) +``` + +**Agent Lookup Flow:** + +```python title="apps/rtagent/backend/src/orchestration/artagent/orchestrator.py" +async def route_turn(cm, transcript, ws, *, is_acs: bool): + # 1. Check active agent from memory + active_agent = cm.get_context("active_agent", "General") + + # 2. Get handler from registry + handler = get_specialist(active_agent) + + # 3. 
Execute specialized processing + if handler: + await handler(cm, transcript, ws, is_acs=is_acs) + else: + await fallback_handler(cm, transcript, ws, is_acs=is_acs) +``` + +## :material-tools: Tool Integration Patterns + +### ARTAgent Tools + +```python title="apps/rtagent/backend/src/agents/artagent/tool_store/auth.py" +async def authenticate_caller(caller_name: str, phone_number: str): + """Authenticate caller identity.""" + # Implementation for caller verification + pass + +async def escalate_emergency(reason: str, caller_name: str = None): + """Emergency escalation for 911-type situations.""" + # Implementation for emergency routing + pass +``` + +### FoundryAgent Tools + +```python title="apps/rtagent/backend/src/agents/foundryagents/tool_store/customer_support_tools.py" +async def check_order_status(order_id: str): + """Get real-time order information.""" + # Implementation for order lookup + pass + +async def create_support_ticket(issue_description: str, customer_info: dict): + """Create support ticket for complex issues.""" + # Implementation for ticket creation + pass +``` + +## :material-call-split: Orchestration Flow + +```mermaid +sequenceDiagram + participant WS as WebSocket + participant Orch as Orchestrator + participant Reg as Agent Registry + participant Agent as Specialized Agent + participant AI as Azure AI Foundry + + WS->>Orch: Audio → Transcript + Orch->>Reg: Lookup Active Agent + Reg-->>Orch: Return Handler + Orch->>Agent: Execute Agent Logic + Agent->>AI: LLM Request + Tools + AI-->>Agent: Response + Function Calls + Agent-->>WS: TTS Audio Response +``` + +## :material-compare: Mode Comparison + +| **Aspect** | **Custom Multi-Agent** | **Voice Live API** | +|------------|------------------------|--------------------| +| **Orchestration** | Local (this document) | Azure AI Foundry managed | +| **Configuration** | YAML agent definitions | Azure AI agent configs | +| **Dependency Injection** | FastAPI dependencies | Not applicable | +| **Agent 
Registry** | Local registry system | Azure AI managed | +| **Tool Integration** | Custom function calling | Azure AI native | +| **Agent Switching** | Dynamic via local registry | Azure AI routing | +| **Implementation** | Fully implemented | LVAgent integration pending | + +## :material-code-json: Configuration Examples + +### Environment Configuration + +```bash title="Orchestration Mode Selection" +# Multi-Agent Orchestration (set ONE of the two modes below) +export ACS_STREAMING_MODE=MEDIA +export ACS_STREAMING_MODE=TRANSCRIPTION + +# Voice Live API (alternative to the modes above; do not combine) +export ACS_STREAMING_MODE=VOICE_LIVE +export VOICE_LIVE_AGENT_YAML="path/to/agent.yaml" +``` + +### Custom Agent Development + +```python title="Creating New Agents" +# 1. Create YAML configuration +# agents/custom/my_agent.yaml + +# 2. Implement agent handler +async def my_agent_handler(cm, utterance, ws, *, is_acs): + # Custom agent logic + pass + +# 3. Register with orchestrator +register_specialist("MyAgent", my_agent_handler) + +# 4. Set as active agent +cm.set_context("active_agent", "MyAgent") +``` + +## :material-link-variant: Integration Points + +### Custom Multi-Agent Integration Files: + +- **[`apps/rtagent/backend/api/v1/dependencies/orchestrator.py`](https://github.com/Azure-Samples/art-voice-agent-accelerator/blob/main/apps/rtagent/backend/api/v1/dependencies/orchestrator.py)** - Dependency injection provider +- **[`apps/rtagent/backend/src/orchestration/artagent/orchestrator.py`](https://github.com/Azure-Samples/art-voice-agent-accelerator/blob/main/apps/rtagent/backend/src/orchestration/artagent/orchestrator.py)** - Main routing logic +- **[`apps/rtagent/backend/src/orchestration/artagent/registry.py`](https://github.com/Azure-Samples/art-voice-agent-accelerator/blob/main/apps/rtagent/backend/src/orchestration/artagent/registry.py)** - Agent registration system +- 
**[`apps/rtagent/backend/src/agents/artagent/agent_store/`](https://github.com/Azure-Samples/art-voice-agent-accelerator/tree/main/apps/rtagent/backend/src/agents/artagent/agent_store)** - ARTAgent YAML configurations +- **[`apps/rtagent/backend/src/agents/foundryagents/agent_store/`](https://github.com/Azure-Samples/art-voice-agent-accelerator/tree/main/apps/rtagent/backend/src/agents/foundryagents/agent_store)** - FoundryAgent YAML configurations +- **[`apps/rtagent/backend/src/agents/*/tool_store/`](https://github.com/Azure-Samples/art-voice-agent-accelerator/tree/main/apps/rtagent/backend/src/agents)** - Function calling implementations + +### Voice Live API Integration (Pending): + +- **[`apps/rtagent/backend/src/agents/Lvagent/`](https://github.com/Azure-Samples/art-voice-agent-accelerator/tree/main/apps/rtagent/backend/src/agents/Lvagent)** - LVAgent framework for Voice Live integration +- **[`apps/rtagent/backend/src/agents/Lvagent/factory.py`](https://github.com/Azure-Samples/art-voice-agent-accelerator/blob/main/apps/rtagent/backend/src/agents/Lvagent/factory.py)** - Agent factory for Voice Live mode +- **[`apps/rtagent/backend/src/agents/Lvagent/agent_store/`](https://github.com/Azure-Samples/art-voice-agent-accelerator/tree/main/apps/rtagent/backend/src/agents/Lvagent/agent_store)** - Voice Live agent configurations + +!!! warning "Voice Live API Status" + LVAgent integration is **under development**. Current Voice Live mode uses basic passthrough to Azure AI Foundry. Full orchestration capabilities will be available when LVAgent implementation is complete. 
+ +### Extension Patterns (Custom Multi-Agent Only): + +- **Custom Agents** - Add new YAML configs and register handlers +- **Tool Integration** - Extend tool registries with business logic +- **Orchestration Logic** - Modify routing strategies in orchestrator +- **Dependency Injection** - Swap orchestration functions in provider + +This architecture enables **rapid agent development** through YAML configuration while maintaining **full extensibility** through the registry and dependency injection patterns for Custom Multi-Agent modes. \ No newline at end of file diff --git a/docs/architecture/speech-recognition.md b/docs/architecture/speech-recognition.md new file mode 100644 index 00000000..853d99ac --- /dev/null +++ b/docs/architecture/speech-recognition.md @@ -0,0 +1,309 @@ +# Speech Recognition API + +The Real-Time Voice Agent integrates Azure Cognitive Speech Services through multiple API endpoints, each optimized for different interaction patterns and streaming modes. + +## API Integration Points + +### WebSocket Endpoints with STT Integration + +#### `/api/v1/media/stream` - ACS Media Streaming +Real-time speech recognition for Azure Communication Services calls: + +- **Handler**: `ACSMediaHandler` or `VoiceLiveHandler` based on `ACS_STREAMING_MODE` +- **STT Integration**: Pooled `StreamingSpeechRecognizerFromBytes` with three-thread architecture +- **Features**: Immediate barge-in detection, conversation memory, Azure OpenAI orchestration +- **Use Case**: Phone calls through Azure Communication Services + +```javascript +// Connect to ACS media streaming with speech recognition +const ws = new WebSocket( + `wss://api.domain.com/api/v1/media/stream?call_connection_id=${callId}` +); + +// Send audio frames for recognition +ws.send(base64AudioData); + +// Receive transcripts and AI responses +ws.onmessage = (event) => { + const data = JSON.parse(event.data); + if (data.type === 'transcript') { + console.log('Recognized:', data.text); + } +}; +``` + +#### 
`/api/v1/realtime/conversation` - Browser Voice Conversations +Speech recognition for web-based voice interactions: + +- **Handler**: Dedicated orchestrator with STT/TTS pooling +- **STT Integration**: Per-connection speech recognizer with partial/final callbacks +- **Features**: Session persistence, dashboard broadcasting, connection queuing +- **Use Case**: Browser-based voice conversations and testing + +```javascript +// Connect for browser-based speech recognition +const ws = new WebSocket( + `wss://api.domain.com/api/v1/realtime/conversation?session_id=${sessionId}` +); + +// Send audio bytes for real-time recognition +ws.send(audioBuffer); +``` + +## Core Speech Recognition Class + +All endpoints use the **`StreamingSpeechRecognizerFromBytes`** class for consistent speech processing: + +```python +from src.speech.speech_recognizer import StreamingSpeechRecognizerFromBytes + +# Initialized automatically by handlers based on endpoint +recognizer = StreamingSpeechRecognizerFromBytes( + speech_key="${AZURE_SPEECH_KEY}", # or DefaultAzureCredential + speech_region="eastus", + languages=["en-US", "es-ES"], + enable_diarization=True, +) + +# Callbacks are set by handlers for integration +async def handle_partial_result(text): + # Immediate barge-in detection for ACS calls + print("Partial (barge-in):", text) + +async def handle_final_result(text): + # Complete utterance for orchestrator processing + print("Final transcript:", text) + +recognizer.on_partial_result = handle_partial_result +recognizer.on_final_result = handle_final_result +``` + +## Handler-Specific Speech Recognition + +### ACS Media Handler (`ACSMediaHandler`) + +**Streaming Mode**: `MEDIA` or `TRANSCRIPTION` +**Endpoint**: `/api/v1/media/stream` + +Implements three-thread architecture for sub-50ms barge-in detection: + +```python +# Thread 1: Speech SDK Thread (never blocks) +def on_partial_callback(text: str, lang: str, speaker_id: str): + """Immediate barge-in detection - called from Speech SDK 
thread""" + # Schedule cancellation on main event loop + asyncio.run_coroutine_threadsafe(schedule_barge_in(text), main_loop) + +def on_final_callback(text: str, lang: str): + """Queue final speech for processing - called from Speech SDK thread""" + # Thread-safe queue operation + speech_queue.put_nowait((text, lang)) + +# Thread 2: Route Turn Thread (blocks on queue only) +while True: + final_text, lang = await speech_queue.get() + # Process through orchestrator (may take seconds) + await route_turn(memory_manager, final_text, websocket) + +# Thread 3: Main Event Loop (never blocks) +async def schedule_barge_in(partial_text: str): + """Cancel current TTS playback immediately (< 50ms)""" + if playback_task and not playback_task.done(): + playback_task.cancel() + await send_stop_audio_to_acs() +``` + +**Key Features**: + +- **Immediate barge-in**: Partial results trigger instant TTS cancellation +- **Non-blocking recognition**: Speech SDK runs in dedicated thread +- **Queue-based processing**: Final results processed sequentially +- **Resource pooling**: Shared STT clients across ACS calls + +### Voice Live Handler (`VoiceLiveHandler`) + +**Streaming Mode**: `VOICE_LIVE` +**Endpoint**: `/api/v1/media/stream` + +Integrates with Azure Voice Live API for advanced conversation handling: + +```python +# Voice Live integration handles STT internally +voice_live_agent = build_lva_from_yaml(agent_config) +await voice_live_agent.connect() + +async def handle_audio_data(audio_base64: str): + """Send audio to Voice Live API""" + await voice_live_agent.send_audio(audio_base64) + +# Responses come back through Voice Live websocket +async def on_voice_live_response(response): + """Handle AI response from Voice Live""" + await websocket.send_json({ + "type": "assistant_message", + "content": response.text, + "audio": response.audio_data + }) +``` + +**Key Features**: + +- **Azure Voice Live Integration**: Direct API connection to advanced conversational AI +- **Semantic Voice Activity**: Advanced
voice activity detection beyond traditional VAD +- **Natural Conversations**: Maintains conversation context and flow +- **Emotion Detection**: Can detect and respond to emotional cues + +### Realtime Conversation Handler + +**Endpoint**: `/api/v1/realtime/conversation` + +Browser-based speech recognition with session persistence: + +```python +# Per-connection STT client with callback registration +stt_client = await stt_pool.acquire() + +def on_partial(text: str, lang: str, speaker_id: str): + """Handle partial results for barge-in""" + if websocket.state.is_synthesizing: + # Stop current TTS synthesis + websocket.state.tts_client.stop_speaking() + websocket.state.is_synthesizing = False + +def on_final(text: str, lang: str): + """Queue final text for orchestrator processing""" + websocket.state.user_buffer += text.strip() + "\n" + +stt_client.set_partial_result_callback(on_partial) +stt_client.set_final_result_callback(on_final) + +# Process accumulated text through orchestrator +if websocket.state.user_buffer.strip(): + await route_turn(memory_manager, websocket.state.user_buffer, websocket, is_acs=False) +``` + +**Key Features**: +- **Session Management**: Persistent conversation state across reconnections +- **Dashboard Integration**: Real-time updates to connected dashboard clients +- **Resource Pooling**: Dedicated STT/TTS clients per browser connection +- **Parallel Processing**: Background orchestration tasks for non-blocking responses + +## Configuration and Best Practices + +### Endpoint Selection + +**Use `/api/v1/media/stream`** when: +- Processing phone calls through Azure Communication Services +- Need sub-50ms barge-in detection for natural conversations +- Working with ACS call automation and media streaming +- Require three-thread architecture for production call centers + +**Use `/api/v1/realtime/conversation`** when: +- Building browser-based voice applications +- Need session persistence across page reloads +- Want dashboard integration and monitoring +- Developing
voice-enabled web experiences + +### Authentication Options + +```python +# Option 1: Azure Entra ID (Recommended for production) +recognizer = StreamingSpeechRecognizerFromBytes( + speech_region="eastus", + use_default_credential=True, # Uses DefaultAzureCredential + enable_tracing=True +) + +# Option 2: API Key (Development/testing) +recognizer = StreamingSpeechRecognizerFromBytes( + speech_key=os.getenv("AZURE_SPEECH_KEY"), + speech_region="eastus", + enable_tracing=True +) +``` + +### Audio Format Requirements + +All endpoints expect **16 kHz, mono PCM** audio: + +```python +# Audio preprocessing for optimal recognition +SAMPLE_RATE = 16000 +CHANNELS = 1 +SAMPLE_WIDTH = 2 # 16-bit PCM + +# WebSocket audio streaming +audio_data = resample_audio(raw_audio, target_rate=16000) +base64_audio = base64.b64encode(audio_data).decode('utf-8') +websocket.send_text(base64_audio) +``` + +### Language and Feature Configuration + +```python +# Multi-language auto-detection +recognizer = StreamingSpeechRecognizerFromBytes( + speech_region="eastus", + languages=["en-US", "es-ES", "fr-FR"], # BCP-47 language codes + enable_diarization=True, # Speaker identification + enable_profanity_filter=True, # Content filtering + enable_detailed_results=True # Word-level timing +) +``` + +### Resource Pool Management + +The API uses connection pooling for optimal performance: + +```python +# STT Pool Configuration (managed by application) +STT_POOL_SIZE = 4 # Concurrent speech recognizers +TTS_POOL_SIZE = 4 # Concurrent synthesizers + +# Handlers automatically acquire/release pool resources +# No manual pool management required in client code +``` + +## Integration with State Management + +Speech recognition integrates with conversation memory: + +```python +# Automatic session persistence via MemoManager +memory_manager = MemoManager.from_redis(session_id, redis_mgr) + +# Speech recognition handlers automatically: +# 1. Load conversation history from Redis +# 2. 
Add recognized text to conversation context +# 3. Pass to orchestrator for response generation +# 4. Persist updated conversation state + +# Access conversation history +history = memory_manager.get_chat_history() +for entry in history: + print(f"{entry.role}: {entry.content}") +``` + +## Observability and Monitoring + +Speech recognition includes comprehensive tracing: + +```python +# OpenTelemetry spans automatically created for: +# - Speech recognition session lifecycle +# - Audio frame processing +# - Partial/final result callbacks +# - Handler routing and processing + +# Correlation with call connection IDs +recognizer.enable_tracing = True +recognizer.call_connection_id = "acs-call-123" # For ACS correlation + +# Custom attributes in spans include: +# - Speech SDK session IDs +# - Language detection results +# - Processing latencies +# - Error conditions and recovery +``` + +See **[Streaming Modes Documentation](streaming-modes.md)** for detailed configuration options and **[Speech Synthesis](speech-synthesis.md)** for TTS integration patterns. diff --git a/docs/architecture/speech-synthesis.md b/docs/architecture/speech-synthesis.md new file mode 100644 index 00000000..12a9d319 --- /dev/null +++ b/docs/architecture/speech-synthesis.md @@ -0,0 +1,237 @@ +# Speech Synthesis API + +The Real-Time Voice Agent provides enterprise-grade text-to-speech capabilities through the `SpeechSynthesizer` class, built on Azure Speech Services with comprehensive integration features. 
+ +## Key Features + +- **Multiple authentication methods**: API Key and Azure Entra ID (Default Credentials) +- **Real-time synthesis**: Base64 frame streaming for WebSocket clients +- **Local speaker playback**: Intelligent headless environment detection +- **OpenTelemetry tracing**: Integration for Application Insights monitoring +- **Concurrent synthesis limiting**: Prevents service overload +- **Advanced voice control**: Neural styles, prosody, multilingual support + +## SpeechSynthesizer Class + +Located in `src/speech/text_to_speech.py`, the `SpeechSynthesizer` provides comprehensive text-to-speech functionality with Azure integration. + +### Authentication Methods + +#### Azure Entra ID (Recommended for Production) +```python +from src.speech.text_to_speech import SpeechSynthesizer + +# Uses DefaultAzureCredential - no API key required +synthesizer = SpeechSynthesizer( + region="eastus", + voice="en-US-JennyMultilingualNeural", + enable_tracing=True +) +``` + +#### API Key (Development/Testing) +```python +# Traditional API key authentication +synthesizer = SpeechSynthesizer( + key="your-speech-key", + region="eastus", + voice="en-US-AriaNeural" +) +``` + +### Basic Usage Examples + +#### Simple Text-to-Speech +```python +# Synthesize to memory +audio_data = synthesizer.synthesize_speech( + "Hello! 
Welcome to our voice application.", + style="chat", + rate="+10%" +) + +# Save to file +with open("output.wav", "wb") as f: + f.write(audio_data) +``` + +#### Real-time Streaming for WebSocket +```python +# Generate base64-encoded frames for streaming +frames = synthesizer.synthesize_to_base64_frames( + "This is real-time streaming audio", + sample_rate=16000 +) + +# Send frames to WebSocket client +for frame in frames: + websocket.send(frame) +``` + +#### Local Speaker Playback +```python +# Play audio through system speakers (if available) +synthesizer = SpeechSynthesizer( + key="your-key", + region="eastus", + playback="auto" # Automatic hardware detection +) + +# Speak text directly +synthesizer.start_speaking_text( + "This will play through your speakers!", + voice="en-US-AriaNeural", + style="excited" +) + +# Stop playback +import time +time.sleep(3) +synthesizer.stop_speaking() +``` + +### Advanced Configuration + +#### Production Setup with Managed Identity +```python +import os +from src.speech.text_to_speech import SpeechSynthesizer + +# Production configuration +synthesizer = SpeechSynthesizer( + region=os.getenv("AZURE_SPEECH_REGION"), + voice="en-US-JennyMultilingualNeural", + playback="never", # Headless deployment + enable_tracing=True, # OpenTelemetry monitoring + call_connection_id="session-abc123" # Correlation tracking +) + +# Validate configuration +if synthesizer.validate_configuration(): + print("✅ Speech synthesizer ready for production") +else: + print("❌ Configuration validation failed") +``` + +#### Voice Styles and Prosody Control +```python +# Advanced voice styling +audio = synthesizer.synthesize_speech( + "Production-ready voice synthesis", + voice="en-US-AriaNeural", + style="news", # Available: chat, cheerful, sad, angry, etc. 
+ rate="+5%", # Speed adjustment + pitch="+2Hz", # Pitch control + volume="+10dB" # Volume adjustment +) +``` + +### Environment Configuration + +Required environment variables for production deployment: + +```bash +# Azure Speech Services +AZURE_SPEECH_REGION=eastus +AZURE_SPEECH_RESOURCE_ID=/subscriptions/.../resourceGroups/.../providers/Microsoft.CognitiveServices/accounts/... + +# Optional: Custom endpoint +AZURE_SPEECH_ENDPOINT=https://your-custom-endpoint.cognitiveservices.azure.com + +# Optional: Audio playback control +TTS_ENABLE_LOCAL_PLAYBACK=false # Set to false for headless environments +``` + +### Error Handling and Validation + +#### Configuration Validation +```python +# Test configuration before use +if synthesizer.validate_configuration(): + print('✅ Configuration is valid') + + # Test basic synthesis + audio_data = synthesizer.synthesize_speech("Hello, world!") + print(f'✅ Generated {len(audio_data)} bytes of audio') +else: + print('❌ Configuration validation failed') +``` + +#### Common Issues + +**Authentication Errors** +```bash +# Verify Azure credentials +az account show +az cognitiveservices account list +``` + +**Audio Hardware Issues** +```python +# Check headless environment detection +from src.speech.text_to_speech import _is_headless +print(f"Headless environment: {_is_headless()}") +``` + +**Import Errors** +```bash +# Ensure dependencies are installed +pip install azure-cognitiveservices-speech +python -c "import src.speech.text_to_speech; print('✅ Import successful')" +``` + +### OpenTelemetry Integration + +The `SpeechSynthesizer` includes built-in tracing for production monitoring: + +```python +# Enable comprehensive tracing +synthesizer = SpeechSynthesizer( + region="eastus", + enable_tracing=True, + call_connection_id="acs-call-123" # Correlation ID +) + +# All operations automatically traced with: +# - Session-level spans for complete request lifecycle +# - Service dependency mapping for Azure Monitor App Map +# - Call 
correlation across distributed components +``` + +### Performance Considerations + +- **Connection pooling**: Default limit of 4 concurrent synthesis operations +- **Memory efficiency**: Streaming operations with automatic resource cleanup +- **Lazy initialization**: Audio components initialized only when needed +- **Headless detection**: Automatic fallback for containerized environments + +### Integration with Container Apps + +For Azure Container Apps deployment, ensure proper configuration: + +```dockerfile +# Dockerfile example +FROM python:3.11-slim + +# Set environment for headless operation +ENV TTS_ENABLE_LOCAL_PLAYBACK=false +ENV AZURE_SPEECH_REGION=eastus + +# Install dependencies +COPY requirements.txt . +RUN pip install -r requirements.txt + +# Copy application +COPY src/ ./src/ +CMD ["python", "-m", "your_app"] +``` + +## API Integration + +The speech synthesis functionality integrates with the main API endpoints - see **[API Reference](../api/api-reference.md)** for complete endpoint documentation: + +- **Call Management** - TTS for outbound call prompts and conversation responses +- **Media Streaming** - Real-time TTS synthesis for ACS call conversations +- **Health Monitoring** - TTS service validation and voice testing + +For complete API documentation, see the [API Overview](../api/README.md). \ No newline at end of file diff --git a/docs/architecture/streaming-modes.md b/docs/architecture/streaming-modes.md new file mode 100644 index 00000000..d5e377da --- /dev/null +++ b/docs/architecture/streaming-modes.md @@ -0,0 +1,317 @@ +# ACS Streaming Modes Configuration + +The Real-Time Voice Agent supports multiple audio processing modes through the `ACS_STREAMING_MODE` configuration flag. This flag determines how audio data from Azure Communication Services (ACS) is processed, routed, and orchestrated within the application. 
+ +## Overview + +The `ACS_STREAMING_MODE` environment variable controls the audio processing pipeline, allowing you to choose between different approaches for handling real-time audio streams from ACS calls: + +```bash +# Set the streaming mode +export ACS_STREAMING_MODE=media # Default: Traditional media processing +export ACS_STREAMING_MODE=transcription # ACS transcription-only mode +export ACS_STREAMING_MODE=voice_live # Azure Voice Live integration +``` + +## Available Streaming Modes + +### 1. MEDIA Mode (Default) +**Configuration:** `ACS_STREAMING_MODE=media` + +Traditional bidirectional media processing with comprehensive speech services integration. + +**Audio Flow:** +``` +ACS Call Audio ➜ WebSocket ➜ STT Pool ➜ Orchestrator ➜ TTS Pool ➜ ACS Audio Output +``` + +**Features:** +- **Bi-directional PCM audio streaming** directly to/from ACS WebSocket +- **Connection pooling** for Azure Speech STT/TTS services +- **Orchestrator integration** for conversational logic processing +- **Session management** with Redis-backed state persistence +- **Real-time transcription** with speaker diarization support +- **Neural voice synthesis** with style and prosody control + +**Use Cases:** +- Traditional voice assistants and IVR systems +- Call center automation with human handoff +- Multi-turn conversations requiring context preservation +- Applications needing fine-grained control over speech processing + +**Configuration Example:** +```python +# API automatically uses MEDIA mode handlers +if ACS_STREAMING_MODE == StreamMode.MEDIA: + # Acquire STT and TTS clients from pools + stt_client = await app.state.stt_pool.acquire() + tts_client = await app.state.tts_pool.acquire() + + # Create media handler with orchestrator + handler = ACSMediaHandler( + websocket=websocket, + orchestrator_func=orchestrator, + recognizer=stt_client, + memory_manager=memory_manager, + session_id=session_id + ) +``` + +### 2. 
TRANSCRIPTION Mode +**Configuration:** `ACS_STREAMING_MODE=transcription` + +Audio-to-text processing focused on real-time transcription and analysis. + +**Audio Flow:** +``` +ACS Call Audio ➜ WebSocket ➜ Azure Speech Recognition ➜ Transcript Processing +``` + +**Features:** +- **Real-time transcription** of ACS call audio streams +- **Multi-language detection** with configurable candidate languages +- **Speaker diarization** for multi-participant calls +- **Streaming text output** via WebSocket to connected clients +- **Minimal latency** optimized for live transcription needs +- **No audio synthesis** - transcription-only pipeline + +**Use Cases:** +- Call transcription and logging systems +- Real-time captioning for accessibility +- Voice analytics and sentiment analysis +- Meeting transcription and note-taking applications + +**Configuration Example:** +```python +# API routes to transcription handler +elif ACS_STREAMING_MODE == StreamMode.TRANSCRIPTION: + await handler.handle_transcription_message(audio_message) +``` + +### 3. VOICE_LIVE Mode +**Configuration:** `ACS_STREAMING_MODE=voice_live` + +Advanced conversational AI using Azure Voice Live for sophisticated dialogue management. 
+ +**Audio Flow:** +``` +ACS Call Audio ➜ WebSocket ➜ Azure Voice Live Agent ➜ Direct Audio Response +``` + +**Features:** +- **Azure Voice Live integration** for advanced conversational AI +- **End-to-end audio processing** with minimal intermediate steps +- **Context-aware responses** using pre-trained conversation models +- **Low-latency interaction** optimized for natural conversation flow +- **Advanced orchestration** through Voice Live agents +- **Intelligent conversation management** with built-in dialogue state + +**Use Cases:** +- Advanced AI assistants with natural conversation flow +- Customer service automation with complex query handling +- Educational applications with interactive tutoring +- Healthcare applications with conversational interfaces + +**Pre-initialization Process:** +```python +# Voice Live agents are pre-initialized during call setup +if ACS_STREAMING_MODE == StreamMode.VOICE_LIVE: + # Create and connect Voice Live agent + agent_yaml = os.getenv("VOICE_LIVE_AGENT_YAML", + "apps/rtagent/backend/src/agents/Lvagent/agent_store/auth_agent.yaml") + lva_agent = build_lva_from_yaml(agent_yaml, enable_audio_io=False) + await asyncio.to_thread(lva_agent.connect) + + # Store agent for WebSocket session to claim later + await conn_manager.set_call_context(call_id, {"lva_agent": lva_agent}) +``` + +**Handler Integration:** +```python +# Voice Live handler with injected agent +handler = VoiceLiveHandler( + azure_endpoint=AZURE_VOICE_LIVE_ENDPOINT, + model_name=AZURE_VOICE_LIVE_MODEL, + session_id=session_id, + websocket=websocket, + orchestrator=orchestrator, + use_lva_agent=True, + lva_agent=injected_agent +) +``` + +### Validation and Error Handling + +The system includes comprehensive validation for streaming mode configuration: + +```python +# Enum-based validation with clear error messages +@classmethod +def from_string(cls, value: str) -> "StreamMode": + """Create StreamMode from string with validation""" + for mode in cls: + if mode.value == 
value: + return mode + raise ValueError( + f"Invalid stream mode: {value}. Valid options: {[m.value for m in cls]}" + ) +``` + +## API Integration + +### WebSocket Media Streaming + +The streaming mode affects how the media WebSocket endpoint processes audio: + +```python +@router.websocket("/stream") +async def acs_media_stream(websocket: WebSocket) -> None: + """WebSocket endpoint adapts behavior based on ACS_STREAMING_MODE""" + + # Create appropriate handler based on mode + handler = await _create_media_handler( + websocket=websocket, + call_connection_id=call_connection_id, + session_id=session_id, + orchestrator=orchestrator, + conn_id=conn_id + ) + + # Process messages according to mode + while connected: + msg = await websocket.receive_text() + + if ACS_STREAMING_MODE == StreamMode.MEDIA: + await handler.handle_media_message(msg) + elif ACS_STREAMING_MODE == StreamMode.TRANSCRIPTION: + await handler.handle_transcription_message(msg) + elif ACS_STREAMING_MODE == StreamMode.VOICE_LIVE: + await handler.handle_audio_data(msg) +``` + +### Status and Monitoring + +You can query the current streaming mode via the API: + +```bash +# Check current streaming configuration +curl https://your-api.com/api/v1/media/status + +# Response includes current mode +{ + "status": "available", + "streaming_mode": "voice_live", + "websocket_endpoint": "/api/v1/media/stream", + "features": { + "real_time_audio": true, + "transcription": true, + "orchestrator_support": true, + "session_management": true + } +} +``` + +## Performance Considerations + +### Resource Usage by Mode + +| Mode | STT Pool | TTS Pool | Voice Live Agent | Memory Usage | +|------|----------|----------|------------------|--------------| +| **MEDIA** | ✅ High | ✅ High | ❌ None | High | +| **TRANSCRIPTION** | ✅ Medium | ❌ None | ❌ None | Low | +| **VOICE_LIVE** | ❌ None | ❌ None | ✅ High | Medium | + +### Latency Characteristics + +- **MEDIA Mode**: 100-300ms (STT + Orchestrator + TTS pipeline) +- **TRANSCRIPTION 
Mode**: 50-150ms (STT only, no synthesis) +- **VOICE_LIVE Mode**: 200-400ms (End-to-end Voice Live processing) + +### Scaling Considerations + +```python +# Pool sizing recommendations by mode +MEDIA_MODE_POOLS = { + "stt_pool_size": 10, + "tts_pool_size": 10, + "max_concurrent_calls": 20 +} + +TRANSCRIPTION_MODE_POOLS = { + "stt_pool_size": 15, + "max_concurrent_calls": 50 # Lighter processing +} + +VOICE_LIVE_MODE_POOLS = { + "voice_live_pool_size": 5, # Resource intensive + "max_concurrent_calls": 10 +} +``` + +## Troubleshooting + +### Common Configuration Issues + +**Invalid Mode Error:** +```bash +ValueError: Invalid stream mode: invalid_mode. +Valid options: ['media', 'transcription', 'voice_live'] +``` +**Solution:** Check `ACS_STREAMING_MODE` environment variable spelling and case. + +**Voice Live Agent Not Found:** +```bash +RuntimeError: Voice Live agent YAML not found +``` +**Solution:** Ensure `VOICE_LIVE_AGENT_YAML` points to a valid agent configuration file. + +**Pool Resource Exhaustion:** +```bash +TimeoutError: Unable to acquire STT client from pool +``` +**Solution:** Increase pool size or reduce concurrent call limits based on your mode. + +### Debugging Mode Selection + +Enable debug logging to trace mode selection: + +```python +# Add to logging configuration +import logging +logging.getLogger("config.infrastructure").setLevel(logging.DEBUG) +logging.getLogger("api.v1.endpoints.media").setLevel(logging.DEBUG) +``` + +## Migration Guide + +### Switching Between Modes + +When changing streaming modes, consider the following: + +1. **Update Environment Variables:** + ```bash + # Old configuration + export ACS_STREAMING_MODE=media + + # New configuration + export ACS_STREAMING_MODE=voice_live + ``` + +2. **Restart Application Services:** + - Configuration changes require application restart + - Connection pools will be recreated with appropriate resources + - Existing WebSocket connections will complete with old mode + +3. 
**Update Client Integration:** + - WebSocket message handling may differ between modes + - Response formats and timing characteristics will change + - Test thoroughly in staging environment + +### Best Practices + +- **Development**: Start with `media` mode for full control and debugging +- **Production Transcription**: Use `transcription` mode for lightweight, high-throughput scenarios +- **Advanced AI**: Use `voice_live` mode for sophisticated conversational experiences +- **Monitoring**: Always monitor resource usage and latency after mode changes + +For detailed implementation examples and handler-specific documentation, see the [API Overview](../api/README.md) and [Architecture Overview](../architecture/README.md). \ No newline at end of file diff --git a/utils/images/ARTAGENT.png b/docs/assets/ARTAGENT.png similarity index 100% rename from utils/images/ARTAGENT.png rename to docs/assets/ARTAGENT.png diff --git a/utils/images/ARTAgentVimeoDemo.png b/docs/assets/ARTAgentVimeoDemo.png similarity index 100% rename from utils/images/ARTAgentVimeoDemo.png rename to docs/assets/ARTAgentVimeoDemo.png diff --git a/utils/images/ARTAgentarch.png b/docs/assets/ARTAgentarch.png similarity index 100% rename from utils/images/ARTAgentarch.png rename to docs/assets/ARTAgentarch.png diff --git a/utils/images/InsuAgent.png b/docs/assets/InsuAgent.png similarity index 100% rename from utils/images/InsuAgent.png rename to docs/assets/InsuAgent.png diff --git a/utils/images/LIVEVOICEApi.png b/docs/assets/LIVEVOICEApi.png similarity index 100% rename from utils/images/LIVEVOICEApi.png rename to docs/assets/LIVEVOICEApi.png diff --git a/utils/images/RTAgentArch.png b/docs/assets/RTAgentArch.png similarity index 100% rename from utils/images/RTAgentArch.png rename to docs/assets/RTAgentArch.png diff --git a/utils/images/arch.png b/docs/assets/arch.png similarity index 100% rename from utils/images/arch.png rename to docs/assets/arch.png diff --git a/utils/images/highlevelarch.png 
b/docs/assets/highlevelarch.png similarity index 100% rename from utils/images/highlevelarch.png rename to docs/assets/highlevelarch.png diff --git a/utils/images/lab1.png b/docs/assets/lab1.png similarity index 100% rename from utils/images/lab1.png rename to docs/assets/lab1.png diff --git a/utils/images/medagent.png b/docs/assets/medagent.png similarity index 100% rename from utils/images/medagent.png rename to docs/assets/medagent.png diff --git a/docs/assets/oad-styles.css b/docs/assets/oad-styles.css new file mode 100644 index 00000000..242dc52c --- /dev/null +++ b/docs/assets/oad-styles.css @@ -0,0 +1,145 @@ +/* OpenAPI Documentation (OAD) Custom Styles */ + +/* Table of Contents styling */ +.oad-toc { + background: #f8f9fa; + border: 1px solid #e9ecef; + border-radius: 0.25rem; + padding: 1rem; + margin-bottom: 2rem; +} + +.oad-toc h3 { + margin-top: 0; + color: #495057; + font-size: 1.1rem; + font-weight: 600; +} + +.oad-toc ul { + list-style-type: none; + padding-left: 0; +} + +.oad-toc li { + margin: 0.25rem 0; +} + +.oad-toc a { + text-decoration: none; + color: #007bff; + padding: 0.25rem 0; + display: block; +} + +.oad-toc a:hover { + color: #0056b3; + background-color: #f1f3f4; + padding-left: 0.5rem; + border-radius: 0.125rem; +} + +/* API sections styling */ +.oad-section { + margin-bottom: 3rem; +} + +.oad-section h2 { + color: #2c3e50; + border-bottom: 2px solid #3498db; + padding-bottom: 0.5rem; +} + +.oad-operation { + margin: 1.5rem 0; + border: 1px solid #e9ecef; + border-radius: 0.5rem; + overflow: hidden; +} + +.oad-operation-header { + background: #f8f9fa; + padding: 1rem; + border-bottom: 1px solid #e9ecef; +} + +.oad-method { + font-weight: bold; + padding: 0.25rem 0.5rem; + border-radius: 0.25rem; + color: white; + font-size: 0.875rem; + margin-right: 0.5rem; +} + +.oad-method.get { background-color: #28a745; } +.oad-method.post { background-color: #007bff; } +.oad-method.put { background-color: #ffc107; color: #212529; } 
+.oad-method.delete { background-color: #dc3545; } +.oad-method.patch { background-color: #6f42c1; } + +.oad-path { + font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; + font-size: 1rem; + color: #495057; +} + +.oad-summary { + font-size: 1.1rem; + font-weight: 600; + color: #2c3e50; + margin: 0.5rem 0; +} + +.oad-description { + color: #6c757d; + line-height: 1.5; +} + +/* Response and request styling */ +.oad-responses, .oad-parameters { + padding: 1rem; +} + +.oad-response-code { + font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; + font-weight: bold; + color: #28a745; +} + +.oad-schema { + background: #f8f9fa; + border: 1px solid #e9ecef; + border-radius: 0.25rem; + padding: 1rem; + margin: 0.5rem 0; + font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; + font-size: 0.875rem; + overflow-x: auto; +} + +/* Dark mode support */ +[data-md-color-scheme="slate"] .oad-toc { + background: #2d3748; + border-color: #4a5568; + color: #e2e8f0; +} + +[data-md-color-scheme="slate"] .oad-toc h3 { + color: #e2e8f0; +} + +[data-md-color-scheme="slate"] .oad-toc a { + color: #63b3ed; +} + +[data-md-color-scheme="slate"] .oad-toc a:hover { + background-color: #4a5568; + color: #90cdf4; +} + +[data-md-color-scheme="slate"] .oad-schema { + background: #2d3748; + border-color: #4a5568; + color: #e2e8f0; +} \ No newline at end of file diff --git a/utils/images/omnichannel_rt_voice.png b/docs/assets/omnichannel_rt_voice.png similarity index 100% rename from utils/images/omnichannel_rt_voice.png rename to docs/assets/omnichannel_rt_voice.png diff --git a/docs/DeploymentGuide.md b/docs/deployment/README.md similarity index 70% rename from docs/DeploymentGuide.md rename to docs/deployment/README.md index 13242542..10338e65 100644 --- a/docs/DeploymentGuide.md +++ b/docs/deployment/README.md @@ -1,69 +1,68 @@ -# Deployment Guide +# :material-rocket: Deployment Guide -> A comprehensive guide to deploy your ARTVoice Accelerator using Terraform infrastructure and Azure 
Container Apps. +!!! success "Production-Ready Deployment" + Comprehensive guide to deploy your Real-Time Voice Agent using Terraform infrastructure and Azure Container Apps. -## Infrastructure Overview +## :material-cloud: Infrastructure Overview -This deployment guide uses **Terraform** as the Infrastructure as Code (IaC) provider with **Azure Container Apps** for hosting. The infrastructure provides: +This deployment uses **Terraform** as Infrastructure as Code with **Azure Container Apps** for hosting, providing: -- **AI Services**: Azure OpenAI (GPT-4.1-mini, O3-mini models) + Speech Services with Live Voice API support -- **Communication**: Azure Communication Services for real-time voice and telephony integration -- **Data Layer**: Cosmos DB (MongoDB API) + Redis Enterprise + Blob Storage for persistent and cached data -- **Security**: Managed Identity authentication with role-based access control (RBAC) -- **Hosting**: Azure Container Apps with auto-scaling, built-in TLS, and container orchestration -- **Monitoring**: Application Insights + Log Analytics with OpenTelemetry distributed tracing +=== "Core Services" + - **:material-brain: AI Services**: Azure OpenAI (GPT-4 models) + Speech Services with Live Voice API + - **:material-phone: Communication**: Azure Communication Services for real-time voice and telephony + - **:material-database: Data Layer**: Cosmos DB (MongoDB API) + Redis Enterprise + Blob Storage + - **:material-security: Security**: Managed Identity with role-based access control (RBAC) -> **Infrastructure Details**: See the complete [Terraform Infrastructure README](../infra/terraform/README.md) for resource specifications and configuration options. 
+=== "Platform & Monitoring" + - **:material-docker: Hosting**: Azure Container Apps with auto-scaling and built-in TLS + - **:material-chart-line: Monitoring**: Application Insights + Log Analytics with OpenTelemetry tracing + - **:material-network: Networking**: Private endpoints and VNet integration for enhanced security -## Table of Contents +!!! info "Infrastructure Details" + See the complete **[Terraform Infrastructure README](https://github.com/Azure-Samples/art-voice-agent-accelerator/tree/main/infra/terraform/README.md)** for resource specifications and configuration options. -- [Prerequisites](#prerequisites) -- [Quick Start with Azure Developer CLI](#quick-start-with-azure-developer-cli) -- [Alternative: Direct Terraform Deployment](#alternative-direct-terraform-deployment) -- [Detailed Deployment Steps](#detailed-deployment-steps) - - [1. Environment Configuration](#1-environment-configuration) - - [2. Terraform Infrastructure Provisioning](#2-terraform-infrastructure-provisioning) - - [3. Application Deployment](#3-application-deployment) - - [4. Phone Number Configuration](#4-phone-number-configuration) - - [5. Connectivity Testing](#5-connectivity-testing) -- [Environment Management](#environment-management) -- [Backend Storage Configuration](#backend-storage-configuration) -- [Monitoring and Troubleshooting](#monitoring-and-troubleshooting) -- [Cleanup](#cleanup) -- [Advanced Configuration](#advanced-configuration) -- [Support](#support) +## :material-format-list-checks: Prerequisites ---- +!!! warning "Before You Begin" + Ensure you have the following tools and permissions configured. 
-## Prerequisites +| Tool | Version | Purpose | +| ------------------------------------------------------------------------------------------------ | ---------------- | -------------------------------------- | +| [Azure CLI](https://docs.microsoft.com/cli/azure/install-azure-cli) | >=2.50.0 | Azure resource management | +| [Azure Developer CLI (azd)](https://learn.microsoft.com/azure/developer/azure-developer-cli/install-azd) | Latest | Simplified deployment | +| [Terraform](https://developer.hashicorp.com/terraform/downloads) | >=1.1.7, <2.0.0 | Infrastructure as Code | +| [Docker](https://docs.docker.com/get-docker/) | 20.10+ | Containerization and local testing | +| Node.js | 18+ | Frontend development | +| Python | 3.11+ | Backend development | -Before you begin, ensure you have the following installed and configured: +### Azure Permissions -| Tool | Version | Purpose | -|------|---------|---------| -| [Azure CLI](https://docs.microsoft.com/cli/azure/install-azure-cli) | >=2.50.0 | Azure resource management | -| [Azure Developer CLI (azd)](https://learn.microsoft.com/azure/developer/azure-developer-cli/install-azd) | Latest | Simplified deployment | -| [Terraform](https://developer.hashicorp.com/terraform/downloads) | >=1.1.7, <2.0.0 | Infrastructure as Code | -| [Docker](https://docs.docker.com/get-docker/) | 20.10+ | Containerization and local testing | -| Node.js | 18+ | Frontend development | -| Python | 3.11+ | Backend development | +!!! 
danger "Required Permissions" + Your Azure account needs these permissions in the target subscription: + + - **Owner** or **Contributor** + **User Access Administrator** + - Permission to create service principals and managed identities + - Permission to assign roles to resources -**Additional Requirements:** -- Azure subscription with appropriate permissions (Contributor role) -- Terraform state storage (see [Backend Storage Configuration](#backend-storage-configuration)) +```bash title="Verify Azure permissions" +# Login to Azure +az login -> **Note**: This deployment uses Azure Container Apps which provides built-in TLS termination with public endpoints, eliminating the need for custom SSL certificate management. +# Check current subscription and permissions +az account show +az role assignment list --assignee $(az account show --query user.name -o tsv) --include-inherited +``` --- -## Quick Start with Azure Developer CLI +## :material-rocket: Quick Start with Azure Developer CLI -The easiest and **recommended** way to deploy this application is using Azure Developer CLI with Terraform backend: +The easiest and **recommended** way to deploy this application is using the Azure Developer CLI with its Terraform backend. ### Step 1: Clone and Initialize ```bash -git clone https://github.com/pablosalvador10/gbb-ai-audio-agent.git -cd gbb-ai-audio-agent +git clone https://github.com/Azure-Samples/art-voice-agent-accelerator.git +cd art-voice-agent-accelerator azd auth login azd init ``` @@ -82,7 +81,12 @@ azd up **Total deployment time**: ~15 minutes for complete infrastructure and application deployment. -> **Note**: The deployment includes multi-agent architecture support (ARTAgent, Live Voice Agent, and AI Foundry Agents) with intelligent model routing between O3-mini and GPT-4.1-mini based on complexity requirements. +!!! 
info "Additional Resources" + For more comprehensive guidance on development and operations: + + - **[Repository Structure](../guides/repository-structure.md)** - Understand the codebase layout + - **[Utilities & Services](../guides/utilities.md)** - Core infrastructure components + - **[Local Development Guide](../getting-started/local-development.md)** - Set up and test on your local machine --- @@ -119,18 +123,11 @@ terraform plan terraform apply ``` -### Step 4: Generate Environment Files -```bash -cd ../../ # Return to repo root -make generate_env_from_terraform -make update_env_with_secrets -``` +### Step 4: Deploy your application -### Step 5: Deploy Applications -```bash -make deploy_backend -make deploy_frontend -``` +Review the deployment steps to deploy a container application after infrastructure is provisioned. + +[Quickstart: Deploy your first container app with containerapp up](https://learn.microsoft.com/en-us/azure/container-apps/get-started?tabs=bash) --- @@ -176,7 +173,7 @@ disable_local_auth = true redis_sku = "MemoryOptimized_M10" # OpenAI model deployments with latest models -openai_models = [ +model_deployments = [ { name = "gpt-4-1-mini" version = "2024-11-20" @@ -195,7 +192,6 @@ openai_models = [ ### 2. Terraform Infrastructure Provisioning Deploy Azure resources using Terraform: - #### With Azure Developer CLI (Recommended) ```bash # Full deployment (provisions infrastructure and deploys applications) @@ -204,6 +200,19 @@ azd up # Infrastructure only azd provision ``` +**What happens during `azd up`:** + +1. **Pre-provision hooks** (configured in [`azure.yaml`](https://github.com/Azure-Samples/art-voice-agent-accelerator/blob/main/azure.yaml)) automatically set up Terraform backend storage +2. **Infrastructure provisioning** uses Terraform modules in [`infra/terraform/`](https://github.com/Azure-Samples/art-voice-agent-accelerator/blob/main/infra/terraform/) +3. 
**Post-provision hooks** configure phone numbers and generate environment files +4. **Application deployment** builds and deploys containers to Azure Container Apps + +**Automation scripts** (located in [`devops/scripts/azd/`](https://github.com/Azure-Samples/art-voice-agent-accelerator/tree/main/devops/scripts/azd/)): + +- [`preprovision.sh`](https://github.com/Azure-Samples/art-voice-agent-accelerator/blob/main/devops/scripts/azd/preprovision.sh) - Sets up Terraform backend storage and validates prerequisites +- [`postprovision.sh`](https://github.com/Azure-Samples/art-voice-agent-accelerator/blob/main/devops/scripts/azd/postprovision.sh) - Configures ACS phone numbers and generates environment files + +See [`azure.yaml`](https://github.com/Azure-Samples/art-voice-agent-accelerator/blob/main/azure.yaml) for the complete hook configuration and script orchestration. #### With Direct Terraform ```bash @@ -214,6 +223,7 @@ terraform apply ``` **Resources Created:** + - Azure Container Apps Environment with auto-scaling and ingress management - Azure OpenAI Service (GPT-4.1-mini, O3-mini models) with intelligent model routing - Azure Communication Services with Live Voice API integration @@ -225,7 +235,7 @@ terraform apply - Application Insights & Log Analytics with OpenTelemetry distributed tracing - User-assigned managed identities with comprehensive RBAC permissions -> For detailed infrastructure information, see the [Terraform Infrastructure README](../infra/terraform/README.md). +> For detailed infrastructure information, see the [Terraform Infrastructure README](https://github.com/Azure-Samples/art-voice-agent-accelerator/tree/main/infra/terraform/README.md). ### 3. Application Deployment @@ -564,7 +574,7 @@ az keyvault show \ --query "properties.accessPolicies" ``` -> **Need more help?** For detailed troubleshooting steps, diagnostic commands, and solutions to common issues, see the comprehensive [Troubleshooting Guide](Troubleshooting.md). 
+> **Need more help?** For detailed troubleshooting steps, diagnostic commands, and solutions to common issues, see the comprehensive [Troubleshooting Guide](../operations/troubleshooting.md). --- @@ -615,7 +625,7 @@ container_apps_configuration = { Customize OpenAI model deployments for the latest supported models: ```hcl -openai_models = [ +model_deployments = [ { name = "gpt-4-1-mini" version = "2024-11-20" @@ -660,50 +670,13 @@ cosmosdb_location = "westus" --- -## Support - -Having deployment issues? Follow this troubleshooting checklist: - -1. **Check Azure Portal** for resource status -2. **Review container app logs** for error details -3. **Verify Terraform state** and resource configuration -4. **Check managed identity permissions** and RBAC assignments -5. **Verify environment variables** in Container Apps -6. **Test connectivity** to Azure services (OpenAI, Speech, Redis) - -### Quick Diagnostic Commands - -```bash -# Check deployment status and health -azd show - -# Verify backend health with detailed output -curl -v $(azd env get-value BACKEND_CONTAINER_APP_URL)/health - -# Check container logs with error filtering -az containerapp logs show \ - --name $(azd env get-value BACKEND_CONTAINER_APP_NAME) \ - --resource-group $(azd env get-value AZURE_RESOURCE_GROUP) \ - --tail 50 --grep "ERROR\|WARN\|Exception" - -# Verify managed identity permissions and role assignments -az role assignment list \ - --assignee $(azd env get-value BACKEND_UAI_PRINCIPAL_ID) \ - --output table - -# Test agent endpoints specifically -BACKEND_URL=$(azd env get-value BACKEND_CONTAINER_APP_URL) -curl $BACKEND_URL/api/v1/agents/artagent/health -curl $BACKEND_URL/api/v1/agents/lvagent/health -``` - -### Additional Resources +## Support & Next Steps -- [Terraform Infrastructure README](../infra/terraform/README.md) - Detailed infrastructure documentation -- [Troubleshooting Guide](Troubleshooting.md) - Comprehensive problem-solving guide -- [Azure Container Apps 
Documentation](https://learn.microsoft.com/en-us/azure/container-apps/) - Official Microsoft documentation -- [Azure Communication Services Docs](https://learn.microsoft.com/en-us/azure/communication-services/) - ACS specific guidance and API reference -- [Azure AI Speech Live Voice API](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/real-time-synthesis) - Live Voice API documentation -- [ARTVoice Local Development Guide](quickstart-local-development.md) - Local setup and testing +!!! tip "Additional Resources & Best Practices" + Always test locally first to isolate issues before deploying to Azure. Use the comprehensive load testing framework in `tests/load/` to validate performance under realistic conditions. -> **Pro Tip**: Always test locally first using the development setup in `docs/quickstart-local-development.md` to isolate issues before deploying to Azure. Use the comprehensive load testing framework in `tests/load/` to validate performance under realistic conditions. 
+ - **[Local Development Guide](../getting-started/local-development.md)** - Set up and test on your local machine + - **[Troubleshooting Guide](../operations/troubleshooting.md)** - Comprehensive problem-solving guide + - **[Repository Structure](../guides/repository-structure.md)** - Understand the codebase layout + - **[Utilities & Services](../guides/utilities.md)** - Core infrastructure components + - **[Terraform Infrastructure README](https://github.com/Azure-Samples/art-voice-agent-accelerator/tree/main/infra/terraform/README.md)** - Detailed infrastructure documentation diff --git a/docs/CICDGuide.md b/docs/deployment/cicd.md similarity index 100% rename from docs/CICDGuide.md rename to docs/deployment/cicd.md diff --git a/docs/PathToProduction.md b/docs/deployment/production.md similarity index 100% rename from docs/PathToProduction.md rename to docs/deployment/production.md diff --git a/docs/getting-started/README.md b/docs/getting-started/README.md new file mode 100644 index 00000000..bd7f6bba --- /dev/null +++ b/docs/getting-started/README.md @@ -0,0 +1,123 @@ +# :material-rocket: Getting Started + +!!! success "Real-Time Voice AI Accelerator" + Get your voice agent running with Azure Communication Services, Speech Services, and AI in just a few steps. + +## :material-check-circle: Prerequisites + +=== "System Requirements" + - **Python**: 3.11 or higher + - **Operating System**: Windows 10+, macOS 10.15+, or Linux + - **Memory**: Minimum 4GB RAM (8GB recommended) + - **Network**: Internet connectivity for Azure services + +=== "Azure Requirements" + - **Azure Subscription**: [Create one for free](https://azure.microsoft.com/free/) if you don't have one + - **Azure CLI**: [Install Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) for resource management + + !!! 
tip "Microsoft Learn Resources" + - **[Azure Free Account Setup](https://learn.microsoft.com/en-us/azure/cost-management-billing/manage/create-free-services)** - Step-by-step account creation + - **[Azure CLI Fundamentals](https://learn.microsoft.com/en-us/cli/azure/get-started-with-azure-cli)** - Essential CLI commands + +## :material-path: Learning Paths + +=== "🚀 Quick Start (15 minutes)" + **Get up and running fast**: + + 1. **[Local Development Guide](local-development.md)** - Complete setup with raw commands + 2. **[Architecture Overview](../architecture/README.md)** - Understand the system design + 3. **[API Reference](../api/README.md)** - Explore available endpoints + + **Best for**: Developers who want to see the accelerator in action immediately + +=== "🏗️ Infrastructure First" + **Set up Azure resources properly**: + + 1. **[Production Deployment](../deployment/production.md)** - Infrastructure provisioning + 2. **[Configuration Details](configuration.md)** - Advanced configuration options + 3. **[Local Development Guide](local-development.md)** - Connect to your infrastructure + + **Best for**: Architects and teams planning production deployments + +=== "🔧 Deep Dive" + **Understand the complete system**: + + 1. **[Architecture Overview](../architecture/README.md)** - System design and patterns + 2. **[Data Flow Patterns](../architecture/data-flows.md)** - Processing pipeline architecture + 3. **[LLM Orchestration](../architecture/llm-orchestration.md)** - AI routing and conversation management + 4. **[Operations Guide](../operations/monitoring.md)** - Monitoring and troubleshooting + + **Best for**: Technical leads and teams building custom voice applications + +## :material-microsoft-azure: Azure Setup Requirements + +!!! 
note "Required Azure Resources" + The accelerator requires these Azure services for full functionality: + +| Service | Purpose | Required For | +|---------|---------|--------------| +| **Speech Services** | Text-to-Speech, Speech-to-Text | All voice features | +| **Communication Services** | Phone calls, WebSocket media | Phone integration | +| **AI Foundry / OpenAI** | Conversation intelligence | AI agent responses | +| **Redis Cache** | Session state management | Multi-turn conversations | +| **Cosmos DB** | Conversation persistence | Analytics, compliance | + +**Quick Azure Setup**: +```bash +# Clone the repository +git clone https://github.com/Azure-Samples/art-voice-agent-accelerator.git +cd art-voice-agent-accelerator + +# Deploy infrastructure (choose one) +azd provision # Azure Developer CLI (recommended) +# or use Terraform/Bicep directly +``` + +## :material-compass: Development Approaches + +=== "🏃‍♂️ Fast Track" + **Start developing immediately**: + + - **Goal**: Voice agent running locally in 15 minutes + - **Path**: [Local Development Guide](local-development.md) + - **Infrastructure**: Minimal (Speech Services only) + - **Best for**: Proof of concepts, learning, simple demos + +=== "🏭 Production Ready" + **Enterprise deployment preparation**: + + - **Goal**: Scalable, secure, monitored deployment + - **Path**: [Production Deployment](../deployment/production.md) → [Local Development](local-development.md) + - **Infrastructure**: Complete (all Azure services) + - **Best for**: Production applications, enterprise environments + +=== "🔬 Custom Development" + **Extend and customize the accelerator**: + + - **Goal**: Build custom voice applications + - **Path**: [Architecture Deep Dive](../architecture/README.md) → [Local Development](local-development.md) + - **Infrastructure**: As needed for your use case + - **Best for**: Custom voice solutions, specialized industries + +## :material-help: Getting Help + +!!! 
info "Community & Support Resources" + + **Documentation**: + + - **[Troubleshooting Guide](../operations/troubleshooting.md)** - Common issues and solutions + - **[API Reference](../api/README.md)** - Complete endpoint documentation + - **[Examples & Samples](../examples/README.md)** - Practical implementation examples + + **Community**: + + - **[GitHub Issues](https://github.com/Azure-Samples/art-voice-agent-accelerator/issues)** - Report bugs and request features + - **[GitHub Discussions](https://github.com/Azure-Samples/art-voice-agent-accelerator/discussions)** - Community Q&A + - **[Microsoft Q&A](https://learn.microsoft.com/en-us/answers/topics/azure-speech.html)** - Official Microsoft support + +--- + +## :material-arrow-right: What's Next? + +Choose your path above and start building your voice-powered applications! Most developers find success starting with the **[Local Development Guide](local-development.md)** to see the accelerator in action immediately. + +!!! tip "New to Voice AI?" + Check out the **[Architecture Overview](../architecture/README.md)** first to understand how real-time voice processing works with Azure Communication Services and Speech Services. \ No newline at end of file diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md new file mode 100644 index 00000000..6f8497a4 --- /dev/null +++ b/docs/getting-started/configuration.md @@ -0,0 +1,435 @@ +# :material-cog: Configuration Guide + +!!! info "Fine-Tune Your Voice Agent" + Comprehensive configuration options for environment variables, authentication, and optional features. + +## :material-file-settings: Environment Setup + +### Step 1: Environment File Creation + +!!! tip "Quick Setup" + Start with the provided template for all required variables. + +```bash title="Copy and configure environment template" +# Copy the environment template +cp .env.sample .env + +# Edit with your preferred editor +code .env # VS Code +# or nano .env, vim .env, etc.
+``` + +### Step 2: Required Configuration + +=== "Azure Speech Services" + | Variable | Required | Description | Example | + |----------|----------|-------------|---------| + | `AZURE_SPEECH_KEY` | ✅ (unless using managed identity) | Speech resource key | `1a2b3c4d5e6f...` | + | `AZURE_SPEECH_REGION` | ✅ | Azure region identifier | `eastus`, `westeurope` | + | `AZURE_SPEECH_ENDPOINT` | Optional | Custom endpoint URL | `https://custom.cognitiveservices.azure.com` | + | `AZURE_SPEECH_RESOURCE_ID` | Optional | Full resource ID for managed identity | `/subscriptions/.../accounts/speech-svc` | + +=== "Azure Communication Services" + | Variable | Required | Description | Example | + |----------|----------|-------------|---------| + | `AZURE_COMMUNICATION_CONNECTION_STRING` | ✅ for call automation | ACS connection string | `endpoint=https://...;accesskey=...` | + | `ACS_RESOURCE_CONNECTION_STRING` | Alternative | Legacy naming convention | Same format as above | + +=== "Optional Services" + | Variable | Required | Description | Example | + |----------|----------|-------------|---------| + | `AZURE_OPENAI_ENDPOINT` | Optional | Azure OpenAI service endpoint | `https://my-openai.openai.azure.com` | + | `AZURE_OPENAI_KEY` | Optional | Azure OpenAI API key | `sk-...` | + | `REDIS_CONNECTION_STRING` | For session state | Redis cache connection | `redis://localhost:6379` | + +!!! 
info "Microsoft Learn Resources" + - **[Speech Services Keys](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/overview#create-a-speech-resource-in-the-azure-portal)** - Get your Speech Services credentials + - **[Communication Services Setup](https://learn.microsoft.com/en-us/azure/communication-services/quickstarts/create-communication-resource)** - Create ACS resources + - **[Azure OpenAI Service](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource)** - Set up OpenAI integration + +## :material-security: Managed Identity (Recommended for Production) + +!!! success "Enhanced Security" + Use managed identity to eliminate API keys in production environments. + +### Configuration for Managed Identity + +```bash title="Managed identity environment variables" +# Disable API key authentication +AZURE_SPEECH_KEY="" + +# Required: Region and Resource ID +AZURE_SPEECH_REGION=eastus +AZURE_SPEECH_RESOURCE_ID=/subscriptions/your-subscription-id/resourceGroups/your-resource-group/providers/Microsoft.CognitiveServices/accounts/your-speech-resource + +# Enable managed identity +USE_MANAGED_IDENTITY=true +``` + +### Azure Role Assignments + +=== "Required Roles" + **For Speech Services**: + ```bash title="Assign Speech Services role" + # Get your managed identity principal ID + IDENTITY_PRINCIPAL_ID=$(az identity show \ + --name your-managed-identity \ + --resource-group your-resource-group \ + --query principalId -o tsv) + + # Assign Cognitive Services User role + az role assignment create \ + --assignee $IDENTITY_PRINCIPAL_ID \ + --role "Cognitive Services User" \ + --scope "/subscriptions/your-subscription-id/resourceGroups/your-resource-group/providers/Microsoft.CognitiveServices/accounts/your-speech-resource" + ``` + +=== "Optional Roles" + **For Azure OpenAI**: + ```bash title="Assign OpenAI role" + az role assignment create \ + --assignee $IDENTITY_PRINCIPAL_ID \ + --role "Cognitive Services OpenAI User" \ + --scope "/subscriptions/your-subscription-id/resourceGroups/your-resource-group/providers/Microsoft.CognitiveServices/accounts/your-openai-resource" + ``` + +!!!
info "Microsoft Learn Resources" + - **[Managed Identity Overview](https://learn.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/overview)** - Understanding managed identities + - **[Role-Based Access Control](https://learn.microsoft.com/en-us/azure/role-based-access-control/overview)** - Azure RBAC fundamentals + +## :material-microphone: Voice Configuration + +!!! tip "Customization Options" + Tailor voice characteristics for your specific use case and audience. + +### Default Voice Settings + +Customize default voices via `apps/rtagent/backend/config/voice_config.py`. You can override values with environment variables: + +=== "Voice Selection" + ```bash title="Voice configuration options" + # Primary voice selection + DEFAULT_VOICE_ALIAS=support_contact_center + DEFAULT_VOICE_NAME=en-US-JennyMultilingualNeural + + # Voice characteristics + DEFAULT_VOICE_STYLE=customer-service + DEFAULT_VOICE_RATE=+10% + DEFAULT_VOICE_PITCH=medium + ``` + +=== "Advanced Settings" + ```bash title="Advanced voice options" + # Audio quality settings + AUDIO_OUTPUT_FORMAT=audio-24khz-48kbitrate-mono-mp3 + SAMPLE_RATE=24000 + + # Streaming configuration + ENABLE_STREAMING=true + STREAM_CHUNK_SIZE=1024 + + # Pronunciation and SSML + ENABLE_SSML_PROCESSING=true + PRONUNCIATION_LEXICON_URI=https://example.com/lexicon.xml + ``` + +### Voice Aliases + +Configure voice aliases for different scenarios: + +| Alias | Voice | Style | Use Case | +|-------|-------|-------|----------| +| `support_contact_center` | `en-US-JennyMultilingualNeural` | `customer-service` | Customer support calls | +| `sales_assistant` | `en-US-AriaNeural` | `friendly` | Sales and marketing | +| `technical_narrator` | `en-US-BrianNeural` | `newscast` | Technical documentation | +| `casual_chat` | `en-US-SaraNeural` | `chat` | Informal conversations | + +!!! 
info "Microsoft Learn Resources" + - **[Voice Gallery](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)** - Browse all available voices + - **[SSML Reference](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup)** - Speech Synthesis Markup Language + - **[Voice Tuning](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/how-to-speech-synthesis-viseme)** - Advanced voice customization + +## :material-chart-line: Telemetry & Observability + +!!! success "Production Monitoring" + Enable comprehensive monitoring and tracing for production deployments. + +### OpenTelemetry Configuration + +```bash title="OpenTelemetry environment variables" +# Azure Monitor integration +OTEL_EXPORTER_OTLP_ENDPOINT=https://your-endpoint.monitor.azure.com/v1/traces +OTEL_EXPORTER_OTLP_HEADERS="Authorization=Bearer your-access-token" +OTEL_SERVICE_NAME=rt-voice-agent +OTEL_SERVICE_VERSION=1.0.0 + +# Service identification +OTEL_RESOURCE_ATTRIBUTES=service.name=rt-voice-agent,service.version=1.0.0,deployment.environment=production + +# Tracing configuration +OTEL_TRACES_EXPORTER=otlp +OTEL_METRICS_EXPORTER=otlp +OTEL_LOGS_EXPORTER=otlp +``` + +### Logging Configuration + +=== "Development" + ```bash title="Development logging" + LOG_LEVEL=DEBUG + LOG_FORMAT=human-readable + ENABLE_CORRELATION_ID=true + LOG_TO_FILE=false + ``` + +=== "Production" + ```bash title="Production logging" + LOG_LEVEL=INFO + LOG_FORMAT=json + ENABLE_CORRELATION_ID=true + LOG_TO_FILE=true + LOG_FILE_PATH=/var/log/voice-agent/app.log + LOG_ROTATION_SIZE=10MB + LOG_RETENTION_DAYS=30 + ``` + +### Application Insights Setup + +!!! tip "Quick Setup" + Use the Makefile command to bootstrap Application Insights automatically. + +```bash title="Bootstrap Application Insights" +# Configure Azure Monitor and Application Insights +make configure_observability + +# This will: +# 1. Create Application Insights workspace +# 2. Configure connection strings +# 3.
Set up log analytics workspace +# 4. Update .env with correct values +``` + +!!! info "Microsoft Learn Resources" + - **[Application Insights](https://learn.microsoft.com/en-us/azure/azure-monitor/app/app-insights-overview)** - Application performance monitoring + - **[OpenTelemetry with Azure](https://learn.microsoft.com/en-us/azure/azure-monitor/app/opentelemetry-enable)** - OpenTelemetry integration guide + - **[Log Analytics](https://learn.microsoft.com/en-us/azure/azure-monitor/logs/log-analytics-overview)** - Centralized logging solution + +## :material-folder: Storage and File Management + +### Local Storage Configuration + +```bash title="Storage environment variables" +# Audio output configuration +AUDIO_OUTPUT_DIR=./output/audio +ENABLE_AUDIO_CACHE=true +AUDIO_CACHE_TTL=3600 # 1 hour in seconds + +# Application cache +VOICE_AGENT_CACHE_DIR=./cache +CACHE_MAX_SIZE=1GB + +# Temporary files +TEMP_FILE_DIR=./tmp +TEMP_FILE_CLEANUP_INTERVAL=300 # 5 minutes +``` + +### Headless Environment Settings + +!!! warning "CI/CD and Headless Deployments" + Disable audio playback for automated environments and server deployments. + +```bash title="Headless configuration" +# Disable local audio playback +TTS_ENABLE_LOCAL_PLAYBACK=false + +# Headless environment detection +FORCE_HEADLESS_MODE=true + +# Alternative audio output +AUDIO_OUTPUT_FORMAT=file # Options: file, stream, buffer +SAVE_AUDIO_FILES=true # Save to disk for debugging +``` + +## :material-key: Secrets Management + +!!! danger "Security Best Practices" + Never commit secrets to version control. Use secure secret management for all environments. + +### Local Development + +=== "Using direnv" + ```bash title="Setup direnv for automatic environment loading" + # Install direnv (macOS) + brew install direnv + + # Add to shell configuration + echo 'eval "$(direnv hook zsh)"' >> ~/.zshrc + source ~/.zshrc + + # Create .envrc file + echo "dotenv .env" > .envrc + direnv allow .
+ ``` + +=== "Using python-dotenv" + ```python title="Load environment variables in Python" + from dotenv import load_dotenv + import os + + # Load .env file + load_dotenv() + + # Access variables + speech_key = os.getenv('AZURE_SPEECH_KEY') + speech_region = os.getenv('AZURE_SPEECH_REGION') + ``` + +### GitHub Actions + +```yaml title="GitHub Actions secrets configuration" +# .github/workflows/deploy.yml +env: + AZURE_SPEECH_KEY: ${{ secrets.AZURE_SPEECH_KEY }} + AZURE_SPEECH_REGION: ${{ secrets.AZURE_SPEECH_REGION }} + AZURE_COMMUNICATION_CONNECTION_STRING: ${{ secrets.ACS_CONNECTION_STRING }} +``` + +**Setup Steps**: + +1. Go to **Settings → Secrets and variables → Actions** +2. Click **New repository secret** +3. Add each required secret from your `.env` file + +### Azure Key Vault Integration + +=== "Terraform/AZD Deployment" + ```bash title="Sync Key Vault secrets to local environment" + # After infrastructure deployment + make update_env_with_secrets + + # This will: + # 1. Read secrets from Azure Key Vault + # 2. Update your local .env file + # 3. Validate all required variables are set + ``` + +=== "Manual Key Vault Setup" + ```bash title="Azure Key Vault commands" + # Store secrets in Key Vault + az keyvault secret set \ + --vault-name your-key-vault \ + --name "azure-speech-key" \ + --value "your-speech-key-here" + + # Retrieve secrets + az keyvault secret show \ + --vault-name your-key-vault \ + --name "azure-speech-key" \ + --query "value" -o tsv + ``` + +### Environment Validation + +```bash title="Validate environment configuration" +# Check required variables are set +python -c " +import os +required_vars = [ + 'AZURE_SPEECH_REGION', + 'AZURE_COMMUNICATION_CONNECTION_STRING' +] + +missing = [var for var in required_vars if not os.getenv(var)] +if missing: + print(f'❌ Missing required variables: {missing}') + exit(1) +else: + print('✅ All required environment variables are set') +" +``` + +!!!
info "Microsoft Learn Resources" + - **[Azure Key Vault](https://learn.microsoft.com/en-us/azure/key-vault/general/overview)** - Secure secret management + - **[Key Vault Integration](https://learn.microsoft.com/en-us/azure/key-vault/general/tutorial-net-create-vault-azure-web-app)** - Application integration patterns + - **[GitHub Actions with Azure](https://learn.microsoft.com/en-us/azure/developer/github/connect-from-azure)** - Secure GitHub workflows + +## :material-check-circle: Configuration Validation + +### Environment Health Check + +```python title="Comprehensive configuration validation" +#!/usr/bin/env python3 +"""Configuration validation script""" + +import os +from typing import Dict, List, Tuple + +def validate_config() -> Tuple[bool, List[str]]: + """Validate all configuration settings.""" + issues = [] + + # Required variables + required = { + 'AZURE_SPEECH_REGION': 'Azure Speech Services region', + 'AZURE_COMMUNICATION_CONNECTION_STRING': 'Azure Communication Services connection', + } + + # Check managed identity vs API key + use_managed_identity = os.getenv('USE_MANAGED_IDENTITY', '').lower() == 'true' + + if use_managed_identity: + if not os.getenv('AZURE_SPEECH_RESOURCE_ID'): + issues.append('AZURE_SPEECH_RESOURCE_ID required for managed identity') + else: + if not os.getenv('AZURE_SPEECH_KEY'): + issues.append('AZURE_SPEECH_KEY required (or enable managed identity)') + + # Check required variables + for var, description in required.items(): + if not os.getenv(var): + issues.append(f'Missing {var} ({description})') + + # Validate region format + region = os.getenv('AZURE_SPEECH_REGION', '') + if region and ' ' in region: + issues.append(f'Invalid region format: "{region}". 
Use format like "eastus", not "East US"') + + return len(issues) == 0, issues + +if __name__ == '__main__': + valid, issues = validate_config() + if valid: + print('✅ Configuration validation passed') + else: + print('❌ Configuration validation failed:') + for issue in issues: + print(f' - {issue}') +``` + +### Quick Configuration Test + +```bash title="Quick configuration test" +# Run configuration validation +python scripts/validate_config.py + +# Test Speech Services connection +python -c " +from src.speech.text_to_speech import SpeechSynthesizer +import os + +try: + synthesizer = SpeechSynthesizer( + key=os.getenv('AZURE_SPEECH_KEY'), + region=os.getenv('AZURE_SPEECH_REGION') + ) + if synthesizer.validate_configuration(): + print('✅ Speech Services configuration valid') + else: + print('❌ Speech Services configuration invalid') +except Exception as e: + print(f'❌ Error: {e}') +" +``` + +--- + +!!! success "Configuration Complete" + Your Real-Time Voice Agent is now configured and ready for deployment. Next, explore the [API Reference](../api/README.md) to start building your voice application. + diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md deleted file mode 100644 index c5560fdf..00000000 --- a/docs/getting-started/installation.md +++ /dev/null @@ -1,204 +0,0 @@ -# Installation Guide - -Complete installation instructions for the Real-Time Voice Agent. - -## System Requirements - -- **Python**: 3.11 or higher -- **Operating System**: Windows 10+, macOS 10.15+, or Linux -- **Memory**: Minimum 4GB RAM (8GB recommended) -- **Network**: Internet connectivity for Azure services - -## Azure Prerequisites - -### 1. Azure Subscription -You'll need an active Azure subscription. [Create one for free](https://azure.microsoft.com/free/) if you don't have one. - -### 2. Azure Speech Services Resource -Create a Speech Services resource in the Azure portal: - -1. Go to [Azure Portal](https://portal.azure.com) -2. 
Click "Create a resource" -3. Search for "Speech Services" -4. Select your subscription and resource group -5. Choose a region (e.g., East US, West Europe) -6. Select pricing tier (F0 for free tier, S0 for standard) - -### 3. Get Your Credentials -After creating the resource: -- Copy the **Key** from the "Keys and Endpoint" section -- Note the **Region** where you created the resource -- Optionally copy the **Resource ID** for managed identity authentication - -## Local Development Setup - -### 1. Clone Repository -```bash -git clone https://github.com/pablosalvador10/gbb-ai-audio-agent.git -cd gbb-ai-audio-agent -``` - -### 2. Python Environment -We recommend using a virtual environment: - -```bash -# Using venv -python -m venv audioagent -source audioagent/bin/activate # Linux/macOS -# audioagent\Scripts\activate # Windows - -# Using conda -conda create -n audioagent python=3.11 -conda activate audioagent -``` - -### 3. Install Dependencies -```bash -# Core dependencies -pip install -r requirements.txt - -# Development dependencies (optional) -pip install -r requirements-dev.txt -``` - -### 4. Environment Configuration -```bash -# Copy environment template -cp .env.example .env - -# Edit .env file with your credentials -nano .env # or use your preferred editor -``` - -Required environment variables: -```bash -# Azure Speech Services -AZURE_SPEECH_KEY=your-speech-key-here -AZURE_SPEECH_REGION=eastus - -# Optional: Custom endpoint -AZURE_SPEECH_ENDPOINT=https://your-custom-endpoint.cognitiveservices.azure.com - -# Optional: For managed identity (production) -AZURE_SPEECH_RESOURCE_ID=/subscriptions/xxx/resourceGroups/xxx/providers/Microsoft.CognitiveServices/accounts/xxx - -# Optional: Audio playback control -TTS_ENABLE_LOCAL_PLAYBACK=true -``` - -## Production Deployment - -### Docker Container -```dockerfile -FROM python:3.11-slim - -WORKDIR /app -COPY requirements.txt . 
-RUN pip install -r requirements.txt - -COPY src/ ./src/ -COPY utils/ ./utils/ - -# Set environment variables -ENV AZURE_SPEECH_REGION=eastus -ENV TTS_ENABLE_LOCAL_PLAYBACK=false - -EXPOSE 8000 -CMD ["python", "-m", "src.main"] -``` - -### Azure Container Instances -```bash -# Build and push to Azure Container Registry -az acr build --registry myregistry --image voice-agent:latest . - -# Deploy to Container Instances with managed identity -az container create \ - --resource-group myResourceGroup \ - --name voice-agent \ - --image myregistry.azurecr.io/voice-agent:latest \ - --assign-identity \ - --environment-variables AZURE_SPEECH_REGION=eastus -``` - -### Azure App Service -```bash -# Deploy to App Service with system-assigned managed identity -az webapp create \ - --resource-group myResourceGroup \ - --plan myServicePlan \ - --name my-voice-agent \ - --runtime "PYTHON|3.11" - -# Enable managed identity -az webapp identity assign \ - --resource-group myResourceGroup \ - --name my-voice-agent -``` - -## Verify Installation - -### 1. Test Import -```python -python -c " -from src.speech.text_to_speech import SpeechSynthesizer -print('✅ Successfully imported SpeechSynthesizer') -" -``` - -### 2. Test Configuration -```python -from src.speech.text_to_speech import SpeechSynthesizer -import os - -synthesizer = SpeechSynthesizer( - key=os.getenv('AZURE_SPEECH_KEY'), - region=os.getenv('AZURE_SPEECH_REGION') -) - -if synthesizer.validate_configuration(): - print('✅ Configuration is valid') -else: - print('❌ Configuration validation failed') -``` - -### 3. 
Test Basic Synthesis -```python -# Quick synthesis test -audio_data = synthesizer.synthesize_speech("Hello, world!") -print(f'✅ Generated {len(audio_data)} bytes of audio') -``` - -## Troubleshooting - -### Common Installation Issues - -**Import Error: No module named 'azure'** -```bash -pip install azure-cognitiveservices-speech -``` - -**Authentication Failed** -- Verify your `AZURE_SPEECH_KEY` is correct -- Check that your `AZURE_SPEECH_REGION` matches your resource -- Ensure your Azure subscription is active - -**Audio Hardware Issues** -- Set `TTS_ENABLE_LOCAL_PLAYBACK=false` for headless environments -- Use `playback="never"` mode in production containers - -**Network Connectivity** -- Ensure outbound HTTPS (port 443) access to Azure endpoints -- Check firewall rules for `*.cognitiveservices.azure.com` - -### Getting Help - -- **Documentation**: [API Reference](../api/overview.md) -- **Examples**: [Usage Examples](../examples/basic-usage.md) -- **Issues**: [GitHub Issues](https://github.com/pablosalvador10/gbb-ai-audio-agent/issues) - -## Next Steps - -- **[Quick Start Guide](quickstart.md)** - Get started with basic usage -- **[Configuration](configuration.md)** - Advanced configuration options -- **[API Reference](../api/overview.md)** - Complete API documentation diff --git a/docs/getting-started/local-development.md b/docs/getting-started/local-development.md new file mode 100644 index 00000000..cf60aae0 --- /dev/null +++ b/docs/getting-started/local-development.md @@ -0,0 +1,356 @@ +# ⚡ Local Development + +Run the ARTVoice Accelerator locally with raw commands. No Makefile usage. Keep secrets out of git and rotate any previously exposed keys. + +--- + +## 1. 
Scope + +What this covers: + +- Local backend (FastAPI + Uvicorn) and frontend (Vite/React) +- Dev tunnel for inbound [Azure Communication Services](https://learn.microsoft.com/en-us/azure/communication-services/) callbacks +- Environment setup via venv OR Conda +- Minimal `.env` files (root + frontend) + +What this does NOT cover: +- Full infra provisioning +- CI/CD +- Persistence hardening + +--- + +## 2. Prerequisites + +| Tool | Notes | +|------|-------| +| Python 3.11 | Required runtime | +| Node.js ≥ 22 | Frontend | +| Azure CLI | `az login` first | +| Dev Tunnels | [Getting Started Guide](https://learn.microsoft.com/en-us/azure/developer/dev-tunnels/get-started) | +| (Optional) Conda | If using `environment.yaml` | +| Provisioned Azure resources | For real STT/TTS/LLM/ACS | + +If you only want a browser demo (no phone), ACS variables are optional. + +--- + +## 3. Clone Repository + +```bash +git clone https://github.com/Azure-Samples/art-voice-agent-accelerator.git +cd art-voice-agent-accelerator +``` + +--- + +## 4. Python Environment (Choose One) + +### Option A: venv +```bash +python -m venv .venv +source .venv/bin/activate +pip install --upgrade pip +pip install -r requirements.txt +``` + +### Option B: Conda +```bash +conda env create -f environment.yaml +conda activate audioagent +pip install -r requirements.txt # sync with lock +``` + +--- + +## 5. Root `.env` (Create in repo root) + +!!! tip "Sample Configuration" + Use [`.env.sample`](https://github.com/Azure-Samples/art-voice-agent-accelerator/blob/main/.env.sample) as a starting template and customize with your Azure resource values. + +!!! info "Using Azure Developer CLI (azd)" + If you provisioned infrastructure using `azd provision`, an environment file will be automatically generated for you in the format `.env.<env-name>`. + + **To use the azd-generated configuration:** + ```bash + # Copy the azd-generated environment file + cp .env.<env-name> 
.env + + # Example: if your azd environment is named "dev" + cp .env.dev .env + ``` + + The azd-generated file contains all the Azure resource endpoints and configuration needed for local development. + +**Manual Configuration Template** (edit placeholders; DO NOT commit real values): + +``` +# ===== Azure OpenAI ===== +AZURE_OPENAI_ENDPOINT=https://<your-openai-resource>.openai.azure.com +AZURE_OPENAI_KEY= +AZURE_OPENAI_DEPLOYMENT=gpt-4-1-mini +AZURE_OPENAI_API_VERSION=2024-12-01-preview +AZURE_OPENAI_CHAT_DEPLOYMENT_ID=gpt-4-1-mini +AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION=2024-11-20 + +# ===== Speech ===== +AZURE_SPEECH_REGION= +AZURE_SPEECH_KEY= + +# ===== ACS (optional unless using phone/PSTN) ===== +ACS_CONNECTION_STRING=endpoint=https://<your-acs-resource>.communication.azure.com/;accesskey=<your-acs-access-key> +ACS_SOURCE_PHONE_NUMBER=+1XXXXXXXXXX +ACS_ENDPOINT=https://<your-acs-resource>.communication.azure.com + +# ===== Optional Data Stores ===== +REDIS_HOST= +REDIS_PORT=6380 +REDIS_PASSWORD= +AZURE_COSMOS_CONNECTION_STRING= +AZURE_COSMOS_DATABASE_NAME=audioagentdb +AZURE_COSMOS_COLLECTION_NAME=audioagentcollection + +# ===== Runtime ===== +ENVIRONMENT=dev +ACS_STREAMING_MODE=media + +# ===== Filled after dev tunnel starts ===== +BASE_URL=https://<your-dev-tunnel-host> +``` + +Ensure `.env` is in `.gitignore`. + +--- + +## 6. Start Dev Tunnel + +Required if you want ACS callbacks (phone flow) or remote test: + +```bash +devtunnel host -p 8010 --allow-anonymous +``` + +Copy the printed HTTPS URL and set `BASE_URL` in root `.env`. Update it again if the tunnel restarts (URL changes). + +The Dev Tunnel URL will look similar to: +```bash +https://abc123xy-8010.usw3.devtunnels.ms +``` + +!!! warning "Security Considerations for Operations Teams" + **Dev Tunnels create public endpoints** that expose your local development environment to the internet. 
Review the following security guidelines: + + - **[Azure Dev Tunnels Security](https://learn.microsoft.com/en-us/azure/developer/dev-tunnels/security)** - Comprehensive security guidance + - **Access Control**: Use `--allow-anonymous` only for development; consider authentication for sensitive environments + - **Network Policies**: Ensure dev tunnels comply with organizational network security policies + - **Monitoring**: Dev tunnels should be monitored and logged like any public endpoint + - **Temporary Usage**: Tunnels are for development only; use proper Azure services for production + - **Credential Protection**: Never expose production credentials through dev tunnels + + **InfoSec Recommendation**: Review tunnel usage with your security team before use in corporate environments. + +--- + +## 7. Run Backend + +```bash +cd apps/rtagent/backend +uvicorn apps.rtagent.backend.main:app --host 0.0.0.0 --port 8010 --reload +``` + +If this fails with `ModuleNotFoundError: No module named 'apps'`, run the `uvicorn` command from the repository root instead (or add the repository root to `PYTHONPATH`), because the `apps.rtagent.backend.main` module path is resolved from the import path, which defaults to the current directory. + +--- + +## 8. Frontend Environment + +Create or edit `apps/rtagent/frontend/.env`: + +!!! tip "Sample Configuration" + Use [`apps/rtagent/frontend/.env.sample`](https://github.com/Azure-Samples/art-voice-agent-accelerator/blob/main/apps/rtagent/frontend/.env.sample) as a starting template. + +Use the dev tunnel URL by default so the frontend (and any external device or ACS-related flows) reaches your backend consistently—even if you open the UI on another machine or need secure HTTPS. + +``` +# Recommended (works across devices / matches ACS callbacks) +VITE_BACKEND_BASE_URL=https://<your-dev-tunnel-host> +``` + +If the tunnel restarts (URL changes), update both `BASE_URL` in the root `.env` and this value. + +--- + +## 9. Run Frontend + +```bash +cd apps/rtagent/frontend +npm install +npm run dev +``` + +Open: http://localhost:5173 + +WebSocket URL is auto-derived by replacing `http/https` with `ws/wss`. + +--- + +## 10. 
Alternative: VS Code Debugging + +**Built-in debugger configurations** are available in [`.vscode/launch.json`](https://github.com/Azure-Samples/art-voice-agent-accelerator/blob/main/.vscode/launch.json): + +### Backend Debugging +1. **Set breakpoints** in Python code +2. **Press F5** or go to Run & Debug view +3. **Select "[RT Agent] Python Debugger: FastAPI"** +4. **Debug session starts** with hot reload enabled + +### Frontend Debugging +1. **Start the React dev server** (`npm run dev`) +2. **Press F5** or go to Run & Debug view +3. **Select "[RT Agent] React App: Browser Debug"** +4. **Browser opens** with debugger attached + +**Benefits:** +- Set breakpoints in both Python and TypeScript/React code +- Step through code execution +- Inspect variables and call stacks +- Hot reload for both frontend and backend + +--- + +## 11. Alternative: Docker Compose + +**For containerized local development**, use the provided [`docker-compose.yml`](https://github.com/Azure-Samples/art-voice-agent-accelerator/blob/main/docker-compose.yml): + +```bash +# Ensure .env files are configured (see sections 5 & 8 above) + +# Build and run both frontend and backend containers +docker-compose up --build + +# Or run in detached mode +docker-compose up --build -d + +# View logs +docker-compose logs -f + +# Stop containers +docker-compose down +``` + +**Container Ports:** + +- **Frontend**: http://localhost:8080 (containerized) +- **Backend**: http://localhost:8010 (same as manual setup) + +**When to use Docker Compose:** + +- Consistent environment across team members +- Testing containerized deployment locally +- Isolating dependencies from host system +- Matching production container behavior + +!!! note "Dev Tunnel with Docker" + You still need to run `devtunnel host -p 8010 --allow-anonymous` for ACS callbacks, as the containers need external access for webhook endpoints. + +--- + +## 12. Optional: Phone (PSTN) Flow + +1. Purchase ACS phone number (Portal or CLI). + +2. 
Ensure these vars are set in your root `.env` (with real values): + + ``` + ACS_CONNECTION_STRING=endpoint=... + ACS_SOURCE_PHONE_NUMBER=+1XXXXXXXXXX + ACS_ENDPOINT=https://<your-acs-resource>.communication.azure.com + BASE_URL=https://<tunnel-id>-8010.usw3.devtunnels.ms + ``` + +3. Create a single Event Grid subscription for the Incoming Call event pointing to your answer handler: + - Inbound endpoint: + `https://<tunnel-id>-8010.usw3.devtunnels.ms/api/v1/calls/answer` + - Event type: `Microsoft.Communication.IncomingCall` + - (Callbacks endpoint `/api/v1/calls/callbacks` is optional unless you need detailed lifecycle events.) + + If tunnel URL changes, update the subscription (delete & recreate or update endpoint). + + Reference: [Subscribing to events](https://learn.microsoft.com/en-us/azure/communication-services/quickstarts/events/subscribe-to-event) + +4. Dial the number; observe: + - Call connection established + - Media session events + - STT transcripts + - TTS audio frames + +--- + +## 13. Quick Browser Test + +1. Backend + frontend running. +2. Open app, allow microphone. +3. Speak → expect: + - Interim/final transcripts + - Model response + - Audio playback + +--- + +## 14. Troubleshooting + +| Symptom | Likely Cause | Fix | +|---------|--------------|-----| +| 404 on callbacks | Stale `BASE_URL` | Restart tunnel, update `.env` | +| No audio | Speech key/region invalid | Verify Azure Speech resource | +| WS closes fast | Wrong `VITE_BACKEND_BASE_URL` | Use exact backend/tunnel URL | +| Slow first reply | Cold pool warm-up | Keep process running | +| Phone call no events | ACS callback not updated to tunnel | Reconfigure Event Grid subscription | +| Import errors | Missing dependencies | Re-run `pip install -r requirements.txt` | + +--- + +## 15. 
Testing Your Setup + +### Quick Unit Tests +Validate your local setup with the comprehensive test suite: + +```bash +# Run core component tests +python -m pytest tests/test_acs_media_lifecycle.py -v + +# Test event handling and WebSocket integration +python -m pytest tests/test_acs_events_handlers.py -v + +# Validate DTMF processing (if using phone features) +python -m pytest tests/test_dtmf_validation.py -v +``` + +### Load Testing (Advanced) +Test WebSocket performance with realistic conversation scenarios: + +```bash +# Generate realistic audio for testing +make generate_audio + +# Run WebSocket load test locally +locust -f tests/load/locustfile.py --web-host 127.0.0.1 --web-port 8089 +``` + +**What the load tests validate:** +- ✅ **Real-time audio streaming** - 20ms PCM chunks via WebSocket +- ✅ **Multi-turn conversations** - Insurance inquiries and quick questions +- ✅ **Response timing** - TTFB (Time-to-First-Byte) measurement +- ✅ **Barge-in handling** - Response interruption simulation +- ✅ **Connection stability** - Automatic WebSocket reconnection + +!!! info "Additional Resources" + For more comprehensive guidance on development and operations: + + - **[Troubleshooting Guide](../operations/troubleshooting.md)** - Detailed problem resolution for common issues + - **[Testing Guide](../operations/testing.md)** - Comprehensive unit and integration testing (85%+ coverage) + - **[Load Testing](../operations/load-testing.md)** - WebSocket performance testing and Azure Load Testing integration + - **[Repository Structure](../guides/repository-structure.md)** - Understand the codebase layout + - **[Utilities & Services](../guides/utilities.md)** - Core infrastructure components + +--- + +Keep secrets out of commits. Rotate anything that has leaked. 
\ No newline at end of file diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md deleted file mode 100644 index d76d4954..00000000 --- a/docs/getting-started/quickstart.md +++ /dev/null @@ -1,157 +0,0 @@ -# Quick Start Guide - -Get up and running with the Real-Time Voice Agent in just a few minutes. - -## Prerequisites - -- Python 3.11+ -- Azure Subscription with Cognitive Services -- Azure Speech Services resource - -## Installation - -1. **Clone the repository:** -```bash -git clone https://github.com/pablosalvador10/gbb-ai-audio-agent.git -cd gbb-ai-audio-agent -``` - -2. **Install dependencies:** -```bash -pip install -r requirements.txt -``` - -3. **Configure environment variables:** -```bash -# Copy environment template -cp .env.example .env - -# Edit .env with your Azure credentials -AZURE_SPEECH_KEY=your-speech-key -AZURE_SPEECH_REGION=eastus -``` - -## Basic Text-to-Speech - -```python -from src.speech.text_to_speech import SpeechSynthesizer - -# Initialize synthesizer -synthesizer = SpeechSynthesizer( - key="your-speech-key", - region="eastus", - voice="en-US-JennyMultilingualNeural" -) - -# Synthesize speech to memory -audio_data = synthesizer.synthesize_speech( - "Hello! 
Welcome to our voice application.", - style="chat", - rate="+10%" -) - -# Save to file -with open("output.wav", "wb") as f: - f.write(audio_data) - -print(f"Generated {len(audio_data)} bytes of audio") -``` - -## Real-time Streaming - -```python -# Generate base64-encoded frames for streaming -frames = synthesizer.synthesize_to_base64_frames( - "This is real-time streaming audio", - sample_rate=16000 -) - -print(f"Generated {len(frames)} audio frames") -for i, frame in enumerate(frames[:3]): # Show first 3 frames - print(f"Frame {i}: {frame[:50]}...") -``` - -## Local Speaker Playback - -```python -# Play audio through system speakers (if available) -synthesizer = SpeechSynthesizer( - key="your-key", - region="eastus", - playback="auto" # Automatic hardware detection -) - -# Speak text directly -synthesizer.start_speaking_text( - "This will play through your speakers!", - voice="en-US-AriaNeural", - style="excited" -) - -# Stop if needed -import time -time.sleep(3) -synthesizer.stop_speaking() -``` - -## Production Configuration - -```python -import os -from src.speech.text_to_speech import SpeechSynthesizer - -# Production setup with managed identity -synthesizer = SpeechSynthesizer( - region=os.getenv("AZURE_SPEECH_REGION"), - voice="en-US-JennyMultilingualNeural", - playback="never", # Headless deployment - enable_tracing=True, # OpenTelemetry monitoring - call_connection_id="session-abc123" # Correlation tracking -) - -# Validate configuration -if synthesizer.validate_configuration(): - print("✅ Speech synthesizer ready for production") - - # Synthesize with advanced options - audio = synthesizer.synthesize_speech( - "Production-ready voice synthesis", - voice="en-US-AriaNeural", - style="news", - rate="+5%" - ) -else: - print("❌ Configuration validation failed") -``` - -## Next Steps - -- **[Configuration Guide](configuration.md)** - Detailed setup options -- **[API Reference](../api/overview.md)** - Complete API documentation -- 
**[Architecture](../architecture/overview.md)** - System design and components -- **[Examples](../examples/basic-usage.md)** - More usage examples - -## Common Issues - -### Authentication Errors -```bash -# Verify your credentials -az account show -az cognitiveservices account list -``` - -### Audio Hardware Issues -```python -# Check headless environment detection -from src.speech.text_to_speech import _is_headless -print(f"Headless environment: {_is_headless()}") -``` - -### Import Errors -```bash -# Ensure all dependencies installed -pip install -r requirements.txt - -# Check Python path -python -c "import src.speech.text_to_speech; print('✅ Import successful')" -``` diff --git a/docs/repo-structure.md b/docs/guides/repository-structure.md similarity index 99% rename from docs/repo-structure.md rename to docs/guides/repository-structure.md index 927890e5..6ab754a1 100644 --- a/docs/repo-structure.md +++ b/docs/guides/repository-structure.md @@ -18,7 +18,7 @@ The repository follows a modular, microservice-oriented structure with clear sep ## Complete Repository Map (5 Levels Deep) ``` -📁 gbb-ai-audio-agent/ +📁 art-voice-agent-accelerator/ ├── 📄 azure.yaml # Azure Developer CLI configuration ├── 📄 CHANGELOG.md # Release notes and version history ├── 📄 CONTRIBUTING.md # Contribution guidelines @@ -291,7 +291,7 @@ The repository follows a modular, microservice-oriented structure with clear sep │ └── 📄 terraform.tfvars │ ├── 📁 docs/ # Documentation -│ ├── 📄 README.md # Documentation index +│ ├── 📄 docs-overview.md # Documentation index │ ├── 📄 Architecture.md # System architecture │ ├── 📄 AuthForHTTPandWSS.md # Authentication guide │ ├── 📄 CICDGuide.md # CI/CD setup @@ -308,7 +308,7 @@ The repository follows a modular, microservice-oriented structure with clear sep │ ├── 📄 repo-structure.md # This document │ ├── 📁 api/ # API documentation │ │ ├── 📄 overview.md # API overview -│ │ ├── 📄 speech-synthesis.md # Speech API docs +│ │ ├── 📄 architecture.md # Speech API 
docs │ │ └── 📁 endpoints/ # Endpoint documentation │ │ ├── 📄 calls.md # Call endpoints │ │ └── 📄 speech.md # Speech endpoints diff --git a/docs/guides/utilities.md b/docs/guides/utilities.md new file mode 100644 index 00000000..1008930d --- /dev/null +++ b/docs/guides/utilities.md @@ -0,0 +1,329 @@ +# Utilities and Infrastructure Services + +Supporting utilities and infrastructure services provide the foundation for the Real-Time Voice Agent's scalability, resilience, and configurability. These modules are shared across all API endpoints and handlers. + +## Handler Selection and Routing + +The API uses a **factory pattern** to select appropriate handlers based on configuration and endpoint: + +### Handler Factory (`/api/v1/endpoints/media.py`) + +```python +async def _create_media_handler(websocket, call_connection_id, session_id, orchestrator): + """Factory function creates handler based on ACS_STREAMING_MODE""" + + if ACS_STREAMING_MODE == StreamMode.MEDIA: + # Three-thread architecture for traditional STT → LLM → TTS + return ACSMediaHandler( + websocket=websocket, + orchestrator_func=orchestrator, + call_connection_id=call_connection_id, + recognizer=await stt_pool.acquire(), + memory_manager=memory_manager, + session_id=session_id, + ) + + elif ACS_STREAMING_MODE == StreamMode.VOICE_LIVE: + # Azure Voice Live API integration + return VoiceLiveHandler( + azure_endpoint=AZURE_VOICE_LIVE_ENDPOINT, + model_name=AZURE_VOICE_LIVE_MODEL, + session_id=session_id, + websocket=websocket, + orchestrator=orchestrator, + lva_agent=injected_agent, + ) +``` + +### Configuration-Driven Routing + +```python +# Environment configuration determines handler selection +ACS_STREAMING_MODE = StreamMode.MEDIA # Default: three-thread architecture +ACS_STREAMING_MODE = StreamMode.VOICE_LIVE # Azure Voice Live integration +ACS_STREAMING_MODE = StreamMode.TRANSCRIPTION # Lightweight transcription only + +# Handlers automatically selected at runtime based on configuration +# No code 
changes required to switch between modes +``` + +## Resource Pool Management + +### Speech-to-Text Pool (`src.pools.stt_pool`) + +```python +from src.pools.stt_pool import STTResourcePool + +# Managed pool of speech recognizers +stt_pool = STTResourcePool( + pool_size=4, # Concurrent recognizers + region="eastus", + enable_diarization=True +) + +# Automatic resource lifecycle in handlers +recognizer = await stt_pool.acquire() # Get from pool +# ... use recognizer ... +await stt_pool.release(recognizer) # Return to pool +``` + +### Text-to-Speech Pool (`src.pools.tts_pool`) + +```python +from src.pools.tts_pool import TTSResourcePool + +# Shared TTS synthesizers across connections +tts_pool = TTSResourcePool( + pool_size=4, # Concurrent synthesizers + region="eastus", + voice_name="en-US-JennyMultilingualV2Neural" +) + +# Pool-based resource management +synthesizer = await tts_pool.acquire() +await synthesizer.speak_text_async("Hello world") +await tts_pool.release(synthesizer) +``` + +### Azure OpenAI Pool (`src.pools.aoai_pool`) + +```python +from src.pools.aoai_pool import AOAIResourcePool + +# Managed OpenAI client connections +aoai_pool = AOAIResourcePool( + pool_size=8, # Higher concurrency for AI processing + endpoint=AZURE_OPENAI_ENDPOINT, + model="gpt-4o", + max_tokens=150 +) + +# Used by orchestrator for conversation processing +client = await aoai_pool.acquire() +response = await client.chat_completions_create(messages=conversation_history) +await aoai_pool.release(client) +``` + +## Connection Management (`src.pools.connection_manager`) + +Centralized WebSocket connection tracking and lifecycle management: + +```python +from src.pools.connection_manager import ConnectionManager + +# Single connection manager instance per application +conn_manager = ConnectionManager() + +# Register connections with metadata and topic subscriptions +conn_id = await conn_manager.register( + websocket=websocket, + client_type="media", # or "dashboard", "conversation" + 
call_id=call_connection_id, + session_id=session_id, + topics={"media", "session"} +) + +# Topic-based broadcasting +await conn_manager.broadcast_topic("media", { + "type": "audio_status", + "status": "playing" +}) + +# Session-isolated broadcasting +await conn_manager.broadcast_session(session_id, { + "type": "transcript", + "text": "User spoke something" +}) + +# Automatic cleanup on disconnect +await conn_manager.unregister(conn_id) +``` + +## State Management and Persistence + +### Memory Manager (`src.stateful.state_managment.MemoManager`) + +Conversation state and session persistence: + +```python +from src.stateful.state_managment import MemoManager + +# Load existing conversation or create new session +memory_manager = MemoManager.from_redis(session_id, redis_mgr) + +# Conversation history management +memory_manager.append_to_history("user", "Hello") +memory_manager.append_to_history("assistant", "Hi there!") + +# Context storage and retrieval +memory_manager.set_context("target_number", "+1234567890") +phone_number = memory_manager.get_context("target_number") + +# Persistent storage to Redis +await memory_manager.persist_to_redis_async(redis_mgr) +``` + +### Redis Session Management (`src.redis.manager`) + +```python +from src.redis.manager import AzureRedisManager + +# Azure-native Redis integration with Entra ID +redis_mgr = AzureRedisManager( + host="your-redis.redis.cache.windows.net", + credential=DefaultAzureCredential() +) + +# Session data storage with TTL +await redis_mgr.set_value_async(f"session:{session_id}", session_data, expire=3600) + +# Call connection mapping for UI coordination +await redis_mgr.set_value_async( + f"call_session_map:{call_connection_id}", + browser_session_id +) +``` + +## Voice Configuration and Neural Voices + +### Voice Configuration (`config.voice_config`) + +```python +from config.voice_config import VoiceConfiguration + +# Centralized voice metadata and selection +voice_config = VoiceConfiguration.from_env() + +# 
Get optimized voice for use case +support_voice = voice_config.get_voice_alias("support_contact_center") +print(f"Voice: {support_voice.neural_voice}") +print(f"Style: {support_voice.style}") # cheerful, empathetic, etc. + +# Multi-language voice selection +spanish_voice = voice_config.get_voice_for_language("es-ES") +``` + +## Authentication and Security + +### Azure Entra ID Integration (`src.auth`) + +```python +from azure.identity import DefaultAzureCredential + +# Keyless authentication for all Azure services +credential = DefaultAzureCredential() + +# Automatic token refresh and service principal authentication +# Used by STT/TTS pools, Redis manager, and ACS clients +``` + +### WebSocket Authentication (`apps.rtagent.backend.src.utils.auth`) + +```python +from apps.rtagent.backend.src.utils.auth import validate_acs_ws_auth + +# Optional WebSocket authentication for secure environments +try: + await validate_acs_ws_auth(websocket, required_scope="media.stream") + # Proceed with authenticated connection +except AuthError as e: + await websocket.close(code=4001, reason="Authentication required") +``` + +## Observability and Monitoring + +### OpenTelemetry Integration (`utils.telemetry_config`) + +```python +from utils.telemetry_config import configure_tracing + +# Comprehensive distributed tracing +configure_tracing( + service_name="voice-agent-api", + service_version="v1.0.0", + otlp_endpoint=OTEL_EXPORTER_OTLP_ENDPOINT +) + +# Automatic span creation for: +# - WebSocket connections and lifecycle +# - Speech recognition sessions +# - TTS synthesis operations +# - Azure service calls +# - Orchestrator processing +``` + +### Structured Logging (`utils.ml_logging`) + +```python +from utils.ml_logging import get_logger + +logger = get_logger("api.v1.media") + +# Consistent JSON logging with correlation IDs +logger.info( + "Media session started", + extra={ + "session_id": session_id, + "call_connection_id": call_connection_id, + "streaming_mode": 
str(ACS_STREAMING_MODE) + } +) +``` + +### Performance Monitoring (`src.tools.latency_tool`) + +```python +from src.tools.latency_tool import LatencyTool + +# Track conversation timing metrics +latency_tool = LatencyTool(memory_manager) + +# Measure time to first byte for greeting +latency_tool.start("greeting_ttfb") +await send_greeting_audio() +latency_tool.stop("greeting_ttfb") + +# Automatic span attributes for performance analysis +``` + +## Development and Testing Utilities + +### Load Testing Framework (`tests/load/`) + +```python +from tests.load.utils.load_test_conversations import ConversationSimulator + +# Simulate high-load scenarios +simulator = ConversationSimulator( + base_url="wss://api.domain.com", + concurrent_sessions=50, + conversation_length=10 +) + +await simulator.run_load_test() +``` + +### ACS Event Simulation (`tests/conftest.py`) + +```python +# Test fixtures for ACS webhook simulation +@pytest.fixture +def acs_call_connected_event(): + return { + "eventType": "Microsoft.Communication.CallConnected", + "data": { + "callConnectionId": "test-call-123", + "correlationId": "test-correlation-456" + } + } + +# Integration testing with mock ACS events +async def test_call_lifecycle(acs_call_connected_event): + response = await client.post("/api/v1/calls/callbacks", + json=[acs_call_connected_event]) + assert response.status_code == 200 +``` + +## Integration Patterns + +See **[Streaming Modes](streaming-modes.md)** for detailed configuration options, **[Speech Recognition](speech-recognition.md)** for STT integration patterns, and **[Speech Synthesis](speech-synthesis.md)** for TTS implementation details. diff --git a/docs/index.md b/docs/index.md index 6f60cb18..d2d13c55 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,69 +1,184 @@ -# Real-Time Voice Agent - -A production-ready Azure-powered voice agent with advanced text-to-speech and speech recognition capabilities. 
- -## 🚀 Features - -- **Real-time speech synthesis** with Azure Cognitive Services -- **Streaming speech recognition** with advanced language detection -- **Multi-language support** with automatic optimization -- **Neural voice synthesis** with customizable styles and prosody -- **OpenTelemetry observability** with distributed tracing -- **Production-ready** with comprehensive error handling and monitoring - -## 🏗️ Architecture - -The voice agent is built with a modular architecture optimized for low-latency real-time applications: - -- **FastAPI backend** for high-performance async operations -- **Azure Communication Services** for call automation and media streaming -- **Azure Speech Services** for TTS/STT with neural voice models -- **Azure OpenAI** for intelligent conversation handling -- **OpenTelemetry** for comprehensive observability and monitoring - -## 🎯 Key Components - -### SpeechSynthesizer - -The core text-to-speech engine providing: - -- Multiple synthesis modes (speaker playback, memory synthesis, frame-based streaming) -- Flexible authentication (API key, managed identity, credential chains) -- Intelligent environment detection for headless deployments -- Advanced SSML support with style and prosody control -- Real-time frame generation for streaming applications - -### StreamingSpeechRecognizer - -Advanced speech-to-text engine featuring: - -- Real-time streaming recognition with minimal latency -- Language detection and speaker diarization -- Neural audio processing for improved accuracy -- Comprehensive callback system for real-time processing -- Session management with proper resource cleanup - -## 📊 Observability - -Built-in observability features include: - -- **Distributed tracing** with OpenTelemetry and Azure Monitor -- **Structured logging** with correlation IDs for request tracking -- **Performance metrics** for latency and error rate monitoring -- **Service dependency mapping** for application insights -- **Real-time monitoring** 
dashboards and alerting - -## 🔧 Configuration - -The system supports flexible configuration through: - -- Environment variables for credentials and settings -- Runtime configuration for voice parameters and behavior -- Deployment-specific settings for different environments -- Automatic fallback mechanisms for robust operation - -## 🌟 Getting Started - -Ready to build your voice application? Check out our [Quick Start Guide](getting-started/quickstart.md) to get up and running in minutes. - -For detailed API documentation, explore our [API Reference](api/overview.md) section. +# Real-Time Voice Agent Documentation Hub + +!!! tip "Welcome to the Complete Guide" + This documentation covers everything you need to deploy, operate, and extend the Azure-based Real-Time Voice Agent with **Python 3.11 + FastAPI** and enterprise-grade Azure integrations. + +## :material-rocket: Quick Start + +=== "🚀 New Users" + Start here for basic setup and deployment: + + 1. **[Getting Started Guide](getting-started/README.md)** - Installation & basic usage + 2. **[Local Development](getting-started/local-development.md)** - Development workflow + 3. **[Deployment Guide](deployment/README.md)** - Azure deployment with azd + +=== "🏗️ Architects" + Understand the system design: + + 1. **[Architecture Overview](architecture/README.md)** - System architecture + 2. **[Data Flows](architecture/data-flows.md)** - Redis & Cosmos DB architecture + 3. **[ACS Integration](architecture/acs-flows.md)** - Three-thread voice processing + +=== "🔧 Operators" + Deploy and monitor in production: + + 1. **[Production Deployment](deployment/production.md)** - Production checklist + 2. **[Monitoring Guide](operations/monitoring.md)** - Application Insights setup + 3. 
**[Troubleshooting](operations/troubleshooting.md)** - Common issues & solutions + +## :material-map: Navigation Guide + +| Guide | Description | +|-------|-------------| +| [Quick Start Guide](getting-started/README.md) | Complete setup and basic usage examples | +| [Local Development](getting-started/local-development.md) | Local development setup and testing | +| [Configuration Guide](getting-started/configuration.md) | Advanced configuration options | +| [Deployment Guide](deployment/README.md) | Complete Azure deployment with Terraform/azd | +| [Architecture Overview](architecture/README.md) | System architecture and design decisions | +| [Troubleshooting](operations/troubleshooting.md) | Common issues and solutions | + +### :material-book-open: By Topic + +!!! info "Microsoft Learn Integration" + Documentation includes comprehensive [Microsoft Learn](https://learn.microsoft.com) references with validated links to official Azure documentation, samples, and best practices. + +=== "🏗️ Architecture & Design" + **Core System Design** + + - **[Architecture Overview](architecture/README.md)** - Enterprise Azure infrastructure & logical design + - **[ACS Flows](architecture/acs-flows.md)** - Three-thread voice processing architecture + - **[Data Flows](architecture/data-flows.md)** - Redis & Cosmos DB three-tier storage + - **[Cross-Cloud Integration](architecture/integrations.md)** - Azure/AWS integration patterns + - **[LLM Orchestration](architecture/llm-orchestration.md)** - AI model routing & conversation flows + +=== "🚀 Deployment & Operations" + **Production Deployment** + + - **[Deployment Guide](deployment/README.md)** - Complete Azure deployment with `azd` + - **[Production Checklist](deployment/production.md)** - Security, scaling & monitoring + - **[CI/CD Pipeline](deployment/cicd.md)** - Automated deployment workflows + - **[Monitoring & Observability](operations/monitoring.md)** - Application Insights integration + - 
**[Troubleshooting](operations/troubleshooting.md)** - Diagnostic guides & solutions + - **[Load Testing](operations/load-testing.md)** - Performance validation strategies + - **[Testing Framework](operations/testing.md)** - Comprehensive testing approach + +=== "🔧 Development & API" + **Development Resources** + + - **[Getting Started](getting-started/README.md)** - Quick setup & basic usage + - **[Local Development](getting-started/local-development.md)** - Development environment + - **[Configuration Guide](getting-started/configuration.md)** - Environment & service setup + - **[API Reference](api/README.md)** - Complete REST & WebSocket API documentation + - **[Interactive API Docs](api/api-reference.md)** - OpenAPI specification with testing + +=== "📚 Reference & Utilities" + **Supporting Documentation** + + - **[Speech Synthesis](reference/speech-synthesis.md)** - Azure Speech TTS integration + - **[Speech Recognition](reference/speech-recognition.md)** - Azure Speech STT capabilities + - **[Streaming Modes](reference/streaming-modes.md)** - Audio processing pipelines + - **[Utilities & Tools](reference/utilities.md)** - Helper services & infrastructure + - **[Repository Structure](reference/repository-structure.md)** - Codebase organization + - **[Authentication Guide](security/authentication.md)** - Security & session management + +=== "🏥 Industry Solutions" + **Domain-Specific Guides** + + - **[Healthcare Solutions](industry/healthcare.md)** - HIPAA-compliant voice applications + - **[Samples & Examples](samples/README.md)** - Implementation examples & tutorials + +## Diagram Highlights + +- Production reference: [Architecture Overview – Production Deployment](architecture/README.md#production-deployment-architecture) (image: `assets/RTAudio.v0.png`) +- Data lifecycle: [Data Flows – Call Lifecycle](architecture/data-flows.md#complete-call-lifecycle-flow) with interactive Mermaid sequence diagrams +- Contact center routing: [ACS 
Flows](architecture/acs-flows.md) featuring step-by-step diagrams and Mermaid flows +- Authentication flows: [Authentication Guide](security/authentication.md#authentication-flow-diagram) detailing OAuth and shared access tokens + +## :material-sitemap: Architecture Overview + +!!! abstract "Enterprise-Grade Voice AI Platform" + Built on **Azure Communication Services**, **Azure OpenAI**, and **Azure Speech Services** with FastAPI backend architecture. + +```mermaid +graph TB + subgraph "📞 Communication Layer" + Phone[📱 Phone/PSTN] + Browser[🌐 Web Browser] + Teams[👥 MS Teams] + end + + subgraph "⚡ Azure Services" + ACS[🔗 Azure Communication Services<br/>Call Automation & Media Streaming] + Speech[🗣️ Azure Speech Services<br/>STT/TTS + Real-time Processing] + OpenAI[🧠 Azure OpenAI<br/>GPT-4o + Realtime API] + Redis[⚡ Azure Cache for Redis<br/>Session State & Coordination] + Cosmos[🗄️ Azure Cosmos DB<br/>Conversation History] + end + + subgraph "🏗️ Application Platform" + Apps[📦 Azure Container Apps<br/>FastAPI Backend + React Frontend] + Monitor[📊 Azure Monitor<br/>Application Insights & Tracing] + end + + Phone --> ACS + Browser --> ACS + Teams --> ACS + ACS <--> Speech + ACS <--> Apps + Speech <--> Apps + Apps <--> OpenAI + Apps <--> Redis + Apps <--> Cosmos + Apps --> Monitor + + classDef communication fill:#e1f5fe,stroke:#01579b,stroke-width:2px,color:#000 + classDef azure fill:#fff3e0,stroke:#e65100,stroke-width:2px,color:#000 + classDef platform fill:#f3e5f5,stroke:#4a148c,stroke-width:2px,color:#000 + + class Phone,Browser,Teams communication + class ACS,Speech,OpenAI,Redis,Cosmos azure + class Apps,Monitor platform + +``` + +!!! tip "Microsoft Learn Resources" + + - [Azure Communication Services Architecture](https://learn.microsoft.com/en-us/azure/communication-services/concepts/call-automation/audio-streaming-concept) - Real-time media streaming concepts + - [Azure Developer CLI Templates](https://learn.microsoft.com/en-us/azure/developer/azure-developer-cli/get-started) - Deployment automation with `azd up` + - [Azure Container Apps](https://learn.microsoft.com/en-us/azure/container-apps/overview) - Serverless container platform + +## :material-compass: Next Steps + +!!! success "Choose Your Learning Path" + Select the path that matches your role and experience level: + +=== "🆕 First Time Setup" + **New to the project? Start here:** + + 1. **[Getting Started Guide](getting-started/README.md)** - Complete setup walkthrough + 2. **[Architecture Overview](architecture/README.md)** - Understand the system design + 3. **[Deployment Guide](deployment/README.md)** - Deploy with `azd up` in 15 minutes + 4. **[API Reference](api/README.md)** - Explore the REST and WebSocket APIs + +=== "🏗️ Platform Engineers" + **Infrastructure and operations focus:** + + 1. **[Production Deployment](deployment/production.md)** - Enterprise deployment checklist + 2. **[Monitoring Setup](operations/monitoring.md)** - Application Insights configuration + 3.
**[Security Guide](security/authentication.md)** - Authentication & session management + 4. **[Troubleshooting](operations/troubleshooting.md)** - Diagnostic playbooks + +=== "👨‍💻 Solution Developers" + **Integration and customization:** + + 1. **[Local Development](getting-started/local-development.md)** - Dev environment setup + 2. **[Cross-Cloud Integration](architecture/integrations.md)** - Azure/AWS patterns + 3. **[Healthcare Solutions](industry/healthcare.md)** - Domain-specific implementations + 4. **[Speech Services](reference/speech-synthesis.md)** - Advanced voice capabilities + +!!! info "Microsoft Learn Learning Paths" + Complement this documentation with official Microsoft learning resources: + + - **[Azure Communication Services Learning Path](https://learn.microsoft.com/en-us/training/paths/azure-communication-services/)** - Comprehensive ACS training + - **[Azure Developer CLI Fundamentals](https://learn.microsoft.com/en-us/azure/developer/azure-developer-cli/overview)** - Master `azd` deployment workflows + - **[Azure Container Apps](https://learn.microsoft.com/en-us/training/paths/deploy-applications-azure-container-apps/)** - Container orchestration on Azure diff --git a/docs/HealthcareUsecases.md b/docs/industry/healthcare.md similarity index 100% rename from docs/HealthcareUsecases.md rename to docs/industry/healthcare.md diff --git a/docs/operations/load-testing.md b/docs/operations/load-testing.md new file mode 100644 index 00000000..0f88fd37 --- /dev/null +++ b/docs/operations/load-testing.md @@ -0,0 +1,535 @@ +# Load Testing + +Comprehensive WebSocket load testing framework for real-time voice agent using Locust with realistic conversation simulation and Azure Load Testing integration. + +> **Note**: For unit tests, integration tests, and code quality validation, see [Testing Framework](testing.md). 
+ +## Overview + +The load testing framework validates WebSocket performance under realistic conversation scenarios using: +- **Locust-based testing**: WebSocket simulation with real audio streaming +- **Audio generation**: Production TTS-generated conversation audio +- **Azure integration**: Seamless deployment to Azure Load Testing service +- **Realistic scenarios**: Multi-turn conversation patterns + +## Audio Generation + +### Production TTS Integration + +The audio generator uses production [Azure Speech Services](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/text-to-speech) to create realistic conversation audio: + +```python +# Audio generator configuration +synthesizer = SpeechSynthesizer( + region=os.getenv("AZURE_SPEECH_REGION"), + key=os.getenv("AZURE_SPEECH_KEY"), + language="en-US", + voice="en-US-JennyMultilingualNeural", + playback="never", # Disable for load testing + enable_tracing=False # Performance optimization +) +``` + +### Quick Start: Generate Audio Files + +```bash +# Using Makefile (recommended) +make generate_audio + +# Direct command with options +python tests/load/utils/audio_generator.py \ + --max-turns 5 \ + --scenarios insurance_inquiry quick_question +``` + +**Generated Structure:** +``` +tests/load/audio_cache/ +├── insurance_inquiry_turn_1_of_5_abc123.pcm +├── insurance_inquiry_turn_2_of_5_def456.pcm +├── insurance_inquiry_turn_3_of_5_ghi789.pcm +├── quick_question_turn_1_of_3_pqr678.pcm +├── quick_question_turn_2_of_3_stu901.pcm +└── manifest.jsonl # Audio file metadata +``` + +### Conversation Scenarios + +#### Insurance Inquiry (5 turns) +1. "Hello, my name is Alice Brown, my social is 1234, and my zip code is 60601" +2. "I'm calling about my auto insurance policy" +3. "I need to understand what's covered under my current plan" +4. "What happens if I get into an accident?" +5. "Thank you for all the information, that's very helpful" + +#### Quick Question (3 turns) +1. 
"Hi there, I have a quick question" +2. "Can you help me check my account balance?" +3. "Thanks, that's all I needed to know" + +### Audio Requirements + +| Property | Value | Notes | +|----------|-------|-------| +| **Format** | 16-bit PCM | Compatible with WebSocket streaming | +| **Sample Rate** | 16 kHz | Optimized for voice recognition | +| **Channels** | Mono | Single channel for conversation | +| **Encoding** | Base64 | WebSocket transmission format | + +## Locust Load Testing Framework + +### Core Features + +The [Locust framework](https://docs.locust.io/en/stable/) provides WebSocket load testing with: + +- **Real-time audio streaming**: 20ms PCM chunks via WebSocket +- **TTFB measurement**: Time-to-first-byte response tracking +- **Barge-in testing**: Response interruption simulation +- **Connection management**: Automatic WebSocket reconnection +- **Configurable scenarios**: Multi-turn conversation patterns + +### WebSocket Testing Implementation + +The actual Locust implementation simulates realistic WebSocket voice conversation patterns: + +```python +# From locustfile.py - actual implementation +WS_URL = os.getenv("WS_URL", "ws://127.0.0.1:8010/api/v1/media/stream") +PCM_DIR = os.getenv("PCM_DIR", "tests/load/audio_cache") +TURNS_PER_USER = int(os.getenv("TURNS_PER_USER", "3")) +CHUNKS_PER_TURN = int(os.getenv("CHUNKS_PER_TURN", "100")) # ~2s @20ms +CHUNK_MS = int(os.getenv("CHUNK_MS", "20")) # 20 ms chunks +FIRST_BYTE_TIMEOUT_SEC = float(os.getenv("FIRST_BYTE_TIMEOUT_SEC", "5.0")) +BARGE_QUIET_MS = int(os.getenv("BARGE_QUIET_MS", "400")) +``` + +### Audio Streaming Process + +Based on the actual `locustfile.py` implementation: + +1. **Connection Setup**: WebSocket connection with ACS correlation headers +2. **Audio Metadata**: Initial format specification (16kHz PCM mono) +3. **Chunk Streaming**: 20ms audio frames from PCM files at regular intervals +4. **Silence Frames**: End-of-speech detection with generated low-level noise +5. 
**Response Measurement**: TTFB and barge-in timing using `_measure_ttfb()` +6. **Turn Rotation**: Cycles through available PCM files for realistic conversation flow + +### Configuration Options + +```bash +# Environment variables for locustfile.py +export WS_URL="ws://localhost:8010/api/v1/media/stream" +export PCM_DIR="tests/load/audio_cache" +export TURNS_PER_USER=3 +export CHUNKS_PER_TURN=100 +export CHUNK_MS=20 +export FIRST_BYTE_TIMEOUT_SEC=5.0 +export BARGE_QUIET_MS=400 +export WS_IGNORE_CLOSE_EXCEPTIONS=true +``` + +### Performance Metrics Tracked + +#### Real-time Metrics +- **TTFB (Time-to-First-Byte)**: Server response latency after audio completion +- **Barge-in latency**: Response interruption timing measured with `_wait_for_end_of_response()` +- **WebSocket stability**: Connection durability under load with reconnection handling +- **Audio streaming**: Chunk transmission success rates with error handling +- **Turn completion**: End-to-end conversation success + +#### Test Implementation Example + +```python +class ACSUser(User): + def _measure_ttfb(self, max_wait_sec: float) -> tuple[bool, float]: + """Time-To-First-Byte after EOS: measure server response time""" + start = time.time() + deadline = start + max_wait_sec + while time.time() < deadline: + msg = self._recv_with_timeout(0.05) + if msg: + return True, (time.time() - start) * 1000.0 + return False, (time.time() - start) * 1000.0 +``` + +## Running Load Tests + +### Local Testing + +```bash +# Basic local test +make run_load_test + +# Custom configuration via Makefile +make run_load_test \ + URL=wss://your-backend.azurecontainerapps.io/api/v1/media/stream \ + CONVERSATIONS=50 \ + CONCURRENT=10 + +# Direct Locust command +locust -f tests/load/locustfile.py \ + --host=http://localhost:8010 \ + --users 20 \ + --spawn-rate 5 \ + --run-time 300s +``` + +### Makefile Integration + +#### Available Commands + +```bash +# Generate audio files for testing +make generate_audio + +# Run local load test 
with defaults +make run_load_test + +# Run with custom parameters +make run_load_test \ + URL=wss://prod-backend.azurecontainerapps.io/api/v1/media/stream \ + CONVERSATIONS=100 \ + CONCURRENT=20 +``` + +#### Makefile Implementation + +The actual implementation from the Makefile: + +```makefile +# Audio generation target +generate_audio: + python $(SCRIPTS_LOAD_DIR)/utils/audio_generator.py --max-turns 5 + +# Load testing target with configurable parameters +run_load_test: + @echo "Running load test (override with make run_load_test URL=wss://host)" + $(eval URL ?= wss://$(LOCAL_URL)/api/v1/media/stream) + $(eval TURNS ?= 5) + $(eval CONVERSATIONS ?= 20) + $(eval CONCURRENT ?= 20) + @locust -f $(SCRIPTS_LOAD_DIR)/locustfile.py \ + --headless \ + -u $(CONVERSATIONS) \ + -r $(CONCURRENT) \ + --run-time 10m \ + --host $(URL) \ + --stop-timeout 60 \ + --csv=locust_report \ + --only-summary +``` + +**Key Parameters:** +- `URL`: WebSocket endpoint to test (default: `wss://localhost:8010/api/v1/media/stream`) +- `CONVERSATIONS`: Number of concurrent users (default: 20) +- `CONCURRENT`: Spawn rate per second (default: 20) +- `TURNS`: Number of conversation turns (default: 5) + +**Output Files:** +- `locust_report_stats.csv`: Detailed performance statistics +- `locust_report_failures.csv`: Error analysis +- `locust_report_exceptions.csv`: Exception tracking + +## Azure Load Testing Integration + +### Overview + +[Azure Load Testing](https://learn.microsoft.com/en-us/azure/load-testing/overview-what-is-azure-load-testing) provides a fully managed load testing service that supports Locust-based testing for WebSocket applications. + +### Setup Steps + +#### 1. Create Azure Load Testing Resource + +```bash +# Create load testing resource +az load create \ + --name "voice-agent-loadtest" \ + --resource-group "rg-voice-agent" \ + --location "eastus" +``` + +#### 2. 
Prepare Test Files + +Upload the following files to Azure Load Testing: + +**Required Files:** +- `tests/load/locustfile.py` (rename to `locustfile.py`) +- All PCM files from `tests/load/audio_cache/*.pcm` + +**File Organization:** +``` +Azure Load Testing Upload: +├── locustfile.py # Main test script +├── insurance_inquiry_turn_1_of_5_abc123.pcm +├── insurance_inquiry_turn_2_of_5_def456.pcm +├── quick_question_turn_1_of_3_pqr678.pcm +└── manifest.jsonl # Audio file metadata +``` + +#### 3. Configure Environment Variables + +Set the following environment variables in Azure Load Testing: + +```bash +# Target configuration +WS_URL=wss://your-backend.azurecontainerapps.io/api/v1/media/stream +PCM_DIR=./ # Azure places files in working directory + +# Performance tuning +TURNS_PER_USER=3 +CHUNKS_PER_TURN=100 +CHUNK_MS=20 +FIRST_BYTE_TIMEOUT_SEC=5.0 +BARGE_QUIET_MS=400 +WS_IGNORE_CLOSE_EXCEPTIONS=true + +# Optional: Custom scenarios +RESPONSE_TOKENS=recognizer,greeting,response,transcript,result +END_TOKENS=final,end,completed,stopped,barge +``` + +#### 4. Configure Load Parameters + +Following [Azure Load Testing best practices](https://learn.microsoft.com/en-us/azure/load-testing/quickstart-create-and-run-load-test): + +| Parameter | Development | Staging | Production | +|-----------|-------------|---------|------------| +| **Virtual Users** | 5-10 | 50-100 | 200-500 | +| **Spawn Rate** | 1-2/sec | 5-10/sec | 20-50/sec | +| **Test Duration** | 5-10 min | 15-30 min | 30-60 min | +| **Engine Instances** | 1-2 | 3-5 | 5-10 | + +#### 5. 
Add Server Monitoring + +Integrate Azure resources for comprehensive monitoring: + +```bash +# Add monitored resources using azd-env-name tag +az load test server-metric create \ + --test-id "voice-agent-test" \ + --load-test-resource "voice-agent-loadtest" \ + --resource-group "rg-voice-agent" \ + --metric-id "app-service-cpu" \ + --resource-id "/subscriptions/{subscription}/resourceGroups/{rg}/providers/Microsoft.Web/sites/{app-name}" +``` + +## Performance Targets + +### Latency Benchmarks + +| Metric | Target | Acceptable | Notes | +|--------|--------|------------|-------| +| **TTFB P95** | <2000ms | <3000ms | Time to first server response | +| **Barge-in P95** | <500ms | <1000ms | Response interruption latency | +| **Connection Success** | >98% | >95% | WebSocket establishment rate | +| **Turn Success** | >95% | >90% | Successful conversation completion | + +### Capacity Targets + +| Environment | Concurrent Users | Duration | Success Rate | +|-------------|------------------|----------|--------------| +| **Development** | 10 users | 5 minutes | >95% | +| **Staging** | 100 users | 30 minutes | >95% | +| **Production** | 500+ users | 60 minutes | >98% | + +### Load Test Scenarios + +#### Development Testing +```bash +make run_load_test \ + URL=ws://localhost:8010/api/v1/media/stream \ + CONVERSATIONS=5 \ + CONCURRENT=2 +``` + +#### Staging Validation +```bash +make run_load_test \ + URL=wss://staging-backend.azurecontainerapps.io/api/v1/media/stream \ + CONVERSATIONS=50 \ + CONCURRENT=10 +``` + +#### Production Scale Testing +```bash +make run_load_test \ + URL=wss://prod-backend.azurecontainerapps.io/api/v1/media/stream \ + CONVERSATIONS=200 \ + CONCURRENT=50 +``` + +## Performance Analysis + +### Result Interpretation + +#### Locust Output Analysis +```bash +# View Locust results +cat locust_report_stats.csv | column -t -s, + +# Analyze specific metrics +grep "speech_turns" locust_report_stats.csv + +# Check error rates +cat locust_report_failures.csv +``` + 
+#### Azure Monitor Integration +```bash +# Monitor Azure resources during test +az monitor metrics list \ + --resource "/subscriptions/{sub}/resourceGroups/{rg}/providers/Microsoft.Web/sites/{app}" \ + --metric "CpuPercentage,MemoryPercentage" \ + --start-time "2024-01-01T00:00:00Z" \ + --end-time "2024-01-01T01:00:00Z" +``` + +### Key Performance Indicators + +#### Response Time Metrics +- **TTFB percentiles**: P50, P95, P99 response times +- **Barge-in timing**: Response interruption effectiveness +- **End-to-end latency**: Complete conversation turn timing + +#### Throughput Metrics +- **Requests per second**: WebSocket message throughput +- **Concurrent connections**: Maximum sustainable WebSocket connections +- **Audio streaming rate**: PCM chunk transmission rates + +#### Error Metrics +- **Connection failures**: WebSocket establishment errors +- **Timeout rates**: TTFB and response timeouts +- **Audio streaming errors**: PCM transmission failures + +## Best Practices + +### Local Development Testing + +1. **Start small**: Begin with 5-10 concurrent users +2. **Validate setup**: Ensure audio files are generated correctly +3. **Monitor resources**: Watch local CPU/memory usage +4. **Check endpoints**: Verify WebSocket connection establishment + +### Azure Load Testing Deployment + +1. **File size optimization**: Compress PCM files if needed for upload +2. **Environment parity**: Match production WebSocket endpoints +3. **Monitoring integration**: Include all relevant Azure resources +4. **Gradual scaling**: Increase load incrementally +5. 
**Result analysis**: Review both client and server metrics + +### Performance Optimization + +#### WebSocket Configuration +- **Connection pooling**: Reuse WebSocket connections where possible +- **Message batching**: Optimize audio chunk transmission +- **Error handling**: Implement robust reconnection logic + +#### Audio Processing +- **Chunk sizing**: Optimize PCM chunk size for performance +- **Silence detection**: Efficient end-of-speech handling +- **Memory management**: Proper audio buffer cleanup + +## Troubleshooting + +### Common Issues + +#### Audio Generation Failures +```bash +# Check Azure Speech Service credentials +echo $AZURE_SPEECH_KEY +echo $AZURE_SPEECH_REGION + +# Verify TTS functionality +python tests/load/utils/audio_generator.py --test-connection +``` + +#### WebSocket Connection Issues +```bash +# Test WebSocket endpoint +export WS_URL="ws://localhost:8010/api/v1/media/stream" +python -c "import websocket; ws = websocket.create_connection('$WS_URL'); print('Connected'); ws.close()" +``` + +#### Azure Load Testing Upload Issues +- Ensure file sizes are under Azure limits +- Verify all PCM files are in the correct format +- Check that locustfile.py is named correctly + +### Debugging Load Test Issues + +#### Locust Debug Mode +```bash +# Run with verbose logging +locust -f tests/load/locustfile.py --loglevel DEBUG + +# Single user testing +locust -f tests/load/locustfile.py --users 1 --spawn-rate 1 +``` + +#### Network Troubleshooting +```bash +# Test network connectivity +curl -I https://your-backend.azurecontainerapps.io/health + +# Check DNS resolution +nslookup your-backend.azurecontainerapps.io + +# Test WebSocket upgrade +curl -i -N \ + -H "Connection: Upgrade" \ + -H "Upgrade: websocket" \ + -H "Sec-WebSocket-Key: test" \ + -H "Sec-WebSocket-Version: 13" \ + https://your-backend.azurecontainerapps.io/api/v1/media/stream +``` + +## Advanced Usage + +### Custom Scenarios + +To add new conversation scenarios: + +1. 
**Update audio generator** with new conversation templates +2. **Regenerate audio files** using `make generate_audio` +3. **Update locustfile** to reference new audio files +4. **Test locally** before Azure deployment + +### Integration with CI/CD + +```yaml +# GitHub Actions example +- name: Generate Load Test Audio + run: make generate_audio + +- name: Run Load Test + run: make run_load_test URL=${{ secrets.STAGING_WS_URL }} + +- name: Upload Results + uses: actions/upload-artifact@v4 + with: + name: load-test-results + path: locust_report_*.csv +``` + +### Continuous Performance Testing + +#### Scheduled Testing +```bash +# Daily performance regression test +0 2 * * * cd /path/to/project && make run_load_test CONVERSATIONS=20 CONCURRENT=5 + +# Weekly capacity test +0 3 * * 0 cd /path/to/project && make run_load_test CONVERSATIONS=100 CONCURRENT=20 +``` + +#### Performance Monitoring +- Set up alerts for performance degradation +- Track performance trends over time +- Compare results across different environments + +--- + +This comprehensive load testing framework ensures reliable WebSocket performance testing with realistic audio streaming scenarios, supporting both local development and production-scale Azure Load Testing deployments. + +> **📖 References**: [Azure Load Testing](https://learn.microsoft.com/en-us/azure/load-testing/overview-what-is-azure-load-testing) • [Locust Documentation](https://docs.locust.io/en/stable/) • [Azure Speech Services](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/text-to-speech) \ No newline at end of file diff --git a/docs/operations/monitoring.md b/docs/operations/monitoring.md new file mode 100644 index 00000000..ecb679b8 --- /dev/null +++ b/docs/operations/monitoring.md @@ -0,0 +1,111 @@ +# :material-monitor-dashboard: Monitoring & Observability Guide + +!!!
abstract "Application Insights Integration" + This guide explains how to configure, use, and troubleshoot Azure Application Insights for comprehensive telemetry in the real-time audio agent application. + +The application uses the **Azure Monitor OpenTelemetry Distro** to automatically collect and send telemetry data to Application Insights, including: +- Structured logging +- Distributed request tracing +- Performance metrics +- Live Metrics + +--- + +## :material-cogs: Configuration & Authentication + +### Environment Variables + +| Variable | Description | Default | Required | +| --------------------------------------- | ------------------------------------------------ | --------- | -------- | +| `APPLICATIONINSIGHTS_CONNECTION_STRING` | The connection string for your App Insights resource. | None | **Yes** | +| `AZURE_MONITOR_DISABLE_LIVE_METRICS` | Disables Live Metrics to reduce permissions. | `false` | No | +| `ENVIRONMENT` | Sets the environment (`dev`, `prod`). | `dev` | No | + +### Authentication + +The telemetry configuration uses the `DefaultAzureCredential` chain, which automatically handles authentication in both local and deployed environments: +1. **Managed Identity (in Azure):** Automatically uses the system-assigned or user-assigned managed identity of the hosting service (e.g., Container Apps). +2. **Local Development:** Falls back to credentials from Azure CLI, Visual Studio Code, or environment variables. + +--- + +## :material-lock-check: Permissions & Troubleshooting + +!!! 
question "Problem: 'Forbidden' errors or 'The Agent/SDK does not have permissions to send telemetry'" + **Symptoms:** + ``` + azure.core.exceptions.HttpResponseError: Operation returned an invalid status 'Forbidden' + Content: {"Code":"InvalidOperation","Message":"The Agent/SDK does not have permissions to send telemetry..."} + ``` + This error typically occurs because the identity running the application (your user account locally, or a managed identity in Azure) lacks the necessary permissions to write telemetry, especially for the **Live Metrics** feature. + + **Solutions:** + 1. **Immediate Fix (Disable Live Metrics):** The simplest solution is to disable the Live Metrics feature, which requires elevated permissions. + ```bash + # Add this to your .env file or export it + AZURE_MONITOR_DISABLE_LIVE_METRICS=true + ``` + 2. **Grant Permissions (Local Development):** Grant your user account the `Application Insights Component Contributor` role on the App Insights resource. + ```bash + # Grant permissions to your Azure CLI user + az role assignment create \ + --assignee $(az account show --query user.name -o tsv) \ + --role "Application Insights Component Contributor" \ + --scope "/subscriptions/{subscription}/resourceGroups/{rg}/providers/Microsoft.Insights/components/{app-insights-name}" + ``` + 3. **Configure Managed Identity (Production):** In Azure, ensure the managed identity of your Container App has the `Application Insights Component Contributor` role. This is handled automatically by the provided Bicep and Terraform templates. + +--- + +## :material-magnify: Viewing Telemetry & Logs + +Once configured, you can explore your application's telemetry in the Azure portal. + +### Log Analytics Queries +Navigate to your Application Insights resource, select **Logs**, and run Kusto (KQL) queries. + +!!!
example "Kusto Query Examples" + === "View Recent Errors" + ```kusto + traces + | where timestamp > ago(1h) + | where severityLevel >= 3 // 3 for Error, 4 for Critical + | order by timestamp desc + ``` + === "Trace a Specific Call" + ```kusto + requests + | where url contains "start_call" + | project timestamp, url, resultCode, duration, operation_Id + | join kind=inner ( + traces | extend operation_Id = tostring(customDimensions.operation_Id) + ) on operation_Id + ``` + === "Custom Metrics" + ```kusto + customMetrics + | where name == "custom_requests_total" + | extend endpoint = tostring(customDimensions.endpoint) + | summarize sum(value) by endpoint + ``` + +### Key Monitoring Features +- **Application Map:** Visualizes the dependencies and communication between your services. +- **Live Metrics:** Real-time performance data (if permissions are granted). +- **Performance:** Analyze request latency, dependency calls, and identify bottlenecks. +- **Failures:** Investigate exceptions and failed requests with detailed stack traces. + +--- + +## :material-hammer-wrench: Production Best Practices + +- **Use Managed Identity:** Always prefer managed identities for authentication in Azure. +- **Use Key Vault:** Store the Application Insights connection string in Azure Key Vault and reference it in your application configuration. +- **Grant Minimal Permissions:** Assign the most restrictive role necessary. If you don't need Live Metrics, the `Monitoring Metrics Publisher` role may be sufficient. +- **Enable Alerts:** Configure alert rules in Azure Monitor to be notified of high error rates, performance degradation, or other critical events. +- **Sample Telemetry:** For high-traffic applications, configure sampling to reduce costs while still collecting representative data. + +!!! 
info "Additional Resources" + - **[Azure Monitor OpenTelemetry Documentation](https://learn.microsoft.com/en-us/azure/azure-monitor/app/opentelemetry-overview)** + - **[Application Insights Troubleshooting](https://learn.microsoft.com/en-us/azure/azure-monitor/app/troubleshoot)** + - **[Azure RBAC Documentation](https://learn.microsoft.com/en-us/azure/role-based-access-control/)** diff --git a/docs/operations/testing.md b/docs/operations/testing.md new file mode 100644 index 00000000..60af5c77 --- /dev/null +++ b/docs/operations/testing.md @@ -0,0 +1,443 @@ +# Testing Framework + +Comprehensive unit and integration testing suite for ARTVoice Accelerator covering core components along the call automation path. + +> **Note**: For load testing and performance validation, see [Load Testing Guide](load-testing.md). + +## Overview + +The testing framework provides validation for: + +- **Unit Tests**: Core component testing for call automation path +- **Integration Tests**: End-to-end event handling and lifecycle testing +- **DTMF Testing**: Dual-tone multi-frequency validation and failure scenarios +- **Code Quality**: Automated formatting, linting, and type checking + +## Unit Tests + +### Test Coverage Overview + +The unit test suite validates critical components along the call automation path: + +``` +tests/ +├── test_acs_media_lifecycle.py # Audio processing pipeline +├── test_acs_events_handlers.py # Event processing & WebSocket integration +├── test_redis_manager.py # Session state management +├── test_dtmf_validation.py # DTMF tone processing +├── test_dtmf_validation_failure_cancellation.py # DTMF error scenarios +├── test_events_architecture_simple.py # Event-driven architecture +├── test_speech_queue.py # Audio queue management +└── test_v1_events_integration.py # API v1 event integration +``` + +### Core Components Coverage + +#### ACS Media Lifecycle (`test_acs_media_lifecycle.py`) + +Tests the real-time audio processing pipeline components: + +**ThreadBridge 
Testing:** +- Queue management and speech result handling +- Backpressure handling when queues are full +- Cross-thread communication patterns + +**SpeechSDKThread Testing:** +- Speech recognition lifecycle and audio streaming +- Push stream initialization and management +- Recognizer state management and error handling + +**MainEventLoop Testing:** +- WebSocket message handling and audio metadata processing +- Barge-in functionality and playback cancellation +- Audio chunk processing and base64 decoding + +**RouteTurnThread Testing:** +- Turn processing and conversation flow management +- Cancellation logic and queue cleanup +- Response task management + +```python +# Example test coverage +def test_thread_bridge_queue_speech_result_put_nowait(): + # Tests immediate queue operations + +def test_main_event_loop_handle_barge_in_cancels_playback(): + # Tests response interruption handling + +def test_route_turn_thread_cancel_current_processing_clears_queue(): + # Tests conversation state cleanup +``` + +#### Event Handlers (`test_acs_events_handlers.py`) + +Validates event processing and WebSocket integration: + +**Call Event Processing:** +- Inbound and outbound call lifecycle management +- Call connection state transitions +- Participant management and call metadata + +**DTMF Event Handling:** +- Tone sequence processing and validation +- DTMF recognition and routing +- Sequence building and context updates + +**WebSocket Broadcasting:** +- Client notification system +- Message serialization and delivery +- Multi-client event distribution + +**Event Routing:** +- Cloud event dispatcher functionality +- Unknown event type handling +- Event context management + +```python +# Key test scenarios +def test_handle_call_initiated(): + # Tests outbound call setup + +def test_handle_call_connected_with_broadcast(): + # Tests WebSocket client notifications + +def test_handle_dtmf_tone_received(): + # Tests tone processing and sequence building +``` + +#### Redis Session 
Management (`test_redis_manager.py`) + +Tests Azure Redis cluster management and session persistence: + +**Cluster Detection:** +- Automatic cluster mode switching on MovedError +- Fallback behavior when cluster support unavailable +- Connection pool management + +**Address Remapping:** +- IP to domain name mapping for Azure Redis +- Cluster node address resolution +- Connection string handling + +**Session Operations:** +- Session data storage and retrieval +- Conversation history persistence +- Memory context management + +```python +def test_get_session_data_switches_to_cluster(): + # Tests automatic cluster detection + +def test_remap_cluster_address_to_domain(): + # Tests Azure Redis address mapping +``` + +#### DTMF Validation (`test_dtmf_validation.py`) + +Validates dual-tone multi-frequency processing: + +**Validation Flow:** +- AWS Connect DTMF validation setup +- Validation gate state management +- Tone collection and processing + +**Context Management:** +- Session state persistence during validation +- Validation context setup and teardown +- Error state handling + +**Timeout Handling:** +- Validation completion monitoring +- Timeout detection and handling +- Async validation workflows + +```python +def test_setup_aws_connect_validation_flow_sets_context(): + # Tests validation workflow initialization + +def test_wait_for_dtmf_validation_completion_success(): + # Tests successful validation completion +``` + +### Running Unit Tests + +#### Basic Test Execution + +```bash +# Run all unit tests +python -m pytest tests/ -v + +# Run specific test file +python -m pytest tests/test_acs_media_lifecycle.py -v + +# Run with coverage reporting +python -m pytest --cov=apps.rtagent.backend --cov-report=term-missing tests/ + +# Run specific test method +python -m pytest tests/test_acs_events_handlers.py::TestCallEventHandlers::test_handle_call_connected_with_broadcast -v +``` + +#### Advanced Test Options + +```bash +# Run tests with detailed output +python -m 
pytest tests/ -v -s + +# Run tests matching pattern +python -m pytest tests/ -k "dtmf" -v + +# Run tests with performance profiling +python -m pytest tests/ --durations=10 + +# Run tests in parallel (if pytest-xdist installed) +python -m pytest tests/ -n auto +``` + +## Integration Testing + +### Event Architecture Testing + +**Event Dispatching** (`test_events_architecture_simple.py`): +- Cloud event routing and handling +- Event serialization and deserialization +- Cross-component event flow validation + +**Memory Management**: +- Session context persistence across events +- Memory cleanup and lifecycle management +- Context sharing between components + +**Error Handling**: +- Exception management and recovery +- Graceful degradation scenarios +- Error propagation patterns + +### V1 Events Integration (`test_v1_events_integration.py`) + +**WebSocket Events**: +- Real-time event streaming validation +- Event ordering and sequencing +- Connection lifecycle management + +**Event Serialization**: +- JSON event format validation +- Event schema compliance +- Backward compatibility testing + +**Client Broadcasting**: +- Multi-client event distribution +- Client subscription management +- Event filtering and routing + +## Code Quality + +### Automated Code Quality Checks + +The project uses comprehensive code quality tools: + +```bash +# Run all code quality checks +make check_code_quality + +# Auto-fix formatting issues +make fix_code_quality + +# Individual tool execution +make run_unit_tests # Execute unit tests with coverage +``` + +#### Code Quality Tools + +**Formatting and Style:** +- **ruff**: Python linter and code formatter +- **black**: Code formatting +- **isort**: Import sorting and organization +- **flake8**: Style guide enforcement + +**Type Checking:** +- **mypy**: Static type checking +- **Type annotations**: Function and class type hints + +**Security:** +- **bandit**: Security vulnerability scanning +- **Dependency scanning**: Package vulnerability 
checks + +**Documentation:** +- **interrogate**: Docstring coverage checking +- **YAML validation**: Configuration file validation + +### Pre-commit Hooks + +```bash +# Install pre-commit hooks +make set_up_precommit_and_prepush + +# Manual pre-commit execution +pre-commit run --all-files +``` + +## Test Structure and Patterns + +### Test Organization + +**File Naming Convention:** +- `test_{component}.py`: Unit tests for specific components +- `test_{feature}_integration.py`: Integration tests for features +- `test_{component}_failure_{scenario}.py`: Failure scenario tests + +**Test Class Structure:** +```python +class TestComponentName: + """Test class for ComponentName functionality.""" + + @pytest.fixture + def component_instance(self): + """Fixture providing test instance.""" + return ComponentName() + + def test_component_basic_functionality(self, component_instance): + """Test basic component operation.""" + pass + + def test_component_error_handling(self, component_instance): + """Test component error scenarios.""" + pass +``` + +### Mocking and Test Doubles + +**Common Patterns:** +```python +# WebSocket mocking +mock_websocket = MagicMock() +mock_websocket.send_text = AsyncMock() + +# Azure service mocking +with patch('azure.communication.callautomation.CallAutomationClient'): + # Test Azure integration + pass + +# Async operation testing +@pytest.mark.asyncio +async def test_async_operation(): + result = await async_function() + assert result is not None +``` + +### Test Data Management + +**Fixtures for Test Data:** +```python +@pytest.fixture +def sample_call_event(): + """Provide sample call event data.""" + return CloudEvent( + source="test", + type=ACSEventTypes.CALL_CONNECTED, + data={"callConnectionId": "test_123"} + ) + +@pytest.fixture +def mock_memory_manager(): + """Provide mock memory manager.""" + manager = MagicMock() + manager.get_context.return_value = None + return manager +``` + +## Development Workflow + +### Testing During Development + +1. 
**Write tests first**: Follow TDD principles where applicable +2. **Run tests frequently**: Use `ptw` (from the `pytest-watch` plugin) for continuous testing +3. **Check coverage**: Maintain >80% test coverage on critical paths +4. **Review test output**: Analyze test failures and performance + +### CI/CD Integration + +```yaml +# Example GitHub Actions workflow +- name: Run Unit Tests + run: make run_unit_tests + +- name: Check Code Quality + run: make check_code_quality + +- name: Upload Coverage + uses: codecov/codecov-action@v3 + with: + file: ./coverage.xml +``` + +### Test Environment Setup + +```bash +# Create test environment +make create_conda_env + +# Activate environment +make activate_conda_env + +# Install test dependencies +pip install -r requirements-test.txt +``` + +## Best Practices + +### Test Development Guidelines + +1. **Isolation**: Each test should be independent and repeatable +2. **Clarity**: Test names should clearly describe what is being tested +3. **Coverage**: Focus on critical paths and edge cases +4. **Performance**: Keep unit tests fast (<1s per test) +5. 
**Documentation**: Include docstrings explaining complex test scenarios + +### Debugging Test Failures + +```bash +# Run with verbose output +python -m pytest tests/test_failing.py -v -s + +# Run with debugger +python -m pytest tests/test_failing.py --pdb + +# Run with logging +python -m pytest tests/test_failing.py --log-cli-level=DEBUG +``` + +### Mock Strategy + +- **Unit tests**: Mock external dependencies (Azure services, databases) +- **Integration tests**: Use test doubles for expensive operations +- **End-to-end tests**: Minimize mocking, use test environments + +## Test Results and Coverage + +### Current Test Coverage + +The test suite provides comprehensive coverage of: +- **ACS Media Pipeline**: 85% coverage of audio processing components +- **Event Handling**: 90% coverage of webhook and cloud event processing +- **Redis Management**: 95% coverage of session state management +- **DTMF Processing**: 80% coverage of tone validation logic + +### Coverage Reporting + +```bash +# Generate HTML coverage report +python -m pytest --cov=apps.rtagent.backend --cov-report=html tests/ + +# View coverage report +open htmlcov/index.html +``` + +### Performance Testing + +For performance and load testing capabilities, including WebSocket stress testing and Azure Load Testing integration, see the dedicated [Load Testing Guide](load-testing.md). + +--- + +This testing framework ensures the reliability and maintainability of the ARTVoice Accelerator platform through comprehensive unit and integration testing coverage. 
+> **📖 References**: [pytest Documentation](https://docs.pytest.org/) • [Python Testing Best Practices](https://docs.python-guide.org/writing/tests/) • [Azure SDK Testing](https://github.com/Azure/azure-sdk-for-python/blob/main/doc/dev/tests.md) \ No newline at end of file diff --git a/docs/operations/troubleshooting.md b/docs/operations/troubleshooting.md new file mode 100644 index 00000000..1fa94bf7 --- /dev/null +++ b/docs/operations/troubleshooting.md @@ -0,0 +1,188 @@ +# :material-wrench: Troubleshooting Guide + +!!! abstract "Quick Solutions for Common Issues" + This guide provides solutions for common issues encountered with the Real-Time Voice Agent application, covering deployment, connectivity, and performance. + +--- + +## :material-phone: ACS & WebSocket Issues + +!!! question "Problem: ACS is not making outbound calls or audio quality is poor" + **Symptoms:** + - Call fails to initiate or no audio connection is established. + - ACS callback events are not received. + - Audio quality is choppy or has high latency. + + **Solutions:** + 1. **Check Container App Logs:** + ```bash + # Monitor backend logs for errors + make monitor_backend_deployment + # Or directly query Azure Container Apps + az containerapp logs show --name {container-app-name} --resource-group {resource-group} + ``` + 2. **Verify Webhook Accessibility:** Ensure your webhook URL is public and uses `https`. For local development, use a tunnel: + ```bash + # Use devtunnel for local development + devtunnel host -p 8010 --allow-anonymous + ``` + 3. **Test WebSocket Connectivity:** + ```bash + # Install wscat (npm install -g wscat) and test the connection + wscat -c wss://your-domain.com/ws/call/{callConnectionId} + ``` + 4. **Check ACS & Speech Resources:** Verify that your ACS connection string and Speech service keys are correctly configured in your environment variables. + +!!! question "Problem: WebSocket connection fails or drops frequently" + **Symptoms:** + - `WebSocket connection failed` errors in the browser console. 
+ - Frequent reconnections or missing real-time updates. + + **Solutions:** + 1. **Test WebSocket Endpoint Directly:** + ```bash + wscat -c wss://{your-backend-host}/api/v1/media/stream + ``` + 2. **Check CORS Configuration:** Ensure your frontend's origin is allowed in the backend's CORS settings, especially for WebSocket upgrade headers. + 3. **Monitor Connection Lifecycle:** Review backend logs for WebSocket connection and disconnection events to identify patterns. + +--- + +## :material-api: Backend & API Issues + +!!! question "Problem: FastAPI server won't start or endpoints return 500 errors" + **Symptoms:** + - Import errors, "port already in use," or environment variable errors on startup. + - API endpoints respond with `500 Internal Server Error`. + + **Solutions:** + 1. **Check Python Environment & Dependencies:** + ```bash + # Ensure you are in the correct conda environment + conda activate audioagent + # Reinstall dependencies + pip install -r requirements.txt + ``` + 2. **Free Up Port:** If port `8010` is in use, find and terminate the process: + ```bash + # Find and kill the process on macOS or Linux + lsof -ti:8010 | xargs kill -9 + ``` + 3. **Run with Debug Logging:** + ```bash + uvicorn apps.rtagent.backend.main:app --reload --port 8010 --log-level debug + ``` + 4. **Verify Environment File (`.env`):** Ensure the file exists and all required variables for Azure, Redis, and OpenAI are correctly set. + +--- + +## :material-cloud-alert: Azure AI & Redis Issues + +!!! question "Problem: Speech-to-Text or OpenAI API errors" + **Symptoms:** + - Transcription is not appearing or is inaccurate. + - AI-generated responses are missing or failing. + - `401 Unauthorized` or `429 Too Many Requests` errors. + + **Solutions:** + 1. **Check Keys and Endpoints:** Verify that `AZURE_COGNITIVE_SERVICES_KEY`, `AZURE_OPENAI_ENDPOINT`, and other related variables are correct. + 2. 
**Test Service Connectivity Directly:** + ```bash + # Test Azure Speech API (replace with a valid audio file) + curl -X POST "https://{region}.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1" \ + -H "Ocp-Apim-Subscription-Key: {key}" -H "Content-Type: audio/wav" --data-binary @test.wav + + # Test OpenAI API + curl -X GET "{endpoint}/openai/deployments?api-version=2023-12-01-preview" -H "api-key: {key}" + ``` + 3. **Check Quotas and Model Names:** Ensure your service quotas have not been exceeded and that the model deployment names in your code match those in the Azure portal. + +!!! question "Problem: Redis connection timeouts or failures" + **Symptoms:** + - High latency in agent responses. + - Errors related to reading or writing session state. + - `ConnectionTimeoutError` in backend logs. + + **Solutions:** + 1. **Test Redis Connectivity:** + ```bash + # Use redis-cli to ping the server + redis-cli -u $REDIS_URL ping + ``` + 2. **Verify Configuration:** For Azure Cache for Redis, check the connection string, firewall rules, and whether SSL/TLS is required. + +--- + +## :material-rocket-launch: Deployment & Performance + +!!! question "Problem: `azd` deployment fails or containers won't start" + **Symptoms:** + - `azd up` or `azd provision` command fails with an error. + - Container Apps show a status of "unhealthy" or are stuck in a restart loop. + + **Solutions:** + 1. **Check Azure Authentication & Permissions:** + ```bash + # Ensure you are logged into the correct account + az account show + # Verify you have Contributor/Owner rights on the subscription + ``` + 2. **Review Deployment Logs:** + ```bash + # Use the 'logs' command for detailed output + azd logs + # For container-specific issues + az containerapp logs show --name {container-app-name} --resource-group {resource-group} --follow + ``` + 3. **Purge and Redeploy:** As a last resort, a clean deployment can resolve state issues: + ```bash + azd down --force --purge + azd up + ``` + +!!! 
question "Problem: High latency or memory usage" + **Symptoms:** + - Slow audio processing or delayed AI responses. + - Backend container memory usage grows over time and leads to restarts. + + **Solutions:** + 1. **Monitor Resources:** Use `htop` or `docker stats` locally, and Application Insights in Azure to monitor CPU and memory usage. + 2. **Profile Memory Usage:** Add lightweight profiling to your Python code to track object allocation and identify potential leaks. + ```python + import psutil + process = psutil.Process() + print(f"Memory usage: {process.memory_info().rss / 1024 / 1024:.1f} MB") + ``` + 3. **Check for Connection Leaks:** Ensure that database and WebSocket connections are properly closed and managed. + +--- + +## :material-toolbox-outline: Debugging Tools & Commands + +!!! tip "Essential Commands for Quick Diagnostics" + + - **Health Check:** + ```bash + make health_check + ``` + - **Monitor Backend Deployment:** + ```bash + make monitor_backend_deployment + ``` + - **View Logs:** + ```bash + tail -f logs/app.log + ``` + - **Test WebSocket Connection:** + ```bash + wscat -c ws://localhost:8010/ws/call/test-id + ``` + - **Check Network Connectivity:** + ```bash + curl -v http://localhost:8010/health + ``` + +!!! info "Log Locations" + - **Backend:** Container logs in Azure or `logs/app.log` locally. + - **Frontend:** Browser developer console (F12). + - **Azure Services:** Azure Monitor and Application Insights. diff --git a/docs/quickstart-local-development.md b/docs/quickstart-local-development.md deleted file mode 100644 index 1133b7a7..00000000 --- a/docs/quickstart-local-development.md +++ /dev/null @@ -1,217 +0,0 @@ -# ⚡ Local Development - -Run the ARTVoice Accelerator locally with raw commands. No Makefile usage. Keep secrets out of git and rotate any previously exposed keys. - ---- - -## 1. 
Scope - -What this covers: -- Local backend (FastAPI + Uvicorn) and frontend (Vite/React) -- Dev tunnel for inbound [Azure Communication Services](https://learn.microsoft.com/en-us/azure/communication-services/) callbacks -- Environment setup via venv OR Conda -- Minimal `.env` files (root + frontend) - -What this does NOT cover: -- Full infra provisioning -- CI/CD -- Persistence hardening - ---- - -## 2. Prerequisites - -| Tool | Notes | -|------|-------| -| Python 3.11 | Required runtime | -| Node.js ≥ 22 | Frontend | -| Azure CLI | `az login` first | -| Dev Tunnels | `az extension add --name dev-tunnel` | -| (Optional) Conda | If using `environment.yaml` | -| Provisioned Azure resources | For real STT/TTS/LLM/ACS | - -If you only want a browser demo (no phone), ACS variables are optional. - ---- - -## 3. Clone Repository - -```bash -git clone https://github.com/pablosalvador10/gbb-ai-audio-agent.git -cd gbb-ai-audio-agent -``` - ---- - -## 4. Python Environment (Choose One) - -### Option A: venv -```bash -python -m venv .venv -source .venv/bin/activate -pip install --upgrade pip -pip install -r requirements.txt -``` - -### Option B: Conda -```bash -conda env create -f environment.yaml -conda activate audioagent -pip install -r requirements.txt # sync with lock -``` - ---- - -## 5. 
Root `.env` (Create in repo root) - -Minimal template (edit placeholders; DO NOT commit real values): - -``` -# ===== Azure OpenAI ===== -AZURE_OPENAI_ENDPOINT=https://.openai.azure.com -AZURE_OPENAI_KEY= -AZURE_OPENAI_DEPLOYMENT=gpt-4-1-mini -AZURE_OPENAI_API_VERSION=2024-12-01-preview -AZURE_OPENAI_CHAT_DEPLOYMENT_ID=gpt-4-1-mini -AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION=2024-11-20 - -# ===== Speech ===== -AZURE_SPEECH_REGION= -AZURE_SPEECH_KEY= - -# ===== ACS (optional unless using phone/PSTN) ===== -ACS_CONNECTION_STRING=endpoint=https://.communication.azure.com/;accesskey= -ACS_SOURCE_PHONE_NUMBER=+1XXXXXXXXXX -ACS_ENDPOINT=https://.communication.azure.com - -# ===== Optional Data Stores ===== -REDIS_HOST= -REDIS_PORT=6380 -REDIS_PASSWORD= -AZURE_COSMOS_CONNECTION_STRING= -AZURE_COSMOS_DATABASE_NAME=audioagentdb -AZURE_COSMOS_COLLECTION_NAME=audioagentcollection - -# ===== Runtime ===== -ENVIRONMENT=dev -ACS_STREAMING_MODE=media - -# ===== Filled after dev tunnel starts ===== -BASE_URL=https:// -``` - -Ensure `.env` is in `.gitignore`. - ---- - -## 6. Start Dev Tunnel - -Required if you want ACS callbacks (phone flow) or remote test: - -```bash -devtunnel host -p 8010 --allow-anonymous -``` - -Copy the printed HTTPS URL and set `BASE_URL` in root `.env`. Update it again if the tunnel restarts (URL changes). - -The Dev Tunnel URL will look similar to: -```bash -https://abc123xy-8010.usw3.devtunnels.ms -``` - ---- - -## 7. Run Backend - -```bash -cd apps/rtagent/backend -uvicorn apps.rtagent.backend.main:app --host 0.0.0.0 --port 8010 --reload -``` - ---- - -## 8. Frontend Environment - -Create or edit `apps/rtagent/frontend/.env`: - -Use the dev tunnel URL by default so the frontend (and any external device or ACS-related flows) reaches your backend consistently—even if you open the UI on another machine or need secure HTTPS. 
- -``` -# Recommended (works across devices / matches ACS callbacks) -VITE_BACKEND_BASE_URL=https:// -``` - -If the tunnel restarts (URL changes), update both `BASE_URL` in the root `.env` and this value. - ---- - -## 9. Run Frontend - -```bash -cd apps/rtagent/frontend -npm install -npm run dev -``` - -Open: http://localhost:5173 - -WebSocket URL is auto-derived by replacing `http/https` with `ws/wss`. - ---- - -## 10. Optional: Phone (PSTN) Flow - -1. Purchase ACS phone number (Portal or CLI). - -2. Ensure these vars are set in your root `.env` (with real values): - - ``` - ACS_CONNECTION_STRING=endpoint=... - ACS_SOURCE_PHONE_NUMBER=+1XXXXXXXXXX - ACS_ENDPOINT=https://.communication.azure.com - BASE_URL=https://-8010.usw3.devtunnels.ms - ``` - -3. Create a single Event Grid subscription for the Incoming Call event pointing to your answer handler: - - Inbound endpoint: - `https://-8010.usw3.devtunnels.ms/api/v1/calls/answer` - - Event type: `Microsoft.Communication.IncomingCall` - - (Callbacks endpoint `/api/v1/calls/callbacks` is optional unless you need detailed lifecycle events.) - - If tunnel URL changes, update the subscription (delete & recreate or update endpoint). - - Reference: [Subscribing to events](https://learn.microsoft.com/en-us/azure/communication-services/quickstarts/events/subscribe-to-event) - -4. Dial the number; observe: - - Call connection established - - Media session events - - STT transcripts - - TTS audio frames - ---- - -## 11. Quick Browser Test - -1. Backend + frontend running. -2. Open app, allow microphone. -3. Speak → expect: - - Interim/final transcripts - - Model response - - Audio playback - ---- - -## 12. 
Troubleshooting - -| Symptom | Likely Cause | Fix | -|---------|--------------|-----| -| 404 on callbacks | Stale `BASE_URL` | Restart tunnel, update `.env` | -| No audio | Speech key/region invalid | Verify Azure Speech resource | -| WS closes fast | Wrong `VITE_BACKEND_BASE_URL` | Use exact backend/tunnel URL | -| Slow first reply | Cold pool warm-up | Keep process running | -| Phone call no events | ACS callback not updated to tunnel | Reconfigure Event Grid subscription | -| Import errors | Missing dependencies | Re-run `pip install -r requirements.txt` | - ---- - -Keep secrets out of commits. Rotate anything that has leaked. \ No newline at end of file diff --git a/docs/samples/README.md b/docs/samples/README.md new file mode 100644 index 00000000..22e588ca --- /dev/null +++ b/docs/samples/README.md @@ -0,0 +1,63 @@ +# Samples & Labs + +Explore hands-on notebooks that demonstrate how to build and extend the Real-Time +Voice Agent. The repository groups content into quickstart “Hello World” tutorials +and deeper lab exercises. + +## Hello World Series + +Beginner-friendly notebooks under `samples/hello_world/` walk through the core +features step by step. + +| Notebook | Summary | +| --- | --- | +| `01-create-your-first-rt-agent.ipynb` | Assemble a basic customer-support voice agent end to end. | +| `02-run-test-rt-agent.ipynb` | Execute call flows and validate the agent locally. | +| `03-create-your-first-foundry-agents.ipynb` | Provision Azure AI Foundry agents and wire them into the pipeline. | +| `04-exploring-live-api.ipynb` | Explore Azure Voice Live API capabilities. | +| `05-create-your-first-livevoice.ipynb` | Build out Live Voice scenarios using the accelerator scaffold. | + +Tips: +- Run notebooks in sequence for a guided learning path. +- Launch Jupyter from the repo root so relative imports work (`jupyter lab`). +- Ensure `.env` contains valid Azure credentials before executing calls. 
+ +## Advanced Labs + +Deep-dive content lives in `samples/labs/` and focuses on performance tuning, +state management, and experimentation. + +| Notebook | Focus | +| --- | --- | +| `01-build-your-audio-agent.ipynb` | Full voice-to-voice pipeline with Azure AI components. | +| `02-how-to-use-aoai-for-realtime-transcriptions.ipynb` | Optimize Azure OpenAI for real-time STT. | +| `03-latency-arena.ipynb` | Measure and optimize end-to-end latency. | +| `04-memory-agents.ipynb` | Implement conversational memory and session persistence. | +| `05-speech-to-text-multilingual.ipynb` | Multi-language transcription workflows. | +| `06-text-to-speech.ipynb` | Tune neural voice synthesis and SSML. | +| `07-vad.ipynb` | Voice activity detection experiments. | +| `08-speech-to-text-diarization.ipynb` | Multi-speaker diarization strategies. | +| `voice-live.ipynb` | Real-time voice tests across environments. | + +### Audio Experiment Bundles + +- `labs/podcast_voice_tests/` – Compare TTS model outputs against ground-truth + recordings to evaluate voice quality. +- `labs/recordings/` – Store captured audio samples for regression testing and + debugging. + +## Environment Checklist + +1. Python 3.11+ with project dependencies installed (`pip install -r requirements.txt`). +2. Jupyter or VS Code notebooks. Activate the project virtual environment first. +3. Azure resources (Speech, OpenAI, ACS, Redis) provisioned and referenced in `.env`. + +## Suggested Paths + +- **New to the stack?** Start with the Hello World series (notebooks 01 → 05). +- **Voice quality & tuning:** Labs 06, 07, and the podcast voice tests. +- **Performance & reliability:** Labs 03 and `voice-live.ipynb` for latency and live + validation. + +For additional context, see `samples/README.md` in the repository root—this page is a +condensed version suitable for the documentation site. 
diff --git a/docs/AuthForHTTPandWSS.md b/docs/security/authentication.md similarity index 89% rename from docs/AuthForHTTPandWSS.md rename to docs/security/authentication.md index eb1f1b21..e09e96e2 100644 --- a/docs/AuthForHTTPandWSS.md +++ b/docs/security/authentication.md @@ -1,7 +1,6 @@ +# Authentication Guide -# Azure Communication Services Authentication Guide - -This document outlines the authentication and session management strategy for a real-time voice agent application that integrates Azure Communication Services (ACS) with external telephony systems. +This document outlines the authentication and session management strategy for the real-time voice agent application that integrates Azure Communication Services (ACS) with external telephony systems. ## Table of Contents @@ -11,8 +10,8 @@ This document outlines the authentication and session management strategy for a - [PSTN Flow (with DTMF Authentication)](#pstn-flow) - [SIP Flow (with DTMF Authentication)](#sip-flow) - [API Flow (with Direct Lookup)](#api-flow) -4. [Session Key Management](#session-key-management) -5. [Implementation Examples](#implementation-examples) +4. [WebSocket Authentication](#websocket-authentication) +5. [Session Key Management](#session-key-management) 6. [Security Architecture](#security-architecture) 7. [Technical References](#technical-references) @@ -105,7 +104,6 @@ flowchart LR class Redis storage ``` - ## Call Flow Types ### PSTN Flow @@ -137,6 +135,24 @@ flowchart LR --- +## WebSocket Authentication + +WebSocket connections require secure authentication for real-time media processing. The system implements custom token validation based on the established session. 
+ +### WebSocket Security Implementation + +For detailed WebSocket authentication patterns, see the official Azure Communication Services documentation: +[Secure Webhook Endpoint](https://learn.microsoft.com/en-us/azure/communication-services/how-tos/call-automation/secure-webhook-endpoint?pivots=programming-language-python) + +### Key Security Features + +- **Token-based Authentication**: Custom JWT tokens for WebSocket connections +- **Session Correlation**: WebSocket sessions correlated with call sessions +- **Real-time Validation**: Continuous validation during media streaming +- **Secure Handshake**: Encrypted WebSocket handshake process + +--- + ## Session Key Management ### Session Key Formats @@ -201,9 +217,7 @@ The authentication flow leverages **DTMF media analysis** for telephony calls (P --- -### 🔐 **Authentication & Security Architecture** - -### Security Layers +### 🔐 **Security Layers** | Layer | Method | Purpose | |-------|--------|---------| @@ -241,4 +255,4 @@ The authentication flow leverages **DTMF media analysis** for telephony calls (P ### Implementation Patterns - [WebSocket Authentication](https://learn.microsoft.com/en-us/azure/communication-services/how-tos/call-automation/secure-webhook-endpoint#call-automation-webhook-events) -- [Redis Session Management](https://redis.io/docs/latest/develop/use/patterns/sessions/) +- [Redis Session Management](https://redis.io/docs/latest/develop/use/patterns/sessions/) \ No newline at end of file diff --git a/infra/terraform/ai-foundry.tf b/infra/terraform/ai-foundry.tf index c309c8ae..59cef31f 100644 --- a/infra/terraform/ai-foundry.tf +++ b/infra/terraform/ai-foundry.tf @@ -1,5 +1,5 @@ module "ai_foundry" { - source = "./modules/ai" + source = "./modules/aifoundry" resource_group_name = azurerm_resource_group.main.name location = azurerm_resource_group.main.location @@ -16,10 +16,31 @@ module "ai_foundry" { model_deployments = var.model_deployments log_analytics_workspace_id = 
azurerm_log_analytics_workspace.main.id - account_principal_ids = distinct([ - azurerm_user_assigned_identity.backend.principal_id, - azurerm_user_assigned_identity.frontend.principal_id, - azapi_resource.acs.identity[0].principal_id, - local.principal_id - ]) + account_principal_ids = { + backend_identity = azurerm_user_assigned_identity.backend.principal_id + frontend_identity = azurerm_user_assigned_identity.frontend.principal_id + acs_identity = azapi_resource.acs.identity[0].principal_id + deployer_identity = local.principal_id + } + + depends_on = [ azurerm_resource_group.main ] +} + + +resource "azurerm_monitor_diagnostic_setting" "ai_foundry_account" { + name = "${local.resource_names.foundry_account}-diagnostics" + target_resource_id = module.ai_foundry.account_id + log_analytics_workspace_id = azurerm_log_analytics_workspace.main.id + + enabled_log { + category = "Audit" + } + + enabled_log { + category = "RequestResponse" + } + + enabled_metric { + category = "AllMetrics" + } } diff --git a/infra/terraform/modules/ai/foundry.tf b/infra/terraform/modules/aifoundry/main.tf similarity index 56% rename from infra/terraform/modules/ai/foundry.tf rename to infra/terraform/modules/aifoundry/main.tf index 499e1276..3d5c525c 100644 --- a/infra/terraform/modules/ai/foundry.tf +++ b/infra/terraform/modules/aifoundry/main.tf @@ -1,18 +1,7 @@ # Terraform module for provisioning Azure AI Foundry aligned with the ai-services deployment. locals { - account_name_raw = lower(trimspace(var.foundry_account_name)) - custom_subdomain_name_raw = var.foundry_custom_subdomain_name != null && trimspace(var.foundry_custom_subdomain_name) != "" ? lower(trimspace(var.foundry_custom_subdomain_name)) : local.account_name_raw - - project_name_raw = var.project_name != null && trimspace(var.project_name) != "" ? 
lower(trimspace(var.project_name)) : "${local.account_name_raw}-project" - - project_display_name_raw = var.project_display_name != null && trimspace(var.project_display_name) != "" ? trimspace(var.project_display_name) : local.project_name_raw - - project_description_raw = var.project_description != null && trimspace(var.project_description) != "" ? trimspace(var.project_description) : "Azure AI Foundry project ${local.project_display_name_raw}" - project_id_guid = "${substr(azapi_resource.ai_foundry_project.output.properties.internalId, 0, 8)}-${substr(azapi_resource.ai_foundry_project.output.properties.internalId, 8, 4)}-${substr(azapi_resource.ai_foundry_project.output.properties.internalId, 12, 4)}-${substr(azapi_resource.ai_foundry_project.output.properties.internalId, 16, 4)}-${substr(azapi_resource.ai_foundry_project.output.properties.internalId, 20, 12)}" - - account_principal_map = { for idx, pid in tolist(nonsensitive(var.account_principal_ids)) : idx => pid if pid != null && pid != "" } } data "azurerm_resource_group" "rg" { @@ -21,7 +10,7 @@ data "azurerm_resource_group" "rg" { resource "azapi_resource" "ai_foundry_account" { type = "Microsoft.CognitiveServices/accounts@2025-06-01" - name = local.account_name_raw + name = var.foundry_account_name parent_id = data.azurerm_resource_group.rg.id location = var.location schema_validation_enabled = false @@ -38,29 +27,11 @@ resource "azapi_resource" "ai_foundry_account" { properties = { allowProjectManagement = true disableLocalAuth = var.disable_local_auth - customSubDomainName = local.custom_subdomain_name_raw + customSubDomainName = var.foundry_custom_subdomain_name } } } -resource "azurerm_monitor_diagnostic_setting" "ai_foundry_account" { - count = var.log_analytics_workspace_id != null && var.log_analytics_workspace_id != "" ? 
1 : 0 - name = "${local.account_name_raw}-diagnostics" - target_resource_id = azapi_resource.ai_foundry_account.id - log_analytics_workspace_id = var.log_analytics_workspace_id - - enabled_log { - category = "Audit" - } - - enabled_log { - category = "RequestResponse" - } - - enabled_metric { - category = "AllMetrics" - } -} resource "azurerm_cognitive_deployment" "model" { for_each = { for deployment in var.model_deployments : deployment.name => deployment } @@ -82,7 +53,7 @@ resource "azurerm_cognitive_deployment" "model" { resource "azapi_resource" "ai_foundry_project" { type = "Microsoft.CognitiveServices/accounts/projects@2025-06-01" - name = local.project_name_raw + name = var.project_name parent_id = azapi_resource.ai_foundry_account.id location = var.location schema_validation_enabled = false @@ -96,8 +67,8 @@ resource "azapi_resource" "ai_foundry_project" { name = var.project_sku_name } properties = { - displayName = local.project_display_name_raw - description = local.project_description_raw + displayName = var.project_display_name + description = var.project_description } } response_export_values = [ @@ -107,10 +78,9 @@ resource "azapi_resource" "ai_foundry_project" { } resource "azurerm_role_assignment" "ai_foundry_account" { - for_each = local.account_principal_map + for_each = var.account_principal_ids scope = azapi_resource.ai_foundry_account.id role_definition_name = var.account_principal_role_definition_name principal_id = each.value } - diff --git a/infra/terraform/modules/ai/outputs.tf b/infra/terraform/modules/aifoundry/outputs.tf similarity index 100% rename from infra/terraform/modules/ai/outputs.tf rename to infra/terraform/modules/aifoundry/outputs.tf diff --git a/infra/terraform/modules/ai/project_capability_host.tf b/infra/terraform/modules/aifoundry/project_capability_host.tf similarity index 100% rename from infra/terraform/modules/ai/project_capability_host.tf rename to infra/terraform/modules/aifoundry/project_capability_host.tf diff 
--git a/infra/terraform/modules/ai/project_connections.tf b/infra/terraform/modules/aifoundry/project_connections.tf similarity index 100% rename from infra/terraform/modules/ai/project_connections.tf rename to infra/terraform/modules/aifoundry/project_connections.tf diff --git a/infra/terraform/modules/ai/providers.tf b/infra/terraform/modules/aifoundry/providers.tf similarity index 67% rename from infra/terraform/modules/ai/providers.tf rename to infra/terraform/modules/aifoundry/providers.tf index 030715df..7e498943 100644 --- a/infra/terraform/modules/ai/providers.tf +++ b/infra/terraform/modules/aifoundry/providers.tf @@ -11,9 +11,9 @@ terraform { } } -provider "azurerm" { - features {} - storage_use_azuread = true -} +# provider "azurerm" { +# features {} +# storage_use_azuread = true +# } -provider "azapi" {} +# provider "azapi" {} diff --git a/infra/terraform/modules/ai/variables.tf b/infra/terraform/modules/aifoundry/variables.tf similarity index 95% rename from infra/terraform/modules/ai/variables.tf rename to infra/terraform/modules/aifoundry/variables.tf index 3c357ff8..c4a13f8b 100644 --- a/infra/terraform/modules/ai/variables.tf +++ b/infra/terraform/modules/aifoundry/variables.tf @@ -90,9 +90,9 @@ variable "log_analytics_workspace_id" { } variable "account_principal_ids" { - description = "Principal IDs to assign Cognitive Services access to the AI Foundry account." - type = list(string) - default = [] + description = "Map of principals to assign Cognitive Services access to the AI Foundry account (keys should be stable labels)." + type = map(string) + default = {} } variable "account_principal_role_definition_name" { @@ -137,4 +137,4 @@ variable "ai_search_endpoint" { description = "Optional Azure AI Search resource endpoint for AI Foundry to use for search capabilities." 
type = string default = null -} \ No newline at end of file +} diff --git a/infra/terraform/variables.tf b/infra/terraform/variables.tf index 594a926b..f38ed66e 100644 --- a/infra/terraform/variables.tf +++ b/infra/terraform/variables.tf @@ -221,7 +221,7 @@ variable "container_memory_gb" { variable "aoai_pool_size" { description = "Size of the Azure OpenAI client pool for optimal performance" type = number - default = 50 + default = 5 validation { condition = var.aoai_pool_size >= 5 && var.aoai_pool_size <= 200 error_message = "AOAI pool size must be between 5 and 200." @@ -231,7 +231,7 @@ variable "aoai_pool_size" { variable "tts_pool_size" { description = "Size of the TTS client pool for optimal performance" type = number - default = 100 + default = 10 validation { condition = var.tts_pool_size >= 10 && var.tts_pool_size <= 500 error_message = "TTS pool size must be between 10 and 500." @@ -241,7 +241,7 @@ variable "tts_pool_size" { variable "stt_pool_size" { description = "Size of the STT client pool for optimal performance" type = number - default = 100 + default = 10 validation { condition = var.stt_pool_size >= 10 && var.stt_pool_size <= 500 error_message = "STT pool size must be between 10 and 500." 
diff --git a/mkdocs.yml b/mkdocs.yml index cb748faa..5407b1d3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,8 +1,8 @@ site_name: Real-Time Voice Agent Documentation site_description: Azure-powered real-time voice agent with text-to-speech and speech recognition capabilities -site_url: https://pablosalvador10.github.io/gbb-ai-audio-agent/ -repo_url: https://github.com/pablosalvador10/gbb-ai-audio-agent -repo_name: pablosalvador10/gbb-ai-audio-agent +# site_url: https://github.com/Azure-Samples/art-voice-agent-accelerator/ +repo_url: https://github.com/Azure-Samples/art-voice-agent-accelerator/ +repo_name: Azure-Samples/art-voice-agent-accelerator theme: name: material @@ -33,6 +33,8 @@ theme: plugins: - search + - mermaid2 + - neoteroi.mkdocsoad - mkdocstrings: handlers: python: @@ -51,31 +53,57 @@ plugins: nav: - Home: index.md - Getting Started: - - Installation: getting-started/installation.md - - Quick Start: getting-started/quickstart.md - - Configuration: getting-started/configuration.md - - API Reference: - - Overview: api/overview.md - - Speech Synthesis: api/speech-synthesis.md - - Speech Recognition: api/speech-recognition.md - - Utilities: api/utilities.md + - Quick Start Guide: getting-started/README.md + - Local Development: getting-started/local-development.md + - Configuration Guide: getting-started/configuration.md - Architecture: - - Overview: architecture/overview.md - - Azure Integration: architecture/azure-integration.md - - Observability: architecture/observability.md - - Examples: - - Basic Usage: examples/basic-usage.md - - Advanced Scenarios: examples/advanced-scenarios.md - - Production Deployment: examples/production.md - + - Overview: architecture/README.md + - LLM Orchestration: architecture/llm-orchestration.md + - ACS Flows: architecture/acs-flows.md + - Data Flows: architecture/data-flows.md + - Speech Recognition: architecture/speech-recognition.md + - Speech Synthesis: architecture/speech-synthesis.md + - Streaming Modes: 
architecture/streaming-modes.md + - Integrations: architecture/integrations.md + - Deployment: + - Deployment Guide: deployment/README.md + - Production: deployment/production.md + - CI/CD: deployment/cicd.md + - Security: + - Authentication: security/authentication.md + - Operations: + - Monitoring: operations/monitoring.md + - Troubleshooting: operations/troubleshooting.md + - Testing: operations/testing.md + - Load Testing: operations/load-testing.md + - API Reference: + - Overview: api/README.md + - API Reference: api/api-reference.md + - Industry Solutions: + - Healthcare: industry/healthcare.md + - Samples & Labs: + - Overview: samples/README.md + - Guides: + - Repository Structure: guides/repository-structure.md + - Utilities & Services: guides/utilities.md +extra_css: + - assets/oad-styles.css markdown_extensions: + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg - pymdownx.highlight: anchor_linenums: true line_spans: __span pygments_lang_class: true - pymdownx.inlinehilite - pymdownx.snippets - - pymdownx.superfences + - pymdownx.superfences: + # make exceptions to highlighting of code: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:mermaid2.fence_mermaid - pymdownx.tabbed: alternate_style: true - admonition @@ -89,3 +117,6 @@ markdown_extensions: - md_in_html - tables - footnotes + - toc: + permalink: true + baselevel: 2 diff --git a/pyproject.toml b/pyproject.toml index b140b35d..1ccc3eab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools>=68", "wheel"] build-backend = "setuptools.build_meta" [project] -name = "gbb-ai-audio-agent" +name = "art-voice-agent-accelerator" version = "0.0.0" description = "Real-time voice app: FastAPI + Azure ACS/Speech/OpenAI" readme = "README.md" diff --git a/requirements-docs.txt b/requirements-docs.txt index 5bac7ab9..4110890a 100644 --- 
a/requirements-docs.txt +++ b/requirements-docs.txt @@ -1,5 +1,13 @@ -# Documentation build requirements -mkdocs>=1.5.0 -mkdocs-material>=9.0.0 +# Documentation build requirements only +mkdocs>=1.6.1 # Pin to version compatible with neoteroi-mkdocs +mkdocs-material>=9.4.0 mkdocstrings[python]>=0.20.0 pymdown-extensions>=10.0.0 +mkdocs-mermaid2-plugin>=1.2.2 +neoteroi-mkdocs==1.1.3 # Re-enabled with pinned MkDocs version + +# Minimal dependencies for mkdocstrings to document the code +fastapi>=0.104.0 +pydantic>=2.5.0 +uvicorn>=0.24.0 +starlette>=0.27.0 diff --git a/setup.py b/setup.py index 614b33b2..8725bdc4 100644 --- a/setup.py +++ b/setup.py @@ -16,14 +16,14 @@ long_description = "Real-time voice agent with Azure AI and Apps Service" setup( - name="gbb-ai-audio-agent", + name="art-voice-agent-accelerator", version="1.0.0", description="Real-time voice agent with Azure AI and Apps Service", long_description=long_description, long_description_content_type="text/markdown", author="Pablo Salvador, Jin Lee", author_email="pablosalvador11@gmail.com", - url="https://github.com/pablosalvador10/gbb-ai-audio-agent", + url="https://github.com/Azure-Samples/art-voice-agent-accelerator", packages=find_packages(), python_requires=">=3.11", install_requires=requirements, @@ -56,8 +56,8 @@ ], keywords="azure speech voice tts stt real-time audio ai and apps-services", project_urls={ - "Documentation": "https://pablosalvador10.github.io/gbb-ai-audio-agent/", - "Source": "https://github.com/pablosalvador10/gbb-ai-audio-agent", - "Tracker": "https://github.com/pablosalvador10/gbb-ai-audio-agent/issues", + "Documentation": "https://github.com/Azure-Samples/art-voice-agent-accelerator/", + "Source": "https://github.com/pablosalvador10/art-voice-agent-accelerator", + "Tracker": "https://github.com/Azure-Samples/art-voice-agent-accelerator/issues", }, ) diff --git a/src/redis/manager.py b/src/redis/manager.py index 5ef09c74..4386b9a0 100644 --- a/src/redis/manager.py +++ 
b/src/redis/manager.py @@ -1,17 +1,27 @@ -from opentelemetry import trace -from opentelemetry.trace import SpanKind import asyncio +import ipaddress import os import threading import time -from typing import Any, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar -from utils.azure_auth import get_credential +from opentelemetry import trace +from opentelemetry.trace import SpanKind import redis -from redis.exceptions import AuthenticationError +from redis.exceptions import AuthenticationError, MovedError + +from utils.azure_auth import get_credential from utils.ml_logging import get_logger +try: # redis-py always provides cluster module from v5+, keep guard for safety + from redis.cluster import RedisCluster +except ImportError: # pragma: no cover - only in legacy environments + RedisCluster = None # type: ignore[assignment] + + +T = TypeVar("T") + class AzureRedisManager: """ @@ -68,6 +78,15 @@ def __init__( ) self.user_name = user_name or os.getenv("REDIS_USER_NAME") or "user" self._auth_expires_at = 0 # For AAD token refresh tracking + self.token_expiry = 0 + + # Cluster configuration + self._cluster_preference = self._parse_optional_bool( + os.getenv("REDIS_USE_CLUSTER") + ) + self._cluster_auto = self._cluster_preference is True + self._using_cluster = False + self._client_lock = threading.RLock() # Build initial client and, if using AAD, start a refresh thread self._create_client() @@ -84,15 +103,30 @@ async def initialize(self) -> None: """ try: self.logger.info(f"Validating Redis connection to {self.host}:{self.port}") - - # Validate connection with health check - loop = asyncio.get_event_loop() - ping_result = await loop.run_in_executor(None, self._health_check) - - if ping_result: - self.logger.info("✅ Redis connection validated successfully") - else: - raise ConnectionError("Redis health check failed") + # Ensure a client exists and perform a quick ping; recreate on failure. 
+ try: + if not getattr(self, "redis_client", None): + self.logger.info("Redis client not present during initialize — creating client.") + self.__init__() + else: + try: + # use a short timeout to avoid blocking startup + ok = self._health_check() + except asyncio.TimeoutError: + self.logger.warning("Redis ping timed out during initialize; recreating client.") + self._create_client() + except AuthenticationError: + self.logger.info("Redis authentication failed during initialize; recreating client.") + self._create_client() + except Exception as e: + # Non-fatal here; let the subsequent health check determine final status + self.logger.debug("Non-fatal error during quick ping check: %s", e) + else: + if not ok: + self.logger.info("Redis ping returned False during initialize; recreating client.") + self._create_client() + except Exception as e: + self.logger.error("Unexpected error during Redis pre-initialization check: %s", e) except Exception as e: self.logger.error(f"Redis initialization failed: {e}") @@ -104,14 +138,16 @@ def _health_check(self) -> bool: """ try: # Basic connectivity test - if not self.redis_client.ping(): + if not self._execute_with_redirect("PING", lambda client: client.ping()): return False # Test basic operations test_key = "health_check_test" - self.redis_client.set(test_key, "test_value", ex=5) - result = self.redis_client.get(test_key) - self.redis_client.delete(test_key) + self._execute_with_redirect( + "SET", lambda client: client.set(test_key, "test_value", ex=5) + ) + result = self._execute_with_redirect("GET", lambda client: client.get(test_key)) + self._execute_with_redirect("DEL", lambda client: client.delete(test_key)) return result == "test_value" @@ -133,42 +169,210 @@ def _redis_span(self, name: str, op: str | None = None): }, ) - def _create_client(self): - """(Re)create self.redis_client and record expiry for AAD.""" + @staticmethod + def _parse_optional_bool(value: Optional[str]) -> Optional[bool]: + if value is None: + 
return None + normalized = value.strip().lower() + if normalized in {"1", "true", "yes", "on"}: + return True + if normalized in {"0", "false", "no", "off"}: + return False + return None + + def _resolve_cluster(self, force_cluster: Optional[bool]) -> bool: + if force_cluster is not None: + return force_cluster + if self._cluster_preference is not None: + return self._cluster_preference + return self._cluster_auto + + def _build_auth_kwargs(self) -> Dict[str, Any]: + if self.access_key: + return {"password": self.access_key} + + token = self.credential.get_token(self.scope) + self.token_expiry = token.expires_on + return {"username": self.user_name, "password": token.token} + + def _build_standard_client( + self, host: str, port: Optional[int], auth_kwargs: Dict[str, Any] + ) -> redis.Redis: + client = redis.Redis( + host=host, + port=port, + db=self.db, + ssl=self.ssl, + decode_responses=True, + socket_keepalive=True, + health_check_interval=30, + socket_connect_timeout=2.0, + socket_timeout=1.0, + max_connections=200, + client_name="rtagent-api", + **auth_kwargs, + ) if self.access_key: - # static key-based auth - self.redis_client = redis.Redis( - host=self.host, - port=self.port, - db=self.db, - password=self.access_key, - ssl=self.ssl, - decode_responses=True, - socket_keepalive=True, - health_check_interval=30, - socket_connect_timeout=0.2, - socket_timeout=1.0, - max_connections=200, - client_name="rtagent-api", - ) self.logger.info("Azure Redis connection initialized with access key.") else: - # get fresh AAD token - token = self.credential.get_token(self.scope) - self.token_expiry = token.expires_on - self.redis_client = redis.Redis( - host=self.host, - port=self.port, - db=self.db, - username=self.user_name, - password=token.token, - ssl=self.ssl, - decode_responses=True, - ) self.logger.info( "Azure Redis connection initialized with AAD token (expires at %s).", self.token_expiry, ) + return client + + def _build_cluster_client( + self, host: str, port: 
Optional[int], auth_kwargs: Dict[str, Any] + ) -> "RedisCluster": + if RedisCluster is None: + raise RuntimeError("redis-py cluster support unavailable") + + client = RedisCluster( + host=host, + port=port or 6379, + ssl=self.ssl, + decode_responses=True, + socket_keepalive=True, + health_check_interval=30, + socket_connect_timeout=2.0, + socket_timeout=1.0, + max_connections=200, + client_name="rtagent-api", + require_full_coverage=False, + address_remap=self._remap_cluster_address, + **auth_kwargs, + ) + if self.access_key: + self.logger.info( + "Azure Redis cluster client initialized with access key (startup %s:%s).", + host, + port, + ) + else: + self.logger.info( + "Azure Redis cluster client initialized with AAD token (expires at %s).", + self.token_expiry, + ) + return client + + def _execute_with_redirect( + self, command: str, operation: Callable[[redis.Redis], T] + ) -> T: + try: + return operation(self.redis_client) + except MovedError as err: + return self._handle_cluster_redirect(command, operation, err) + + @staticmethod + def _is_ip_address(value: str) -> bool: + try: + ipaddress.ip_address(value) + except ValueError: + return False + return True + + def _remap_cluster_address(self, address: Tuple[str, int]) -> Tuple[str, int]: + host, port = address + if self._is_ip_address(host): + return (self.host, port) + return address + + def _handle_cluster_redirect( + self, + command: str, + operation: Callable[[redis.Redis], T], + err: MovedError, + ) -> T: + details = f"slot {err.slot_id} -> {err.host}:{err.port}" + self.logger.warning( + "Redis MOVED error on %s (%s). Switching to cluster-aware client.", + command, + details, + ) + if RedisCluster is None: + self.logger.error( + "redis-py cluster support is unavailable; unable to honor MOVED redirect." 
+ ) + raise err + + attempts: List[Tuple[Optional[str], Optional[int]]] = [] + if getattr(err, "port", None) is not None: + attempts.append((self.host, int(err.port))) + attempts.append((self.host, self.port)) + + last_exc: Optional[Exception] = None + tried: set[tuple[str, Optional[int]]] = set() + for host, port in attempts: + key = (host, port) + if key in tried or host is None or port is None: + continue + tried.add(key) + try: + self._create_client( + force_cluster=True, host_override=host, port_override=port + ) + break + except Exception as exc: # pragma: no cover - dependent on runtime config + last_exc = exc + self.logger.debug( + "Redis cluster initialization attempt using %s:%s failed: %s", + host, + port, + exc, + ) + else: + if last_exc: + raise last_exc + raise err + + return operation(self.redis_client) + + def _create_client( + self, + force_cluster: Optional[bool] = None, + host_override: Optional[str] = None, + port_override: Optional[int] = None, + ) -> None: + host = host_override or self.host + port = port_override if port_override is not None else self.port + + with self._client_lock: + use_cluster = self._resolve_cluster(force_cluster) + if use_cluster and RedisCluster is None: + if force_cluster: + raise RuntimeError( + "redis-py cluster support unavailable" + ) + self.logger.warning( + "Redis cluster requested but redis-py cluster support unavailable; using single-node client." 
+ ) + use_cluster = False + + auth_kwargs = self._build_auth_kwargs() + client: Optional[redis.Redis] = None + if use_cluster: + try: + client = self._build_cluster_client(host, port, auth_kwargs) + self._using_cluster = True + except Exception as exc: + if force_cluster: + raise + self.logger.warning( + "Failed to initialize Redis cluster client (%s); falling back to single-node client.", + exc, + ) + use_cluster = False + + if not use_cluster: + client = self._build_standard_client(host, port, auth_kwargs) + self._using_cluster = False + + if client is None: # pragma: no cover - defensive guard + raise RuntimeError("Failed to create Redis client") + + self.redis_client = client + if self._cluster_preference is None: + self._cluster_auto = self._using_cluster def _refresh_loop(self): """Background thread: sleep until just before expiry, then refresh token.""" @@ -188,7 +392,9 @@ def _refresh_loop(self): def publish_event(self, stream_key: str, event_data: Dict[str, Any]) -> str: """Append an event to a Redis stream.""" with self._redis_span("Redis.XADD"): - return self.redis_client.xadd(stream_key, event_data) + return self._execute_with_redirect( + "XADD", lambda client: client.xadd(stream_key, event_data) + ) def read_events_blocking( self, @@ -202,8 +408,11 @@ def read_events_blocking( Returns list of new events (or None on timeout). 
""" with self._redis_span("Redis.XREAD"): - streams = self.redis_client.xread( - {stream_key: last_id}, block=block_ms, count=count + streams = self._execute_with_redirect( + "XREAD", + lambda client: client.xread( + {stream_key: last_id}, block=block_ms, count=count + ), ) return streams if streams else None @@ -231,13 +440,15 @@ async def ping(self) -> bool: """Check Redis connectivity.""" try: with self._redis_span("Redis.PING"): - return self.redis_client.ping() + return self._execute_with_redirect("PING", lambda client: client.ping()) except AuthenticationError: # token might have expired early: rebuild & retry once self.logger.info("Redis auth error on ping, refreshing token") - self._create_client() + self._create_client(force_cluster=self._using_cluster) with self._redis_span("Redis.PING"): - return self.redis_client.ping() + return self._execute_with_redirect( + "PING", lambda client: client.ping() + ) def set_value( self, key: str, value: str, ttl_seconds: Optional[int] = None @@ -245,40 +456,60 @@ def set_value( """Set a string value in Redis (optionally with TTL).""" with self._redis_span("Redis.SET"): if ttl_seconds is not None: - return self.redis_client.setex(key, ttl_seconds, str(value)) - return self.redis_client.set(key, str(value)) + return self._execute_with_redirect( + "SETEX", + lambda client: client.setex(key, ttl_seconds, str(value)), + ) + return self._execute_with_redirect( + "SET", lambda client: client.set(key, str(value)) + ) def get_value(self, key: str) -> Optional[str]: """Get a string value from Redis.""" with self._redis_span("Redis.GET"): - value = self.redis_client.get(key) + value = self._execute_with_redirect("GET", lambda client: client.get(key)) return value.decode() if isinstance(value, bytes) else value def store_session_data(self, session_id: str, data: Dict[str, Any]) -> bool: """Store session data using a Redis hash.""" with self._redis_span("Redis.HSET"): - return bool(self.redis_client.hset(session_id, mapping=data)) + 
return bool( + self._execute_with_redirect( + "HSET", lambda client: client.hset(session_id, mapping=data) + ) + ) def get_session_data(self, session_id: str) -> Dict[str, str]: """Retrieve all session data for a given session ID.""" with self._redis_span("Redis.HGETALL"): - raw = self.redis_client.hgetall(session_id) + raw = self._execute_with_redirect( + "HGETALL", lambda client: client.hgetall(session_id) + ) return dict(raw) def update_session_field(self, session_id: str, field: str, value: str) -> bool: """Update a single field in the session hash.""" with self._redis_span("Redis.HSET"): - return bool(self.redis_client.hset(session_id, field, value)) + return bool( + self._execute_with_redirect( + "HSET", + lambda client: client.hset(session_id, field, value), + ) + ) def delete_session(self, session_id: str) -> int: """Delete a session from Redis.""" with self._redis_span("Redis.DEL"): - return self.redis_client.delete(session_id) + return self._execute_with_redirect( + "DEL", lambda client: client.delete(session_id) + ) def list_connected_clients(self) -> List[Dict[str, str]]: """List currently connected clients.""" with self._redis_span("Redis.CLIENTLIST"): - return self.redis_client.client_list() + return self._execute_with_redirect( + "CLIENT LIST", lambda client: client.client_list() + ) async def store_session_data_async( self, session_id: str, data: Dict[str, Any] diff --git a/tests/load/multi_turn_load_test.py b/tests/load/multi_turn_load_test.py index 87268ebf..f7bc2628 100644 --- a/tests/load/multi_turn_load_test.py +++ b/tests/load/multi_turn_load_test.py @@ -12,7 +12,7 @@ from pathlib import Path from datetime import datetime -from utils.load_test_conversations import ConversationLoadTester, LoadTestConfig +from tests.load.utils.load_test_conversations import ConversationLoadTester, LoadTestConfig class MultiTurnLoadTest: diff --git a/tests/load/utils/load_test_conversations.py b/tests/load/utils/load_test_conversations.py index 22c8166f..55e96629 
100644 --- a/tests/load/utils/load_test_conversations.py +++ b/tests/load/utils/load_test_conversations.py @@ -17,7 +17,7 @@ import statistics from pathlib import Path -from utils.conversation_simulator import ( +from tests.load.utils.conversation_simulator import ( ConversationSimulator, ConversationTemplates, ConversationMetrics, diff --git a/tests/test_acs_events_handlers.py b/tests/test_acs_events_handlers.py index 3fcc8cc5..7004f48f 100644 --- a/tests/test_acs_events_handlers.py +++ b/tests/test_acs_events_handlers.py @@ -5,11 +5,17 @@ Focused tests for the refactored ACS events handling. """ +import sys + import pytest import asyncio from unittest.mock import AsyncMock, MagicMock, patch from azure.core.messaging import CloudEvent +# The Lvagent audio stack depends on sounddevice, which is unavailable in CI. +# Inject a stub before importing handlers so tests can load without native deps. +sys.modules.setdefault("sounddevice", MagicMock()) + from apps.rtagent.backend.api.v1.events.handlers import CallEventHandlers from apps.rtagent.backend.api.v1.events.types import ( CallEventContext, @@ -18,6 +24,11 @@ ) +def run_async(coro): + """Execute coroutine in a fresh event loop for pytest compatibility.""" + return asyncio.run(coro) + + class TestCallEventHandlers: """Test individual event handlers.""" @@ -37,10 +48,12 @@ def mock_context(self): ) context.memo_manager = MagicMock() context.redis_mgr = MagicMock() + context.app_state = MagicMock() + context.app_state.redis_pool = None return context @patch("apps.rtagent.backend.api.v1.events.handlers.logger") - async def test_handle_call_initiated(self, mock_logger, mock_context): + def test_handle_call_initiated(self, mock_logger, mock_context): """Test call initiated handler.""" mock_context.event_type = V1EventTypes.CALL_INITIATED mock_context.event.data = { @@ -49,7 +62,7 @@ async def test_handle_call_initiated(self, mock_logger, mock_context): "api_version": "v1", } - await 
CallEventHandlers.handle_call_initiated(mock_context) + run_async(CallEventHandlers.handle_call_initiated(mock_context)) # Verify context updates assert mock_context.memo_manager.update_context.called @@ -63,7 +76,7 @@ async def test_handle_call_initiated(self, mock_logger, mock_context): assert updates["call_direction"] == "outbound" @patch("apps.rtagent.backend.api.v1.events.handlers.logger") - async def test_handle_inbound_call_received(self, mock_logger, mock_context): + def test_handle_inbound_call_received(self, mock_logger, mock_context): """Test inbound call received handler.""" mock_context.event_type = V1EventTypes.INBOUND_CALL_RECEIVED mock_context.event.data = { @@ -71,7 +84,7 @@ async def test_handle_inbound_call_received(self, mock_logger, mock_context): "from": {"kind": "phoneNumber", "phoneNumber": {"value": "+1987654321"}}, } - await CallEventHandlers.handle_inbound_call_received(mock_context) + run_async(CallEventHandlers.handle_inbound_call_received(mock_context)) # Verify context updates calls = mock_context.memo_manager.update_context.call_args_list @@ -81,31 +94,38 @@ async def test_handle_inbound_call_received(self, mock_logger, mock_context): assert updates["caller_id"] == "+1987654321" @patch("apps.rtagent.backend.api.v1.events.handlers.logger") - async def test_handle_call_connected_with_broadcast( + def test_handle_call_connected_with_broadcast( self, mock_logger, mock_context ): """Test call connected handler with WebSocket broadcast.""" mock_clients = [MagicMock(), MagicMock()] mock_context.clients = mock_clients + mock_call_conn = MagicMock() + mock_call_conn.list_participants.return_value = [] + mock_context.acs_caller = MagicMock() + mock_context.acs_caller.get_call_connection.return_value = mock_call_conn with patch( "apps.rtagent.backend.api.v1.events.handlers.broadcast_message" - ) as mock_broadcast: - await CallEventHandlers.handle_call_connected(mock_context) + ) as mock_broadcast, patch( + 
"apps.rtagent.backend.api.v1.events.handlers.DTMFValidationLifecycle.setup_aws_connect_validation_flow", + new=AsyncMock(), + ): + run_async(CallEventHandlers.handle_call_connected(mock_context)) mock_broadcast.assert_called_once() # Verify message structure - args = mock_broadcast.call_args - assert args[0][0] == mock_clients # clients + call_args, call_kwargs = mock_broadcast.call_args + assert call_args[0] is None # Message should be JSON string import json - message = json.loads(args[0][1]) + message = json.loads(call_args[1]) assert message["type"] == "call_connected" assert message["call_connection_id"] == "test_123" @patch("apps.rtagent.backend.api.v1.events.handlers.logger") - async def test_handle_dtmf_tone_received(self, mock_logger, mock_context): + def test_handle_dtmf_tone_received(self, mock_logger, mock_context): """Test DTMF tone handling.""" mock_context.event_type = ACSEventTypes.DTMF_TONE_RECEIVED mock_context.event.data = { @@ -117,26 +137,26 @@ async def test_handle_dtmf_tone_received(self, mock_logger, mock_context): # Mock current sequence mock_context.memo_manager.get_context.return_value = "123" - await CallEventHandlers.handle_dtmf_tone_received(mock_context) + run_async(CallEventHandlers.handle_dtmf_tone_received(mock_context)) # Should update DTMF sequence mock_context.memo_manager.update_context.assert_called() - async def test_extract_caller_id_phone_number(self): + def test_extract_caller_id_phone_number(self): """Test caller ID extraction from phone number.""" caller_info = {"kind": "phoneNumber", "phoneNumber": {"value": "+1234567890"}} caller_id = CallEventHandlers._extract_caller_id(caller_info) assert caller_id == "+1234567890" - async def test_extract_caller_id_raw_id(self): + def test_extract_caller_id_raw_id(self): """Test caller ID extraction from raw ID.""" caller_info = {"kind": "other", "rawId": "user@domain.com"} caller_id = CallEventHandlers._extract_caller_id(caller_info) assert caller_id == "user@domain.com" - async 
def test_extract_caller_id_fallback(self): + def test_extract_caller_id_fallback(self): """Test caller ID extraction fallback.""" caller_info = {} @@ -148,7 +168,7 @@ class TestEventProcessingFlow: """Test event processing flow.""" @patch("apps.rtagent.backend.api.v1.events.handlers.logger") - async def test_webhook_event_routing(self, mock_logger): + def test_webhook_event_routing(self, mock_logger): """Test webhook event router.""" event = CloudEvent( source="test", @@ -163,11 +183,11 @@ async def test_webhook_event_routing(self, mock_logger): ) with patch.object(CallEventHandlers, "handle_call_connected") as mock_handler: - await CallEventHandlers.handle_webhook_events(context) + run_async(CallEventHandlers.handle_webhook_events(context)) mock_handler.assert_called_once_with(context) @patch("apps.rtagent.backend.api.v1.events.handlers.logger") - async def test_unknown_event_type_handling(self, mock_logger): + def test_unknown_event_type_handling(self, mock_logger): """Test handling of unknown event types.""" event = CloudEvent( source="test", @@ -180,7 +200,7 @@ async def test_unknown_event_type_handling(self, mock_logger): ) # Should handle gracefully without error - await CallEventHandlers.handle_webhook_events(context) + run_async(CallEventHandlers.handle_webhook_events(context)) # No specific handler should be called for unknown type # This should just log and continue diff --git a/tests/test_acs_media_lifecycle.py b/tests/test_acs_media_lifecycle.py index 146abf31..5f10a890 100644 --- a/tests/test_acs_media_lifecycle.py +++ b/tests/test_acs_media_lifecycle.py @@ -1,750 +1,279 @@ -""" -Tests for ACS Media Lifecycle Three-Thread Architecture -====================================================== +"""Unit tests for ACS media lifecycle components aligned with the current implementation.""" -Tests the complete V1 ACS Media Handler implementation including: -- Three-thread architecture (Speech SDK, Route Turn, Main Event Loop) -- Cross-thread communication via 
ThreadBridge -- Barge-in detection and cancellation -- Speech recognition callback handling -- Media message processing -- Handler lifecycle management - -""" - -import pytest import asyncio -import json import base64 -import threading -import time -from unittest.mock import Mock, AsyncMock, MagicMock, patch, call -from typing import Optional, Dict, Any +import json +from unittest.mock import AsyncMock, MagicMock, Mock, patch + +import pytest -# Import the classes under test from apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle import ( - ACSMediaHandler, - ThreadBridge, - SpeechSDKThread, - RouteTurnThread, MainEventLoop, + RouteTurnThread, SpeechEvent, SpeechEventType, + SpeechSDKThread, + ThreadBridge, ) -class MockWebSocket: - """Mock WebSocket for testing.""" - - def __init__(self): - self.sent_messages = [] - self.closed = False - - async def send_text(self, message: str): - """Mock send_text method.""" - self.sent_messages.append(message) - - async def close(self): - """Mock close method.""" - self.closed = True - - -class MockRecognizer: - """Mock speech recognizer for testing.""" +class DummyRecognizer: + """Lightweight recognizer test double that matches the current interface.""" def __init__(self): self.started = False self.stopped = False self.callbacks = {} + self.push_stream = None + self.create_push_stream_called = False + self.prepare_stream_called = False + self.prepare_start_called = False self.write_bytes_calls = [] def set_partial_result_callback(self, callback): - """Mock partial result callback setter.""" self.callbacks["partial"] = callback def set_final_result_callback(self, callback): - """Mock final result callback setter.""" self.callbacks["final"] = callback def set_cancel_callback(self, callback): - """Mock cancel callback setter.""" self.callbacks["cancel"] = callback + def create_push_stream(self): + self.create_push_stream_called = True + self.push_stream = object() + + def prepare_stream(self): + self.prepare_stream_called = 
True + self.push_stream = object() + + def prepare_start(self): + self.prepare_start_called = True + self.push_stream = object() + def start(self): - """Mock start method.""" self.started = True def stop(self): - """Mock stop method.""" self.stopped = True - def write_bytes(self, audio_bytes: bytes): - """Mock write_bytes method.""" - self.write_bytes_calls.append(len(audio_bytes)) - - def trigger_partial(self, text: str, lang: str = "en-US"): - """Helper method to trigger partial callback.""" - if "partial" in self.callbacks: - self.callbacks["partial"](text, lang) - - def trigger_final(self, text: str, lang: str = "en-US"): - """Helper method to trigger final callback.""" - if "final" in self.callbacks: - self.callbacks["final"](text, lang) - - def trigger_error(self, error: str): - """Helper method to trigger error callback.""" - if "cancel" in self.callbacks: - self.callbacks["cancel"](error) - - -class MockOrchestrator: - """Mock orchestrator function for testing.""" - - def __init__(self): - self.calls = [] - self.responses = ["Hello, how can I help you?"] - self.call_index = 0 - - async def __call__(self, cm, transcript: str, ws): - """Mock orchestrator call.""" - self.calls.append({"transcript": transcript, "timestamp": time.time()}) - - # Return mock response - response = self.responses[self.call_index % len(self.responses)] - self.call_index += 1 - return response + def write_bytes(self, data: bytes): + self.write_bytes_calls.append(data) @pytest.fixture -def mock_websocket(): - """Fixture providing a mock WebSocket.""" - return MockWebSocket() +def dummy_recognizer(): + return DummyRecognizer() -@pytest.fixture -def mock_recognizer(): - """Fixture providing a mock speech recognizer.""" - return MockRecognizer() +@pytest.mark.asyncio +async def test_thread_bridge_queue_speech_result_put_nowait(): + bridge = ThreadBridge(call_connection_id="call-12345678") + queue = asyncio.Queue() + event = SpeechEvent( + event_type=SpeechEventType.FINAL, + text="hello", 
+ language="en-US", + ) + bridge.queue_speech_result(queue, event) -@pytest.fixture -def mock_orchestrator(): - """Fixture providing a mock orchestrator.""" - return MockOrchestrator() + queued_event = await asyncio.wait_for(queue.get(), timeout=0.1) + assert queued_event.text == "hello" + assert queue.empty() -@pytest.fixture -def mock_memory_manager(): - """Fixture providing a mock memory manager.""" - return Mock() - - -@pytest.fixture -async def media_handler( - mock_websocket, mock_recognizer, mock_orchestrator, mock_memory_manager -): - """Fixture providing a configured ACS Media Handler.""" - with patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger"): - handler = ACSMediaHandler( - websocket=mock_websocket, - call_connection_id="test-call-123", - session_id="test-session-456", - recognizer=mock_recognizer, - orchestrator_func=mock_orchestrator, - memory_manager=mock_memory_manager, - greeting_text="Hello, welcome to our service!", - ) +@pytest.mark.asyncio +async def test_thread_bridge_queue_speech_result_drops_when_full(): + bridge = ThreadBridge(call_connection_id="call-abcdef01") + bridge.set_main_loop(asyncio.get_running_loop()) - # Start the handler - await handler.start() + queue = asyncio.Queue(maxsize=1) + await queue.put("sentinel") - yield handler - - # Cleanup - await handler.stop() - - -class TestThreadBridge: - """Test ThreadBridge cross-thread communication.""" - - def test_initialization(self): - """Test ThreadBridge initialization.""" - bridge = ThreadBridge() - assert bridge.main_loop is None - - def test_set_main_loop(self): - """Test setting main event loop.""" - bridge = ThreadBridge() - loop = asyncio.new_event_loop() - - bridge.set_main_loop(loop) - assert bridge.main_loop is loop - - @pytest.mark.asyncio - async def test_queue_speech_result_put_nowait(self): - """Test queuing speech result using put_nowait.""" - bridge = ThreadBridge() - queue = asyncio.Queue(maxsize=10) - - event = SpeechEvent( - 
event_type=SpeechEventType.FINAL, text="Hello world", language="en-US" - ) + event = SpeechEvent( + event_type=SpeechEventType.PARTIAL, + text="queued", + language="en-US", + ) + with patch( + "apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger.warning" + ) as warning_mock: bridge.queue_speech_result(queue, event) - # Verify event was queued - queued_event = await asyncio.wait_for(queue.get(), timeout=1.0) - assert queued_event.text == "Hello world" - assert queued_event.event_type == SpeechEventType.FINAL + await queue.get() + assert queue.empty() + warning_mock.assert_called_once() - @pytest.mark.asyncio - async def test_queue_speech_result_with_event_loop(self): - """Test queuing speech result with event loop fallback.""" - bridge = ThreadBridge() - loop = asyncio.get_running_loop() - bridge.set_main_loop(loop) - # Create a full queue to force fallback - queue = asyncio.Queue(maxsize=1) - await queue.put("dummy_item") # Fill the queue - - event = SpeechEvent( - event_type=SpeechEventType.PARTIAL, text="Test", language="en-US" - ) - - # This should use the event loop fallback - bridge.queue_speech_result(queue, event) - - # Remove dummy item and check for our event - await queue.get() # Remove dummy - queued_event = await asyncio.wait_for(queue.get(), timeout=1.0) - assert queued_event.text == "Test" - - -class TestSpeechSDKThread: - """Test SpeechSDKThread functionality.""" - - def test_initialization(self, mock_recognizer): - """Test SpeechSDKThread initialization.""" - bridge = ThreadBridge() - speech_queue = asyncio.Queue() - barge_in_handler = AsyncMock() +@pytest.mark.asyncio +async def test_speechsdkthread_preinitializes_push_stream(dummy_recognizer): + bridge = ThreadBridge(call_connection_id="call-abcdef12") + speech_queue = asyncio.Queue() + barge_in_handler = AsyncMock() + with patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger"): thread = SpeechSDKThread( - recognizer=mock_recognizer, + recognizer=dummy_recognizer, 
thread_bridge=bridge, barge_in_handler=barge_in_handler, speech_queue=speech_queue, ) - assert thread.recognizer is mock_recognizer - assert thread.thread_bridge is bridge - assert not thread.thread_running - assert not thread.recognizer_started + assert dummy_recognizer.create_push_stream_called or dummy_recognizer.push_stream + assert set(dummy_recognizer.callbacks) == {"partial", "final", "cancel"} - def test_callback_setup(self, mock_recognizer): - """Test speech recognition callback setup.""" - bridge = ThreadBridge() - speech_queue = asyncio.Queue() - barge_in_handler = AsyncMock() + thread.stop() - thread = SpeechSDKThread( - recognizer=mock_recognizer, - thread_bridge=bridge, - barge_in_handler=barge_in_handler, - speech_queue=speech_queue, - ) - - # Verify callbacks were set - assert "partial" in mock_recognizer.callbacks - assert "final" in mock_recognizer.callbacks - assert "cancel" in mock_recognizer.callbacks - def test_prepare_thread(self, mock_recognizer): - """Test thread preparation.""" - bridge = ThreadBridge() - speech_queue = asyncio.Queue() - barge_in_handler = AsyncMock() +@pytest.mark.asyncio +async def test_speechsdkthread_start_requires_thread_running(dummy_recognizer): + bridge = ThreadBridge(call_connection_id="call-abcdef12") + speech_queue = asyncio.Queue() + with patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger"): thread = SpeechSDKThread( - recognizer=mock_recognizer, + recognizer=dummy_recognizer, thread_bridge=bridge, - barge_in_handler=barge_in_handler, + barge_in_handler=AsyncMock(), speech_queue=speech_queue, ) - thread.prepare_thread() + thread.start_recognizer() + + assert not dummy_recognizer.started + assert not thread.recognizer_started - assert thread.thread_running - assert thread.thread_obj is not None - assert thread.thread_obj.is_alive() + thread.stop() - # Cleanup - thread.stop() - def test_start_recognizer(self, mock_recognizer): - """Test recognizer startup.""" - bridge = ThreadBridge() - 
speech_queue = asyncio.Queue() - barge_in_handler = AsyncMock() +@pytest.mark.asyncio +async def test_speechsdkthread_prepare_then_start(dummy_recognizer): + bridge = ThreadBridge(call_connection_id="call-abcdef12") + speech_queue = asyncio.Queue() + with patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger"): thread = SpeechSDKThread( - recognizer=mock_recognizer, + recognizer=dummy_recognizer, thread_bridge=bridge, - barge_in_handler=barge_in_handler, + barge_in_handler=AsyncMock(), speech_queue=speech_queue, ) - thread.prepare_thread() - thread.start_recognizer() - - assert mock_recognizer.started - assert thread.recognizer_started - - # Cleanup - thread.stop() - - -class TestMainEventLoop: - """Test MainEventLoop media processing.""" - - @pytest.fixture - def main_event_loop(self, mock_websocket): - """Fixture for MainEventLoop.""" - route_turn_thread = Mock() - return MainEventLoop(mock_websocket, "test-call-123", route_turn_thread) - - @pytest.mark.asyncio - async def test_handle_audio_metadata(self, main_event_loop, mock_recognizer): - """Test AudioMetadata handling.""" - acs_handler = Mock() - acs_handler.speech_sdk_thread = Mock() - acs_handler.speech_sdk_thread.start_recognizer = Mock() - - stream_data = json.dumps( - { - "kind": "AudioMetadata", - "audioMetadata": { - "subscriptionId": "test", - "encoding": "PCM", - "sampleRate": 16000, - "channels": 1, - }, - } - ) - - await main_event_loop.handle_media_message( - stream_data, mock_recognizer, acs_handler - ) - - # Verify recognizer was started - acs_handler.speech_sdk_thread.start_recognizer.assert_called_once() - - @pytest.mark.asyncio - async def test_handle_audio_data(self, main_event_loop, mock_recognizer): - """Test AudioData processing.""" - # Mock audio data (base64 encoded) - audio_bytes = b"\x00" * 320 # 20ms of silence - audio_b64 = base64.b64encode(audio_bytes).decode("utf-8") - - stream_data = json.dumps( - {"kind": "AudioData", "audioData": {"data": audio_b64, "silent": 
False}} - ) - - with patch.object( - main_event_loop, "_process_audio_chunk_async" - ) as mock_process: - await main_event_loop.handle_media_message( - stream_data, mock_recognizer, None - ) + thread.prepare_thread() + await asyncio.sleep(0) + thread.start_recognizer() - # Give async task time to start - await asyncio.sleep(0.1) + assert dummy_recognizer.started + assert thread.recognizer_started - # Verify audio processing was scheduled - mock_process.assert_called_once() + thread.stop() - @pytest.mark.asyncio - async def test_process_audio_chunk_async(self, main_event_loop, mock_recognizer): - """Test audio chunk processing.""" - audio_bytes = b"\x00" * 320 - audio_b64 = base64.b64encode(audio_bytes).decode("utf-8") - await main_event_loop._process_audio_chunk_async(audio_b64, mock_recognizer) +@pytest.mark.asyncio +async def test_main_event_loop_handles_audio_metadata(): + mock_websocket = MagicMock() + mock_websocket.send_text = AsyncMock() + mock_websocket.state = MagicMock() - # Verify recognizer received audio - assert len(mock_recognizer.write_bytes_calls) == 1 - assert mock_recognizer.write_bytes_calls[0] == 320 + main_loop = MainEventLoop(mock_websocket, "call-abcdef12", None) - @pytest.mark.asyncio - async def test_barge_in_handling(self, main_event_loop): - """Test barge-in interruption.""" - # Mock current playback task - main_event_loop.current_playback_task = AsyncMock() - main_event_loop.route_turn_thread = AsyncMock() + handler = MagicMock() + handler.speech_sdk_thread.start_recognizer = Mock() + handler.thread_bridge.queue_speech_result = Mock() + handler.speech_queue = asyncio.Queue() + handler.greeting_text = "Welcome!" 
- with patch.object(main_event_loop, "_send_stop_audio_command") as mock_stop: - await main_event_loop.handle_barge_in() + metadata_message = json.dumps( + { + "kind": "AudioMetadata", + "audioMetadata": { + "encoding": "PCM", + "sampleRate": 24000, + "channels": 1, + }, + } + ) - # Verify barge-in actions - main_event_loop.current_playback_task.cancel.assert_called_once() - main_event_loop.route_turn_thread.cancel_current_processing.assert_called_once() - mock_stop.assert_called_once() + await main_loop.handle_media_message(metadata_message, recognizer=None, acs_handler=handler) + handler.speech_sdk_thread.start_recognizer.assert_called_once() + handler.thread_bridge.queue_speech_result.assert_called_once() + assert main_loop.greeting_played -class TestRouteTurnThread: - """Test RouteTurnThread conversation processing.""" + await main_loop.handle_media_message(metadata_message, recognizer=None, acs_handler=handler) + handler.thread_bridge.queue_speech_result.assert_called_once() - @pytest.mark.asyncio - async def test_initialization( - self, mock_orchestrator, mock_memory_manager, mock_websocket - ): - """Test RouteTurnThread initialization.""" - speech_queue = asyncio.Queue() - thread = RouteTurnThread( - speech_queue=speech_queue, - orchestrator_func=mock_orchestrator, - memory_manager=mock_memory_manager, - websocket=mock_websocket, - ) +@pytest.mark.asyncio +async def test_main_event_loop_process_audio_chunk_async(): + mock_websocket = MagicMock() + mock_websocket.send_text = AsyncMock() + mock_websocket.state = MagicMock() - assert thread.speech_queue is speech_queue - assert thread.orchestrator_func is mock_orchestrator - assert not thread.running + main_loop = MainEventLoop(mock_websocket, "call-abcdef12", None) - @pytest.mark.asyncio - async def test_speech_event_processing( - self, mock_orchestrator, mock_memory_manager, mock_websocket - ): - """Test processing speech events.""" - speech_queue = asyncio.Queue() + recognizer = MagicMock() + 
recognizer.push_stream = object() + recognizer.write_bytes = MagicMock() - thread = RouteTurnThread( - speech_queue=speech_queue, - orchestrator_func=mock_orchestrator, - memory_manager=mock_memory_manager, - websocket=mock_websocket, - ) + encoded = base64.b64encode(b"audio-bytes").decode("ascii") - # Start the thread - await thread.start() + await main_loop._process_audio_chunk_async(encoded, recognizer) - # Queue a speech event - event = SpeechEvent( - event_type=SpeechEventType.FINAL, text="Hello world", language="en-US" - ) - await speech_queue.put(event) - - # Give time for processing - await asyncio.sleep(0.1) - - # Verify orchestrator was called - assert len(mock_orchestrator.calls) == 1 - assert mock_orchestrator.calls[0]["transcript"] == "Hello world" - - # Cleanup - await thread.stop() - - -class TestACSMediaHandler: - """Test complete ACS Media Handler integration.""" - - @pytest.mark.asyncio - async def test_handler_lifecycle(self, media_handler, mock_recognizer): - """Test complete handler lifecycle.""" - # Verify handler started correctly - assert media_handler.running - assert media_handler.speech_sdk_thread.thread_running - - # Test stopping - await media_handler.stop() - assert not media_handler.running - assert media_handler._stopped - - @pytest.mark.asyncio - @patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger") - async def test_media_message_processing( - self, mock_logger, media_handler, mock_recognizer - ): - """Test end-to-end media message processing.""" - # Send AudioMetadata - metadata = json.dumps( - { - "kind": "AudioMetadata", - "audioMetadata": { - "subscriptionId": "test", - "encoding": "PCM", - "sampleRate": 16000, - }, - } - ) + recognizer.write_bytes.assert_called_once_with(b"audio-bytes") - await media_handler.handle_media_message(metadata) - # Verify recognizer was started - assert mock_recognizer.started +@pytest.mark.asyncio +async def test_main_event_loop_handle_barge_in_cancels_playback(): + 
mock_websocket = MagicMock() + mock_websocket.send_text = AsyncMock() + mock_websocket.state = MagicMock() - # Send AudioData - audio_bytes = b"\x00" * 320 - audio_b64 = base64.b64encode(audio_bytes).decode("utf-8") + route_turn_thread = MagicMock() + route_turn_thread.cancel_current_processing = AsyncMock() - audio_data = json.dumps( - {"kind": "AudioData", "audioData": {"data": audio_b64, "silent": False}} - ) + main_loop = MainEventLoop(mock_websocket, "call-abcdef12", route_turn_thread) + main_loop.current_playback_task = asyncio.create_task(asyncio.sleep(1)) - await media_handler.handle_media_message(audio_data) - - # Give async processing time - await asyncio.sleep(0.1) - - # Verify audio was processed - assert len(mock_recognizer.write_bytes_calls) > 0 - - @pytest.mark.asyncio - @patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger") - async def test_barge_in_flow( - self, mock_logger, media_handler, mock_recognizer, mock_orchestrator - ): - """Test complete barge-in detection and cancellation flow.""" - # Start processing by triggering recognizer - await media_handler.handle_media_message( - json.dumps( - {"kind": "AudioMetadata", "audioMetadata": {"subscriptionId": "test"}} - ) - ) - - # Simulate speech detection that should trigger barge-in - mock_recognizer.trigger_partial("Hello", "en-US") - - # Give time for barge-in processing - await asyncio.sleep(0.1) - - # Verify barge-in was triggered (check WebSocket for stop command) - sent_messages = media_handler.websocket.sent_messages - stop_commands = [msg for msg in sent_messages if "StopAudio" in msg] - assert len(stop_commands) > 0 - - @pytest.mark.asyncio - @patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger") - async def test_speech_recognition_callbacks( - self, mock_logger, media_handler, mock_recognizer, mock_orchestrator - ): - """Test speech recognition callback integration.""" - # Start recognizer - await media_handler.handle_media_message( - json.dumps( - 
{"kind": "AudioMetadata", "audioMetadata": {"subscriptionId": "test"}} - ) - ) + await main_loop.handle_barge_in() - # Trigger final speech result - mock_recognizer.trigger_final("How can you help me?", "en-US") - - # Give time for processing - await asyncio.sleep(0.2) - - # Verify orchestrator was called - assert len(mock_orchestrator.calls) == 1 - assert mock_orchestrator.calls[0]["transcript"] == "How can you help me?" - - @pytest.mark.asyncio - @patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger") - async def test_error_handling(self, mock_logger, media_handler, mock_recognizer): - """Test error handling in speech recognition.""" - # Start recognizer - await media_handler.handle_media_message( - json.dumps( - {"kind": "AudioMetadata", "audioMetadata": {"subscriptionId": "test"}} - ) - ) - - # Trigger error - mock_recognizer.trigger_error("Test error message") - - # Give time for processing - await asyncio.sleep(0.1) - - # Verify error was handled (no exceptions raised) - assert media_handler.running # Handler should still be running - - @pytest.mark.asyncio - @patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger") - async def test_concurrent_audio_processing( - self, mock_logger, media_handler, mock_recognizer - ): - """Test concurrent audio chunk processing with task limiting.""" - # Start recognizer - await media_handler.handle_media_message( - json.dumps( - {"kind": "AudioMetadata", "audioMetadata": {"subscriptionId": "test"}} - ) - ) + route_turn_thread.cancel_current_processing.assert_awaited() + mock_websocket.send_text.assert_called() + assert main_loop.current_playback_task.cancelled() - # Send multiple audio chunks rapidly - audio_bytes = b"\x00" * 320 - audio_b64 = base64.b64encode(audio_bytes).decode("utf-8") + await asyncio.sleep(0.11) + assert not main_loop.barge_in_active.is_set() - audio_data = json.dumps( - {"kind": "AudioData", "audioData": {"data": audio_b64, "silent": False}} - ) - - # Send 10 audio 
chunks - tasks = [] - for _ in range(10): - task = asyncio.create_task(media_handler.handle_media_message(audio_data)) - tasks.append(task) - - # Wait for all processing - await asyncio.gather(*tasks) - await asyncio.sleep(0.2) - - # Verify audio processing occurred (some may be dropped due to limiting) - assert len(mock_recognizer.write_bytes_calls) > 0 - assert len(mock_recognizer.write_bytes_calls) <= 10 - - -class TestSpeechEvent: - """Test SpeechEvent data structure.""" - - def test_speech_event_creation(self): - """Test SpeechEvent creation and timing.""" - event = SpeechEvent( - event_type=SpeechEventType.FINAL, - text="Hello world", - language="en-US", - speaker_id="speaker1", - ) - - assert event.event_type == SpeechEventType.FINAL - assert event.text == "Hello world" - assert event.language == "en-US" - assert event.speaker_id == "speaker1" - assert isinstance(event.timestamp, float) - assert event.timestamp > 0 - - def test_speech_event_types(self): - """Test all speech event types.""" - # Test all event types - for event_type in SpeechEventType: - event = SpeechEvent(event_type=event_type, text="test", language="en-US") - assert event.event_type == event_type - - -# Integration test scenarios -class TestIntegrationScenarios: - """Integration tests for realistic usage scenarios.""" - - @pytest.mark.asyncio - @patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger") - async def test_call_flow_with_greeting( - self, - mock_logger, - mock_websocket, - mock_recognizer, - mock_orchestrator, - mock_memory_manager, - ): - """Test complete call flow including greeting.""" - # Create handler with greeting - handler = ACSMediaHandler( - websocket=mock_websocket, - call_connection_id="test-call-integration", - session_id="test-session-integration", - recognizer=mock_recognizer, - orchestrator_func=mock_orchestrator, - memory_manager=mock_memory_manager, - greeting_text="Welcome! 
How can I help you today?", - ) - - await handler.start() - - try: - # Simulate call connection with AudioMetadata - await handler.handle_media_message( - json.dumps( - { - "kind": "AudioMetadata", - "audioMetadata": { - "subscriptionId": "test-integration", - "encoding": "PCM", - "sampleRate": 16000, - "channels": 1, - }, - } - ) - ) - - # Give time for greeting to be processed - await asyncio.sleep(0.3) - - # Simulate customer speech - mock_recognizer.trigger_final("I need help with my account", "en-US") - - # Give time for orchestrator processing - await asyncio.sleep(0.2) - - # Verify greeting was sent and customer speech processed - assert len(mock_orchestrator.calls) >= 1 - assert any( - "account" in call["transcript"].lower() - for call in mock_orchestrator.calls - ) - - finally: - await handler.stop() - - @pytest.mark.asyncio - @patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger") - async def test_barge_in_during_response( - self, - mock_logger, - mock_websocket, - mock_recognizer, - mock_orchestrator, - mock_memory_manager, - ): - """Test barge-in interruption during AI response playback.""" - handler = ACSMediaHandler( - websocket=mock_websocket, - call_connection_id="test-barge-in", - session_id="test-barge-in-session", - recognizer=mock_recognizer, - orchestrator_func=mock_orchestrator, - memory_manager=mock_memory_manager, - ) - await handler.start() +@pytest.mark.asyncio +async def test_route_turn_thread_cancel_current_processing_clears_queue(): + speech_queue = asyncio.Queue() + await speech_queue.put( + SpeechEvent(event_type=SpeechEventType.FINAL, text="hello", language="en-US") + ) - try: - # Start call - await handler.handle_media_message( - json.dumps( - { - "kind": "AudioMetadata", - "audioMetadata": {"subscriptionId": "test-barge-in"}, - } - ) - ) + orchestrator = AsyncMock() + memory_manager = MagicMock() + websocket = MagicMock() + websocket.state = MagicMock() - # Customer asks question - 
mock_recognizer.trigger_final("What are your hours?", "en-US") - await asyncio.sleep(0.1) + route_thread = RouteTurnThread( + call_connection_id="call-abcdef12", + speech_queue=speech_queue, + orchestrator_func=orchestrator, + memory_manager=memory_manager, + websocket=websocket, + ) - # While AI is responding, customer interrupts (barge-in) - mock_recognizer.trigger_partial("Actually, I need to", "en-US") - await asyncio.sleep(0.1) + route_thread.current_response_task = asyncio.create_task(asyncio.sleep(1)) - # Verify stop audio command was sent for barge-in - sent_messages = handler.websocket.sent_messages - stop_commands = [msg for msg in sent_messages if "StopAudio" in msg] - assert len(stop_commands) > 0 + await route_thread.cancel_current_processing() - finally: - await handler.stop() + assert speech_queue.empty() + assert route_thread.current_response_task.cancelled() + # Cleanup to silence lingering tasks + await asyncio.sleep(0) -if __name__ == "__main__": - # Run tests with verbose output - pytest.main([__file__, "-v", "--tb=short"]) diff --git a/tests/test_acs_media_lifecycle_memory.py b/tests/test_acs_media_lifecycle_memory.py index 78412cbd..c64a88a8 100644 --- a/tests/test_acs_media_lifecycle_memory.py +++ b/tests/test_acs_media_lifecycle_memory.py @@ -8,7 +8,6 @@ from apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle import ( ACSMediaHandler, - get_active_handlers_count, ) @@ -92,21 +91,6 @@ async def dummy_orchestrator(*args, **kwargs): return handler, ws, recog -@pytest.mark.asyncio -async def test_handler_registers_and_cleans_up(): - """Start a handler and ensure it's registered then cleaned up on stop.""" - before = get_active_handlers_count() - handler, ws, recog = await _create_start_stop_handler(asyncio.get_running_loop()) - - after = get_active_handlers_count() - # Should be same as before after full stop - assert ( - after == before - ), f"active handlers should be cleaned up (before={before}, after={after})" - # websocket attribute 
should be removed/cleared or not reference running handler - # The implementation sets _acs_media_handler during start; after stop it may remain but handler.is_running must be False - assert not handler.is_running - @pytest.mark.asyncio async def test_threads_terminated_on_stop(): diff --git a/tests/test_acs_simple.py b/tests/test_acs_simple.py index cb6998f4..d19d336a 100644 --- a/tests/test_acs_simple.py +++ b/tests/test_acs_simple.py @@ -19,7 +19,7 @@ import base64 import threading import time -from unittest.mock import Mock, AsyncMock, patch +from unittest.mock import Mock, MagicMock, AsyncMock, patch # Test the basic functionality without complex logging @@ -64,12 +64,14 @@ async def test_main_event_loop_basic(): mock_websocket = Mock() mock_websocket.send_text = AsyncMock() - mock_route_turn_thread = Mock() + mock_route_turn_thread = MagicMock() + mock_route_turn_thread.cancel_current_processing = AsyncMock() main_loop = MainEventLoop(mock_websocket, "test-call", mock_route_turn_thread) # Test barge-in handling await main_loop.handle_barge_in() + await asyncio.sleep(0.11) # Verify WebSocket was called (stop audio command) mock_websocket.send_text.assert_called() @@ -77,11 +79,13 @@ async def test_main_event_loop_basic(): class MockRecognizer: - """Simple mock recognizer.""" + """Simple mock recognizer that mirrors the current interface.""" def __init__(self): self.started = False + self.stopped = False self.callbacks = {} + self.push_stream = None def set_partial_result_callback(self, callback): self.callbacks["partial"] = callback @@ -92,9 +96,21 @@ def set_final_result_callback(self, callback): def set_cancel_callback(self, callback): self.callbacks["cancel"] = callback + def create_push_stream(self): + self.push_stream = object() + + def prepare_stream(self): + self.push_stream = object() + + def prepare_start(self): + self.push_stream = object() + def start(self): self.started = True + def stop(self): + self.stopped = True + def write_bytes(self, data): 
pass diff --git a/tests/test_dtmf_validation.py b/tests/test_dtmf_validation.py index 3975bac9..ee382fed 100644 --- a/tests/test_dtmf_validation.py +++ b/tests/test_dtmf_validation.py @@ -8,10 +8,10 @@ os.environ.pop("APPLICATIONINSIGHTS_CONNECTION_STRING", None) import asyncio -import json -import pytest from types import SimpleNamespace -from unittest.mock import patch, AsyncMock +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest from apps.rtagent.backend.api.v1.handlers.dtmf_validation_lifecycle import ( DTMFValidationLifecycle, @@ -21,81 +21,122 @@ class DummyMemo: def __init__(self): self._d = {} + self.persist_calls = 0 - def get_context(self, k, default=None): - return self._d.get(k, default) + def get_context(self, key, default=None): + return self._d.get(key, default) - def update_context(self, k, v): - self._d[k] = v + def set_context(self, key, value): + self._d[key] = value - def set_context(self, k, v): - self._d[k] = v + def update_context(self, key, value): + self._d[key] = value async def persist_to_redis_async(self, redis_mgr): - pass + self.persist_calls += 1 -class FakeAuthService: - def __init__(self, ok=True): - self.ok = ok - self.calls = [] +class DummyContext: + def __init__(self, event_data, memo_manager=None, redis_mgr=None, acs_caller=None): + self._event_data = event_data + self.memo_manager = memo_manager + self.redis_mgr = redis_mgr + self.acs_caller = acs_caller + self.call_connection_id = "call-123" - async def validate_pin(self, call_id, phone, pin): - self.calls.append((call_id, phone, pin)) - # small delay to emulate I/O - await asyncio.sleep(0.01) - return {"ok": self.ok, "user_id": "u1"} if self.ok else {"ok": False} + def get_event_data(self): + return self._event_data -@pytest.mark.asyncio -async def test_validate_sequence_success(): - """Test successful DTMF sequence validation using centralized logic.""" +class DummyRedis: + def __init__(self, result): + self._result = result + + async def 
read_events_blocking_async(self, **kwargs): + return self._result + + +def test_is_dtmf_validation_gate_open(): memo = DummyMemo() + memo.set_context("dtmf_validation_gate_open", True) + + assert DTMFValidationLifecycle.is_dtmf_validation_gate_open(memo, "call") - context = SimpleNamespace( - call_connection_id="call-1", - memo_manager=memo, - redis_mgr=AsyncMock(), - clients=None, - acs_caller=None, + memo.set_context("dtmf_validation_gate_open", False) + assert not DTMFValidationLifecycle.is_dtmf_validation_gate_open(memo, "call") + + +@pytest.mark.asyncio +async def test_handle_dtmf_tone_received_updates_sequence(): + memo = DummyMemo() + redis_mgr = AsyncMock() + context = DummyContext( + {"tone": "5", "sequenceId": 1}, memo_manager=memo, redis_mgr=redis_mgr ) - # Mock the cancellation method to ensure it's not called on success + await DTMFValidationLifecycle.handle_dtmf_tone_received(context) + + assert memo.get_context("dtmf_tone") == "5" + assert memo.persist_calls == 1 + + +@pytest.mark.asyncio +async def test_handle_dtmf_tone_received_routes_to_validation_flow(): + memo = DummyMemo() + memo.set_context("aws_connect_validation_pending", True) + context = DummyContext({"tone": "1", "sequenceId": 2}, memo_manager=memo) + with patch.object( - DTMFValidationLifecycle, "_cancel_call_for_validation_failure" - ) as mock_cancel: - # Test a valid 4-digit sequence - await DTMFValidationLifecycle._validate_sequence(context, "1234") + DTMFValidationLifecycle, + "_handle_aws_connect_validation_tone", + new=AsyncMock(), + ) as mock_handler: + await DTMFValidationLifecycle.handle_dtmf_tone_received(context) - # Assert success case - assert memo.get_context("dtmf_validated") is True - assert memo.get_context("entered_pin") == "1234" - assert memo.get_context("dtmf_validation_gate_open") is True - mock_cancel.assert_not_called() + mock_handler.assert_awaited_once() @pytest.mark.asyncio -async def test_validate_sequence_failure(): - """Test failed DTMF sequence validation 
using centralized logic.""" +async def test_setup_aws_connect_validation_flow_sets_context(): memo = DummyMemo() + redis_mgr = AsyncMock() + context = DummyContext({}, memo_manager=memo, redis_mgr=redis_mgr) + call_conn = MagicMock() + + with patch.object( + DTMFValidationLifecycle, + "_start_dtmf_recognition", + new=AsyncMock(), + ) as mock_start: + await DTMFValidationLifecycle.setup_aws_connect_validation_flow( + context, call_conn + ) + + assert memo.get_context("aws_connect_validation_pending") is True + assert memo.get_context("aws_connect_input_sequence") == "" + digits = memo.get_context("aws_connect_validation_digits") + assert isinstance(digits, str) and len(digits) == 3 + assert memo.persist_calls == 1 + mock_start.assert_awaited_once_with(context, call_conn) + + +@pytest.mark.asyncio +async def test_wait_for_dtmf_validation_completion_success(): + redis_mgr = DummyRedis(result={"validation_status": "completed"}) - context = SimpleNamespace( - call_connection_id="call-2", - memo_manager=memo, - redis_mgr=AsyncMock(), - clients=None, - acs_caller=None, + result = await DTMFValidationLifecycle.wait_for_dtmf_validation_completion( + redis_mgr, "call-1" ) - # Mock the cancellation method to verify it's called on failure - with patch.object( - DTMFValidationLifecycle, "_cancel_call_for_validation_failure" - ) as mock_cancel: - # Test an invalid sequence (too short) - await DTMFValidationLifecycle._validate_sequence(context, "12") - - # Assert failure case - assert memo.get_context("dtmf_validated") is False - assert memo.get_context("entered_pin") is None - # Verify call cancellation was triggered - mock_cancel.assert_called_once_with(context) + assert result is True + + +@pytest.mark.asyncio +async def test_wait_for_dtmf_validation_completion_timeout(): + redis_mgr = DummyRedis(result=None) + + result = await DTMFValidationLifecycle.wait_for_dtmf_validation_completion( + redis_mgr, "call-1" + ) + + assert result is False diff --git 
a/tests/test_dtmf_validation_failure_cancellation.py b/tests/test_dtmf_validation_failure_cancellation.py index cc2993cb..98858ea7 100644 --- a/tests/test_dtmf_validation_failure_cancellation.py +++ b/tests/test_dtmf_validation_failure_cancellation.py @@ -1,11 +1,9 @@ -""" -Test DTMF validation failure cancellation logic. +"""Tests for DTMF validation completion and helper utilities.""" -This test verifies that calls are properly cancelled when DTMF validation fails. -""" +import asyncio +from unittest.mock import AsyncMock, MagicMock import pytest -from unittest.mock import AsyncMock, MagicMock, patch from apps.rtagent.backend.api.v1.handlers.dtmf_validation_lifecycle import ( DTMFValidationLifecycle, @@ -13,156 +11,100 @@ from apps.rtagent.backend.api.v1.events.types import CallEventContext -@pytest.fixture -def mock_context(): - """Create a mock CallEventContext for testing.""" - context = MagicMock(spec=CallEventContext) - context.call_connection_id = "test-call-123" - context.memo_manager = MagicMock() - context.memo_manager.persist_to_redis_async = AsyncMock() - context.redis_mgr = AsyncMock() - context.acs_caller = MagicMock() - context.websocket = MagicMock() - return context +class DummyMemo: + def __init__(self): + self._d = {} + self.persist_calls = 0 + def set_context(self, key, value): + self._d[key] = value -@pytest.mark.asyncio -async def test_aws_connect_validation_success_no_cancellation(mock_context): - """Test that successful AWS Connect validation does NOT cancel the call.""" - # Arrange - input_sequence = "123" - expected_digits = "123" - - # Act - with patch.object( - DTMFValidationLifecycle, "_cancel_call_for_validation_failure" - ) as mock_cancel: - await DTMFValidationLifecycle._complete_aws_connect_validation( - mock_context, input_sequence, expected_digits - ) - - # Assert - call should NOT be cancelled on success - mock_cancel.assert_not_called() - mock_context.memo_manager.set_context.assert_any_call("dtmf_validated", True) - 
mock_context.memo_manager.set_context.assert_any_call( - "dtmf_validation_gate_open", True - ) + def get_context(self, key, default=None): + return self._d.get(key, default) + def update_context(self, key, value): + self._d[key] = value -@pytest.mark.asyncio -async def test_aws_connect_validation_failure_cancels_call(mock_context): - """Test that failed AWS Connect validation cancels the call.""" - # Arrange - input_sequence = "456" - expected_digits = "123" + async def persist_to_redis_async(self, redis_mgr): + self.persist_calls += 1 - # Act - with patch.object( - DTMFValidationLifecycle, "_cancel_call_for_validation_failure" - ) as mock_cancel: - await DTMFValidationLifecycle._complete_aws_connect_validation( - mock_context, input_sequence, expected_digits - ) - # Assert - call should be cancelled on failure - mock_cancel.assert_called_once_with(mock_context) - mock_context.memo_manager.set_context.assert_any_call("dtmf_validated", False) +class DummyContext: + def __init__(self, memo_manager=None, redis_mgr=None): + self.call_connection_id = "test-call-123" + self.memo_manager = memo_manager + self.redis_mgr = redis_mgr + self.acs_caller = MagicMock() + + +@pytest.fixture +def context_with_memo(): + memo = DummyMemo() + redis_mgr = AsyncMock() + redis_mgr.add_event_async = AsyncMock() + redis_mgr.set_value_async = AsyncMock() + return DummyContext(memo_manager=memo, redis_mgr=redis_mgr), memo, redis_mgr @pytest.mark.asyncio -async def test_sequence_validation_failure_cancels_call(mock_context): - """Test that failed sequence validation cancels the call.""" - # Arrange - invalid_sequence = "12" # Too short +async def test_complete_validation_success_sets_flags(context_with_memo): + context, memo, redis_mgr = context_with_memo - # Act - with patch.object( - DTMFValidationLifecycle, "_cancel_call_for_validation_failure" - ) as mock_cancel: - await DTMFValidationLifecycle._validate_sequence(mock_context, invalid_sequence) + await 
DTMFValidationLifecycle._complete_aws_connect_validation( + context, input_sequence="123", expected_digits="123" + ) - # Assert - call should be cancelled on failure - mock_cancel.assert_called_once_with(mock_context) - mock_context.memo_manager.update_context.assert_any_call("dtmf_validated", False) + assert memo.get_context("dtmf_validated") is True + assert memo.get_context("dtmf_validation_gate_open") is True + redis_mgr.add_event_async.assert_awaited_once() + assert memo.persist_calls == 1 @pytest.mark.asyncio -async def test_sequence_validation_success_no_cancellation(mock_context): - """Test that successful sequence validation does NOT cancel the call.""" - # Arrange - valid_sequence = "1234" # Valid 4-digit PIN - - # Act - with patch.object( - DTMFValidationLifecycle, "_cancel_call_for_validation_failure" - ) as mock_cancel: - await DTMFValidationLifecycle._validate_sequence(mock_context, valid_sequence) - - # Assert - call should NOT be cancelled on success - mock_cancel.assert_not_called() - mock_context.memo_manager.update_context.assert_any_call("dtmf_validated", True) - mock_context.memo_manager.update_context.assert_any_call( - "dtmf_validation_gate_open", True +async def test_complete_validation_failure_marks_invalid(context_with_memo): + context, memo, redis_mgr = context_with_memo + + await DTMFValidationLifecycle._complete_aws_connect_validation( + context, input_sequence="000", expected_digits="123" ) + assert memo.get_context("dtmf_validated") is False + redis_mgr.add_event_async.assert_not_called() -@pytest.mark.asyncio -async def test_cancel_call_for_validation_failure_with_session_terminator(mock_context): - """Test call cancellation using session terminator.""" - # Arrange - mock_context.acs_caller.client = MagicMock() - # Ensure websocket attribute exists and is truthy - mock_context.websocket = MagicMock() - - # Act - with patch( - "apps.rtagent.backend.api.v1.handlers.dtmf_validation_lifecycle.terminate_session", - new_callable=AsyncMock, 
- ) as mock_terminate: - await DTMFValidationLifecycle._cancel_call_for_validation_failure(mock_context) - - # Assert - mock_terminate.assert_called_once() - call_args = mock_terminate.call_args - assert call_args.kwargs["ws"] == mock_context.websocket - assert call_args.kwargs["is_acs"] is True - assert call_args.kwargs["call_connection_id"] == "test-call-123" - - # Verify context updates - mock_context.memo_manager.set_context.assert_any_call( - "call_cancelled_dtmf_failure", True - ) - mock_context.memo_manager.set_context.assert_any_call( - "dtmf_validation_gate_open", False - ) - # Verify Redis event publication - mock_context.redis_mgr.publish_event_async.assert_called_once() +def test_get_fresh_dtmf_validation_status(): + memo = DummyMemo() + memo.set_context("dtmf_validated", True) + result = DTMFValidationLifecycle.get_fresh_dtmf_validation_status( + memo, "call-123" + ) -@pytest.mark.asyncio -async def test_cancel_call_fallback_direct_hangup(mock_context): - """Test call cancellation fallback when session terminator is not available.""" - # Arrange - no websocket available, simulate fallback - mock_context.websocket = None - mock_call_conn = MagicMock() - mock_context.acs_caller.get_call_connection.return_value = mock_call_conn + assert result is True - # Act - await DTMFValidationLifecycle._cancel_call_for_validation_failure(mock_context) - # Assert - should use direct hang_up as fallback - mock_call_conn.hang_up.assert_called_once_with(is_for_everyone=True) +def test_normalize_tone_mapping(): + assert DTMFValidationLifecycle._normalize_tone("five") == "5" + assert DTMFValidationLifecycle._normalize_tone("*") == "*" + assert DTMFValidationLifecycle._normalize_tone(None) is None @pytest.mark.asyncio -async def test_public_cancel_method(): - """Test the public cancel_call_for_dtmf_failure method.""" - mock_context = MagicMock() +async def test_update_dtmf_sequence_handles_append(context_with_memo): + context, memo, redis_mgr = context_with_memo + + class 
DummyCallEventContext(CallEventContext): + def __init__(self, memo_manager, redis_mgr): + self.memo_manager = memo_manager + self.redis_mgr = redis_mgr + self.call_connection_id = "call-123" + + fake_context = DummyCallEventContext(memo, redis_mgr) + + DTMFValidationLifecycle._update_dtmf_sequence(fake_context, tone="1", sequence_id=0) + DTMFValidationLifecycle._update_dtmf_sequence(fake_context, tone="2", sequence_id=1) - with patch.object( - DTMFValidationLifecycle, "_cancel_call_for_validation_failure" - ) as mock_private: - await DTMFValidationLifecycle.cancel_call_for_dtmf_failure(mock_context) + assert memo.get_context("dtmf_sequence") == "12" - mock_private.assert_called_once_with(mock_context) + await asyncio.sleep(0) # allow background task to run + redis_mgr.set_value_async.assert_called() diff --git a/tests/test_events_architecture_simple.py b/tests/test_events_architecture_simple.py index cee54037..6de149b6 100644 --- a/tests/test_events_architecture_simple.py +++ b/tests/test_events_architecture_simple.py @@ -5,12 +5,16 @@ Tests the core refactoring without heavy dependencies. 
""" -import pytest import asyncio -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import MagicMock, patch from azure.core.messaging import CloudEvent +def run_async(coro): + """Execute coroutine in a fresh loop for pytest compatibility.""" + return asyncio.run(coro) + + # Mock the modules to avoid import issues class MockCallEventContext: def __init__(self, event, call_connection_id, event_type): @@ -249,7 +253,7 @@ def test_event_context_json_data(self): assert data["callConnectionId"] == "test_123" assert data["status"] == "connected" - async def test_call_initiated_handler(self): + def test_call_initiated_handler(self): """Test call initiated handler.""" event = CloudEvent( source="api", @@ -268,7 +272,7 @@ async def test_call_initiated_handler(self): ) context.memo_manager = MagicMock() - await MockCallEventHandlers.handle_call_initiated(context) + run_async(MockCallEventHandlers.handle_call_initiated(context)) # Verify context updates context.memo_manager.update_context.assert_called() @@ -282,7 +286,7 @@ async def test_call_initiated_handler(self): assert updates["call_direction"] == "outbound" assert updates["target_number"] == "+1234567890" - async def test_inbound_call_handler(self): + def test_inbound_call_handler(self): """Test inbound call received handler.""" event = CloudEvent( source="eventgrid", @@ -303,7 +307,7 @@ async def test_inbound_call_handler(self): ) context.memo_manager = MagicMock() - await MockCallEventHandlers.handle_inbound_call_received(context) + run_async(MockCallEventHandlers.handle_inbound_call_received(context)) # Verify context updates calls = context.memo_manager.update_context.call_args_list @@ -312,7 +316,7 @@ async def test_inbound_call_handler(self): assert updates["call_direction"] == "inbound" assert updates["caller_id"] == "+1987654321" - async def test_event_processor_registration(self): + def test_event_processor_registration(self): """Test event processor handler registration.""" processor = 
MockCallEventProcessor() @@ -325,7 +329,7 @@ async def dummy_handler(context): assert stats["handlers_registered"] == 1 assert MockACSEventTypes.CALL_CONNECTED in stats["event_types"] - async def test_event_processing_flow(self): + def test_event_processing_flow(self): """Test end-to-end event processing.""" processor = MockCallEventProcessor() @@ -348,7 +352,7 @@ async def test_event_processing_flow(self): mock_state.clients = [] # Process event - result = await processor.process_events([event], mock_state) + result = run_async(processor.process_events([event], mock_state)) assert result["status"] == "success" assert result["processed"] == 1 @@ -358,7 +362,7 @@ async def test_event_processing_flow(self): active_calls = processor.get_active_calls() assert "test_789" in active_calls - async def test_active_call_lifecycle(self): + def test_active_call_lifecycle(self): """Test active call tracking through connect/disconnect.""" processor = MockCallEventProcessor() @@ -377,7 +381,7 @@ async def test_active_call_lifecycle(self): data={"callConnectionId": "lifecycle_test"}, ) - await processor.process_events([connect_event], mock_state) + run_async(processor.process_events([connect_event], mock_state)) assert "lifecycle_test" in processor.get_active_calls() # Disconnect event @@ -387,10 +391,10 @@ async def test_active_call_lifecycle(self): data={"callConnectionId": "lifecycle_test"}, ) - await processor.process_events([disconnect_event], mock_state) + run_async(processor.process_events([disconnect_event], mock_state)) assert "lifecycle_test" not in processor.get_active_calls() - async def test_error_handling_isolation(self): + def test_error_handling_isolation(self): """Test that one failing handler doesn't stop others.""" processor = MockCallEventProcessor() @@ -411,7 +415,7 @@ async def succeeding_handler(context): ) # Should handle error gracefully - result = await processor.process_events([event], MagicMock()) + result = run_async(processor.process_events([event], 
MagicMock())) # Event should still be processed despite one handler failing assert result["processed"] == 1 diff --git a/tests/test_redis_manager.py b/tests/test_redis_manager.py new file mode 100644 index 00000000..07da8e2c --- /dev/null +++ b/tests/test_redis_manager.py @@ -0,0 +1,107 @@ +import pytest + +from redis.exceptions import MovedError + +from src.redis import manager as redis_manager +from src.redis.manager import AzureRedisManager + + +class _FakeRedis: + def __init__(self) -> None: + self.hgetall_calls = 0 + + def hgetall(self, key: str) -> dict[str, str]: + self.hgetall_calls += 1 + raise MovedError("1234 127.0.0.1:7001") + + +class _FakeClusterRedis: + def __init__(self) -> None: + self.hgetall_calls = 0 + + def hgetall(self, key: str) -> dict[str, str]: + self.hgetall_calls += 1 + return {"foo": "bar"} + + +def test_get_session_data_switches_to_cluster(monkeypatch): + single_node_client = _FakeRedis() + cluster_client = _FakeClusterRedis() + + # Stub the redis client constructors used inside the manager + monkeypatch.setattr( + redis_manager.redis, + "Redis", + lambda *args, **kwargs: single_node_client, + ) + monkeypatch.setattr( + redis_manager, + "RedisCluster", + lambda *args, **kwargs: cluster_client, + ) + + mgr = AzureRedisManager( + host="example.redis.local", + port=6380, + access_key="dummy", + ssl=False, + credential=object(), + ) + + data = mgr.get_session_data("session-123") + + assert data == {"foo": "bar"} + assert single_node_client.hgetall_calls == 1 + assert cluster_client.hgetall_calls == 1 + assert mgr._using_cluster is True + + +def test_get_session_data_raises_without_cluster_support(monkeypatch): + single_node_client = _FakeRedis() + + monkeypatch.setattr( + redis_manager.redis, + "Redis", + lambda *args, **kwargs: single_node_client, + ) + monkeypatch.setattr(redis_manager, "RedisCluster", None, raising=False) + + mgr = AzureRedisManager( + host="example.redis.local", + port=6380, + access_key="dummy", + ssl=False, + 
credential=object(), + ) + + with pytest.raises(MovedError): + mgr.get_session_data("session-123") + + +def test_remap_cluster_address_to_domain(monkeypatch): + fake_client = object() + monkeypatch.setattr( + redis_manager.redis, "Redis", lambda *args, **kwargs: fake_client + ) + monkeypatch.setattr( + redis_manager, "RedisCluster", lambda *args, **kwargs: fake_client + ) + + mgr = AzureRedisManager( + host="example.redis.local", + port=6380, + access_key="dummy", + ssl=False, + credential=object(), + ) + + # IP addresses remap to canonical host + assert mgr._remap_cluster_address(("51.8.10.248", 8501)) == ( + "example.redis.local", + 8501, + ) + # Hostnames remain unchanged + assert mgr._remap_cluster_address(("cache.contoso.redis", 8501)) == ( + "cache.contoso.redis", + 8501, + ) diff --git a/tests/test_speech_queue.py b/tests/test_speech_queue.py index 380025cd..bf69fc1a 100644 --- a/tests/test_speech_queue.py +++ b/tests/test_speech_queue.py @@ -44,159 +44,173 @@ class SpeechEvent: ) # Use time.time() instead of asyncio loop time -async def test_basic_queue(): +def test_basic_queue(): """Test 1: Basic queue put/get functionality""" - logger.info("🧪 Test 1: Basic queue functionality") - queue = asyncio.Queue(maxsize=10) + async def _run(): + logger.info("🧪 Test 1: Basic queue functionality") - # Test event - test_event = SpeechEvent( - event_type=SpeechEventType.FINAL, text="Hello world test", language="en-US" - ) + queue = asyncio.Queue(maxsize=10) - # Put event - await queue.put(test_event) - logger.info(f"✅ Event queued successfully. 
Queue size: {queue.qsize()}") - - # Get event with timeout - try: - retrieved_event = await asyncio.wait_for(queue.get(), timeout=1.0) - logger.info( - f"✅ Event retrieved successfully: {retrieved_event.event_type.value} - '{retrieved_event.text}'" + # Test event + test_event = SpeechEvent( + event_type=SpeechEventType.FINAL, text="Hello world test", language="en-US" ) - return True - except asyncio.TimeoutError: - logger.error("❌ Queue get timed out - this should not happen!") - return False + # Put event + await queue.put(test_event) + logger.info(f"✅ Event queued successfully. Queue size: {queue.qsize()}") -async def test_processing_loop(): - """Test 2: Processing loop similar to Route Turn Thread""" - logger.info("🧪 Test 2: Processing loop simulation") + # Get event with timeout + try: + retrieved_event = await asyncio.wait_for(queue.get(), timeout=1.0) + logger.info( + f"✅ Event retrieved successfully: {retrieved_event.event_type.value} - '{retrieved_event.text}'" + ) + return True + except asyncio.TimeoutError: + logger.error("❌ Queue get timed out - this should not happen!") + return False - queue = asyncio.Queue(maxsize=10) - running = True - events_processed = 0 + assert asyncio.run(_run()) - async def processing_loop(): - nonlocal events_processed - while running: - try: - logger.debug(f"🔄 Waiting for events (queue size: {queue.qsize()})") - speech_event = await asyncio.wait_for(queue.get(), timeout=1.0) - logger.info( - f"📢 Processing loop received event: {speech_event.event_type.value} - '{speech_event.text}'" - ) - events_processed += 1 +def test_processing_loop(): + """Test 2: Processing loop similar to Route Turn Thread""" - if events_processed >= 3: # Stop after processing 3 events + async def _run(): + logger.info("🧪 Test 2: Processing loop simulation") + + queue = asyncio.Queue(maxsize=10) + running = True + events_processed = 0 + + async def processing_loop(): + nonlocal events_processed + while running: + try: + logger.debug(f"🔄 Waiting for 
events (queue size: {queue.qsize()})") + speech_event = await asyncio.wait_for(queue.get(), timeout=1.0) + + logger.info( + f"📢 Processing loop received event: {speech_event.event_type.value} - '{speech_event.text}'" + ) + events_processed += 1 + + if events_processed >= 3: # Stop after processing 3 events + break + + except asyncio.TimeoutError: + logger.debug("⏰ Processing loop timeout (normal)") + continue + except Exception as e: + logger.error(f"❌ Error in processing loop: {e}") break - except asyncio.TimeoutError: - logger.debug("⏰ Processing loop timeout (normal)") - continue - except Exception as e: - logger.error(f"❌ Error in processing loop: {e}") - break - - # Start processing loop - processing_task = asyncio.create_task(processing_loop()) - - # Send test events - test_events = [ - SpeechEvent(SpeechEventType.GREETING, "Welcome message"), - SpeechEvent(SpeechEventType.FINAL, "User speech input"), - SpeechEvent(SpeechEventType.FINAL, "Another user input"), - ] + # Start processing loop + processing_task = asyncio.create_task(processing_loop()) - for i, event in enumerate(test_events): - logger.info(f"📤 Sending test event {i+1}: {event.text}") - await queue.put(event) - await asyncio.sleep(0.5) # Small delay between events - - # Wait for processing to complete - await processing_task - - running = False - logger.info(f"✅ Processing loop completed. 
Events processed: {events_processed}") - return events_processed == 3 + # Send test events + test_events = [ + SpeechEvent(SpeechEventType.GREETING, "Welcome message"), + SpeechEvent(SpeechEventType.FINAL, "User speech input"), + SpeechEvent(SpeechEventType.FINAL, "Another user input"), + ] + for i, event in enumerate(test_events): + logger.info(f"📤 Sending test event {i+1}: {event.text}") + await queue.put(event) + await asyncio.sleep(0.5) # Small delay between events -async def test_cross_thread_queue(): - """Test 3: Cross-thread queue communication simulation""" - logger.info("🧪 Test 3: Cross-thread queue communication") + # Wait for processing to complete + await processing_task - import threading + running = False + logger.info( + f"✅ Processing loop completed. Events processed: {events_processed}" + ) + return events_processed == 3 - queue = asyncio.Queue(maxsize=10) - main_loop = asyncio.get_running_loop() - events_received = [] + assert asyncio.run(_run()) - def background_thread_func(): - """Simulate Speech SDK Thread sending events""" - logger.info("🧵 Background thread started") - test_events = [ - SpeechEvent(SpeechEventType.PARTIAL, "Partial speech..."), - SpeechEvent(SpeechEventType.FINAL, "Complete speech recognition"), - ] - - for event in test_events: - logger.info(f"🧵 Background thread queuing: {event.text}") +def test_cross_thread_queue(): + """Test 3: Cross-thread queue communication simulation""" - # Method 1: Try put_nowait (fastest) + async def _run(): + logger.info("🧪 Test 3: Cross-thread queue communication") + + import threading + + queue = asyncio.Queue(maxsize=10) + main_loop = asyncio.get_running_loop() + events_received = [] + + def background_thread_func(): + """Simulate Speech SDK Thread sending events""" + logger.info("🧵 Background thread started") + + test_events = [ + SpeechEvent(SpeechEventType.PARTIAL, "Partial speech..."), + SpeechEvent(SpeechEventType.FINAL, "Complete speech recognition"), + ] + + for event in test_events: + 
logger.info(f"🧵 Background thread queuing: {event.text}") + + # Method 1: Try put_nowait (fastest) + try: + queue.put_nowait(event) + logger.info("🧵 Event queued via put_nowait") + continue + except Exception as e: + logger.debug( + f"🧵 put_nowait failed: {e}, trying run_coroutine_threadsafe..." + ) + + # Method 2: Fall back to run_coroutine_threadsafe + try: + future = asyncio.run_coroutine_threadsafe(queue.put(event), main_loop) + future.result(timeout=0.1) + logger.info("🧵 Event queued via run_coroutine_threadsafe") + except Exception as e: + logger.error(f"🧵 Failed to queue event: {e}") + + # Start background thread + thread = threading.Thread(target=background_thread_func, daemon=True) + thread.start() + + # Process events in main thread + timeout_count = 0 + max_timeouts = 5 + + while timeout_count < max_timeouts: try: - queue.put_nowait(event) - logger.info("🧵 Event queued via put_nowait") - continue - except Exception as e: logger.debug( - f"🧵 put_nowait failed: {e}, trying run_coroutine_threadsafe..." 
+ f"🔄 Main thread waiting for events (queue size: {queue.qsize()})" ) + event = await asyncio.wait_for(queue.get(), timeout=1.0) + logger.info( + f"📢 Main thread received: {event.event_type.value} - '{event.text}'" + ) + events_received.append(event) - # Method 2: Fall back to run_coroutine_threadsafe - try: - future = asyncio.run_coroutine_threadsafe(queue.put(event), main_loop) - future.result(timeout=0.1) - logger.info("🧵 Event queued via run_coroutine_threadsafe") - except Exception as e: - logger.error(f"🧵 Failed to queue event: {e}") - - # Start background thread - thread = threading.Thread(target=background_thread_func, daemon=True) - thread.start() - - # Process events in main thread - timeout_count = 0 - max_timeouts = 5 - - while timeout_count < max_timeouts: - try: - logger.debug( - f"🔄 Main thread waiting for events (queue size: {queue.qsize()})" - ) - event = await asyncio.wait_for(queue.get(), timeout=1.0) - logger.info( - f"📢 Main thread received: {event.event_type.value} - '{event.text}'" - ) - events_received.append(event) + if len(events_received) >= 2: # Got both events + break - if len(events_received) >= 2: # Got both events - break + except asyncio.TimeoutError: + timeout_count += 1 + logger.debug(f"⏰ Main thread timeout {timeout_count}/{max_timeouts}") + continue - except asyncio.TimeoutError: - timeout_count += 1 - logger.debug(f"⏰ Main thread timeout {timeout_count}/{max_timeouts}") - continue + thread.join(timeout=1.0) - thread.join(timeout=1.0) + logger.info( + f"✅ Cross-thread test completed. Events received: {len(events_received)}" + ) + return len(events_received) == 2 - logger.info( - f"✅ Cross-thread test completed. 
Events received: {len(events_received)}" - ) - return len(events_received) == 2 + assert asyncio.run(_run()) async def main(): @@ -217,10 +231,13 @@ async def main(): logger.info(f"{'='*50}") try: - result = await test_func() - results[test_name] = result - status = "✅ PASSED" if result else "❌ FAILED" + test_func() + results[test_name] = True + status = "✅ PASSED" logger.info(f"{test_name}: {status}") + except AssertionError as e: + logger.error(f"{test_name}: ❌ ASSERTION FAILED - {e}") + results[test_name] = False except Exception as e: logger.error(f"{test_name}: ❌ EXCEPTION - {e}") results[test_name] = False diff --git a/tests/test_v1_events_integration.py b/tests/test_v1_events_integration.py index 5d029ffb..ce65a024 100644 --- a/tests/test_v1_events_integration.py +++ b/tests/test_v1_events_integration.py @@ -11,9 +11,11 @@ import asyncio import json +import sys import pytest from unittest.mock import AsyncMock, MagicMock, patch -from fastapi.testclient import TestClient + +sys.modules.setdefault("sounddevice", MagicMock()) from azure.core.messaging import CloudEvent from datetime import datetime @@ -32,6 +34,11 @@ from apps.rtagent.backend.api.v1.handlers.acs_call_lifecycle import ACSLifecycleHandler +def run_async(coro): + """Execute coroutine for pytest environments without asyncio plugin.""" + return asyncio.run(coro) + + class TestV1EventsIntegration: """Test the integrated V1 events system.""" @@ -79,15 +86,24 @@ def sample_call_event_context(self, mock_memo_manager, mock_redis_mgr): }, ) - return CallEventContext( + mock_call_conn = MagicMock() + mock_call_conn.list_participants.return_value = [] + acs_caller = MagicMock() + acs_caller.get_call_connection.return_value = mock_call_conn + + context = CallEventContext( event=event, call_connection_id="test_call_123", event_type=ACSEventTypes.CALL_CONNECTED, memo_manager=mock_memo_manager, redis_mgr=mock_redis_mgr, + acs_caller=acs_caller, ) + context.app_state = MagicMock() + context.app_state.redis_pool 
= None + return context - async def test_event_processor_registration(self): + def test_event_processor_registration(self): """Test that handlers can be registered and retrieved.""" processor = CallEventProcessor() @@ -102,7 +118,7 @@ async def dummy_handler(context: CallEventContext): assert stats["handlers_registered"] == 1 assert ACSEventTypes.CALL_CONNECTED in stats["event_types"] - async def test_default_handlers_registration(self): + def test_default_handlers_registration(self): """Test that default handlers are registered correctly.""" register_default_handlers() @@ -119,7 +135,7 @@ async def test_default_handlers_registration(self): assert V1EventTypes.CALL_INITIATED in stats["event_types"] assert ACSEventTypes.CALL_CONNECTED in stats["event_types"] - async def test_call_initiated_handler( + def test_call_initiated_handler( self, sample_call_event_context, mock_memo_manager ): """Test call initiated event handler.""" @@ -132,7 +148,7 @@ async def test_call_initiated_handler( } # Call handler - await CallEventHandlers.handle_call_initiated(sample_call_event_context) + run_async(CallEventHandlers.handle_call_initiated(sample_call_event_context)) # Verify memo manager was updated mock_memo_manager.update_context.assert_called() @@ -146,7 +162,7 @@ async def test_call_initiated_handler( assert call_args["call_direction"] == "outbound" assert call_args["target_number"] == "+1234567890" - async def test_call_connected_handler(self, sample_call_event_context): + def test_call_connected_handler(self, sample_call_event_context): """Test call connected event handler.""" # Mock clients for broadcast mock_clients = [MagicMock(), MagicMock()] @@ -154,8 +170,13 @@ async def test_call_connected_handler(self, sample_call_event_context): with patch( "apps.rtagent.backend.api.v1.events.handlers.broadcast_message" - ) as mock_broadcast: - await CallEventHandlers.handle_call_connected(sample_call_event_context) + ) as mock_broadcast, patch( + 
"apps.rtagent.backend.api.v1.events.handlers.DTMFValidationLifecycle.setup_aws_connect_validation_flow", + new=AsyncMock(), + ): + run_async( + CallEventHandlers.handle_call_connected(sample_call_event_context) + ) # Verify broadcast was called mock_broadcast.assert_called_once() @@ -167,7 +188,7 @@ async def test_call_connected_handler(self, sample_call_event_context): assert message_data["type"] == "call_connected" assert message_data["call_connection_id"] == "test_call_123" - async def test_webhook_events_router(self, sample_call_event_context): + def test_webhook_events_router(self, sample_call_event_context): """Test webhook events router delegates to specific handlers.""" sample_call_event_context.event_type = V1EventTypes.WEBHOOK_EVENTS @@ -175,22 +196,24 @@ async def test_webhook_events_router(self, sample_call_event_context): # Set the original event type in context sample_call_event_context.event_type = ACSEventTypes.CALL_CONNECTED - await CallEventHandlers.handle_webhook_events(sample_call_event_context) + run_async(CallEventHandlers.handle_webhook_events(sample_call_event_context)) # Verify the specific handler was called mock_handle.assert_called_once_with(sample_call_event_context) - async def test_acs_lifecycle_handler_event_emission( + def test_acs_lifecycle_handler_event_emission( self, mock_acs_caller, mock_redis_mgr ): """Test that ACS lifecycle handler emits events correctly.""" handler = ACSLifecycleHandler() with patch.object(handler, "_emit_call_event") as mock_emit: - result = await handler.start_outbound_call( - acs_caller=mock_acs_caller, - target_number="+1234567890", - redis_mgr=mock_redis_mgr, + result = run_async( + handler.start_outbound_call( + acs_caller=mock_acs_caller, + target_number="+1234567890", + redis_mgr=mock_redis_mgr, + ) ) # Verify call was successful @@ -205,7 +228,7 @@ async def test_acs_lifecycle_handler_event_emission( assert emit_args[1] == "test_call_123" # call_connection_id assert emit_args[2]["target_number"] == 
"+1234567890" # data - async def test_process_call_events_delegation(self, mock_redis_mgr): + def test_process_call_events_delegation(self, mock_redis_mgr): """Test that process_call_events delegates to V1 event system.""" handler = ACSLifecycleHandler() @@ -223,8 +246,10 @@ async def test_process_call_events_delegation(self, mock_redis_mgr): ] with patch( - "apps.rtagent.backend.api.v1.events.processor.get_call_event_processor" - ) as mock_get_processor: + "apps.rtagent.backend.api.v1.events.get_call_event_processor" + ) as mock_get_processor, patch( + "apps.rtagent.backend.api.v1.events.register_default_handlers" + ): mock_processor = AsyncMock() mock_processor.process_events.return_value = { "status": "success", @@ -233,7 +258,9 @@ async def test_process_call_events_delegation(self, mock_redis_mgr): } mock_get_processor.return_value = mock_processor - result = await handler.process_call_events(mock_events, mock_request) + result = run_async( + handler.process_call_events(mock_events, mock_request) + ) # Verify delegation occurred assert result["status"] == "success" @@ -241,9 +268,9 @@ async def test_process_call_events_delegation(self, mock_redis_mgr): assert result["processed_events"] == 1 # Verify processor was called - mock_processor.process_events.assert_called_once() + mock_processor.process_events.assert_awaited_once() - async def test_event_context_data_extraction(self): + def test_event_context_data_extraction(self): """Test event context data extraction methods.""" # Test with dict data event = CloudEvent( @@ -265,7 +292,7 @@ async def test_event_context_data_extraction(self): assert context.get_event_field("field1") == "value1" assert context.get_event_field("nonexistent", "default") == "default" - async def test_event_context_json_data_extraction(self): + def test_event_context_json_data_extraction(self): """Test event context with JSON string data.""" json_data = json.dumps({"callConnectionId": "test_123", "status": "connected"}) @@ -279,7 +306,7 @@ 
async def test_event_context_json_data_extraction(self): assert data["callConnectionId"] == "test_123" assert data["status"] == "connected" - async def test_processor_error_isolation(self): + def test_processor_error_isolation(self): """Test that one failing handler doesn't stop others.""" processor = CallEventProcessor() @@ -305,13 +332,13 @@ async def succeeding_handler(context: CallEventContext): mock_state = MagicMock() # Process event - should not raise exception - result = await processor.process_events([event], mock_state) + result = run_async(processor.process_events([event], mock_state)) # Should indicate partial success assert result["processed"] == 1 # One event processed assert "failed" in result or "status" in result # Some indication of issues - async def test_active_call_tracking(self): + def test_active_call_tracking(self): """Test that processor tracks active calls correctly.""" processor = CallEventProcessor() @@ -325,7 +352,7 @@ async def test_active_call_tracking(self): data={"callConnectionId": "test_123"}, ) - await processor.process_events([connected_event], mock_state) + run_async(processor.process_events([connected_event], mock_state)) # Should track the active call active_calls = processor.get_active_calls() @@ -338,7 +365,7 @@ async def test_active_call_tracking(self): data={"callConnectionId": "test_123"}, ) - await processor.process_events([disconnected_event], mock_state) + run_async(processor.process_events([disconnected_event], mock_state)) # Should no longer track the call active_calls = processor.get_active_calls() @@ -353,7 +380,7 @@ def setup(self): """Reset processor before each test.""" reset_call_event_processor() - async def test_outbound_call_flow(self): + def test_outbound_call_flow(self): """Test complete outbound call flow through hybrid architecture.""" # 1. 
Setup mocks @@ -372,10 +399,12 @@ async def test_outbound_call_flow(self): handler = ACSLifecycleHandler() with patch.object(handler, "_emit_call_event") as mock_emit: - result = await handler.start_outbound_call( - acs_caller=mock_acs_caller, - target_number="+1234567890", - redis_mgr=mock_redis_mgr, + result = run_async( + handler.start_outbound_call( + acs_caller=mock_acs_caller, + target_number="+1234567890", + redis_mgr=mock_redis_mgr, + ) ) # 4. Verify ACS operation succeeded @@ -388,7 +417,7 @@ async def test_outbound_call_flow(self): assert emit_args[0] == "V1.Call.Initiated" assert emit_args[1] == "test_call_outbound" - async def test_webhook_processing_flow(self): + def test_webhook_processing_flow(self): """Test webhook event processing through the events system.""" # 1. Register handlers @@ -424,13 +453,13 @@ async def test_webhook_processing_flow(self): processor = get_call_event_processor() - result = await processor.process_events(webhook_events, mock_state) + result = run_async(processor.process_events(webhook_events, mock_state)) # 5. Verify processing assert result["processed"] == 2 assert result["failed"] == 0 - async def test_error_handling_consistency(self): + def test_error_handling_consistency(self): """Test that errors are handled consistently across the system.""" # 1. Test ACS operation error @@ -440,10 +469,12 @@ async def test_error_handling_consistency(self): handler = ACSLifecycleHandler() with pytest.raises(Exception): # Should propagate as HTTPException - await handler.start_outbound_call( - acs_caller=mock_acs_caller, - target_number="+1234567890", - redis_mgr=MagicMock(), + run_async( + handler.start_outbound_call( + acs_caller=mock_acs_caller, + target_number="+1234567890", + redis_mgr=MagicMock(), + ) ) # 2. 
Test event processing error @@ -459,7 +490,7 @@ async def test_error_handling_consistency(self): processor = get_call_event_processor() # Should handle gracefully without raising - result = await processor.process_events([bad_event], MagicMock()) + result = run_async(processor.process_events([bad_event], MagicMock())) assert "status" in result diff --git a/utils/docstringtool/docstring_report.json b/utils/docstringtool/docstring_report.json index d2e64300..69db8797 100644 --- a/utils/docstringtool/docstring_report.json +++ b/utils/docstringtool/docstring_report.json @@ -10,7 +10,7 @@ }, "files": [ { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\docstring_standardizer.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\docstring_standardizer.py", "functions": [ { "name": "main", @@ -925,13 +925,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\utils\\azure_auth.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\utils\\azure_auth.py", "functions": [ { "name": "_using_managed_identity", @@ -964,7 +964,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\utils\\ml_logging.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\utils\\ml_logging.py", "functions": [ { "name": "keyinfo", @@ -1353,7 +1353,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\utils\\telemetry_config.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\utils\\telemetry_config.py", "functions": [ { "name": "suppress_azure_credential_logs", @@ -1446,7 +1446,7 @@ "issues": [] }, { - "file_path": 
"C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\utils\\trace_context.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\utils\\trace_context.py", "functions": [ { "name": "create_trace_context", @@ -2216,13 +2216,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\utils\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\utils\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\main.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\main.py", "functions": [ { "name": "create_app", @@ -2278,7 +2278,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\settings.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\settings.py", "functions": [ { "name": "get_agent_voice", @@ -2306,13 +2306,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\scripts\\start_backend.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\scripts\\start_backend.py", "functions": [ { "name": "find_project_root", @@ -2395,13 +2395,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\scripts\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\scripts\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": 
"C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\swagger_docs.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\swagger_docs.py", "functions": [ { "name": "get_tags", @@ -2594,13 +2594,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\helpers.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\helpers.py", "error": "unexpected indent (, line 7)", "functions": [], "classes": [], @@ -2609,25 +2609,25 @@ ] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\shared_ws.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\shared_ws.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\router.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\router.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\dependencies\\orchestrator.py", + "file_path": 
"C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\dependencies\\orchestrator.py", "functions": [ { "name": "get_orchestrator", @@ -2648,7 +2648,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\endpoints\\calls.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\endpoints\\calls.py", "functions": [ { "name": "create_call_event", @@ -2684,7 +2684,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\endpoints\\health.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\endpoints\\health.py", "functions": [ { "name": "_validate_phone_number", @@ -2806,7 +2806,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\endpoints\\media.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\endpoints\\media.py", "functions": [ { "name": "_log_websocket_disconnect", @@ -2871,7 +2871,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\endpoints\\realtime.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\endpoints\\realtime.py", "functions": [ { "name": "_log_dashboard_disconnect", @@ -3022,13 +3022,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\endpoints\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\endpoints\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": 
"C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\events\\demo.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\events\\demo.py", "functions": [ { "name": "create_webhook_handler_example", @@ -3098,7 +3098,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\events\\handlers.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\events\\handlers.py", "functions": [ { "name": "_extract_caller_id", @@ -3787,7 +3787,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\events\\processor.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\events\\processor.py", "functions": [ { "name": "get_call_event_processor", @@ -4283,7 +4283,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\events\\registration.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\events\\registration.py", "functions": [ { "name": "register_default_handlers", @@ -4335,7 +4335,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\events\\types.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\events\\types.py", "functions": [ { "name": "get_event_data", @@ -4518,13 +4518,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\events\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\events\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - 
"file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\handlers\\acs_call_lifecycle.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\handlers\\acs_call_lifecycle.py", "functions": [ { "name": "safe_set_span_attributes", @@ -5089,7 +5089,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\handlers\\acs_media_lifecycle.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\handlers\\acs_media_lifecycle.py", "functions": [ { "name": "__init__", @@ -6441,7 +6441,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\handlers\\realtime.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\handlers\\realtime.py", "functions": [ { "name": "create_v1_realtime_handler", @@ -6632,7 +6632,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\models\\base.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\models\\base.py", "functions": [ { "name": "update_timestamp", @@ -6721,7 +6721,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\models\\call.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\models\\call.py", "functions": [], "classes": [ { @@ -6778,7 +6778,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\models\\event.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\models\\event.py", "functions": [], "classes": [ { @@ 
-6835,7 +6835,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\models\\participant.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\models\\participant.py", "functions": [], "classes": [ { @@ -6878,13 +6878,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\models\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\models\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\schemas\\call.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\schemas\\call.py", "functions": [], "classes": [ { @@ -7029,7 +7029,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\schemas\\event.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\schemas\\event.py", "functions": [], "classes": [ { @@ -7198,7 +7198,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\schemas\\health.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\schemas\\health.py", "functions": [], "classes": [ { @@ -7274,7 +7274,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\schemas\\media.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\schemas\\media.py", "functions": [], "classes": [ { @@ -7488,7 +7488,7 @@ "issues": [] }, { - "file_path": 
"C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\schemas\\participant.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\schemas\\participant.py", "functions": [], "classes": [ { @@ -7610,7 +7610,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\schemas\\realtime.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\schemas\\realtime.py", "functions": [], "classes": [ { @@ -7737,7 +7737,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\schemas\\webhook.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\schemas\\webhook.py", "functions": [], "classes": [ { @@ -7882,13 +7882,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\schemas\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\schemas\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\utils\\simple_events.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\utils\\simple_events.py", "functions": [ { "name": "get_simple_health_status", @@ -7927,7 +7927,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\handlers\\back\\acs_media_handler.back.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\handlers\\back\\acs_media_handler.back.py", "functions": [ { "name": "get_current_time", @@ -9004,7 +9004,7 @@ "issues": [] }, 
{ - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\api\\v1\\handlers\\back\\legacy_acs_lifecycle.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\api\\v1\\handlers\\back\\legacy_acs_lifecycle.py", "functions": [ { "name": "get_current_time", @@ -9365,7 +9365,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\agents\\base.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\agents\\base.py", "functions": [ { "name": "__init__", @@ -9559,13 +9559,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\agents\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\agents\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\factories\\stt_factory.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\factories\\stt_factory.py", "functions": [ { "name": "create_stt_recognizer", @@ -9585,7 +9585,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\handlers\\acs_handler.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\handlers\\acs_handler.py", "functions": [ { "name": "_get_participant_phone", @@ -10151,7 +10151,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\handlers\\acs_media_handler.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\handlers\\acs_media_handler.py", "functions": [ { "name": "__init__", @@ -10931,7 +10931,7 
@@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\handlers\\acs_transcript_handler.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\handlers\\acs_transcript_handler.py", "functions": [ { "name": "__init__", @@ -11151,13 +11151,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\handlers\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\handlers\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\latency\\latency_tool.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\latency\\latency_tool.py", "functions": [ { "name": "__init__", @@ -11365,13 +11365,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\latency\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\latency\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\orchestration\\gpt_flow.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\orchestration\\gpt_flow.py", "functions": [ { "name": "_get_agent_voice_config", @@ -11481,7 +11481,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\orchestration\\orchestrator.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\orchestration\\orchestrator.py", "functions": [ { "name": "configure_entry_and_specialists", @@ -11812,13 
+11812,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\orchestration\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\orchestration\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\routers\\acs.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\routers\\acs.py", "functions": [], "classes": [ { @@ -11836,7 +11836,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\routers\\health.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\routers\\health.py", "functions": [ { "name": "_validate_phone_number", @@ -11865,7 +11865,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\routers\\realtime.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\routers\\realtime.py", "functions": [ { "name": "on_partial", @@ -11916,19 +11916,19 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\routers\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\routers\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\services\\cosmosdb_services.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\services\\cosmosdb_services.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": 
"C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\services\\openai_services.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\services\\openai_services.py", "functions": [ { "name": "create_azure_openai_client", @@ -11949,25 +11949,25 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\services\\redis_services.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\services\\redis_services.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\services\\speech_services.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\services\\speech_services.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\services\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\services\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\utils\\auth.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\utils\\auth.py", "functions": [ { "name": "get_jwks", @@ -12095,7 +12095,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\utils\\auth_utils.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\utils\\auth_utils.py", "functions": [ { "name": "decode_jwt_payload", @@ -12195,7 +12195,7 @@ "issues": [] }, { - "file_path": 
"C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\utils\\tracing.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\utils\\tracing.py", "functions": [ { "name": "create_span_attrs", @@ -12829,7 +12829,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\utils\\tracing_utils.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\utils\\tracing_utils.py", "functions": [ { "name": "create_span_attrs", @@ -12957,7 +12957,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\agents\\prompt_store\\prompt_manager.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\agents\\prompt_store\\prompt_manager.py", "functions": [ { "name": "__init__", @@ -13075,13 +13075,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\agents\\prompt_store\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\agents\\prompt_store\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\agents\\tool_store\\auth.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\agents\\tool_store\\auth.py", "functions": [], "classes": [ { @@ -13112,7 +13112,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\agents\\tool_store\\emergency.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\agents\\tool_store\\emergency.py", "functions": [], "classes": [ { @@ -13130,7 +13130,7 @@ 
"issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\agents\\tool_store\\fnol.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\agents\\tool_store\\fnol.py", "functions": [ { "name": "_new_claim_id", @@ -13238,7 +13238,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\agents\\tool_store\\functions_helper.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\agents\\tool_store\\functions_helper.py", "functions": [ { "name": "_json", @@ -13267,7 +13267,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\agents\\tool_store\\handoffs.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\agents\\tool_store\\handoffs.py", "functions": [], "classes": [ { @@ -13310,7 +13310,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\agents\\tool_store\\policies.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\agents\\tool_store\\policies.py", "functions": [ { "name": "_best_attr", @@ -13380,13 +13380,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\agents\\tool_store\\schemas.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\agents\\tool_store\\schemas.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\agents\\tool_store\\tools_helper.py", + "file_path": 
"C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\agents\\tool_store\\tools_helper.py", "functions": [ { "name": "_frame", @@ -13419,19 +13419,19 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\agents\\tool_store\\tool_registry.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\agents\\tool_store\\tool_registry.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\agents\\tool_store\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\agents\\tool_store\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\services\\acs\\acs_caller.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\services\\acs\\acs_caller.py", "functions": [ { "name": "initialize_acs_caller_instance", @@ -13453,7 +13453,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\services\\acs\\acs_helpers.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\services\\acs\\acs_helpers.py", "functions": [ { "name": "construct_websocket_url", @@ -13513,7 +13513,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\services\\acs\\session_terminator.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\services\\acs\\session_terminator.py", "functions": [ { "name": "_get_disconnect_event", @@ -13616,13 +13616,13 @@ "issues": [] }, { - "file_path": 
"C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\apps\\rtagent\\backend\\src\\services\\acs\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\apps\\rtagent\\backend\\src\\services\\acs\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\devops\\security\\bandit_to_sarif.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\devops\\security\\bandit_to_sarif.py", "functions": [ { "name": "bandit_to_sarif", @@ -13667,7 +13667,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\devops\\security\\run_bandit.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\devops\\security\\run_bandit.py", "functions": [ { "name": "utc_stamp", @@ -13718,7 +13718,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\devops\\scripts\\azd\\helpers\\acs_phone_number_manager.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\devops\\scripts\\azd\\helpers\\acs_phone_number_manager.py", "functions": [ { "name": "get_azd_env_value", @@ -13807,7 +13807,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\acs\\acs_helper.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\acs\\acs_helper.py", "functions": [ { "name": "_endpoint_host_from_client", @@ -14278,13 +14278,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\acs\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\acs\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\agenticmemory\\memoriesbuilder.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\agenticmemory\\memoriesbuilder.py", 
"functions": [ { "name": "__init__", @@ -14442,7 +14442,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\agenticmemory\\playback_queue.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\agenticmemory\\playback_queue.py", "functions": [ { "name": "__init__", @@ -14774,7 +14774,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\agenticmemory\\types.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\agenticmemory\\types.py", "functions": [ { "name": "__init__", @@ -15510,7 +15510,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\agenticmemory\\utils.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\agenticmemory\\utils.py", "functions": [ { "name": "__init__", @@ -15663,13 +15663,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\agenticmemory\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\agenticmemory\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\aoai\\audio_util.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\aoai\\audio_util.py", "functions": [ { "name": "audio_to_pcm16_base64", @@ -16071,7 +16071,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\aoai\\manager.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\aoai\\manager.py", "functions": [ { "name": "_is_aoai_tracing_enabled", @@ -16987,7 +16987,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\aoai\\manager_transcribe.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\aoai\\manager_transcribe.py", "functions": 
[ { "name": "__init__", @@ -17721,7 +17721,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\aoai\\push_to_talk.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\aoai\\push_to_talk.py", "functions": [ { "name": "render", @@ -18047,13 +18047,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\aoai\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\aoai\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\blob\\blob_helper.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\blob\\blob_helper.py", "functions": [ { "name": "get_blob_helper", @@ -18583,13 +18583,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\blob\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\blob\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\cosmosdb\\manager.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\cosmosdb\\manager.py", "functions": [ { "name": "__init__", @@ -19097,13 +19097,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\cosmosdb\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\cosmosdb\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\enums\\monitoring.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\enums\\monitoring.py", "functions": [], "classes": [ { @@ -19121,7 +19121,7 @@ "issues": [] }, { - "file_path": 
"C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\enums\\stream_modes.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\enums\\stream_modes.py", "functions": [ { "name": "__str__", @@ -19327,13 +19327,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\enums\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\enums\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\pools\\async_pool.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\pools\\async_pool.py", "functions": [ { "name": "__init__", @@ -19490,7 +19490,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\pools\\session_manager.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\pools\\session_manager.py", "functions": [ { "name": "__init__", @@ -19697,7 +19697,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\pools\\session_metrics.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\pools\\session_metrics.py", "functions": [ { "name": "__init__", @@ -19839,7 +19839,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\pools\\websocket_manager.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\pools\\websocket_manager.py", "functions": [ { "name": "__init__", @@ -20009,13 +20009,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\pools\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\pools\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\postcall\\push.py", + "file_path": 
"C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\postcall\\push.py", "functions": [ { "name": "build_and_flush", @@ -20046,13 +20046,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\postcall\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\postcall\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\prompts\\prompt_manager.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\prompts\\prompt_manager.py", "functions": [ { "name": "__init__", @@ -20252,13 +20252,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\prompts\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\prompts\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\redis\\manager.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\redis\\manager.py", "functions": [ { "name": "is_connected", @@ -21130,13 +21130,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\redis\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\redis\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\speech\\conversation_recognizer.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\speech\\conversation_recognizer.py", "functions": [ { "name": "__init__", @@ -21832,7 +21832,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\speech\\speech_recognizer.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\speech\\speech_recognizer.py", 
"functions": [ { "name": "__init__", @@ -22732,7 +22732,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\speech\\text_to_speech.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\speech\\text_to_speech.py", "functions": [ { "name": "split_sentences", @@ -23810,7 +23810,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\speech\\utils_audio.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\speech\\utils_audio.py", "functions": [ { "name": "check_audio_file", @@ -23858,13 +23858,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\speech\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\speech\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\stateful\\state_managment.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\stateful\\state_managment.py", "functions": [ { "name": "__init__", @@ -26123,13 +26123,13 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\stateful\\__init__.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\stateful\\__init__.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\vad\\vad_iterator.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\vad\\vad_iterator.py", "functions": [ { "name": "int2float", @@ -26359,19 +26359,19 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\agenticmemory\\prompts\\prompt_gpt_summarize.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\agenticmemory\\prompts\\prompt_gpt_summarize.py", "functions": [], 
"classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\agenticmemory\\prompts\\prompt_voice_chat.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\agenticmemory\\prompts\\prompt_voice_chat.py", "functions": [], "classes": [], "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\redis\\legacy\\async_manager.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\redis\\legacy\\async_manager.py", "functions": [ { "name": "__init__", @@ -26804,7 +26804,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\redis\\legacy\\key_manager.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\redis\\legacy\\key_manager.py", "functions": [ { "name": "get_key_manager", @@ -27440,7 +27440,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\redis\\legacy\\models.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\redis\\legacy\\models.py", "functions": [], "classes": [ { @@ -27480,7 +27480,7 @@ "issues": [] }, { - "file_path": "C:\\Users\\pablosal\\Desktop\\gbb-ai-audio-agent\\src\\redis\\legacy\\__backup.py", + "file_path": "C:\\Users\\pablosal\\Desktop\\art-voice-agent-accelerator\\src\\redis\\legacy\\__backup.py", "functions": [ { "name": "__init__",