diff --git a/.env.sample b/.env.sample
index 62e8d5bd..cacd4371 100644
--- a/.env.sample
+++ b/.env.sample
@@ -65,7 +65,6 @@ AZURE_OPENAI_STT_TTS_KEY=your-azure-speech-service-key # Optio
 # Azure Voice Live Integration (Optional - for Azure Voice Live API)
 # ============================================================================
 AZURE_VOICE_LIVE_ENDPOINT=https://your-voice-live-endpoint.voice.azure.com/ # Optional: Azure Voice Live API endpoint
-AZURE_VOICE_LIVE_KEY=optional-key # Optional: Azure Voice Live API key
 AZURE_VOICE_LIVE_API_KEY=your-voice-live-api-key # Optional: Alternative API key name
 AZURE_VOICE_LIVE_MODEL=gpt-4o # Optional: Voice Live model deployment (default: gpt-4o)
 AZURE_VOICE_LIVE_API_VERSION=2024-10-01-preview # Optional: Voice Live API version
@@ -73,7 +72,7 @@ AZURE_VOICE_LIVE_API_VERSION=2024-10-01-preview # Optio
 # Azure AI Foundry Integration (Optional)
 AZURE_AI_FOUNDRY_ENDPOINT=https://your-foundry-endpoint.services.ai.azure.com/api/projects/your-project # Optional: AI Foundry project endpoint
 AI_FOUNDRY_PROJECT_NAME=your-ai-foundry-project # Optional: AI Foundry project name
-
+AI_FOUNDRY_AGENT_ID=your-ai-foundry-agent-id # Optional: AI Foundry agent ID
 # ============================================================================
 # Base URL Configuration (Required for Webhooks)
 # ============================================================================
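The new `AI_FOUNDRY_AGENT_ID` sample value implies a corresponding read on the app side. A minimal sketch of how a config module might surface the optional Foundry settings together (only the variable names come from this diff; the helper and its behavior are illustrative assumptions):

```python
import os

# Optional Azure AI Foundry settings (names from .env.sample above; the
# gating helper below is an illustrative assumption, not the app's code).
AZURE_AI_FOUNDRY_ENDPOINT = os.getenv("AZURE_AI_FOUNDRY_ENDPOINT", "")
AI_FOUNDRY_PROJECT_NAME = os.getenv("AI_FOUNDRY_PROJECT_NAME", "")
AI_FOUNDRY_AGENT_ID = os.getenv("AI_FOUNDRY_AGENT_ID", "")


def foundry_configured() -> bool:
    """True only when every value needed to address a Foundry agent is set."""
    return all((AZURE_AI_FOUNDRY_ENDPOINT, AI_FOUNDRY_PROJECT_NAME, AI_FOUNDRY_AGENT_ID))
```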
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 792ebd75..17283d58 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -35,6 +35,11 @@ jobs:
         with:
           python-version: '3.11'
 
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y portaudio19-dev
+
       - name: Cache dependencies
         uses: actions/cache@v4
         with:
diff --git a/Makefile b/Makefile
index 2bd6b29b..61667658 100644
--- a/Makefile
+++ b/Makefile
@@ -124,14 +124,15 @@ run_load_test:
 	$(eval CONCURRENT ?= 20)
 	$(eval RECORD ?= )
 	$(eval RECORD_RATE ?= 0.2)
-	@python $(SCRIPTS_LOAD_DIR)/detailed_statistics_analyzer.py \
-		--url $(URL) \
-		--turns $(TURNS) \
-		--conversations $(CONVERSATIONS) \
-		--concurrent $(CONCURRENT) \
-		$(if $(RECORD),--record) \
-		$(if $(RECORD_RATE),--record-rate $(RECORD_RATE)) \
-		$(EXTRA_ARGS)
+	@locust -f $(SCRIPTS_LOAD_DIR)/locustfile.py --headless -u $(CONVERSATIONS) -r $(CONCURRENT) --run-time 10m --host $(URL) --stop-timeout 60 --csv=locust_report --only-summary
+# @python $(SCRIPTS_LOAD_DIR)/detailed_statistics_analyzer.py \
+#	--url $(URL) \
+#	--turns $(TURNS) \
+#	--conversations $(CONVERSATIONS) \
+#	--concurrent $(CONCURRENT) \
+#	$(if $(RECORD),--record) \
+#	$(if $(RECORD_RATE),--record-rate $(RECORD_RATE)) \
+#	$(EXTRA_ARGS)
 
 # Conversation Analysis Targets
 list-conversations:
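The target now drives Locust headlessly: `-u $(CONVERSATIONS)` is the peak simulated-user count, `-r $(CONCURRENT)` the spawn rate per second, `--stop-timeout 60` gives in-flight work a minute to drain on shutdown, and `--csv=locust_report` writes `locust_report_stats.csv` plus history and failures files. The referenced `locustfile.py` is not part of this diff; a minimal sketch of a file this command could run, with the task body and endpoint as illustrative assumptions:

```python
from locust import HttpUser, between, task


class ConversationUser(HttpUser):
    """One simulated caller; -u controls how many of these run at once."""

    wait_time = between(1, 3)  # think time between turns, in seconds

    @task
    def converse(self) -> None:
        # Stand-in for a real conversation turn; the repo's actual
        # locustfile presumably exercises the media/WebSocket endpoints.
        self.client.get("/health", name="health")
```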
@@ -142,484 +143,11 @@ FILE_TO_ANALYZE = tests\load\results\recorded_conversations_20250829_085350.json
 playback-conversations:
 	python $(SCRIPTS_LOAD_DIR)/conversation_playback.py --conversation-file $(FILE_TO_ANALYZE)
 
-# ACS call load testing (real phone calls - requires phone numbers)
-run_acs_call_load_test:
-	@echo "⚠️ WARNING: This will initiate real ACS phone calls!"
-	@echo "⚠️ Make sure you have test numbers and sufficient credits!"
-	@echo "⚠️ Press Ctrl+C to cancel in the next 5 seconds..."
-	@sleep 5
-	python $(SCRIPTS_LOAD_DIR)/acs_call_load_test.py
-
-# Development ACS testing (single call to specified phone)
-run_acs_dev_test:
-	python $(SCRIPTS_LOAD_DIR)/acs_call_load_test_dev.py --environment dev --target-phones $(PHONE)
-
-# Staging ACS testing (5 calls)
-run_acs_staging_test:
-	python $(SCRIPTS_LOAD_DIR)/acs_call_load_test_dev.py --environment staging --target-phones $(PHONE)
-
-############################################################
-# Azure App Service Deployment Artifacts
-# Purpose: Generate build artifacts and deployment packages
-############################################################
-
-# Directories and files to include in backend deployment
-BACKEND_DIRS = src utils apps/rtagent/backend
-BACKEND_FILES = requirements.txt .deploy/runtime.txt .deploy/.python-version
-EXCLUDE_PATTERNS = __pycache__ *.pyc .pytest_cache *.log .coverage htmlcov .DS_Store .git node_modules *.tmp *.temp dist .env
-DEPLOY_DIR = .deploy/backend
-TIMESTAMP = $(shell date +%Y%m%d_%H%M%S)
-GIT_HASH = $(shell git rev-parse --short HEAD 2>/dev/null || echo "unknown")
-DEPLOY_ZIP = backend_deployment_$(GIT_HASH)_$(TIMESTAMP).zip
-
-# Generate frontend deployment artifacts
-generate_frontend_deployment:
-	@echo "🏗️ Generating Frontend Deployment Artifacts"
-	@echo "=============================================="
-	@echo ""
-
-	# Clean and create deployment directory
-	@echo "🧹 Cleaning previous frontend deployment artifacts..."
-	@rm -rf .deploy/frontend
-	@mkdir -p .deploy/frontend
-
-	# Copy frontend directory
-	@echo "📦 Copying frontend directory..."
-	@if [ -d "apps/rtagent/frontend" ]; then \
-		rsync -av --exclude='node_modules' --exclude='.env' --exclude='.DS_Store' apps/rtagent/frontend/ .deploy/frontend/; \
-	else \
-		echo "  ❌ Error: apps/rtagent/frontend directory not found."; \
-		exit 1; \
-	fi
-
-	# Create deployment zip
-	@FRONTEND_DEPLOY_ZIP=frontend_deployment_$(GIT_HASH)_$(TIMESTAMP).zip; \
-	echo "📦 Creating deployment zip: $$FRONTEND_DEPLOY_ZIP"; \
-	cd .deploy/frontend && zip -rq "../$$FRONTEND_DEPLOY_ZIP" .; \
-	echo ""; \
-	echo "✅ Frontend deployment artifacts generated successfully!"; \
-	echo "📊 Deployment Summary:"; \
-	echo "  📁 Artifacts directory: .deploy/frontend"; \
-	echo "  📦 Deployment package: .deploy/$$FRONTEND_DEPLOY_ZIP"; \
-	echo "  📏 Package size: $$(du -h .deploy/$$FRONTEND_DEPLOY_ZIP | cut -f1)"; \
-	echo "  🔢 Git commit: $(GIT_HASH)"; \
-	echo "  🕐 Timestamp: $(TIMESTAMP)"; \
-	echo ""; \
-	echo "🚀 Ready for Azure App Service deployment!"
-
-# Generate backend deployment artifacts
-generate_backend_deployment:
-	@echo "🏗️ Generating Backend Deployment Artifacts"
-	@echo "=============================================="
-	@echo ""
-	# Clean and create deployment directory
-	@echo "🧹 Cleaning previous deployment artifacts..."
-	@rm -rf $(DEPLOY_DIR)
-	@mkdir -p $(DEPLOY_DIR)
-
-	# Copy backend directories with exclusions
-	@echo "📦 Copying backend directories..."
-	@echo "$(EXCLUDE_PATTERNS)" | tr ' ' '\n' > .deploy-excludes.tmp
-	@for dir in $(BACKEND_DIRS); do \
-		if [ -d "$$dir" ]; then \
-			echo "  Copying: $$dir"; \
-			rsync -av --exclude-from=.deploy-excludes.tmp "$$dir/" "$(DEPLOY_DIR)/$$dir/"; \
-		else \
-			echo "  ⚠️ Warning: Directory not found: $$dir"; \
-		fi \
-	done
-	@rm -f .deploy-excludes.tmp
-
-	# Copy required files
-	@echo "📄 Copying required files..."
-	@for file in $(BACKEND_FILES); do \
-		if [ -f "$$file" ]; then \
-			echo "  Copying: $$file"; \
-			mkdir -p "$(DEPLOY_DIR)/$$(dirname "$$file")"; \
-			cp "$$file" "$(DEPLOY_DIR)/$$file"; \
-		else \
-			echo "  ❌ Error: Required file missing: $$file"; \
-			exit 1; \
-		fi \
-	done
-
-	# Copy runtime files to root for Oryx detection, create if missing
-	@echo "🐍 Setting up Python runtime configuration..."
-	@if [ -f ".deploy/runtime.txt" ]; then \
-		cp ".deploy/runtime.txt" "$(DEPLOY_DIR)/runtime.txt"; \
-		echo "  ✅ Copied runtime.txt to deployment root"; \
-	else \
-		echo "python-3.11" > "$(DEPLOY_DIR)/runtime.txt"; \
-		echo "  ⚠️ .deploy/runtime.txt not found, created default runtime.txt"; \
-	fi
-	@if [ -f ".deploy/.python-version" ]; then \
-		cp ".deploy/.python-version" "$(DEPLOY_DIR)/.python-version"; \
-		echo "  ✅ Copied .python-version to deployment root"; \
-	else \
-		echo "3.11" > "$(DEPLOY_DIR)/.python-version"; \
-		echo "  ⚠️ .deploy/.python-version not found, created default .python-version"; \
-	fi
-
-	# Create deployment zip
-	@echo "📦 Creating deployment zip: $(DEPLOY_ZIP)"
-	@cd $(DEPLOY_DIR) && zip -rq "../$(DEPLOY_ZIP)" . \
-		$(foreach pattern,$(EXCLUDE_PATTERNS),-x "$(pattern)")
-
-	# Show deployment summary
-	@echo ""
-	@echo "✅ Backend deployment artifacts generated successfully!"
-	@echo "📊 Deployment Summary:"
-	@echo "  📁 Artifacts directory: $(DEPLOY_DIR)"
-	@echo "  📦 Deployment package: .deploy/$(DEPLOY_ZIP)"
-	@echo "  📏 Package size: $$(du -h .deploy/$(DEPLOY_ZIP) | cut -f1)"
-	@echo "  🔢 Git commit: $(GIT_HASH)"
-	@echo "  🕐 Timestamp: $(TIMESTAMP)"
-	@echo ""
-	@echo "🚀 Ready for Azure App Service deployment!"
-
-
-# Clean deployment artifacts
-clean_deployment_artifacts:
-	@echo "🧹 Cleaning deployment artifacts..."
-	@rm -rf .deploy/backend
-	@rm -f .deploy/backend_deployment_*.zip
-	@echo "✅ Deployment artifacts cleaned"
-
-
-# Show deployment package info
-show_deployment_info:
-	@echo "📊 Deployment Package Information"
-	@echo "================================="
-	@echo ""
-	@if [ -d "$(DEPLOY_DIR)" ]; then \
-		echo "📁 Artifacts directory: $(DEPLOY_DIR)"; \
-		echo "📄 Directory contents:"; \
-		find $(DEPLOY_DIR) -type f | head -20 | sed 's/^/  /'; \
-		echo ""; \
-	else \
-		echo "❌ No deployment artifacts found. Run 'make generate_backend_deployment' first."; \
-	fi
-	@echo "📦 Available deployment packages:"
-	@ls -la .deploy/backend_deployment_*.zip 2>/dev/null | sed 's/^/  /' || echo "  No deployment packages found"
-
 
 # Run pylint on all Python files (excluding tests), output to report file
 run_pylint:
 	@echo "Running linter"
 	find . -type f -name "*.py" ! -path "./tests/*" | xargs pylint --disable=logging-fstring-interpolation > utils/pylint_report/pylint_report.txt
-
-############################################################
-# Terraform State to Environment File
-# Purpose: Extract values from Terraform remote state and create local .env file
-############################################################
-
-# Environment variables for Terraform state extraction
-AZURE_ENV_NAME ?= dev
-# Automatically set AZURE_SUBSCRIPTION_ID from Azure CLI if not provided
-AZURE_SUBSCRIPTION_ID ?= $(shell az account show --query id -o tsv 2>/dev/null)
-TF_DIR = infra/terraform
-ENV_FILE = .env.$(AZURE_ENV_NAME)
-
-# Generate environment file from Terraform remote state outputs
-generate_env_from_terraform:
-	@echo "🔧 Generating Environment File from Terraform State"
-	@echo "============================================================"
-	@./devops/scripts/generate-env-from-terraform.sh $(AZURE_ENV_NAME) $(AZURE_SUBSCRIPTION_ID) generate
-
-# Check if Terraform is initialized (now handled by script)
-check_terraform_initialized:
-	@echo "⚠️ Note: Terraform initialization check is now handled by the generation script"
-
-# Show current environment file (if it exists)
-show_env_file:
-	@./devops/scripts/generate-env-from-terraform.sh $(AZURE_ENV_NAME) $(AZURE_SUBSCRIPTION_ID) show
-
-# Extract sensitive values from Azure Key Vault and update environment file
-update_env_with_secrets:
-	@echo "🔧 Updating Environment File with Key Vault Secrets"
-	@echo "============================================================"
-	@./devops/scripts/generate-env-from-terraform.sh $(AZURE_ENV_NAME) $(AZURE_SUBSCRIPTION_ID) update-secrets
-
-# Generate environment file from Terraform remote state outputs (PowerShell)
-generate_env_from_terraform_ps:
-	@echo "🔧 Generating Environment File from Terraform State (PowerShell)"
-	@echo "============================================================"
-	@powershell -ExecutionPolicy Bypass -File devops/scripts/Generate-EnvFromTerraform.ps1 -EnvironmentName $(AZURE_ENV_NAME) -SubscriptionId $(AZURE_SUBSCRIPTION_ID) -Action generate
-
-# Show current environment file (PowerShell)
-show_env_file_ps:
-	@powershell -ExecutionPolicy Bypass -File devops/scripts/Generate-EnvFromTerraform.ps1 -EnvironmentName $(AZURE_ENV_NAME) -SubscriptionId $(AZURE_SUBSCRIPTION_ID) -Action show
-
-# Update environment file with Key Vault secrets (PowerShell)
-update_env_with_secrets_ps:
-	@echo "🔧 Updating Environment File with Key Vault Secrets (PowerShell)"
-	@echo "============================================================"
-	@powershell -ExecutionPolicy Bypass -File devops/scripts/Generate-EnvFromTerraform.ps1 -EnvironmentName $(AZURE_ENV_NAME) -SubscriptionId $(AZURE_SUBSCRIPTION_ID) -Action update-secrets
-
-# Deploy a user-provided directory to Azure Web App using Azure CLI
-# Usage: make deploy_to_webapp WEBAPP_NAME=<webapp-name> DEPLOY_DIR=<path-to-zip-or-dir>
-deploy_to_webapp:
-	@if [ -z "$(WEBAPP_NAME)" ]; then \
-		if [ -f ".env" ]; then \
-			WEBAPP_NAME_ENV=$$(grep '^BACKEND_APP_SERVICE_URL=' .env | cut -d'=' -f2 | sed 's|https\?://||;s|/.*||'); \
-			if [ -n "$$WEBAPP_NAME_ENV" ]; then \
-				echo "ℹ️ Using BACKEND_APP_SERVICE_URL from .env: $$WEBAPP_NAME_ENV"; \
-				WEBAPP_NAME=$$WEBAPP_NAME_ENV; \
-			else \
-				echo "❌ WEBAPP_NAME not set and BACKEND_APP_SERVICE_URL not found in .env"; \
-				exit 1; \
-			fi \
-		else \
-			echo "❌ WEBAPP_NAME not set and .env file not found"; \
-			exit 1; \
-		fi \
-	fi
-	@if [ -z "$(DEPLOY_DIR)" ]; then \
-		echo "❌ Usage: make deploy_to_webapp WEBAPP_NAME=<webapp-name> DEPLOY_DIR=<path-to-zip-or-dir> AZURE_RESOURCE_GROUP=<resource-group>"; \
-		exit 1; \
-	fi
-	@if [ -z "$(AZURE_RESOURCE_GROUP)" ]; then \
-		if [ -f ".env" ]; then \
-			RESOURCE_GROUP_ENV=$$(grep '^AZURE_RESOURCE_GROUP=' .env | cut -d'=' -f2); \
-			if [ -n "$$RESOURCE_GROUP_ENV" ]; then \
-				echo "ℹ️ Using AZURE_RESOURCE_GROUP from .env: $$RESOURCE_GROUP_ENV"; \
-				AZURE_RESOURCE_GROUP=$$RESOURCE_GROUP_ENV; \
-			else \
-				echo "❌ AZURE_RESOURCE_GROUP not set and not found in .env"; \
-				exit 1; \
-			fi \
-		else \
-			echo "❌ AZURE_RESOURCE_GROUP not set and .env file not found"; \
-			exit 1; \
-		fi \
-	fi
-	@echo "🚀 Deploying '$(DEPLOY_DIR)' to Azure Web App '$(WEBAPP_NAME)' in resource group '$(AZURE_RESOURCE_GROUP)'"
-	@echo "⏳ Note: Large deployments may take 10+ minutes. Please be patient..."
-	@echo ""
-	@echo "📊 Deployment Progress Monitor:"
-	@echo "  🌐 Azure Portal: https://portal.azure.com/#@/resource/subscriptions/$(shell az account show --query id -o tsv 2>/dev/null)/resourceGroups/$(AZURE_RESOURCE_GROUP)/providers/Microsoft.Web/sites/$(WEBAPP_NAME)/deploymentCenter"
-	@echo "  📋 Deployment Logs: https://$(WEBAPP_NAME).scm.azurewebsites.net/api/deployments/latest/log"
-	@echo ""
-	@set -e; \
-	if az webapp deploy --resource-group $(AZURE_RESOURCE_GROUP) --name $(WEBAPP_NAME) --src-path $(DEPLOY_DIR) --type zip; then \
-		DEPLOY_EXIT_CODE=$$?; \
-		echo ""; \
-		echo "⚠️ Deployment command timed out or encountered an issue (exit code: $$DEPLOY_EXIT_CODE)"; \
-		echo ""; \
-		if [ "$$DEPLOY_EXIT_CODE" = "124" ]; then \
-			echo "🔄 TIMEOUT NOTICE: The deployment is likely still in progress in the background."; \
-			echo "   The 15-minute timeout is a safety measure to prevent hanging builds."; \
-			echo ""; \
-		fi; \
-		echo "📋 Next Steps:"; \
-		echo "  1. 🔍 Check deployment status in Azure Portal:"; \
-		echo "     https://portal.azure.com/#@/resource/subscriptions/$(shell az account show --query id -o tsv 2>/dev/null)/resourceGroups/$(AZURE_RESOURCE_GROUP)/providers/Microsoft.Web/sites/$(WEBAPP_NAME)/deploymentCenter"; \
-		echo ""; \
-		echo "  2. 📊 Monitor real-time logs via VS Code:"; \
-		echo "     • Install 'Azure App Service' extension"; \
-		echo "     • Right-click on '$(WEBAPP_NAME)' → 'Start Streaming Logs'"; \
-		echo "     • Or use Command Palette: 'Azure App Service: Start Streaming Logs'"; \
-		echo ""; \
-		echo "  3. 🖥️ Monitor logs via Azure CLI:"; \
-		echo "     az webapp log tail --resource-group $(AZURE_RESOURCE_GROUP) --name $(WEBAPP_NAME)"; \
-		echo ""; \
-		echo "  4. 🌐 Check deployment logs directly:"; \
-		echo "     https://$(WEBAPP_NAME).scm.azurewebsites.net/api/deployments/latest/log"; \
-		echo ""; \
-		echo "  5. 🔄 If deployment fails, retry with:"; \
-		echo "     make deploy_to_webapp WEBAPP_NAME=$(WEBAPP_NAME) DEPLOY_DIR=$(DEPLOY_DIR) AZURE_RESOURCE_GROUP=$(AZURE_RESOURCE_GROUP)"; \
-		echo ""; \
-		echo "💡 Pro Tip: Large Node.js builds (like Vite) typically take 5-15 minutes."; \
-		echo "   The site may show 'Application Error' until build completes."; \
-		exit $$DEPLOY_EXIT_CODE; \
-	else \
-		echo ""; \
-		echo "✅ Deployment command completed successfully!"; \
-		echo "🌐 Your app should be available at: https://$(WEBAPP_NAME).azurewebsites.net"; \
-		echo ""; \
-		echo "📋 Post-Deployment Verification:"; \
-		echo "  • Wait 2-3 minutes for app startup"; \
-		echo "  • Check app health: https://$(WEBAPP_NAME).azurewebsites.net/health (if available)"; \
-		echo "  • Monitor logs for any startup issues"; \
-		echo ""; \
-	fi
-
-# Deploy frontend to Azure Web App using Terraform outputs and deployment artifacts
-deploy_frontend:
-	@echo "🚀 Deploying frontend to Azure Web App using Terraform outputs"
-	$(MAKE) generate_frontend_deployment
-	$(eval WEBAPP_NAME := $(shell terraform -chdir=$(TF_DIR) output -raw FRONTEND_APP_SERVICE_NAME 2>/dev/null))
-	$(eval AZURE_RESOURCE_GROUP := $(shell terraform -chdir=$(TF_DIR) output -raw AZURE_RESOURCE_GROUP 2>/dev/null))
-	$(eval DEPLOY_ZIP := $(shell ls -1t .deploy/frontend_deployment_*.zip 2>/dev/null | head -n1))
-	@if [ -z "$(WEBAPP_NAME)" ]; then \
-		echo "❌ Could not determine frontend web app name from Terraform outputs."; \
-		exit 1; \
-	fi
-	@if [ -z "$(AZURE_RESOURCE_GROUP)" ]; then \
-		echo "❌ Could not determine resource group name from Terraform outputs."; \
-		exit 1; \
-	fi
-	@if [ -z "$(DEPLOY_ZIP)" ]; then \
-		echo "❌ No frontend deployment zip found. Run 'make generate_frontend_deployment' first."; \
-		exit 1; \
-	fi
-	$(MAKE) deploy_to_webapp WEBAPP_NAME=$(WEBAPP_NAME) DEPLOY_DIR=$(DEPLOY_ZIP) AZURE_RESOURCE_GROUP=$(AZURE_RESOURCE_GROUP)
-
-# Deploy backend to Azure Web App using Terraform outputs and deployment artifacts
-deploy_backend:
-	@echo "🚀 Deploying backend to Azure Web App using Terraform outputs"
-	$(MAKE) generate_backend_deployment
-	$(eval WEBAPP_NAME := $(shell terraform -chdir=$(TF_DIR) output -raw BACKEND_APP_SERVICE_NAME 2>/dev/null))
-	$(eval AZURE_RESOURCE_GROUP := $(shell terraform -chdir=$(TF_DIR) output -raw AZURE_RESOURCE_GROUP 2>/dev/null))
-	$(eval DEPLOY_ZIP := $(shell ls -1t .deploy/backend_deployment_*.zip 2>/dev/null | head -n1))
-	@if [ -z "$(WEBAPP_NAME)" ]; then \
-		echo "❌ Could not determine backend web app name from Terraform outputs."; \
-		exit 1; \
-	fi
-	@if [ -z "$(AZURE_RESOURCE_GROUP)" ]; then \
-		echo "❌ Could not determine resource group name from Terraform outputs."; \
-		exit 1; \
-	fi
-	@if [ -z "$(DEPLOY_ZIP)" ]; then \
-		echo "❌ No backend deployment zip found. Run 'make generate_backend_deployment' first."; \
-		exit 1; \
-	fi
-	$(MAKE) deploy_to_webapp WEBAPP_NAME=$(WEBAPP_NAME) DEPLOY_DIR=$(DEPLOY_ZIP) AZURE_RESOURCE_GROUP=$(AZURE_RESOURCE_GROUP)
-
-# Monitor deployment status and logs for any webapp
-# Usage: make monitor_deployment WEBAPP_NAME=<webapp-name> AZURE_RESOURCE_GROUP=<resource-group>
-monitor_deployment:
-	@if [ -z "$(WEBAPP_NAME)" ]; then \
-		echo "❌ Usage: make monitor_deployment WEBAPP_NAME=<webapp-name> AZURE_RESOURCE_GROUP=<resource-group>"; \
-		exit 1; \
-	fi
-	@if [ -z "$(AZURE_RESOURCE_GROUP)" ]; then \
-		if [ -f ".env" ]; then \
-			RESOURCE_GROUP_ENV=$$(grep '^AZURE_RESOURCE_GROUP=' .env | cut -d'=' -f2); \
-			if [ -n "$$RESOURCE_GROUP_ENV" ]; then \
-				echo "ℹ️ Using AZURE_RESOURCE_GROUP from .env: $$RESOURCE_GROUP_ENV"; \
-				AZURE_RESOURCE_GROUP=$$RESOURCE_GROUP_ENV; \
-			else \
-				echo "❌ AZURE_RESOURCE_GROUP not set and not found in .env"; \
-				exit 1; \
-			fi \
-		else \
-			echo "❌ AZURE_RESOURCE_GROUP not set and .env file not found"; \
-			exit 1; \
-		fi \
-	fi
-	@echo "📊 Monitoring Azure Web App: $(WEBAPP_NAME)"
-	@echo "============================================="
-	@echo ""
-	@echo "🔍 App Service Status:"
-	@az webapp show --resource-group $(AZURE_RESOURCE_GROUP) --name $(WEBAPP_NAME) --query "{name:name,state:state,defaultHostName:defaultHostName,lastModifiedTime:lastModifiedTime}" --output table 2>/dev/null || echo "❌ Could not retrieve app status"
-	@echo ""
-	@echo "📋 Recent Deployment Status:"
-	@az webapp deployment list --resource-group $(AZURE_RESOURCE_GROUP) --name $(WEBAPP_NAME) --query "[0].{status:status,deploymentId:id,startTime:startTime,endTime:endTime,message:message}" --output table 2>/dev/null || echo "❌ Could not retrieve deployment status"
-	@echo ""
-	@echo "🌐 Useful Links:"
-	@echo "  • App URL: https://$(WEBAPP_NAME).azurewebsites.net"
-	@echo "  • Azure Portal: https://portal.azure.com/#@/resource/subscriptions/$(shell az account show --query id -o tsv 2>/dev/null)/resourceGroups/$(AZURE_RESOURCE_GROUP)/providers/Microsoft.Web/sites/$(WEBAPP_NAME)"
-	@echo "  • Deployment Center: https://portal.azure.com/#@/resource/subscriptions/$(shell az account show --query id -o tsv 2>/dev/null)/resourceGroups/$(AZURE_RESOURCE_GROUP)/providers/Microsoft.Web/sites/$(WEBAPP_NAME)/deploymentCenter"
-	@echo "  • Kudu Console: https://$(WEBAPP_NAME).scm.azurewebsites.net"
-	@echo "  • Deployment Logs: https://$(WEBAPP_NAME).scm.azurewebsites.net/api/deployments/latest/log"
-	@echo ""
-	@echo "📊 VS Code Log Streaming:"
-	@echo "  1. Install 'Azure App Service' extension"
-	@echo "  2. Sign in to Azure account"
-	@echo "  3. Right-click '$(WEBAPP_NAME)' → 'Start Streaming Logs'"
-	@echo "  4. Or use Command Palette: 'Azure App Service: Start Streaming Logs'"
-	@echo ""
-	@echo "🖥️ CLI Log Streaming (run in separate terminal):"
-	@echo "  az webapp log tail --resource-group $(AZURE_RESOURCE_GROUP) --name $(WEBAPP_NAME)"
-	@echo ""
-
-# Stream logs for backend app service
-monitor_backend_deployment:
-	@echo "📊 Monitoring Backend Deployment"
-	$(eval WEBAPP_NAME := $(shell terraform -chdir=$(TF_DIR) output -raw BACKEND_APP_SERVICE_NAME 2>/dev/null))
-	$(eval AZURE_RESOURCE_GROUP := $(shell terraform -chdir=$(TF_DIR) output -raw AZURE_RESOURCE_GROUP 2>/dev/null))
-	@if [ -z "$(WEBAPP_NAME)" ]; then \
-		echo "❌ Could not determine backend web app name from Terraform outputs."; \
-		exit 1; \
-	fi
-	@if [ -z "$(AZURE_RESOURCE_GROUP)" ]; then \
-		echo "❌ Could not determine resource group name from Terraform outputs."; \
-		exit 1; \
-	fi
-	$(MAKE) monitor_deployment WEBAPP_NAME=$(WEBAPP_NAME) AZURE_RESOURCE_GROUP=$(AZURE_RESOURCE_GROUP)
-
-# Stream logs for frontend app service
-monitor_frontend_deployment:
-	@echo "📊 Monitoring Frontend Deployment"
-	$(eval WEBAPP_NAME := $(shell terraform -chdir=$(TF_DIR) output -raw FRONTEND_APP_SERVICE_NAME 2>/dev/null))
-	$(eval AZURE_RESOURCE_GROUP := $(shell terraform -chdir=$(TF_DIR) output -raw AZURE_RESOURCE_GROUP 2>/dev/null))
-	@if [ -z "$(WEBAPP_NAME)" ]; then \
-		echo "❌ Could not determine frontend web app name from Terraform outputs."; \
-		exit 1; \
-	fi
-	@if [ -z "$(AZURE_RESOURCE_GROUP)" ]; then \
-		echo "❌ Could not determine resource group name from Terraform outputs."; \
-		exit 1; \
-	fi
-	$(MAKE) monitor_deployment WEBAPP_NAME=$(WEBAPP_NAME) AZURE_RESOURCE_GROUP=$(AZURE_RESOURCE_GROUP)
-
-.PHONY: generate_env_from_terraform check_terraform_initialized show_env_file update_env_with_secrets generate_env_from_terraform_ps show_env_file_ps update_env_with_secrets_ps monitor_deployment monitor_backend_deployment monitor_frontend_deployment
-
-
-############################################################
-# Azure Communication Services Phone Number Management
-# Purpose: Purchase and manage ACS phone numbers
-############################################################
-
-# Purchase ACS phone number and store in environment file
-# Usage: make purchase_acs_phone_number [ENV_FILE=custom.env] [COUNTRY_CODE=US] [AREA_CODE=833] [PHONE_TYPE=TOLL_FREE]
-purchase_acs_phone_number:
-	@echo "📞 Azure Communication Services - Phone Number Purchase"
-	@echo "======================================================"
-	@echo ""
-	# Set default parameters
-	$(eval ENV_FILE ?= .env.$(AZURE_ENV_NAME))
-	$(eval COUNTRY_CODE ?= US)
-	$(eval AREA_CODE ?= 866)
-	$(eval PHONE_TYPE ?= TOLL_FREE)
-
-	# Extract ACS endpoint from environment file
-	@echo "🔍 Extracting ACS endpoint from $(ENV_FILE)"
-	$(eval ACS_ENDPOINT := $(shell grep '^ACS_ENDPOINT=' $(ENV_FILE) | cut -d'=' -f2))
-
-	@if [ -z "$(ACS_ENDPOINT)" ]; then \
-		echo "❌ ACS_ENDPOINT not found in $(ENV_FILE). Please ensure the environment file contains ACS_ENDPOINT."; \
-		exit 1; \
-	fi
-
-	@echo "📞 Creating a new ACS phone number using Python script..."
-	python3 devops/scripts/azd/helpers/acs_phone_number_manager.py --endpoint $(ACS_ENDPOINT) purchase --country $(COUNTRY_CODE) --area $(AREA_CODE) --phone-number-type $(PHONE_TYPE)
-
-# Purchase ACS phone number using PowerShell (Windows)
-# Usage: make purchase_acs_phone_number_ps [ENV_FILE=custom.env] [COUNTRY_CODE=US] [AREA_CODE=833] [PHONE_TYPE=TOLL_FREE]
-purchase_acs_phone_number_ps:
-	@echo "📞 Azure Communication Services - Phone Number Purchase (PowerShell)"
-	@echo "=================================================================="
-	@echo ""
-
-	# Set default parameters
-	$(eval ENV_FILE ?= .env.$(AZURE_ENV_NAME))
-	$(eval COUNTRY_CODE ?= US)
-	$(eval AREA_CODE ?= 866)
-	$(eval PHONE_TYPE ?= TOLL_FREE)
-
-	# Execute the PowerShell script with parameters
-	@powershell -ExecutionPolicy Bypass -File devops/scripts/Purchase-AcsPhoneNumber.ps1 \
-		-EnvFile "$(ENV_FILE)" \
-		-AzureEnvName "$(AZURE_ENV_NAME)" \
-		-CountryCode "$(COUNTRY_CODE)" \
-		-AreaCode "$(AREA_CODE)" \
-		-PhoneType "$(PHONE_TYPE)" \
-		-TerraformDir "$(TF_DIR)"
-
-
 ############################################################
 # Azure Redis Management
 # Purpose: Connect to Azure Redis using Azure AD authentication
@@ -804,37 +332,6 @@ help:
 	@echo "⚡ Load Testing:"
 	@echo "  generate_audio                  Generate PCM audio files for load testing"
 	@echo "  run_load_test                   Run WebSocket endpoint load testing (safe)"
-	@echo "  run_acs_dev_test                Run 1 ACS call to +8165019907 (development)"
-	@echo "  run_acs_staging_test            Run 5 ACS calls (staging environment)"
-	@echo "  run_acs_prod_test               Run 20 ACS calls (production testing)"
-	@echo "  show_acs_test_config            Show ACS test configurations without running"
-	@echo ""
-	@echo "📦 Deployment Artifacts:"
-	@echo "  generate_backend_deployment     Generate backend deployment artifacts and zip"
-	@echo "  generate_frontend_deployment    Generate frontend deployment artifacts and zip"
-	@echo "  clean_deployment_artifacts      Clean deployment artifacts"
-	@echo "  show_deployment_info            Show deployment package information"
-	@echo ""
-	@echo "🌐 Azure Web App Deployment:"
-	@echo "  deploy_backend                  Deploy backend to Azure App Service (using Terraform outputs)"
-	@echo "  deploy_frontend                 Deploy frontend to Azure App Service (using Terraform outputs)"
-	@echo "  deploy_to_webapp                Generic Web App deployment (manual parameters)"
-	@echo "  monitor_deployment              Monitor any webapp deployment status and logs"
-	@echo "  monitor_backend_deployment      Monitor backend deployment (using Terraform outputs)"
-	@echo "  monitor_frontend_deployment     Monitor frontend deployment (using Terraform outputs)"
-	@echo ""
-	@echo "🏗️ Terraform Environment Management:"
-	@echo "  generate_env_from_terraform     Generate .env file from Terraform state (Bash)"
-	@echo "  generate_env_from_terraform_ps  Generate .env file from Terraform state (PowerShell)"
-	@echo "  show_env_file                   Display current environment file info (Bash)"
-	@echo "  show_env_file_ps                Display current environment file info (PowerShell)"
-	@echo "  update_env_with_secrets         Update .env file with Key Vault secrets (Bash)"
-	@echo "  update_env_with_secrets_ps      Update .env file with Key Vault secrets (PowerShell)"
-	@echo "  check_terraform_initialized     Check if Terraform is properly initialized"
-	@echo ""
-	@echo "📞 Azure Communication Services:"
-	@echo "  purchase_acs_phone_number       Purchase ACS phone number and store in env file"
-	@echo "  purchase_acs_phone_number_ps    Purchase ACS phone number (PowerShell version)"
 	@echo ""
 	@echo "🔴 Azure Redis Management:"
 	@echo "  connect_redis                   Connect to Azure Redis using Azure AD authentication"
@@ -857,10 +354,6 @@ help:
 	@echo "  4. make generate_backend_deployment && make deploy_backend"
 	@echo "  5. make generate_frontend_deployment && make deploy_frontend"
 	@echo ""
-	@echo "💡 Quick Start for ACS Phone Number Purchase:"
-	@echo "  make purchase_acs_phone_number      # Bash/Python version"
-	@echo "  make purchase_acs_phone_number_ps   # PowerShell version"
-	@echo ""
 	@echo "💡 Deployment Monitoring Tips:"
 	@echo "  • Large deployments may timeout after 15 minutes but continue in background"
 	@echo "  • Use monitor_deployment targets to check status during/after deployment"
diff --git a/apps/rtagent/backend/api/v1/events/demo.py b/apps/rtagent/backend/api/v1/events/demo.py
deleted file mode 100644
index 140d73de..00000000
--- a/apps/rtagent/backend/api/v1/events/demo.py
+++ /dev/null
@@ -1,168 +0,0 @@
-"""
-V1 Event Processor Demo
-======================
-
-Demonstrates how to use the new V1 Event Processor inspired by Azure's Event Processor pattern.
-This shows integration with legacy handlers and simplified event processing.
-"""
-
-import asyncio
-import json
-from azure.core.messaging import CloudEvent
-from fastapi import FastAPI, Request
-from fastapi.responses import JSONResponse
-
-# Import the V1 event system
-from apps.rtagent.backend.api.v1.events import (
-    get_call_event_processor,
-    register_default_handlers,
-    get_processor_stats,
-    get_active_calls,
-    ACSEventTypes,
-)
-
-
-async def demo_v1_event_processing():
-    """
-    Demo showing how to use the V1 Event Processor.
-    """
-    print("🚀 V1 Event Processor Demo")
-    print("=" * 50)
-
-    # 1. Register default handlers (adapted from legacy)
-    print("📋 Registering default handlers...")
-    register_default_handlers()
-
-    # 2. Get processor instance
-    processor = get_call_event_processor()
-
-    # 3. Show initial stats
-    print("📊 Initial processor stats:")
-    stats = get_processor_stats()
-    print(json.dumps(stats, indent=2))
-
-    # 4. Create sample CloudEvents (like from ACS webhook)
-    sample_events = [
-        CloudEvent(
-            source="azure.communication.callautomation",
-            type=ACSEventTypes.CALL_CONNECTED,
-            data={
-                "callConnectionId": "demo-call-123",
-                "callConnectionProperties": {"connectedTime": "2025-08-11T10:30:00Z"},
-            },
-        ),
-        CloudEvent(
-            source="azure.communication.callautomation",
-            type=ACSEventTypes.PARTICIPANTS_UPDATED,
-            data={
-                "callConnectionId": "demo-call-123",
-                "participants": [
-                    {
-                        "identifier": {
-                            "phoneNumber": {"value": "+1234567890"},
-                            "rawId": "4:+1234567890",
-                        }
-                    }
-                ],
-            },
-        ),
-        CloudEvent(
-            source="azure.communication.callautomation",
-            type=ACSEventTypes.DTMF_TONE_RECEIVED,
-            data={"callConnectionId": "demo-call-123", "tone": "1", "sequenceId": 1},
-        ),
-    ]
-
-    # 5. Process events through V1 processor
-    print("🔄 Processing sample events...")
-
-    # Mock request state (normally from FastAPI request.app.state)
-    class MockRequestState:
-        def __init__(self):
-            self.redis = None
-            self.acs_caller = None
-            self.clients = []
-
-    mock_state = MockRequestState()
-
-    # Process the events
-    result = await processor.process_events(sample_events, mock_state)
-
-    print("✅ Processing result:")
-    print(json.dumps(result, indent=2))
-
-    # 6. Show updated stats
-    print("📊 Updated processor stats:")
-    final_stats = get_processor_stats()
-    print(json.dumps(final_stats, indent=2))
-
-    # 7. Show active calls
-    print("📞 Active calls:")
-    active_calls = get_active_calls()
-    print(list(active_calls))
-
-    print("✅ Demo completed!")
-
-
-def create_webhook_handler_example():
-    """
-    Example of how to integrate V1 Event Processor with FastAPI webhook endpoint.
-    """
-    app = FastAPI()
-
-    @app.post("/webhook/acs-events")
-    async def handle_acs_webhook(request: Request):
-        """
-        Example webhook handler using V1 Event Processor.
-
-        This replaces the complex event registry with simple, direct processing.
-        """
-        try:
-            # Parse CloudEvents from webhook
-            events_data = await request.json()
-
-            # Convert to CloudEvent objects
-            cloud_events = []
-            for event_data in events_data:
-                cloud_event = CloudEvent(
-                    source="azure.communication.callautomation",
-                    type=event_data.get("eventType", "Unknown"),
-                    data=event_data.get("data", event_data),
-                )
-                cloud_events.append(cloud_event)
-
-            # Ensure handlers are registered
-            register_default_handlers()
-
-            # Process through V1 Event Processor
-            processor = get_call_event_processor()
-            result = await processor.process_events(cloud_events, request.app.state)
-
-            return JSONResponse(
-                {
-                    "status": "success",
-                    "processed": result.get("processed", 0),
-                    "api_version": "v1",
-                    "processor_type": "v1_event_processor",
-                }
-            )
-
-        except Exception as e:
-            return JSONResponse({"error": str(e), "api_version": "v1"}, status_code=500)
-
-    return app
-
-
-if __name__ == "__main__":
-    # Run the demo
-    asyncio.run(demo_v1_event_processing())
-
-    print("\n" + "=" * 50)
-    print("📖 Integration Example:")
-    print("See create_webhook_handler_example() for FastAPI integration")
-    print("Key benefits of V1 Event Processor:")
-    print("- ✅ Simple handler registration")
-    print("- ✅ Call correlation by callConnectionId")
-    print("- ✅ Direct integration with legacy handlers")
-    print("- ✅ No complex middleware or retry logic")
-    print("- ✅ Inspired by Azure's Event Processor pattern")
diff --git a/apps/rtagent/backend/api/v1/handlers/acs_media_lifecycle.py b/apps/rtagent/backend/api/v1/handlers/acs_media_lifecycle.py
index 40bf6e4b..0948c0ad 100644
--- a/apps/rtagent/backend/api/v1/handlers/acs_media_lifecycle.py
+++ b/apps/rtagent/backend/api/v1/handlers/acs_media_lifecycle.py
@@ -90,7 +90,7 @@ class ThreadBridge:
     Implements the non-blocking patterns described in the documentation.
     """
 
-    def __init__(self):
+    def __init__(self, call_connection_id: Optional[str] = None):
         """
         Initialize cross-thread communication bridge.
@@ -98,8 +98,9 @@ def __init__(self):
         """
         :type main_loop: Optional[asyncio.AbstractEventLoop]
         """
         self.main_loop: Optional[asyncio.AbstractEventLoop] = None
+        self.call_connection_id = call_connection_id
         # Create shorthand for call connection ID (last 8 chars)
-        self.call_id_short = "unknown"
+        self.call_id_short = call_connection_id[-8:] if call_connection_id else "unknown"
 
     def set_main_loop(
         self, loop: asyncio.AbstractEventLoop, call_connection_id: str = None
@@ -454,6 +455,7 @@ class RouteTurnThread:
 
     def __init__(
         self,
+        call_connection_id: Optional[str],
         speech_queue: asyncio.Queue,
         orchestrator_func: Callable,
         memory_manager: Optional[MemoManager],
@@ -468,11 +470,8 @@ def __init__(
         self.running = False
         self._stopped = False
         # Get call ID shorthand from websocket if available
-        self.call_id_short = (
-            getattr(websocket, "_call_connection_id", "unknown")[-8:]
-            if hasattr(websocket, "_call_connection_id")
-            else "unknown"
-        )
+        self.call_connection_id = call_connection_id
+        self.call_id_short = call_connection_id[-8:] if call_connection_id else "unknown"
 
     async def start(self):
         """Start the route turn processing loop."""
@@ -917,10 +916,11 @@ def __init__(
 
         # Cross-thread communication
         self.speech_queue = asyncio.Queue(maxsize=10)
-        self.thread_bridge = ThreadBridge()
+        self.thread_bridge = ThreadBridge(call_connection_id=self.call_connection_id)
 
         # Initialize threads
         self.route_turn_thread = RouteTurnThread(
+            call_connection_id=self.call_connection_id,
             speech_queue=self.speech_queue,
            orchestrator_func=orchestrator_func,
             memory_manager=memory_manager,
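The change threads `call_connection_id` explicitly into `ThreadBridge` and `RouteTurnThread` instead of sniffing a private `_call_connection_id` attribute off the websocket. A minimal sketch of the short-ID pattern both constructors now share (the standalone helper is illustrative; the diff inlines the expression):

```python
from typing import Optional


def short_call_id(call_connection_id: Optional[str]) -> str:
    """Last 8 characters of the ACS call connection ID, for compact log tags."""
    return call_connection_id[-8:] if call_connection_id else "unknown"


assert short_call_id("4:acs:abc-123-456789ab") == "456789ab"
assert short_call_id(None) == "unknown"
```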
diff --git a/apps/rtagent/backend/config/app_settings_new.py b/apps/rtagent/backend/config/app_settings_new.py
deleted file mode 100644
index c5579387..00000000
--- a/apps/rtagent/backend/config/app_settings_new.py
+++ /dev/null
@@ -1,95 +0,0 @@
-"""
-Application Settings
-===================
-
-Main configuration module that consolidates all settings from specialized
-configuration modules for easy access throughout the application.
-"""
-
-# Import all settings from specialized modules
-from .voice_config import *
-from .connection_config import *
-from .feature_flags import *
-from .ai_config import *
-from .security_config import *
-
-# ==============================================================================
-# VALIDATION FUNCTIONS
-# ==============================================================================
-
-
-def validate_app_settings():
-    """
-    Validate current application settings and return validation results.
-
-    Returns:
-        Dict containing validation status, issues, warnings, and settings count
-    """
-    issues = []
-    warnings = []
-
-    # Check critical pool settings
-    if POOL_SIZE_TTS < 1:
-        issues.append("POOL_SIZE_TTS must be at least 1")
-    elif POOL_SIZE_TTS < 10:
-        warnings.append(f"POOL_SIZE_TTS ({POOL_SIZE_TTS}) is quite low for production")
-
-    if POOL_SIZE_STT < 1:
-        issues.append("POOL_SIZE_STT must be at least 1")
-    elif POOL_SIZE_STT < 10:
-        warnings.append(f"POOL_SIZE_STT ({POOL_SIZE_STT}) is quite low for production")
-
-    # Check connection settings
-    if MAX_WEBSOCKET_CONNECTIONS < 1:
-        issues.append("MAX_WEBSOCKET_CONNECTIONS must be at least 1")
-    elif MAX_WEBSOCKET_CONNECTIONS > 1000:
-        warnings.append(
-            f"MAX_WEBSOCKET_CONNECTIONS ({MAX_WEBSOCKET_CONNECTIONS}) is very high"
-        )
-
-    # Check timeout settings
-    if CONNECTION_TIMEOUT_SECONDS < 60:
-        warnings.append(
-            f"CONNECTION_TIMEOUT_SECONDS ({CONNECTION_TIMEOUT_SECONDS}) is quite short"
-        )
-
-    # Check voice settings
-    if not GREETING_VOICE_TTS:
-        issues.append("GREETING_VOICE_TTS is empty")
-
-    # Count all settings from current module
-    import sys
-
-    current_module = sys.modules[__name__]
-    settings_count = len(
-        [
-            name
-            for name in dir(current_module)
-            if name.isupper() and not name.startswith("_")
-        ]
-    )
-
-    return {
-        "valid": len(issues) == 0,
-        "issues": issues,
-        "warnings": warnings,
-        "settings_count": settings_count,
-    }
-
-
-if __name__ == "__main__":
-    # Quick validation check
-    result = validate_app_settings()
-    print(f"App Settings Validation: {'✅ VALID' if result['valid'] else '❌ INVALID'}")
-
-    if result["issues"]:
-        print("Issues:")
-        for issue in result["issues"]:
-            print(f"  ❌ {issue}")
-
-    if result["warnings"]:
-        print("Warnings:")
-        for warning in result["warnings"]:
-            print(f"  ⚠️ {warning}")
-
-    print(f"Total settings: {result['settings_count']}")
diff --git a/apps/rtagent/backend/config/infrastructure.py b/apps/rtagent/backend/config/infrastructure.py
index 123ddfd9..c6e23156 100644
--- a/apps/rtagent/backend/config/infrastructure.py
+++ b/apps/rtagent/backend/config/infrastructure.py
@@ -64,7 +64,8 @@ def __str__(self):
 ACS_ENDPOINT: str = os.getenv("ACS_ENDPOINT", "")
 ACS_CONNECTION_STRING: str = os.getenv("ACS_CONNECTION_STRING", "")
 ACS_SOURCE_PHONE_NUMBER: str = os.getenv("ACS_SOURCE_PHONE_NUMBER", "")
-BASE_URL: str = os.getenv("BASE_URL", "")
+# Base application URL (ensure no trailing slash)
+BASE_URL: str = os.getenv("BASE_URL", "").rstrip("/")
 
 # ACS Streaming configuration
 ACS_STREAMING_MODE: StreamMode = StreamMode(
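Normalizing `BASE_URL` once at load time means later concatenation such as `f"{BASE_URL}/api/v1/..."` cannot produce a double slash when the operator sets a trailing `/` in the environment. A quick sketch of the failure mode this avoids (the URL values are illustrative):

```python
base = "https://contoso.example.com/"      # operator-supplied, trailing slash
print(f"{base}/api/v1/calls/callbacks")    # https://contoso.example.com//api/v1/calls/callbacks

base = base.rstrip("/")                    # what infrastructure.py now does on load
print(f"{base}/api/v1/calls/callbacks")    # https://contoso.example.com/api/v1/calls/callbacks
```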
diff --git a/apps/rtagent/backend/config/voice_config.py b/apps/rtagent/backend/config/voice_config.py
index 35172f25..dacff91c 100644
--- a/apps/rtagent/backend/config/voice_config.py
+++ b/apps/rtagent/backend/config/voice_config.py
@@ -93,6 +93,9 @@ def get_agent_voice(agent_config_path: str) -> str:
 # AZURE VOICE LIVE SETTINGS
 # ==============================================================================
 
-AZURE_VOICE_LIVE_ENDPOINT = os.getenv("AZURE_VOICE_LIVE_ENDPOINT", "")
-AZURE_VOICE_API_KEY = os.getenv("AZURE_VOICE_API_KEY", "")
+AZURE_VOICE_LIVE_ENDPOINT = os.getenv("AZURE_SPEECH_ENDPOINT", "")
+AZURE_VOICE_API_KEY = os.getenv("AZURE_SPEECH_KEY", "")
 AZURE_VOICE_LIVE_MODEL = os.getenv("AZURE_VOICE_LIVE_MODEL", "gpt-4o")
+# AZURE_VOICE_LIVE_ENDPOINT = os.getenv("AZURE_VOICE_LIVE_ENDPOINT", "")
+# AZURE_VOICE_API_KEY = os.getenv("AZURE_VOICE_API_KEY", "")
+# AZURE_VOICE_LIVE_MODEL = os.getenv("AZURE_VOICE_LIVE_MODEL", "gpt-4o")
diff --git a/apps/rtagent/backend/main.py b/apps/rtagent/backend/main.py
index 8922cecd..4d85cd40 100644
--- a/apps/rtagent/backend/main.py
+++ b/apps/rtagent/backend/main.py
@@ -141,7 +141,7 @@ async def lifespan(app: FastAPI):
     try:
         app.state.redis = AzureRedisManager()
         await app.state.redis.initialize()
-        logger.info("Redis initialized successfully")
+        logger.info("Redis initialized successfully with cluster support and retry logic")
     except Exception as e:
         logger.error(f"Redis initialization failed: {e}")
         raise RuntimeError(f"Redis initialization failed: {e}")
@@ -244,10 +244,10 @@ async def make_stt() -> StreamingSpeechRecognizerFromBytes:
 
     if os.getenv("AOAI_POOL_ENABLED", "true").lower() == "true":
         logger.info("Initializing AOAI client pool during startup...")
-        start_time = time.time()
+        aoai_start = time.perf_counter()
         aoai_pool = await get_aoai_pool()
         if aoai_pool:
-            init_time = time.time() - start_time
+            init_time = time.perf_counter() - aoai_start
             logger.info(
                 f"AOAI client pool pre-initialized in {init_time:.2f}s with {len(aoai_pool.clients)} clients"
             )
@@ -256,16 +256,6 @@ async def make_stt() -> StreamingSpeechRecognizerFromBytes:
     else:
         logger.info("AOAI pool disabled, skipping startup initialization")
 
-    # if ACS_STREAMING_MODE == StreamMode.VOICE_LIVE:
-    #     # Initialize Voice Live warm pool (pre-connect agents)
-    #     span.set_attribute("startup.stage", "voice_live_pool")
-    #     try:
-    #         # Use background prewarm to avoid blocking startup time
-    #         app.state.voice_live_pool = await get_voice_live_pool(background_prewarm=True)
-    #         logger.info("Voice Live pool initialization scheduled (background prewarm)")
-    #     except Exception as e:
-    #         logger.error(f"Voice Live pool initialization failed: {e}")
-
     # ------------------------ Other singletons ---------------------------
     span.set_attribute("startup.stage", "cosmos_db")
     app.state.cosmos = CosmosDBMongoCoreManager(
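Two small but real improvements in the startup path: the pool-init timing now uses `time.perf_counter()`, which is monotonic and intended for measuring elapsed intervals (`time.time()` can jump if the wall clock is adjusted), and the value lives in a dedicated `aoai_start` local that cannot collide with any other `start_time` in the lifespan. A minimal illustration (the init function is a placeholder, not the app's code):

```python
import time


def do_expensive_init() -> None:
    """Placeholder for the AOAI pool warm-up done in the app's lifespan."""
    time.sleep(0.1)


aoai_start = time.perf_counter()  # monotonic: unaffected by wall-clock changes
do_expensive_init()
init_time = time.perf_counter() - aoai_start
print(f"initialized in {init_time:.2f}s")
```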
diff --git a/devops/scripts/README.md b/devops/scripts/README.md
deleted file mode 100644
index 1fce8394..00000000
--- a/devops/scripts/README.md
+++ /dev/null
@@ -1,168 +0,0 @@
-# **DevOps Scripts**
-
-**Automation scripts** for Azure deployment pipeline setup and management for ARTVoice Accelerator.
-
-## **Quick Start**
-
-```bash
-# Complete CI/CD setup for azd deployment
-./setup-gha-config.sh --interactive
-```
-
-This configures:
-- Azure App Registration for OIDC authentication
-- GitHub Actions federated credentials
-- Azure permissions and Terraform state storage
-- Optional GitHub secrets/variables setup
-
-## **Scripts Overview**
-
-### **CI/CD Setup**
-- **[`setup-gha-config.sh`](./setup-gha-config.sh)** - Complete CI/CD setup (start here)
-
-### **Azure Developer CLI Helpers**
-- **[`azd/`](./azd/)** - AZD lifecycle hooks and utilities
-  - [`postprovision.sh`](./azd/postprovision.sh) - Post-deployment configuration
-  - [`preprovision.sh`](./azd/preprovision.sh) - Pre-deployment setup
-
-### **Infrastructure Management**
-- **[`generate-env-from-terraform.sh`](./generate-env-from-terraform.sh)** - Generate .env from Terraform outputs
-- **[`validate-terraform-backend.sh`](./validate-terraform-backend.sh)** - Validate Terraform backend
-- **[`webapp-deploy.sh`](./webapp-deploy.sh)** - Direct webapp deployment
-
-## **Prerequisites**
-
-- **Azure CLI** (`az`) - [Install Guide](https://docs.microsoft.com/cli/azure/install-azure-cli)
-- **Azure Developer CLI** (`azd`) - [Install Guide](https://learn.microsoft.com/azure/developer/azure-developer-cli/install-azd)
-- **jq** - JSON processor
-- **OpenSSL** - For generating random values
-
-### Optional Tools
-- **GitHub CLI** (`gh`) - For automatic secret configuration
-- **Terraform** - If using direct Terraform deployment
-
-### Permissions
-- **Azure**: Contributor + User Access Administrator on target subscription
-- **GitHub**: Admin access to repository for secrets/variables configuration
-
-## 🔐 Authentication Setup
-
-### Azure Authentication
-```bash
-# Login to Azure
-az login
-
-# Set default subscription (if needed)
-az account set --subscription "your-subscription-id"
-```
-
-### GitHub Authentication (Optional)
-```bash
-# Login to GitHub CLI (for automatic secret setup)
-gh auth login
-```
-
-## 🎯 Usage Examples
-
-### Interactive Setup (Recommended for first-time users)
-```bash
-./setup-gha-config.sh --interactive
-```
-
-### Automated Setup with Environment Variables
-```bash
-export GITHUB_ORG="your-org"
-export GITHUB_REPO="your-repo"
-export AZURE_LOCATION="eastus"
-export AZURE_ENV_NAME="dev"
-
-./setup-gha-config.sh
-```
-
-### Production Environment Setup
-```bash
-AZURE_ENV_NAME=prod AZURE_LOCATION=westus2 ./setup-gha-config.sh
-```
-
-## 📤 Output
-
-After running the setup script, you'll get:
-
-### 1. **Azure Resources Created**
-- App Registration for OIDC authentication
-- Service Principal with proper permissions
-- Terraform remote state storage account
-- Federated credentials for GitHub Actions
-
-### 2. **GitHub Configuration**
-- Repository secrets for Azure authentication
-- Repository variables for deployment configuration
-- Ready-to-use workflows in `.github/workflows/`
-
-### 3. **Configuration Summary**
-- Saved to `.azd-cicd-config.txt` in project root
-- Contains all IDs, names, and next steps
-
-## 🔍 Troubleshooting
-
-### Common Issues
-
-**Permission Denied Errors:**
-```bash
-# Check your Azure permissions
-az role assignment list --assignee $(az ad signed-in-user show --query id -o tsv) --output table
-```
-
-**GitHub CLI Not Authenticated:**
-```bash
-# Re-authenticate to GitHub
-gh auth login --git-protocol https
-```
-
-**Storage Account Access Issues:**
-```bash
-# Test storage access
-az storage container list --account-name YOUR_STORAGE_ACCOUNT --auth-mode login
-```
-
-### Debug Mode
-```bash
-# Run with debug output
-bash -x ./setup-gha-config.sh --interactive
-```
-
-## 🔄 Updating Configuration
-
-To update existing configuration:
-
-1. **Add new environments**: Run script with `AZURE_ENV_NAME=newenv`
-2. **Update permissions**: Re-run the script (it's idempotent)
-3. **Rotate credentials**: Delete app registration and re-run
-
-## 📚 Related Documentation
-
-- [GitHub Secrets Configuration Guide](../../.github/SECRETS.md)
-- [Azure Developer CLI Deployment](../../docs/AZD-DEPLOYMENT.md)
-- [CI/CD Pipeline Guide](../../docs/CICDGuide.md)
-- [Microsoft Docs: Container Apps GitHub Actions](https://learn.microsoft.com/azure/container-apps/github-actions-cli)
-
-## 💡 Best Practices
-
-1. **Start with dev environment** - Test thoroughly before production
-2. **Use environment-specific configurations** - Separate dev/staging/prod
-3. **Review permissions regularly** - Follow principle of least privilege
-4. **Monitor deployment logs** - Use Azure Monitor and GitHub Actions logs
-5. **Keep secrets up to date** - Rotate credentials periodically
-
-## 🆘 Support
-
-Need help? Check these resources:
-
-1. **Script help**: `./setup-gha-config.sh --help`
-2. **Project documentation**: Check `docs/` directory
-3. **Azure support**: [Azure Portal Support](https://portal.azure.com/#blade/Microsoft_Azure_Support/HelpAndSupportBlade)
-4. **GitHub support**: [GitHub Actions documentation](https://docs.github.com/actions)
-
----
-
-**Happy Deploying! 🚀**
diff --git a/devops/scripts/generate_test_audio.py b/devops/scripts/generate_test_audio.py
deleted file mode 100644
index 1fe7619d..00000000
--- a/devops/scripts/generate_test_audio.py
+++ /dev/null
@@ -1,424 +0,0 @@
-#!/usr/bin/env python3
-"""
-Standalone Audio File Generator for Load Testing
-
-This script generates realistic customer audio files using Azure Speech Services
-for testing the various agent flows (Auth, FNOL, General Info).
-
-Usage:
-    python generate_test_audio.py
-    python generate_test_audio.py --output-dir ./test_audio --count 10
-    python generate_test_audio.py --agent-type auth --voice "en-US-AriaNeural"
-"""
-
-import argparse
-import json
-import os
-import sys
-from pathlib import Path
-from typing import Dict, List, Optional
-import logging
-
-# Add project root to path for imports
-project_root = Path(__file__).parent
-sys.path.insert(0, str(project_root))
-
-from dotenv import load_dotenv
-
-# Load environment variables
-env_path = project_root / ".env"
-if env_path.exists():
-    load_dotenv(env_path)
-
-# Import Azure Speech SDK and identity libraries
-try:
-    import azure.cognitiveservices.speech as speechsdk
-    from azure.identity import DefaultAzureCredential
-except ImportError:
-    logging.error("Required Azure libraries not installed. Please install:")
-    logging.error("pip install azure-cognitiveservices-speech azure-identity")
-    sys.exit(1)
-
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
-)
-logger = logging.getLogger(__name__)
-
-# Customer conversation samples organized by agent type
-CUSTOMER_CONVERSATIONS = {
-    "auth": [
-        "Hi, my name is Alice Brown, my zip code is 60610, and my last four digits of social security are 1234",
-        "Hello, this is John Smith. My ZIP is 90210 and the last four of my SSN are 5678",
-        "This is Maria Garcia, ZIP code 33101, last four social security digits 9876",
-        "My name is David Wilson, I live in ZIP 10001, last four of my social are 4321",
-        "Hi there, I'm Sarah Johnson. My ZIP code is 78701 and my last four SSN digits are 8765",
-    ],
-    "fnol_new_claim": [
-        "I need to file a claim. I was rear-ended on Highway 95 about an hour ago",
-        "I was in a car accident this morning. A truck hit my passenger side door",
-        "My car was damaged in a parking lot. Someone hit it and left a note with their information",
-        "I need to report a claim. My windshield was cracked by flying debris on the freeway",
-        "I was backing out of my driveway and hit my neighbor's fence. I need to file a claim",
-        "A tree fell on my car during the storm last night. I need to start a claim",
-        "Someone broke into my car and stole my laptop. I want to file a theft claim",
-    ],
-    "fnol_existing_claim": [
-        "I'm calling about my claim. The adjuster hasn't called me back yet",
-        "I filed a claim last week and need an update on the status",
-        "My claim number is 12345 and I wanted to check if you received the photos I sent",
-        "I submitted my claim documents three days ago but haven't heard anything back",
-        "The repair shop is asking for authorization. When will my claim be approved?",
-        "I got an estimate but it's higher than what the adjuster quoted. What do I do?",
-    ],
-    "general_info": [
-        "What does my comprehensive coverage include exactly?",
-        "I want to know what my deductible is for collision coverage",
-        "Can you explain what roadside assistance covers under my policy?",
-        "I'm moving to a new state. Do I need to update my policy?",
-        "How much does it cost to add a teenage driver to my policy?",
-        "What's the difference between liability and full coverage?",
-        "I want to increase my coverage limits. How much would that cost?",
-        "Can you help me understand what uninsured motorist coverage does?",
-    ],
-    "emergency": [
-        "I've been in an accident and my passenger is bleeding. We need help immediately",
-        "There's smoke coming from my engine and I smell gas. What should I do?",
-        "My car went off the road and we're trapped inside. Please send help",
-        "I hit a pedestrian. They're not moving. I need emergency services now",
-        "My car is on fire after the accident. I got out but I need the fire department",
-    ],
-    "escalation": [
-        "I've been trying to reach someone about my claim for three weeks. This is ridiculous",
-        "Your adjuster denied my claim but I think it should be covered. I want to speak to a supervisor",
-        "I'm not satisfied with the settlement offer. I want to escalate this to management",
-        "This is the fourth time I've called about the same issue. I need to speak to someone in charge",
-        "I'm considering hiring a lawyer if this isn't resolved today",
-    ],
-}
-
-# Voice options for different conversation types
-VOICE_OPTIONS = {
-    "default": "en-US-JennyMultilingualNeural",
-    "male": "en-US-BrianMultilingualNeural",
-    "female": "en-US-EmmaMultilingualNeural",
-    "calm": "en-US-AriaNeural",
-    "urgent": "en-US-DavisNeural",
-    "frustrated": "en-US-GuyNeural",
-}
-
-
-class TestAudioGenerator:
-    """Generate audio files for load testing customer conversations."""
-
-    def __init__(self, output_dir: str = "./test_audio"):
-        """Initialize the audio generator.
-
-        Args:
-            output_dir: Directory to save generated audio files
-        """
-        self.output_dir = Path(output_dir)
-        self.output_dir.mkdir(parents=True, exist_ok=True)
-
-        # Initialize Azure Speech configuration with DefaultAzureCredential
-        speech_region = os.getenv("AZURE_SPEECH_REGION", "centralus")
-        speech_endpoint = os.getenv("AZURE_SPEECH_ENDPOINT")
-
-        logger.info("Authenticating with DefaultAzureCredential...")
-
-        try:
-            # Get access token using DefaultAzureCredential
-            credential = DefaultAzureCredential()
-            token = credential.get_token("https://cognitiveservices.azure.com/.default")
-
-            # Create speech config with endpoint if available, otherwise use region
-            if speech_endpoint:
-                logger.info(f"Using Azure Speech endpoint: {speech_endpoint}")
-                self.speech_config = speechsdk.SpeechConfig(endpoint=speech_endpoint)
-                self.speech_config.authorization_token = token.token
-            else:
-                logger.info(f"Using Azure Speech region: {speech_region}")
-                self.speech_config = speechsdk.SpeechConfig(region=speech_region)
-                self.speech_config.authorization_token = token.token
-
-            # Store credential for token refresh if needed
-            self._credential = credential
-
-            logger.info("Successfully authenticated with DefaultAzureCredential")
-
-        except Exception as e:
-            logger.error(f"Failed to authenticate with DefaultAzureCredential: {e}")
-            logger.error(
-                "Please ensure you are logged in with 'az login' or have appropriate credentials configured"
-            )
-            raise
-
-        # Set default voice
-        self.speech_config.speech_synthesis_voice_name = VOICE_OPTIONS["default"]
-
-        logger.info(
-            f"Initialized audio generator with output directory: {self.output_dir}"
-        )
-        logger.info(f"Using Azure Speech region: {speech_region}")
-
-    def generate_audio_file(
-        self,
-        text: str,
-        filename: str,
-        voice: str = None,
-        style: str = "chat",
-        rate: str = "+5%",
-    ) -> Optional[str]:
-        """Generate a single audio file from text.
-
-        Args:
-            text: Text to synthesize
-            filename: Output filename (without extension)
-            voice: Voice to use (defaults to current voice)
-            style: Speech style
-            rate: Speech rate
-
-        Returns:
-            Path to generated file or None if failed
-        """
-        try:
-            # Use the existing authenticated speech config
-            config = self.speech_config
-
-            # Set voice for this synthesis
-            current_voice = voice or config.speech_synthesis_voice_name
-            config.speech_synthesis_voice_name = current_voice
-
-            # Set output file
-            file_path = self.output_dir / f"{filename}.wav"
-            audio_config = speechsdk.audio.AudioOutputConfig(filename=str(file_path))
-
-            # Create synthesizer and synthesize
-            synthesizer = speechsdk.SpeechSynthesizer(
-                speech_config=config, audio_config=audio_config
-            )
-
-            # Use simple text synthesis first to test
-            result = synthesizer.speak_text_async(text).get()
-
-            if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
-                logger.info(f"Generated audio file: {file_path}")
-                return str(file_path)
-            else:
-                logger.error(f"Speech synthesis failed with reason: {result.reason}")
-                # Try to get error details safely
-                try:
-                    if hasattr(result, "error_details") and result.error_details:
-                        logger.error(f"Error details: {result.error_details}")
-                except:
-                    logger.error("Could not retrieve error details")
-                return None
-
-        except Exception as e:
-            logger.error(f"Error generating audio for '{filename}': {e}")
-            return None
-
-    def generate_agent_conversations(
-        self, agent_type: str, count: int = None, voice: str = None
-    ) -> List[str]:
-        """Generate audio files for a specific agent type.
-
-        Args:
-            agent_type: Type of agent conversation (auth, fnol_new_claim, etc.)
-            count: Number of files to generate (None = all available)
-            voice: Voice to use for generation
-
-        Returns:
-            List of generated file paths
-        """
-        if agent_type not in CUSTOMER_CONVERSATIONS:
-            logger.error(f"Unknown agent type: {agent_type}")
-            return []
-
-        conversations = CUSTOMER_CONVERSATIONS[agent_type]
-        if count:
-            conversations = conversations[:count]
-
-        generated_files = []
-
-        # Select appropriate voice based on agent type
-        if not voice:
-            if agent_type == "emergency":
-                voice = VOICE_OPTIONS["urgent"]
-            elif agent_type == "escalation":
-                voice = VOICE_OPTIONS["frustrated"]
-            else:
-                voice = VOICE_OPTIONS["default"]
-
-        for i, text in enumerate(conversations, 1):
-            filename = f"{agent_type}_{i:02d}"
-            file_path = self.generate_audio_file(text, filename, voice=voice)
-            if file_path:
-                generated_files.append(file_path)
-
-        logger.info(f"Generated {len(generated_files)} files for {agent_type}")
-        return generated_files
-
-    def generate_all_conversations(
-        self, count_per_type: int = None
-    ) -> Dict[str, List[str]]:
-        """Generate audio files for all agent types.
-
-        Args:
-            count_per_type: Max number of files per agent type
-
-        Returns:
-            Dictionary mapping agent types to generated file paths
-        """
-        all_generated = {}
-
-        for agent_type in CUSTOMER_CONVERSATIONS.keys():
-            generated_files = self.generate_agent_conversations(
-                agent_type, count_per_type
-            )
-            all_generated[agent_type] = generated_files
-
-        return all_generated
-
-    def create_manifest(self, generated_files: Dict[str, List[str]]) -> str:
-        """Create a JSON manifest of generated audio files.
- - Args: - generated_files: Dictionary of agent types to file paths - - Returns: - Path to manifest file - """ - manifest = { - "generated_at": "", - "total_files": sum(len(files) for files in generated_files.values()), - "agent_types": {}, - } - - import datetime - - manifest["generated_at"] = datetime.datetime.now().isoformat() - - for agent_type, file_paths in generated_files.items(): - manifest["agent_types"][agent_type] = { - "count": len(file_paths), - "files": [ - { - "filename": Path(fp).name, - "path": fp, - "text": self._get_text_for_file(agent_type, Path(fp).name), - } - for fp in file_paths - ], - } - - manifest_path = self.output_dir / "audio_manifest.json" - with open(manifest_path, "w") as f: - json.dump(manifest, f, indent=2) - - logger.info(f"Created manifest: {manifest_path}") - return str(manifest_path) - - def _get_text_for_file(self, agent_type: str, filename: str) -> str: - """Get the original text for a generated file.""" - try: - # Extract index from filename (e.g., "auth_01.wav" -> 0) - index = int(filename.split("_")[1].split(".")[0]) - 1 - return CUSTOMER_CONVERSATIONS[agent_type][index] - except (IndexError, ValueError): - return "Unknown text" - - -def main(): - """Main function with command-line interface.""" - parser = argparse.ArgumentParser( - description="Generate customer audio files for load testing" - ) - - parser.add_argument( - "--output-dir", - "-o", - default="./test_audio", - help="Output directory for audio files (default: ./test_audio)", - ) - - parser.add_argument( - "--agent-type", - "-a", - choices=list(CUSTOMER_CONVERSATIONS.keys()) + ["all"], - default="all", - help="Type of agent conversation to generate (default: all)", - ) - - parser.add_argument( - "--count", - "-c", - type=int, - help="Number of files to generate per agent type (default: all available)", - ) - - parser.add_argument( - "--voice", - "-v", - choices=list(VOICE_OPTIONS.values()), - help="Voice to use for synthesis", - ) - - parser.add_argument( - "--list-voices", action="store_true", help="List available voice options" - ) - - args = parser.parse_args() - - if args.list_voices: - print("Available voice options:") - for name, voice in VOICE_OPTIONS.items(): - print(f" {name}: {voice}") - return - - # Check for required environment variables for DefaultAzureCredential - speech_region = os.getenv("AZURE_SPEECH_REGION", "centralus") - - logger.info(f"Using Azure Speech Services in region: {speech_region}") - logger.info("Authenticating with DefaultAzureCredential (no API key required)") - logger.info( - "Make sure you're logged in with 'az login' or have appropriate Azure credentials configured" - ) - - # Initialize generator - generator = TestAudioGenerator(args.output_dir) - - try: - if args.agent_type == "all": - logger.info("Generating audio files for all agent types...") - generated_files = generator.generate_all_conversations(args.count) - else: - logger.info(f"Generating audio files for {args.agent_type} agent...") - files = generator.generate_agent_conversations( - args.agent_type, args.count, args.voice - ) - generated_files = {args.agent_type: files} - - # Create manifest - manifest_path = generator.create_manifest(generated_files) - - # Summary - total_files = sum(len(files) for files in generated_files.values()) - logger.info(f"\n=== Generation Complete ===") - logger.info(f"Total files generated: {total_files}") - logger.info(f"Output directory: {generator.output_dir}") - logger.info(f"Manifest file: {manifest_path}") - - for agent_type, files in 
generated_files.items(): - logger.info(f" {agent_type}: {len(files)} files") - - except KeyboardInterrupt: - logger.info("Generation interrupted by user") - sys.exit(1) - except Exception as e: - logger.error(f"Error during generation: {e}") - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/devops/scripts/EventGrid-EntraAppConfig.ps1 b/devops/scripts/misc/EventGrid-EntraAppConfig.ps1 similarity index 100% rename from devops/scripts/EventGrid-EntraAppConfig.ps1 rename to devops/scripts/misc/EventGrid-EntraAppConfig.ps1 diff --git a/devops/scripts/EventGrid-EntraAppConfig.sh b/devops/scripts/misc/EventGrid-EntraAppConfig.sh similarity index 100% rename from devops/scripts/EventGrid-EntraAppConfig.sh rename to devops/scripts/misc/EventGrid-EntraAppConfig.sh diff --git a/devops/scripts/Generate-EnvFromTerraform.ps1 b/devops/scripts/misc/Generate-EnvFromTerraform.ps1 similarity index 100% rename from devops/scripts/Generate-EnvFromTerraform.ps1 rename to devops/scripts/misc/Generate-EnvFromTerraform.ps1 diff --git a/devops/scripts/generate-env-from-terraform.sh b/devops/scripts/misc/generate-env-from-terraform.sh similarity index 100% rename from devops/scripts/generate-env-from-terraform.sh rename to devops/scripts/misc/generate-env-from-terraform.sh diff --git a/devops/scripts/validate-terraform-backend.sh b/devops/scripts/misc/validate-terraform-backend.sh similarity index 100% rename from devops/scripts/validate-terraform-backend.sh rename to devops/scripts/misc/validate-terraform-backend.sh diff --git a/devops/scripts/webapp-deploy.sh b/devops/scripts/misc/webapp-deploy.sh similarity index 100% rename from devops/scripts/webapp-deploy.sh rename to devops/scripts/misc/webapp-deploy.sh diff --git a/devops/scripts/setup-gha-config.sh b/devops/scripts/setup-gha-config.sh deleted file mode 100755 index fb01ee35..00000000 --- a/devops/scripts/setup-gha-config.sh +++ /dev/null @@ -1,573 +0,0 @@ -#!/bin/bash - -# ======================================================================== -# 🚀 Setup CI/CD Configuration for Azure Developer CLI (AZD) Deployment -# ======================================================================== -# This script provisions GitHub Actions secrets and variables needed for -# automated deployment using Azure Developer CLI (azd) with OIDC authentication. -# -# Based on: https://learn.microsoft.com/en-us/azure/container-apps/github-actions-cli -# Usage: ./setup-cicd-config.sh [--interactive] [--help] - -set -euo pipefail - -# ======================================================================== -# CONFIGURATION & CONSTANTS -# ======================================================================== - -readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -readonly PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" -readonly APP_REGISTRATION_NAME="GitHub-Actions-RTAudio-AZD" - -# Colors for output -readonly RED='\033[0;31m' -readonly GREEN='\033[0;32m' -readonly YELLOW='\033[1;33m' -readonly BLUE='\033[0;34m' -readonly CYAN='\033[0;36m' -readonly NC='\033[0m' # No Color - -# Default values -INTERACTIVE_MODE=false -GITHUB_ORG="" -GITHUB_REPO="" -AZURE_LOCATION="eastus" -AZURE_ENV_NAME="dev" - -# ======================================================================== -# HELPER FUNCTIONS -# ======================================================================== - -log_info() { - echo -e "${BLUE}ℹ️ [INFO]${NC} $*" -} - -log_success() { - echo -e "${GREEN}✅ [SUCCESS]${NC} $*" -} - -log_warning() { - echo -e "${YELLOW}⚠️ [WARNING]${NC} $*" -} - -log_error() { - echo -e "${RED}❌ [ERROR]${NC} $*" >&2 -} - -log_section() { - echo "" - echo -e "${CYAN}🔧 $*${NC}" - echo "=========================================================================" -} - -show_help() { - cat << EOF -🚀 Setup CI/CD Configuration for Azure Developer CLI (AZD) Deployment - -USAGE: - $0 [OPTIONS] - -OPTIONS: - --interactive Run in interactive mode (prompts for all values) - --help Show this help message - -DESCRIPTION: - This script sets up GitHub Actions secrets and variables for automated - deployment using Azure Developer CLI (azd) with OIDC authentication. - - It will: - 1. Create Azure App Registration for OIDC authentication - 2. Configure federated credentials for GitHub Actions - 3. Assign necessary Azure permissions - 4. Set up Terraform remote state storage - 5. Display the secrets/variables to configure in GitHub - -PREREQUISITES: - - Azure CLI installed and authenticated - - GitHub CLI installed and authenticated (optional) - - Contributor permissions on Azure subscription - - GitHub repository already created - -ENVIRONMENT VARIABLES: - GITHUB_ORG GitHub organization/user name - GITHUB_REPO GitHub repository name - AZURE_LOCATION Azure region (default: eastus) - AZURE_ENV_NAME Environment name (default: dev) - -EXAMPLES: - # Interactive mode - $0 --interactive - - # Using environment variables - GITHUB_ORG=myorg GITHUB_REPO=myrepo $0 - - # With custom values - AZURE_LOCATION=westus2 AZURE_ENV_NAME=prod $0 - -EOF -} - -check_dependencies() { - log_info "Checking dependencies..." - - local deps=("az" "jq") - local missing=() - - for dep in "${deps[@]}"; do - if ! command -v "$dep" &> /dev/null; then - missing+=("$dep") - fi - done - - if [[ ${#missing[@]} -gt 0 ]]; then - log_error "Missing dependencies: ${missing[*]}" - log_error "Please install them and try again" - exit 1 - fi - - # Check if GitHub CLI is available (optional) - if command -v "gh" &> /dev/null; then - log_info "GitHub CLI detected (optional features available)" - else - log_warning "GitHub CLI not found (manual secret configuration required)" - fi - - log_success "Dependencies verified" -} - -check_azure_auth() { - log_info "Checking Azure authentication..." - - if ! 
az account show &> /dev/null; then - log_error "Azure CLI not authenticated" - log_error "Please run 'az login' first" - exit 1 - fi - - local subscription_name - subscription_name=$(az account show --query "name" -o tsv) - log_success "Authenticated to Azure subscription: $subscription_name" -} - -prompt_for_values() { - if [[ "$INTERACTIVE_MODE" == "true" ]] || [[ -z "$GITHUB_ORG" ]] || [[ -z "$GITHUB_REPO" ]]; then - echo "" - log_info "Please provide the following information:" - - if [[ -z "$GITHUB_ORG" ]]; then - read -p "GitHub organization/username: " GITHUB_ORG - fi - - if [[ -z "$GITHUB_REPO" ]]; then - read -p "GitHub repository name: " GITHUB_REPO - fi - - if [[ "$INTERACTIVE_MODE" == "true" ]]; then - read -p "Azure location [$AZURE_LOCATION]: " input_location - AZURE_LOCATION="${input_location:-$AZURE_LOCATION}" - - read -p "Environment name [$AZURE_ENV_NAME]: " input_env - AZURE_ENV_NAME="${input_env:-$AZURE_ENV_NAME}" - fi - fi - - # Validate required values - if [[ -z "$GITHUB_ORG" ]] || [[ -z "$GITHUB_REPO" ]]; then - log_error "GitHub organization and repository name are required" - log_error "Set GITHUB_ORG and GITHUB_REPO environment variables or use --interactive" - exit 1 - fi - - log_info "Configuration:" - log_info " GitHub: $GITHUB_ORG/$GITHUB_REPO" - log_info " Azure Location: $AZURE_LOCATION" - log_info " Environment: $AZURE_ENV_NAME" -} - -create_app_registration() { - log_section "Creating Azure App Registration for OIDC" - - # Check if app registration already exists - local existing_app_id - existing_app_id=$(az ad app list --display-name "$APP_REGISTRATION_NAME" --query "[0].appId" -o tsv 2>/dev/null || echo "") - - if [[ -n "$existing_app_id" && "$existing_app_id" != "null" ]]; then - log_warning "App registration '$APP_REGISTRATION_NAME' already exists" - APP_ID="$existing_app_id" - else - log_info "Creating app registration: $APP_REGISTRATION_NAME" - APP_ID=$(az ad app create --display-name "$APP_REGISTRATION_NAME" --query "appId" -o tsv) - - # Create service principal - log_info "Creating service principal..." 
- az ad sp create --id "$APP_ID" > /dev/null - - log_success "Created app registration: $APP_ID" - fi - - # Get tenant and subscription info - TENANT_ID=$(az account show --query "tenantId" -o tsv) - SUBSCRIPTION_ID=$(az account show --query "id" -o tsv) - SP_OBJECT_ID=$(az ad sp show --id "$APP_ID" --query "id" -o tsv) - - log_info "App Registration Details:" - log_info " Application ID: $APP_ID" - log_info " Tenant ID: $TENANT_ID" - log_info " Subscription ID: $SUBSCRIPTION_ID" - log_info " Service Principal Object ID: $SP_OBJECT_ID" -} - -configure_federated_credentials() { - log_section "Configuring OIDC Federated Credentials" - - local credentials=( - "main-branch:repo:$GITHUB_ORG/$GITHUB_REPO:ref:refs/heads/main:Main branch deployments" - "cleanup-deployment:repo:$GITHUB_ORG/$GITHUB_REPO:ref:refs/heads/cleanup/deployment:Cleanup deployment branch" - "pull-requests:repo:$GITHUB_ORG/$GITHUB_REPO:pull_request:Pull request validation" - "workflow-dispatch:repo:$GITHUB_ORG/$GITHUB_REPO:environment:$AZURE_ENV_NAME:Manual workflow triggers" - ) - - for credential in "${credentials[@]}"; do - IFS=':' read -r name subject_prefix org_repo subject_suffix description <<< "$credential" - local full_subject="${subject_prefix}:${org_repo}:${subject_suffix}" - - log_info "Creating federated credential: $name" - - # Check if credential already exists - local existing_cred - existing_cred=$(az ad app federated-credential list --id "$APP_ID" --query "[?name=='$name'].name" -o tsv 2>/dev/null || echo "") - - if [[ -n "$existing_cred" ]]; then - log_warning "Federated credential '$name' already exists, skipping..." - continue - fi - - # Create the federated credential - az ad app federated-credential create \ - --id "$APP_ID" \ - --parameters "{ - \"name\": \"$name\", - \"issuer\": \"https://token.actions.githubusercontent.com\", - \"subject\": \"$full_subject\", - \"description\": \"$description\", - \"audiences\": [\"api://AzureADTokenExchange\"] - }" > /dev/null - - log_success "Created federated credential: $name" - done -} - -assign_azure_permissions() { - log_section "Assigning Azure Permissions" - - local roles=("Contributor" "User Access Administrator") - - for role in "${roles[@]}"; do - log_info "Assigning '$role' role to service principal..." - - # Check if role assignment already exists - local existing_assignment - existing_assignment=$(az role assignment list \ - --assignee "$SP_OBJECT_ID" \ - --role "$role" \ - --scope "/subscriptions/$SUBSCRIPTION_ID" \ - --query "[0].id" -o tsv 2>/dev/null || echo "") - - if [[ -n "$existing_assignment" && "$existing_assignment" != "null" ]]; then - log_warning "Role '$role' already assigned, skipping..." 
- continue - fi - - az role assignment create \ - --assignee "$SP_OBJECT_ID" \ - --role "$role" \ - --scope "/subscriptions/$SUBSCRIPTION_ID" > /dev/null - - log_success "Assigned '$role' role" - done -} - -setup_terraform_state_storage() { - log_section "Setting up Terraform Remote State Storage" - - local resource_group="rg-terraform-state-${AZURE_ENV_NAME}" - local storage_account="tfstate${AZURE_ENV_NAME}$(openssl rand -hex 4)" - local container_name="tfstate" - - log_info "Creating resource group: $resource_group" - az group create \ - --name "$resource_group" \ - --location "$AZURE_LOCATION" \ - --tags "purpose=terraform-state" "environment=$AZURE_ENV_NAME" > /dev/null - - log_info "Creating storage account: $storage_account" - az storage account create \ - --name "$storage_account" \ - --resource-group "$resource_group" \ - --location "$AZURE_LOCATION" \ - --sku "Standard_LRS" \ - --encryption-services blob \ - --allow-blob-public-access false \ - --tags "purpose=terraform-state" "environment=$AZURE_ENV_NAME" > /dev/null - - log_info "Creating container: $container_name" - az storage container create \ - --name "$container_name" \ - --account-name "$storage_account" \ - --auth-mode login > /dev/null - - # Assign permissions to service principal for state storage - log_info "Assigning storage permissions to service principal..." - az role assignment create \ - --assignee "$SP_OBJECT_ID" \ - --role "Storage Blob Data Contributor" \ - --scope "/subscriptions/$SUBSCRIPTION_ID/resourceGroups/$resource_group/providers/Microsoft.Storage/storageAccounts/$storage_account" > /dev/null - - # Store values for later use - TF_RESOURCE_GROUP="$resource_group" - TF_STORAGE_ACCOUNT="$storage_account" - TF_CONTAINER_NAME="$container_name" - - log_success "Terraform state storage configured" - log_info " Resource Group: $TF_RESOURCE_GROUP" - log_info " Storage Account: $TF_STORAGE_ACCOUNT" - log_info " Container: $TF_CONTAINER_NAME" -} - -configure_github_secrets() { - log_section "GitHub Repository Configuration" - - if command -v "gh" &> /dev/null && gh auth status &> /dev/null; then - log_info "GitHub CLI detected and authenticated" - - # Ask if user wants to automatically configure secrets - if [[ "$INTERACTIVE_MODE" == "true" ]]; then - read -p "Automatically configure GitHub secrets and variables? (y/N): " configure_auto - if [[ "$configure_auto" =~ ^[Yy]$ ]]; then - setup_github_secrets_auto - return - fi - fi - fi - - # Manual configuration instructions - show_manual_configuration -} - -setup_github_secrets_auto() { - log_info "Configuring GitHub secrets and variables automatically..." - - # Set repository secrets - local secrets=( - "AZURE_CLIENT_ID:$APP_ID" - "AZURE_TENANT_ID:$TENANT_ID" - "AZURE_SUBSCRIPTION_ID:$SUBSCRIPTION_ID" - "AZURE_PRINCIPAL_ID:$SP_OBJECT_ID" - ) - - for secret in "${secrets[@]}"; do - IFS=':' read -r name value <<< "$secret" - log_info "Setting secret: $name" - echo "$value" | gh secret set "$name" --repo "$GITHUB_ORG/$GITHUB_REPO" - done - - # Set repository variables - local variables=( - "AZURE_LOCATION:$AZURE_LOCATION" - "AZURE_ENV_NAME:$AZURE_ENV_NAME" - "RS_RESOURCE_GROUP:$TF_RESOURCE_GROUP" - "RS_STORAGE_ACCOUNT:$TF_STORAGE_ACCOUNT" - "RS_CONTAINER_NAME:$TF_CONTAINER_NAME" - ) - - for variable in "${variables[@]}"; do - IFS=':' read -r name value <<< "$variable" - log_info "Setting variable: $name" - echo "$value" | gh variable set "$name" --repo "$GITHUB_ORG/$GITHUB_REPO" - done - - log_success "GitHub secrets and variables configured automatically!" 
-} - -show_manual_configuration() { - cat << EOF - -${CYAN}📝 Manual GitHub Configuration Required${NC} -======================================================================== - -Navigate to your GitHub repository: https://github.com/$GITHUB_ORG/$GITHUB_REPO -Go to Settings → Secrets and variables → Actions - -${YELLOW}Repository Secrets:${NC} -Add these under "Repository secrets": - -AZURE_CLIENT_ID: $APP_ID -AZURE_TENANT_ID: $TENANT_ID -AZURE_SUBSCRIPTION_ID: $SUBSCRIPTION_ID -AZURE_PRINCIPAL_ID: $SP_OBJECT_ID - -${YELLOW}Repository Variables:${NC} -Add these under "Repository variables": - -AZURE_LOCATION: $AZURE_LOCATION -AZURE_ENV_NAME: $AZURE_ENV_NAME -RS_RESOURCE_GROUP: $TF_RESOURCE_GROUP -RS_STORAGE_ACCOUNT: $TF_STORAGE_ACCOUNT -RS_CONTAINER_NAME: $TF_CONTAINER_NAME - -${YELLOW}Optional Secrets:${NC} -If you have an ACS phone number: - -ACS_SOURCE_PHONE_NUMBER: +1234567890 - -EOF -} - -create_summary_file() { - local summary_file="$PROJECT_ROOT/.azd-cicd-config.txt" - - cat > "$summary_file" << EOF -# Azure Developer CLI (AZD) CI/CD Configuration Summary -# Generated on: $(date) -# Script: $0 - -## Azure App Registration -Application ID: $APP_ID -Tenant ID: $TENANT_ID -Subscription ID: $SUBSCRIPTION_ID -Service Principal Object ID: $SP_OBJECT_ID - -## Terraform State Storage -Resource Group: $TF_RESOURCE_GROUP -Storage Account: $TF_STORAGE_ACCOUNT -Container: $TF_CONTAINER_NAME - -## GitHub Repository -Organization/User: $GITHUB_ORG -Repository: $GITHUB_REPO -URL: https://github.com/$GITHUB_ORG/$GITHUB_REPO - -## Next Steps -1. Configure GitHub secrets and variables (see output above) -2. Test the deployment workflow -3. Purchase ACS phone number if needed -4. Configure environment-specific settings - -## Useful Commands -# Test authentication -az login --service-principal --username $APP_ID --tenant $TENANT_ID - -# View role assignments -az role assignment list --assignee $SP_OBJECT_ID --output table - -# Test azd deployment -azd auth login --client-id $APP_ID --federated-credential-provider github --tenant-id $TENANT_ID -azd up - -EOF - - log_success "Configuration summary saved to: $summary_file" -} - -verify_configuration() { - log_section "Verifying Configuration" - - # Test service principal permissions - log_info "Testing service principal permissions..." - local test_result - test_result=$(az role assignment list --assignee "$SP_OBJECT_ID" --output table 2>/dev/null | wc -l) - - if [[ "$test_result" -gt 1 ]]; then - log_success "Service principal has role assignments" - else - log_warning "Service principal may not have proper permissions" - fi - - # Test federated credentials - log_info "Checking federated credentials..." - local cred_count - cred_count=$(az ad app federated-credential list --id "$APP_ID" --query "length(@)" -o tsv 2>/dev/null || echo "0") - - if [[ "$cred_count" -gt 0 ]]; then - log_success "Found $cred_count federated credential(s)" - else - log_warning "No federated credentials found" - fi - - # Test storage account access - log_info "Testing storage account access..." 
- if az storage container show --name "$TF_CONTAINER_NAME" --account-name "$TF_STORAGE_ACCOUNT" --auth-mode login &> /dev/null; then - log_success "Storage account accessible" - else - log_warning "Storage account access may be limited" - fi -} - -# ======================================================================== -# MAIN EXECUTION -# ======================================================================== - -main() { - # Parse command line arguments - while [[ $# -gt 0 ]]; do - case $1 in - --interactive) - INTERACTIVE_MODE=true - shift - ;; - --help) - show_help - exit 0 - ;; - *) - log_error "Unknown option: $1" - show_help - exit 1 - ;; - esac - done - - # Set values from environment variables if provided - GITHUB_ORG="${GITHUB_ORG:-}" - GITHUB_REPO="${GITHUB_REPO:-}" - AZURE_LOCATION="${AZURE_LOCATION:-eastus}" - AZURE_ENV_NAME="${AZURE_ENV_NAME:-dev}" - - # Display banner - echo -e "${CYAN}" - echo "🚀 Azure Developer CLI (AZD) CI/CD Configuration Setup" - echo "=======================================================" - echo -e "${NC}" - - # Run setup steps - check_dependencies - check_azure_auth - prompt_for_values - - log_info "Starting CI/CD configuration setup..." - - create_app_registration - configure_federated_credentials - assign_azure_permissions - setup_terraform_state_storage - configure_github_secrets - verify_configuration - create_summary_file - - echo "" - log_success "🎉 CI/CD configuration setup completed!" - log_info "Your azd deployment workflows should now be ready to run." - log_info "Test your setup by pushing to the main or cleanup/deployment branch." - - echo "" - echo -e "${YELLOW}💡 Next Steps:${NC}" - echo "1. Review the configuration summary file" - echo "2. Test the GitHub Actions workflow" - echo "3. Purchase an ACS phone number if needed" - echo "4. Configure any additional environment-specific settings" -} - -# Run main function if script is executed directly -if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then - main "$@" -fi diff --git a/docs/DeploymentGuide.md b/docs/DeploymentGuide.md index 13242542..77a3e9e3 100644 --- a/docs/DeploymentGuide.md +++ b/docs/DeploymentGuide.md @@ -176,7 +176,7 @@ disable_local_auth = true redis_sku = "MemoryOptimized_M10" # OpenAI model deployments with latest models -openai_models = [ +model_deployments = [ { name = "gpt-4-1-mini" version = "2024-11-20" @@ -615,7 +615,7 @@ container_apps_configuration = { Customize OpenAI model deployments for the latest supported models: ```hcl -openai_models = [ +model_deployments = [ { name = "gpt-4-1-mini" version = "2024-11-20" diff --git a/docs/index.md b/docs/index.md index 6f60cb18..223296ab 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,69 +1,56 @@ -# Real-Time Voice Agent - -A production-ready Azure-powered voice agent with advanced text-to-speech and speech recognition capabilities. 
-
-## 🚀 Features
-
-- **Real-time speech synthesis** with Azure Cognitive Services
-- **Streaming speech recognition** with advanced language detection
-- **Multi-language support** with automatic optimization
-- **Neural voice synthesis** with customizable styles and prosody
-- **OpenTelemetry observability** with distributed tracing
-- **Production-ready** with comprehensive error handling and monitoring
-
-## 🏗️ Architecture
-
-The voice agent is built with a modular architecture optimized for low-latency real-time applications:
-
-- **FastAPI backend** for high-performance async operations
-- **Azure Communication Services** for call automation and media streaming
-- **Azure Speech Services** for TTS/STT with neural voice models
-- **Azure OpenAI** for intelligent conversation handling
-- **OpenTelemetry** for comprehensive observability and monitoring
-
-## 🎯 Key Components
-
-### SpeechSynthesizer
-
-The core text-to-speech engine providing:
-
-- Multiple synthesis modes (speaker playback, memory synthesis, frame-based streaming)
-- Flexible authentication (API key, managed identity, credential chains)
-- Intelligent environment detection for headless deployments
-- Advanced SSML support with style and prosody control
-- Real-time frame generation for streaming applications
-
-### StreamingSpeechRecognizer
-
-Advanced speech-to-text engine featuring:
-
-- Real-time streaming recognition with minimal latency
-- Language detection and speaker diarization
-- Neural audio processing for improved accuracy
-- Comprehensive callback system for real-time processing
-- Session management with proper resource cleanup
-
-## 📊 Observability
-
-Built-in observability features include:
-
-- **Distributed tracing** with OpenTelemetry and Azure Monitor
-- **Structured logging** with correlation IDs for request tracking
-- **Performance metrics** for latency and error rate monitoring
-- **Service dependency mapping** for application insights
-- **Real-time monitoring** dashboards and alerting
-
-## 🔧 Configuration
-
-The system supports flexible configuration through:
-
-- Environment variables for credentials and settings
-- Runtime configuration for voice parameters and behavior
-- Deployment-specific settings for different environments
-- Automatic fallback mechanisms for robust operation
-
-## 🌟 Getting Started
-
-Ready to build your voice application? Check out our [Quick Start Guide](getting-started/quickstart.md) to get up and running in minutes.
-
-For detailed API documentation, explore our [API Reference](api/overview.md) section.
+# Real-Time Voice Agent Documentation Hub
+
+This site brings together everything you need to deploy, operate, and extend the Azure-based Real-Time Voice Agent. Use the map below to jump into the areas most relevant to your work.
+
+## Documentation Map
+
+- **Quick Start & Setup**
+  - [Quick Start Guide](getting-started/quickstart.md)
+  - [Local Development Playbook](quickstart-local-development.md)
+  - [Installation Checklist](getting-started/installation.md)
+- **Architecture & Flows**
+  - [Architecture Deep Dive](Architecture.md)
+  - [Data Architecture](DataArchitecture.md)
+  - [Integration Points](IntegrationPoints.md)
+  - [ACS Barge-In Flow](ACSBargeInFlow.md)
+- **Operations & Delivery**
+  - [Deployment Guide](DeploymentGuide.md)
+  - [CI/CD Runbook](CICDGuide.md)
+  - [Path to Production](PathToProduction.md)
+  - [Application Insights](ApplicationInsights.md)
+- **Security & Access**
+  - [Auth for HTTP and WebSocket Calls](AuthForHTTPandWSS.md)
+  - [WebSocket Authentication](WebsocketAuth.md)
+- **Testing & Load**
+  - [Load Testing Strategy](LoadTesting.md)
+  - [Troubleshooting Guide](Troubleshooting.md)
+- **Industry Scenarios**
+  - [Healthcare Playbooks](HealthcareUsecases.md)
+
+## Diagram Highlights
+
+- Production reference: [Architecture Deep Dive – Production Deployment](Architecture.md#production-deployment-architecture) (image: `assets/RTAudio.v0.png`)
+- Data lifecycle: [Data Architecture – Event Pipeline](DataArchitecture.md#event-driven-data-pipeline) with interactive Mermaid sequence diagrams
+- Contact center routing: [ACS Barge-In Flow](ACSBargeInFlow.md) featuring step-by-step diagrams and Mermaid flows
+- Authentication flows: [Auth for HTTP and WSS](AuthForHTTPandWSS.md#end-to-end-authentication-flow) detailing OAuth and shared access tokens
+
+## Architecture at a Glance
+
+```mermaid
+flowchart LR
+    Caller[Caller / Client] --> ACS[Azure Communication Services]
+    ACS --> Speech[Azure Speech Services]
+    Speech --> FastAPI[FastAPI Real-Time Agent]
+    FastAPI -->|Intent & Context| LLM[Azure OpenAI / GPT-4o]
+    FastAPI --> Redis[(Redis Session State)]
+    FastAPI --> Monitoring[Azure Monitor & App Insights]
+    FastAPI --> Downstream[External Integrations]
+    classDef azure fill:#1c75bc,stroke:#0f3c62,color:#fff;
+    class ACS,Speech azure;
+```
+
+The architecture section contains comprehensive Mermaid diagrams for every critical workflow. Because these diagrams are rendered from their Mermaid source on GitHub Pages, you can copy that source directly into your own documentation.
+
+## Next Steps
+
+If you are new to the project, start with the [Quick Start Guide](getting-started/quickstart.md) and then dive into the [Architecture Deep Dive](Architecture.md) for system context. Platform operators should review the [Deployment Guide](DeploymentGuide.md) and [Troubleshooting](Troubleshooting.md) playbooks, while solution teams can jump into the [Integration Points](IntegrationPoints.md) and [Healthcare Playbooks](HealthcareUsecases.md) for domain-specific patterns.
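To make the "Architecture at a Glance" flow concrete, here is a minimal sketch of the FastAPI-to-Redis session-state hop shown in the diagram. This is not the repository's actual handler: the route, the `session:{id}` key layout, and the turn protocol are illustrative assumptions.

```python
# Illustrative only: a FastAPI WebSocket turn loop that keeps per-session
# context in Redis, mirroring the FastAPI -> Redis hop in the diagram above.
# The route, key layout, and message shape are assumptions, not project APIs.
import json

import redis.asyncio as aioredis
from fastapi import FastAPI, WebSocket, WebSocketDisconnect

app = FastAPI()
store = aioredis.Redis(decode_responses=True)  # assumes a local Redis


@app.websocket("/ws/{session_id}")
async def converse(ws: WebSocket, session_id: str) -> None:
    await ws.accept()
    key = f"session:{session_id}"  # hypothetical key layout
    try:
        while True:
            text = await ws.receive_text()  # one user turn
            history = json.loads(await store.get(key) or "[]")
            history.append({"role": "user", "content": text})
            # ...call the LLM with `history` and stream audio back here...
            await store.set(key, json.dumps(history), ex=3600)  # 1 h TTL
            await ws.send_text(f"ack turn {len(history)}")
    except WebSocketDisconnect:
        pass  # session state survives in Redis for reconnects
```

The point of the pattern: turn history lives in Redis rather than in process memory, so the backend can scale horizontally and a reconnecting call can pick up its context, which is the role the Redis session-state node plays in the diagram above.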
diff --git a/infra/terraform/ai-foundry.tf b/infra/terraform/ai-foundry.tf
index c309c8ae..59cef31f 100644
--- a/infra/terraform/ai-foundry.tf
+++ b/infra/terraform/ai-foundry.tf
@@ -1,5 +1,5 @@
 module "ai_foundry" {
-  source = "./modules/ai"
+  source = "./modules/aifoundry"
 
   resource_group_name = azurerm_resource_group.main.name
   location            = azurerm_resource_group.main.location
@@ -16,10 +16,31 @@ module "ai_foundry" {
   model_deployments          = var.model_deployments
   log_analytics_workspace_id = azurerm_log_analytics_workspace.main.id
 
-  account_principal_ids = distinct([
-    azurerm_user_assigned_identity.backend.principal_id,
-    azurerm_user_assigned_identity.frontend.principal_id,
-    azapi_resource.acs.identity[0].principal_id,
-    local.principal_id
-  ])
+  account_principal_ids = {
+    backend_identity  = azurerm_user_assigned_identity.backend.principal_id
+    frontend_identity = azurerm_user_assigned_identity.frontend.principal_id
+    acs_identity      = azapi_resource.acs.identity[0].principal_id
+    deployer_identity = local.principal_id
+  }
+
+  depends_on = [ azurerm_resource_group.main ]
+}
+
+
+resource "azurerm_monitor_diagnostic_setting" "ai_foundry_account" {
+  name                       = "${local.resource_names.foundry_account}-diagnostics"
+  target_resource_id         = module.ai_foundry.account_id
+  log_analytics_workspace_id = azurerm_log_analytics_workspace.main.id
+
+  enabled_log {
+    category = "Audit"
+  }
+
+  enabled_log {
+    category = "RequestResponse"
+  }
+
+  enabled_metric {
+    category = "AllMetrics"
+  }
 }
diff --git a/infra/terraform/modules/ai/foundry.tf b/infra/terraform/modules/aifoundry/main.tf
similarity index 56%
rename from infra/terraform/modules/ai/foundry.tf
rename to infra/terraform/modules/aifoundry/main.tf
index 499e1276..3d5c525c 100644
--- a/infra/terraform/modules/ai/foundry.tf
+++ b/infra/terraform/modules/aifoundry/main.tf
@@ -1,18 +1,7 @@
 # Terraform module for provisioning Azure AI Foundry aligned with the ai-services deployment.
 
 locals {
-  account_name_raw          = lower(trimspace(var.foundry_account_name))
-  custom_subdomain_name_raw = var.foundry_custom_subdomain_name != null && trimspace(var.foundry_custom_subdomain_name) != "" ? lower(trimspace(var.foundry_custom_subdomain_name)) : local.account_name_raw
-
-  project_name_raw = var.project_name != null && trimspace(var.project_name) != "" ? lower(trimspace(var.project_name)) : "${local.account_name_raw}-project"
-
-  project_display_name_raw = var.project_display_name != null && trimspace(var.project_display_name) != "" ? trimspace(var.project_display_name) : local.project_name_raw
-
-  project_description_raw = var.project_description != null && trimspace(var.project_description) != "" ? trimspace(var.project_description) : "Azure AI Foundry project ${local.project_display_name_raw}"
   project_id_guid = "${substr(azapi_resource.ai_foundry_project.output.properties.internalId, 0, 8)}-${substr(azapi_resource.ai_foundry_project.output.properties.internalId, 8, 4)}-${substr(azapi_resource.ai_foundry_project.output.properties.internalId, 12, 4)}-${substr(azapi_resource.ai_foundry_project.output.properties.internalId, 16, 4)}-${substr(azapi_resource.ai_foundry_project.output.properties.internalId, 20, 12)}"
-
-  account_principal_map = { for idx, pid in tolist(nonsensitive(var.account_principal_ids)) : idx => pid if pid != null && pid != "" }
 }
 
 data "azurerm_resource_group" "rg" {
@@ -21,7 +10,7 @@ data "azurerm_resource_group" "rg" {
 
 resource "azapi_resource" "ai_foundry_account" {
   type                      = "Microsoft.CognitiveServices/accounts@2025-06-01"
-  name                      = local.account_name_raw
+  name                      = var.foundry_account_name
   parent_id                 = data.azurerm_resource_group.rg.id
   location                  = var.location
   schema_validation_enabled = false
@@ -38,29 +27,11 @@ resource "azapi_resource" "ai_foundry_account" {
     properties = {
       allowProjectManagement = true
       disableLocalAuth       = var.disable_local_auth
-      customSubDomainName    = local.custom_subdomain_name_raw
+      customSubDomainName    = var.foundry_custom_subdomain_name
     }
   }
 }
 
-resource "azurerm_monitor_diagnostic_setting" "ai_foundry_account" {
-  count                      = var.log_analytics_workspace_id != null && var.log_analytics_workspace_id != "" ? 1 : 0
-  name                       = "${local.account_name_raw}-diagnostics"
-  target_resource_id         = azapi_resource.ai_foundry_account.id
-  log_analytics_workspace_id = var.log_analytics_workspace_id
-
-  enabled_log {
-    category = "Audit"
-  }
-
-  enabled_log {
-    category = "RequestResponse"
-  }
-
-  enabled_metric {
-    category = "AllMetrics"
-  }
-}
 
 resource "azurerm_cognitive_deployment" "model" {
   for_each = { for deployment in var.model_deployments : deployment.name => deployment }
@@ -82,7 +53,7 @@ resource "azurerm_cognitive_deployment" "model" {
 
 resource "azapi_resource" "ai_foundry_project" {
   type                      = "Microsoft.CognitiveServices/accounts/projects@2025-06-01"
-  name                      = local.project_name_raw
+  name                      = var.project_name
   parent_id                 = azapi_resource.ai_foundry_account.id
   location                  = var.location
   schema_validation_enabled = false
@@ -96,8 +67,8 @@ resource "azapi_resource" "ai_foundry_project" {
       name = var.project_sku_name
     }
     properties = {
-      displayName = local.project_display_name_raw
-      description = local.project_description_raw
+      displayName = var.project_display_name
+      description = var.project_description
     }
   }
   response_export_values = [
@@ -107,10 +78,9 @@
 }
 
 resource "azurerm_role_assignment" "ai_foundry_account" {
-  for_each = local.account_principal_map
+  for_each = var.account_principal_ids
 
   scope                = azapi_resource.ai_foundry_account.id
   role_definition_name = var.account_principal_role_definition_name
   principal_id         = each.value
 }
-
diff --git a/infra/terraform/modules/ai/outputs.tf b/infra/terraform/modules/aifoundry/outputs.tf
similarity index 100%
rename from infra/terraform/modules/ai/outputs.tf
rename to infra/terraform/modules/aifoundry/outputs.tf
diff --git a/infra/terraform/modules/ai/project_capability_host.tf b/infra/terraform/modules/aifoundry/project_capability_host.tf
similarity index 100%
rename from infra/terraform/modules/ai/project_capability_host.tf
rename to infra/terraform/modules/aifoundry/project_capability_host.tf
diff --git a/infra/terraform/modules/ai/project_connections.tf b/infra/terraform/modules/aifoundry/project_connections.tf
similarity index 100%
rename from infra/terraform/modules/ai/project_connections.tf
rename to infra/terraform/modules/aifoundry/project_connections.tf
diff --git a/infra/terraform/modules/ai/providers.tf b/infra/terraform/modules/aifoundry/providers.tf
similarity index 67%
rename from infra/terraform/modules/ai/providers.tf
rename to infra/terraform/modules/aifoundry/providers.tf
index 030715df..7e498943 100644
--- a/infra/terraform/modules/ai/providers.tf
+++ b/infra/terraform/modules/aifoundry/providers.tf
@@ -11,9 +11,9 @@ terraform {
   }
 }
 
-provider "azurerm" {
-  features {}
-  storage_use_azuread = true
-}
+# provider "azurerm" {
+#   features {}
+#   storage_use_azuread = true
+# }
 
-provider "azapi" {}
+# provider "azapi" {}
diff --git a/infra/terraform/modules/ai/variables.tf b/infra/terraform/modules/aifoundry/variables.tf
similarity index 95%
rename from infra/terraform/modules/ai/variables.tf
rename to infra/terraform/modules/aifoundry/variables.tf
index 3c357ff8..c4a13f8b 100644
--- a/infra/terraform/modules/ai/variables.tf
+++ b/infra/terraform/modules/aifoundry/variables.tf
@@ -90,9 +90,9 @@ variable "log_analytics_workspace_id" {
 }
 
 variable "account_principal_ids" {
-  description = "Principal IDs to assign Cognitive Services access to the AI Foundry account."
-  type        = list(string)
-  default     = []
+  description = "Map of principals to assign Cognitive Services access to the AI Foundry account (keys should be stable labels)."
+  type        = map(string)
+  default     = {}
 }
 
 variable "account_principal_role_definition_name" {
@@ -137,4 +137,4 @@ variable "ai_search_endpoint" {
   description = "Optional Azure AI Search resource endpoint for AI Foundry to use for search capabilities."
   type        = string
   default     = null
-}
\ No newline at end of file
+}
diff --git a/infra/terraform/variables.tf b/infra/terraform/variables.tf
index 594a926b..f38ed66e 100644
--- a/infra/terraform/variables.tf
+++ b/infra/terraform/variables.tf
@@ -221,7 +221,7 @@ variable "container_memory_gb" {
 variable "aoai_pool_size" {
   description = "Size of the Azure OpenAI client pool for optimal performance"
   type        = number
-  default     = 50
+  default     = 5
   validation {
     condition     = var.aoai_pool_size >= 5 && var.aoai_pool_size <= 200
     error_message = "AOAI pool size must be between 5 and 200."
@@ -231,7 +231,7 @@ variable "aoai_pool_size" {
 variable "tts_pool_size" {
   description = "Size of the TTS client pool for optimal performance"
   type        = number
-  default     = 100
+  default     = 10
   validation {
     condition     = var.tts_pool_size >= 10 && var.tts_pool_size <= 500
     error_message = "TTS pool size must be between 10 and 500."
@@ -241,7 +241,7 @@ variable "tts_pool_size" {
 variable "stt_pool_size" {
   description = "Size of the STT client pool for optimal performance"
   type        = number
-  default     = 100
+  default     = 10
   validation {
     condition     = var.stt_pool_size >= 10 && var.stt_pool_size <= 500
     error_message = "STT pool size must be between 10 and 500."
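For context on the pool-size variables above: the backend keeps pools of pre-built Azure OpenAI/TTS/STT clients and hands one out per request, so `aoai_pool_size`, `tts_pool_size`, and `stt_pool_size` bound concurrency rather than per-call connection setup. A generic sketch of such a pool follows; `ClientPool` and its factory argument are hypothetical, not code from this repository.

```python
# Hypothetical bounded client pool illustrating what the aoai/tts/stt
# *_pool_size variables above control; not code from this repository.
import asyncio
from contextlib import asynccontextmanager
from typing import Any, Awaitable, Callable


class ClientPool:
    def __init__(self, factory: Callable[[], Awaitable[Any]], size: int) -> None:
        self._factory = factory          # builds one client (e.g. an AOAI client)
        self._size = size                # maps to the Terraform *_pool_size value
        self._clients: asyncio.Queue[Any] = asyncio.Queue()
        self._lock = asyncio.Lock()
        self._built = False

    async def _build_once(self) -> None:
        async with self._lock:
            if not self._built:
                for _ in range(self._size):
                    await self._clients.put(await self._factory())
                self._built = True

    @asynccontextmanager
    async def acquire(self):
        await self._build_once()
        client = await self._clients.get()   # waits when all clients are in use
        try:
            yield client
        finally:
            await self._clients.put(client)  # recycle instead of reconnecting
```

A larger pool trades idle connections and memory for less contention at peak load; the lowered defaults (5/10/10) sit at the validated minimums, with the `validation` blocks capping the upper bounds.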
diff --git a/mkdocs.yml b/mkdocs.yml index cb748faa..f22f5223 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -51,22 +51,31 @@ plugins: nav: - Home: index.md - Getting Started: - - Installation: getting-started/installation.md - - Quick Start: getting-started/quickstart.md - - Configuration: getting-started/configuration.md + - Quick Start Guide: getting-started/quickstart.md + - Local Development Playbook: quickstart-local-development.md + - Installation Checklist: getting-started/installation.md + - Repository Tour: repo-structure.md + - Architecture & Flows: + - Architecture Deep Dive: Architecture.md + - Data Architecture: DataArchitecture.md + - Integration Points: IntegrationPoints.md + - ACS Barge-In Flow: ACSBargeInFlow.md + - Security & Access: + - Auth for HTTP and WSS: AuthForHTTPandWSS.md + - WebSocket Authentication: WebsocketAuth.md + - Operations & Delivery: + - Deployment Guide: DeploymentGuide.md + - Path to Production: PathToProduction.md + - CI/CD Runbook: CICDGuide.md + - Application Insights: ApplicationInsights.md + - Troubleshooting Handbook: Troubleshooting.md + - Testing & Load: + - Load Testing Strategy: LoadTesting.md + - Industry Solutions: + - Healthcare Playbooks: HealthcareUsecases.md - API Reference: - Overview: api/overview.md - Speech Synthesis: api/speech-synthesis.md - - Speech Recognition: api/speech-recognition.md - - Utilities: api/utilities.md - - Architecture: - - Overview: architecture/overview.md - - Azure Integration: architecture/azure-integration.md - - Observability: architecture/observability.md - - Examples: - - Basic Usage: examples/basic-usage.md - - Advanced Scenarios: examples/advanced-scenarios.md - - Production Deployment: examples/production.md markdown_extensions: - pymdownx.highlight: diff --git a/src/redis/manager.py b/src/redis/manager.py index 5ef09c74..4386b9a0 100644 --- a/src/redis/manager.py +++ b/src/redis/manager.py @@ -1,17 +1,27 @@ -from opentelemetry import trace -from opentelemetry.trace import SpanKind import asyncio +import ipaddress import os import threading import time -from typing import Any, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar -from utils.azure_auth import get_credential +from opentelemetry import trace +from opentelemetry.trace import SpanKind import redis -from redis.exceptions import AuthenticationError +from redis.exceptions import AuthenticationError, MovedError + +from utils.azure_auth import get_credential from utils.ml_logging import get_logger +try: # redis-py always provides cluster module from v5+, keep guard for safety + from redis.cluster import RedisCluster +except ImportError: # pragma: no cover - only in legacy environments + RedisCluster = None # type: ignore[assignment] + + +T = TypeVar("T") + class AzureRedisManager: """ @@ -68,6 +78,15 @@ def __init__( ) self.user_name = user_name or os.getenv("REDIS_USER_NAME") or "user" self._auth_expires_at = 0 # For AAD token refresh tracking + self.token_expiry = 0 + + # Cluster configuration + self._cluster_preference = self._parse_optional_bool( + os.getenv("REDIS_USE_CLUSTER") + ) + self._cluster_auto = self._cluster_preference is True + self._using_cluster = False + self._client_lock = threading.RLock() # Build initial client and, if using AAD, start a refresh thread self._create_client() @@ -84,15 +103,30 @@ async def initialize(self) -> None: """ try: self.logger.info(f"Validating Redis connection to {self.host}:{self.port}") - - # Validate connection with health check - loop = 
asyncio.get_event_loop() - ping_result = await loop.run_in_executor(None, self._health_check) - - if ping_result: - self.logger.info("✅ Redis connection validated successfully") - else: - raise ConnectionError("Redis health check failed") + # Ensure a client exists and perform a quick ping; recreate on failure. + try: + if not getattr(self, "redis_client", None): + self.logger.info("Redis client not present during initialize — creating client.") + self.__init__() + else: + try: + # use a short timeout to avoid blocking startup + ok = self._health_check() + except asyncio.TimeoutError: + self.logger.warning("Redis ping timed out during initialize; recreating client.") + self._create_client() + except AuthenticationError: + self.logger.info("Redis authentication failed during initialize; recreating client.") + self._create_client() + except Exception as e: + # Non-fatal here; let the subsequent health check determine final status + self.logger.debug("Non-fatal error during quick ping check: %s", e) + else: + if not ok: + self.logger.info("Redis ping returned False during initialize; recreating client.") + self._create_client() + except Exception as e: + self.logger.error("Unexpected error during Redis pre-initialization check: %s", e) except Exception as e: self.logger.error(f"Redis initialization failed: {e}") @@ -104,14 +138,16 @@ def _health_check(self) -> bool: """ try: # Basic connectivity test - if not self.redis_client.ping(): + if not self._execute_with_redirect("PING", lambda client: client.ping()): return False # Test basic operations test_key = "health_check_test" - self.redis_client.set(test_key, "test_value", ex=5) - result = self.redis_client.get(test_key) - self.redis_client.delete(test_key) + self._execute_with_redirect( + "SET", lambda client: client.set(test_key, "test_value", ex=5) + ) + result = self._execute_with_redirect("GET", lambda client: client.get(test_key)) + self._execute_with_redirect("DEL", lambda client: client.delete(test_key)) return result == "test_value" @@ -133,42 +169,210 @@ def _redis_span(self, name: str, op: str | None = None): }, ) - def _create_client(self): - """(Re)create self.redis_client and record expiry for AAD.""" + @staticmethod + def _parse_optional_bool(value: Optional[str]) -> Optional[bool]: + if value is None: + return None + normalized = value.strip().lower() + if normalized in {"1", "true", "yes", "on"}: + return True + if normalized in {"0", "false", "no", "off"}: + return False + return None + + def _resolve_cluster(self, force_cluster: Optional[bool]) -> bool: + if force_cluster is not None: + return force_cluster + if self._cluster_preference is not None: + return self._cluster_preference + return self._cluster_auto + + def _build_auth_kwargs(self) -> Dict[str, Any]: + if self.access_key: + return {"password": self.access_key} + + token = self.credential.get_token(self.scope) + self.token_expiry = token.expires_on + return {"username": self.user_name, "password": token.token} + + def _build_standard_client( + self, host: str, port: Optional[int], auth_kwargs: Dict[str, Any] + ) -> redis.Redis: + client = redis.Redis( + host=host, + port=port, + db=self.db, + ssl=self.ssl, + decode_responses=True, + socket_keepalive=True, + health_check_interval=30, + socket_connect_timeout=2.0, + socket_timeout=1.0, + max_connections=200, + client_name="rtagent-api", + **auth_kwargs, + ) if self.access_key: - # static key-based auth - self.redis_client = redis.Redis( - host=self.host, - port=self.port, - db=self.db, - password=self.access_key, - 
ssl=self.ssl, - decode_responses=True, - socket_keepalive=True, - health_check_interval=30, - socket_connect_timeout=0.2, - socket_timeout=1.0, - max_connections=200, - client_name="rtagent-api", - ) self.logger.info("Azure Redis connection initialized with access key.") else: - # get fresh AAD token - token = self.credential.get_token(self.scope) - self.token_expiry = token.expires_on - self.redis_client = redis.Redis( - host=self.host, - port=self.port, - db=self.db, - username=self.user_name, - password=token.token, - ssl=self.ssl, - decode_responses=True, - ) self.logger.info( "Azure Redis connection initialized with AAD token (expires at %s).", self.token_expiry, ) + return client + + def _build_cluster_client( + self, host: str, port: Optional[int], auth_kwargs: Dict[str, Any] + ) -> "RedisCluster": + if RedisCluster is None: + raise RuntimeError("redis-py cluster support unavailable") + + client = RedisCluster( + host=host, + port=port or 6379, + ssl=self.ssl, + decode_responses=True, + socket_keepalive=True, + health_check_interval=30, + socket_connect_timeout=2.0, + socket_timeout=1.0, + max_connections=200, + client_name="rtagent-api", + require_full_coverage=False, + address_remap=self._remap_cluster_address, + **auth_kwargs, + ) + if self.access_key: + self.logger.info( + "Azure Redis cluster client initialized with access key (startup %s:%s).", + host, + port, + ) + else: + self.logger.info( + "Azure Redis cluster client initialized with AAD token (expires at %s).", + self.token_expiry, + ) + return client + + def _execute_with_redirect( + self, command: str, operation: Callable[[redis.Redis], T] + ) -> T: + try: + return operation(self.redis_client) + except MovedError as err: + return self._handle_cluster_redirect(command, operation, err) + + @staticmethod + def _is_ip_address(value: str) -> bool: + try: + ipaddress.ip_address(value) + except ValueError: + return False + return True + + def _remap_cluster_address(self, address: Tuple[str, int]) -> Tuple[str, int]: + host, port = address + if self._is_ip_address(host): + return (self.host, port) + return address + + def _handle_cluster_redirect( + self, + command: str, + operation: Callable[[redis.Redis], T], + err: MovedError, + ) -> T: + details = f"slot {err.slot_id} -> {err.host}:{err.port}" + self.logger.warning( + "Redis MOVED error on %s (%s). Switching to cluster-aware client.", + command, + details, + ) + if RedisCluster is None: + self.logger.error( + "redis-py cluster support is unavailable; unable to honor MOVED redirect." 
+ ) + raise err + + attempts: List[Tuple[Optional[str], Optional[int]]] = [] + if getattr(err, "port", None) is not None: + attempts.append((self.host, int(err.port))) + attempts.append((self.host, self.port)) + + last_exc: Optional[Exception] = None + tried: set[tuple[str, Optional[int]]] = set() + for host, port in attempts: + key = (host, port) + if key in tried or host is None or port is None: + continue + tried.add(key) + try: + self._create_client( + force_cluster=True, host_override=host, port_override=port + ) + break + except Exception as exc: # pragma: no cover - dependent on runtime config + last_exc = exc + self.logger.debug( + "Redis cluster initialization attempt using %s:%s failed: %s", + host, + port, + exc, + ) + else: + if last_exc: + raise last_exc + raise err + + return operation(self.redis_client) + + def _create_client( + self, + force_cluster: Optional[bool] = None, + host_override: Optional[str] = None, + port_override: Optional[int] = None, + ) -> None: + host = host_override or self.host + port = port_override if port_override is not None else self.port + + with self._client_lock: + use_cluster = self._resolve_cluster(force_cluster) + if use_cluster and RedisCluster is None: + if force_cluster: + raise RuntimeError( + "redis-py cluster support unavailable" + ) + self.logger.warning( + "Redis cluster requested but redis-py cluster support unavailable; using single-node client." + ) + use_cluster = False + + auth_kwargs = self._build_auth_kwargs() + client: Optional[redis.Redis] = None + if use_cluster: + try: + client = self._build_cluster_client(host, port, auth_kwargs) + self._using_cluster = True + except Exception as exc: + if force_cluster: + raise + self.logger.warning( + "Failed to initialize Redis cluster client (%s); falling back to single-node client.", + exc, + ) + use_cluster = False + + if not use_cluster: + client = self._build_standard_client(host, port, auth_kwargs) + self._using_cluster = False + + if client is None: # pragma: no cover - defensive guard + raise RuntimeError("Failed to create Redis client") + + self.redis_client = client + if self._cluster_preference is None: + self._cluster_auto = self._using_cluster def _refresh_loop(self): """Background thread: sleep until just before expiry, then refresh token.""" @@ -188,7 +392,9 @@ def _refresh_loop(self): def publish_event(self, stream_key: str, event_data: Dict[str, Any]) -> str: """Append an event to a Redis stream.""" with self._redis_span("Redis.XADD"): - return self.redis_client.xadd(stream_key, event_data) + return self._execute_with_redirect( + "XADD", lambda client: client.xadd(stream_key, event_data) + ) def read_events_blocking( self, @@ -202,8 +408,11 @@ def read_events_blocking( Returns list of new events (or None on timeout). 
""" with self._redis_span("Redis.XREAD"): - streams = self.redis_client.xread( - {stream_key: last_id}, block=block_ms, count=count + streams = self._execute_with_redirect( + "XREAD", + lambda client: client.xread( + {stream_key: last_id}, block=block_ms, count=count + ), ) return streams if streams else None @@ -231,13 +440,15 @@ async def ping(self) -> bool: """Check Redis connectivity.""" try: with self._redis_span("Redis.PING"): - return self.redis_client.ping() + return self._execute_with_redirect("PING", lambda client: client.ping()) except AuthenticationError: # token might have expired early: rebuild & retry once self.logger.info("Redis auth error on ping, refreshing token") - self._create_client() + self._create_client(force_cluster=self._using_cluster) with self._redis_span("Redis.PING"): - return self.redis_client.ping() + return self._execute_with_redirect( + "PING", lambda client: client.ping() + ) def set_value( self, key: str, value: str, ttl_seconds: Optional[int] = None @@ -245,40 +456,60 @@ def set_value( """Set a string value in Redis (optionally with TTL).""" with self._redis_span("Redis.SET"): if ttl_seconds is not None: - return self.redis_client.setex(key, ttl_seconds, str(value)) - return self.redis_client.set(key, str(value)) + return self._execute_with_redirect( + "SETEX", + lambda client: client.setex(key, ttl_seconds, str(value)), + ) + return self._execute_with_redirect( + "SET", lambda client: client.set(key, str(value)) + ) def get_value(self, key: str) -> Optional[str]: """Get a string value from Redis.""" with self._redis_span("Redis.GET"): - value = self.redis_client.get(key) + value = self._execute_with_redirect("GET", lambda client: client.get(key)) return value.decode() if isinstance(value, bytes) else value def store_session_data(self, session_id: str, data: Dict[str, Any]) -> bool: """Store session data using a Redis hash.""" with self._redis_span("Redis.HSET"): - return bool(self.redis_client.hset(session_id, mapping=data)) + return bool( + self._execute_with_redirect( + "HSET", lambda client: client.hset(session_id, mapping=data) + ) + ) def get_session_data(self, session_id: str) -> Dict[str, str]: """Retrieve all session data for a given session ID.""" with self._redis_span("Redis.HGETALL"): - raw = self.redis_client.hgetall(session_id) + raw = self._execute_with_redirect( + "HGETALL", lambda client: client.hgetall(session_id) + ) return dict(raw) def update_session_field(self, session_id: str, field: str, value: str) -> bool: """Update a single field in the session hash.""" with self._redis_span("Redis.HSET"): - return bool(self.redis_client.hset(session_id, field, value)) + return bool( + self._execute_with_redirect( + "HSET", + lambda client: client.hset(session_id, field, value), + ) + ) def delete_session(self, session_id: str) -> int: """Delete a session from Redis.""" with self._redis_span("Redis.DEL"): - return self.redis_client.delete(session_id) + return self._execute_with_redirect( + "DEL", lambda client: client.delete(session_id) + ) def list_connected_clients(self) -> List[Dict[str, str]]: """List currently connected clients.""" with self._redis_span("Redis.CLIENTLIST"): - return self.redis_client.client_list() + return self._execute_with_redirect( + "CLIENT LIST", lambda client: client.client_list() + ) async def store_session_data_async( self, session_id: str, data: Dict[str, Any] diff --git a/tests/load/multi_turn_load_test.py b/tests/load/multi_turn_load_test.py index 87268ebf..f7bc2628 100644 --- 
diff --git a/tests/load/multi_turn_load_test.py b/tests/load/multi_turn_load_test.py
index 87268ebf..f7bc2628 100644
--- a/tests/load/multi_turn_load_test.py
+++ b/tests/load/multi_turn_load_test.py
@@ -12,7 +12,7 @@
 from pathlib import Path
 from datetime import datetime
 
-from utils.load_test_conversations import ConversationLoadTester, LoadTestConfig
+from tests.load.utils.load_test_conversations import ConversationLoadTester, LoadTestConfig
 
 
 class MultiTurnLoadTest:
diff --git a/tests/load/utils/load_test_conversations.py b/tests/load/utils/load_test_conversations.py
index 22c8166f..55e96629 100644
--- a/tests/load/utils/load_test_conversations.py
+++ b/tests/load/utils/load_test_conversations.py
@@ -17,7 +17,7 @@
 import statistics
 from pathlib import Path
 
-from utils.conversation_simulator import (
+from tests.load.utils.conversation_simulator import (
     ConversationSimulator,
     ConversationTemplates,
     ConversationMetrics,
diff --git a/tests/test_acs_events_handlers.py b/tests/test_acs_events_handlers.py
index 3fcc8cc5..7004f48f 100644
--- a/tests/test_acs_events_handlers.py
+++ b/tests/test_acs_events_handlers.py
@@ -5,11 +5,17 @@
 Focused tests for the refactored ACS events handling.
 """
 
+import sys
+
 import pytest
 import asyncio
 from unittest.mock import AsyncMock, MagicMock, patch
 from azure.core.messaging import CloudEvent
 
+# The rtagent audio stack depends on sounddevice, which is unavailable in CI.
+# Inject a stub before importing handlers so tests can load without native deps.
+sys.modules.setdefault("sounddevice", MagicMock())
+
 from apps.rtagent.backend.api.v1.events.handlers import CallEventHandlers
 from apps.rtagent.backend.api.v1.events.types import (
     CallEventContext,
@@ -18,6 +24,11 @@
 )
 
 
+def run_async(coro):
+    """Execute coroutine in a fresh event loop for pytest compatibility."""
+    return asyncio.run(coro)
+
+
 class TestCallEventHandlers:
     """Test individual event handlers."""
 
@@ -37,10 +48,12 @@ def mock_context(self):
         )
         context.memo_manager = MagicMock()
         context.redis_mgr = MagicMock()
+        context.app_state = MagicMock()
+        context.app_state.redis_pool = None
         return context
 
     @patch("apps.rtagent.backend.api.v1.events.handlers.logger")
-    async def test_handle_call_initiated(self, mock_logger, mock_context):
+    def test_handle_call_initiated(self, mock_logger, mock_context):
         """Test call initiated handler."""
         mock_context.event_type = V1EventTypes.CALL_INITIATED
         mock_context.event.data = {
@@ -49,7 +62,7 @@ def test_handle_call_initiated(self, mock_logger, mock_context):
             "api_version": "v1",
         }
 
-        await CallEventHandlers.handle_call_initiated(mock_context)
+        run_async(CallEventHandlers.handle_call_initiated(mock_context))
 
         # Verify context updates
         assert mock_context.memo_manager.update_context.called
@@ -63,7 +76,7 @@ async def test_handle_call_initiated(self, mock_logger, mock_context):
         assert updates["call_direction"] == "outbound"
 
     @patch("apps.rtagent.backend.api.v1.events.handlers.logger")
-    async def test_handle_inbound_call_received(self, mock_logger, mock_context):
+    def test_handle_inbound_call_received(self, mock_logger, mock_context):
         """Test inbound call received handler."""
         mock_context.event_type = V1EventTypes.INBOUND_CALL_RECEIVED
         mock_context.event.data = {
@@ -71,7 +84,7 @@ async def test_handle_inbound_call_received(self, mock_logger, mock_context):
             "from": {"kind": "phoneNumber", "phoneNumber": {"value": "+1987654321"}},
         }
 
-        await CallEventHandlers.handle_inbound_call_received(mock_context)
+        run_async(CallEventHandlers.handle_inbound_call_received(mock_context))
 
         # Verify context updates
         calls = mock_context.memo_manager.update_context.call_args_list
@@ -81,31 +94,38 @@ async def test_handle_inbound_call_received(self, mock_logger, mock_context):
         assert updates["caller_id"] == "+1987654321"
 
     @patch("apps.rtagent.backend.api.v1.events.handlers.logger")
-    async def test_handle_call_connected_with_broadcast(
+    def test_handle_call_connected_with_broadcast(
         self, mock_logger, mock_context
     ):
         """Test call connected handler with WebSocket broadcast."""
         mock_clients = [MagicMock(), MagicMock()]
         mock_context.clients = mock_clients
+        mock_call_conn = MagicMock()
+        mock_call_conn.list_participants.return_value = []
+        mock_context.acs_caller = MagicMock()
+        mock_context.acs_caller.get_call_connection.return_value = mock_call_conn
 
         with patch(
             "apps.rtagent.backend.api.v1.events.handlers.broadcast_message"
-        ) as mock_broadcast:
-            await CallEventHandlers.handle_call_connected(mock_context)
+        ) as mock_broadcast, patch(
+            "apps.rtagent.backend.api.v1.events.handlers.DTMFValidationLifecycle.setup_aws_connect_validation_flow",
+            new=AsyncMock(),
+        ):
+            run_async(CallEventHandlers.handle_call_connected(mock_context))
 
             mock_broadcast.assert_called_once()
 
             # Verify message structure
-            args = mock_broadcast.call_args
-            assert args[0][0] == mock_clients  # clients
+            call_args, call_kwargs = mock_broadcast.call_args
+            assert call_args[0] is None
 
             # Message should be JSON string
             import json
 
-            message = json.loads(args[0][1])
+            message = json.loads(call_args[1])
             assert message["type"] == "call_connected"
             assert message["call_connection_id"] == "test_123"
 
     @patch("apps.rtagent.backend.api.v1.events.handlers.logger")
-    async def test_handle_dtmf_tone_received(self, mock_logger, mock_context):
+    def test_handle_dtmf_tone_received(self, mock_logger, mock_context):
         """Test DTMF tone handling."""
         mock_context.event_type = ACSEventTypes.DTMF_TONE_RECEIVED
         mock_context.event.data = {
@@ -117,26 +137,26 @@ async def test_handle_dtmf_tone_received(self, mock_logger, mock_context):
         # Mock current sequence
         mock_context.memo_manager.get_context.return_value = "123"
 
-        await CallEventHandlers.handle_dtmf_tone_received(mock_context)
+        run_async(CallEventHandlers.handle_dtmf_tone_received(mock_context))
 
         # Should update DTMF sequence
         mock_context.memo_manager.update_context.assert_called()
 
-    async def test_extract_caller_id_phone_number(self):
+    def test_extract_caller_id_phone_number(self):
         """Test caller ID extraction from phone number."""
         caller_info = {"kind": "phoneNumber", "phoneNumber": {"value": "+1234567890"}}
 
         caller_id = CallEventHandlers._extract_caller_id(caller_info)
         assert caller_id == "+1234567890"
 
-    async def test_extract_caller_id_raw_id(self):
+    def test_extract_caller_id_raw_id(self):
         """Test caller ID extraction from raw ID."""
         caller_info = {"kind": "other", "rawId": "user@domain.com"}
 
         caller_id = CallEventHandlers._extract_caller_id(caller_info)
         assert caller_id == "user@domain.com"
 
-    async def test_extract_caller_id_fallback(self):
+    def test_extract_caller_id_fallback(self):
         """Test caller ID extraction fallback."""
         caller_info = {}
 
@@ -148,7 +168,7 @@ class TestEventProcessingFlow:
     """Test event processing flow."""
 
     @patch("apps.rtagent.backend.api.v1.events.handlers.logger")
-    async def test_webhook_event_routing(self, mock_logger):
+    def test_webhook_event_routing(self, mock_logger):
         """Test webhook event router."""
         event = CloudEvent(
             source="test",
@@ -163,11 +183,11 @@ async def test_webhook_event_routing(self, mock_logger):
         )
 
         with patch.object(CallEventHandlers, "handle_call_connected") as mock_handler:
-            await CallEventHandlers.handle_webhook_events(context)
+            run_async(CallEventHandlers.handle_webhook_events(context))
 
             mock_handler.assert_called_once_with(context)
 
     @patch("apps.rtagent.backend.api.v1.events.handlers.logger")
-    async def test_unknown_event_type_handling(self, mock_logger):
+    def test_unknown_event_type_handling(self, mock_logger):
         """Test handling of unknown event types."""
         event = CloudEvent(
             source="test",
@@ -180,7 +200,7 @@ async def test_unknown_event_type_handling(self, mock_logger):
         )
 
         # Should handle gracefully without error
-        await CallEventHandlers.handle_webhook_events(context)
+        run_async(CallEventHandlers.handle_webhook_events(context))
 
         # No specific handler should be called for unknown type
         # This should just log and continue
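All of the conversions in this file follow one recipe: the handler call stays a coroutine, but pytest collects a plain sync test that drives a short-lived event loop, so the pytest-asyncio plugin is not required for these cases. A standalone illustration of the pattern (hypothetical test, not part of this patch):

    import asyncio

    async def handler_under_test() -> str:
        await asyncio.sleep(0)  # stand-in for real awaited work
        return "ok"

    def test_handler_under_test():
        # Each test owns its own loop; nothing leaks between tests.
        assert asyncio.run(handler_under_test()) == "ok"
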
diff --git a/tests/test_acs_media_lifecycle.py b/tests/test_acs_media_lifecycle.py
index 146abf31..5f10a890 100644
--- a/tests/test_acs_media_lifecycle.py
+++ b/tests/test_acs_media_lifecycle.py
@@ -1,750 +1,279 @@
-"""
-Tests for ACS Media Lifecycle Three-Thread Architecture
-======================================================
+"""Unit tests for ACS media lifecycle components aligned with the current implementation."""
 
-Tests the complete V1 ACS Media Handler implementation including:
-- Three-thread architecture (Speech SDK, Route Turn, Main Event Loop)
-- Cross-thread communication via ThreadBridge
-- Barge-in detection and cancellation
-- Speech recognition callback handling
-- Media message processing
-- Handler lifecycle management
-
-"""
-
-import pytest
 import asyncio
-import json
 import base64
-import threading
-import time
-from unittest.mock import Mock, AsyncMock, MagicMock, patch, call
-from typing import Optional, Dict, Any
+import json
+from unittest.mock import AsyncMock, MagicMock, Mock, patch
+
+import pytest
 
-# Import the classes under test
 from apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle import (
-    ACSMediaHandler,
-    ThreadBridge,
-    SpeechSDKThread,
-    RouteTurnThread,
     MainEventLoop,
+    RouteTurnThread,
     SpeechEvent,
     SpeechEventType,
+    SpeechSDKThread,
+    ThreadBridge,
 )
 
 
-class MockWebSocket:
-    """Mock WebSocket for testing."""
-
-    def __init__(self):
-        self.sent_messages = []
-        self.closed = False
-
-    async def send_text(self, message: str):
-        """Mock send_text method."""
-        self.sent_messages.append(message)
-
-    async def close(self):
-        """Mock close method."""
-        self.closed = True
-
-
-class MockRecognizer:
-    """Mock speech recognizer for testing."""
+class DummyRecognizer:
+    """Lightweight recognizer test double that matches the current interface."""
 
     def __init__(self):
         self.started = False
         self.stopped = False
         self.callbacks = {}
+        self.push_stream = None
+        self.create_push_stream_called = False
+        self.prepare_stream_called = False
+        self.prepare_start_called = False
         self.write_bytes_calls = []
 
     def set_partial_result_callback(self, callback):
-        """Mock partial result callback setter."""
         self.callbacks["partial"] = callback
 
     def set_final_result_callback(self, callback):
-        """Mock final result callback setter."""
         self.callbacks["final"] = callback
 
     def set_cancel_callback(self, callback):
-        """Mock cancel callback setter."""
         self.callbacks["cancel"] = callback
 
+    def create_push_stream(self):
+        self.create_push_stream_called = True
+        self.push_stream = object()
+
+    def prepare_stream(self):
+        self.prepare_stream_called = True
+        self.push_stream = object()
+
+    def prepare_start(self):
+        self.prepare_start_called = True
+        self.push_stream = object()
+
     def start(self):
-        """Mock start method."""
         self.started = True
 
     def stop(self):
-        """Mock stop method."""
         self.stopped = True
 
-    def write_bytes(self, audio_bytes: bytes):
-        """Mock write_bytes method."""
-        self.write_bytes_calls.append(len(audio_bytes))
-
-    def trigger_partial(self, text: str, lang: str = "en-US"):
-        """Helper method to trigger partial callback."""
-        if "partial" in self.callbacks:
-            self.callbacks["partial"](text, lang)
-
-    def trigger_final(self, text: str, lang: str = "en-US"):
-        """Helper method to trigger final callback."""
-        if "final" in self.callbacks:
-            self.callbacks["final"](text, lang)
-
-    def trigger_error(self, error: str):
-        """Helper method to trigger error callback."""
-        if "cancel" in self.callbacks:
-            self.callbacks["cancel"](error)
-
-
-class MockOrchestrator:
-    """Mock orchestrator function for testing."""
-
-    def __init__(self):
-        self.calls = []
-        self.responses = ["Hello, how can I help you?"]
-        self.call_index = 0
-
-    async def __call__(self, cm, transcript: str, ws):
-        """Mock orchestrator call."""
-        self.calls.append({"transcript": transcript, "timestamp": time.time()})
-
-        # Return mock response
-        response = self.responses[self.call_index % len(self.responses)]
-        self.call_index += 1
-        return response
+    def write_bytes(self, data: bytes):
+        self.write_bytes_calls.append(data)
 
 
 @pytest.fixture
-def mock_websocket():
-    """Fixture providing a mock WebSocket."""
-    return MockWebSocket()
+def dummy_recognizer():
+    return DummyRecognizer()
 
 
-@pytest.fixture
-def mock_recognizer():
-    """Fixture providing a mock speech recognizer."""
-    return MockRecognizer()
+@pytest.mark.asyncio
+async def test_thread_bridge_queue_speech_result_put_nowait():
+    bridge = ThreadBridge(call_connection_id="call-12345678")
+    queue = asyncio.Queue()
+    event = SpeechEvent(
+        event_type=SpeechEventType.FINAL,
+        text="hello",
+        language="en-US",
+    )
 
+    bridge.queue_speech_result(queue, event)
 
-@pytest.fixture
-def mock_orchestrator():
-    """Fixture providing a mock orchestrator."""
-    return MockOrchestrator()
+    queued_event = await asyncio.wait_for(queue.get(), timeout=0.1)
+    assert queued_event.text == "hello"
+    assert queue.empty()
 
 
-@pytest.fixture
-def mock_memory_manager():
-    """Fixture providing a mock memory manager."""
-    return Mock()
-
-
-@pytest.fixture
-async def media_handler(
-    mock_websocket, mock_recognizer, mock_orchestrator, mock_memory_manager
-):
-    """Fixture providing a configured ACS Media Handler."""
-    with patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger"):
-        handler = ACSMediaHandler(
-            websocket=mock_websocket,
-            call_connection_id="test-call-123",
-            session_id="test-session-456",
-            recognizer=mock_recognizer,
-            orchestrator_func=mock_orchestrator,
-            memory_manager=mock_memory_manager,
-            greeting_text="Hello, welcome to our service!",
-        )
+@pytest.mark.asyncio
+async def test_thread_bridge_queue_speech_result_drops_when_full():
+    bridge = ThreadBridge(call_connection_id="call-abcdef01")
+    bridge.set_main_loop(asyncio.get_running_loop())
 
-        # Start the handler
-        await handler.start()
+    queue = asyncio.Queue(maxsize=1)
+    await queue.put("sentinel")
 
-        yield handler
-
-        # Cleanup
-        await handler.stop()
-
-
-class TestThreadBridge:
-    """Test ThreadBridge cross-thread communication."""
-
-    def test_initialization(self):
-        """Test ThreadBridge initialization."""
-        bridge = ThreadBridge()
-        assert bridge.main_loop is None
-
-    def test_set_main_loop(self):
-        """Test setting main event loop."""
-        bridge = ThreadBridge()
-        loop = asyncio.new_event_loop()
-
-        bridge.set_main_loop(loop)
-        assert bridge.main_loop is loop
-
-    @pytest.mark.asyncio
-    async def test_queue_speech_result_put_nowait(self):
-        """Test queuing speech result using put_nowait."""
-        bridge = ThreadBridge()
-        queue = asyncio.Queue(maxsize=10)
-
-        event = SpeechEvent(
-            event_type=SpeechEventType.FINAL, text="Hello world", language="en-US"
-        )
+    event = SpeechEvent(
+        event_type=SpeechEventType.PARTIAL,
+        text="queued",
+        language="en-US",
+    )
 
+    with patch(
+        "apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger.warning"
+    ) as warning_mock:
         bridge.queue_speech_result(queue, event)
 
-        # Verify event was queued
-        queued_event = await asyncio.wait_for(queue.get(), timeout=1.0)
-        assert queued_event.text == "Hello world"
-        assert queued_event.event_type == SpeechEventType.FINAL
+    await queue.get()
+    assert queue.empty()
+    warning_mock.assert_called_once()
 
-    @pytest.mark.asyncio
-    async def test_queue_speech_result_with_event_loop(self):
-        """Test queuing speech result with event loop fallback."""
-        bridge = ThreadBridge()
-        loop = asyncio.get_running_loop()
-        bridge.set_main_loop(loop)
 
-        # Create a full queue to force fallback
-        queue = asyncio.Queue(maxsize=1)
-        await queue.put("dummy_item")  # Fill the queue
-
-        event = SpeechEvent(
-            event_type=SpeechEventType.PARTIAL, text="Test", language="en-US"
-        )
-
-        # This should use the event loop fallback
-        bridge.queue_speech_result(queue, event)
-
-        # Remove dummy item and check for our event
-        await queue.get()  # Remove dummy
-        queued_event = await asyncio.wait_for(queue.get(), timeout=1.0)
-        assert queued_event.text == "Test"
-
-
-class TestSpeechSDKThread:
-    """Test SpeechSDKThread functionality."""
-
-    def test_initialization(self, mock_recognizer):
-        """Test SpeechSDKThread initialization."""
-        bridge = ThreadBridge()
-        speech_queue = asyncio.Queue()
-        barge_in_handler = AsyncMock()
+@pytest.mark.asyncio
+async def test_speechsdkthread_preinitializes_push_stream(dummy_recognizer):
+    bridge = ThreadBridge(call_connection_id="call-abcdef12")
+    speech_queue = asyncio.Queue()
+    barge_in_handler = AsyncMock()
 
+    with patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger"):
         thread = SpeechSDKThread(
-            recognizer=mock_recognizer,
+            recognizer=dummy_recognizer,
             thread_bridge=bridge,
             barge_in_handler=barge_in_handler,
             speech_queue=speech_queue,
         )
 
-        assert thread.recognizer is mock_recognizer
-        assert thread.thread_bridge is bridge
-        assert not thread.thread_running
-        assert not thread.recognizer_started
+    assert dummy_recognizer.create_push_stream_called or dummy_recognizer.push_stream
+    assert set(dummy_recognizer.callbacks) == {"partial", "final", "cancel"}
 
-    def test_callback_setup(self, mock_recognizer):
-        """Test speech recognition callback setup."""
-        bridge = ThreadBridge()
-        speech_queue = asyncio.Queue()
-        barge_in_handler = AsyncMock()
+    thread.stop()
 
-        thread = SpeechSDKThread(
-            recognizer=mock_recognizer,
-            thread_bridge=bridge,
-            barge_in_handler=barge_in_handler,
-            speech_queue=speech_queue,
-        )
-
-        # Verify callbacks were set
-        assert "partial" in mock_recognizer.callbacks
-        assert "final" in mock_recognizer.callbacks
-        assert "cancel" in mock_recognizer.callbacks
 
-    def test_prepare_thread(self, mock_recognizer):
-        """Test thread preparation."""
-        bridge = ThreadBridge()
-        speech_queue = asyncio.Queue()
-        barge_in_handler = AsyncMock()
+@pytest.mark.asyncio
+async def test_speechsdkthread_start_requires_thread_running(dummy_recognizer):
+    bridge = ThreadBridge(call_connection_id="call-abcdef12")
+    speech_queue = asyncio.Queue()
 
+    with patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger"):
         thread = SpeechSDKThread(
-            recognizer=mock_recognizer,
+            recognizer=dummy_recognizer,
             thread_bridge=bridge,
-            barge_in_handler=barge_in_handler,
+            barge_in_handler=AsyncMock(),
             speech_queue=speech_queue,
         )
 
-        thread.prepare_thread()
+    thread.start_recognizer()
+
+    assert not dummy_recognizer.started
+    assert not thread.recognizer_started
 
-        assert thread.thread_running
-        assert thread.thread_obj is not None
-        assert thread.thread_obj.is_alive()
+    thread.stop()
 
-        # Cleanup
-        thread.stop()
 
-    def test_start_recognizer(self, mock_recognizer):
-        """Test recognizer startup."""
-        bridge = ThreadBridge()
-        speech_queue = asyncio.Queue()
-        barge_in_handler = AsyncMock()
+@pytest.mark.asyncio
+async def test_speechsdkthread_prepare_then_start(dummy_recognizer):
+    bridge = ThreadBridge(call_connection_id="call-abcdef12")
+    speech_queue = asyncio.Queue()
 
+    with patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger"):
         thread = SpeechSDKThread(
-            recognizer=mock_recognizer,
+            recognizer=dummy_recognizer,
             thread_bridge=bridge,
-            barge_in_handler=barge_in_handler,
+            barge_in_handler=AsyncMock(),
             speech_queue=speech_queue,
         )
 
-        thread.prepare_thread()
-        thread.start_recognizer()
-
-        assert mock_recognizer.started
-        assert thread.recognizer_started
-
-        # Cleanup
-        thread.stop()
-
-
-class TestMainEventLoop:
-    """Test MainEventLoop media processing."""
-
-    @pytest.fixture
-    def main_event_loop(self, mock_websocket):
-        """Fixture for MainEventLoop."""
-        route_turn_thread = Mock()
-        return MainEventLoop(mock_websocket, "test-call-123", route_turn_thread)
-
-    @pytest.mark.asyncio
-    async def test_handle_audio_metadata(self, main_event_loop, mock_recognizer):
-        """Test AudioMetadata handling."""
-        acs_handler = Mock()
-        acs_handler.speech_sdk_thread = Mock()
-        acs_handler.speech_sdk_thread.start_recognizer = Mock()
-
-        stream_data = json.dumps(
-            {
-                "kind": "AudioMetadata",
-                "audioMetadata": {
-                    "subscriptionId": "test",
-                    "encoding": "PCM",
-                    "sampleRate": 16000,
-                    "channels": 1,
-                },
-            }
-        )
-
-        await main_event_loop.handle_media_message(
-            stream_data, mock_recognizer, acs_handler
-        )
-
-        # Verify recognizer was started
-        acs_handler.speech_sdk_thread.start_recognizer.assert_called_once()
-
-    @pytest.mark.asyncio
-    async def test_handle_audio_data(self, main_event_loop, mock_recognizer):
-        """Test AudioData processing."""
-        # Mock audio data (base64 encoded)
-        audio_bytes = b"\x00" * 320  # 20ms of silence
-        audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
-
-        stream_data = json.dumps(
-            {"kind": "AudioData", "audioData": {"data": audio_b64, "silent": False}}
-        )
-
-        with patch.object(
-            main_event_loop, "_process_audio_chunk_async"
-        ) as mock_process:
-            await main_event_loop.handle_media_message(
-                stream_data, mock_recognizer, None
-            )
-
-            # Give async task time to start
-            await asyncio.sleep(0.1)
-
-            # Verify audio processing was scheduled
-            mock_process.assert_called_once()
-
-    @pytest.mark.asyncio
-    async def test_process_audio_chunk_async(self, main_event_loop, mock_recognizer):
-        """Test audio chunk processing."""
-        audio_bytes = b"\x00" * 320
-        audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
-
-        await main_event_loop._process_audio_chunk_async(audio_b64, mock_recognizer)
+    thread.prepare_thread()
+    await asyncio.sleep(0)
+    thread.start_recognizer()
 
-        # Verify recognizer received audio
-        assert len(mock_recognizer.write_bytes_calls) == 1
-        assert mock_recognizer.write_bytes_calls[0] == 320
+    assert dummy_recognizer.started
+    assert thread.recognizer_started
 
-    @pytest.mark.asyncio
-    async def test_barge_in_handling(self, main_event_loop):
-        """Test barge-in interruption."""
-        # Mock current playback task
-        main_event_loop.current_playback_task = AsyncMock()
-        main_event_loop.route_turn_thread = AsyncMock()
+    thread.stop()
 
-        with patch.object(main_event_loop, "_send_stop_audio_command") as mock_stop:
-            await main_event_loop.handle_barge_in()
 
-            # Verify barge-in actions
-            main_event_loop.current_playback_task.cancel.assert_called_once()
-            main_event_loop.route_turn_thread.cancel_current_processing.assert_called_once()
-            mock_stop.assert_called_once()
+@pytest.mark.asyncio
+async def test_main_event_loop_handles_audio_metadata():
+    mock_websocket = MagicMock()
+    mock_websocket.send_text = AsyncMock()
+    mock_websocket.state = MagicMock()
 
+    main_loop = MainEventLoop(mock_websocket, "call-abcdef12", None)
 
-class TestRouteTurnThread:
-    """Test RouteTurnThread conversation processing."""
+    handler = MagicMock()
+    handler.speech_sdk_thread.start_recognizer = Mock()
+    handler.thread_bridge.queue_speech_result = Mock()
+    handler.speech_queue = asyncio.Queue()
+    handler.greeting_text = "Welcome!"
 
-    @pytest.mark.asyncio
-    async def test_initialization(
-        self, mock_orchestrator, mock_memory_manager, mock_websocket
-    ):
-        """Test RouteTurnThread initialization."""
-        speech_queue = asyncio.Queue()
-
-        thread = RouteTurnThread(
-            speech_queue=speech_queue,
-            orchestrator_func=mock_orchestrator,
-            memory_manager=mock_memory_manager,
-            websocket=mock_websocket,
-        )
+    metadata_message = json.dumps(
+        {
+            "kind": "AudioMetadata",
+            "audioMetadata": {
+                "encoding": "PCM",
+                "sampleRate": 24000,
+                "channels": 1,
+            },
+        }
+    )
 
-        assert thread.speech_queue is speech_queue
-        assert thread.orchestrator_func is mock_orchestrator
-        assert not thread.running
+    await main_loop.handle_media_message(metadata_message, recognizer=None, acs_handler=handler)
 
-    @pytest.mark.asyncio
-    async def test_speech_event_processing(
-        self, mock_orchestrator, mock_memory_manager, mock_websocket
-    ):
-        """Test processing speech events."""
-        speech_queue = asyncio.Queue()
+    handler.speech_sdk_thread.start_recognizer.assert_called_once()
+    handler.thread_bridge.queue_speech_result.assert_called_once()
+    assert main_loop.greeting_played
 
-        thread = RouteTurnThread(
-            speech_queue=speech_queue,
-            orchestrator_func=mock_orchestrator,
-            memory_manager=mock_memory_manager,
-            websocket=mock_websocket,
-        )
+    await main_loop.handle_media_message(metadata_message, recognizer=None, acs_handler=handler)
+    handler.thread_bridge.queue_speech_result.assert_called_once()
 
-        # Start the thread
-        await thread.start()
 
-        # Queue a speech event
-        event = SpeechEvent(
-            event_type=SpeechEventType.FINAL, text="Hello world", language="en-US"
-        )
-        await speech_queue.put(event)
-
-        # Give time for processing
-        await asyncio.sleep(0.1)
-
-        # Verify orchestrator was called
-        assert len(mock_orchestrator.calls) == 1
-        assert mock_orchestrator.calls[0]["transcript"] == "Hello world"
-
-        # Cleanup
-        await thread.stop()
-
-
-class TestACSMediaHandler:
-    """Test complete ACS Media Handler integration."""
-
-    @pytest.mark.asyncio
-    async def test_handler_lifecycle(self, media_handler, mock_recognizer):
-        """Test complete handler lifecycle."""
-        # Verify handler started correctly
-        assert media_handler.running
-        assert media_handler.speech_sdk_thread.thread_running
-
-        # Test stopping
-        await media_handler.stop()
-        assert not media_handler.running
-        assert media_handler._stopped
-
-    @pytest.mark.asyncio
-    @patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger")
-    async def test_media_message_processing(
-        self, mock_logger, media_handler, mock_recognizer
-    ):
-        """Test end-to-end media message processing."""
-        # Send AudioMetadata
-        metadata = json.dumps(
-            {
-                "kind": "AudioMetadata",
-                "audioMetadata": {
-                    "subscriptionId": "test",
-                    "encoding": "PCM",
-                    "sampleRate": 16000,
-                },
-            }
-        )
+@pytest.mark.asyncio
+async def test_main_event_loop_process_audio_chunk_async():
+    mock_websocket = MagicMock()
+    mock_websocket.send_text = AsyncMock()
+    mock_websocket.state = MagicMock()
 
-        await media_handler.handle_media_message(metadata)
+    main_loop = MainEventLoop(mock_websocket, "call-abcdef12", None)
 
-        # Verify recognizer was started
-        assert mock_recognizer.started
+    recognizer = MagicMock()
+    recognizer.push_stream = object()
+    recognizer.write_bytes = MagicMock()
 
-        # Send AudioData
-        audio_bytes = b"\x00" * 320
-        audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
+    encoded = base64.b64encode(b"audio-bytes").decode("ascii")
 
-        audio_data = json.dumps(
-            {"kind": "AudioData", "audioData": {"data": audio_b64, "silent": False}}
-        )
+    await main_loop._process_audio_chunk_async(encoded, recognizer)
 
-        await media_handler.handle_media_message(audio_data)
-
-        # Give async processing time
-        await asyncio.sleep(0.1)
-
-        # Verify audio was processed
-        assert len(mock_recognizer.write_bytes_calls) > 0
-
-    @pytest.mark.asyncio
-    @patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger")
-    async def test_barge_in_flow(
-        self, mock_logger, media_handler, mock_recognizer, mock_orchestrator
-    ):
-        """Test complete barge-in detection and cancellation flow."""
-        # Start processing by triggering recognizer
-        await media_handler.handle_media_message(
-            json.dumps(
-                {"kind": "AudioMetadata", "audioMetadata": {"subscriptionId": "test"}}
-            )
-        )
-
-        # Simulate speech detection that should trigger barge-in
-        mock_recognizer.trigger_partial("Hello", "en-US")
-
-        # Give time for barge-in processing
-        await asyncio.sleep(0.1)
-
-        # Verify barge-in was triggered (check WebSocket for stop command)
-        sent_messages = media_handler.websocket.sent_messages
-        stop_commands = [msg for msg in sent_messages if "StopAudio" in msg]
-        assert len(stop_commands) > 0
-
-    @pytest.mark.asyncio
-    @patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger")
-    async def test_speech_recognition_callbacks(
-        self, mock_logger, media_handler, mock_recognizer, mock_orchestrator
-    ):
-        """Test speech recognition callback integration."""
-        # Start recognizer
-        await media_handler.handle_media_message(
-            json.dumps(
-                {"kind": "AudioMetadata", "audioMetadata": {"subscriptionId": "test"}}
-            )
-        )
+    recognizer.write_bytes.assert_called_once_with(b"audio-bytes")
 
-        # Trigger final speech result
-        mock_recognizer.trigger_final("How can you help me?", "en-US")
-
-        # Give time for processing
-        await asyncio.sleep(0.2)
-
-        # Verify orchestrator was called
-        assert len(mock_orchestrator.calls) == 1
-        assert mock_orchestrator.calls[0]["transcript"] == "How can you help me?"
-
-    @pytest.mark.asyncio
-    @patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger")
-    async def test_error_handling(self, mock_logger, media_handler, mock_recognizer):
-        """Test error handling in speech recognition."""
-        # Start recognizer
-        await media_handler.handle_media_message(
-            json.dumps(
-                {"kind": "AudioMetadata", "audioMetadata": {"subscriptionId": "test"}}
-            )
-        )
-
-        # Trigger error
-        mock_recognizer.trigger_error("Test error message")
-
-        # Give time for processing
-        await asyncio.sleep(0.1)
-
-        # Verify error was handled (no exceptions raised)
-        assert media_handler.running  # Handler should still be running
-
-    @pytest.mark.asyncio
-    @patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger")
-    async def test_concurrent_audio_processing(
-        self, mock_logger, media_handler, mock_recognizer
-    ):
-        """Test concurrent audio chunk processing with task limiting."""
-        # Start recognizer
-        await media_handler.handle_media_message(
-            json.dumps(
-                {"kind": "AudioMetadata", "audioMetadata": {"subscriptionId": "test"}}
-            )
-        )
+
+@pytest.mark.asyncio
+async def test_main_event_loop_handle_barge_in_cancels_playback():
+    mock_websocket = MagicMock()
+    mock_websocket.send_text = AsyncMock()
+    mock_websocket.state = MagicMock()
 
-        # Send multiple audio chunks rapidly
-        audio_bytes = b"\x00" * 320
-        audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
+    route_turn_thread = MagicMock()
+    route_turn_thread.cancel_current_processing = AsyncMock()
 
-        audio_data = json.dumps(
-            {"kind": "AudioData", "audioData": {"data": audio_b64, "silent": False}}
-        )
-
-        # Send 10 audio chunks
-        tasks = []
-        for _ in range(10):
-            task = asyncio.create_task(media_handler.handle_media_message(audio_data))
-            tasks.append(task)
-
-        # Wait for all processing
-        await asyncio.gather(*tasks)
-        await asyncio.sleep(0.2)
-
-        # Verify audio processing occurred (some may be dropped due to limiting)
-        assert len(mock_recognizer.write_bytes_calls) > 0
-        assert len(mock_recognizer.write_bytes_calls) <= 10
-
-
-class TestSpeechEvent:
-    """Test SpeechEvent data structure."""
-
-    def test_speech_event_creation(self):
-        """Test SpeechEvent creation and timing."""
-        event = SpeechEvent(
-            event_type=SpeechEventType.FINAL,
-            text="Hello world",
-            language="en-US",
-            speaker_id="speaker1",
-        )
-
-        assert event.event_type == SpeechEventType.FINAL
-        assert event.text == "Hello world"
-        assert event.language == "en-US"
-        assert event.speaker_id == "speaker1"
-        assert isinstance(event.timestamp, float)
-        assert event.timestamp > 0
-
-    def test_speech_event_types(self):
-        """Test all speech event types."""
-        # Test all event types
-        for event_type in SpeechEventType:
-            event = SpeechEvent(event_type=event_type, text="test", language="en-US")
-            assert event.event_type == event_type
-
-
-# Integration test scenarios
-class TestIntegrationScenarios:
-    """Integration tests for realistic usage scenarios."""
-
-    @pytest.mark.asyncio
-    @patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger")
-    async def test_call_flow_with_greeting(
-        self,
-        mock_logger,
-        mock_websocket,
-        mock_recognizer,
-        mock_orchestrator,
-        mock_memory_manager,
-    ):
-        """Test complete call flow including greeting."""
-        # Create handler with greeting
-        handler = ACSMediaHandler(
-            websocket=mock_websocket,
-            call_connection_id="test-call-integration",
-            session_id="test-session-integration",
-            recognizer=mock_recognizer,
-            orchestrator_func=mock_orchestrator,
-            memory_manager=mock_memory_manager,
-            greeting_text="Welcome! How can I help you today?",
-        )
-
-        await handler.start()
-
-        try:
-            # Simulate call connection with AudioMetadata
-            await handler.handle_media_message(
-                json.dumps(
-                    {
-                        "kind": "AudioMetadata",
-                        "audioMetadata": {
-                            "subscriptionId": "test-integration",
-                            "encoding": "PCM",
-                            "sampleRate": 16000,
-                            "channels": 1,
-                        },
-                    }
-                )
-            )
-
-            # Give time for greeting to be processed
-            await asyncio.sleep(0.3)
-
-            # Simulate customer speech
-            mock_recognizer.trigger_final("I need help with my account", "en-US")
-
-            # Give time for orchestrator processing
-            await asyncio.sleep(0.2)
-
-            # Verify greeting was sent and customer speech processed
-            assert len(mock_orchestrator.calls) >= 1
-            assert any(
-                "account" in call["transcript"].lower()
-                for call in mock_orchestrator.calls
-            )
-
-        finally:
-            await handler.stop()
-
-    @pytest.mark.asyncio
-    @patch("apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle.logger")
-    async def test_barge_in_during_response(
-        self,
-        mock_logger,
-        mock_websocket,
-        mock_recognizer,
-        mock_orchestrator,
-        mock_memory_manager,
-    ):
-        """Test barge-in interruption during AI response playback."""
-        handler = ACSMediaHandler(
-            websocket=mock_websocket,
-            call_connection_id="test-barge-in",
-            session_id="test-barge-in-session",
-            recognizer=mock_recognizer,
-            orchestrator_func=mock_orchestrator,
-            memory_manager=mock_memory_manager,
-        )
-
-        await handler.start()
+    main_loop = MainEventLoop(mock_websocket, "call-abcdef12", route_turn_thread)
+    main_loop.current_playback_task = asyncio.create_task(asyncio.sleep(1))
 
-        try:
-            # Start call
-            await handler.handle_media_message(
-                json.dumps(
-                    {
-                        "kind": "AudioMetadata",
-                        "audioMetadata": {"subscriptionId": "test-barge-in"},
-                    }
-                )
-            )
+    await main_loop.handle_barge_in()
 
-            # Customer asks question
-            mock_recognizer.trigger_final("What are your hours?", "en-US")
-            await asyncio.sleep(0.1)
+    route_turn_thread.cancel_current_processing.assert_awaited()
+    mock_websocket.send_text.assert_called()
+    assert main_loop.current_playback_task.cancelled()
 
-            # While AI is responding, customer interrupts (barge-in)
-            mock_recognizer.trigger_partial("Actually, I need to", "en-US")
-            await asyncio.sleep(0.1)
+    await asyncio.sleep(0.11)
+    assert not main_loop.barge_in_active.is_set()
 
-            # Verify stop audio command was sent for barge-in
-            sent_messages = handler.websocket.sent_messages
-            stop_commands = [msg for msg in sent_messages if "StopAudio" in msg]
-            assert len(stop_commands) > 0
 
-        finally:
-            await handler.stop()
+@pytest.mark.asyncio
+async def test_route_turn_thread_cancel_current_processing_clears_queue():
+    speech_queue = asyncio.Queue()
+    await speech_queue.put(
+        SpeechEvent(event_type=SpeechEventType.FINAL, text="hello", language="en-US")
+    )
 
+    orchestrator = AsyncMock()
+    memory_manager = MagicMock()
+    websocket = MagicMock()
+    websocket.state = MagicMock()
 
-if __name__ == "__main__":
-    # Run tests with verbose output
-    pytest.main([__file__, "-v", "--tb=short"])
+    route_thread = RouteTurnThread(
+        call_connection_id="call-abcdef12",
+        speech_queue=speech_queue,
+        orchestrator_func=orchestrator,
+        memory_manager=memory_manager,
+        websocket=websocket,
+    )
 
+    route_thread.current_response_task = asyncio.create_task(asyncio.sleep(1))
 
+    await route_thread.cancel_current_processing()
 
+    assert speech_queue.empty()
+    assert route_thread.current_response_task.cancelled()
+    # Cleanup to silence lingering tasks
+    await asyncio.sleep(0)
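The two ThreadBridge tests above pin the hand-off contract: `queue_speech_result` must deliver an event from the Speech SDK callback thread into an asyncio queue without blocking, and must warn and drop when the queue is full. A minimal sketch of that technique — only the names, the module-level logger, and the drop-and-warn behavior come from the tests; the internals are assumed:

    import asyncio
    import logging

    logger = logging.getLogger(__name__)

    class ThreadBridge:
        """Hand speech events from SDK callback threads to the main loop."""

        def __init__(self, call_connection_id: str) -> None:
            self.call_connection_id = call_connection_id
            self.main_loop = None  # set to the loop that owns the queue

        def set_main_loop(self, loop: asyncio.AbstractEventLoop) -> None:
            self.main_loop = loop

        def queue_speech_result(self, queue: asyncio.Queue, event) -> None:
            try:
                queue.put_nowait(event)  # fast path; never blocks the SDK thread
            except asyncio.QueueFull:
                # Bounded queue is full: drop the event and warn rather than
                # stall recognition while the consumer catches up.
                logger.warning(
                    "Speech queue full for %s; dropping %s event",
                    self.call_connection_id,
                    getattr(event, "event_type", "unknown"),
                )

The simulation in tests/test_speech_queue.py further down exercises the companion cross-thread path, `asyncio.run_coroutine_threadsafe(queue.put(event), main_loop)`, which is the safe fallback when the producing thread must await the put.
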
diff --git a/tests/test_acs_media_lifecycle_memory.py b/tests/test_acs_media_lifecycle_memory.py
index 78412cbd..c64a88a8 100644
--- a/tests/test_acs_media_lifecycle_memory.py
+++ b/tests/test_acs_media_lifecycle_memory.py
@@ -8,7 +8,6 @@
 from apps.rtagent.backend.api.v1.handlers.acs_media_lifecycle import (
     ACSMediaHandler,
-    get_active_handlers_count,
 )
 
 
@@ -92,21 +91,6 @@ async def dummy_orchestrator(*args, **kwargs):
     return handler, ws, recog
 
 
-@pytest.mark.asyncio
-async def test_handler_registers_and_cleans_up():
-    """Start a handler and ensure it's registered then cleaned up on stop."""
-    before = get_active_handlers_count()
-    handler, ws, recog = await _create_start_stop_handler(asyncio.get_running_loop())
-
-    after = get_active_handlers_count()
-    # Should be same as before after full stop
-    assert (
-        after == before
-    ), f"active handlers should be cleaned up (before={before}, after={after})"
-    # websocket attribute should be removed/cleared or not reference running handler
-    # The implementation sets _acs_media_handler during start; after stop it may remain but handler.is_running must be False
-    assert not handler.is_running
-
 @pytest.mark.asyncio
 async def test_threads_terminated_on_stop():
diff --git a/tests/test_acs_simple.py b/tests/test_acs_simple.py
index cb6998f4..d19d336a 100644
--- a/tests/test_acs_simple.py
+++ b/tests/test_acs_simple.py
@@ -19,7 +19,7 @@
 import base64
 import threading
 import time
-from unittest.mock import Mock, AsyncMock, patch
+from unittest.mock import Mock, MagicMock, AsyncMock, patch
 
 
 # Test the basic functionality without complex logging
@@ -64,12 +64,14 @@ async def test_main_event_loop_basic():
     mock_websocket = Mock()
     mock_websocket.send_text = AsyncMock()
 
-    mock_route_turn_thread = Mock()
+    mock_route_turn_thread = MagicMock()
+    mock_route_turn_thread.cancel_current_processing = AsyncMock()
 
     main_loop = MainEventLoop(mock_websocket, "test-call", mock_route_turn_thread)
 
     # Test barge-in handling
     await main_loop.handle_barge_in()
+    await asyncio.sleep(0.11)
 
     # Verify WebSocket was called (stop audio command)
     mock_websocket.send_text.assert_called()
@@ -77,11 +79,13 @@
 
 
 class MockRecognizer:
-    """Simple mock recognizer."""
+    """Simple mock recognizer that mirrors the current interface."""
 
     def __init__(self):
         self.started = False
+        self.stopped = False
         self.callbacks = {}
+        self.push_stream = None
 
     def set_partial_result_callback(self, callback):
         self.callbacks["partial"] = callback
@@ -92,9 +96,21 @@ def set_final_result_callback(self, callback):
         self.callbacks["final"] = callback
 
     def set_cancel_callback(self, callback):
         self.callbacks["cancel"] = callback
 
+    def create_push_stream(self):
+        self.push_stream = object()
+
+    def prepare_stream(self):
+        self.push_stream = object()
+
+    def prepare_start(self):
+        self.push_stream = object()
+
     def start(self):
         self.started = True
 
+    def stop(self):
+        self.stopped = True
+
     def write_bytes(self, data):
         pass
diff --git a/tests/test_dtmf_validation.py b/tests/test_dtmf_validation.py
index 3975bac9..ee382fed 100644
--- a/tests/test_dtmf_validation.py
+++ b/tests/test_dtmf_validation.py
@@ -8,10 +8,10 @@
 os.environ.pop("APPLICATIONINSIGHTS_CONNECTION_STRING", None)
 
 import asyncio
-import json
-import pytest
 from types import SimpleNamespace
-from unittest.mock import patch, AsyncMock
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
 
 from apps.rtagent.backend.api.v1.handlers.dtmf_validation_lifecycle import (
     DTMFValidationLifecycle,
@@ -21,81 +21,122 @@
 class DummyMemo:
     def __init__(self):
         self._d = {}
+        self.persist_calls = 0
 
-    def get_context(self, k, default=None):
-        return self._d.get(k, default)
+    def get_context(self, key, default=None):
+        return self._d.get(key, default)
 
-    def update_context(self, k, v):
-        self._d[k] = v
+    def set_context(self, key, value):
+        self._d[key] = value
 
-    def set_context(self, k, v):
-        self._d[k] = v
+    def update_context(self, key, value):
+        self._d[key] = value
 
     async def persist_to_redis_async(self, redis_mgr):
-        pass
+        self.persist_calls += 1
 
 
-class FakeAuthService:
-    def __init__(self, ok=True):
-        self.ok = ok
-        self.calls = []
+class DummyContext:
+    def __init__(self, event_data, memo_manager=None, redis_mgr=None, acs_caller=None):
+        self._event_data = event_data
+        self.memo_manager = memo_manager
+        self.redis_mgr = redis_mgr
+        self.acs_caller = acs_caller
+        self.call_connection_id = "call-123"
 
-    async def validate_pin(self, call_id, phone, pin):
-        self.calls.append((call_id, phone, pin))
-        # small delay to emulate I/O
-        await asyncio.sleep(0.01)
-        return {"ok": self.ok, "user_id": "u1"} if self.ok else {"ok": False}
+    def get_event_data(self):
+        return self._event_data
 
 
-@pytest.mark.asyncio
-async def test_validate_sequence_success():
-    """Test successful DTMF sequence validation using centralized logic."""
+class DummyRedis:
+    def __init__(self, result):
+        self._result = result
+
+    async def read_events_blocking_async(self, **kwargs):
+        return self._result
+
+
+def test_is_dtmf_validation_gate_open():
     memo = DummyMemo()
+    memo.set_context("dtmf_validation_gate_open", True)
+
+    assert DTMFValidationLifecycle.is_dtmf_validation_gate_open(memo, "call")
 
-    context = SimpleNamespace(
-        call_connection_id="call-1",
-        memo_manager=memo,
-        redis_mgr=AsyncMock(),
-        clients=None,
-        acs_caller=None,
+    memo.set_context("dtmf_validation_gate_open", False)
+    assert not DTMFValidationLifecycle.is_dtmf_validation_gate_open(memo, "call")
+
+
+@pytest.mark.asyncio
+async def test_handle_dtmf_tone_received_updates_sequence():
+    memo = DummyMemo()
+    redis_mgr = AsyncMock()
+    context = DummyContext(
+        {"tone": "5", "sequenceId": 1}, memo_manager=memo, redis_mgr=redis_mgr
     )
 
-    # Mock the cancellation method to ensure it's not called on success
+    await DTMFValidationLifecycle.handle_dtmf_tone_received(context)
+
+    assert memo.get_context("dtmf_tone") == "5"
+    assert memo.persist_calls == 1
+
+
+@pytest.mark.asyncio
+async def test_handle_dtmf_tone_received_routes_to_validation_flow():
+    memo = DummyMemo()
+    memo.set_context("aws_connect_validation_pending", True)
+    context = DummyContext({"tone": "1", "sequenceId": 2}, memo_manager=memo)
+
     with patch.object(
-        DTMFValidationLifecycle, "_cancel_call_for_validation_failure"
-    ) as mock_cancel:
-        # Test a valid 4-digit sequence
-        await DTMFValidationLifecycle._validate_sequence(context, "1234")
+        DTMFValidationLifecycle,
+        "_handle_aws_connect_validation_tone",
+        new=AsyncMock(),
+    ) as mock_handler:
+        await DTMFValidationLifecycle.handle_dtmf_tone_received(context)
 
-        # Assert success case
-        assert memo.get_context("dtmf_validated") is True
-        assert memo.get_context("entered_pin") == "1234"
-        assert memo.get_context("dtmf_validation_gate_open") is True
-        mock_cancel.assert_not_called()
+    mock_handler.assert_awaited_once()
 
 
 @pytest.mark.asyncio
-async def test_validate_sequence_failure():
-    """Test failed DTMF sequence validation using centralized logic."""
+async def test_setup_aws_connect_validation_flow_sets_context():
     memo = DummyMemo()
+    redis_mgr = AsyncMock()
+    context = DummyContext({}, memo_manager=memo, redis_mgr=redis_mgr)
+    call_conn = MagicMock()
+
+    with patch.object(
+        DTMFValidationLifecycle,
+        "_start_dtmf_recognition",
+        new=AsyncMock(),
+    ) as mock_start:
+        await DTMFValidationLifecycle.setup_aws_connect_validation_flow(
+            context, call_conn
+        )
+
+    assert memo.get_context("aws_connect_validation_pending") is True
+    assert memo.get_context("aws_connect_input_sequence") == ""
+    digits = memo.get_context("aws_connect_validation_digits")
+    assert isinstance(digits, str) and len(digits) == 3
+    assert memo.persist_calls == 1
+    mock_start.assert_awaited_once_with(context, call_conn)
+
+
+@pytest.mark.asyncio
+async def test_wait_for_dtmf_validation_completion_success():
+    redis_mgr = DummyRedis(result={"validation_status": "completed"})
 
-    context = SimpleNamespace(
-        call_connection_id="call-2",
-        memo_manager=memo,
-        redis_mgr=AsyncMock(),
-        clients=None,
-        acs_caller=None,
+    result = await DTMFValidationLifecycle.wait_for_dtmf_validation_completion(
+        redis_mgr, "call-1"
     )
 
-    # Mock the cancellation method to verify it's called on failure
-    with patch.object(
-        DTMFValidationLifecycle, "_cancel_call_for_validation_failure"
-    ) as mock_cancel:
-        # Test an invalid sequence (too short)
-        await DTMFValidationLifecycle._validate_sequence(context, "12")
-
-        # Assert failure case
-        assert memo.get_context("dtmf_validated") is False
-        assert memo.get_context("entered_pin") is None
-        # Verify call cancellation was triggered
-        mock_cancel.assert_called_once_with(context)
+    assert result is True
+
+
+@pytest.mark.asyncio
+async def test_wait_for_dtmf_validation_completion_timeout():
+    redis_mgr = DummyRedis(result=None)
+
+    result = await DTMFValidationLifecycle.wait_for_dtmf_validation_completion(
+        redis_mgr, "call-1"
+    )
+
+    assert result is False
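The two completion tests just above fix the contract for `wait_for_dtmf_validation_completion`: a blocking stream read that yields `{"validation_status": "completed"}` maps to True, and a read that times out (returns None) maps to False. A plausible minimal shape for the helper, consistent with those tests — the stream key layout and timeout parameter are assumptions:

    async def wait_for_dtmf_validation_completion(
        redis_mgr, call_connection_id: str, timeout_seconds: float = 30.0
    ) -> bool:
        """Block until the validation event arrives or the read times out."""
        result = await redis_mgr.read_events_blocking_async(
            stream=f"dtmf:{call_connection_id}",  # assumed key layout
            timeout=timeout_seconds,
        )
        if not result:
            return False  # timed out: treat as not validated
        return result.get("validation_status") == "completed"
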
-""" +import asyncio +from unittest.mock import AsyncMock, MagicMock import pytest -from unittest.mock import AsyncMock, MagicMock, patch from apps.rtagent.backend.api.v1.handlers.dtmf_validation_lifecycle import ( DTMFValidationLifecycle, @@ -13,156 +11,100 @@ from apps.rtagent.backend.api.v1.events.types import CallEventContext -@pytest.fixture -def mock_context(): - """Create a mock CallEventContext for testing.""" - context = MagicMock(spec=CallEventContext) - context.call_connection_id = "test-call-123" - context.memo_manager = MagicMock() - context.memo_manager.persist_to_redis_async = AsyncMock() - context.redis_mgr = AsyncMock() - context.acs_caller = MagicMock() - context.websocket = MagicMock() - return context +class DummyMemo: + def __init__(self): + self._d = {} + self.persist_calls = 0 + def set_context(self, key, value): + self._d[key] = value -@pytest.mark.asyncio -async def test_aws_connect_validation_success_no_cancellation(mock_context): - """Test that successful AWS Connect validation does NOT cancel the call.""" - # Arrange - input_sequence = "123" - expected_digits = "123" - - # Act - with patch.object( - DTMFValidationLifecycle, "_cancel_call_for_validation_failure" - ) as mock_cancel: - await DTMFValidationLifecycle._complete_aws_connect_validation( - mock_context, input_sequence, expected_digits - ) - - # Assert - call should NOT be cancelled on success - mock_cancel.assert_not_called() - mock_context.memo_manager.set_context.assert_any_call("dtmf_validated", True) - mock_context.memo_manager.set_context.assert_any_call( - "dtmf_validation_gate_open", True - ) + def get_context(self, key, default=None): + return self._d.get(key, default) + def update_context(self, key, value): + self._d[key] = value -@pytest.mark.asyncio -async def test_aws_connect_validation_failure_cancels_call(mock_context): - """Test that failed AWS Connect validation cancels the call.""" - # Arrange - input_sequence = "456" - expected_digits = "123" + async def persist_to_redis_async(self, redis_mgr): + self.persist_calls += 1 - # Act - with patch.object( - DTMFValidationLifecycle, "_cancel_call_for_validation_failure" - ) as mock_cancel: - await DTMFValidationLifecycle._complete_aws_connect_validation( - mock_context, input_sequence, expected_digits - ) - # Assert - call should be cancelled on failure - mock_cancel.assert_called_once_with(mock_context) - mock_context.memo_manager.set_context.assert_any_call("dtmf_validated", False) +class DummyContext: + def __init__(self, memo_manager=None, redis_mgr=None): + self.call_connection_id = "test-call-123" + self.memo_manager = memo_manager + self.redis_mgr = redis_mgr + self.acs_caller = MagicMock() + + +@pytest.fixture +def context_with_memo(): + memo = DummyMemo() + redis_mgr = AsyncMock() + redis_mgr.add_event_async = AsyncMock() + redis_mgr.set_value_async = AsyncMock() + return DummyContext(memo_manager=memo, redis_mgr=redis_mgr), memo, redis_mgr @pytest.mark.asyncio -async def test_sequence_validation_failure_cancels_call(mock_context): - """Test that failed sequence validation cancels the call.""" - # Arrange - invalid_sequence = "12" # Too short +async def test_complete_validation_success_sets_flags(context_with_memo): + context, memo, redis_mgr = context_with_memo - # Act - with patch.object( - DTMFValidationLifecycle, "_cancel_call_for_validation_failure" - ) as mock_cancel: - await DTMFValidationLifecycle._validate_sequence(mock_context, invalid_sequence) + await DTMFValidationLifecycle._complete_aws_connect_validation( + context, 
input_sequence="123", expected_digits="123" + ) - # Assert - call should be cancelled on failure - mock_cancel.assert_called_once_with(mock_context) - mock_context.memo_manager.update_context.assert_any_call("dtmf_validated", False) + assert memo.get_context("dtmf_validated") is True + assert memo.get_context("dtmf_validation_gate_open") is True + redis_mgr.add_event_async.assert_awaited_once() + assert memo.persist_calls == 1 @pytest.mark.asyncio -async def test_sequence_validation_success_no_cancellation(mock_context): - """Test that successful sequence validation does NOT cancel the call.""" - # Arrange - valid_sequence = "1234" # Valid 4-digit PIN - - # Act - with patch.object( - DTMFValidationLifecycle, "_cancel_call_for_validation_failure" - ) as mock_cancel: - await DTMFValidationLifecycle._validate_sequence(mock_context, valid_sequence) - - # Assert - call should NOT be cancelled on success - mock_cancel.assert_not_called() - mock_context.memo_manager.update_context.assert_any_call("dtmf_validated", True) - mock_context.memo_manager.update_context.assert_any_call( - "dtmf_validation_gate_open", True +async def test_complete_validation_failure_marks_invalid(context_with_memo): + context, memo, redis_mgr = context_with_memo + + await DTMFValidationLifecycle._complete_aws_connect_validation( + context, input_sequence="000", expected_digits="123" ) + assert memo.get_context("dtmf_validated") is False + redis_mgr.add_event_async.assert_not_called() -@pytest.mark.asyncio -async def test_cancel_call_for_validation_failure_with_session_terminator(mock_context): - """Test call cancellation using session terminator.""" - # Arrange - mock_context.acs_caller.client = MagicMock() - # Ensure websocket attribute exists and is truthy - mock_context.websocket = MagicMock() - - # Act - with patch( - "apps.rtagent.backend.api.v1.handlers.dtmf_validation_lifecycle.terminate_session", - new_callable=AsyncMock, - ) as mock_terminate: - await DTMFValidationLifecycle._cancel_call_for_validation_failure(mock_context) - - # Assert - mock_terminate.assert_called_once() - call_args = mock_terminate.call_args - assert call_args.kwargs["ws"] == mock_context.websocket - assert call_args.kwargs["is_acs"] is True - assert call_args.kwargs["call_connection_id"] == "test-call-123" - - # Verify context updates - mock_context.memo_manager.set_context.assert_any_call( - "call_cancelled_dtmf_failure", True - ) - mock_context.memo_manager.set_context.assert_any_call( - "dtmf_validation_gate_open", False - ) - # Verify Redis event publication - mock_context.redis_mgr.publish_event_async.assert_called_once() +def test_get_fresh_dtmf_validation_status(): + memo = DummyMemo() + memo.set_context("dtmf_validated", True) + result = DTMFValidationLifecycle.get_fresh_dtmf_validation_status( + memo, "call-123" + ) -@pytest.mark.asyncio -async def test_cancel_call_fallback_direct_hangup(mock_context): - """Test call cancellation fallback when session terminator is not available.""" - # Arrange - no websocket available, simulate fallback - mock_context.websocket = None - mock_call_conn = MagicMock() - mock_context.acs_caller.get_call_connection.return_value = mock_call_conn + assert result is True - # Act - await DTMFValidationLifecycle._cancel_call_for_validation_failure(mock_context) - # Assert - should use direct hang_up as fallback - mock_call_conn.hang_up.assert_called_once_with(is_for_everyone=True) +def test_normalize_tone_mapping(): + assert DTMFValidationLifecycle._normalize_tone("five") == "5" + assert 
DTMFValidationLifecycle._normalize_tone("*") == "*" + assert DTMFValidationLifecycle._normalize_tone(None) is None @pytest.mark.asyncio -async def test_public_cancel_method(): - """Test the public cancel_call_for_dtmf_failure method.""" - mock_context = MagicMock() +async def test_update_dtmf_sequence_handles_append(context_with_memo): + context, memo, redis_mgr = context_with_memo + + class DummyCallEventContext(CallEventContext): + def __init__(self, memo_manager, redis_mgr): + self.memo_manager = memo_manager + self.redis_mgr = redis_mgr + self.call_connection_id = "call-123" + + fake_context = DummyCallEventContext(memo, redis_mgr) + + DTMFValidationLifecycle._update_dtmf_sequence(fake_context, tone="1", sequence_id=0) + DTMFValidationLifecycle._update_dtmf_sequence(fake_context, tone="2", sequence_id=1) - with patch.object( - DTMFValidationLifecycle, "_cancel_call_for_validation_failure" - ) as mock_private: - await DTMFValidationLifecycle.cancel_call_for_dtmf_failure(mock_context) + assert memo.get_context("dtmf_sequence") == "12" - mock_private.assert_called_once_with(mock_context) + await asyncio.sleep(0) # allow background task to run + redis_mgr.set_value_async.assert_called() diff --git a/tests/test_events_architecture_simple.py b/tests/test_events_architecture_simple.py index cee54037..6de149b6 100644 --- a/tests/test_events_architecture_simple.py +++ b/tests/test_events_architecture_simple.py @@ -5,12 +5,16 @@ Tests the core refactoring without heavy dependencies. """ -import pytest import asyncio -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import MagicMock, patch from azure.core.messaging import CloudEvent +def run_async(coro): + """Execute coroutine in a fresh loop for pytest compatibility.""" + return asyncio.run(coro) + + # Mock the modules to avoid import issues class MockCallEventContext: def __init__(self, event, call_connection_id, event_type): @@ -249,7 +253,7 @@ def test_event_context_json_data(self): assert data["callConnectionId"] == "test_123" assert data["status"] == "connected" - async def test_call_initiated_handler(self): + def test_call_initiated_handler(self): """Test call initiated handler.""" event = CloudEvent( source="api", @@ -268,7 +272,7 @@ async def test_call_initiated_handler(self): ) context.memo_manager = MagicMock() - await MockCallEventHandlers.handle_call_initiated(context) + run_async(MockCallEventHandlers.handle_call_initiated(context)) # Verify context updates context.memo_manager.update_context.assert_called() @@ -282,7 +286,7 @@ async def test_call_initiated_handler(self): assert updates["call_direction"] == "outbound" assert updates["target_number"] == "+1234567890" - async def test_inbound_call_handler(self): + def test_inbound_call_handler(self): """Test inbound call received handler.""" event = CloudEvent( source="eventgrid", @@ -303,7 +307,7 @@ async def test_inbound_call_handler(self): ) context.memo_manager = MagicMock() - await MockCallEventHandlers.handle_inbound_call_received(context) + run_async(MockCallEventHandlers.handle_inbound_call_received(context)) # Verify context updates calls = context.memo_manager.update_context.call_args_list @@ -312,7 +316,7 @@ async def test_inbound_call_handler(self): assert updates["call_direction"] == "inbound" assert updates["caller_id"] == "+1987654321" - async def test_event_processor_registration(self): + def test_event_processor_registration(self): """Test event processor handler registration.""" processor = MockCallEventProcessor() @@ -325,7 +329,7 @@ async 
def dummy_handler(context): assert stats["handlers_registered"] == 1 assert MockACSEventTypes.CALL_CONNECTED in stats["event_types"] - async def test_event_processing_flow(self): + def test_event_processing_flow(self): """Test end-to-end event processing.""" processor = MockCallEventProcessor() @@ -348,7 +352,7 @@ async def test_event_processing_flow(self): mock_state.clients = [] # Process event - result = await processor.process_events([event], mock_state) + result = run_async(processor.process_events([event], mock_state)) assert result["status"] == "success" assert result["processed"] == 1 @@ -358,7 +362,7 @@ async def test_event_processing_flow(self): active_calls = processor.get_active_calls() assert "test_789" in active_calls - async def test_active_call_lifecycle(self): + def test_active_call_lifecycle(self): """Test active call tracking through connect/disconnect.""" processor = MockCallEventProcessor() @@ -377,7 +381,7 @@ async def test_active_call_lifecycle(self): data={"callConnectionId": "lifecycle_test"}, ) - await processor.process_events([connect_event], mock_state) + run_async(processor.process_events([connect_event], mock_state)) assert "lifecycle_test" in processor.get_active_calls() # Disconnect event @@ -387,10 +391,10 @@ async def test_active_call_lifecycle(self): data={"callConnectionId": "lifecycle_test"}, ) - await processor.process_events([disconnect_event], mock_state) + run_async(processor.process_events([disconnect_event], mock_state)) assert "lifecycle_test" not in processor.get_active_calls() - async def test_error_handling_isolation(self): + def test_error_handling_isolation(self): """Test that one failing handler doesn't stop others.""" processor = MockCallEventProcessor() @@ -411,7 +415,7 @@ async def succeeding_handler(context): ) # Should handle error gracefully - result = await processor.process_events([event], MagicMock()) + result = run_async(processor.process_events([event], MagicMock())) # Event should still be processed despite one handler failing assert result["processed"] == 1 diff --git a/tests/test_redis_manager.py b/tests/test_redis_manager.py new file mode 100644 index 00000000..07da8e2c --- /dev/null +++ b/tests/test_redis_manager.py @@ -0,0 +1,107 @@ +import pytest + +from redis.exceptions import MovedError + +from src.redis import manager as redis_manager +from src.redis.manager import AzureRedisManager + + +class _FakeRedis: + def __init__(self) -> None: + self.hgetall_calls = 0 + + def hgetall(self, key: str) -> dict[str, str]: + self.hgetall_calls += 1 + raise MovedError("1234 127.0.0.1:7001") + + +class _FakeClusterRedis: + def __init__(self) -> None: + self.hgetall_calls = 0 + + def hgetall(self, key: str) -> dict[str, str]: + self.hgetall_calls += 1 + return {"foo": "bar"} + + +def test_get_session_data_switches_to_cluster(monkeypatch): + single_node_client = _FakeRedis() + cluster_client = _FakeClusterRedis() + + # Stub the redis client constructors used inside the manager + monkeypatch.setattr( + redis_manager.redis, + "Redis", + lambda *args, **kwargs: single_node_client, + ) + monkeypatch.setattr( + redis_manager, + "RedisCluster", + lambda *args, **kwargs: cluster_client, + ) + + mgr = AzureRedisManager( + host="example.redis.local", + port=6380, + access_key="dummy", + ssl=False, + credential=object(), + ) + + data = mgr.get_session_data("session-123") + + assert data == {"foo": "bar"} + assert single_node_client.hgetall_calls == 1 + assert cluster_client.hgetall_calls == 1 + assert mgr._using_cluster is True + + +def 
test_get_session_data_raises_without_cluster_support(monkeypatch): + single_node_client = _FakeRedis() + + monkeypatch.setattr( + redis_manager.redis, + "Redis", + lambda *args, **kwargs: single_node_client, + ) + monkeypatch.setattr(redis_manager, "RedisCluster", None, raising=False) + + mgr = AzureRedisManager( + host="example.redis.local", + port=6380, + access_key="dummy", + ssl=False, + credential=object(), + ) + + with pytest.raises(MovedError): + mgr.get_session_data("session-123") + + +def test_remap_cluster_address_to_domain(monkeypatch): + fake_client = object() + monkeypatch.setattr( + redis_manager.redis, "Redis", lambda *args, **kwargs: fake_client + ) + monkeypatch.setattr( + redis_manager, "RedisCluster", lambda *args, **kwargs: fake_client + ) + + mgr = AzureRedisManager( + host="example.redis.local", + port=6380, + access_key="dummy", + ssl=False, + credential=object(), + ) + + # IP addresses remap to canonical host + assert mgr._remap_cluster_address(("51.8.10.248", 8501)) == ( + "example.redis.local", + 8501, + ) + # Hostnames remain unchanged + assert mgr._remap_cluster_address(("cache.contoso.redis", 8501)) == ( + "cache.contoso.redis", + 8501, + ) diff --git a/tests/test_speech_queue.py b/tests/test_speech_queue.py index 380025cd..bf69fc1a 100644 --- a/tests/test_speech_queue.py +++ b/tests/test_speech_queue.py @@ -44,159 +44,173 @@ class SpeechEvent: ) # Use time.time() instead of asyncio loop time -async def test_basic_queue(): +def test_basic_queue(): """Test 1: Basic queue put/get functionality""" - logger.info("🧪 Test 1: Basic queue functionality") - queue = asyncio.Queue(maxsize=10) + async def _run(): + logger.info("🧪 Test 1: Basic queue functionality") - # Test event - test_event = SpeechEvent( - event_type=SpeechEventType.FINAL, text="Hello world test", language="en-US" - ) + queue = asyncio.Queue(maxsize=10) - # Put event - await queue.put(test_event) - logger.info(f"✅ Event queued successfully. Queue size: {queue.qsize()}") - - # Get event with timeout - try: - retrieved_event = await asyncio.wait_for(queue.get(), timeout=1.0) - logger.info( - f"✅ Event retrieved successfully: {retrieved_event.event_type.value} - '{retrieved_event.text}'" + # Test event + test_event = SpeechEvent( + event_type=SpeechEventType.FINAL, text="Hello world test", language="en-US" ) - return True - except asyncio.TimeoutError: - logger.error("❌ Queue get timed out - this should not happen!") - return False + # Put event + await queue.put(test_event) + logger.info(f"✅ Event queued successfully. 
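test_remap_cluster_address_to_domain above documents why the remap hook exists: cluster MOVED redirects report node IPs, while TLS certificates and auth are bound to the configured hostname, so IP targets have to be folded back onto the canonical host. A sketch consistent with that test — treating `self.host` as the configured hostname and using a plain IP-literal check, both assumptions:

    import ipaddress

    def _remap_cluster_address(self, address):
        """Map a redirected node's IP back to the canonical hostname."""
        host, port = address
        try:
            ipaddress.ip_address(host)  # raises ValueError for hostnames
        except ValueError:
            return host, port  # already a hostname: leave it untouched
        return self.host, port  # IP literal: substitute the configured host
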
diff --git a/tests/test_speech_queue.py b/tests/test_speech_queue.py
index 380025cd..bf69fc1a 100644
--- a/tests/test_speech_queue.py
+++ b/tests/test_speech_queue.py
@@ -44,159 +44,173 @@ class SpeechEvent:
     )  # Use time.time() instead of asyncio loop time
 
 
-async def test_basic_queue():
+def test_basic_queue():
     """Test 1: Basic queue put/get functionality"""
-    logger.info("🧪 Test 1: Basic queue functionality")
 
-    queue = asyncio.Queue(maxsize=10)
+    async def _run():
+        logger.info("🧪 Test 1: Basic queue functionality")
 
-    # Test event
-    test_event = SpeechEvent(
-        event_type=SpeechEventType.FINAL, text="Hello world test", language="en-US"
-    )
+        queue = asyncio.Queue(maxsize=10)
 
-    # Put event
-    await queue.put(test_event)
-    logger.info(f"✅ Event queued successfully. Queue size: {queue.qsize()}")
-
-    # Get event with timeout
-    try:
-        retrieved_event = await asyncio.wait_for(queue.get(), timeout=1.0)
-        logger.info(
-            f"✅ Event retrieved successfully: {retrieved_event.event_type.value} - '{retrieved_event.text}'"
+        # Test event
+        test_event = SpeechEvent(
+            event_type=SpeechEventType.FINAL, text="Hello world test", language="en-US"
         )
-        return True
-    except asyncio.TimeoutError:
-        logger.error("❌ Queue get timed out - this should not happen!")
-        return False
 
+        # Put event
+        await queue.put(test_event)
+        logger.info(f"✅ Event queued successfully. Queue size: {queue.qsize()}")
 
-async def test_processing_loop():
-    """Test 2: Processing loop similar to Route Turn Thread"""
-    logger.info("🧪 Test 2: Processing loop simulation")
+        # Get event with timeout
+        try:
+            retrieved_event = await asyncio.wait_for(queue.get(), timeout=1.0)
+            logger.info(
+                f"✅ Event retrieved successfully: {retrieved_event.event_type.value} - '{retrieved_event.text}'"
+            )
+            return True
+        except asyncio.TimeoutError:
+            logger.error("❌ Queue get timed out - this should not happen!")
+            return False
 
-    queue = asyncio.Queue(maxsize=10)
-    running = True
-    events_processed = 0
+    assert asyncio.run(_run())
 
-    async def processing_loop():
-        nonlocal events_processed
-        while running:
-            try:
-                logger.debug(f"🔄 Waiting for events (queue size: {queue.qsize()})")
-                speech_event = await asyncio.wait_for(queue.get(), timeout=1.0)
-                logger.info(
-                    f"📢 Processing loop received event: {speech_event.event_type.value} - '{speech_event.text}'"
-                )
-                events_processed += 1
+
+def test_processing_loop():
+    """Test 2: Processing loop similar to Route Turn Thread"""
 
-                if events_processed >= 3:  # Stop after processing 3 events
+    async def _run():
+        logger.info("🧪 Test 2: Processing loop simulation")
+
+        queue = asyncio.Queue(maxsize=10)
+        running = True
+        events_processed = 0
+
+        async def processing_loop():
+            nonlocal events_processed
+            while running:
+                try:
+                    logger.debug(f"🔄 Waiting for events (queue size: {queue.qsize()})")
+                    speech_event = await asyncio.wait_for(queue.get(), timeout=1.0)
+
+                    logger.info(
+                        f"📢 Processing loop received event: {speech_event.event_type.value} - '{speech_event.text}'"
+                    )
+                    events_processed += 1
+
+                    if events_processed >= 3:  # Stop after processing 3 events
+                        break
+
+                except asyncio.TimeoutError:
+                    logger.debug("⏰ Processing loop timeout (normal)")
+                    continue
+                except Exception as e:
+                    logger.error(f"❌ Error in processing loop: {e}")
                     break
 
-            except asyncio.TimeoutError:
-                logger.debug("⏰ Processing loop timeout (normal)")
-                continue
-            except Exception as e:
-                logger.error(f"❌ Error in processing loop: {e}")
-                break
-
-    # Start processing loop
-    processing_task = asyncio.create_task(processing_loop())
-
-    # Send test events
-    test_events = [
-        SpeechEvent(SpeechEventType.GREETING, "Welcome message"),
-        SpeechEvent(SpeechEventType.FINAL, "User speech input"),
-        SpeechEvent(SpeechEventType.FINAL, "Another user input"),
-    ]
+        # Start processing loop
+        processing_task = asyncio.create_task(processing_loop())
 
-    for i, event in enumerate(test_events):
-        logger.info(f"📤 Sending test event {i+1}: {event.text}")
-        await queue.put(event)
-        await asyncio.sleep(0.5)  # Small delay between events
-
-    # Wait for processing to complete
-    await processing_task
-
-    running = False
-    logger.info(f"✅ Processing loop completed. Events processed: {events_processed}")
-    return events_processed == 3
+        # Send test events
+        test_events = [
+            SpeechEvent(SpeechEventType.GREETING, "Welcome message"),
+            SpeechEvent(SpeechEventType.FINAL, "User speech input"),
+            SpeechEvent(SpeechEventType.FINAL, "Another user input"),
+        ]
+        for i, event in enumerate(test_events):
+            logger.info(f"📤 Sending test event {i+1}: {event.text}")
+            await queue.put(event)
+            await asyncio.sleep(0.5)  # Small delay between events
 
+        # Wait for processing to complete
+        await processing_task
 
-async def test_cross_thread_queue():
-    """Test 3: Cross-thread queue communication simulation"""
-    logger.info("🧪 Test 3: Cross-thread queue communication")
+        running = False
+        logger.info(
+            f"✅ Processing loop completed. Events processed: {events_processed}"
+        )
+        return events_processed == 3
 
-    import threading
+    assert asyncio.run(_run())
 
-    queue = asyncio.Queue(maxsize=10)
-    main_loop = asyncio.get_running_loop()
-    events_received = []
 
-    def background_thread_func():
-        """Simulate Speech SDK Thread sending events"""
-        logger.info("🧵 Background thread started")
-        test_events = [
-            SpeechEvent(SpeechEventType.PARTIAL, "Partial speech..."),
-            SpeechEvent(SpeechEventType.FINAL, "Complete speech recognition"),
-        ]
-
-        for event in test_events:
-            logger.info(f"🧵 Background thread queuing: {event.text}")
+def test_cross_thread_queue():
+    """Test 3: Cross-thread queue communication simulation"""
 
-            # Method 1: Try put_nowait (fastest)
+    async def _run():
+        logger.info("🧪 Test 3: Cross-thread queue communication")
+
+        import threading
+
+        queue = asyncio.Queue(maxsize=10)
+        main_loop = asyncio.get_running_loop()
+        events_received = []
+
+        def background_thread_func():
+            """Simulate Speech SDK Thread sending events"""
+            logger.info("🧵 Background thread started")
+
+            test_events = [
+                SpeechEvent(SpeechEventType.PARTIAL, "Partial speech..."),
+                SpeechEvent(SpeechEventType.FINAL, "Complete speech recognition"),
+            ]
+
+            for event in test_events:
+                logger.info(f"🧵 Background thread queuing: {event.text}")
+
+                # Method 1: Try put_nowait (fastest)
+                try:
+                    queue.put_nowait(event)
+                    logger.info("🧵 Event queued via put_nowait")
+                    continue
+                except Exception as e:
+                    logger.debug(
+                        f"🧵 put_nowait failed: {e}, trying run_coroutine_threadsafe..."
+                    )
+
+                # Method 2: Fall back to run_coroutine_threadsafe
+                try:
+                    future = asyncio.run_coroutine_threadsafe(queue.put(event), main_loop)
+                    future.result(timeout=0.1)
+                    logger.info("🧵 Event queued via run_coroutine_threadsafe")
+                except Exception as e:
+                    logger.error(f"🧵 Failed to queue event: {e}")
+
+        # Start background thread
+        thread = threading.Thread(target=background_thread_func, daemon=True)
+        thread.start()
+
+        # Process events in main thread
+        timeout_count = 0
+        max_timeouts = 5
+
+        while timeout_count < max_timeouts:
             try:
-                queue.put_nowait(event)
-                logger.info("🧵 Event queued via put_nowait")
-                continue
-            except Exception as e:
                 logger.debug(
-                    f"🧵 put_nowait failed: {e}, trying run_coroutine_threadsafe..."
+                    f"🔄 Main thread waiting for events (queue size: {queue.qsize()})"
                 )
+                event = await asyncio.wait_for(queue.get(), timeout=1.0)
+                logger.info(
+                    f"📢 Main thread received: {event.event_type.value} - '{event.text}'"
+                )
+                events_received.append(event)
 
-            # Method 2: Fall back to run_coroutine_threadsafe
-            try:
-                future = asyncio.run_coroutine_threadsafe(queue.put(event), main_loop)
-                future.result(timeout=0.1)
-                logger.info("🧵 Event queued via run_coroutine_threadsafe")
-            except Exception as e:
-                logger.error(f"🧵 Failed to queue event: {e}")
-
-    # Start background thread
-    thread = threading.Thread(target=background_thread_func, daemon=True)
-    thread.start()
-
-    # Process events in main thread
-    timeout_count = 0
-    max_timeouts = 5
-
-    while timeout_count < max_timeouts:
-        try:
-            logger.debug(
-                f"🔄 Main thread waiting for events (queue size: {queue.qsize()})"
-            )
-            event = await asyncio.wait_for(queue.get(), timeout=1.0)
-            logger.info(
-                f"📢 Main thread received: {event.event_type.value} - '{event.text}'"
-            )
-            events_received.append(event)
+                if len(events_received) >= 2:  # Got both events
+                    break
 
-            if len(events_received) >= 2:  # Got both events
-                break
+            except asyncio.TimeoutError:
+                timeout_count += 1
+                logger.debug(f"⏰ Main thread timeout {timeout_count}/{max_timeouts}")
+                continue
 
-        except asyncio.TimeoutError:
-            timeout_count += 1
-            logger.debug(f"⏰ Main thread timeout {timeout_count}/{max_timeouts}")
-            continue
+        thread.join(timeout=1.0)
 
-    thread.join(timeout=1.0)
+        logger.info(
+            f"✅ Cross-thread test completed. Events received: {len(events_received)}"
        )
+        return len(events_received) == 2
 
-    logger.info(
-        f"✅ Cross-thread test completed. Events received: {len(events_received)}"
-    )
-    return len(events_received) == 2
+    assert asyncio.run(_run())
 
 
 async def main():
@@ -217,10 +231,13 @@ async def main():
         logger.info(f"{'='*50}")
 
         try:
-            result = await test_func()
-            results[test_name] = result
-            status = "✅ PASSED" if result else "❌ FAILED"
+            test_func()
+            results[test_name] = True
+            status = "✅ PASSED"
             logger.info(f"{test_name}: {status}")
+        except AssertionError as e:
+            logger.error(f"{test_name}: ❌ ASSERTION FAILED - {e}")
+            results[test_name] = False
         except Exception as e:
             logger.error(f"{test_name}: ❌ EXCEPTION - {e}")
             results[test_name] = False
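The refactor above keeps the pattern Test 3 documents: a producer thread first tries `queue.put_nowait()` and falls back to `asyncio.run_coroutine_threadsafe()`, which is the documented thread-safe way to schedule `queue.put()` on an event loop owned by another thread. A stripped-down, stdlib-only version of the same handoff follows; names like `producer` and `consume_two` are illustrative.

```python
# Self-contained sketch of the cross-thread handoff pattern from Test 3.
import asyncio
import threading


def producer(queue: asyncio.Queue, loop: asyncio.AbstractEventLoop) -> None:
    """Runs in a plain thread, e.g. a Speech SDK callback thread."""
    for text in ("partial speech...", "final recognition"):
        try:
            queue.put_nowait(text)  # fast path; not formally thread-safe
        except Exception:
            # Thread-safe fallback: schedule queue.put() on the owning loop.
            asyncio.run_coroutine_threadsafe(queue.put(text), loop).result(1.0)


async def consume_two() -> list[str]:
    queue: asyncio.Queue = asyncio.Queue(maxsize=10)
    loop = asyncio.get_running_loop()
    threading.Thread(target=producer, args=(queue, loop), daemon=True).start()

    received: list[str] = []
    for _ in range(5):  # bounded retries, mirroring max_timeouts in the test
        try:
            received.append(await asyncio.wait_for(queue.get(), timeout=1.0))
        except asyncio.TimeoutError:
            continue
        if len(received) == 2:
            break
    return received


if __name__ == "__main__":
    print(asyncio.run(consume_two()))
```

`put_nowait()` on an `asyncio.Queue` from a foreign thread is not guaranteed thread-safe, which is exactly why the fallback path exists and why the consumer polls with a timeout instead of relying on an immediate wakeup.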
diff --git a/tests/test_v1_events_integration.py b/tests/test_v1_events_integration.py
index 5d029ffb..ce65a024 100644
--- a/tests/test_v1_events_integration.py
+++ b/tests/test_v1_events_integration.py
@@ -11,9 +11,11 @@
 import asyncio
 import json
+import sys
 import pytest
 from unittest.mock import AsyncMock, MagicMock, patch
-from fastapi.testclient import TestClient
+
+sys.modules.setdefault("sounddevice", MagicMock())
 
 from azure.core.messaging import CloudEvent
 from datetime import datetime
@@ -32,6 +34,11 @@
 from apps.rtagent.backend.api.v1.handlers.acs_call_lifecycle import ACSLifecycleHandler
 
 
+def run_async(coro):
+    """Execute coroutine for pytest environments without asyncio plugin."""
+    return asyncio.run(coro)
+
+
 class TestV1EventsIntegration:
     """Test the integrated V1 events system."""
@@ -79,15 +86,24 @@ def sample_call_event_context(self, mock_memo_manager, mock_redis_mgr):
             },
         )
 
-        return CallEventContext(
+        mock_call_conn = MagicMock()
+        mock_call_conn.list_participants.return_value = []
+        acs_caller = MagicMock()
+        acs_caller.get_call_connection.return_value = mock_call_conn
+
+        context = CallEventContext(
             event=event,
             call_connection_id="test_call_123",
             event_type=ACSEventTypes.CALL_CONNECTED,
             memo_manager=mock_memo_manager,
             redis_mgr=mock_redis_mgr,
+            acs_caller=acs_caller,
         )
+        context.app_state = MagicMock()
+        context.app_state.redis_pool = None
+        return context
 
-    async def test_event_processor_registration(self):
+    def test_event_processor_registration(self):
         """Test that handlers can be registered and retrieved."""
         processor = CallEventProcessor()
@@ -102,7 +118,7 @@ async def dummy_handler(context: CallEventContext):
         assert stats["handlers_registered"] == 1
         assert ACSEventTypes.CALL_CONNECTED in stats["event_types"]
 
-    async def test_default_handlers_registration(self):
+    def test_default_handlers_registration(self):
         """Test that default handlers are registered correctly."""
         register_default_handlers()
@@ -119,7 +135,7 @@ async def test_default_handlers_registration(self):
         assert V1EventTypes.CALL_INITIATED in stats["event_types"]
         assert ACSEventTypes.CALL_CONNECTED in stats["event_types"]
 
-    async def test_call_initiated_handler(
+    def test_call_initiated_handler(
         self, sample_call_event_context, mock_memo_manager
     ):
         """Test call initiated event handler."""
@@ -132,7 +148,7 @@ async def test_call_initiated_handler(
         }
 
         # Call handler
-        await CallEventHandlers.handle_call_initiated(sample_call_event_context)
+        run_async(CallEventHandlers.handle_call_initiated(sample_call_event_context))
 
         # Verify memo manager was updated
         mock_memo_manager.update_context.assert_called()
@@ -146,7 +162,7 @@ async def test_call_initiated_handler(
         assert call_args["call_direction"] == "outbound"
         assert call_args["target_number"] == "+1234567890"
 
-    async def test_call_connected_handler(self, sample_call_event_context):
+    def test_call_connected_handler(self, sample_call_event_context):
         """Test call connected event handler."""
         # Mock clients for broadcast
         mock_clients = [MagicMock(), MagicMock()]
@@ -154,8 +170,13 @@
         with patch(
             "apps.rtagent.backend.api.v1.events.handlers.broadcast_message"
-        ) as mock_broadcast:
-            await CallEventHandlers.handle_call_connected(sample_call_event_context)
+        ) as mock_broadcast, patch(
+            "apps.rtagent.backend.api.v1.events.handlers.DTMFValidationLifecycle.setup_aws_connect_validation_flow",
+            new=AsyncMock(),
+        ):
+            run_async(
+                CallEventHandlers.handle_call_connected(sample_call_event_context)
+            )
 
             # Verify broadcast was called
             mock_broadcast.assert_called_once()
@@ -167,7 +188,7 @@
         assert message_data["type"] == "call_connected"
         assert message_data["call_connection_id"] == "test_call_123"
 
-    async def test_webhook_events_router(self, sample_call_event_context):
+    def test_webhook_events_router(self, sample_call_event_context):
         """Test webhook events router delegates to specific handlers."""
         sample_call_event_context.event_type = V1EventTypes.WEBHOOK_EVENTS
@@ -175,22 +196,24 @@
             # Set the original event type in context
             sample_call_event_context.event_type = ACSEventTypes.CALL_CONNECTED
 
-            await CallEventHandlers.handle_webhook_events(sample_call_event_context)
+            run_async(CallEventHandlers.handle_webhook_events(sample_call_event_context))
 
             # Verify the specific handler was called
             mock_handle.assert_called_once_with(sample_call_event_context)
 
-    async def test_acs_lifecycle_handler_event_emission(
+    def test_acs_lifecycle_handler_event_emission(
         self, mock_acs_caller, mock_redis_mgr
     ):
         """Test that ACS lifecycle handler emits events correctly."""
         handler = ACSLifecycleHandler()
 
         with patch.object(handler, "_emit_call_event") as mock_emit:
-            result = await handler.start_outbound_call(
-                acs_caller=mock_acs_caller,
-                target_number="+1234567890",
-                redis_mgr=mock_redis_mgr,
+            result = run_async(
+                handler.start_outbound_call(
+                    acs_caller=mock_acs_caller,
+                    target_number="+1234567890",
+                    redis_mgr=mock_redis_mgr,
+                )
             )
 
         # Verify call was successful
@@ -205,7 +228,7 @@
         assert emit_args[1] == "test_call_123"  # call_connection_id
         assert emit_args[2]["target_number"] == "+1234567890"  # data
 
-    async def test_process_call_events_delegation(self, mock_redis_mgr):
+    def test_process_call_events_delegation(self, mock_redis_mgr):
         """Test that process_call_events delegates to V1 event system."""
         handler = ACSLifecycleHandler()
@@ -223,8 +246,10 @@
         ]
 
         with patch(
-            "apps.rtagent.backend.api.v1.events.processor.get_call_event_processor"
-        ) as mock_get_processor:
+            "apps.rtagent.backend.api.v1.events.get_call_event_processor"
+        ) as mock_get_processor, patch(
+            "apps.rtagent.backend.api.v1.events.register_default_handlers"
+        ):
             mock_processor = AsyncMock()
             mock_processor.process_events.return_value = {
                 "status": "success",
@@ -233,7 +258,9 @@
             }
             mock_get_processor.return_value = mock_processor
 
-            result = await handler.process_call_events(mock_events, mock_request)
+            result = run_async(
+                handler.process_call_events(mock_events, mock_request)
+            )
 
             # Verify delegation occurred
             assert result["status"] == "success"
@@ -241,9 +268,9 @@
             assert result["processed_events"] == 1
 
             # Verify processor was called
-            mock_processor.process_events.assert_called_once()
+            mock_processor.process_events.assert_awaited_once()
 
-    async def test_event_context_data_extraction(self):
+    def test_event_context_data_extraction(self):
         """Test event context data extraction methods."""
         # Test with dict data
         event = CloudEvent(
@@ -265,7 +292,7 @@
         assert context.get_event_field("field1") == "value1"
         assert context.get_event_field("nonexistent", "default") == "default"
 
-    async def test_event_context_json_data_extraction(self):
+    def test_event_context_json_data_extraction(self):
         """Test event context with JSON string data."""
         json_data = json.dumps({"callConnectionId": "test_123", "status": "connected"})
@@ -279,7 +306,7 @@
         assert data["callConnectionId"] == "test_123"
         assert data["status"] == "connected"
 
-    async def test_processor_error_isolation(self):
+    def test_processor_error_isolation(self):
         """Test that one failing handler doesn't stop others."""
         processor = CallEventProcessor()
@@ -305,13 +332,13 @@ async def succeeding_handler(context: CallEventContext):
         mock_state = MagicMock()
 
         # Process event - should not raise exception
-        result = await processor.process_events([event], mock_state)
+        result = run_async(processor.process_events([event], mock_state))
 
         # Should indicate partial success
         assert result["processed"] == 1  # One event processed
         assert "failed" in result or "status" in result  # Some indication of issues
 
-    async def test_active_call_tracking(self):
+    def test_active_call_tracking(self):
         """Test that processor tracks active calls correctly."""
         processor = CallEventProcessor()
@@ -325,7 +352,7 @@ async def test_active_call_tracking(self):
             data={"callConnectionId": "test_123"},
         )
 
-        await processor.process_events([connected_event], mock_state)
+        run_async(processor.process_events([connected_event], mock_state))
 
         # Should track the active call
         active_calls = processor.get_active_calls()
@@ -338,7 +365,7 @@
             data={"callConnectionId": "test_123"},
         )
 
-        await processor.process_events([disconnected_event], mock_state)
+        run_async(processor.process_events([disconnected_event], mock_state))
 
         # Should no longer track the call
         active_calls = processor.get_active_calls()
@@ -353,7 +380,7 @@ def setup(self):
         """Reset processor before each test."""
         reset_call_event_processor()
 
-    async def test_outbound_call_flow(self):
+    def test_outbound_call_flow(self):
         """Test complete outbound call flow through hybrid architecture."""
 
         # 1. Setup mocks
@@ -372,10 +399,12 @@
         handler = ACSLifecycleHandler()
 
         with patch.object(handler, "_emit_call_event") as mock_emit:
-            result = await handler.start_outbound_call(
-                acs_caller=mock_acs_caller,
-                target_number="+1234567890",
-                redis_mgr=mock_redis_mgr,
+            result = run_async(
+                handler.start_outbound_call(
+                    acs_caller=mock_acs_caller,
+                    target_number="+1234567890",
+                    redis_mgr=mock_redis_mgr,
+                )
             )
 
         # 4. Verify ACS operation succeeded
@@ -388,7 +417,7 @@
         assert emit_args[0] == "V1.Call.Initiated"
         assert emit_args[1] == "test_call_outbound"
 
-    async def test_webhook_processing_flow(self):
+    def test_webhook_processing_flow(self):
         """Test webhook event processing through the events system."""
 
         # 1. Register handlers
@@ -424,13 +453,13 @@
         processor = get_call_event_processor()
 
-        result = await processor.process_events(webhook_events, mock_state)
+        result = run_async(processor.process_events(webhook_events, mock_state))
 
         # 5. Verify processing
         assert result["processed"] == 2
         assert result["failed"] == 0
 
-    async def test_error_handling_consistency(self):
+    def test_error_handling_consistency(self):
         """Test that errors are handled consistently across the system."""
 
         # 1. Test ACS operation error
@@ -440,10 +469,12 @@
         handler = ACSLifecycleHandler()
 
         with pytest.raises(Exception):  # Should propagate as HTTPException
-            await handler.start_outbound_call(
-                acs_caller=mock_acs_caller,
-                target_number="+1234567890",
-                redis_mgr=MagicMock(),
+            run_async(
+                handler.start_outbound_call(
+                    acs_caller=mock_acs_caller,
+                    target_number="+1234567890",
+                    redis_mgr=MagicMock(),
+                )
             )
 
         # 2. Test event processing error
@@ -459,7 +490,7 @@
         processor = get_call_event_processor()
 
         # Should handle gracefully without raising
-        result = await processor.process_events([bad_event], MagicMock())
+        result = run_async(processor.process_events([bad_event], MagicMock()))
 
         assert "status" in result
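Editorial note on the recurring change in this patch: every `async def test_*` becomes a plain `def` that drives its coroutine through `asyncio.run()`, either directly or via the `run_async` helper added above. Without a plugin such as pytest-asyncio, pytest cannot execute `async def` tests natively; it collects them, warns that they are unsupported, and skips them. In isolation the idiom looks like this (the sample test is illustrative, not from the repository):

```python
# The plugin-free async-test idiom this patch applies across the suite.
import asyncio


def run_async(coro):
    """Execute a coroutine for pytest environments without an asyncio plugin."""
    return asyncio.run(coro)


async def _double(x: int) -> int:
    await asyncio.sleep(0)  # stand-in for real async work
    return 2 * x


def test_double() -> None:  # plain sync test: pytest runs it as-is
    assert run_async(_double(21)) == 42
```

The trade-off is that each test spins up and tears down its own event loop, so loop-bound state (open connections, background tasks) cannot be shared across tests; the `reset_call_event_processor()` setup hook above keeps that isolation explicit.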