diff --git a/app/api/generate/agent-profiles/route.ts b/app/api/generate/agent-profiles/route.ts
index b081cbdac..fbf4d4e46 100644
--- a/app/api/generate/agent-profiles/route.ts
+++ b/app/api/generate/agent-profiles/route.ts
@@ -36,6 +36,8 @@ interface RequestBody {
   sceneOutlines?: { title: string; description?: string }[];
   language: string;
   availableAvatars: string[];
+  avatarDescriptions?: Array<{ path: string; desc: string }>;
+  availableVoices?: Array<{ providerId: string; voiceId: string; voiceName: string }>;
 }
 
 function stripCodeFences(text: string): string {
@@ -50,7 +52,14 @@ function stripCodeFences(text: string): string {
 export async function POST(req: NextRequest) {
   try {
     const body = (await req.json()) as RequestBody;
-    const { stageInfo, sceneOutlines, language, availableAvatars } = body;
+    const {
+      stageInfo,
+      sceneOutlines,
+      language,
+      availableAvatars,
+      avatarDescriptions,
+      availableVoices,
+    } = body;
 
     // ── Validate required fields ──
     if (!stageInfo?.name) {
@@ -79,6 +88,27 @@ export async function POST(req: NextRequest) {
 
     const systemPrompt = `You are an expert instructional designer. Generate agent profiles for a multi-agent classroom simulation. Decide the appropriate number of agents (typically 3-5) based on the course content and complexity. Return ONLY valid JSON, no markdown or explanation.`;
 
+    // Build voice list for prompt (if available)
+    const voiceListStr =
+      availableVoices && availableVoices.length > 0
+        ? JSON.stringify(
+            availableVoices.map((v) => ({
+              id: `${v.providerId}::${v.voiceId}`,
+              name: v.voiceName,
+            })),
+          )
+        : '';
+
+    const voicePrompt = voiceListStr
+      ? `- Each agent should be assigned a voice that matches their persona from this list: ${voiceListStr}
+  - Pick a voice that suits the agent's personality and role (e.g. authoritative voice for teacher, lively voice for energetic student)
+  - Try to use different voices for each agent`
+      : '';
+
+    const voiceJsonField = voiceListStr
+      ? ',\n      "voice": "string (voice id from available list, e.g. \'qwen-tts::Cherry\')"'
+      : '';
+
     const userPrompt = `Generate agent profiles for the following course:
 
 Course name: ${stageInfo.name}
@@ -90,10 +120,13 @@ Requirements:
 - Priority values: teacher=10 (highest), assistant=7, student=4-6
 - Each agent needs: name, role, persona (2-3 sentences describing personality and teaching/learning style)
 - Names and personas must be in language: ${language}
-- Each agent must be assigned one avatar from this list: ${JSON.stringify(availableAvatars)}
+- Each agent must be assigned one avatar from this list: ${JSON.stringify(avatarDescriptions && avatarDescriptions.length > 0 ? avatarDescriptions.map((a) => ({ path: a.path, description: a.desc })) : availableAvatars)}
+  - Pick an avatar that visually matches the agent's personality and role
   - Try to use different avatars for each agent
+  - Use the "path" value as the avatar field in the output
 - Each agent must be assigned one color from this list: ${JSON.stringify(COLOR_PALETTE)}
   - Each agent must have a different color
+${voicePrompt}
 
 Return a JSON object with this exact structure:
 {
@@ -104,7 +137,7 @@ Return a JSON object with this exact structure:
       "persona": "string (2-3 sentences)",
       "avatar": "string (from available list)",
       "color": "string (hex color from palette)",
-      "priority": number (10 for teacher, 7 for assistant, 4-6 for student)
+      "priority": number (10 for teacher, 7 for assistant, 4-6 for student)${voiceJsonField}
     }
   ]
 }`;
@@ -130,6 +163,7 @@ Return a JSON object with this exact structure:
         avatar: string;
         color: string;
         priority: number;
+        voice?: string;
       }>;
     };
 
@@ -161,16 +195,28 @@ Return a JSON object with this exact structure:
     }
 
     // ── Build output with IDs ──
-    const agents = parsed.agents.map((agent, index) => ({
-      id: `gen-${nanoid(8)}`,
-      name: agent.name,
-      role: agent.role,
-      persona: agent.persona,
-      avatar: agent.avatar || availableAvatars[index % availableAvatars.length],
-      color: agent.color || COLOR_PALETTE[index % COLOR_PALETTE.length],
-      priority:
-        agent.priority ?? (agent.role === 'teacher' ? 10 : agent.role === 'assistant' ? 7 : 5),
-    }));
+    const agents = parsed.agents.map((agent, index) => {
+      // Accept a voice only if it exactly matches one of the offered "providerId::voiceId" ids;
+      // anything else (hallucinated, malformed, non-string) is dropped and voiceConfig is omitted.
+      const matchedVoice = availableVoices?.find(
+        (v) => `${v.providerId}::${v.voiceId}` === agent.voice,
+      );
+      const voiceConfig = matchedVoice
+        ? { providerId: matchedVoice.providerId, voiceId: matchedVoice.voiceId }
+        : undefined;
+
+      return {
+        id: `gen-${nanoid(8)}`,
+        name: agent.name,
+        role: agent.role,
+        persona: agent.persona,
+        avatar: agent.avatar || availableAvatars[index % availableAvatars.length],
+        color: agent.color || COLOR_PALETTE[index % COLOR_PALETTE.length],
+        priority:
+          agent.priority ?? (agent.role === 'teacher' ? 10 : agent.role === 'assistant' ? 7 : 5),
+        ...(voiceConfig ? { voiceConfig } : {}),
+      };
+    });
 
     log.info(`Successfully generated ${agents.length} agent profiles for "${stageInfo.name}"`);
 
diff --git a/app/generation-preview/page.tsx b/app/generation-preview/page.tsx
index 213a51409..9272d0461 100644
--- a/app/generation-preview/page.tsx
+++ b/app/generation-preview/page.tsx
@@ -11,6 +11,7 @@ import { cn } from '@/lib/utils';
 import { useStageStore } from '@/lib/store/stage';
 import { useSettingsStore } from '@/lib/store/settings';
 import { useAgentRegistry } from '@/lib/orchestration/registry/store';
+import { getAvailableProvidersWithVoices } from '@/lib/audio/voice-resolver';
 import { useI18n } from '@/lib/hooks/use-i18n';
 import {
   loadImageMapping,
@@ -379,20 +380,67 @@ function GenerationPreviewContent() {
 
         try {
           const allAvatars = [
-            '/avatars/assist.png',
-            '/avatars/assist-2.png',
-            '/avatars/clown.png',
-            '/avatars/clown-2.png',
-            '/avatars/curious.png',
-            '/avatars/curious-2.png',
-            '/avatars/note-taker.png',
-            '/avatars/note-taker-2.png',
-            '/avatars/teacher.png',
-            '/avatars/teacher-2.png',
-            '/avatars/thinker.png',
-            '/avatars/thinker-2.png',
+            {
+              path: '/avatars/teacher.png',
+              desc: 'Male teacher with glasses, holding a book, green background',
+            },
+            {
+              path: '/avatars/teacher-2.png',
+              desc: 'Female teacher with long dark hair, blue traditional outfit, gentle expression',
+            },
+            {
+              path: '/avatars/assist.png',
+              desc: 'Young female assistant with glasses, pink background, friendly smile',
+            },
+            {
+              path: '/avatars/assist-2.png',
+              desc: 'Young female in orange top and purple overalls, cheerful and approachable',
+            },
+            {
+              path: '/avatars/clown.png',
+              desc: 'Energetic girl with glasses pointing up, green shirt, lively and fun',
+            },
+            {
+              path: '/avatars/clown-2.png',
+              desc: 'Playful girl with curly hair doing rock gesture, blue shirt, humorous vibe',
+            },
+            {
+              path: '/avatars/curious.png',
+              desc: 'Surprised boy with glasses, hand on cheek, curious expression',
+            },
+            {
+              path: '/avatars/curious-2.png',
+              desc: 'Boy with backpack holding a book and question mark bubble, inquisitive',
+            },
+            {
+              path: '/avatars/note-taker.png',
+              desc: 'Studious boy with glasses, blue shirt, calm and organized',
+            },
+            {
+              path: '/avatars/note-taker-2.png',
+              desc: 'Active boy with yellow backpack waving, blue outfit, enthusiastic learner',
+            },
+            {
+              path: '/avatars/thinker.png',
+              desc: 'Thoughtful girl with hand on chin, purple background, contemplative',
+            },
+            {
+              path: '/avatars/thinker-2.png',
+              desc: 'Girl reading a book intently, long dark hair, intellectual and focused',
+            },
           ];
 
+          const getAvailableVoicesForGeneration = () => {
+            const providers = getAvailableProvidersWithVoices(settings.ttsProvidersConfig);
+            return providers.flatMap((p) =>
+              p.voices.map((v) => ({
+                providerId: p.providerId,
+                voiceId: v.id,
+                voiceName: v.name,
+              })),
+            );
+          };
+
           // No outlines yet — agent generation uses only stage name + description
           const agentResp = await fetch('/api/generate/agent-profiles', {
             method: 'POST',
@@ -400,7 +448,9 @@ function GenerationPreviewContent() {
             body: JSON.stringify({
               stageInfo: { name: stage.name, description: stage.description },
               language: currentSession.requirements.language || 'zh-CN',
-              availableAvatars: allAvatars,
+              availableAvatars: allAvatars.map((a) => a.path),
+              avatarDescriptions: allAvatars.map((a) => ({ path: a.path, desc: a.desc })),
+              availableVoices: getAvailableVoicesForGeneration(),
             }),
             signal,
           });
diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx
index 289ee03cb..27585ecab 100644
--- a/components/agent/agent-bar.tsx
+++ b/components/agent/agent-bar.tsx
@@ -1,15 +1,450 @@
 'use client';
 
-import { useState, useEffect, useRef } from 'react';
+import { useState, useEffect, useRef, useCallback } from 'react';
 import { motion, AnimatePresence } from 'motion/react';
 import { Checkbox } from '@/components/ui/checkbox';
-import { Input } from '@/components/ui/input';
+import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover';
 import { cn } from '@/lib/utils';
 import { useI18n } from '@/lib/hooks/use-i18n';
 import { useSettingsStore } from '@/lib/store/settings';
 import { useAgentRegistry } from '@/lib/orchestration/registry/store';
-import { Sparkles, ChevronDown, ChevronUp, Shuffle } from 'lucide-react';
+import { resolveAgentVoice, getAvailableProvidersWithVoices } from '@/lib/audio/voice-resolver';
+import { playBrowserTTSPreview } from '@/lib/audio/browser-tts-preview';
+import {
+  Sparkles,
+  ChevronDown,
+  ChevronUp,
+  Shuffle,
+  Volume2,
+  VolumeX,
+  Loader2,
+  MessageSquare,
+  Minus,
+  Plus,
+} from 'lucide-react';
 import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip';
+import type { AgentConfig } from '@/lib/orchestration/registry/types';
+import type { TTSProviderId } from '@/lib/audio/types';
+import type { ProviderWithVoices } from '@/lib/audio/voice-resolver';
+
+/** Per-agent voice pill: shows the resolved voice; popover lets the user pick or preview one. */
+function AgentVoicePill({
+  agent,
+  agentIndex,
+  availableProviders,
+  disabled,
+}: {
+  agent: AgentConfig;
+  agentIndex: number;
+  availableProviders: ProviderWithVoices[];
+  disabled?: boolean;
+}) {
+  const updateAgent = useAgentRegistry((s) => s.updateAgent);
+  const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig);
+  const resolved = resolveAgentVoice(agent, agentIndex, availableProviders);
+  const [popoverOpen, setPopoverOpen] = useState(false);
+  const [previewingId, setPreviewingId] = useState<string | null>(null);
+  const previewCancelRef = useRef<(() => void) | null>(null);
+  const previewAudioRef = useRef<HTMLAudioElement | null>(null);
+  const previewAbortRef = useRef<AbortController | null>(null);
+
+  const displayName = (() => {
+    for (const p of availableProviders) {
+      if (p.providerId === resolved.providerId) {
+        const v = p.voices.find((voice) => voice.id === resolved.voiceId);
+        if (v) return v.name;
+      }
+    }
+    return resolved.voiceId;
+  })();
+
+  const stopPreview = useCallback(() => {
+    previewCancelRef.current?.();
+    previewCancelRef.current = null;
+    previewAbortRef.current?.abort();
+    previewAbortRef.current = null;
+    if (previewAudioRef.current) {
+      previewAudioRef.current.pause();
+      previewAudioRef.current.src = '';
+      previewAudioRef.current = null;
+    }
+    setPreviewingId(null);
+  }, []);
+
+  const handlePreview = useCallback(
+    async (providerId: TTSProviderId, voiceId: string) => {
+      const key = `${providerId}::${voiceId}`;
+      if (previewingId === key) {
+        stopPreview();
+        return;
+      }
+      stopPreview();
+      setPreviewingId(key);
+      const clearIfCurrent = () => setPreviewingId((cur) => (cur === key ? null : cur));
+      const courseLanguage =
+        (typeof localStorage !== 'undefined' && localStorage.getItem('generationLanguage')) ||
+        'zh-CN';
+      const previewText = courseLanguage === 'en-US' ? 'Welcome to AI Classroom' : '欢迎来到AI课堂';
+
+      if (providerId === 'browser-native-tts') {
+        const { promise, cancel } = playBrowserTTSPreview({ text: previewText, voice: voiceId });
+        previewCancelRef.current = cancel;
+        try {
+          await promise;
+        } catch {
+          // cancelled or failed — preview is best-effort, nothing to surface
+        }
+        clearIfCurrent();
+        return;
+      }
+
+      // Server TTS (a superseded request aborts into the catch; clearIfCurrent ignores it)
+      try {
+        const controller = new AbortController();
+        previewAbortRef.current = controller;
+        const providerConfig = ttsProvidersConfig[providerId];
+        const res = await fetch('/api/generate/tts', {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({
+            text: previewText,
+            audioId: 'voice-preview',
+            ttsProviderId: providerId,
+            ttsVoice: voiceId,
+            ttsSpeed: 1,
+            ttsApiKey: providerConfig?.apiKey,
+            ttsBaseUrl: providerConfig?.serverBaseUrl || providerConfig?.baseUrl,
+          }),
+          signal: controller.signal,
+        });
+        if (!res.ok) throw new Error('TTS error');
+        const data = await res.json();
+        if (!data.base64) throw new Error('No audio');
+        const audio = new Audio(`data:audio/${data.format || 'mp3'};base64,${data.base64}`);
+        previewAudioRef.current = audio;
+        audio.addEventListener('ended', clearIfCurrent);
+        audio.addEventListener('error', clearIfCurrent);
+        await audio.play();
+      } catch {
+        clearIfCurrent();
+      }
+    },
+    [previewingId, stopPreview, ttsProvidersConfig],
+  );
+
+  // Cleanup on unmount: stop any in-flight or playing preview
+  useEffect(() => () => stopPreview(), [stopPreview]);
+
+  if (disabled) {
+    return (
+      <div
+        onClick={(e) => e.stopPropagation()}
+        onPointerDown={(e) => e.stopPropagation()}
+        className="flex items-center gap-1 h-5 w-[88px] rounded-full bg-muted/40 px-2 text-[10px] text-muted-foreground/30 shrink-0 cursor-not-allowed"
+      >
+        <VolumeX className="size-2.5 shrink-0" />
+        <span className="truncate flex-1 text-left">{displayName}</span>
+      </div>
+    );
+  }
+
+  return (
+    <Popover
+      open={popoverOpen}
+      onOpenChange={(open) => {
+        setPopoverOpen(open);
+        if (!open) stopPreview();
+      }}
+    >
+      <PopoverTrigger asChild>
+        <button
+          type="button"
+          onClick={(e) => e.stopPropagation()}
+          onPointerDown={(e) => e.stopPropagation()}
+          className="flex items-center gap-1 h-5 w-[88px] rounded-full bg-primary/10 hover:bg-primary/20 dark:bg-primary/25 dark:hover:bg-primary/35 px-2 text-[10px] text-primary/80 hover:text-primary dark:text-primary/90 transition-colors shrink-0 cursor-pointer"
+        >
+          <Volume2 className="size-2.5 shrink-0" />
+          <span className="truncate flex-1 text-left">{displayName}</span>
+          <ChevronDown className="size-2.5 shrink-0 opacity-50" />
+        </button>
+      </PopoverTrigger>
+      <PopoverContent
+        side="bottom"
+        align="end"
+        sideOffset={4}
+        className="w-52 px-1 pb-1 pt-0 max-h-64 overflow-y-auto"
+        onClick={(e) => e.stopPropagation()}
+        onPointerDown={(e) => e.stopPropagation()}
+      >
+        {availableProviders.map((provider) => (
+          <div key={provider.providerId}>
+            <div className="text-[10px] text-muted-foreground/60 font-medium px-2 py-1 sticky top-0 bg-popover">
+              {provider.providerName}
+            </div>
+            {provider.voices.map((voice) => {
+              const isActive =
+                resolved.providerId === provider.providerId && resolved.voiceId === voice.id;
+              const previewKey = `${provider.providerId}::${voice.id}`;
+              const isPreviewing = previewingId === previewKey;
+              return (
+                <div
+                  key={previewKey}
+                  className={cn(
+                    'flex items-center gap-1 rounded-sm transition-colors',
+                    isActive ? 'bg-primary/10' : 'hover:bg-muted',
+                  )}
+                >
+                  <button
+                    type="button"
+                    onClick={() => {
+                      updateAgent(agent.id, {
+                        voiceConfig: { providerId: provider.providerId, voiceId: voice.id },
+                      });
+                      setPopoverOpen(false);
+                    }}
+                    className={cn(
+                      'flex-1 text-left text-xs px-2 py-1 min-w-0 truncate',
+                      isActive ? 'text-primary font-medium' : 'text-foreground',
+                    )}
+                  >
+                    {voice.name}
+                  </button>
+                  <button
+                    type="button"
+                    onClick={(e) => {
+                      e.stopPropagation();
+                      handlePreview(provider.providerId, voice.id);
+                    }}
+                    className={cn(
+                      'shrink-0 size-5 flex items-center justify-center rounded-sm transition-colors',
+                      isPreviewing
+                        ? 'text-primary'
+                        : 'text-muted-foreground/40 hover:text-muted-foreground',
+                    )}
+                  >
+                    {isPreviewing ? (
+                      <Loader2 className="size-3 animate-spin" />
+                    ) : (
+                      <Volume2 className="size-3" />
+                    )}
+                  </button>
+                </div>
+              );
+            })}
+          </div>
+        ))}
+      </PopoverContent>
+    </Popover>
+  );
+}
+
+/**
+ * Teacher voice pill — reads/writes global ttsProviderId + ttsVoice (single source of truth).
+ * This ensures lecture and discussion use the same voice for the teacher.
+ */
+function TeacherVoicePill({
+  availableProviders,
+  disabled,
+}: {
+  availableProviders: ProviderWithVoices[];
+  disabled?: boolean;
+}) {
+  const ttsProviderId = useSettingsStore((s) => s.ttsProviderId);
+  const ttsVoice = useSettingsStore((s) => s.ttsVoice);
+  const setTTSProvider = useSettingsStore((s) => s.setTTSProvider);
+  const setTTSVoice = useSettingsStore((s) => s.setTTSVoice);
+  const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig);
+  const [popoverOpen, setPopoverOpen] = useState(false);
+  const [previewingId, setPreviewingId] = useState<string | null>(null);
+  const previewCancelRef = useRef<(() => void) | null>(null);
+  const previewAudioRef = useRef<HTMLAudioElement | null>(null);
+  const previewAbortRef = useRef<AbortController | null>(null);
+
+  const displayName = (() => {
+    for (const p of availableProviders) {
+      if (p.providerId === ttsProviderId) {
+        const v = p.voices.find((voice) => voice.id === ttsVoice);
+        if (v) return v.name;
+      }
+    }
+    return ttsVoice || 'default';
+  })();
+
+  const stopPreview = useCallback(() => {
+    previewCancelRef.current?.();
+    previewCancelRef.current = null;
+    previewAbortRef.current?.abort();
+    previewAbortRef.current = null;
+    if (previewAudioRef.current) {
+      previewAudioRef.current.pause();
+      previewAudioRef.current.src = '';
+      previewAudioRef.current = null;
+    }
+    setPreviewingId(null);
+  }, []);
+
+  const handlePreview = useCallback(
+    async (providerId: TTSProviderId, voiceId: string) => {
+      const key = `${providerId}::${voiceId}`;
+      if (previewingId === key) {
+        stopPreview();
+        return;
+      }
+      stopPreview();
+      setPreviewingId(key);
+      const clearIfCurrent = () => setPreviewingId((cur) => (cur === key ? null : cur));
+      const courseLanguage =
+        (typeof localStorage !== 'undefined' && localStorage.getItem('generationLanguage')) ||
+        'zh-CN';
+      const previewText = courseLanguage === 'en-US' ? 'Welcome to AI Classroom' : '欢迎来到AI课堂';
+
+      if (providerId === 'browser-native-tts') {
+        const { promise, cancel } = playBrowserTTSPreview({ text: previewText, voice: voiceId });
+        previewCancelRef.current = cancel;
+        try {
+          await promise;
+        } catch {
+          // cancelled or failed — best-effort; clearIfCurrent leaves a newer preview alone
+        }
+        clearIfCurrent();
+        return;
+      }
+
+      try {
+        const controller = new AbortController();
+        previewAbortRef.current = controller;
+        const providerConfig = ttsProvidersConfig[providerId];
+        const res = await fetch('/api/generate/tts', {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({
+            text: previewText,
+            audioId: 'voice-preview',
+            ttsProviderId: providerId,
+            ttsVoice: voiceId,
+            ttsSpeed: 1,
+            ttsApiKey: providerConfig?.apiKey,
+            ttsBaseUrl: providerConfig?.serverBaseUrl || providerConfig?.baseUrl,
+          }),
+          signal: controller.signal,
+        });
+        if (!res.ok) throw new Error('TTS error');
+        const data = await res.json();
+        if (!data.base64) throw new Error('No audio');
+        const audio = new Audio(`data:audio/${data.format || 'mp3'};base64,${data.base64}`);
+        previewAudioRef.current = audio;
+        audio.addEventListener('ended', clearIfCurrent);
+        audio.addEventListener('error', clearIfCurrent);
+        await audio.play();
+      } catch {
+        clearIfCurrent();
+      }
+    },
+    [previewingId, stopPreview, ttsProvidersConfig],
+  );
+
+  useEffect(() => () => stopPreview(), [stopPreview]); // stop any preview on unmount
+
+  if (disabled) {
+    return (
+      <div
+        onClick={(e) => e.stopPropagation()}
+        onPointerDown={(e) => e.stopPropagation()}
+        className="flex items-center gap-1 h-5 w-[88px] rounded-full bg-muted/40 px-2 text-[10px] text-muted-foreground/30 shrink-0 cursor-not-allowed"
+      >
+        <VolumeX className="size-2.5 shrink-0" />
+        <span className="truncate flex-1 text-left">{displayName}</span>
+      </div>
+    );
+  }
+
+  return (
+    <Popover
+      open={popoverOpen}
+      onOpenChange={(open) => {
+        setPopoverOpen(open);
+        if (!open) stopPreview();
+      }}
+    >
+      <PopoverTrigger asChild>
+        <button
+          type="button"
+          onClick={(e) => e.stopPropagation()}
+          onPointerDown={(e) => e.stopPropagation()}
+          className="flex items-center gap-1 h-5 w-[88px] rounded-full bg-primary/10 hover:bg-primary/20 dark:bg-primary/25 dark:hover:bg-primary/35 px-2 text-[10px] text-primary/80 hover:text-primary dark:text-primary/90 transition-colors shrink-0 cursor-pointer"
+        >
+          <Volume2 className="size-2.5 shrink-0" />
+          <span className="truncate flex-1 text-left">{displayName}</span>
+          <ChevronDown className="size-2.5 shrink-0 opacity-50" />
+        </button>
+      </PopoverTrigger>
+      <PopoverContent
+        side="bottom"
+        align="end"
+        sideOffset={4}
+        className="w-52 px-1 pb-1 pt-0 max-h-64 overflow-y-auto"
+        onClick={(e) => e.stopPropagation()}
+        onPointerDown={(e) => e.stopPropagation()}
+      >
+        {availableProviders.map((provider) => (
+          <div key={provider.providerId}>
+            <div className="text-[10px] text-muted-foreground/60 font-medium px-2 py-1 sticky top-0 bg-popover">
+              {provider.providerName}
+            </div>
+            {provider.voices.map((voice) => {
+              const isActive = ttsProviderId === provider.providerId && ttsVoice === voice.id;
+              const previewKey = `${provider.providerId}::${voice.id}`;
+              const isPreviewing = previewingId === previewKey;
+              return (
+                <div
+                  key={previewKey}
+                  className={cn(
+                    'flex items-center gap-1 rounded-sm transition-colors',
+                    isActive ? 'bg-primary/10' : 'hover:bg-muted',
+                  )}
+                >
+                  <button
+                    type="button"
+                    onClick={() => {
+                      setTTSProvider(provider.providerId);
+                      setTTSVoice(voice.id);
+                      setPopoverOpen(false);
+                    }}
+                    className={cn(
+                      'flex-1 text-left text-xs px-2 py-1 min-w-0 truncate',
+                      isActive ? 'text-primary font-medium' : 'text-foreground',
+                    )}
+                  >
+                    {voice.name}
+                  </button>
+                  <button
+                    type="button"
+                    onClick={(e) => {
+                      e.stopPropagation();
+                      handlePreview(provider.providerId, voice.id);
+                    }}
+                    className={cn(
+                      'shrink-0 size-5 flex items-center justify-center rounded-sm transition-colors',
+                      isPreviewing
+                        ? 'text-primary'
+                        : 'text-muted-foreground/40 hover:text-muted-foreground',
+                    )}
+                  >
+                    {isPreviewing ? (
+                      <Loader2 className="size-3 animate-spin" />
+                    ) : (
+                      <Volume2 className="size-3" />
+                    )}
+                  </button>
+                </div>
+              );
+            })}
+          </div>
+        ))}
+      </PopoverContent>
+    </Popover>
+  );
+}
 
 export function AgentBar() {
   const { t } = useI18n();
@@ -20,24 +455,51 @@ export function AgentBar() {
   const setMaxTurns = useSettingsStore((s) => s.setMaxTurns);
   const agentMode = useSettingsStore((s) => s.agentMode);
   const setAgentMode = useSettingsStore((s) => s.setAgentMode);
+  const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig);
+  const ttsEnabled = useSettingsStore((s) => s.ttsEnabled);
 
   const [open, setOpen] = useState(false);
+  const [browserVoices, setBrowserVoices] = useState<SpeechSynthesisVoice[]>([]);
   const containerRef = useRef<HTMLDivElement>(null);
 
+  // Load browser native TTS voices
+  useEffect(() => {
+    if (typeof window === 'undefined' || !window.speechSynthesis) return;
+    const loadVoices = () => setBrowserVoices(speechSynthesis.getVoices());
+    loadVoices();
+    speechSynthesis.addEventListener('voiceschanged', loadVoices);
+    return () => speechSynthesis.removeEventListener('voiceschanged', loadVoices);
+  }, []);
+
   const allAgents = listAgents();
-  // In preset mode, only show default (non-generated) agents
   const agents = allAgents.filter((a) => !a.isGenerated);
   const teacherAgent = agents.find((a) => a.role === 'teacher');
   const selectedAgents = agents.filter((a) => selectedAgentIds.includes(a.id));
   const nonTeacherSelected = selectedAgents.filter((a) => a.role !== 'teacher');
 
-  // Click-outside to collapse
+  const serverProviders = getAvailableProvidersWithVoices(ttsProvidersConfig);
+  const availableProviders: ProviderWithVoices[] = [
+    ...serverProviders,
+    ...(browserVoices.length > 0
+      ? [
+          {
+            providerId: 'browser-native-tts' as TTSProviderId,
+            providerName: 'Browser Native',
+            voices: browserVoices.map((v) => ({ id: v.voiceURI, name: v.name })),
+          },
+        ]
+      : []),
+  ];
+  const showVoice = availableProviders.length > 0;
+
   useEffect(() => {
     if (!open) return;
     const handler = (e: MouseEvent) => {
-      if (containerRef.current && !containerRef.current.contains(e.target as Node)) {
-        setOpen(false);
-      }
+      const target = e.target as Node;
+      if (containerRef.current && containerRef.current.contains(target)) return;
+      // Don't close if clicking inside a Radix portal (Popover, Select, etc.)
+      if ((target as Element).closest?.('[data-radix-popper-content-wrapper]')) return;
+      setOpen(false);
     };
     document.addEventListener('mousedown', handler);
     return () => document.removeEventListener('mousedown', handler);
@@ -46,7 +508,6 @@ export function AgentBar() {
   const handleModeChange = (mode: 'preset' | 'auto') => {
     setAgentMode(mode);
     if (mode === 'preset') {
-      // Ensure a teacher is always selected in preset mode
       const hasTeacherSelected = selectedAgentIds.some((id) => {
         const a = agents.find((agent) => agent.id === id);
         return a?.role === 'teacher';
@@ -59,7 +520,7 @@ export function AgentBar() {
 
   const toggleAgent = (agentId: string) => {
     const agent = agents.find((a) => a.id === agentId);
-    if (agent?.role === 'teacher') return; // teacher is always selected
+    if (agent?.role === 'teacher') return;
     if (selectedAgentIds.includes(agentId)) {
       setSelectedAgentIds(selectedAgentIds.filter((id) => id !== agentId));
     } else {
@@ -79,10 +540,8 @@ export function AgentBar() {
     return translated !== key ? translated : agent.role;
   };
 
-  /* ── Shared avatar row — always visible on the right side ── */
   const avatarRow = (
     <div className="flex items-center gap-1.5 shrink-0">
-      {/* Teacher avatar — always shown */}
       {teacherAgent && (
         <div className="size-8 rounded-full overflow-hidden ring-2 ring-blue-400/40 dark:ring-blue-500/30 shrink-0">
           <img
@@ -95,7 +554,6 @@ export function AgentBar() {
 
       {agentMode === 'auto' ? (
         <>
-          {/* In auto mode: show assistant avatar + shuffle indicator */}
           <div className="flex -space-x-2">
             {agents.find((a) => a.role === 'assistant') && (
               <div className="size-6 rounded-full overflow-hidden ring-[1.5px] ring-background">
@@ -111,7 +569,6 @@ export function AgentBar() {
         </>
       ) : (
         <>
-          {/* In preset mode: show selected non-teacher agents */}
           {nonTeacherSelected.length > 0 && (
             <div className="flex -space-x-2">
               {nonTeacherSelected.slice(0, 4).map((agent) => (
@@ -137,12 +594,59 @@ export function AgentBar() {
           )}
         </>
       )}
+      {showVoice &&
+        (ttsEnabled ? (
+          <Volume2 className="size-3.5 text-muted-foreground/40 group-hover:text-muted-foreground/60 transition-colors" />
+        ) : (
+          <VolumeX className="size-3.5 text-muted-foreground/30" />
+        ))}
     </div>
   );
 
+  // One row of the agent list: checkbox, avatar, name, role label and, when voice is
+  // shown, a voice pill. Teacher rows are always checked, disabled and not clickable.
+  const renderAgentRow = (agent: AgentConfig, agentIndex: number, isTeacher: boolean) => {
+    const checked = isTeacher || selectedAgentIds.includes(agent.id);
+    const displayName = getAgentName(agent);
+    return (
+      <div
+        key={agent.id}
+        onClick={isTeacher ? undefined : () => toggleAgent(agent.id)}
+        className={cn(
+          'w-full flex items-center gap-2 px-2.5 py-1.5 rounded-lg transition-colors',
+          isTeacher ? 'bg-primary/5' : 'cursor-pointer',
+          !isTeacher && (checked ? 'bg-primary/5' : 'hover:bg-muted/50'),
+        )}
+      >
+        <Checkbox
+          checked={checked}
+          disabled={isTeacher}
+          className={cn('pointer-events-none', isTeacher && 'opacity-50')}
+        />
+        <div
+          className="size-7 rounded-full overflow-hidden shrink-0 ring-1 ring-border/40"
+          style={{ boxShadow: checked ? `0 0 0 2px ${agent.color}30` : undefined }}
+        >
+          <img src={agent.avatar} alt={displayName} className="size-full object-cover" />
+        </div>
+        <span className="text-[13px] font-medium truncate min-w-0 flex-1">{displayName}</span>
+        <span className="text-[10px] text-muted-foreground/50 shrink-0 w-[52px] text-right">
+          {getAgentRole(agent)}
+        </span>
+        {showVoice && (
+          <AgentVoicePill
+            agent={agent}
+            agentIndex={agentIndex}
+            availableProviders={availableProviders}
+            disabled={!ttsEnabled}
+          />
+        )}
+      </div>
+    );
+  };
+
   return (
-    <div ref={containerRef} className="relative w-80">
-      {/* ── Header row — always in document flow ── */}
+    <div ref={containerRef} className="relative w-96">
       <Tooltip>
         <TooltipTrigger asChild>
           <button
@@ -152,15 +656,10 @@ export function AgentBar() {
             )}
             onClick={() => setOpen(!open)}
           >
-            {/* Left side — text changes based on open/close */}
-            <span className="text-xs text-muted-foreground/60 group-hover:text-muted-foreground transition-colors hidden sm:block font-medium flex-1 text-left">
+            <span className="text-xs text-muted-foreground/60 group-hover:text-muted-foreground transition-colors hidden sm:block font-medium flex-1 text-left truncate">
               {open ? t('agentBar.expandedTitle') : t('agentBar.readyToLearn')}
             </span>
-
-            {/* Right side — avatars always visible */}
             {avatarRow}
-
-            {/* Chevron */}
             {open ? (
               <ChevronUp className="size-3 text-muted-foreground/40 group-hover:text-muted-foreground/70 transition-colors" />
             ) : (
@@ -175,7 +674,6 @@ export function AgentBar() {
         )}
       </Tooltip>
 
-      {/* ── Expanded panel (absolute, floating below the header) ── */}
       <AnimatePresence>
         {open && (
           <motion.div
@@ -183,11 +681,36 @@ export function AgentBar() {
             animate={{ opacity: 1, y: 0, scale: 1 }}
             exit={{ opacity: 0, y: -4, scale: 0.97 }}
             transition={{ duration: 0.2, ease: [0.25, 0.1, 0.25, 1] }}
-            className="absolute right-0 top-full mt-1 z-50 w-80"
+            className="absolute right-0 top-full mt-1 z-50 w-96"
           >
-            <div className="rounded-2xl bg-white/95 dark:bg-slate-800/95 backdrop-blur-sm ring-1 ring-black/[0.04] dark:ring-white/[0.06] shadow-[0_1px_8px_-2px_rgba(0,0,0,0.06)] dark:shadow-[0_1px_8px_-2px_rgba(0,0,0,0.3)] px-2.5 py-2">
-              {/* Mode tabs — full width, 50/50 */}
-              <div className="flex rounded-lg border bg-muted/30 p-0.5 mb-2.5">
+            <div className="rounded-2xl bg-white/95 dark:bg-slate-800/95 backdrop-blur-sm ring-1 ring-black/[0.04] dark:ring-white/[0.06] shadow-[0_1px_8px_-2px_rgba(0,0,0,0.06)] dark:shadow-[0_1px_8px_-2px_rgba(0,0,0,0.3)] px-2 py-1.5">
+              {/* Teacher — always visible */}
+              {teacherAgent && (
+                <div className="flex items-center gap-2 px-2.5 py-1.5 rounded-lg bg-primary/5 mb-2">
+                  <div
+                    className="size-7 rounded-full overflow-hidden shrink-0 ring-1 ring-border/40"
+                    style={{ boxShadow: `0 0 0 2px ${teacherAgent.color}30` }}
+                  >
+                    <img
+                      src={teacherAgent.avatar}
+                      alt={getAgentName(teacherAgent)}
+                      className="size-full object-cover"
+                    />
+                  </div>
+                  <span className="text-[13px] font-medium truncate min-w-0 flex-1">
+                    {getAgentName(teacherAgent)}
+                  </span>
+                  {showVoice && (
+                    <TeacherVoicePill
+                      availableProviders={availableProviders}
+                      disabled={!ttsEnabled}
+                    />
+                  )}
+                </div>
+              )}
+
+              {/* Mode tabs */}
+              <div className="flex rounded-lg border bg-muted/30 p-0.5 mb-2">
                 <button
                   onClick={() => handleModeChange('preset')}
                   className={cn(
@@ -214,86 +737,79 @@ export function AgentBar() {
               </div>
 
               {agentMode === 'preset' ? (
-                /* Agent list — teacher is always selected, no need to show */
-                <div className="max-h-72 overflow-y-auto -mx-1">
+                <div className="max-h-56 overflow-y-auto -mx-0.5">
                   {agents
                     .filter((a) => a.role !== 'teacher')
-                    .map((agent) => {
-                      const isSelected = selectedAgentIds.includes(agent.id);
-                      return (
-                        <div
-                          key={agent.id}
-                          onClick={() => toggleAgent(agent.id)}
-                          className={cn(
-                            'w-full flex items-center gap-3 px-3 py-2 text-left transition-colors cursor-pointer rounded-lg',
-                            isSelected ? 'bg-primary/5' : 'hover:bg-muted/50',
-                          )}
-                        >
-                          <Checkbox checked={isSelected} className="pointer-events-none" />
-                          <div
-                            className="size-8 rounded-full overflow-hidden shrink-0 ring-1 ring-border/40"
-                            style={{
-                              boxShadow: isSelected ? `0 0 0 2px ${agent.color}30` : undefined,
-                            }}
-                          >
-                            <img
-                              src={agent.avatar}
-                              alt={getAgentName(agent)}
-                              className="size-full object-cover"
-                            />
-                          </div>
-                          <div className="flex-1 min-w-0">
-                            <div className="text-sm font-medium flex items-center gap-1.5">
-                              {getAgentName(agent)}
-                              <span className="text-[10px] text-muted-foreground/50 font-normal">
-                                {getAgentRole(agent)}
-                              </span>
-                            </div>
-                            {(() => {
-                              const descKey = `settings.agentDescriptions.${agent.id}`;
-                              const desc = t(descKey);
-                              return desc !== descKey ? (
-                                <p className="text-xs text-muted-foreground/60 mt-0.5 leading-relaxed">
-                                  {desc}
-                                </p>
-                              ) : null;
-                            })()}
-                          </div>
-                        </div>
-                      );
-                    })}
+                    .map((agent, idx) => renderAgentRow(agent, idx + 1, false))}
                 </div>
               ) : (
-                /* Auto-generate mode */
-                <div className="flex flex-col items-center pt-6 pb-2 gap-8">
-                  {/* Shuffle icon with ambient animation */}
+                <div className="flex flex-col items-center pt-6 pb-3 gap-4">
                   <div className="relative flex items-center justify-center">
-                    {/* Ping ripple */}
-                    <div className="absolute size-12 rounded-full bg-violet-400/10 dark:bg-violet-400/15 animate-ping [animation-duration:3s]" />
-                    {/* Soft glow ring */}
-                    <div className="absolute size-14 rounded-full bg-violet-400/5 dark:bg-violet-400/10 animate-pulse [animation-duration:2.5s]" />
-                    {/* Icon */}
-                    <Shuffle className="relative size-7 text-violet-400 dark:text-violet-500" />
+                    <div className="absolute size-10 rounded-full bg-violet-400/10 dark:bg-violet-400/15 animate-ping [animation-duration:3s]" />
+                    <div className="absolute size-12 rounded-full bg-violet-400/5 dark:bg-violet-400/10 animate-pulse [animation-duration:2.5s]" />
+                    <Shuffle className="relative size-5 text-violet-400 dark:text-violet-500" />
+                  </div>
+                  <div className="flex-1" />
+                  <div className="text-center space-y-1">
+                    <p className="text-[11px] text-muted-foreground/60">
+                      {t('settings.agentModeAutoDesc')}
+                    </p>
+                    <p className="text-[10px] text-muted-foreground/40">
+                      {t('agentBar.voiceAutoAssign')}
+                    </p>
                   </div>
-                  <p className="text-xs text-muted-foreground text-center">
-                    {t('settings.agentModeAutoDesc')}
-                  </p>
                 </div>
               )}
 
-              {/* Max turns — always visible */}
-              <div className="pt-2.5 mt-2.5 border-t flex items-center gap-3">
-                <span className="text-xs text-muted-foreground shrink-0">
+              {/* Max turns — compact stepper */}
+              <div className="flex items-center gap-1.5 px-2 py-1 mt-1 border-t border-border/30">
+                <MessageSquare className="size-3 text-muted-foreground/40 shrink-0" />
+                <span className="text-[11px] text-muted-foreground/50 flex-1">
                   {t('settings.maxTurns')}
                 </span>
-                <Input
-                  type="number"
-                  min="1"
-                  max="20"
-                  value={maxTurns}
-                  onChange={(e) => setMaxTurns(e.target.value)}
-                  className="w-16 h-7 text-xs"
-                />
+                <div className="flex items-center rounded-full bg-muted/50 h-5 shrink-0">
+                  <button
+                    type="button"
+                    onClick={(e) => {
+                      e.stopPropagation();
+                      const v = Math.max(1, parseInt(maxTurns || '1') - 1);
+                      setMaxTurns(String(v));
+                    }}
+                    className="size-5 flex items-center justify-center text-muted-foreground/60 hover:text-foreground transition-colors rounded-full hover:bg-muted"
+                  >
+                    <Minus className="size-2.5" />
+                  </button>
+                  <input
+                    type="text"
+                    inputMode="numeric"
+                    value={maxTurns}
+                    onChange={(e) => {
+                      const raw = e.target.value.replace(/\D/g, '');
+                      if (!raw) {
+                        setMaxTurns('');
+                        return;
+                      }
+                      const v = Math.min(20, Math.max(1, parseInt(raw)));
+                      setMaxTurns(String(v));
+                    }}
+                    onBlur={() => {
+                      if (!maxTurns || parseInt(maxTurns) < 1) setMaxTurns('1');
+                    }}
+                    onClick={(e) => e.stopPropagation()}
+                    className="w-5 h-5 text-[11px] font-medium tabular-nums text-center bg-transparent outline-none border-none"
+                  />
+                  <button
+                    type="button"
+                    onClick={(e) => {
+                      e.stopPropagation();
+                      const v = Math.min(20, parseInt(maxTurns || '1') + 1);
+                      setMaxTurns(String(v));
+                    }}
+                    className="size-5 flex items-center justify-center text-muted-foreground/60 hover:text-foreground transition-colors rounded-full hover:bg-muted"
+                  >
+                    <Plus className="size-2.5" />
+                  </button>
+                </div>
               </div>
             </div>
           </motion.div>
diff --git a/components/chat/chat-area.tsx b/components/chat/chat-area.tsx
index 03ebd8ecc..eae2efb81 100644
--- a/components/chat/chat-area.tsx
+++ b/components/chat/chat-area.tsx
@@ -27,6 +27,14 @@ interface ChatAreaProps {
   onThinking?: (state: { stage: string; agentId?: string } | null) => void;
   onCueUser?: (fromAgentId?: string, prompt?: string) => void;
   onStopSession?: () => void;
+  onSegmentSealed?: (
+    messageId: string,
+    partId: string,
+    fullText: string,
+    agentId: string | null,
+  ) => void;
+  /** When provided and returns true, StreamBuffer holds on the current text item after reveal. */
+  shouldHoldAfterReveal?: () => boolean;
   currentSceneId?: string | null;
 }
 
@@ -69,6 +77,8 @@ export const ChatArea = forwardRef<ChatAreaRef, ChatAreaProps>(
       onThinking,
       onCueUser,
       onStopSession,
+      onSegmentSealed,
+      shouldHoldAfterReveal,
       currentSceneId,
     },
     ref,
@@ -102,6 +112,8 @@ export const ChatArea = forwardRef<ChatAreaRef, ChatAreaProps>(
       onCueUser,
       onActiveBubble,
       onStopSession,
+      onSegmentSealed,
+      shouldHoldAfterReveal,
     });
 
     const [activeTab, setActiveTab] = useState<'lecture' | 'chat'>('lecture');
diff --git a/components/chat/use-chat-sessions.ts b/components/chat/use-chat-sessions.ts
index be39df01d..c78d269bb 100644
--- a/components/chat/use-chat-sessions.ts
+++ b/components/chat/use-chat-sessions.ts
@@ -36,6 +36,14 @@ interface UseChatSessionsOptions {
   onActiveBubble?: (messageId: string | null) => void;
   /** Called when a QA/Discussion session completes naturally (director end). */
   onStopSession?: () => void;
+  onSegmentSealed?: (
+    messageId: string,
+    partId: string,
+    fullText: string,
+    agentId: string | null,
+  ) => void;
+  /** When provided and returns true, StreamBuffer holds on the current text item after reveal. */
+  shouldHoldAfterReveal?: () => boolean;
 }
 
 export function useChatSessions(options: UseChatSessionsOptions = {}) {
@@ -45,6 +53,8 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) {
   const onCueUserRef = useRef(options.onCueUser);
   const onActiveBubbleRef = useRef(options.onActiveBubble);
   const onStopSessionRef = useRef(options.onStopSession);
+  const onSegmentSealedRef = useRef(options.onSegmentSealed);
+  const shouldHoldAfterRevealRef = useRef(options.shouldHoldAfterReveal);
   useEffect(() => {
     onLiveSpeechRef.current = options.onLiveSpeech;
     onSpeechProgressRef.current = options.onSpeechProgress;
@@ -52,6 +62,8 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) {
     onCueUserRef.current = options.onCueUser;
     onActiveBubbleRef.current = options.onActiveBubble;
     onStopSessionRef.current = options.onStopSession;
+    onSegmentSealedRef.current = options.onSegmentSealed;
+    shouldHoldAfterRevealRef.current = options.shouldHoldAfterReveal;
   }, [
     options.onLiveSpeech,
     options.onSpeechProgress,
@@ -59,6 +71,8 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) {
     options.onCueUser,
     options.onActiveBubble,
     options.onStopSession,
+    options.onSegmentSealed,
+    options.shouldHoldAfterReveal,
   ]);
   const { t } = useI18n();
 
@@ -321,6 +335,19 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) {
           onError(message: string) {
             log.error('[Buffer] Stream error:', message);
           },
+
+          onSegmentSealed(
+            messageId: string,
+            partId: string,
+            fullText: string,
+            agentId: string | null,
+          ) {
+            onSegmentSealedRef.current?.(messageId, partId, fullText, agentId);
+          },
+
+          shouldHoldAfterReveal() {
+            return shouldHoldAfterRevealRef.current?.() ?? false;
+          },
         },
         pacingOptions,
       );
diff --git a/components/generation/media-popover.tsx b/components/generation/media-popover.tsx
index 496ec9fe0..309a12acc 100644
--- a/components/generation/media-popover.tsx
+++ b/components/generation/media-popover.tsx
@@ -416,56 +416,9 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) {
               enabled={ttsEnabled}
               onToggle={setTTSEnabled}
             >
-              {/* Provider + Voice grouped select + preview */}
-              <div className="flex items-center gap-2">
-                <div className="flex-1 min-w-0">
-                  <GroupedSelect
-                    groups={ttsGroups}
-                    selectedGroupId={ttsProviderId}
-                    selectedItemId={ttsVoice}
-                    onSelect={(gid, iid) => {
-                      if (gid !== ttsProviderId) {
-                        setTTSProvider(gid as TTSProviderId);
-                      }
-                      setTTSVoice(iid);
-                    }}
-                  />
-                </div>
-                <button
-                  onClick={handlePreview}
-                  className={cn(
-                    'inline-flex items-center gap-1 rounded-md px-2 py-1.5 text-[11px] font-medium transition-all shrink-0',
-                    previewing
-                      ? 'bg-violet-100 dark:bg-violet-900/40 text-violet-700 dark:text-violet-300'
-                      : 'bg-muted/60 text-muted-foreground hover:bg-muted hover:text-foreground',
-                  )}
-                >
-                  {previewing ? (
-                    <Loader2 className="size-3 animate-spin" />
-                  ) : (
-                    <Play className="size-3" />
-                  )}
-                  {previewing ? t('toolbar.ttsPreviewing') : t('toolbar.ttsPreview')}
-                </button>
-              </div>
-              {ttsSpeedRange && (
-                <div className="flex items-center gap-2.5 mt-2.5">
-                  <span className="text-[10px] text-muted-foreground/60 shrink-0">
-                    {t('media.speed')}
-                  </span>
-                  <Slider
-                    value={[ttsSpeed]}
-                    onValueChange={(value) => setTTSSpeed(value[0])}
-                    min={ttsSpeedRange.min}
-                    max={ttsSpeedRange.max}
-                    step={0.1}
-                    className="flex-1"
-                  />
-                  <span className="text-[10px] text-muted-foreground tabular-nums w-7 text-right">
-                    {ttsSpeed.toFixed(1)}x
-                  </span>
-                </div>
-              )}
+              <p className="text-[11px] text-muted-foreground/60">
+                {t('settings.ttsVoiceConfigHint')}
+              </p>
             </TabPanel>
           )}
 
diff --git a/components/roundtable/audio-indicator.tsx b/components/roundtable/audio-indicator.tsx
new file mode 100644
index 000000000..4b630a1d0
--- /dev/null
+++ b/components/roundtable/audio-indicator.tsx
@@ -0,0 +1,43 @@
+'use client';
+
+import { motion } from 'motion/react';
+
+export type AudioIndicatorState = 'idle' | 'generating' | 'playing';
+
+interface AudioIndicatorProps {
+  state: AudioIndicatorState;
+  agentColor?: string;
+}
+
+const BAR_COUNT = 4;
+
+/**
+ * Animated bar indicator for audio activity. Renders nothing when `state` is 'idle'.
+ * While 'generating' the bars use a fixed amber tint and a 0.8 cycle duration;
+ * otherwise they use `agentColor` and a 0.5 cycle duration.
+ */
+export function AudioIndicator({ state, agentColor = '#10b981' }: AudioIndicatorProps) {
+  if (state === 'idle') return null;
+
+  const isGenerating = state === 'generating';
+  const barColor = isGenerating ? 'rgba(251, 191, 36, 0.7)' : agentColor;
+  const period = isGenerating ? 0.8 : 0.5;
+  // Odd-indexed bars peak at 12 instead of 10; each bar starts 1/BAR_COUNT of a cycle later.
+  const bars = Array.from({ length: BAR_COUNT }, (_, idx) => ({
+    peak: 10 + (idx % 2) * 2,
+    delay: idx * (period / BAR_COUNT),
+  }));
+
+  return (
+    <span className="inline-flex items-end gap-[2px]" style={{ height: 12 }}>
+      {bars.map(({ peak, delay }, idx) => (
+        <motion.span
+          key={idx}
+          style={{ width: 2, borderRadius: 1, backgroundColor: barColor }}
+          animate={{ height: [4, peak, 4] }}
+          transition={{ duration: period, repeat: Infinity, ease: 'easeInOut', delay }}
+        />
+      ))}
+    </span>
+  );
+}
diff --git a/components/roundtable/index.tsx b/components/roundtable/index.tsx
index 9ab09939b..31a3cd581 100644
--- a/components/roundtable/index.tsx
+++ b/components/roundtable/index.tsx
@@ -16,6 +16,8 @@ import {
   Loader2,
 } from 'lucide-react';
 import { cn } from '@/lib/utils';
+import { AudioIndicator } from './audio-indicator';
+import type { AudioIndicatorState } from './audio-indicator';
 import { CanvasToolbar } from '@/components/canvas/canvas-toolbar';
 import { useAudioRecorder } from '@/lib/hooks/use-audio-recorder';
 import { useI18n } from '@/lib/hooks/use-i18n';
@@ -47,6 +49,8 @@ interface RoundtableProps {
   readonly isStreaming?: boolean;
   readonly sessionType?: 'qa' | 'discussion';
   readonly speakingAgentId?: string | null;
+  readonly audioIndicatorState?: AudioIndicatorState;
+  readonly audioAgentId?: string | null;
   readonly speechProgress?: number | null; // StreamBuffer reveal progress (0–1) for auto-scroll
   readonly showEndFlash?: boolean;
   readonly endFlashSessionType?: 'qa' | 'discussion';
@@ -110,6 +114,8 @@ export function Roundtable({
   isStreaming,
   sessionType,
   speakingAgentId,
+  audioIndicatorState,
+  audioAgentId,
   speechProgress: _speechProgress,
   showEndFlash,
   endFlashSessionType = 'discussion',
@@ -456,7 +462,10 @@ export function Roundtable({
         ttsEnabled={ttsEnabled}
         ttsMuted={ttsMuted}
         ttsVolume={ttsVolume}
-        onToggleMute={() => ttsEnabled && setTTSMuted(!ttsMuted)}
+        onToggleMute={() => {
+          if (!ttsEnabled) return;
+          setTTSMuted(!ttsMuted);
+        }}
         onVolumeChange={(v) => setTTSVolume(v)}
         autoPlayLecture={autoPlayLecture}
         onToggleAutoPlay={() => setAutoPlayLecture(!autoPlayLecture)}
@@ -1020,6 +1029,29 @@ export function Roundtable({
                         })()}
 
                       <div ref={bubbleScrollRef} className="overflow-y-auto scrollbar-hide">
+                        {/* Agent name + audio indicator header */}
+                        {bubbleRole !== 'user' && bubbleName && (
+                          <div className="flex items-center gap-1 mb-0.5">
+                            <span className="text-[10px] font-semibold text-gray-400 dark:text-gray-500 truncate">
+                              {bubbleName}
+                            </span>
+                            <AudioIndicator
+                              state={
+                                speakingAgentId === audioAgentId
+                                  ? (audioIndicatorState ?? 'idle')
+                                  : 'idle'
+                              }
+                              agentColor={
+                                bubbleRole === 'agent'
+                                  ? (useAgentRegistry.getState().getAgent(speakingAgentId || '')
+                                      ?.color ?? undefined)
+                                  : (useAgentRegistry
+                                      .getState()
+                                      .getAgent(teacherParticipant?.id || '')?.color ?? undefined)
+                              }
+                            />
+                          </div>
+                        )}
                         {isBubbleLoading ? (
                           <div className="flex gap-1 items-center py-1">
                             <motion.div
diff --git a/components/settings/audio-settings.tsx b/components/settings/audio-settings.tsx
index d8fe1e3ce..dd69eed23 100644
--- a/components/settings/audio-settings.tsx
+++ b/components/settings/audio-settings.tsx
@@ -10,7 +10,6 @@ import {
   SelectTrigger,
   SelectValue,
 } from '@/components/ui/select';
-import { Slider } from '@/components/ui/slider';
 import { Switch } from '@/components/ui/switch';
 import { Button } from '@/components/ui/button';
 import { useI18n } from '@/lib/hooks/use-i18n';
@@ -22,15 +21,10 @@ import {
   getASRSupportedLanguages,
 } from '@/lib/audio/constants';
 import type { TTSProviderId, ASRProviderId } from '@/lib/audio/types';
-import { Volume2, Mic, MicOff, Loader2, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react';
+import { Volume2, Mic, MicOff, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react';
 import { cn } from '@/lib/utils';
 import azureVoicesData from '@/lib/audio/azure.json';
 import { createLogger } from '@/lib/logger';
-import {
-  ensureVoicesLoaded,
-  isBrowserTTSAbortError,
-  playBrowserTTSPreview,
-} from '@/lib/audio/browser-tts-preview';
 
 const log = createLogger('AudioSettings');
 
@@ -75,11 +69,9 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
   // TTS state
   const ttsProviderId = useSettingsStore((state) => state.ttsProviderId);
   const ttsVoice = useSettingsStore((state) => state.ttsVoice);
-  const ttsSpeed = useSettingsStore((state) => state.ttsSpeed);
   const ttsProvidersConfig = useSettingsStore((state) => state.ttsProvidersConfig);
   const setTTSProvider = useSettingsStore((state) => state.setTTSProvider);
   const setTTSVoice = useSettingsStore((state) => state.setTTSVoice);
-  const setTTSSpeed = useSettingsStore((state) => state.setTTSSpeed);
   const setTTSProviderConfig = useSettingsStore((state) => state.setTTSProviderConfig);
 
   // ASR state
@@ -106,16 +98,6 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
     onSave?.();
   };
 
-  const handleTTSVoiceChange = (voice: string) => {
-    setTTSVoice(voice);
-    onSave?.();
-  };
-
-  const handleTTSSpeedChange = (speed: number) => {
-    setTTSSpeed(speed);
-    onSave?.();
-  };
-
   const handleTTSProviderConfigChange = (
     providerId: TTSProviderId,
     config: Partial<{ apiKey: string; baseUrl: string; enabled: boolean }>,
@@ -150,12 +132,6 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
   const [selectedLocale, setSelectedLocale] = useState<string>('all');
 
   // Test state
-  const [testingTTS, setTestingTTS] = useState(false);
-  const [testText, setTestText] = useState(t('settings.ttsTestTextDefault'));
-  const [ttsTestStatus, setTTSTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>(
-    'idle',
-  );
-  const [ttsTestMessage, setTTSTestMessage] = useState('');
   const [isRecording, setIsRecording] = useState(false);
   const [asrResult, setASRResult] = useState('');
   const [asrTestStatus, setASRTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>(
@@ -170,13 +146,6 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
 
   const asrProvider = ASR_PROVIDERS[asrProviderId] ?? ASR_PROVIDERS['openai-whisper'];
 
-  // Update test text when language changes (derived state pattern)
-  const [prevT, setPrevT] = useState(() => t);
-  if (t !== prevT) {
-    setPrevT(t);
-    setTestText(t('settings.ttsTestTextDefault'));
-  }
-
   // Reset locale filter when provider changes (derived state pattern)
   const [prevTTSProviderId, setPrevTTSProviderId] = useState(ttsProviderId);
   if (ttsProviderId !== prevTTSProviderId) {
@@ -186,7 +155,7 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
     }
   }
 
-  const stopTTSPreview = useCallback((resetState = true) => {
+  const stopTTSPreview = useCallback(() => {
     ttsTestRequestIdRef.current += 1;
     browserPreviewCancelRef.current?.();
     browserPreviewCancelRef.current = null;
@@ -198,9 +167,6 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
       URL.revokeObjectURL(audioUrlRef.current);
       audioUrlRef.current = null;
     }
-    if (resetState) {
-      setTestingTTS(false);
-    }
   }, []);
 
   // Update voice selection when locale filter changes
@@ -222,9 +188,7 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
   }, [selectedLocale, ttsProviderId, azureVoices, setTTSVoice]);
 
   useEffect(() => {
-    stopTTSPreview(false);
-    setTTSTestStatus('idle');
-    setTTSTestMessage('');
+    stopTTSPreview();
   }, [ttsProviderId, stopTTSPreview]);
 
   // Initialize and reset TTS voice when provider changes
@@ -275,7 +239,7 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
 
   useEffect(() => {
     return () => {
-      stopTTSPreview(false);
+      stopTTSPreview();
     };
   }, [stopTTSPreview]);
 
@@ -288,123 +252,6 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
     setASRResult('');
   }
 
-  // Test TTS
-  const handleTestTTS = async () => {
-    if (!testText.trim()) {
-      return;
-    }
-
-    const requestId = ttsTestRequestIdRef.current + 1;
-    ttsTestRequestIdRef.current = requestId;
-
-    setTestingTTS(true);
-    setTTSTestStatus('testing');
-    setTTSTestMessage('');
-
-    try {
-      if (ttsProviderId === 'browser-native-tts') {
-        if (!('speechSynthesis' in window)) {
-          setTTSTestStatus('error');
-          setTTSTestMessage(t('settings.browserTTSNotSupported'));
-          return;
-        }
-
-        const voices = await ensureVoicesLoaded();
-        if (ttsTestRequestIdRef.current !== requestId) {
-          return;
-        }
-        if (voices.length === 0) {
-          setTTSTestStatus('error');
-          setTTSTestMessage(t('settings.browserTTSNoVoices'));
-          return;
-        }
-
-        const controller = playBrowserTTSPreview({
-          text: testText,
-          voice: ttsVoice,
-          rate: ttsSpeed,
-          voices,
-        });
-        browserPreviewCancelRef.current = controller.cancel;
-        await controller.promise;
-
-        if (ttsTestRequestIdRef.current !== requestId) {
-          return;
-        }
-        setTTSTestStatus('success');
-        setTTSTestMessage(t('settings.ttsTestSuccess'));
-        return;
-      }
-
-      const requestBody: Record<string, unknown> = {
-        text: testText,
-        audioId: 'tts-test',
-        ttsProviderId,
-        ttsVoice: ttsVoice,
-        ttsSpeed: ttsSpeed,
-      };
-
-      const apiKeyValue = ttsProvidersConfig[ttsProviderId]?.apiKey;
-      if (apiKeyValue && apiKeyValue.trim()) {
-        requestBody.ttsApiKey = apiKeyValue;
-      }
-
-      const baseUrlValue = ttsProvidersConfig[ttsProviderId]?.baseUrl;
-      if (baseUrlValue && baseUrlValue.trim()) {
-        requestBody.ttsBaseUrl = baseUrlValue;
-      }
-
-      const response = await fetch('/api/generate/tts', {
-        method: 'POST',
-        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify(requestBody),
-      });
-
-      const data = await response
-        .json()
-        .catch(() => ({ success: false, error: response.statusText }));
-      if (ttsTestRequestIdRef.current !== requestId) {
-        return;
-      }
-      if (response.ok && data.success) {
-        const binaryStr = atob(data.base64);
-        const bytes = new Uint8Array(binaryStr.length);
-        for (let i = 0; i < binaryStr.length; i++) bytes[i] = binaryStr.charCodeAt(i);
-        const audioBlob = new Blob([bytes], { type: `audio/${data.format}` });
-        if (audioUrlRef.current) {
-          URL.revokeObjectURL(audioUrlRef.current);
-        }
-        const audioUrl = URL.createObjectURL(audioBlob);
-        audioUrlRef.current = audioUrl;
-        if (audioRef.current) {
-          audioRef.current.src = audioUrl;
-          await audioRef.current.play();
-        }
-        setTTSTestStatus('success');
-        setTTSTestMessage(t('settings.ttsTestSuccess'));
-      } else {
-        setTTSTestStatus('error');
-        setTTSTestMessage(data.error || t('settings.ttsTestFailed'));
-      }
-    } catch (error) {
-      if (ttsTestRequestIdRef.current !== requestId || isBrowserTTSAbortError(error)) {
-        return;
-      }
-      log.error('TTS test failed:', error);
-      setTTSTestStatus('error');
-      setTTSTestMessage(
-        error instanceof Error && error.message
-          ? `${t('settings.ttsTestFailed')}: ${error.message}`
-          : t('settings.ttsTestFailed'),
-      );
-    } finally {
-      if (ttsTestRequestIdRef.current === requestId) {
-        browserPreviewCancelRef.current = null;
-        setTestingTTS(false);
-      }
-    }
-  };
-
   // Test ASR
   const handleToggleASRRecording = async () => {
     if (isRecording) {
@@ -571,6 +418,8 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
             ttsEnabled ? 'opacity-100' : 'opacity-40 max-h-0 pointer-events-none',
           )}
         >
+          <p className="text-xs text-muted-foreground">{t('settings.ttsVoiceConfigHint')}</p>
+
           <div className="space-y-2">
             <Label className="text-sm">{t('settings.ttsProvider')}</Label>
             <Select
@@ -602,296 +451,50 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
 
           {(ttsProvider.requiresApiKey ||
             ttsProvidersConfig[ttsProviderId]?.isServerConfigured) && (
-            <>
-              <div className="grid grid-cols-2 gap-4">
-                <div className="space-y-2">
-                  <Label className="text-sm">{t('settings.ttsApiKey')}</Label>
-                  <div className="relative">
-                    <Input
-                      type={showTTSApiKey ? 'text' : 'password'}
-                      placeholder={
-                        ttsProvidersConfig[ttsProviderId]?.isServerConfigured
-                          ? t('settings.optionalOverride')
-                          : t('settings.enterApiKey')
-                      }
-                      value={ttsProvidersConfig[ttsProviderId]?.apiKey || ''}
-                      onChange={(e) =>
-                        handleTTSProviderConfigChange(ttsProviderId, {
-                          apiKey: e.target.value,
-                        })
-                      }
-                      className="font-mono text-sm pr-10"
-                    />
-                    <button
-                      type="button"
-                      onClick={() => setShowTTSApiKey(!showTTSApiKey)}
-                      className="absolute right-2 top-1/2 -translate-y-1/2 text-muted-foreground hover:text-foreground"
-                    >
-                      {showTTSApiKey ? <EyeOff className="h-4 w-4" /> : <Eye className="h-4 w-4" />}
-                    </button>
-                  </div>
-                </div>
-
-                <div className="space-y-2">
-                  <Label className="text-sm">{t('settings.ttsBaseUrl')}</Label>
+            <div className="grid grid-cols-2 gap-4">
+              <div className="space-y-2">
+                <Label className="text-sm">{t('settings.ttsApiKey')}</Label>
+                <div className="relative">
                   <Input
-                    placeholder={ttsProvider.defaultBaseUrl || t('settings.enterCustomBaseUrl')}
-                    value={ttsProvidersConfig[ttsProviderId]?.baseUrl || ''}
+                    type={showTTSApiKey ? 'text' : 'password'}
+                    placeholder={
+                      ttsProvidersConfig[ttsProviderId]?.isServerConfigured
+                        ? t('settings.optionalOverride')
+                        : t('settings.enterApiKey')
+                    }
+                    value={ttsProvidersConfig[ttsProviderId]?.apiKey || ''}
                     onChange={(e) =>
                       handleTTSProviderConfigChange(ttsProviderId, {
-                        baseUrl: e.target.value,
+                        apiKey: e.target.value,
                       })
                     }
-                    className="text-sm"
+                    className="font-mono text-sm pr-10"
                   />
+                  <button
+                    type="button"
+                    onClick={() => setShowTTSApiKey(!showTTSApiKey)}
+                    className="absolute right-2 top-1/2 -translate-y-1/2 text-muted-foreground hover:text-foreground"
+                  >
+                    {showTTSApiKey ? <EyeOff className="h-4 w-4" /> : <Eye className="h-4 w-4" />}
+                  </button>
                 </div>
               </div>
-              {(() => {
-                const effectiveBaseUrl =
-                  ttsProvidersConfig[ttsProviderId]?.baseUrl || ttsProvider.defaultBaseUrl || '';
-                if (!effectiveBaseUrl) return null;
-
-                // Get endpoint path based on provider
-                let endpointPath = '';
-                switch (ttsProviderId) {
-                  case 'openai-tts':
-                  case 'glm-tts':
-                    endpointPath = '/audio/speech';
-                    break;
-                  case 'azure-tts':
-                    endpointPath = '/cognitiveservices/v1';
-                    break;
-                  case 'qwen-tts':
-                    endpointPath = '/services/aigc/multimodal-generation/generation';
-                    break;
-                  default:
-                    endpointPath = '';
-                }
-
-                if (!endpointPath) return null;
-                const fullUrl = effectiveBaseUrl + endpointPath;
-                return (
-                  <p className="text-xs text-muted-foreground break-all">
-                    {t('settings.requestUrl')}: {fullUrl}
-                  </p>
-                );
-              })()}
-            </>
-          )}
-
-          {/* Voice Selection Row */}
-          <div
-            className="grid gap-4"
-            style={{
-              gridTemplateColumns:
-                ttsProviderId === 'azure-tts' ? '280px 280px 200px' : '280px 200px',
-            }}
-          >
-            {/* Language Filter for Azure TTS */}
-            {ttsProviderId === 'azure-tts' && (
-              <div className="space-y-2">
-                <Label className="text-sm">{t('settings.ttsLanguageFilter')}</Label>
-                <Select value={selectedLocale} onValueChange={setSelectedLocale}>
-                  <SelectTrigger className="w-full">
-                    <SelectValue />
-                  </SelectTrigger>
-                  <SelectContent>
-                    <SelectItem value="all">{t('settings.allLanguages')}</SelectItem>
-                    {(() => {
-                      // Extract unique locales from Azure voices
-                      const uniqueLocales = Array.from(
-                        new Set(azureVoices.map((voice) => voice.Locale)),
-                      );
-
-                      // Sort: Chinese dialects first, then other major languages, then alphabetically
-                      const sortedLocales = uniqueLocales.sort((a, b) => {
-                        // Get LocaleName for both locales
-                        const voiceA = azureVoices.find((v) => v.Locale === a);
-                        const voiceB = azureVoices.find((v) => v.Locale === b);
-                        const localeNameA = voiceA?.LocaleName || a;
-                        const localeNameB = voiceB?.LocaleName || b;
-
-                        // Check if LocaleName contains "Chinese" (case-insensitive)
-                        const aIsChinese = /chinese/i.test(localeNameA);
-                        const bIsChinese = /chinese/i.test(localeNameB);
-
-                        // Both are Chinese - sort by priority
-                        if (aIsChinese && bIsChinese) {
-                          const chinesePriority = [
-                            'zh-CN', // Chinese (Simplified, China)
-                            'zh-CN-liaoning', // Chinese (Northeastern Mandarin, Liaoning)
-                            'zh-CN-shaanxi', // Chinese (Shaanxi dialect)
-                            'wuu-CN', // Chinese (Wu, China)
-                            'zh-HK', // Chinese (Cantonese, Hong Kong)
-                            'yue-CN', // Chinese (Cantonese, China)
-                            'zh-CN-shandong', // Chinese (Jinan dialect, Shandong)
-                            'zh-CN-sichuan', // Chinese (Sichuan dialect)
-                            'zh-TW', // Chinese (Taiwanese Mandarin)
-                          ];
-                          const aIndex = chinesePriority.indexOf(a);
-                          const bIndex = chinesePriority.indexOf(b);
-
-                          if (aIndex !== -1 && bIndex !== -1) return aIndex - bIndex;
-                          if (aIndex !== -1) return -1;
-                          if (bIndex !== -1) return 1;
-                          return localeNameA.localeCompare(localeNameB);
-                        }
-
-                        // Only a is Chinese
-                        if (aIsChinese) return -1;
-                        // Only b is Chinese
-                        if (bIsChinese) return 1;
-
-                        // Neither is Chinese - sort by priority for other major languages
-                        const otherPriority = [
-                          'en-US',
-                          'en-GB',
-                          'ja-JP',
-                          'ko-KR',
-                          'es-ES',
-                          'fr-FR',
-                          'de-DE',
-                          'ru-RU',
-                          'ar-SA',
-                          'pt-BR',
-                          'it-IT',
-                        ];
-                        const aIndex = otherPriority.indexOf(a);
-                        const bIndex = otherPriority.indexOf(b);
-
-                        if (aIndex !== -1 && bIndex !== -1) return aIndex - bIndex;
-                        if (aIndex !== -1) return -1;
-                        if (bIndex !== -1) return 1;
-
-                        // Sort alphabetically
-                        return a.localeCompare(b);
-                      });
-
-                      return sortedLocales.map((locale) => {
-                        // Find a voice with this locale to get the LocaleName
-                        const voiceWithLocale = azureVoices.find((v) => v.Locale === locale);
-                        const localeName = voiceWithLocale?.LocaleName || locale;
-                        return (
-                          <SelectItem key={locale} value={locale}>
-                            {localeName}
-                          </SelectItem>
-                        );
-                      });
-                    })()}
-                  </SelectContent>
-                </Select>
-              </div>
-            )}
-
-            <div className="space-y-2">
-              <Label className="text-sm">{t('settings.ttsVoice')}</Label>
-              <Select value={ttsVoice} onValueChange={handleTTSVoiceChange}>
-                <SelectTrigger className="w-full">
-                  <SelectValue />
-                </SelectTrigger>
-                <SelectContent>
-                  {(() => {
-                    // For Azure TTS, use JSON data
-                    if (ttsProviderId === 'azure-tts') {
-                      // Filter voices by selected locale
-                      const filteredVoices =
-                        selectedLocale === 'all'
-                          ? azureVoices
-                          : azureVoices.filter((voice) => voice.Locale === selectedLocale);
-
-                      return filteredVoices.map((voice) => (
-                        <SelectItem key={voice.ShortName} value={voice.ShortName}>
-                          {voice.LocalName} ({voice.DisplayName})
-                        </SelectItem>
-                      ));
-                    }
-
-                    // For other providers, use static voices
-                    const allVoices = getTTSVoices(ttsProviderId);
-                    return allVoices.map((voice) => (
-                      <SelectItem key={voice.id} value={voice.id}>
-                        {voice.name}
-                        {voice.description && ` - ${t(`settings.${voice.description}`)}`}
-                      </SelectItem>
-                    ));
-                  })()}
-                </SelectContent>
-              </Select>
-            </div>
 
-            {ttsProvider.speedRange && (
               <div className="space-y-2">
-                <Label className="text-sm">{t('settings.ttsSpeed')}</Label>
-                <div className="flex items-center gap-3">
-                  <Slider
-                    value={[ttsSpeed]}
-                    onValueChange={(value) => handleTTSSpeedChange(value[0])}
-                    min={ttsProvider.speedRange.min}
-                    max={ttsProvider.speedRange.max}
-                    step={0.1}
-                    className="flex-1"
-                  />
-                  <span className="text-xs text-muted-foreground min-w-[3rem] text-right">
-                    {ttsSpeed.toFixed(1)}x
-                  </span>
-                </div>
-              </div>
-            )}
-          </div>
-
-          {/* Test TTS Section */}
-          <div className="space-y-2">
-            <Label className="text-sm">{t('settings.testTTS')}</Label>
-            <div className="flex gap-2">
-              <Input
-                placeholder={t('settings.ttsTestTextPlaceholder')}
-                value={testText}
-                onChange={(e) => setTestText(e.target.value)}
-                className="flex-1"
-              />
-              <Button
-                onClick={handleTestTTS}
-                disabled={
-                  testingTTS ||
-                  !testText.trim() ||
-                  (ttsProvider.requiresApiKey &&
-                    !ttsProvidersConfig[ttsProviderId]?.apiKey?.trim() &&
-                    !ttsProvidersConfig[ttsProviderId]?.isServerConfigured)
-                }
-                size="default"
-                className="gap-2 w-32"
-              >
-                {testingTTS ? (
-                  <Loader2 className="h-4 w-4 animate-spin" />
-                ) : (
-                  <Volume2 className="h-4 w-4" />
-                )}
-                {t('settings.testTTS')}
-              </Button>
-            </div>
-          </div>
-
-          {ttsTestMessage && (
-            <div
-              className={cn(
-                'rounded-lg p-3 text-sm overflow-hidden',
-                ttsTestStatus === 'success' &&
-                  'bg-green-50 text-green-700 border border-green-200 dark:bg-green-950/50 dark:text-green-400 dark:border-green-800',
-                ttsTestStatus === 'error' &&
-                  'bg-red-50 text-red-700 border border-red-200 dark:bg-red-950/50 dark:text-red-400 dark:border-red-800',
-              )}
-            >
-              <div className="flex items-start gap-2 min-w-0">
-                {ttsTestStatus === 'success' && (
-                  <CheckCircle2 className="h-4 w-4 mt-0.5 shrink-0" />
-                )}
-                {ttsTestStatus === 'error' && <XCircle className="h-4 w-4 mt-0.5 shrink-0" />}
-                <p className="flex-1 min-w-0 break-all">{ttsTestMessage}</p>
+                <Label className="text-sm">{t('settings.ttsBaseUrl')}</Label>
+                <Input
+                  placeholder={ttsProvider.defaultBaseUrl || t('settings.enterCustomBaseUrl')}
+                  value={ttsProvidersConfig[ttsProviderId]?.baseUrl || ''}
+                  onChange={(e) =>
+                    handleTTSProviderConfigChange(ttsProviderId, {
+                      baseUrl: e.target.value,
+                    })
+                  }
+                  className="text-sm"
+                />
               </div>
             </div>
           )}
-
-          <audio ref={audioRef} className="hidden" />
         </div>
       </div>
 
diff --git a/components/stage.tsx b/components/stage.tsx
index dc5379f34..0684afc43 100644
--- a/components/stage.tsx
+++ b/components/stage.tsx
@@ -14,6 +14,8 @@ import { PlaybackEngine, computePlaybackView } from '@/lib/playback';
 import type { EngineMode, TriggerEvent, Effect } from '@/lib/playback';
 import { ActionEngine } from '@/lib/action/engine';
 import { createAudioPlayer } from '@/lib/utils/audio-player';
+import { useDiscussionTTS } from '@/lib/hooks/use-discussion-tts';
+import type { AudioIndicatorState } from '@/components/roundtable/audio-indicator';
 import type { Action, DiscussionAction, SpeechAction } from '@/lib/types/action';
 // Playback state persistence removed — refresh always starts from the beginning
 import { ChatArea, type ChatAreaRef } from '@/components/chat/chat-area';
@@ -100,6 +102,8 @@ export function Stage({
 
   // Selected agents from settings store (Zustand)
   const selectedAgentIds = useSettingsStore((s) => s.selectedAgentIds);
+  const ttsMuted = useSettingsStore((s) => s.ttsMuted);
+  const ttsEnabled = useSettingsStore((s) => s.ttsEnabled);
 
   // Generate participants from selected agents
   const participants = useMemo(
@@ -107,6 +111,27 @@ export function Stage({
     [selectedAgentIds, t],
   );
 
+  // Resolved AgentConfig array for hooks that need full agent objects
+  // Subscribe to the agents record so voiceConfig changes trigger re-resolution
+  const agentsRecord = useAgentRegistry((s) => s.agents);
+  const selectedAgents = useMemo(
+    () => selectedAgentIds.map((id) => agentsRecord[id]).filter((a): a is AgentConfig => a != null),
+    [agentsRecord, selectedAgentIds],
+  );
+
+  // Discussion TTS: audio indicator state
+  const [audioIndicatorState, setAudioIndicatorState] = useState<AudioIndicatorState>('idle');
+  const [audioAgentId, setAudioAgentId] = useState<string | null>(null);
+
+  const discussionTTS = useDiscussionTTS({
+    enabled: ttsEnabled && !ttsMuted,
+    agents: selectedAgents,
+    onAudioStateChange: (agentId, state) => {
+      setAudioAgentId(agentId);
+      setAudioIndicatorState(state);
+    },
+  });
+
   // Pick a student agent for discussion trigger (prioritize student > non-teacher > fallback)
   const pickStudentAgent = useCallback((): string => {
     const registry = useAgentRegistry.getState();
@@ -221,8 +246,11 @@ export function Stage({
       setTimeout(() => setShowEndFlash(false), 1800);
     }
 
+    // Stop any in-flight discussion TTS audio
+    discussionTTS.cleanup();
+
     resetLiveState();
-  }, [chatSessionType, resetLiveState]);
+  }, [chatSessionType, resetLiveState, discussionTTS]);
 
   // Shared stop-discussion handler (used by both Roundtable and Canvas toolbar)
   const handleStopDiscussion = useCallback(async () => {
@@ -246,6 +274,9 @@ export function Stage({
       discussionAbortRef.current = null;
     }
 
+    // Stop any in-flight discussion TTS audio on scene switch
+    discussionTTS.cleanup();
+
     // Reset all roundtable/live state so scenes are fully isolated
     resetSceneState();
 
@@ -335,6 +366,8 @@ export function Stage({
           discussionAbortRef.current = null;
         }
         setDiscussionTrigger(null);
+        // Stop any in-flight discussion TTS audio
+        discussionTTS.cleanup();
         // Clear roundtable state (idempotent — may already be cleared by doSessionCleanup)
         resetLiveState();
         // Only show flash for engine-initiated ends (not manual stop — that's handled by doSessionCleanup)
@@ -441,7 +474,6 @@ export function Stage({
   }, []);
 
   // Sync mute state from settings store to audioPlayer
-  const ttsMuted = useSettingsStore((s) => s.ttsMuted);
   useEffect(() => {
     audioPlayerRef.current.setMuted(ttsMuted);
   }, [ttsMuted]);
@@ -748,6 +780,8 @@ export function Stage({
             discussionRequest={discussionRequest}
             engineMode={engineMode}
             isStreaming={chatIsStreaming}
+            audioIndicatorState={audioIndicatorState}
+            audioAgentId={audioAgentId}
             sessionType={
               chatSessionType === 'qa'
                 ? 'qa'
@@ -890,6 +924,8 @@ export function Stage({
           setIsCueUser(true);
         }}
         onStopSession={doSessionCleanup}
+        onSegmentSealed={discussionTTS.handleSegmentSealed}
+        shouldHoldAfterReveal={discussionTTS.shouldHold}
       />
 
       {/* Scene switch confirmation dialog */}
diff --git a/lib/audio/voice-resolver.ts b/lib/audio/voice-resolver.ts
new file mode 100644
index 000000000..2018add3f
--- /dev/null
+++ b/lib/audio/voice-resolver.ts
@@ -0,0 +1,103 @@
+import type { TTSProviderId } from '@/lib/audio/types';
+import type { AgentConfig } from '@/lib/orchestration/registry/types';
+import { TTS_PROVIDERS } from '@/lib/audio/constants';
+
+export interface ResolvedVoice {
+  providerId: TTSProviderId; // TTS provider the voice belongs to
+  voiceId: string; // voice identifier within that provider
+}
+
+/**
+ * Resolve the TTS provider + voice for an agent.
+ * 1. If the agent's voiceConfig is browser-native, or its voice is still in the provider's
+ *    static voice list, use it
+ * 2. Otherwise, use the first available provider that has voices, picking the voice
+ *    deterministically by agentIndex
+ * 3. Otherwise, fall back to browser-native TTS with the 'default' voice
+ */
+export function resolveAgentVoice(
+  agent: AgentConfig,
+  agentIndex: number,
+  availableProviders: ProviderWithVoices[],
+): ResolvedVoice {
+  const configured = agent.voiceConfig;
+  if (configured) {
+    // Browser-native voices are dynamic (not in static registry), so skip validation
+    if (configured.providerId === 'browser-native-tts') return configured;
+    if (getServerVoiceList(configured.providerId).includes(configured.voiceId)) {
+      return configured;
+    }
+  }
+
+  // Fallback: skip providers without voices so the modulo below never divides by zero
+  const first = availableProviders.find((p) => p.voices.length > 0);
+  if (first) {
+    return {
+      providerId: first.providerId,
+      voiceId: first.voices[agentIndex % first.voices.length].id,
+    };
+  }
+
+  return { providerId: 'browser-native-tts', voiceId: 'default' };
+}
+
+/**
+ * List the static voice IDs registered for a TTS provider.
+ * Browser-native and unregistered providers yield an empty list.
+ */
+export function getServerVoiceList(providerId: TTSProviderId): string[] {
+  const isBrowserNative = providerId === 'browser-native-tts';
+  const entry = isBrowserNative ? undefined : TTS_PROVIDERS[providerId];
+  const voiceIds = entry ? entry.voices.map((voice) => voice.id) : [];
+  return voiceIds;
+}
+
+export interface ProviderWithVoices {
+  providerId: TTSProviderId; // key of the provider in TTS_PROVIDERS
+  providerName: string; // the provider's `name` from TTS_PROVIDERS
+  voices: Array<{ id: string; name: string }>; // id/name pairs of the provider's static voices
+}
+
+/**
+ * List the server-side TTS providers the user can currently pick voices from.
+ * A provider is included when it has a non-blank API key or is flagged server-configured;
+ * browser-native-tts and providers without static voices are left out.
+ *
+ * @param ttsProvidersConfig - per-provider settings, keyed by provider id
+ * @returns matching providers, in TTS_PROVIDERS order, each with its id/name voice pairs
+ */
+export function getAvailableProvidersWithVoices(
+  ttsProvidersConfig: Record<
+    string,
+    { apiKey?: string; enabled?: boolean; isServerConfigured?: boolean }
+  >,
+): ProviderWithVoices[] {
+  // Usable = non-blank API key, or isServerConfigured is exactly true
+  const isConfigured = (id: string): boolean => {
+    const entry = ttsProvidersConfig[id];
+    const keyProvided = (entry?.apiKey ?? '').trim().length > 0;
+    return keyProvided || entry?.isServerConfigured === true;
+  };
+
+  const candidates = Object.entries(TTS_PROVIDERS).filter(
+    ([id, config]) => id !== 'browser-native-tts' && config.voices.length > 0,
+  );
+
+  return candidates
+    .filter(([id]) => isConfigured(id))
+    .map(([id, config]) => ({
+      providerId: id as TTSProviderId,
+      providerName: config.name,
+      voices: config.voices.map((voice) => ({ id: voice.id, name: voice.name })),
+    }));
+}
+
+/**
+ * Return the display name of a voice within the given provider.
+ * Falls back to the raw voiceId when the provider or the voice is not registered.
+ */
+export function findVoiceDisplayName(providerId: TTSProviderId, voiceId: string): string {
+  const registered = TTS_PROVIDERS[providerId];
+  const match = registered ? registered.voices.find((entry) => entry.id === voiceId) : undefined;
+  return match?.name ?? voiceId;
+}
diff --git a/lib/buffer/stream-buffer.ts b/lib/buffer/stream-buffer.ts
index 6ba94ea6f..fb8e21699 100644
--- a/lib/buffer/stream-buffer.ts
+++ b/lib/buffer/stream-buffer.ts
@@ -124,6 +124,18 @@ export interface StreamBufferCallbacks {
     directorState?: DirectorState;
   }): void;
   onError(message: string): void;
+  onSegmentSealed?: (
+    messageId: string,
+    partId: string,
+    fullText: string,
+    agentId: string | null,
+  ) => void;
+  /**
+   * When provided, called after a text item is fully revealed and sealed.
+   * If it returns true, the tick loop will NOT advance to the next item —
+   * the bubble stays on the current text (e.g. waiting for TTS playback to finish).
+   */
+  shouldHoldAfterReveal?: () => boolean;
 }
 
 // ─── Options ─────────────────────────────────────────────────────────
@@ -165,6 +177,8 @@ export class StreamBuffer {
 
   // Dwell / delay counters (in ticks)
   private _dwellTicksRemaining = 0;
+  /** True when a text item's post-delay has elapsed and we're waiting for TTS to finish. */
+  private _holdingForTTS = false;
 
   // Config
   private readonly tickMs: number;
@@ -403,6 +417,9 @@ export class StreamBuffer {
       const item = this.items[i];
       if (item.kind === 'text' && !item.sealed) {
         item.sealed = true;
+        // Ordering invariant: sealLastText() is called BEFORE pushAgentEnd/pushAgentStart,
+        // so this.currentAgentId still refers to the agent whose text is being sealed.
+        this.cb.onSegmentSealed?.(item.messageId, item.partId, item.text, this.currentAgentId);
         break;
       }
       // Stop searching once we hit a non-text item
@@ -416,6 +433,21 @@ export class StreamBuffer {
     // Honour dwell / action-delay countdown before advancing
     if (this._dwellTicksRemaining > 0) {
       this._dwellTicksRemaining--;
+      if (this._dwellTicksRemaining === 0 && this._holdingForTTS) {
+        // Post-text delay just finished — fall through to the TTS hold check below
+      } else {
+        return;
+      }
+    }
+
+    // TTS hold: after post-text delay, keep the bubble on screen while audio plays
+    if (this._holdingForTTS) {
+      if (this.cb.shouldHoldAfterReveal?.()) {
+        return; // TTS still playing — stay on current item
+      }
+      this._holdingForTTS = false;
+      // TTS done — continue to process next item
+      this.advanceNonText();
       return;
     }
 
@@ -450,9 +482,19 @@ export class StreamBuffer {
           // before the next action or agent turn fires.
           if (this.postTextDelayTicks > 0) {
             this._dwellTicksRemaining = this.postTextDelayTicks;
+            // If TTS hold callback exists, mark that we need to check it after delay
+            if (this.cb.shouldHoldAfterReveal) {
+              this._holdingForTTS = true;
+            }
             return; // next tick will count down, then advanceNonText
           }
 
+          // No post-text delay — check TTS hold immediately
+          if (this.cb.shouldHoldAfterReveal?.()) {
+            this._holdingForTTS = true;
+            return; // TTS still playing — hold here
+          }
+
           // Process any immediately-advanceable items in the same tick
           // (e.g. action badges right after text)
           this.advanceNonText();
diff --git a/lib/hooks/use-discussion-tts.ts b/lib/hooks/use-discussion-tts.ts
new file mode 100644
index 000000000..2075cf053
--- /dev/null
+++ b/lib/hooks/use-discussion-tts.ts
@@ -0,0 +1,224 @@
+'use client';
+
+import { useCallback, useEffect, useMemo, useRef } from 'react';
+import { useSettingsStore } from '@/lib/store/settings';
+import { useBrowserTTS } from '@/lib/hooks/use-browser-tts';
+import { resolveAgentVoice, getAvailableProvidersWithVoices } from '@/lib/audio/voice-resolver';
+import type { AgentConfig } from '@/lib/orchestration/registry/types';
+import type { TTSProviderId } from '@/lib/audio/types';
+import type { AudioIndicatorState } from '@/components/roundtable/audio-indicator';
+
+interface DiscussionTTSOptions {
+  enabled: boolean; // false: segments are not queued and pending ones are dropped
+  agents: AgentConfig[]; // looked up by id to pick each segment's voice
+  onAudioStateChange?: (agentId: string | null, state: AudioIndicatorState) => void;
+}
+
+interface QueueItem {
+  messageId: string;
+  partId: string; // also sent to the TTS API as `audioId`
+  text: string; // full text of the sealed segment
+  agentId: string | null; // speaker id as reported with the sealed segment (may be null)
+  providerId: TTSProviderId; // provider resolved when the segment was enqueued
+  voiceId: string; // voice resolved when the segment was enqueued
+}
+
+/**
+ * Plays sealed discussion segments one after another with a per-agent voice, through browser
+ * speech or audio fetched from `/api/generate/tts`.
+ *
+ * @returns memoized `{ handleSegmentSealed, cleanup, shouldHold }` — enqueue a segment, stop
+ *   and reset all playback, and "is audio playing or queued" respectively
+ */
+export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: DiscussionTTSOptions) {
+  const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig);
+  const ttsSpeed = useSettingsStore((s) => s.ttsSpeed);
+  const ttsMuted = useSettingsStore((s) => s.ttsMuted);
+  const ttsVolume = useSettingsStore((s) => s.ttsVolume);
+  const playbackSpeed = useSettingsStore((s) => s.playbackSpeed);
+  // Global lecture voice — always used for the teacher agent
+  const globalTtsProviderId = useSettingsStore((s) => s.ttsProviderId);
+  const globalTtsVoice = useSettingsStore((s) => s.ttsVoice);
+
+  const queueRef = useRef<QueueItem[]>([]);
+  const isPlayingRef = useRef(false);
+  const abortControllerRef = useRef<AbortController | null>(null);
+  const audioRef = useRef<HTMLAudioElement | null>(null);
+  const onAudioStateChangeRef = useRef(onAudioStateChange);
+  onAudioStateChangeRef.current = onAudioStateChange;
+  const processQueueRef = useRef<() => void>(() => {});
+
+  const { speak: browserSpeak, cancel: browserCancel } = useBrowserTTS({
+    rate: ttsSpeed,
+    onEnd: () => {
+      isPlayingRef.current = false;
+      onAudioStateChangeRef.current?.(null, 'idle');
+      processQueueRef.current();
+    },
+  });
+  const browserCancelRef = useRef(browserCancel);
+  browserCancelRef.current = browserCancel;
+  const browserSpeakRef = useRef(browserSpeak);
+  browserSpeakRef.current = browserSpeak;
+
+  // Build agent index map for deterministic voice resolution
+  const agentIndexMap = useRef<Map<string, number>>(new Map());
+  useEffect(() => {
+    const map = new Map<string, number>();
+    agents.forEach((agent, i) => map.set(agent.id, i));
+    agentIndexMap.current = map;
+  }, [agents]);
+
+  const resolveVoiceForAgent = useCallback(
+    (agentId: string | null): { providerId: TTSProviderId; voiceId: string } => {
+      const providers = getAvailableProvidersWithVoices(ttsProvidersConfig);
+      const agent = agentId ? agents.find((a) => a.id === agentId) : undefined;
+      if (!agent) {
+        // No (known) speaker: first configured provider's first voice, else browser speech
+        if (providers.length > 0) {
+          return {
+            providerId: providers[0].providerId,
+            voiceId: providers[0].voices[0]?.id ?? 'default',
+          };
+        }
+        return { providerId: 'browser-native-tts', voiceId: 'default' };
+      }
+      // Teacher: always use global lecture voice (single source of truth with settings)
+      if (agent.role === 'teacher') {
+        return { providerId: globalTtsProviderId, voiceId: globalTtsVoice };
+      }
+      const index = agentIndexMap.current.get(agent.id) ?? 0;
+      return resolveAgentVoice(agent, index, providers);
+    },
+    [agents, ttsProvidersConfig, globalTtsProviderId, globalTtsVoice],
+  );
+
+  const processQueue = useCallback(async () => {
+    if (isPlayingRef.current || queueRef.current.length === 0) return;
+    if (!enabled || ttsMuted) {
+      queueRef.current = [];
+      return;
+    }
+
+    isPlayingRef.current = true;
+    const item = queueRef.current.shift()!;
+
+    // Browser TTS — completion arrives through the onEnd callback given to useBrowserTTS
+    if (item.providerId === 'browser-native-tts') {
+      onAudioStateChangeRef.current?.(item.agentId, 'playing');
+      browserSpeakRef.current(item.text, item.voiceId);
+      return;
+    }
+
+    // Server TTS — use the item's provider, not the global one
+    onAudioStateChangeRef.current?.(item.agentId, 'generating');
+    const controller = new AbortController();
+    abortControllerRef.current = controller;
+
+    // A failed load both fires 'error' and rejects play(): complete at most once per item,
+    // and not at all after cleanup() aborted it (cleanup() has already reset the state).
+    let settled = false;
+    const finish = () => {
+      if (settled) return;
+      settled = true;
+      if (controller.signal.aborted) return;
+      isPlayingRef.current = false;
+      onAudioStateChangeRef.current?.(item.agentId, 'idle');
+      queueMicrotask(() => processQueueRef.current());
+    };
+
+    try {
+      const providerConfig = ttsProvidersConfig[item.providerId];
+      const res = await fetch('/api/generate/tts', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          text: item.text,
+          audioId: item.partId,
+          ttsProviderId: item.providerId,
+          ttsVoice: item.voiceId,
+          ttsSpeed: ttsSpeed,
+          ttsApiKey: providerConfig?.apiKey,
+          ttsBaseUrl: providerConfig?.serverBaseUrl || providerConfig?.baseUrl,
+        }),
+        signal: controller.signal,
+      });
+
+      if (!res.ok) throw new Error(`TTS API error: ${res.status}`);
+
+      const data = await res.json();
+      if (!data.base64) throw new Error('No audio in response');
+
+      onAudioStateChangeRef.current?.(item.agentId, 'playing');
+      const audioUrl = `data:audio/${data.format || 'mp3'};base64,${data.base64}`;
+      const audio = new Audio(audioUrl);
+      audio.playbackRate = playbackSpeed;
+      audio.volume = ttsMuted ? 0 : ttsVolume;
+      audioRef.current = audio;
+      audio.addEventListener('ended', finish);
+      audio.addEventListener('error', finish);
+      await audio.play();
+    } catch (err) {
+      if ((err as Error).name !== 'AbortError') {
+        console.error('[DiscussionTTS] TTS generation failed:', err);
+      }
+      finish();
+    }
+  }, [enabled, ttsMuted, ttsVolume, ttsProvidersConfig, ttsSpeed, playbackSpeed]);
+
+  processQueueRef.current = processQueue;
+
+  const handleSegmentSealed = useCallback(
+    (messageId: string, partId: string, fullText: string, agentId: string | null) => {
+      if (!enabled || ttsMuted || !fullText.trim()) return;
+
+      const { providerId, voiceId } = resolveVoiceForAgent(agentId);
+      queueRef.current.push({ messageId, partId, text: fullText, agentId, providerId, voiceId });
+
+      if (!isPlayingRef.current) {
+        processQueueRef.current();
+      } else if (providerId !== 'browser-native-tts') {
+        onAudioStateChangeRef.current?.(agentId, 'generating');
+      }
+    },
+    [enabled, ttsMuted, resolveVoiceForAgent],
+  );
+
+  const cleanup = useCallback(() => {
+    abortControllerRef.current?.abort();
+    abortControllerRef.current = null;
+    if (audioRef.current) {
+      audioRef.current.pause();
+      audioRef.current.src = '';
+      audioRef.current = null;
+    }
+    browserCancelRef.current();
+    queueRef.current = [];
+    isPlayingRef.current = false;
+    onAudioStateChangeRef.current?.(null, 'idle');
+  }, []);
+
+  // Sync playbackSpeed to currently playing audio in real-time
+  useEffect(() => {
+    if (audioRef.current) {
+      audioRef.current.playbackRate = playbackSpeed;
+    }
+  }, [playbackSpeed]);
+
+  // Sync volume and mute to currently playing audio in real-time
+  useEffect(() => {
+    if (audioRef.current) {
+      audioRef.current.volume = ttsMuted ? 0 : ttsVolume;
+    }
+  }, [ttsVolume, ttsMuted]);
+
+  useEffect(() => cleanup, [cleanup]);
+
+  /** Returns true when TTS audio is still playing or queued — used by StreamBuffer hold logic. */
+  const shouldHold = useCallback(() => isPlayingRef.current || queueRef.current.length > 0, []);
+
+  return useMemo(
+    () => ({ handleSegmentSealed, cleanup, shouldHold }),
+    [handleSegmentSealed, cleanup, shouldHold],
+  );
+}
diff --git a/lib/i18n/chat.ts b/lib/i18n/chat.ts
index 4a5421399..1bb535d3e 100644
--- a/lib/i18n/chat.ts
+++ b/lib/i18n/chat.ts
@@ -55,6 +55,9 @@ export const chatZhCN = {
     readyToLearn: '准备好一起学习了吗？',
     expandedTitle: '课堂角色配置',
     configTooltip: '点击配置课堂角色',
+    voiceLabel: '音色',
+    voiceLoading: '加载中...',
+    voiceAutoAssign: '音色将自动分配',
   },
   proactiveCard: {
     discussion: '讨论',
@@ -126,6 +129,9 @@ export const chatEnUS = {
     readyToLearn: 'Ready to learn together?',
     expandedTitle: 'Classroom Role Config',
     configTooltip: 'Click to configure classroom roles',
+    voiceLabel: 'Voice',
+    voiceLoading: 'Loading...',
+    voiceAutoAssign: 'Voices will be auto-assigned',
   },
   proactiveCard: {
     discussion: 'Discussion',
diff --git a/lib/i18n/settings.ts b/lib/i18n/settings.ts
index 3dba9f669..3ba0be4f3 100644
--- a/lib/i18n/settings.ts
+++ b/lib/i18n/settings.ts
@@ -200,6 +200,7 @@ export const settingsZhCN = {
     asrDescription: 'ASR (Automatic Speech Recognition) - 将语音转换为文字',
     enableTTS: '启用语音合成',
     ttsEnabledDescription: '开启后，课程生成时将自动合成语音',
+    ttsVoiceConfigHint: '每个 Agent 的音色可在首页「课堂角色配置」中设置',
     enableASR: '启用语音识别',
     asrEnabledDescription: '开启后，学生可使用麦克风进行语音输入',
     ttsProvider: 'TTS 提供商',
@@ -788,6 +789,8 @@ export const settingsEnUS = {
     asrDescription: 'ASR (Automatic Speech Recognition) - Convert speech to text',
     enableTTS: 'Enable Text-to-Speech',
     ttsEnabledDescription: 'When enabled, speech audio will be generated during course creation',
+    ttsVoiceConfigHint:
+      'Per-agent voice can be configured in "Classroom Role Config" on the homepage',
     enableASR: 'Enable Speech Recognition',
     asrEnabledDescription: 'When enabled, students can use microphone for voice input',
     ttsProvider: 'TTS Provider',
diff --git a/lib/orchestration/registry/store.ts b/lib/orchestration/registry/store.ts
index b5e7b8600..893a5fa63 100644
--- a/lib/orchestration/registry/store.ts
+++ b/lib/orchestration/registry/store.ts
@@ -7,6 +7,7 @@ import { create } from 'zustand';
 import { persist } from 'zustand/middleware';
 import type { AgentConfig } from './types';
 import { getActionsForRole } from './types';
+import type { TTSProviderId } from '@/lib/audio/types';
 import { USER_AVATAR } from '@/lib/types/roundtable';
 import type { Participant, ParticipantRole } from '@/lib/types/roundtable';
 import { useUserProfileStore } from '@/lib/store/user-profile';
@@ -231,7 +232,7 @@ export const useAgentRegistry = create<AgentRegistryState>()(
     }),
     {
       name: 'agent-registry-storage',
-      version: 10, // Bumped: exclude generated agents from persisted cache
+      version: 11, // Bumped: add voiceConfig field to AgentConfig
       migrate: (persistedState: unknown) => persistedState,
       // Merge persisted state with default agents
       // Default agents always use code-defined values (not cached)
@@ -377,6 +378,7 @@ export async function saveGeneratedAgents(
     avatar: string;
     color: string;
     priority: number;
+    voiceConfig?: { providerId: string; voiceId: string };
   }>,
 ): Promise<string[]> {
   const { db } = await import('@/lib/utils/database');
@@ -396,14 +398,23 @@ export async function saveGeneratedAgents(
 
   // Add to registry
   for (const record of records) {
+    const { voiceConfig, ...rest } = record;
     registry.addAgent({
-      ...record,
+      ...rest,
       allowedActions: getActionsForRole(record.role),
       isDefault: false,
       isGenerated: true,
       boundStageId: stageId,
       createdAt: new Date(record.createdAt),
       updatedAt: new Date(record.createdAt),
+      ...(voiceConfig
+        ? {
+            voiceConfig: {
+              providerId: voiceConfig.providerId as TTSProviderId,
+              voiceId: voiceConfig.voiceId,
+            },
+          }
+        : {}),
     });
   }
 
diff --git a/lib/orchestration/registry/types.ts b/lib/orchestration/registry/types.ts
index ba978b02c..6631e9b46 100644
--- a/lib/orchestration/registry/types.ts
+++ b/lib/orchestration/registry/types.ts
@@ -3,6 +3,8 @@
  * Defines the structure for configurable AI agents in the multi-agent system
  */
 
+import type { TTSProviderId } from '@/lib/audio/types';
+
 export interface AgentConfig {
   id: string; // Unique agent ID
   name: string; // Display name (Chinese)
@@ -12,6 +14,7 @@ export interface AgentConfig {
   color: string; // UI theme color (hex)
   allowedActions: string[]; // Action types this agent can use
   priority: number; // Priority for director selection (1-10)
+  voiceConfig?: { providerId: TTSProviderId; voiceId: string }; // Per-agent TTS voice selection
 
   // Metadata
   createdAt: Date;
@@ -32,6 +35,7 @@ export interface AgentTemplate {
   color: string;
   allowedActions: string[];
   priority: number;
+  voiceConfig?: { providerId: TTSProviderId; voiceId: string }; // Per-agent TTS voice selection
 
   // LLM-generated agent fields
   isGenerated?: boolean; // true for LLM-generated agents