From 27ac0fc7a52ce76df4fcce0986a55ba63a19a1bb Mon Sep 17 00:00:00 2001 From: wyuc Date: Sat, 21 Mar 2026 13:47:23 +0800 Subject: [PATCH 01/65] feat(tts): add resolveVoice() and getServerVoiceList() utilities Co-Authored-By: Claude Sonnet 4.6 --- lib/audio/voice-resolver.ts | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 lib/audio/voice-resolver.ts diff --git a/lib/audio/voice-resolver.ts b/lib/audio/voice-resolver.ts new file mode 100644 index 000000000..438a8ca7a --- /dev/null +++ b/lib/audio/voice-resolver.ts @@ -0,0 +1,35 @@ +import type { TTSProviderId } from '@/lib/audio/types'; +import type { AgentConfig } from '@/lib/orchestration/registry/types'; +import { TTS_PROVIDERS } from '@/lib/audio/constants'; + +/** + * Resolve the voice ID for an agent given the current TTS provider. + * 1. Check agent.voiceOverrides[providerId] — if valid, use it + * 2. Otherwise — deterministic assignment from voiceList by agentIndex + */ +export function resolveVoice( + agent: AgentConfig, + providerId: TTSProviderId, + agentIndex: number, + voiceList: string[], +): string { + if (voiceList.length === 0) return 'default'; + + const override = agent.voiceOverrides?.[providerId]; + if (override && voiceList.includes(override)) { + return override; + } + + return voiceList[agentIndex % voiceList.length]; +} + +/** + * Get the list of voice IDs for a server-side TTS provider. + * For browser-native-tts, caller must pass browser voices separately. 
+ */ +export function getServerVoiceList(providerId: TTSProviderId): string[] { + if (providerId === 'browser-native-tts') return []; + const provider = TTS_PROVIDERS[providerId]; + if (!provider) return []; + return provider.voices.map((v) => v.id); +} From eabaad20af0d6e83657a1dbea100224181578fcf Mon Sep 17 00:00:00 2001 From: wyuc Date: Sat, 21 Mar 2026 13:47:23 +0800 Subject: [PATCH 02/65] feat(tts): add AudioIndicator equalizer bars component Co-Authored-By: Claude Sonnet 4.6 --- components/roundtable/audio-indicator.tsx | 43 +++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 components/roundtable/audio-indicator.tsx diff --git a/components/roundtable/audio-indicator.tsx b/components/roundtable/audio-indicator.tsx new file mode 100644 index 000000000..4b630a1d0 --- /dev/null +++ b/components/roundtable/audio-indicator.tsx @@ -0,0 +1,43 @@ +'use client'; + +import { motion } from 'motion/react'; + +export type AudioIndicatorState = 'idle' | 'generating' | 'playing'; + +interface AudioIndicatorProps { + state: AudioIndicatorState; + agentColor?: string; +} + +const BAR_COUNT = 4; + +export function AudioIndicator({ state, agentColor = '#10b981' }: AudioIndicatorProps) { + if (state === 'idle') return null; + + const color = state === 'generating' ? 'rgba(251, 191, 36, 0.7)' : agentColor; + const cycleDuration = state === 'generating' ? 
0.8 : 0.5; + + return ( + + {Array.from({ length: BAR_COUNT }).map((_, i) => ( + + ))} + + ); +} From 9053b3803ecdafaadce7bad1475e177a2492e992 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sat, 21 Mar 2026 13:47:25 +0800 Subject: [PATCH 03/65] feat(tts): add onSegmentSealed callback to StreamBuffer Co-Authored-By: Claude Sonnet 4.6 --- lib/buffer/stream-buffer.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/buffer/stream-buffer.ts b/lib/buffer/stream-buffer.ts index 6ba94ea6f..725c1285c 100644 --- a/lib/buffer/stream-buffer.ts +++ b/lib/buffer/stream-buffer.ts @@ -124,6 +124,12 @@ export interface StreamBufferCallbacks { directorState?: DirectorState; }): void; onError(message: string): void; + onSegmentSealed?: ( + messageId: string, + partId: string, + fullText: string, + agentId: string | null, + ) => void; } // ─── Options ───────────────────────────────────────────────────────── @@ -403,6 +409,7 @@ export class StreamBuffer { const item = this.items[i]; if (item.kind === 'text' && !item.sealed) { item.sealed = true; + this.cb.onSegmentSealed?.(item.messageId, item.partId, item.text, this.currentAgentId); break; } // Stop searching once we hit a non-text item From ea8c1890deb59a29a90af3ef1b6abd5156a94e77 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sat, 21 Mar 2026 13:48:05 +0800 Subject: [PATCH 04/65] feat(tts): add voiceOverrides field to AgentConfig and AgentTemplate Co-Authored-By: Claude Sonnet 4.6 --- lib/orchestration/registry/store.ts | 2 +- lib/orchestration/registry/types.ts | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/orchestration/registry/store.ts b/lib/orchestration/registry/store.ts index b5e7b8600..c15296c36 100644 --- a/lib/orchestration/registry/store.ts +++ b/lib/orchestration/registry/store.ts @@ -231,7 +231,7 @@ export const useAgentRegistry = create()( }), { name: 'agent-registry-storage', - version: 10, // Bumped: exclude generated agents from persisted cache + version: 11, // Bumped: add 
voiceOverrides field to AgentConfig migrate: (persistedState: unknown) => persistedState, // Merge persisted state with default agents // Default agents always use code-defined values (not cached) diff --git a/lib/orchestration/registry/types.ts b/lib/orchestration/registry/types.ts index ba978b02c..17b7f876c 100644 --- a/lib/orchestration/registry/types.ts +++ b/lib/orchestration/registry/types.ts @@ -3,6 +3,8 @@ * Defines the structure for configurable AI agents in the multi-agent system */ +import type { TTSProviderId } from '@/lib/audio/types'; + export interface AgentConfig { id: string; // Unique agent ID name: string; // Display name (Chinese) @@ -12,6 +14,7 @@ export interface AgentConfig { color: string; // UI theme color (hex) allowedActions: string[]; // Action types this agent can use priority: number; // Priority for director selection (1-10) + voiceOverrides?: Partial>; // Per-provider voice selections // Metadata createdAt: Date; @@ -32,6 +35,7 @@ export interface AgentTemplate { color: string; allowedActions: string[]; priority: number; + voiceOverrides?: Partial>; // Per-provider voice selections // LLM-generated agent fields isGenerated?: boolean; // true for LLM-generated agents From 984bdb3e1d916efb9f4bac2533b674fdacc2f749 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sat, 21 Mar 2026 13:54:42 +0800 Subject: [PATCH 05/65] feat(tts): add useDiscussionTTS hook with audio queue and cleanup Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/hooks/use-discussion-tts.ts | 198 ++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 lib/hooks/use-discussion-tts.ts diff --git a/lib/hooks/use-discussion-tts.ts b/lib/hooks/use-discussion-tts.ts new file mode 100644 index 000000000..c54ff4cfe --- /dev/null +++ b/lib/hooks/use-discussion-tts.ts @@ -0,0 +1,198 @@ +'use client'; + +import { useCallback, useEffect, useRef } from 'react'; +import { useSettingsStore } from '@/lib/store/settings'; +import { useBrowserTTS } from 
'@/lib/hooks/use-browser-tts'; +import { resolveVoice, getServerVoiceList } from '@/lib/audio/voice-resolver'; +import type { AgentConfig } from '@/lib/orchestration/registry/types'; +import type { AudioIndicatorState } from '@/components/roundtable/audio-indicator'; + +interface DiscussionTTSOptions { + enabled: boolean; + agents: AgentConfig[]; + onAudioStateChange?: (agentId: string | null, state: AudioIndicatorState) => void; +} + +interface QueueItem { + messageId: string; + partId: string; + text: string; + agentId: string | null; + voiceId: string; +} + +export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: DiscussionTTSOptions) { + const ttsProviderId = useSettingsStore((s) => s.ttsProviderId); + const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig); + const ttsSpeed = useSettingsStore((s) => s.ttsSpeed); + const ttsMuted = useSettingsStore((s) => s.ttsMuted); + + const queueRef = useRef([]); + const isPlayingRef = useRef(false); + const abortControllerRef = useRef(null); + const audioRef = useRef(null); + // Use refs to avoid stale closures in callbacks + const onAudioStateChangeRef = useRef(onAudioStateChange); + onAudioStateChangeRef.current = onAudioStateChange; + const processQueueRef = useRef<() => void>(() => {}); + + const isBrowserTTS = ttsProviderId === 'browser-native-tts'; + + const { + speak: browserSpeak, + cancel: browserCancel, + availableVoices: browserAvailableVoices, + } = useBrowserTTS({ + rate: ttsSpeed, + onEnd: () => { + isPlayingRef.current = false; + onAudioStateChangeRef.current?.(null, 'idle'); + processQueueRef.current(); + }, + }); + const browserCancelRef = useRef(browserCancel); + browserCancelRef.current = browserCancel; + + // Build agent index map for deterministic voice resolution + const agentIndexMap = useRef>(new Map()); + useEffect(() => { + const map = new Map(); + agents.forEach((agent, i) => map.set(agent.id, i)); + agentIndexMap.current = map; + }, [agents]); + + const 
getVoiceForAgent = useCallback( + (agentId: string | null): string => { + if (!agentId) return 'default'; + const agent = agents.find((a) => a.id === agentId); + if (!agent) return 'default'; + const index = agentIndexMap.current.get(agentId) ?? 0; + + if (isBrowserTTS) { + const browserVoices = browserAvailableVoices.map((v) => v.voiceURI); + return resolveVoice(agent, ttsProviderId, index, browserVoices); + } + + const serverVoices = getServerVoiceList(ttsProviderId); + return resolveVoice(agent, ttsProviderId, index, serverVoices); + }, + [agents, ttsProviderId, isBrowserTTS, browserAvailableVoices], + ); + + const processQueue = useCallback(async () => { + if (isPlayingRef.current || queueRef.current.length === 0) return; + if (!enabled || ttsMuted) { + queueRef.current = []; + return; + } + + isPlayingRef.current = true; + const item = queueRef.current.shift()!; + + if (isBrowserTTS) { + onAudioStateChangeRef.current?.(item.agentId, 'playing'); + browserSpeak(item.text, item.voiceId); + // browserTTS onEnd callback resets isPlayingRef and calls processQueue + return; + } + + // Server TTS + onAudioStateChangeRef.current?.(item.agentId, 'generating'); + const controller = new AbortController(); + abortControllerRef.current = controller; + + try { + const providerConfig = ttsProvidersConfig[ttsProviderId]; + // Match actual /api/generate/tts field names: + // { text, audioId, ttsProviderId, ttsVoice, ttsSpeed, ttsApiKey, ttsBaseUrl } + // Response: { audioId, base64, format } + const res = await fetch('/api/generate/tts', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + text: item.text, + audioId: item.partId, + ttsProviderId: ttsProviderId, + ttsVoice: item.voiceId, + ttsSpeed: ttsSpeed, + ttsApiKey: providerConfig?.apiKey, + ttsBaseUrl: providerConfig?.serverBaseUrl || providerConfig?.baseUrl, + }), + signal: controller.signal, + }); + + if (!res.ok) throw new Error(`TTS API error: ${res.status}`); + + const 
data = await res.json(); + if (!data.base64) throw new Error('No audio in response'); + + // Play via HTMLAudioElement directly (simpler than AudioPlayer for queued playback) + onAudioStateChangeRef.current?.(item.agentId, 'playing'); + const audioUrl = `data:audio/${data.format || 'mp3'};base64,${data.base64}`; + const audio = new Audio(audioUrl); + audioRef.current = audio; + audio.addEventListener('ended', () => { + isPlayingRef.current = false; + onAudioStateChangeRef.current?.(item.agentId, 'idle'); + processQueueRef.current(); + }); + audio.addEventListener('error', () => { + isPlayingRef.current = false; + onAudioStateChangeRef.current?.(item.agentId, 'idle'); + processQueueRef.current(); + }); + await audio.play(); + } catch (err) { + if ((err as Error).name !== 'AbortError') { + console.error('[DiscussionTTS] TTS generation failed:', err); + } + isPlayingRef.current = false; + onAudioStateChangeRef.current?.(item.agentId, 'idle'); + processQueueRef.current(); // skip failed segment, continue queue + } + }, [enabled, ttsMuted, isBrowserTTS, ttsProviderId, ttsProvidersConfig, ttsSpeed, browserSpeak]); + + // Keep processQueueRef in sync + processQueueRef.current = processQueue; + + // Called by StreamBuffer's onSegmentSealed + const handleSegmentSealed = useCallback( + (messageId: string, partId: string, fullText: string, agentId: string | null) => { + if (!enabled || ttsMuted || !fullText.trim()) return; + + const voiceId = getVoiceForAgent(agentId); + queueRef.current.push({ messageId, partId, text: fullText, agentId, voiceId }); + + if (!isPlayingRef.current) { + processQueueRef.current(); + } else if (!isBrowserTTS) { + // Show generating indicator for queued items + onAudioStateChangeRef.current?.(agentId, 'generating'); + } + }, + [enabled, ttsMuted, getVoiceForAgent, isBrowserTTS], + ); + + // Cleanup: abort all, stop playback, clear queue + const cleanup = useCallback(() => { + abortControllerRef.current?.abort(); + abortControllerRef.current = null; 
+ if (audioRef.current) { + audioRef.current.pause(); + audioRef.current.src = ''; + audioRef.current = null; + } + browserCancelRef.current(); + queueRef.current = []; + isPlayingRef.current = false; + onAudioStateChangeRef.current?.(null, 'idle'); + }, []); // stable — uses only refs + + // Cleanup on unmount + useEffect(() => cleanup, [cleanup]); + + return { + handleSegmentSealed, + cleanup, + }; +} From 93e954210c3b5129902d4a2dcd28961c3b85d981 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sat, 21 Mar 2026 13:59:22 +0800 Subject: [PATCH 06/65] feat(tts): add audio state indicator to Roundtable bubble Co-Authored-By: Claude Sonnet 4.6 --- components/roundtable/index.tsx | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/components/roundtable/index.tsx b/components/roundtable/index.tsx index 0d08a71b6..e619aa92e 100644 --- a/components/roundtable/index.tsx +++ b/components/roundtable/index.tsx @@ -16,6 +16,8 @@ import { Loader2, } from 'lucide-react'; import { cn } from '@/lib/utils'; +import { AudioIndicator } from './audio-indicator'; +import type { AudioIndicatorState } from './audio-indicator'; import { CanvasToolbar } from '@/components/canvas/canvas-toolbar'; import { useAudioRecorder } from '@/lib/hooks/use-audio-recorder'; import { useI18n } from '@/lib/hooks/use-i18n'; @@ -47,6 +49,8 @@ interface RoundtableProps { readonly isStreaming?: boolean; readonly sessionType?: 'qa' | 'discussion'; readonly speakingAgentId?: string | null; + readonly audioIndicatorState?: AudioIndicatorState; + readonly audioAgentId?: string | null; readonly speechProgress?: number | null; // StreamBuffer reveal progress (0–1) for auto-scroll readonly showEndFlash?: boolean; readonly endFlashSessionType?: 'qa' | 'discussion'; @@ -107,6 +111,8 @@ export function Roundtable({ isStreaming, sessionType, speakingAgentId, + audioIndicatorState, + audioAgentId, speechProgress: _speechProgress, showEndFlash, endFlashSessionType = 'discussion', @@ -974,6 +980,29 
@@ export function Roundtable({ })()}
+ {/* Agent name + audio indicator header */} + {bubbleRole !== 'user' && bubbleName && ( +
+ + {bubbleName} + + +
+ )} {isBubbleLoading ? (
Date: Sat, 21 Mar 2026 13:59:41 +0800 Subject: [PATCH 07/65] feat(tts): wire onSegmentSealed callback through chat sessions Co-Authored-By: Claude Opus 4.6 (1M context) --- components/chat/use-chat-sessions.ts | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/components/chat/use-chat-sessions.ts b/components/chat/use-chat-sessions.ts index 5dfa45b09..4f9117b81 100644 --- a/components/chat/use-chat-sessions.ts +++ b/components/chat/use-chat-sessions.ts @@ -36,6 +36,12 @@ interface UseChatSessionsOptions { onActiveBubble?: (messageId: string | null) => void; /** Called when a QA/Discussion session completes naturally (director end). */ onStopSession?: () => void; + onSegmentSealed?: ( + messageId: string, + partId: string, + fullText: string, + agentId: string | null, + ) => void; } export function useChatSessions(options: UseChatSessionsOptions = {}) { @@ -45,6 +51,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { const onCueUserRef = useRef(options.onCueUser); const onActiveBubbleRef = useRef(options.onActiveBubble); const onStopSessionRef = useRef(options.onStopSession); + const onSegmentSealedRef = useRef(options.onSegmentSealed); useEffect(() => { onLiveSpeechRef.current = options.onLiveSpeech; onSpeechProgressRef.current = options.onSpeechProgress; @@ -52,6 +59,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { onCueUserRef.current = options.onCueUser; onActiveBubbleRef.current = options.onActiveBubble; onStopSessionRef.current = options.onStopSession; + onSegmentSealedRef.current = options.onSegmentSealed; }, [ options.onLiveSpeech, options.onSpeechProgress, @@ -59,6 +67,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { options.onCueUser, options.onActiveBubble, options.onStopSession, + options.onSegmentSealed, ]); const { t } = useI18n(); @@ -317,6 +326,15 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { onError(message: string) { 
log.error('[Buffer] Stream error:', message); }, + + onSegmentSealed( + messageId: string, + partId: string, + fullText: string, + agentId: string | null, + ) { + onSegmentSealedRef.current?.(messageId, partId, fullText, agentId); + }, }, pacingOptions, ); From 266e976a1ba4f97585b6c0316a54ed72b410afde Mon Sep 17 00:00:00 2001 From: wyuc Date: Sat, 21 Mar 2026 14:01:32 +0800 Subject: [PATCH 08/65] feat(tts): add per-agent voice dropdown to AgentBar Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 98 +++++++++++++++++++++++++++++++++- lib/i18n/chat.ts | 6 +++ 2 files changed, 102 insertions(+), 2 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 289ee03cb..fada6e6d5 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -8,6 +8,8 @@ import { cn } from '@/lib/utils'; import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; import { useAgentRegistry } from '@/lib/orchestration/registry/store'; +import { resolveVoice, getServerVoiceList } from '@/lib/audio/voice-resolver'; +import { TTS_PROVIDERS } from '@/lib/audio/constants'; import { Sparkles, ChevronDown, ChevronUp, Shuffle } from 'lucide-react'; import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'; @@ -20,6 +22,8 @@ export function AgentBar() { const setMaxTurns = useSettingsStore((s) => s.setMaxTurns); const agentMode = useSettingsStore((s) => s.agentMode); const setAgentMode = useSettingsStore((s) => s.setAgentMode); + const ttsProviderId = useSettingsStore((s) => s.ttsProviderId); + const updateAgent = useAgentRegistry((s) => s.updateAgent); const [open, setOpen] = useState(false); const containerRef = useRef(null); @@ -31,6 +35,11 @@ export function AgentBar() { const selectedAgents = agents.filter((a) => selectedAgentIds.includes(a.id)); const nonTeacherSelected = selectedAgents.filter((a) => a.role !== 'teacher'); + const 
voiceList = getServerVoiceList(ttsProviderId); + const providerVoices = TTS_PROVIDERS[ttsProviderId]?.voices ?? []; + const getVoiceDisplayName = (voiceId: string) => + providerVoices.find((v) => v.id === voiceId)?.name ?? voiceId; + // Click-outside to collapse useEffect(() => { if (!open) return; @@ -214,12 +223,69 @@ export function AgentBar() {
{agentMode === 'preset' ? ( - /* Agent list — teacher is always selected, no need to show */ + /* Agent list — teacher first (always selected), then others */
+ {/* Teacher row — always selected, checkbox disabled */} + {teacherAgent && ( +
+ +
+ {getAgentName(teacherAgent)} +
+
+
+ {getAgentName(teacherAgent)} + + {getAgentRole(teacherAgent)} + +
+
+ {voiceList.length > 0 ? ( + + ) : ( + + {t('agentBar.voiceLoading')} + + )} +
+ )} + + {/* Non-teacher agents */} {agents .filter((a) => a.role !== 'teacher') - .map((agent) => { + .map((agent, idx) => { const isSelected = selectedAgentIds.includes(agent.id); + // agentIndex: teacher=0, then non-teachers start at 1 + const agentIndex = idx + 1; return (
+ {voiceList.length > 0 ? ( + + ) : ( + + {t('agentBar.voiceLoading')} + + )}
); })} @@ -278,6 +369,9 @@ export function AgentBar() {

{t('settings.agentModeAutoDesc')}

+

+ {t('agentBar.voiceAutoAssign')} +

)} diff --git a/lib/i18n/chat.ts b/lib/i18n/chat.ts index 4a5421399..1bb535d3e 100644 --- a/lib/i18n/chat.ts +++ b/lib/i18n/chat.ts @@ -55,6 +55,9 @@ export const chatZhCN = { readyToLearn: '准备好一起学习了吗?', expandedTitle: '课堂角色配置', configTooltip: '点击配置课堂角色', + voiceLabel: '音色', + voiceLoading: '加载中...', + voiceAutoAssign: '音色将自动分配', }, proactiveCard: { discussion: '讨论', @@ -126,6 +129,9 @@ export const chatEnUS = { readyToLearn: 'Ready to learn together?', expandedTitle: 'Classroom Role Config', configTooltip: 'Click to configure classroom roles', + voiceLabel: 'Voice', + voiceLoading: 'Loading...', + voiceAutoAssign: 'Voices will be auto-assigned', }, proactiveCard: { discussion: 'Discussion', From 8e4e9648a4aae6a8fc85ad44ba971cb19506eecc Mon Sep 17 00:00:00 2001 From: wyuc Date: Sat, 21 Mar 2026 14:05:15 +0800 Subject: [PATCH 09/65] feat(tts): integrate useDiscussionTTS in Stage and pass state to Roundtable Co-Authored-By: Claude Opus 4.6 (1M context) --- components/chat/chat-area.tsx | 8 ++++++++ components/stage.tsx | 38 +++++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/components/chat/chat-area.tsx b/components/chat/chat-area.tsx index 7ce66e57a..ddeb6e241 100644 --- a/components/chat/chat-area.tsx +++ b/components/chat/chat-area.tsx @@ -27,6 +27,12 @@ interface ChatAreaProps { onThinking?: (state: { stage: string; agentId?: string } | null) => void; onCueUser?: (fromAgentId?: string, prompt?: string) => void; onStopSession?: () => void; + onSegmentSealed?: ( + messageId: string, + partId: string, + fullText: string, + agentId: string | null, + ) => void; currentSceneId?: string | null; } @@ -67,6 +73,7 @@ export const ChatArea = forwardRef( onThinking, onCueUser, onStopSession, + onSegmentSealed, currentSceneId, }, ref, @@ -98,6 +105,7 @@ export const ChatArea = forwardRef( onCueUser, onActiveBubble, onStopSession, + onSegmentSealed, }); const [activeTab, setActiveTab] = useState<'lecture' | 'chat'>('lecture'); 
diff --git a/components/stage.tsx b/components/stage.tsx index d9025ec04..2ef7328ca 100644 --- a/components/stage.tsx +++ b/components/stage.tsx @@ -14,6 +14,8 @@ import { PlaybackEngine, computePlaybackView } from '@/lib/playback'; import type { EngineMode, TriggerEvent, Effect } from '@/lib/playback'; import { ActionEngine } from '@/lib/action/engine'; import { createAudioPlayer } from '@/lib/utils/audio-player'; +import { useDiscussionTTS } from '@/lib/hooks/use-discussion-tts'; +import type { AudioIndicatorState } from '@/components/roundtable/audio-indicator'; import type { Action, DiscussionAction, SpeechAction } from '@/lib/types/action'; // Playback state persistence removed — refresh always starts from the beginning import { ChatArea, type ChatAreaRef } from '@/components/chat/chat-area'; @@ -100,6 +102,7 @@ export function Stage({ // Selected agents from settings store (Zustand) const selectedAgentIds = useSettingsStore((s) => s.selectedAgentIds); + const ttsMuted = useSettingsStore((s) => s.ttsMuted); // Generate participants from selected agents const participants = useMemo( @@ -107,6 +110,27 @@ export function Stage({ [selectedAgentIds, t], ); + // Resolved AgentConfig array for hooks that need full agent objects + const selectedAgents = useMemo(() => { + const registry = useAgentRegistry.getState(); + return selectedAgentIds + .map((id) => registry.getAgent(id)) + .filter((a): a is AgentConfig => a != null); + }, [selectedAgentIds]); + + // Discussion TTS: audio indicator state + const [audioIndicatorState, setAudioIndicatorState] = useState('idle'); + const [audioAgentId, setAudioAgentId] = useState(null); + + const discussionTTS = useDiscussionTTS({ + enabled: !ttsMuted, + agents: selectedAgents, + onAudioStateChange: (agentId, state) => { + setAudioAgentId(agentId); + setAudioIndicatorState(state); + }, + }); + // Pick a student agent for discussion trigger (prioritize student > non-teacher > fallback) const pickStudentAgent = useCallback((): 
string => { const registry = useAgentRegistry.getState(); @@ -217,8 +241,11 @@ export function Stage({ setTimeout(() => setShowEndFlash(false), 1800); } + // Stop any in-flight discussion TTS audio + discussionTTS.cleanup(); + resetLiveState(); - }, [chatSessionType, resetLiveState]); + }, [chatSessionType, resetLiveState, discussionTTS]); // Shared stop-discussion handler (used by both Roundtable and Canvas toolbar) const handleStopDiscussion = useCallback(async () => { @@ -242,6 +269,9 @@ export function Stage({ discussionAbortRef.current = null; } + // Stop any in-flight discussion TTS audio on scene switch + discussionTTS.cleanup(); + // Reset all roundtable/live state so scenes are fully isolated resetSceneState(); @@ -331,6 +361,8 @@ export function Stage({ discussionAbortRef.current = null; } setDiscussionTrigger(null); + // Stop any in-flight discussion TTS audio + discussionTTS.cleanup(); // Clear roundtable state (idempotent — may already be cleared by doSessionCleanup) resetLiveState(); // Only show flash for engine-initiated ends (not manual stop — that's handled by doSessionCleanup) @@ -437,7 +469,6 @@ export function Stage({ }, []); // Sync mute state from settings store to audioPlayer - const ttsMuted = useSettingsStore((s) => s.ttsMuted); useEffect(() => { audioPlayerRef.current.setMuted(ttsMuted); }, [ttsMuted]); @@ -744,6 +775,8 @@ export function Stage({ discussionRequest={discussionRequest} engineMode={engineMode} isStreaming={chatIsStreaming} + audioIndicatorState={audioIndicatorState} + audioAgentId={audioAgentId} sessionType={ chatSessionType === 'qa' ? 
'qa' @@ -878,6 +911,7 @@ export function Stage({ setIsCueUser(true); }} onStopSession={doSessionCleanup} + onSegmentSealed={discussionTTS.handleSegmentSealed} /> {/* Scene switch confirmation dialog */} From f0f084777391bfa5362ce1e1c45748959712535d Mon Sep 17 00:00:00 2001 From: wyuc Date: Sat, 21 Mar 2026 14:29:49 +0800 Subject: [PATCH 10/65] style(tts): refine voice dropdown to pill-style selector Replace native select styling with a compact rounded-full pill that blends into the agent row. Remove border, use muted bg, smaller text, and a custom chevron icon. Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 94 +++++++++++++++++----------------- 1 file changed, 46 insertions(+), 48 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index fada6e6d5..120f0d1d8 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -252,30 +252,29 @@ export function AgentBar() {
{voiceList.length > 0 ? ( - - ) : ( - - {t('agentBar.voiceLoading')} - - )} +
+ + +
+ ) : null} )} @@ -326,30 +325,29 @@ export function AgentBar() { })()} {voiceList.length > 0 ? ( - - ) : ( - - {t('agentBar.voiceLoading')} - - )} +
+ + +
+ ) : null} ); })} From 606682a3f56d93cf618aea67e76f153da13c49c6 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sat, 21 Mar 2026 14:33:11 +0800 Subject: [PATCH 11/65] style(tts): use shadcn Select for voice dropdown, link with TTS toggle - Replace native { - updateAgent(teacherAgent.id, { - voiceOverrides: { - ...teacherAgent.voiceOverrides, - [ttsProviderId]: e.target.value, - }, - }); - }} + {showVoice && ( + - - - ) : null} + + + )} )} @@ -324,30 +336,33 @@ export function AgentBar() { ) : null; })()} - {voiceList.length > 0 ? ( -
- { + updateAgent(agent.id, { + voiceOverrides: { + ...agent.voiceOverrides, + [ttsProviderId]: value, + }, + }); + }} + > + e.stopPropagation()} - className="appearance-none h-5 text-[10px] rounded-full bg-muted/60 hover:bg-muted pl-2 pr-5 text-muted-foreground/70 hover:text-muted-foreground shrink-0 max-w-[80px] truncate cursor-pointer focus:outline-none transition-colors border-0" > + + + {voiceList.map((voiceId) => ( - + ))} - - -
- ) : null} + + + )} ); })} From a2e5124dd3f72a1379fdd99dea7fa1cb1a306e84 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sat, 21 Mar 2026 14:44:01 +0800 Subject: [PATCH 12/65] style(tts): add voice label prefix and always show dropdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Show "音色: Alloy" instead of plain "Alloy" in the voice pill. Always show dropdown regardless of mute state (voice config is independent of playback). Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index c0c6ded99..7d2933128 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -47,7 +47,7 @@ export function AgentBar() { const providerVoices = TTS_PROVIDERS[ttsProviderId]?.voices ?? []; const getVoiceDisplayName = (voiceId: string) => providerVoices.find((v) => v.id === voiceId)?.name ?? voiceId; - const showVoice = voiceList.length > 0 && !ttsMuted; + const showVoice = voiceList.length > 0; // Click-outside to collapse useEffect(() => { @@ -273,10 +273,15 @@ export function AgentBar() { }} > e.stopPropagation()} > - + + {t('agentBar.voiceLabel')}:{' '} + {getVoiceDisplayName( + resolveVoice(teacherAgent, ttsProviderId, 0, voiceList), + )} + {voiceList.map((voiceId) => ( @@ -349,10 +354,15 @@ export function AgentBar() { }} > e.stopPropagation()} > - + + {t('agentBar.voiceLabel')}:{' '} + {getVoiceDisplayName( + resolveVoice(agent, ttsProviderId, agentIndex, voiceList), + )} + {voiceList.map((voiceId) => ( From 0ffdf49ca07e91d60676e6af5f35dbd347bacd36 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sat, 21 Mar 2026 14:47:14 +0800 Subject: [PATCH 13/65] style(tts): add volume icon hint in collapsed AgentBar Show a small Volume2 icon in the collapsed pill when voice config is available, hinting that voice settings are inside. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 7d2933128..4c2a47187 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -17,7 +17,7 @@ import { useSettingsStore } from '@/lib/store/settings'; import { useAgentRegistry } from '@/lib/orchestration/registry/store'; import { resolveVoice, getServerVoiceList } from '@/lib/audio/voice-resolver'; import { TTS_PROVIDERS } from '@/lib/audio/constants'; -import { Sparkles, ChevronDown, ChevronUp, Shuffle } from 'lucide-react'; +import { Sparkles, ChevronDown, ChevronUp, Shuffle, Volume2 } from 'lucide-react'; import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'; export function AgentBar() { @@ -155,6 +155,9 @@ export function AgentBar() { )} )} + {showVoice && ( + + )} ); From 60abace8dd611c3a634af9d51d4bbf6a06f0edf6 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sat, 21 Mar 2026 17:38:23 +0800 Subject: [PATCH 14/65] fix(tts): fix voice dropdown layout and click handling - Move voice pill below agent name (second line) to prevent horizontal overflow in English - Wrap Select in div with onPointerDown stopPropagation to fix Radix click-through to parent row - Add line-clamp-1 to descriptions for consistent row height - Use items-start instead of items-center for better multi-line alignment Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 145 +++++++++++++++++---------------- 1 file changed, 75 insertions(+), 70 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 4c2a47187..a0b6ee9d4 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -241,12 +241,12 @@ export function AgentBar() { {teacherAgent && (
- +
-
- {showVoice && ( - - )} + + + )} + )} @@ -303,20 +306,19 @@ export function AgentBar() { .filter((a) => a.role !== 'teacher') .map((agent, idx) => { const isSelected = selectedAgentIds.includes(agent.id); - // agentIndex: teacher=0, then non-teachers start at 1 const agentIndex = idx + 1; return (
toggleAgent(agent.id)} className={cn( - 'w-full flex items-center gap-3 px-3 py-2 text-left transition-colors cursor-pointer rounded-lg', + 'w-full flex items-start gap-3 px-3 py-2 text-left transition-colors cursor-pointer rounded-lg', isSelected ? 'bg-primary/5' : 'hover:bg-muted/50', )} > - +
+

{desc}

) : null; })()} -
- {showVoice && ( - - )} + +
+ )} + ); })} From e835c0097fb599c81c5bec0f3e054942f88a8173 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sat, 21 Mar 2026 17:45:55 +0800 Subject: [PATCH 15/65] refactor(tts): redesign AgentBar voice layout for compactness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Single-line layout: checkbox · avatar · name · role · voice pill - Remove descriptions from agent rows (saves vertical space) - Extract AgentVoicePill component to isolate Select event handling - Smaller avatars (size-7), tighter row padding (py-1.5) - Voice pill uses Volume2 icon + voice name (no prefix text) - Works in both Chinese and English without overflow Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 225 ++++++++++++++------------------- 1 file changed, 95 insertions(+), 130 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index a0b6ee9d4..1a5ef8faf 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -19,6 +19,61 @@ import { resolveVoice, getServerVoiceList } from '@/lib/audio/voice-resolver'; import { TTS_PROVIDERS } from '@/lib/audio/constants'; import { Sparkles, ChevronDown, ChevronUp, Shuffle, Volume2 } from 'lucide-react'; import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'; +import type { AgentConfig } from '@/lib/orchestration/registry/types'; + +function AgentVoicePill({ + agent, + agentIndex, + voiceList, + ttsProviderId, + getVoiceDisplayName, +}: { + agent: AgentConfig; + agentIndex: number; + voiceList: string[]; + ttsProviderId: string; + getVoiceDisplayName: (id: string) => string; +}) { + const updateAgent = useAgentRegistry((s) => s.updateAgent); + const currentVoice = resolveVoice( + agent, + ttsProviderId as Parameters[1], + agentIndex, + voiceList, + ); + + return ( +
e.stopPropagation()} + onPointerDown={(e) => e.stopPropagation()} + className="shrink-0" + > + +
+ ); +} export function AgentBar() { const { t } = useI18n(); @@ -31,13 +86,11 @@ export function AgentBar() { const setAgentMode = useSettingsStore((s) => s.setAgentMode); const ttsProviderId = useSettingsStore((s) => s.ttsProviderId); const ttsMuted = useSettingsStore((s) => s.ttsMuted); - const updateAgent = useAgentRegistry((s) => s.updateAgent); const [open, setOpen] = useState(false); const containerRef = useRef(null); const allAgents = listAgents(); - // In preset mode, only show default (non-generated) agents const agents = allAgents.filter((a) => !a.isGenerated); const teacherAgent = agents.find((a) => a.role === 'teacher'); const selectedAgents = agents.filter((a) => selectedAgentIds.includes(a.id)); @@ -49,7 +102,6 @@ export function AgentBar() { providerVoices.find((v) => v.id === voiceId)?.name ?? voiceId; const showVoice = voiceList.length > 0; - // Click-outside to collapse useEffect(() => { if (!open) return; const handler = (e: MouseEvent) => { @@ -64,7 +116,6 @@ export function AgentBar() { const handleModeChange = (mode: 'preset' | 'auto') => { setAgentMode(mode); if (mode === 'preset') { - // Ensure a teacher is always selected in preset mode const hasTeacherSelected = selectedAgentIds.some((id) => { const a = agents.find((agent) => agent.id === id); return a?.role === 'teacher'; @@ -77,7 +128,7 @@ export function AgentBar() { const toggleAgent = (agentId: string) => { const agent = agents.find((a) => a.id === agentId); - if (agent?.role === 'teacher') return; // teacher is always selected + if (agent?.role === 'teacher') return; if (selectedAgentIds.includes(agentId)) { setSelectedAgentIds(selectedAgentIds.filter((id) => id !== agentId)); } else { @@ -97,10 +148,8 @@ export function AgentBar() { return translated !== key ? translated : agent.role; }; - /* ── Shared avatar row — always visible on the right side ── */ const avatarRow = (
- {/* Teacher avatar — always shown */} {teacherAgent && (
- {/* In auto mode: show assistant avatar + shuffle indicator */}
{agents.find((a) => a.role === 'assistant') && (
@@ -129,7 +177,6 @@ export function AgentBar() { ) : ( <> - {/* In preset mode: show selected non-teacher agents */} {nonTeacherSelected.length > 0 && (
{nonTeacherSelected.slice(0, 4).map((agent) => ( @@ -163,7 +210,6 @@ export function AgentBar() { return (
- {/* ── Header row — always in document flow ── */}
{agentMode === 'preset' ? ( - /* Agent list — teacher first (always selected), then others */
- {/* Teacher row — always selected, checkbox disabled */} + {/* Teacher row */} {teacherAgent && ( -
- +
+
-
-
- {getAgentName(teacherAgent)} - - {getAgentRole(teacherAgent)} - -
- {showVoice && ( -
e.stopPropagation()} - onPointerDown={(e) => e.stopPropagation()} - > - -
- )} -
+ + {getAgentName(teacherAgent)} + + + {getAgentRole(teacherAgent)} + + {showVoice && ( + + )}
)} @@ -312,13 +319,13 @@ export function AgentBar() { key={agent.id} onClick={() => toggleAgent(agent.id)} className={cn( - 'w-full flex items-start gap-3 px-3 py-2 text-left transition-colors cursor-pointer rounded-lg', + 'w-full flex items-center gap-2.5 px-3 py-1.5 cursor-pointer rounded-lg transition-colors', isSelected ? 'bg-primary/5' : 'hover:bg-muted/50', )} > - +
-
-
- {getAgentName(agent)} - - {getAgentRole(agent)} - -
- {(() => { - const descKey = `settings.agentDescriptions.${agent.id}`; - const desc = t(descKey); - return desc !== descKey ? ( -

- {desc} -

- ) : null; - })()} - {showVoice && ( -
e.stopPropagation()} - onPointerDown={(e) => e.stopPropagation()} - > - -
- )} -
+ + {getAgentName(agent)} + + + {getAgentRole(agent)} + + {showVoice && ( + + )}
); })}
) : ( - /* Auto-generate mode */
- {/* Shuffle icon with ambient animation */}
- {/* Ping ripple */}
- {/* Soft glow ring */}
- {/* Icon */}

@@ -406,7 +371,7 @@ export function AgentBar() {

)} - {/* Max turns — always visible */} + {/* Max turns */}
{t('settings.maxTurns')} From 0b004931e556f8f106bc628c41ac40e790d3cb9b Mon Sep 17 00:00:00 2001 From: wyuc Date: Sat, 21 Mar 2026 18:04:45 +0800 Subject: [PATCH 16/65] feat(tts): cross-provider voice selection per agent - Change voiceConfig from per-provider lookup to explicit { providerId, voiceId } per agent - Each agent can use a different TTS provider's voice - Voice picker dropdown groups voices by provider - useDiscussionTTS routes TTS requests per agent's provider - resolveAgentVoice falls back to global provider if no config Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 85 ++++++++++++++++------------- lib/audio/voice-resolver.ts | 75 ++++++++++++++++++++----- lib/hooks/use-discussion-tts.ts | 63 ++++++++------------- lib/orchestration/registry/types.ts | 4 +- 4 files changed, 134 insertions(+), 93 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 1a5ef8faf..dd46c259e 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -7,40 +7,46 @@ import { Input } from '@/components/ui/input'; import { Select, SelectContent, + SelectGroup, SelectItem, + SelectLabel, SelectTrigger, - SelectValue, } from '@/components/ui/select'; import { cn } from '@/lib/utils'; import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; import { useAgentRegistry } from '@/lib/orchestration/registry/store'; -import { resolveVoice, getServerVoiceList } from '@/lib/audio/voice-resolver'; -import { TTS_PROVIDERS } from '@/lib/audio/constants'; +import { resolveAgentVoice, getAvailableProvidersWithVoices } from '@/lib/audio/voice-resolver'; import { Sparkles, ChevronDown, ChevronUp, Shuffle, Volume2 } from 'lucide-react'; import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'; import type { AgentConfig } from '@/lib/orchestration/registry/types'; +import type { TTSProviderId } from '@/lib/audio/types'; 
function AgentVoicePill({ agent, agentIndex, - voiceList, - ttsProviderId, - getVoiceDisplayName, + availableProviders, + globalProviderId, }: { agent: AgentConfig; agentIndex: number; - voiceList: string[]; - ttsProviderId: string; - getVoiceDisplayName: (id: string) => string; + availableProviders: ReturnType; + globalProviderId: TTSProviderId; }) { const updateAgent = useAgentRegistry((s) => s.updateAgent); - const currentVoice = resolveVoice( - agent, - ttsProviderId as Parameters[1], - agentIndex, - voiceList, - ); + const resolved = resolveAgentVoice(agent, globalProviderId, agentIndex); + + // Encode as "providerId::voiceId" for the Select value + const currentValue = `${resolved.providerId}::${resolved.voiceId}`; + + // Find display name for current voice + const currentVoiceName = (() => { + for (const p of availableProviders) { + const v = p.voices.find((voice) => voice.id === resolved.voiceId); + if (v) return v.name; + } + return resolved.voiceId; + })(); return (
@@ -85,6 +100,7 @@ export function AgentBar() { const agentMode = useSettingsStore((s) => s.agentMode); const setAgentMode = useSettingsStore((s) => s.setAgentMode); const ttsProviderId = useSettingsStore((s) => s.ttsProviderId); + const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig); const ttsMuted = useSettingsStore((s) => s.ttsMuted); const [open, setOpen] = useState(false); @@ -96,11 +112,8 @@ export function AgentBar() { const selectedAgents = agents.filter((a) => selectedAgentIds.includes(a.id)); const nonTeacherSelected = selectedAgents.filter((a) => a.role !== 'teacher'); - const voiceList = getServerVoiceList(ttsProviderId); - const providerVoices = TTS_PROVIDERS[ttsProviderId]?.voices ?? []; - const getVoiceDisplayName = (voiceId: string) => - providerVoices.find((v) => v.id === voiceId)?.name ?? voiceId; - const showVoice = voiceList.length > 0; + const availableProviders = getAvailableProvidersWithVoices(ttsProvidersConfig); + const showVoice = availableProviders.length > 0; useEffect(() => { if (!open) return; @@ -219,7 +232,7 @@ export function AgentBar() { )} onClick={() => setOpen(!open)} > - + {open ? t('agentBar.expandedTitle') : t('agentBar.readyToLearn')} {avatarRow} @@ -300,9 +313,8 @@ export function AgentBar() { )}
@@ -346,9 +358,8 @@ export function AgentBar() { )}
diff --git a/lib/audio/voice-resolver.ts b/lib/audio/voice-resolver.ts index 438a8ca7a..b35b41a7f 100644 --- a/lib/audio/voice-resolver.ts +++ b/lib/audio/voice-resolver.ts @@ -2,30 +2,40 @@ import type { TTSProviderId } from '@/lib/audio/types'; import type { AgentConfig } from '@/lib/orchestration/registry/types'; import { TTS_PROVIDERS } from '@/lib/audio/constants'; +export interface ResolvedVoice { + providerId: TTSProviderId; + voiceId: string; +} + /** - * Resolve the voice ID for an agent given the current TTS provider. - * 1. Check agent.voiceOverrides[providerId] — if valid, use it - * 2. Otherwise — deterministic assignment from voiceList by agentIndex + * Resolve the TTS provider + voice for an agent. + * 1. If agent has voiceConfig and the voice is still valid, use it + * 2. Otherwise, use globalProviderId + deterministic assignment by agentIndex */ -export function resolveVoice( +export function resolveAgentVoice( agent: AgentConfig, - providerId: TTSProviderId, + globalProviderId: TTSProviderId, agentIndex: number, - voiceList: string[], -): string { - if (voiceList.length === 0) return 'default'; - - const override = agent.voiceOverrides?.[providerId]; - if (override && voiceList.includes(override)) { - return override; +): ResolvedVoice { + // Check agent-specific config + if (agent.voiceConfig) { + const list = getServerVoiceList(agent.voiceConfig.providerId); + if (list.includes(agent.voiceConfig.voiceId)) { + return agent.voiceConfig; + } } - return voiceList[agentIndex % voiceList.length]; + // Fallback: global provider + deterministic voice + const list = getServerVoiceList(globalProviderId); + if (list.length === 0) { + return { providerId: globalProviderId, voiceId: 'default' }; + } + return { providerId: globalProviderId, voiceId: list[agentIndex % list.length] }; } /** - * Get the list of voice IDs for a server-side TTS provider. - * For browser-native-tts, caller must pass browser voices separately. 
+ * Get the list of voice IDs for a TTS provider. + * For browser-native-tts, returns empty (browser voices are dynamic). */ export function getServerVoiceList(providerId: TTSProviderId): string[] { if (providerId === 'browser-native-tts') return []; @@ -33,3 +43,38 @@ export function getServerVoiceList(providerId: TTSProviderId): string[] { if (!provider) return []; return provider.voices.map((v) => v.id); } + +/** + * Get all configured providers and their voices for the voice picker UI. + * Returns providers that have API keys configured or are browser-native. + */ +export function getAvailableProvidersWithVoices( + ttsProvidersConfig: Record, +): Array<{ + providerId: TTSProviderId; + providerName: string; + voices: Array<{ id: string; name: string }>; +}> { + const result: Array<{ + providerId: TTSProviderId; + providerName: string; + voices: Array<{ id: string; name: string }>; + }> = []; + + for (const [id, config] of Object.entries(TTS_PROVIDERS)) { + const providerId = id as TTSProviderId; + if (providerId === 'browser-native-tts') continue; + + const providerConfig = ttsProvidersConfig[providerId]; + // Show provider if it has an API key or is server-configured + if (providerConfig?.apiKey || (providerConfig as Record)?.isServerConfigured) { + result.push({ + providerId, + providerName: config.name, + voices: config.voices.map((v) => ({ id: v.id, name: v.name })), + }); + } + } + + return result; +} diff --git a/lib/hooks/use-discussion-tts.ts b/lib/hooks/use-discussion-tts.ts index c54ff4cfe..f4ee931fa 100644 --- a/lib/hooks/use-discussion-tts.ts +++ b/lib/hooks/use-discussion-tts.ts @@ -3,8 +3,9 @@ import { useCallback, useEffect, useRef } from 'react'; import { useSettingsStore } from '@/lib/store/settings'; import { useBrowserTTS } from '@/lib/hooks/use-browser-tts'; -import { resolveVoice, getServerVoiceList } from '@/lib/audio/voice-resolver'; +import { resolveAgentVoice } from '@/lib/audio/voice-resolver'; import type { AgentConfig } from 
'@/lib/orchestration/registry/types'; +import type { TTSProviderId } from '@/lib/audio/types'; import type { AudioIndicatorState } from '@/components/roundtable/audio-indicator'; interface DiscussionTTSOptions { @@ -18,6 +19,7 @@ interface QueueItem { partId: string; text: string; agentId: string | null; + providerId: TTSProviderId; voiceId: string; } @@ -31,13 +33,10 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus const isPlayingRef = useRef(false); const abortControllerRef = useRef(null); const audioRef = useRef(null); - // Use refs to avoid stale closures in callbacks const onAudioStateChangeRef = useRef(onAudioStateChange); onAudioStateChangeRef.current = onAudioStateChange; const processQueueRef = useRef<() => void>(() => {}); - const isBrowserTTS = ttsProviderId === 'browser-native-tts'; - const { speak: browserSpeak, cancel: browserCancel, @@ -52,6 +51,8 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus }); const browserCancelRef = useRef(browserCancel); browserCancelRef.current = browserCancel; + const browserSpeakRef = useRef(browserSpeak); + browserSpeakRef.current = browserSpeak; // Build agent index map for deterministic voice resolution const agentIndexMap = useRef>(new Map()); @@ -61,22 +62,15 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus agentIndexMap.current = map; }, [agents]); - const getVoiceForAgent = useCallback( - (agentId: string | null): string => { - if (!agentId) return 'default'; + const resolveVoiceForAgent = useCallback( + (agentId: string | null): { providerId: TTSProviderId; voiceId: string } => { + if (!agentId) return { providerId: ttsProviderId, voiceId: 'default' }; const agent = agents.find((a) => a.id === agentId); - if (!agent) return 'default'; + if (!agent) return { providerId: ttsProviderId, voiceId: 'default' }; const index = agentIndexMap.current.get(agentId) ?? 
0; - - if (isBrowserTTS) { - const browserVoices = browserAvailableVoices.map((v) => v.voiceURI); - return resolveVoice(agent, ttsProviderId, index, browserVoices); - } - - const serverVoices = getServerVoiceList(ttsProviderId); - return resolveVoice(agent, ttsProviderId, index, serverVoices); + return resolveAgentVoice(agent, ttsProviderId, index); }, - [agents, ttsProviderId, isBrowserTTS, browserAvailableVoices], + [agents, ttsProviderId], ); const processQueue = useCallback(async () => { @@ -89,30 +83,27 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus isPlayingRef.current = true; const item = queueRef.current.shift()!; - if (isBrowserTTS) { + // Browser TTS + if (item.providerId === 'browser-native-tts') { onAudioStateChangeRef.current?.(item.agentId, 'playing'); - browserSpeak(item.text, item.voiceId); - // browserTTS onEnd callback resets isPlayingRef and calls processQueue + browserSpeakRef.current(item.text, item.voiceId); return; } - // Server TTS + // Server TTS — use the item's provider, not the global one onAudioStateChangeRef.current?.(item.agentId, 'generating'); const controller = new AbortController(); abortControllerRef.current = controller; try { - const providerConfig = ttsProvidersConfig[ttsProviderId]; - // Match actual /api/generate/tts field names: - // { text, audioId, ttsProviderId, ttsVoice, ttsSpeed, ttsApiKey, ttsBaseUrl } - // Response: { audioId, base64, format } + const providerConfig = ttsProvidersConfig[item.providerId]; const res = await fetch('/api/generate/tts', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ text: item.text, audioId: item.partId, - ttsProviderId: ttsProviderId, + ttsProviderId: item.providerId, ttsVoice: item.voiceId, ttsSpeed: ttsSpeed, ttsApiKey: providerConfig?.apiKey, @@ -126,7 +117,6 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus const data = await res.json(); if (!data.base64) throw new 
Error('No audio in response'); - // Play via HTMLAudioElement directly (simpler than AudioPlayer for queued playback) onAudioStateChangeRef.current?.(item.agentId, 'playing'); const audioUrl = `data:audio/${data.format || 'mp3'};base64,${data.base64}`; const audio = new Audio(audioUrl); @@ -148,32 +138,28 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus } isPlayingRef.current = false; onAudioStateChangeRef.current?.(item.agentId, 'idle'); - processQueueRef.current(); // skip failed segment, continue queue + processQueueRef.current(); } - }, [enabled, ttsMuted, isBrowserTTS, ttsProviderId, ttsProvidersConfig, ttsSpeed, browserSpeak]); + }, [enabled, ttsMuted, ttsProvidersConfig, ttsSpeed]); - // Keep processQueueRef in sync processQueueRef.current = processQueue; - // Called by StreamBuffer's onSegmentSealed const handleSegmentSealed = useCallback( (messageId: string, partId: string, fullText: string, agentId: string | null) => { if (!enabled || ttsMuted || !fullText.trim()) return; - const voiceId = getVoiceForAgent(agentId); - queueRef.current.push({ messageId, partId, text: fullText, agentId, voiceId }); + const { providerId, voiceId } = resolveVoiceForAgent(agentId); + queueRef.current.push({ messageId, partId, text: fullText, agentId, providerId, voiceId }); if (!isPlayingRef.current) { processQueueRef.current(); - } else if (!isBrowserTTS) { - // Show generating indicator for queued items + } else if (providerId !== 'browser-native-tts') { onAudioStateChangeRef.current?.(agentId, 'generating'); } }, - [enabled, ttsMuted, getVoiceForAgent, isBrowserTTS], + [enabled, ttsMuted, resolveVoiceForAgent], ); - // Cleanup: abort all, stop playback, clear queue const cleanup = useCallback(() => { abortControllerRef.current?.abort(); abortControllerRef.current = null; @@ -186,9 +172,8 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus queueRef.current = []; isPlayingRef.current = false; 
onAudioStateChangeRef.current?.(null, 'idle'); - }, []); // stable — uses only refs + }, []); - // Cleanup on unmount useEffect(() => cleanup, [cleanup]); return { diff --git a/lib/orchestration/registry/types.ts b/lib/orchestration/registry/types.ts index 17b7f876c..6631e9b46 100644 --- a/lib/orchestration/registry/types.ts +++ b/lib/orchestration/registry/types.ts @@ -14,7 +14,7 @@ export interface AgentConfig { color: string; // UI theme color (hex) allowedActions: string[]; // Action types this agent can use priority: number; // Priority for director selection (1-10) - voiceOverrides?: Partial>; // Per-provider voice selections + voiceConfig?: { providerId: TTSProviderId; voiceId: string }; // Per-agent TTS voice selection // Metadata createdAt: Date; @@ -35,7 +35,7 @@ export interface AgentTemplate { color: string; allowedActions: string[]; priority: number; - voiceOverrides?: Partial>; // Per-provider voice selections + voiceConfig?: { providerId: TTSProviderId; voiceId: string }; // Per-agent TTS voice selection // LLM-generated agent fields isGenerated?: boolean; // true for LLM-generated agents From f6cc8b88cca77c0f9cfc317d0a22936d5c8ea3fa Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 12:04:49 +0800 Subject: [PATCH 17/65] fix(tts): align role badge and voice pill across agent rows Give role badge fixed width (w-14 text-right) so role text and voice pills align vertically across all rows regardless of agent name length. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index dd46c259e..05e298fb2 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -306,7 +306,7 @@ export function AgentBar() { {getAgentName(teacherAgent)} - + {getAgentRole(teacherAgent)} {showVoice && ( @@ -351,7 +351,7 @@ export function AgentBar() { {getAgentName(agent)} - + {getAgentRole(agent)} {showVoice && ( From cbad4df9797c28f78f9ddd7aad8abe686f27a027 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 12:07:36 +0800 Subject: [PATCH 18/65] fix(tts): fix role badge and voice pill alignment Wrap role badge + voice pill in a fixed-width container (w-[9.5rem] justify-end) so both align vertically across all agent rows regardless of name or role text length. Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 48 ++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 05e298fb2..cd1c78d08 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -306,17 +306,19 @@ export function AgentBar() { {getAgentName(teacherAgent)} - - {getAgentRole(teacherAgent)} - - {showVoice && ( - - )} +
+ + {getAgentRole(teacherAgent)} + + {showVoice && ( + + )} +
)} @@ -351,17 +353,19 @@ export function AgentBar() { {getAgentName(agent)} - - {getAgentRole(agent)} - - {showVoice && ( - - )} +
+ + {getAgentRole(agent)} + + {showVoice && ( + + )} +
); })} From 04315897bac2d39194891908a4534127a9503fb6 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 12:11:09 +0800 Subject: [PATCH 19/65] style(tts): align role badge and voice pill across agent rows Add min-w-[52px] text-right to role badge so it starts at a consistent position regardless of agent name length. Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 48 ++++++++++++++++------------------ 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index cd1c78d08..4b5a0ba67 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -306,19 +306,17 @@ export function AgentBar() { {getAgentName(teacherAgent)} -
- - {getAgentRole(teacherAgent)} - - {showVoice && ( - - )} -
+ + {getAgentRole(teacherAgent)} + + {showVoice && ( + + )}
)} @@ -353,19 +351,17 @@ export function AgentBar() { {getAgentName(agent)} -
- - {getAgentRole(agent)} - - {showVoice && ( - - )} -
+ + {getAgentRole(agent)} + + {showVoice && ( + + )}
); })} From e10d0b0faaa2d90fb9a8110235aa07d392a3c10e Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 12:14:35 +0800 Subject: [PATCH 20/65] fix(tts): use fixed w-[60px] for role badge alignment --- components/agent/agent-bar.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 4b5a0ba67..dba16d925 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -306,7 +306,7 @@ export function AgentBar() { {getAgentName(teacherAgent)} - + {getAgentRole(teacherAgent)} {showVoice && ( @@ -351,7 +351,7 @@ export function AgentBar() { {getAgentName(agent)} - + {getAgentRole(agent)} {showVoice && ( From 82bc2739c2db1c827b5a6da6328d0f266a05b96c Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 12:57:35 +0800 Subject: [PATCH 21/65] fix(tts): use fixed w-[88px] for voice pill alignment --- components/agent/agent-bar.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index dba16d925..086917d65 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -63,7 +63,7 @@ function AgentVoicePill({ }); }} > - + {currentVoiceName} From 13cf0a2abd6b491594741e871730a54e7d3a70fc Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 12:59:26 +0800 Subject: [PATCH 22/65] fix(tts): prevent click-outside from closing AgentBar when Select portal is open --- components/agent/agent-bar.tsx | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 086917d65..4b7d2c072 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -118,9 +118,12 @@ export function AgentBar() { useEffect(() => { if (!open) return; const handler = (e: MouseEvent) => { - if (containerRef.current && !containerRef.current.contains(e.target as Node)) { - setOpen(false); - 
} + const target = e.target as Node; + // Don't close if clicking inside the AgentBar + if (containerRef.current && containerRef.current.contains(target)) return; + // Don't close if clicking inside a Radix Select portal + if ((target as Element).closest?.('[data-radix-popper-content-wrapper]')) return; + setOpen(false); }; document.addEventListener('mousedown', handler); return () => document.removeEventListener('mousedown', handler); From 55bf944c8371c5341baf6a7d68fa84728fac61c0 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 13:06:14 +0800 Subject: [PATCH 23/65] fix(tts): comprehensive voice picker rewrite - Replace Radix Select with Popover + button list (fixes click issue) - Fix getAvailableProvidersWithVoices to always include global provider - Widen panel from w-80 to w-96 (prevents name truncation) - Voice pill uses primary color instead of gray (more visible) - Extract renderAgentRow helper to reduce duplication - Popover shows voices grouped by provider with active state - Add findVoiceDisplayName utility Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 244 +++++++++++++++------------------ lib/audio/voice-resolver.ts | 51 ++++--- 2 files changed, 141 insertions(+), 154 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 4b7d2c072..524d6d8d9 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -4,23 +4,21 @@ import { useState, useEffect, useRef } from 'react'; import { motion, AnimatePresence } from 'motion/react'; import { Checkbox } from '@/components/ui/checkbox'; import { Input } from '@/components/ui/input'; -import { - Select, - SelectContent, - SelectGroup, - SelectItem, - SelectLabel, - SelectTrigger, -} from '@/components/ui/select'; +import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover'; import { cn } from '@/lib/utils'; import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from 
'@/lib/store/settings'; import { useAgentRegistry } from '@/lib/orchestration/registry/store'; -import { resolveAgentVoice, getAvailableProvidersWithVoices } from '@/lib/audio/voice-resolver'; +import { + resolveAgentVoice, + getAvailableProvidersWithVoices, + findVoiceDisplayName, +} from '@/lib/audio/voice-resolver'; import { Sparkles, ChevronDown, ChevronUp, Shuffle, Volume2 } from 'lucide-react'; import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'; import type { AgentConfig } from '@/lib/orchestration/registry/types'; import type { TTSProviderId } from '@/lib/audio/types'; +import type { ProviderWithVoices } from '@/lib/audio/voice-resolver'; function AgentVoicePill({ agent, @@ -30,63 +28,67 @@ function AgentVoicePill({ }: { agent: AgentConfig; agentIndex: number; - availableProviders: ReturnType; + availableProviders: ProviderWithVoices[]; globalProviderId: TTSProviderId; }) { const updateAgent = useAgentRegistry((s) => s.updateAgent); const resolved = resolveAgentVoice(agent, globalProviderId, agentIndex); - - // Encode as "providerId::voiceId" for the Select value - const currentValue = `${resolved.providerId}::${resolved.voiceId}`; - - // Find display name for current voice - const currentVoiceName = (() => { - for (const p of availableProviders) { - const v = p.voices.find((voice) => voice.id === resolved.voiceId); - if (v) return v.name; - } - return resolved.voiceId; - })(); + const displayName = findVoiceDisplayName(resolved.providerId, resolved.voiceId); return ( -
e.stopPropagation()} - onPointerDown={(e) => e.stopPropagation()} - className="shrink-0" - > - -
+ + ); + })} +
+ ))} + + ); } @@ -101,7 +103,6 @@ export function AgentBar() { const setAgentMode = useSettingsStore((s) => s.setAgentMode); const ttsProviderId = useSettingsStore((s) => s.ttsProviderId); const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig); - const ttsMuted = useSettingsStore((s) => s.ttsMuted); const [open, setOpen] = useState(false); const containerRef = useRef(null); @@ -112,16 +113,15 @@ export function AgentBar() { const selectedAgents = agents.filter((a) => selectedAgentIds.includes(a.id)); const nonTeacherSelected = selectedAgents.filter((a) => a.role !== 'teacher'); - const availableProviders = getAvailableProvidersWithVoices(ttsProvidersConfig); + const availableProviders = getAvailableProvidersWithVoices(ttsProvidersConfig, ttsProviderId); const showVoice = availableProviders.length > 0; useEffect(() => { if (!open) return; const handler = (e: MouseEvent) => { const target = e.target as Node; - // Don't close if clicking inside the AgentBar if (containerRef.current && containerRef.current.contains(target)) return; - // Don't close if clicking inside a Radix Select portal + // Don't close if clicking inside a Radix portal (Popover, Select, etc.) if ((target as Element).closest?.('[data-radix-popper-content-wrapper]')) return; setOpen(false); }; @@ -224,8 +224,50 @@ export function AgentBar() {
); + const renderAgentRow = (agent: AgentConfig, agentIndex: number, isTeacher: boolean) => { + const isSelected = isTeacher || selectedAgentIds.includes(agent.id); + return ( +
toggleAgent(agent.id)} + className={cn( + 'w-full flex items-center gap-2 px-2.5 py-1.5 rounded-lg transition-colors', + isTeacher ? 'bg-primary/5' : 'cursor-pointer', + !isTeacher && isSelected && 'bg-primary/5', + !isTeacher && !isSelected && 'hover:bg-muted/50', + )} + > + +
+ {getAgentName(agent)} +
+ + {getAgentName(agent)} + + + {getAgentRole(agent)} + + {showVoice && ( + + )} +
+ ); + }; + return ( -
+
diff --git a/components/canvas/canvas-toolbar.tsx b/components/canvas/canvas-toolbar.tsx index bf4db9f46..b156ffe17 100644 --- a/components/canvas/canvas-toolbar.tsx +++ b/components/canvas/canvas-toolbar.tsx @@ -1,6 +1,5 @@ 'use client'; -import { useState, useRef, useCallback, useEffect } from 'react'; import { ChevronLeft, ChevronRight, @@ -94,10 +93,7 @@ export function CanvasToolbar({ onStopDiscussion, className, ttsEnabled, - ttsMuted, - ttsVolume = 1, onToggleMute, - onVolumeChange, autoPlayLecture, onToggleAutoPlay, playbackSpeed = 1, @@ -112,26 +108,6 @@ export function CanvasToolbar({ (s) => s.stage?.whiteboard?.[0]?.elements?.length || 0, ); - // Volume slider hover state - const [volumeHover, setVolumeHover] = useState(false); - const volumeTimerRef = useRef>(undefined); - const volumeContainerRef = useRef(null); - - const handleVolumeEnter = useCallback(() => { - clearTimeout(volumeTimerRef.current); - setVolumeHover(true); - }, []); - - const handleVolumeLeave = useCallback(() => { - volumeTimerRef.current = setTimeout(() => setVolumeHover(false), 300); - }, []); - - // Cleanup volume hover timer on unmount - useEffect(() => () => clearTimeout(volumeTimerRef.current), []); - - // Effective volume for display - const effectiveVolume = ttsMuted ? 0 : ttsVolume; - return (
{/* ── Left: sidebar toggle + page indicator ── */} @@ -161,71 +137,30 @@ export function CanvasToolbar({ {/* ── Center: unified playback controls ── */}
- {/* Volume with vertical popover slider */} + {/* TTS on/off toggle */} {onToggleMute && ( -
- - - {/* Vertical volume slider (pops up above) */} -
-
- - {Math.round(effectiveVolume * 100)} - - { - const v = parseFloat(e.target.value); - onVolumeChange?.(v); - if (v > 0 && ttsMuted) onToggleMute?.(); - }} + + + +
- {/* Arrow pointing down */} -
-
-
+ aria-label={ttsEnabled ? 'Disable TTS' : 'Enable TTS'} + > + + + + + {ttsEnabled ? 'TTS On' : 'TTS Off'} + + + )} {/* Speed */} diff --git a/components/roundtable/index.tsx b/components/roundtable/index.tsx index e619aa92e..28045a306 100644 --- a/components/roundtable/index.tsx +++ b/components/roundtable/index.tsx @@ -142,6 +142,7 @@ export function Roundtable({ const ttsMuted = useSettingsStore((s) => s.ttsMuted); const setTTSMuted = useSettingsStore((s) => s.setTTSMuted); const ttsEnabled = useSettingsStore((state) => state.ttsEnabled); + const setTTSEnabled = useSettingsStore((state) => state.setTTSEnabled); const asrEnabled = useSettingsStore((state) => state.asrEnabled); const ttsVolume = useSettingsStore((s) => s.ttsVolume); const setTTSVolume = useSettingsStore((s) => s.setTTSVolume); @@ -419,10 +420,7 @@ export function Roundtable({ showStopDiscussion={showStopButton} onStopDiscussion={onStopDiscussion} ttsEnabled={ttsEnabled} - ttsMuted={ttsMuted} - ttsVolume={ttsVolume} - onToggleMute={() => ttsEnabled && setTTSMuted(!ttsMuted)} - onVolumeChange={(v) => setTTSVolume(v)} + onToggleMute={() => setTTSEnabled(!ttsEnabled)} autoPlayLecture={autoPlayLecture} onToggleAutoPlay={() => setAutoPlayLecture(!autoPlayLecture)} playbackSpeed={playbackSpeed} From 1c06e00614d904a24bdd147bef0e42cfa4fdf799 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 13:39:11 +0800 Subject: [PATCH 29/65] refactor(tts): simplify Settings TTS tab to toggle + provider config Remove voice selection, speed slider, preview/test, Azure locale filter from Settings TTS tab. Voice is now per-agent in AgentBar. Keep: on/off toggle, provider selector, API key + base URL config. Add hint text pointing to AgentBar for voice configuration. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- components/settings/audio-settings.tsx | 310 +++---------------------- lib/i18n/settings.ts | 3 + 2 files changed, 36 insertions(+), 277 deletions(-) diff --git a/components/settings/audio-settings.tsx b/components/settings/audio-settings.tsx index 92c2670de..f40bea301 100644 --- a/components/settings/audio-settings.tsx +++ b/components/settings/audio-settings.tsx @@ -570,6 +570,8 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { ttsEnabled ? 'opacity-100' : 'opacity-40 max-h-0 pointer-events-none', )} > +

{t('settings.ttsVoiceConfigHint')}

+
- handleTTSProviderConfigChange(ttsProviderId, { - apiKey: e.target.value, - }) - } - className="font-mono text-sm pr-10" - /> - -
-
- -
- +
+
+ +
handleTTSProviderConfigChange(ttsProviderId, { - baseUrl: e.target.value, + apiKey: e.target.value, }) } - className="text-sm" + className="font-mono text-sm pr-10" /> +
- {(() => { - const effectiveBaseUrl = - ttsProvidersConfig[ttsProviderId]?.baseUrl || ttsProvider.defaultBaseUrl || ''; - if (!effectiveBaseUrl) return null; - - // Get endpoint path based on provider - let endpointPath = ''; - switch (ttsProviderId) { - case 'openai-tts': - case 'glm-tts': - endpointPath = '/audio/speech'; - break; - case 'azure-tts': - endpointPath = '/cognitiveservices/v1'; - break; - case 'qwen-tts': - endpointPath = '/services/aigc/multimodal-generation/generation'; - break; - default: - endpointPath = ''; - } - if (!endpointPath) return null; - const fullUrl = effectiveBaseUrl + endpointPath; - return ( -

- {t('settings.requestUrl')}: {fullUrl} -

- ); - })()} - - )} - - {/* Voice Selection Row */} -
- {/* Language Filter for Azure TTS */} - {ttsProviderId === 'azure-tts' && (
- - -
- )} - -
- - -
- - {ttsProvider.speedRange && ( -
- -
- handleTTSSpeedChange(value[0])} - min={ttsProvider.speedRange.min} - max={ttsProvider.speedRange.max} - step={0.1} - className="flex-1" - /> - - {ttsSpeed.toFixed(1)}x - -
-
- )} -
- - {/* Test TTS Section */} -
- -
- setTestText(e.target.value)} - className="flex-1" - /> - -
-
- - {ttsTestMessage && ( -
-
- {ttsTestStatus === 'success' && ( - - )} - {ttsTestStatus === 'error' && } -

{ttsTestMessage}

+ + + handleTTSProviderConfigChange(ttsProviderId, { + baseUrl: e.target.value, + }) + } + className="text-sm" + />
)} - -
diff --git a/lib/i18n/settings.ts b/lib/i18n/settings.ts index 1e6918263..c6096827c 100644 --- a/lib/i18n/settings.ts +++ b/lib/i18n/settings.ts @@ -200,6 +200,7 @@ export const settingsZhCN = { asrDescription: 'ASR (Automatic Speech Recognition) - 将语音转换为文字', enableTTS: '启用语音合成', ttsEnabledDescription: '开启后,课程生成时将自动合成语音', + ttsVoiceConfigHint: '每个 Agent 的音色可在首页「课堂角色配置」中设置', enableASR: '启用语音识别', asrEnabledDescription: '开启后,学生可使用麦克风进行语音输入', ttsProvider: 'TTS 提供商', @@ -785,6 +786,8 @@ export const settingsEnUS = { asrDescription: 'ASR (Automatic Speech Recognition) - Convert speech to text', enableTTS: 'Enable Text-to-Speech', ttsEnabledDescription: 'When enabled, speech audio will be generated during course creation', + ttsVoiceConfigHint: + 'Per-agent voice can be configured in "Classroom Role Config" on the homepage', enableASR: 'Enable Speech Recognition', asrEnabledDescription: 'When enabled, students can use microphone for voice input', ttsProvider: 'TTS Provider', From 27370b0b15336d55bbb4c55cebd7ceadae27f291 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 13:42:40 +0800 Subject: [PATCH 30/65] refactor(tts): simplify media popover TTS tab to toggle only --- components/generation/media-popover.tsx | 53 +------------------------ 1 file changed, 1 insertion(+), 52 deletions(-) diff --git a/components/generation/media-popover.tsx b/components/generation/media-popover.tsx index c26d52a14..670f1807a 100644 --- a/components/generation/media-popover.tsx +++ b/components/generation/media-popover.tsx @@ -412,58 +412,7 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { label={t('media.ttsCapability')} enabled={ttsEnabled} onToggle={setTTSEnabled} - > - {/* Provider + Voice grouped select + preview */} -
-
- { - if (gid !== ttsProviderId) { - setTTSProvider(gid as TTSProviderId); - } - setTTSVoice(iid); - }} - /> -
- -
- {ttsSpeedRange && ( -
- - {t('media.speed')} - - setTTSSpeed(value[0])} - min={ttsSpeedRange.min} - max={ttsSpeedRange.max} - step={0.1} - className="flex-1" - /> - - {ttsSpeed.toFixed(1)}x - -
- )} - + /> )} {activeTab === 'asr' && ( From 8c6357fd820bff0a2100ec68e800582c16816352 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 13:44:24 +0800 Subject: [PATCH 31/65] fix(tts): add voice config hint to media popover TTS tab --- components/generation/media-popover.tsx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/components/generation/media-popover.tsx b/components/generation/media-popover.tsx index 670f1807a..082b9fb7c 100644 --- a/components/generation/media-popover.tsx +++ b/components/generation/media-popover.tsx @@ -412,7 +412,11 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { label={t('media.ttsCapability')} enabled={ttsEnabled} onToggle={setTTSEnabled} - /> + > +

+ {t('settings.ttsVoiceConfigHint')} +

+ )} {activeTab === 'asr' && ( From 230894a3a07625e128d53b5084d340b8200c6d64 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 13:50:21 +0800 Subject: [PATCH 32/65] feat(tts): add per-voice preview button in voice picker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each voice row in the popover has a small speaker icon button. Click to preview the voice with "欢迎来到AI课堂" / "Welcome to AI Classroom" (follows i18n). Browser native uses Web Speech API, server TTS calls /api/generate/tts. Click again or close popover to stop. Shows spinner while generating. Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 140 ++++++++++++++++++++++++++++----- lib/i18n/chat.ts | 2 + 2 files changed, 123 insertions(+), 19 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 8b9db4d91..05abed899 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -1,6 +1,6 @@ 'use client'; -import { useState, useEffect, useRef } from 'react'; +import { useState, useEffect, useRef, useCallback } from 'react'; import { motion, AnimatePresence } from 'motion/react'; import { Checkbox } from '@/components/ui/checkbox'; import { Input } from '@/components/ui/input'; @@ -10,7 +10,8 @@ import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; import { useAgentRegistry } from '@/lib/orchestration/registry/store'; import { resolveAgentVoice, getAvailableProvidersWithVoices } from '@/lib/audio/voice-resolver'; -import { Sparkles, ChevronDown, ChevronUp, Shuffle, Volume2, VolumeX } from 'lucide-react'; +import { playBrowserTTSPreview } from '@/lib/audio/browser-tts-preview'; +import { Sparkles, ChevronDown, ChevronUp, Shuffle, Volume2, VolumeX, Loader2 } from 'lucide-react'; import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'; import type { AgentConfig } from 
'@/lib/orchestration/registry/types'; import type { TTSProviderId } from '@/lib/audio/types'; @@ -27,9 +28,14 @@ function AgentVoicePill({ availableProviders: ProviderWithVoices[]; disabled?: boolean; }) { + const { t } = useI18n(); const updateAgent = useAgentRegistry((s) => s.updateAgent); + const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig); const resolved = resolveAgentVoice(agent, agentIndex, availableProviders); - // findVoiceDisplayName only knows static providers; check availableProviders for browser voices + const [previewingId, setPreviewingId] = useState(null); + const previewCancelRef = useRef<(() => void) | null>(null); + const previewAudioRef = useRef(null); + const displayName = (() => { for (const p of availableProviders) { if (p.providerId === resolved.providerId) { @@ -40,6 +46,76 @@ function AgentVoicePill({ return resolved.voiceId; })(); + const stopPreview = useCallback(() => { + previewCancelRef.current?.(); + previewCancelRef.current = null; + if (previewAudioRef.current) { + previewAudioRef.current.pause(); + previewAudioRef.current.src = ''; + previewAudioRef.current = null; + } + setPreviewingId(null); + }, []); + + const handlePreview = useCallback( + async (providerId: TTSProviderId, voiceId: string) => { + const key = `${providerId}::${voiceId}`; + if (previewingId === key) { + stopPreview(); + return; + } + stopPreview(); + setPreviewingId(key); + + const previewText = t('agentBar.voicePreviewText'); + + if (providerId === 'browser-native-tts') { + const { promise, cancel } = playBrowserTTSPreview({ text: previewText, voice: voiceId }); + previewCancelRef.current = cancel; + try { + await promise; + } catch { + // ignore abort + } + setPreviewingId(null); + return; + } + + // Server TTS + try { + const providerConfig = ttsProvidersConfig[providerId]; + const res = await fetch('/api/generate/tts', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + text: previewText, + 
audioId: 'voice-preview', + ttsProviderId: providerId, + ttsVoice: voiceId, + ttsSpeed: 1, + ttsApiKey: providerConfig?.apiKey, + ttsBaseUrl: providerConfig?.serverBaseUrl || providerConfig?.baseUrl, + }), + }); + if (!res.ok) throw new Error('TTS error'); + const data = await res.json(); + if (!data.base64) throw new Error('No audio'); + + const audio = new Audio(`data:audio/${data.format || 'mp3'};base64,${data.base64}`); + previewAudioRef.current = audio; + audio.addEventListener('ended', () => setPreviewingId(null)); + audio.addEventListener('error', () => setPreviewingId(null)); + await audio.play(); + } catch { + setPreviewingId(null); + } + }, + [previewingId, stopPreview, t, ttsProvidersConfig], + ); + + // Cleanup on unmount + useEffect(() => () => stopPreview(), [stopPreview]); + if (disabled) { return (
+ !open && stopPreview()}> + + +
); })}
diff --git a/lib/i18n/chat.ts b/lib/i18n/chat.ts index 1bb535d3e..cc5d93b4b 100644 --- a/lib/i18n/chat.ts +++ b/lib/i18n/chat.ts @@ -58,6 +58,7 @@ export const chatZhCN = { voiceLabel: '音色', voiceLoading: '加载中...', voiceAutoAssign: '音色将自动分配', + voicePreviewText: '欢迎来到AI课堂', }, proactiveCard: { discussion: '讨论', @@ -132,6 +133,7 @@ export const chatEnUS = { voiceLabel: 'Voice', voiceLoading: 'Loading...', voiceAutoAssign: 'Voices will be auto-assigned', + voicePreviewText: 'Welcome to AI Classroom', }, proactiveCard: { discussion: 'Discussion', From bcca83dd5bfe91ccbdcd9cc59e017b7556f4efb8 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 14:04:41 +0800 Subject: [PATCH 33/65] fix(tts): preview text follows course language instead of UI language --- components/agent/agent-bar.tsx | 8 +++++--- lib/i18n/chat.ts | 2 -- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 05abed899..5e8ee5f1f 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -28,7 +28,6 @@ function AgentVoicePill({ availableProviders: ProviderWithVoices[]; disabled?: boolean; }) { - const { t } = useI18n(); const updateAgent = useAgentRegistry((s) => s.updateAgent); const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig); const resolved = resolveAgentVoice(agent, agentIndex, availableProviders); @@ -67,7 +66,10 @@ function AgentVoicePill({ stopPreview(); setPreviewingId(key); - const previewText = t('agentBar.voicePreviewText'); + const courseLanguage = + (typeof localStorage !== 'undefined' && localStorage.getItem('generationLanguage')) || + 'zh-CN'; + const previewText = courseLanguage === 'en-US' ? 
'Welcome to AI Classroom' : '欢迎来到AI课堂'; if (providerId === 'browser-native-tts') { const { promise, cancel } = playBrowserTTSPreview({ text: previewText, voice: voiceId }); @@ -110,7 +112,7 @@ function AgentVoicePill({ setPreviewingId(null); } }, - [previewingId, stopPreview, t, ttsProvidersConfig], + [previewingId, stopPreview, ttsProvidersConfig], ); // Cleanup on unmount diff --git a/lib/i18n/chat.ts b/lib/i18n/chat.ts index cc5d93b4b..1bb535d3e 100644 --- a/lib/i18n/chat.ts +++ b/lib/i18n/chat.ts @@ -58,7 +58,6 @@ export const chatZhCN = { voiceLabel: '音色', voiceLoading: '加载中...', voiceAutoAssign: '音色将自动分配', - voicePreviewText: '欢迎来到AI课堂', }, proactiveCard: { discussion: '讨论', @@ -133,7 +132,6 @@ export const chatEnUS = { voiceLabel: 'Voice', voiceLoading: 'Loading...', voiceAutoAssign: 'Voices will be auto-assigned', - voicePreviewText: 'Welcome to AI Classroom', }, proactiveCard: { discussion: 'Discussion', From 703886b80e5d5f4f56663e3e974021c0730e38d8 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 14:09:37 +0800 Subject: [PATCH 34/65] refactor(tts): redesign AgentBar expanded panel layout - Teacher always at top with voice pill (works in both modes) - Mode tabs moved below teacher - Auto mode: single compact row with shuffle icon + description - Max turns: compact inline row with smaller input - Preset mode: only student agents listed (teacher already above) - Remove large shuffle animation from auto mode Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 58 +++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 5e8ee5f1f..dc594c496 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -452,8 +452,38 @@ export function AgentBar() { className="absolute right-0 top-full mt-1 z-50 w-96" >
+ {/* Teacher — always visible */} + {teacherAgent && ( +
+
+ {getAgentName(teacherAgent)} +
+ + {getAgentName(teacherAgent)} + + + {getAgentRole(teacherAgent)} + + {showVoice && ( + + )} +
+ )} + {/* Mode tabs */} -
+
{agentMode === 'preset' ? ( -
- {teacherAgent && renderAgentRow(teacherAgent, 0, true)} +
{agents .filter((a) => a.role !== 'teacher') .map((agent, idx) => renderAgentRow(agent, idx + 1, false))}
) : ( -
-
-
-
- -
-

- {t('settings.agentModeAutoDesc')} -

-

- {t('agentBar.voiceAutoAssign')} -

+
+ + {t('settings.agentModeAutoDesc')}
)} - {/* Max turns */} -
- + {/* Max turns — compact inline */} +
+ {t('settings.maxTurns')} setMaxTurns(e.target.value)} - className="w-16 h-7 text-xs" + className="w-14 h-6 text-xs text-center" />
From c5aeb568a6046073150d4fa4c867f1d3e8119242 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 14:12:23 +0800 Subject: [PATCH 35/65] refactor(tts): merge max turns into teacher row --- components/agent/agent-bar.tsx | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index dc594c496..ad78839ab 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -452,7 +452,7 @@ export function AgentBar() { className="absolute right-0 top-full mt-1 z-50 w-96" >
- {/* Teacher — always visible */} + {/* Teacher + max turns — always visible */} {teacherAgent && (
{getAgentName(teacherAgent)} - - {getAgentRole(teacherAgent)} - {showVoice && ( )} + + {t('settings.maxTurns')} + + setMaxTurns(e.target.value)} + onClick={(e) => e.stopPropagation()} + className="w-12 h-5 text-[11px] text-center px-1 shrink-0" + />
)} @@ -521,21 +530,6 @@ export function AgentBar() { {t('settings.agentModeAutoDesc')}
)} - - {/* Max turns — compact inline */} -
- - {t('settings.maxTurns')} - - setMaxTurns(e.target.value)} - className="w-14 h-6 text-xs text-center" - /> -
)} From f8aa1bebf04d14a4e73b84e187666e5c32475721 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 14:15:29 +0800 Subject: [PATCH 36/65] refactor(tts): separate teacher row and max turns, use stepper UI - Teacher row: avatar + name + voice pill only - Max turns: bottom row with MessageSquare icon + compact stepper (minus/number/plus in a rounded pill) - Remove Input component dependency Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 63 ++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index ad78839ab..dbf98c65c 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -3,7 +3,6 @@ import { useState, useEffect, useRef, useCallback } from 'react'; import { motion, AnimatePresence } from 'motion/react'; import { Checkbox } from '@/components/ui/checkbox'; -import { Input } from '@/components/ui/input'; import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover'; import { cn } from '@/lib/utils'; import { useI18n } from '@/lib/hooks/use-i18n'; @@ -11,7 +10,18 @@ import { useSettingsStore } from '@/lib/store/settings'; import { useAgentRegistry } from '@/lib/orchestration/registry/store'; import { resolveAgentVoice, getAvailableProvidersWithVoices } from '@/lib/audio/voice-resolver'; import { playBrowserTTSPreview } from '@/lib/audio/browser-tts-preview'; -import { Sparkles, ChevronDown, ChevronUp, Shuffle, Volume2, VolumeX, Loader2 } from 'lucide-react'; +import { + Sparkles, + ChevronDown, + ChevronUp, + Shuffle, + Volume2, + VolumeX, + Loader2, + MessageSquare, + Minus, + Plus, +} from 'lucide-react'; import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'; import type { AgentConfig } from '@/lib/orchestration/registry/types'; import type { TTSProviderId } from '@/lib/audio/types'; @@ -452,7 +462,7 @@ export function AgentBar() { 
className="absolute right-0 top-full mt-1 z-50 w-96" >
- {/* Teacher + max turns — always visible */} + {/* Teacher — always visible */} {teacherAgent && (
)} - - {t('settings.maxTurns')} - - setMaxTurns(e.target.value)} - onClick={(e) => e.stopPropagation()} - className="w-12 h-5 text-[11px] text-center px-1 shrink-0" - />
)} @@ -530,6 +528,41 @@ export function AgentBar() { {t('settings.agentModeAutoDesc')}
)} + + {/* Max turns — compact stepper */} +
+ + + {t('settings.maxTurns')} + +
+ + + {maxTurns} + + +
+
)} From 8bd865f98062b1c67005df7a2818ed35cb7fd0ec Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 14:16:40 +0800 Subject: [PATCH 37/65] fix(tts): increase voice pill contrast in dark mode --- components/agent/agent-bar.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index dbf98c65c..6bbe1f358 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -148,7 +148,7 @@ function AgentVoicePill({ type="button" onClick={(e) => e.stopPropagation()} onPointerDown={(e) => e.stopPropagation()} - className="flex items-center gap-1 h-5 w-[88px] rounded-full bg-primary/8 hover:bg-primary/15 dark:bg-primary/10 dark:hover:bg-primary/20 px-2 text-[10px] text-primary/70 hover:text-primary transition-colors shrink-0 cursor-pointer" + className="flex items-center gap-1 h-5 w-[88px] rounded-full bg-primary/10 hover:bg-primary/20 dark:bg-primary/25 dark:hover:bg-primary/35 px-2 text-[10px] text-primary/80 hover:text-primary dark:text-primary/90 transition-colors shrink-0 cursor-pointer" > {displayName} From 1fd9adcd598dd8b21038afea70d8f35e5bb08a26 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 14:18:53 +0800 Subject: [PATCH 38/65] fix(tts): make max turns input editable, tighten panel padding --- components/agent/agent-bar.tsx | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 6bbe1f358..1e87406b6 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -461,7 +461,7 @@ export function AgentBar() { transition={{ duration: 0.2, ease: [0.25, 0.1, 0.25, 1] }} className="absolute right-0 top-full mt-1 z-50 w-96" > -
+
{/* Teacher — always visible */} {teacherAgent && (
@@ -530,7 +530,7 @@ export function AgentBar() { )} {/* Max turns — compact stepper */} -
+
{t('settings.maxTurns')} @@ -547,9 +547,25 @@ export function AgentBar() { > - - {maxTurns} - + { + const raw = e.target.value.replace(/\D/g, ''); + if (!raw) { + setMaxTurns(''); + return; + } + const v = Math.min(20, Math.max(1, parseInt(raw))); + setMaxTurns(String(v)); + }} + onBlur={() => { + if (!maxTurns || parseInt(maxTurns) < 1) setMaxTurns('1'); + }} + onClick={(e) => e.stopPropagation()} + className="w-5 h-5 text-[11px] font-medium tabular-nums text-center bg-transparent outline-none border-none" + />
) : ( -
- - {t('settings.agentModeAutoDesc')} +
+
+
+
+ +
+

+ {t('settings.agentModeAutoDesc')} +

)} From a559514356d817ec65c13eacdde97ac6eaeccda5 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 14:20:28 +0800 Subject: [PATCH 40/65] fix(tts): adjust auto mode text spacing and add voice auto-assign hint --- components/agent/agent-bar.tsx | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 39d65d029..d341b6a4d 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -523,15 +523,20 @@ export function AgentBar() { .map((agent, idx) => renderAgentRow(agent, idx + 1, false))}
) : ( -
+
-

- {t('settings.agentModeAutoDesc')} -

+
+

+ {t('settings.agentModeAutoDesc')} +

+

+ {t('agentBar.voiceAutoAssign')} +

+
)} From 09469f0308d28d2055752e899fe096437b01443b Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 14:22:07 +0800 Subject: [PATCH 41/65] fix(tts): auto-close voice popover after selecting a voice --- components/agent/agent-bar.tsx | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index d341b6a4d..e640f5fa7 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -41,6 +41,7 @@ function AgentVoicePill({ const updateAgent = useAgentRegistry((s) => s.updateAgent); const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig); const resolved = resolveAgentVoice(agent, agentIndex, availableProviders); + const [popoverOpen, setPopoverOpen] = useState(false); const [previewingId, setPreviewingId] = useState(null); const previewCancelRef = useRef<(() => void) | null>(null); const previewAudioRef = useRef(null); @@ -142,7 +143,13 @@ function AgentVoicePill({ } return ( - !open && stopPreview()}> + { + setPopoverOpen(open); + if (!open) stopPreview(); + }} + >
) : ( -
+
From 151afbf741ff038883cb8955ca1f99c50d7887e0 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 14:26:20 +0800 Subject: [PATCH 43/65] fix(tts): push auto mode text toward bottom with flex spacer --- components/agent/agent-bar.tsx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index bc09ced8b..f4e4e0aac 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -531,12 +531,13 @@ export function AgentBar() { .map((agent, idx) => renderAgentRow(agent, idx + 1, false))}
) : ( -
+
+

{t('settings.agentModeAutoDesc')} From 55408f09e87c5526b421b19eda9aa0a772f6daee Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 14:27:47 +0800 Subject: [PATCH 44/65] fix(tts): reduce auto mode bottom padding --- components/agent/agent-bar.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index f4e4e0aac..b1a014397 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -531,7 +531,7 @@ export function AgentBar() { .map((agent, idx) => renderAgentRow(agent, idx + 1, false))}

) : ( -
+
From cbc45ea709e9606d528838bcd44f30de3358b821 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 14:42:56 +0800 Subject: [PATCH 45/65] feat(tts): wait for TTS audio to finish before next agent turn Add waitForDrain() to useDiscussionTTS that returns a promise resolving when the audio queue is empty. The agent loop in useChatSessions now awaits this after buffer drain, so the next agent's turn doesn't start until the current agent's TTS audio finishes playing. Co-Authored-By: Claude Opus 4.6 (1M context) --- components/chat/chat-area.tsx | 3 +++ components/chat/use-chat-sessions.ts | 13 +++++++++++ components/stage.tsx | 1 + lib/hooks/use-discussion-tts.ts | 33 ++++++++++++++++++++++++++-- 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/components/chat/chat-area.tsx b/components/chat/chat-area.tsx index ddeb6e241..0ccdac030 100644 --- a/components/chat/chat-area.tsx +++ b/components/chat/chat-area.tsx @@ -33,6 +33,7 @@ interface ChatAreaProps { fullText: string, agentId: string | null, ) => void; + waitForTTSDrain?: () => Promise; currentSceneId?: string | null; } @@ -74,6 +75,7 @@ export const ChatArea = forwardRef( onCueUser, onStopSession, onSegmentSealed, + waitForTTSDrain, currentSceneId, }, ref, @@ -106,6 +108,7 @@ export const ChatArea = forwardRef( onActiveBubble, onStopSession, onSegmentSealed, + waitForTTSDrain, }); const [activeTab, setActiveTab] = useState<'lecture' | 'chat'>('lecture'); diff --git a/components/chat/use-chat-sessions.ts b/components/chat/use-chat-sessions.ts index 4f9117b81..e1c43baae 100644 --- a/components/chat/use-chat-sessions.ts +++ b/components/chat/use-chat-sessions.ts @@ -42,6 +42,7 @@ interface UseChatSessionsOptions { fullText: string, agentId: string | null, ) => void; + waitForTTSDrain?: () => Promise; } export function useChatSessions(options: UseChatSessionsOptions = {}) { @@ -52,6 +53,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { const onActiveBubbleRef = 
useRef(options.onActiveBubble); const onStopSessionRef = useRef(options.onStopSession); const onSegmentSealedRef = useRef(options.onSegmentSealed); + const waitForTTSDrainRef = useRef(options.waitForTTSDrain); useEffect(() => { onLiveSpeechRef.current = options.onLiveSpeech; onSpeechProgressRef.current = options.onSpeechProgress; @@ -60,6 +62,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { onActiveBubbleRef.current = options.onActiveBubble; onStopSessionRef.current = options.onStopSession; onSegmentSealedRef.current = options.onSegmentSealed; + waitForTTSDrainRef.current = options.waitForTTSDrain; }, [ options.onLiveSpeech, options.onSpeechProgress, @@ -68,6 +71,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { options.onActiveBubble, options.onStopSession, options.onSegmentSealed, + options.waitForTTSDrain, ]); const { t } = useI18n(); @@ -443,6 +447,15 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { break; } + // Wait for TTS audio to finish before next turn + if (waitForTTSDrainRef.current) { + try { + await waitForTTSDrainRef.current(); + } catch { + break; + } + } + if (controller.signal.aborted) break; // Read loop outcome from done data. 
diff --git a/components/stage.tsx b/components/stage.tsx index 2ef7328ca..20b4564d1 100644 --- a/components/stage.tsx +++ b/components/stage.tsx @@ -912,6 +912,7 @@ export function Stage({ }} onStopSession={doSessionCleanup} onSegmentSealed={discussionTTS.handleSegmentSealed} + waitForTTSDrain={discussionTTS.waitForDrain} /> {/* Scene switch confirmation dialog */} diff --git a/lib/hooks/use-discussion-tts.ts b/lib/hooks/use-discussion-tts.ts index 23a0e53bb..fb1ffa1df 100644 --- a/lib/hooks/use-discussion-tts.ts +++ b/lib/hooks/use-discussion-tts.ts @@ -34,6 +34,17 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus const audioRef = useRef(null); const onAudioStateChangeRef = useRef(onAudioStateChange); onAudioStateChangeRef.current = onAudioStateChange; + const drainResolversRef = useRef void>>([]); + + // Resolve all drain waiters when queue is empty and nothing is playing + const checkDrain = useCallback(() => { + if (queueRef.current.length === 0 && !isPlayingRef.current) { + const resolvers = drainResolversRef.current; + drainResolversRef.current = []; + resolvers.forEach((r) => r()); + } + }, []); + const processQueueRef = useRef<() => void>(() => {}); const { @@ -90,7 +101,10 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus ); const processQueue = useCallback(async () => { - if (isPlayingRef.current || queueRef.current.length === 0) return; + if (isPlayingRef.current || queueRef.current.length === 0) { + checkDrain(); + return; + } if (!enabled || ttsMuted) { queueRef.current = []; return; @@ -156,7 +170,7 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus onAudioStateChangeRef.current?.(item.agentId, 'idle'); processQueueRef.current(); } - }, [enabled, ttsMuted, ttsProvidersConfig, ttsSpeed]); + }, [enabled, ttsMuted, ttsProvidersConfig, ttsSpeed, checkDrain]); processQueueRef.current = processQueue; @@ -192,8 +206,23 @@ export function useDiscussionTTS({ 
enabled, agents, onAudioStateChange }: Discus useEffect(() => cleanup, [cleanup]); + /** + * Returns a promise that resolves when the audio queue is empty + * and nothing is playing. Used by the agent loop to wait for + * TTS to finish before requesting the next agent turn. + */ + const waitForDrain = useCallback((): Promise => { + if (queueRef.current.length === 0 && !isPlayingRef.current) { + return Promise.resolve(); + } + return new Promise((resolve) => { + drainResolversRef.current.push(resolve); + }); + }, []); + return { handleSegmentSealed, + waitForDrain, cleanup, }; } From b5dc01577d53c9433e5f298b830d06de15fc450d Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 14:47:26 +0800 Subject: [PATCH 46/65] fix(tts): keep bubble visible while TTS audio is still playing When buffer drains (text=null) but audio indicator is still active, don't clear liveSpeech. Clear it only when audio state goes idle. This keeps the speech bubble visible until TTS finishes. Co-Authored-By: Claude Opus 4.6 (1M context) --- components/stage.tsx | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/components/stage.tsx b/components/stage.tsx index 20b4564d1..63b051b29 100644 --- a/components/stage.tsx +++ b/components/stage.tsx @@ -120,6 +120,7 @@ export function Stage({ // Discussion TTS: audio indicator state const [audioIndicatorState, setAudioIndicatorState] = useState('idle'); + const audioIndicatorStateRef = useRef('idle'); const [audioAgentId, setAudioAgentId] = useState(null); const discussionTTS = useDiscussionTTS({ @@ -128,6 +129,11 @@ export function Stage({ onAudioStateChange: (agentId, state) => { setAudioAgentId(agentId); setAudioIndicatorState(state); + audioIndicatorStateRef.current = state; + // When audio finishes (idle), clear the bubble if buffer already drained + if (state === 'idle') { + setLiveSpeech(null); + } }, }); @@ -877,10 +883,16 @@ export function Stage({ // Use queueMicrotask to let any pending scene-switch reset 
settle first queueMicrotask(() => { if (sceneEpochRef.current !== epoch) return; // stale — scene changed - setLiveSpeech(text); if (agentId !== undefined) { setSpeakingAgentId(agentId); } + // When buffer clears speech (text=null) but TTS audio is still + // playing, keep the bubble visible by not clearing liveSpeech. + if (text === null && audioIndicatorStateRef.current !== 'idle') { + // Don't clear — audio still playing + } else { + setLiveSpeech(text); + } if (text !== null || agentId) { setChatIsStreaming(true); setChatSessionType(chatAreaRef.current?.getActiveSessionType?.() ?? null); From 16cb71a6b01a272d4e9f8c9cd0b05d6252406ccc Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 14:56:10 +0800 Subject: [PATCH 47/65] feat(tts): hold discussion bubble until TTS audio finishes When StreamBuffer fires the done signal (onLiveSpeech null), Stage now checks if TTS is still playing. If so, it defers clearing the bubble state. The bubble stays visible until onAllAudioEnd fires from the TTS hook (queue empty + nothing playing), then clears. This prevents the jarring UX where the bubble disappears while the agent's voice is still audible. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- components/stage.tsx | 32 +++++++++++++--------- lib/hooks/use-discussion-tts.ts | 47 ++++++++++++--------------------- 2 files changed, 37 insertions(+), 42 deletions(-) diff --git a/components/stage.tsx b/components/stage.tsx index 63b051b29..862d19f38 100644 --- a/components/stage.tsx +++ b/components/stage.tsx @@ -120,8 +120,9 @@ export function Stage({ // Discussion TTS: audio indicator state const [audioIndicatorState, setAudioIndicatorState] = useState('idle'); - const audioIndicatorStateRef = useRef('idle'); const [audioAgentId, setAudioAgentId] = useState(null); + // When TTS is playing and done signal arrives, hold the bubble until audio finishes + const pendingDoneClearRef = useRef(false); const discussionTTS = useDiscussionTTS({ enabled: !ttsMuted, @@ -129,10 +130,14 @@ export function Stage({ onAudioStateChange: (agentId, state) => { setAudioAgentId(agentId); setAudioIndicatorState(state); - audioIndicatorStateRef.current = state; - // When audio finishes (idle), clear the bubble if buffer already drained - if (state === 'idle') { + }, + onAllAudioEnd: () => { + // If we were holding a done signal, now release it + if (pendingDoneClearRef.current) { + pendingDoneClearRef.current = false; setLiveSpeech(null); + setSpeakingAgentId(null); + setChatIsStreaming(false); } }, }); @@ -249,6 +254,7 @@ export function Stage({ // Stop any in-flight discussion TTS audio discussionTTS.cleanup(); + pendingDoneClearRef.current = false; resetLiveState(); }, [chatSessionType, resetLiveState, discussionTTS]); @@ -883,20 +889,23 @@ export function Stage({ // Use queueMicrotask to let any pending scene-switch reset settle first queueMicrotask(() => { if (sceneEpochRef.current !== epoch) return; // stale — scene changed + + // Guard: if this is a "done" signal but TTS is still playing, hold the bubble + if (text === null && agentId === null && discussionTTS.isPlaying()) { + pendingDoneClearRef.current = true; + // Don't 
clear state yet — onAllAudioEnd will handle it + return; + } + + setLiveSpeech(text); if (agentId !== undefined) { setSpeakingAgentId(agentId); } - // When buffer clears speech (text=null) but TTS audio is still - // playing, keep the bubble visible by not clearing liveSpeech. - if (text === null && audioIndicatorStateRef.current !== 'idle') { - // Don't clear — audio still playing - } else { - setLiveSpeech(text); - } if (text !== null || agentId) { setChatIsStreaming(true); setChatSessionType(chatAreaRef.current?.getActiveSessionType?.() ?? null); setIsTopicPending(false); + pendingDoneClearRef.current = false; } else if (text === null && agentId === null) { setChatIsStreaming(false); // Don't clear chatSessionType here — it's needed by the stop @@ -924,7 +933,6 @@ export function Stage({ }} onStopSession={doSessionCleanup} onSegmentSealed={discussionTTS.handleSegmentSealed} - waitForTTSDrain={discussionTTS.waitForDrain} /> {/* Scene switch confirmation dialog */} diff --git a/lib/hooks/use-discussion-tts.ts b/lib/hooks/use-discussion-tts.ts index fb1ffa1df..af9f0621a 100644 --- a/lib/hooks/use-discussion-tts.ts +++ b/lib/hooks/use-discussion-tts.ts @@ -12,6 +12,8 @@ interface DiscussionTTSOptions { enabled: boolean; agents: AgentConfig[]; onAudioStateChange?: (agentId: string | null, state: AudioIndicatorState) => void; + /** Called when all queued audio has finished playing (queue empty + nothing playing) */ + onAllAudioEnd?: () => void; } interface QueueItem { @@ -23,7 +25,12 @@ interface QueueItem { voiceId: string; } -export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: DiscussionTTSOptions) { +export function useDiscussionTTS({ + enabled, + agents, + onAudioStateChange, + onAllAudioEnd, +}: DiscussionTTSOptions) { const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig); const ttsSpeed = useSettingsStore((s) => s.ttsSpeed); const ttsMuted = useSettingsStore((s) => s.ttsMuted); @@ -34,17 +41,8 @@ export function 
useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus const audioRef = useRef(null); const onAudioStateChangeRef = useRef(onAudioStateChange); onAudioStateChangeRef.current = onAudioStateChange; - const drainResolversRef = useRef void>>([]); - - // Resolve all drain waiters when queue is empty and nothing is playing - const checkDrain = useCallback(() => { - if (queueRef.current.length === 0 && !isPlayingRef.current) { - const resolvers = drainResolversRef.current; - drainResolversRef.current = []; - resolvers.forEach((r) => r()); - } - }, []); - + const onAllAudioEndRef = useRef(onAllAudioEnd); + onAllAudioEndRef.current = onAllAudioEnd; const processQueueRef = useRef<() => void>(() => {}); const { @@ -101,12 +99,15 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus ); const processQueue = useCallback(async () => { - if (isPlayingRef.current || queueRef.current.length === 0) { - checkDrain(); + if (isPlayingRef.current) return; + if (queueRef.current.length === 0) { + // Queue empty + not playing = all audio done + onAllAudioEndRef.current?.(); return; } if (!enabled || ttsMuted) { queueRef.current = []; + onAllAudioEndRef.current?.(); return; } @@ -170,7 +171,7 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus onAudioStateChangeRef.current?.(item.agentId, 'idle'); processQueueRef.current(); } - }, [enabled, ttsMuted, ttsProvidersConfig, ttsSpeed, checkDrain]); + }, [enabled, ttsMuted, ttsProvidersConfig, ttsSpeed]); processQueueRef.current = processQueue; @@ -206,23 +207,9 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus useEffect(() => cleanup, [cleanup]); - /** - * Returns a promise that resolves when the audio queue is empty - * and nothing is playing. Used by the agent loop to wait for - * TTS to finish before requesting the next agent turn. 
- */ - const waitForDrain = useCallback((): Promise => { - if (queueRef.current.length === 0 && !isPlayingRef.current) { - return Promise.resolve(); - } - return new Promise((resolve) => { - drainResolversRef.current.push(resolve); - }); - }, []); - return { handleSegmentSealed, - waitForDrain, cleanup, + isPlaying: () => isPlayingRef.current || queueRef.current.length > 0, }; } From 41c2e9904167a2dc59c91b40735e8b2e8b55aff9 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 15:04:38 +0800 Subject: [PATCH 48/65] fix(tts): fix bubble hold - guard onStopSession instead of onLiveSpeech Root cause: bubble disappears because doSessionCleanup fires via onStopSession when the agent loop ends naturally, NOT because of onLiveSpeech(null, null). Fix: when onStopSession fires and TTS is still playing, defer doSessionCleanup to onAllAudioEnd callback. Manual stop (user presses button) still cleans up immediately via handleStopDiscussion. Use doSessionCleanupRef to avoid circular dependency between discussionTTS hook and doSessionCleanup useCallback. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- components/stage.tsx | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/components/stage.tsx b/components/stage.tsx index 862d19f38..d895a5874 100644 --- a/components/stage.tsx +++ b/components/stage.tsx @@ -121,8 +121,9 @@ export function Stage({ // Discussion TTS: audio indicator state const [audioIndicatorState, setAudioIndicatorState] = useState('idle'); const [audioAgentId, setAudioAgentId] = useState(null); - // When TTS is playing and done signal arrives, hold the bubble until audio finishes + // When TTS is playing and session ends naturally, hold the bubble until audio finishes const pendingDoneClearRef = useRef(false); + const doSessionCleanupRef = useRef<() => void>(() => {}); const discussionTTS = useDiscussionTTS({ enabled: !ttsMuted, @@ -132,12 +133,9 @@ export function Stage({ setAudioIndicatorState(state); }, onAllAudioEnd: () => { - // If we were holding a done signal, now release it if (pendingDoneClearRef.current) { pendingDoneClearRef.current = false; - setLiveSpeech(null); - setSpeakingAgentId(null); - setChatIsStreaming(false); + doSessionCleanupRef.current(); } }, }); @@ -258,6 +256,7 @@ export function Stage({ resetLiveState(); }, [chatSessionType, resetLiveState, discussionTTS]); + doSessionCleanupRef.current = doSessionCleanup; // Shared stop-discussion handler (used by both Roundtable and Canvas toolbar) const handleStopDiscussion = useCallback(async () => { @@ -889,14 +888,6 @@ export function Stage({ // Use queueMicrotask to let any pending scene-switch reset settle first queueMicrotask(() => { if (sceneEpochRef.current !== epoch) return; // stale — scene changed - - // Guard: if this is a "done" signal but TTS is still playing, hold the bubble - if (text === null && agentId === null && discussionTTS.isPlaying()) { - pendingDoneClearRef.current = true; - // Don't clear state yet — onAllAudioEnd will handle it - return; - } - 
setLiveSpeech(text); if (agentId !== undefined) { setSpeakingAgentId(agentId); @@ -905,7 +896,6 @@ export function Stage({ setChatIsStreaming(true); setChatSessionType(chatAreaRef.current?.getActiveSessionType?.() ?? null); setIsTopicPending(false); - pendingDoneClearRef.current = false; } else if (text === null && agentId === null) { setChatIsStreaming(false); // Don't clear chatSessionType here — it's needed by the stop @@ -931,7 +921,16 @@ export function Stage({ onCueUser={(_fromAgentId, _prompt) => { setIsCueUser(true); }} - onStopSession={doSessionCleanup} + onStopSession={() => { + // Agent loop finished naturally. + // If TTS is still playing, defer cleanup until audio ends. + if (discussionTTS.isPlaying()) { + pendingDoneClearRef.current = true; + // onAllAudioEnd will fire doSessionCleanup + return; + } + doSessionCleanup(); + }} onSegmentSealed={discussionTTS.handleSegmentSealed} /> From 717c80e710d5d40c33df2066deebe9d031cf84c9 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 21:23:45 +0800 Subject: [PATCH 49/65] fix(tts): guard BOTH onLiveSpeech and onStopSession for bubble hold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two paths clear the bubble: 1. onLiveSpeech(null, null) from StreamBuffer done → clears liveSpeech 2. onStopSession → doSessionCleanup → clears all state Both fire when agent loop ends. Path 1 fires first (tick loop), path 2 fires after (waitUntilDrained resolves). Both must be guarded when TTS is still playing. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- components/stage.tsx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/components/stage.tsx b/components/stage.tsx index d895a5874..4a2aa21ac 100644 --- a/components/stage.tsx +++ b/components/stage.tsx @@ -888,6 +888,13 @@ export function Stage({ // Use queueMicrotask to let any pending scene-switch reset settle first queueMicrotask(() => { if (sceneEpochRef.current !== epoch) return; // stale — scene changed + + // Guard: if done signal arrives but TTS is still playing, hold the bubble + if (text === null && agentId === null && discussionTTS.isPlaying()) { + pendingDoneClearRef.current = true; + return; + } + setLiveSpeech(text); if (agentId !== undefined) { setSpeakingAgentId(agentId); From 963b2f24cfffcf5d04e12793d0dc8af33592ae2e Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 21:42:51 +0800 Subject: [PATCH 50/65] feat(tts): hold bubble during TTS playback and respect playback speed Co-Authored-By: Claude Opus 4.6 (1M context) --- components/chat/chat-area.tsx | 7 +++--- components/chat/use-chat-sessions.ts | 22 ++++++++----------- components/stage.tsx | 30 ++----------------------- lib/buffer/stream-buffer.ts | 33 ++++++++++++++++++++++++++++ lib/hooks/use-discussion-tts.ts | 28 ++++++++--------------- 5 files changed, 57 insertions(+), 63 deletions(-) diff --git a/components/chat/chat-area.tsx b/components/chat/chat-area.tsx index 0ccdac030..fc8ceba43 100644 --- a/components/chat/chat-area.tsx +++ b/components/chat/chat-area.tsx @@ -33,7 +33,8 @@ interface ChatAreaProps { fullText: string, agentId: string | null, ) => void; - waitForTTSDrain?: () => Promise; + /** When provided and returns true, StreamBuffer holds on the current text item after reveal. 
*/ + shouldHoldAfterReveal?: () => boolean; currentSceneId?: string | null; } @@ -75,7 +76,7 @@ export const ChatArea = forwardRef( onCueUser, onStopSession, onSegmentSealed, - waitForTTSDrain, + shouldHoldAfterReveal, currentSceneId, }, ref, @@ -108,7 +109,7 @@ export const ChatArea = forwardRef( onActiveBubble, onStopSession, onSegmentSealed, - waitForTTSDrain, + shouldHoldAfterReveal, }); const [activeTab, setActiveTab] = useState<'lecture' | 'chat'>('lecture'); diff --git a/components/chat/use-chat-sessions.ts b/components/chat/use-chat-sessions.ts index e1c43baae..1804d9990 100644 --- a/components/chat/use-chat-sessions.ts +++ b/components/chat/use-chat-sessions.ts @@ -42,7 +42,8 @@ interface UseChatSessionsOptions { fullText: string, agentId: string | null, ) => void; - waitForTTSDrain?: () => Promise; + /** When provided and returns true, StreamBuffer holds on the current text item after reveal. */ + shouldHoldAfterReveal?: () => boolean; } export function useChatSessions(options: UseChatSessionsOptions = {}) { @@ -53,7 +54,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { const onActiveBubbleRef = useRef(options.onActiveBubble); const onStopSessionRef = useRef(options.onStopSession); const onSegmentSealedRef = useRef(options.onSegmentSealed); - const waitForTTSDrainRef = useRef(options.waitForTTSDrain); + const shouldHoldAfterRevealRef = useRef(options.shouldHoldAfterReveal); useEffect(() => { onLiveSpeechRef.current = options.onLiveSpeech; onSpeechProgressRef.current = options.onSpeechProgress; @@ -62,7 +63,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { onActiveBubbleRef.current = options.onActiveBubble; onStopSessionRef.current = options.onStopSession; onSegmentSealedRef.current = options.onSegmentSealed; - waitForTTSDrainRef.current = options.waitForTTSDrain; + shouldHoldAfterRevealRef.current = options.shouldHoldAfterReveal; }, [ options.onLiveSpeech, options.onSpeechProgress, @@ -71,7 +72,7 @@ 
export function useChatSessions(options: UseChatSessionsOptions = {}) { options.onActiveBubble, options.onStopSession, options.onSegmentSealed, - options.waitForTTSDrain, + options.shouldHoldAfterReveal, ]); const { t } = useI18n(); @@ -339,6 +340,10 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { ) { onSegmentSealedRef.current?.(messageId, partId, fullText, agentId); }, + + shouldHoldAfterReveal() { + return shouldHoldAfterRevealRef.current?.() ?? false; + }, }, pacingOptions, ); @@ -447,15 +452,6 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) { break; } - // Wait for TTS audio to finish before next turn - if (waitForTTSDrainRef.current) { - try { - await waitForTTSDrainRef.current(); - } catch { - break; - } - } - if (controller.signal.aborted) break; // Read loop outcome from done data. diff --git a/components/stage.tsx b/components/stage.tsx index 4a2aa21ac..e8160371c 100644 --- a/components/stage.tsx +++ b/components/stage.tsx @@ -121,9 +121,6 @@ export function Stage({ // Discussion TTS: audio indicator state const [audioIndicatorState, setAudioIndicatorState] = useState('idle'); const [audioAgentId, setAudioAgentId] = useState(null); - // When TTS is playing and session ends naturally, hold the bubble until audio finishes - const pendingDoneClearRef = useRef(false); - const doSessionCleanupRef = useRef<() => void>(() => {}); const discussionTTS = useDiscussionTTS({ enabled: !ttsMuted, @@ -132,12 +129,6 @@ export function Stage({ setAudioAgentId(agentId); setAudioIndicatorState(state); }, - onAllAudioEnd: () => { - if (pendingDoneClearRef.current) { - pendingDoneClearRef.current = false; - doSessionCleanupRef.current(); - } - }, }); // Pick a student agent for discussion trigger (prioritize student > non-teacher > fallback) @@ -252,11 +243,9 @@ export function Stage({ // Stop any in-flight discussion TTS audio discussionTTS.cleanup(); - pendingDoneClearRef.current = false; resetLiveState(); }, 
[chatSessionType, resetLiveState, discussionTTS]); - doSessionCleanupRef.current = doSessionCleanup; // Shared stop-discussion handler (used by both Roundtable and Canvas toolbar) const handleStopDiscussion = useCallback(async () => { @@ -888,13 +877,6 @@ export function Stage({ // Use queueMicrotask to let any pending scene-switch reset settle first queueMicrotask(() => { if (sceneEpochRef.current !== epoch) return; // stale — scene changed - - // Guard: if done signal arrives but TTS is still playing, hold the bubble - if (text === null && agentId === null && discussionTTS.isPlaying()) { - pendingDoneClearRef.current = true; - return; - } - setLiveSpeech(text); if (agentId !== undefined) { setSpeakingAgentId(agentId); @@ -928,17 +910,9 @@ export function Stage({ onCueUser={(_fromAgentId, _prompt) => { setIsCueUser(true); }} - onStopSession={() => { - // Agent loop finished naturally. - // If TTS is still playing, defer cleanup until audio ends. - if (discussionTTS.isPlaying()) { - pendingDoneClearRef.current = true; - // onAllAudioEnd will fire doSessionCleanup - return; - } - doSessionCleanup(); - }} + onStopSession={doSessionCleanup} onSegmentSealed={discussionTTS.handleSegmentSealed} + shouldHoldAfterReveal={discussionTTS.shouldHold} /> {/* Scene switch confirmation dialog */} diff --git a/lib/buffer/stream-buffer.ts b/lib/buffer/stream-buffer.ts index 725c1285c..a354a22d9 100644 --- a/lib/buffer/stream-buffer.ts +++ b/lib/buffer/stream-buffer.ts @@ -130,6 +130,12 @@ export interface StreamBufferCallbacks { fullText: string, agentId: string | null, ) => void; + /** + * When provided, called after a text item is fully revealed and sealed. + * If it returns true, the tick loop will NOT advance to the next item — + * the bubble stays on the current text (e.g. waiting for TTS playback to finish). 
+ */ + shouldHoldAfterReveal?: () => boolean; } // ─── Options ───────────────────────────────────────────────────────── @@ -171,6 +177,8 @@ export class StreamBuffer { // Dwell / delay counters (in ticks) private _dwellTicksRemaining = 0; + /** True when a text item's post-delay has elapsed and we're waiting for TTS to finish. */ + private _holdingForTTS = false; // Config private readonly tickMs: number; @@ -423,6 +431,21 @@ export class StreamBuffer { // Honour dwell / action-delay countdown before advancing if (this._dwellTicksRemaining > 0) { this._dwellTicksRemaining--; + if (this._dwellTicksRemaining === 0 && this._holdingForTTS) { + // Post-text delay just finished — fall through to the TTS hold check below + } else { + return; + } + } + + // TTS hold: after post-text delay, keep the bubble on screen while audio plays + if (this._holdingForTTS) { + if (this.cb.shouldHoldAfterReveal?.()) { + return; // TTS still playing — stay on current item + } + this._holdingForTTS = false; + // TTS done — continue to process next item + this.advanceNonText(); return; } @@ -457,9 +480,19 @@ export class StreamBuffer { // before the next action or agent turn fires. if (this.postTextDelayTicks > 0) { this._dwellTicksRemaining = this.postTextDelayTicks; + // If TTS hold callback exists, mark that we need to check it after delay + if (this.cb.shouldHoldAfterReveal) { + this._holdingForTTS = true; + } return; // next tick will count down, then advanceNonText } + // No post-text delay — check TTS hold immediately + if (this.cb.shouldHoldAfterReveal?.()) { + this._holdingForTTS = true; + return; // TTS still playing — hold here + } + // Process any immediately-advanceable items in the same tick // (e.g. 
action badges right after text) this.advanceNonText(); diff --git a/lib/hooks/use-discussion-tts.ts b/lib/hooks/use-discussion-tts.ts index af9f0621a..fa66a6c62 100644 --- a/lib/hooks/use-discussion-tts.ts +++ b/lib/hooks/use-discussion-tts.ts @@ -12,8 +12,6 @@ interface DiscussionTTSOptions { enabled: boolean; agents: AgentConfig[]; onAudioStateChange?: (agentId: string | null, state: AudioIndicatorState) => void; - /** Called when all queued audio has finished playing (queue empty + nothing playing) */ - onAllAudioEnd?: () => void; } interface QueueItem { @@ -25,15 +23,11 @@ interface QueueItem { voiceId: string; } -export function useDiscussionTTS({ - enabled, - agents, - onAudioStateChange, - onAllAudioEnd, -}: DiscussionTTSOptions) { +export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: DiscussionTTSOptions) { const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig); const ttsSpeed = useSettingsStore((s) => s.ttsSpeed); const ttsMuted = useSettingsStore((s) => s.ttsMuted); + const playbackSpeed = useSettingsStore((s) => s.playbackSpeed); const queueRef = useRef([]); const isPlayingRef = useRef(false); @@ -41,8 +35,6 @@ export function useDiscussionTTS({ const audioRef = useRef(null); const onAudioStateChangeRef = useRef(onAudioStateChange); onAudioStateChangeRef.current = onAudioStateChange; - const onAllAudioEndRef = useRef(onAllAudioEnd); - onAllAudioEndRef.current = onAllAudioEnd; const processQueueRef = useRef<() => void>(() => {}); const { @@ -99,15 +91,9 @@ export function useDiscussionTTS({ ); const processQueue = useCallback(async () => { - if (isPlayingRef.current) return; - if (queueRef.current.length === 0) { - // Queue empty + not playing = all audio done - onAllAudioEndRef.current?.(); - return; - } + if (isPlayingRef.current || queueRef.current.length === 0) return; if (!enabled || ttsMuted) { queueRef.current = []; - onAllAudioEndRef.current?.(); return; } @@ -151,6 +137,7 @@ export function 
useDiscussionTTS({ onAudioStateChangeRef.current?.(item.agentId, 'playing'); const audioUrl = `data:audio/${data.format || 'mp3'};base64,${data.base64}`; const audio = new Audio(audioUrl); + audio.playbackRate = playbackSpeed; audioRef.current = audio; audio.addEventListener('ended', () => { isPlayingRef.current = false; @@ -171,7 +158,7 @@ export function useDiscussionTTS({ onAudioStateChangeRef.current?.(item.agentId, 'idle'); processQueueRef.current(); } - }, [enabled, ttsMuted, ttsProvidersConfig, ttsSpeed]); + }, [enabled, ttsMuted, ttsProvidersConfig, ttsSpeed, playbackSpeed]); processQueueRef.current = processQueue; @@ -207,9 +194,12 @@ export function useDiscussionTTS({ useEffect(() => cleanup, [cleanup]); + /** Returns true when TTS audio is still playing or queued — used by StreamBuffer hold logic. */ + const shouldHold = useCallback(() => isPlayingRef.current, []); + return { handleSegmentSealed, cleanup, - isPlaying: () => isPlayingRef.current || queueRef.current.length > 0, + shouldHold, }; } From 759bf11dca1db5954aaef96d0daf213b8414b03e Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 22:20:23 +0800 Subject: [PATCH 51/65] feat(tts): LLM picks voice matching agent persona during generation - Client sends available voices (providerId + voiceId + name) to /api/generate/agent-profiles - LLM prompt asks to pick a voice matching each agent's personality - Parse "providerId::voiceId" from response, save as voiceConfig - Fallback to index-based assignment if LLM doesn't pick - Browser native voices hidden when server providers are available - saveGeneratedAgents accepts and persists voiceConfig Co-Authored-By: Claude Opus 4.6 (1M context) --- app/api/generate/agent-profiles/route.ts | 60 +++++++++++++++++++----- app/generation-preview/page.tsx | 13 +++++ components/agent/agent-bar.tsx | 25 +++++----- lib/orchestration/registry/store.ts | 13 ++++- 4 files changed, 86 insertions(+), 25 deletions(-) diff --git a/app/api/generate/agent-profiles/route.ts 
b/app/api/generate/agent-profiles/route.ts index b081cbdac..5d0041903 100644 --- a/app/api/generate/agent-profiles/route.ts +++ b/app/api/generate/agent-profiles/route.ts @@ -36,6 +36,7 @@ interface RequestBody { sceneOutlines?: { title: string; description?: string }[]; language: string; availableAvatars: string[]; + availableVoices?: Array<{ providerId: string; voiceId: string; voiceName: string }>; } function stripCodeFences(text: string): string { @@ -50,7 +51,7 @@ function stripCodeFences(text: string): string { export async function POST(req: NextRequest) { try { const body = (await req.json()) as RequestBody; - const { stageInfo, sceneOutlines, language, availableAvatars } = body; + const { stageInfo, sceneOutlines, language, availableAvatars, availableVoices } = body; // ── Validate required fields ── if (!stageInfo?.name) { @@ -79,6 +80,27 @@ export async function POST(req: NextRequest) { const systemPrompt = `You are an expert instructional designer. Generate agent profiles for a multi-agent classroom simulation. Decide the appropriate number of agents (typically 3-5) based on the course content and complexity. Return ONLY valid JSON, no markdown or explanation.`; + // Build voice list for prompt (if available) + const voiceListStr = + availableVoices && availableVoices.length > 0 + ? JSON.stringify( + availableVoices.map((v) => ({ + id: `${v.providerId}::${v.voiceId}`, + name: v.voiceName, + })), + ) + : ''; + + const voicePrompt = voiceListStr + ? `- Each agent should be assigned a voice that matches their persona from this list: ${voiceListStr} + - Pick a voice that suits the agent's personality and role (e.g. authoritative voice for teacher, lively voice for energetic student) + - Try to use different voices for each agent` + : ''; + + const voiceJsonField = voiceListStr + ? ',\n "voice": "string (voice id from available list, e.g. 
\'qwen-tts::Cherry\')"' + : ''; + const userPrompt = `Generate agent profiles for the following course: Course name: ${stageInfo.name} @@ -94,6 +116,7 @@ Requirements: - Try to use different avatars for each agent - Each agent must be assigned one color from this list: ${JSON.stringify(COLOR_PALETTE)} - Each agent must have a different color +${voicePrompt} Return a JSON object with this exact structure: { @@ -104,7 +127,7 @@ Return a JSON object with this exact structure: "persona": "string (2-3 sentences)", "avatar": "string (from available list)", "color": "string (hex color from palette)", - "priority": number (10 for teacher, 7 for assistant, 4-6 for student) + "priority": number (10 for teacher, 7 for assistant, 4-6 for student)${voiceJsonField} } ] }`; @@ -130,6 +153,7 @@ Return a JSON object with this exact structure: avatar: string; color: string; priority: number; + voice?: string; }>; }; @@ -161,16 +185,28 @@ Return a JSON object with this exact structure: } // ── Build output with IDs ── - const agents = parsed.agents.map((agent, index) => ({ - id: `gen-${nanoid(8)}`, - name: agent.name, - role: agent.role, - persona: agent.persona, - avatar: agent.avatar || availableAvatars[index % availableAvatars.length], - color: agent.color || COLOR_PALETTE[index % COLOR_PALETTE.length], - priority: - agent.priority ?? (agent.role === 'teacher' ? 10 : agent.role === 'assistant' ? 
7 : 5), - })); + const agents = parsed.agents.map((agent, index) => { + // Parse voice "providerId::voiceId" format + let voiceConfig: { providerId: string; voiceId: string } | undefined; + if (agent.voice && agent.voice.includes('::')) { + const [providerId, voiceId] = agent.voice.split('::'); + if (providerId && voiceId) { + voiceConfig = { providerId, voiceId }; + } + } + + return { + id: `gen-${nanoid(8)}`, + name: agent.name, + role: agent.role, + persona: agent.persona, + avatar: agent.avatar || availableAvatars[index % availableAvatars.length], + color: agent.color || COLOR_PALETTE[index % COLOR_PALETTE.length], + priority: + agent.priority ?? (agent.role === 'teacher' ? 10 : agent.role === 'assistant' ? 7 : 5), + ...(voiceConfig ? { voiceConfig } : {}), + }; + }); log.info(`Successfully generated ${agents.length} agent profiles for "${stageInfo.name}"`); diff --git a/app/generation-preview/page.tsx b/app/generation-preview/page.tsx index 213a51409..855841d75 100644 --- a/app/generation-preview/page.tsx +++ b/app/generation-preview/page.tsx @@ -11,6 +11,7 @@ import { cn } from '@/lib/utils'; import { useStageStore } from '@/lib/store/stage'; import { useSettingsStore } from '@/lib/store/settings'; import { useAgentRegistry } from '@/lib/orchestration/registry/store'; +import { getAvailableProvidersWithVoices } from '@/lib/audio/voice-resolver'; import { useI18n } from '@/lib/hooks/use-i18n'; import { loadImageMapping, @@ -393,6 +394,17 @@ function GenerationPreviewContent() { '/avatars/thinker-2.png', ]; + const getAvailableVoicesForGeneration = () => { + const providers = getAvailableProvidersWithVoices(settings.ttsProvidersConfig); + return providers.flatMap((p) => + p.voices.map((v) => ({ + providerId: p.providerId, + voiceId: v.id, + voiceName: v.name, + })), + ); + }; + // No outlines yet — agent generation uses only stage name + description const agentResp = await fetch('/api/generate/agent-profiles', { method: 'POST', @@ -401,6 +413,7 @@ function 
GenerationPreviewContent() { stageInfo: { name: stage.name, description: stage.description }, language: currentSession.requirements.language || 'zh-CN', availableAvatars: allAvatars, + availableVoices: getAvailableVoicesForGeneration(), }), signal, }); diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index b1a014397..4deb2afde 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -264,18 +264,19 @@ export function AgentBar() { const nonTeacherSelected = selectedAgents.filter((a) => a.role !== 'teacher'); const serverProviders = getAvailableProvidersWithVoices(ttsProvidersConfig); - const availableProviders: ProviderWithVoices[] = [ - ...serverProviders, - ...(browserVoices.length > 0 - ? [ - { - providerId: 'browser-native-tts' as TTSProviderId, - providerName: 'Browser Native', - voices: browserVoices.map((v) => ({ id: v.voiceURI, name: v.name })), - }, - ] - : []), - ]; + // Only show browser native when no server providers are available + const availableProviders: ProviderWithVoices[] = + serverProviders.length > 0 + ? serverProviders + : browserVoices.length > 0 + ? 
[ + { + providerId: 'browser-native-tts' as TTSProviderId, + providerName: 'Browser Native', + voices: browserVoices.map((v) => ({ id: v.voiceURI, name: v.name })), + }, + ] + : []; const showVoice = availableProviders.length > 0; useEffect(() => { diff --git a/lib/orchestration/registry/store.ts b/lib/orchestration/registry/store.ts index c15296c36..893a5fa63 100644 --- a/lib/orchestration/registry/store.ts +++ b/lib/orchestration/registry/store.ts @@ -7,6 +7,7 @@ import { create } from 'zustand'; import { persist } from 'zustand/middleware'; import type { AgentConfig } from './types'; import { getActionsForRole } from './types'; +import type { TTSProviderId } from '@/lib/audio/types'; import { USER_AVATAR } from '@/lib/types/roundtable'; import type { Participant, ParticipantRole } from '@/lib/types/roundtable'; import { useUserProfileStore } from '@/lib/store/user-profile'; @@ -377,6 +378,7 @@ export async function saveGeneratedAgents( avatar: string; color: string; priority: number; + voiceConfig?: { providerId: string; voiceId: string }; }>, ): Promise { const { db } = await import('@/lib/utils/database'); @@ -396,14 +398,23 @@ export async function saveGeneratedAgents( // Add to registry for (const record of records) { + const { voiceConfig, ...rest } = record; registry.addAgent({ - ...record, + ...rest, allowedActions: getActionsForRole(record.role), isDefault: false, isGenerated: true, boundStageId: stageId, createdAt: new Date(record.createdAt), updatedAt: new Date(record.createdAt), + ...(voiceConfig + ? { + voiceConfig: { + providerId: voiceConfig.providerId as TTSProviderId, + voiceId: voiceConfig.voiceId, + }, + } + : {}), }); } From 5e0eac9a3106692b7c90a18d22b76536b3a093f1 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 22:27:18 +0800 Subject: [PATCH 52/65] fix(tts): restore volume slider in classroom toolbar Revert the toolbar simplification from 36e3997 that replaced the volume slider with a TTS on/off toggle. 
The volume control with hover slider is a core classroom UX. TTS on/off is controlled via Settings and Media popover instead. Co-Authored-By: Claude Opus 4.6 (1M context) --- components/canvas/canvas-toolbar.tsx | 107 +++++++++++++++++++++------ components/roundtable/index.tsx | 9 ++- 2 files changed, 93 insertions(+), 23 deletions(-) diff --git a/components/canvas/canvas-toolbar.tsx b/components/canvas/canvas-toolbar.tsx index b156ffe17..bf4db9f46 100644 --- a/components/canvas/canvas-toolbar.tsx +++ b/components/canvas/canvas-toolbar.tsx @@ -1,5 +1,6 @@ 'use client'; +import { useState, useRef, useCallback, useEffect } from 'react'; import { ChevronLeft, ChevronRight, @@ -93,7 +94,10 @@ export function CanvasToolbar({ onStopDiscussion, className, ttsEnabled, + ttsMuted, + ttsVolume = 1, onToggleMute, + onVolumeChange, autoPlayLecture, onToggleAutoPlay, playbackSpeed = 1, @@ -108,6 +112,26 @@ export function CanvasToolbar({ (s) => s.stage?.whiteboard?.[0]?.elements?.length || 0, ); + // Volume slider hover state + const [volumeHover, setVolumeHover] = useState(false); + const volumeTimerRef = useRef<ReturnType<typeof setTimeout>>(undefined); + const volumeContainerRef = useRef(null); + + const handleVolumeEnter = useCallback(() => { + clearTimeout(volumeTimerRef.current); + setVolumeHover(true); + }, []); + + const handleVolumeLeave = useCallback(() => { + volumeTimerRef.current = setTimeout(() => setVolumeHover(false), 300); + }, []); + + // Cleanup volume hover timer on unmount + useEffect(() => () => clearTimeout(volumeTimerRef.current), []); + + // Effective volume for display + const effectiveVolume = ttsMuted ? 0 : ttsVolume; + return (
{/* ── Left: sidebar toggle + page indicator ── */} @@ -137,30 +161,71 @@ export function CanvasToolbar({ {/* ── Center: unified playback controls ── */}
- {/* TTS on/off toggle */} + {/* Volume with vertical popover slider */} {onToggleMute && ( - - - - + + {/* Vertical volume slider (pops up above) */} +
+
+ + {Math.round(effectiveVolume * 100)} + + { + const v = parseFloat(e.target.value); + onVolumeChange?.(v); + if (v > 0 && ttsMuted) onToggleMute?.(); + }} className={cn( - ctrlBtn, - 'w-6 h-6', - !ttsEnabled - ? 'text-gray-300 dark:text-gray-600' - : 'text-gray-500 dark:text-gray-400', + 'appearance-none cursor-pointer', + 'h-16 w-1 rounded-full', + 'bg-gray-200 dark:bg-gray-600', + '[writing-mode:vertical-lr] [direction:rtl]', + '[&::-webkit-slider-thumb]:appearance-none [&::-webkit-slider-thumb]:w-3 [&::-webkit-slider-thumb]:h-3', + '[&::-webkit-slider-thumb]:rounded-full [&::-webkit-slider-thumb]:bg-violet-500 [&::-webkit-slider-thumb]:dark:bg-violet-400', + '[&::-webkit-slider-thumb]:shadow-sm [&::-webkit-slider-thumb]:cursor-pointer', + '[&::-moz-range-thumb]:w-3 [&::-moz-range-thumb]:h-3', + '[&::-moz-range-thumb]:rounded-full [&::-moz-range-thumb]:bg-violet-500 [&::-moz-range-thumb]:border-0', )} - aria-label={ttsEnabled ? 'Disable TTS' : 'Enable TTS'} - > - - - - - {ttsEnabled ? 'TTS On' : 'TTS Off'} - - - + /> +
+ {/* Arrow pointing down */} +
+
+
)} {/* Speed */} diff --git a/components/roundtable/index.tsx b/components/roundtable/index.tsx index 28045a306..80656bf0a 100644 --- a/components/roundtable/index.tsx +++ b/components/roundtable/index.tsx @@ -142,7 +142,6 @@ export function Roundtable({ const ttsMuted = useSettingsStore((s) => s.ttsMuted); const setTTSMuted = useSettingsStore((s) => s.setTTSMuted); const ttsEnabled = useSettingsStore((state) => state.ttsEnabled); - const setTTSEnabled = useSettingsStore((state) => state.setTTSEnabled); const asrEnabled = useSettingsStore((state) => state.asrEnabled); const ttsVolume = useSettingsStore((s) => s.ttsVolume); const setTTSVolume = useSettingsStore((s) => s.setTTSVolume); @@ -420,7 +419,13 @@ export function Roundtable({ showStopDiscussion={showStopButton} onStopDiscussion={onStopDiscussion} ttsEnabled={ttsEnabled} - onToggleMute={() => setTTSEnabled(!ttsEnabled)} + ttsMuted={ttsMuted} + ttsVolume={ttsVolume} + onToggleMute={() => { + if (!ttsEnabled) return; + setTTSMuted(!ttsMuted); + }} + onVolumeChange={(v) => setTTSVolume(v)} autoPlayLecture={autoPlayLecture} onToggleAutoPlay={() => setAutoPlayLecture(!autoPlayLecture)} playbackSpeed={playbackSpeed} From 0e6184585da565c62df784b9c21363d197224b76 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 22:31:09 +0800 Subject: [PATCH 53/65] fix(tts): teacher uses global lecture voice in discussion when no voiceConfig override --- lib/hooks/use-discussion-tts.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/hooks/use-discussion-tts.ts b/lib/hooks/use-discussion-tts.ts index fa66a6c62..4e481e291 100644 --- a/lib/hooks/use-discussion-tts.ts +++ b/lib/hooks/use-discussion-tts.ts @@ -28,6 +28,9 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus const ttsSpeed = useSettingsStore((s) => s.ttsSpeed); const ttsMuted = useSettingsStore((s) => s.ttsMuted); const playbackSpeed = useSettingsStore((s) => s.playbackSpeed); + // Global lecture voice — 
used as fallback for teacher agent + const globalTtsProviderId = useSettingsStore((s) => s.ttsProviderId); + const globalTtsVoice = useSettingsStore((s) => s.ttsVoice); const queueRef = useRef([]); const isPlayingRef = useRef(false); @@ -84,10 +87,14 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus } return { providerId: 'browser-native-tts', voiceId: 'default' }; } + // Teacher without explicit voiceConfig → use global lecture voice + if (agent.role === 'teacher' && !agent.voiceConfig && globalTtsVoice && globalTtsProviderId) { + return { providerId: globalTtsProviderId, voiceId: globalTtsVoice }; + } const index = agentIndexMap.current.get(agentId) ?? 0; return resolveAgentVoice(agent, index, providers); }, - [agents, ttsProvidersConfig], + [agents, ttsProvidersConfig, globalTtsProviderId, globalTtsVoice], ); const processQueue = useCallback(async () => { From db18945d18c3a2b554e54db09505f182e29bca53 Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 22:40:26 +0800 Subject: [PATCH 54/65] fix(tts): teacher always uses global lecture voice, no overrides --- lib/hooks/use-discussion-tts.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/hooks/use-discussion-tts.ts b/lib/hooks/use-discussion-tts.ts index 4e481e291..23510cd65 100644 --- a/lib/hooks/use-discussion-tts.ts +++ b/lib/hooks/use-discussion-tts.ts @@ -87,8 +87,8 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus } return { providerId: 'browser-native-tts', voiceId: 'default' }; } - // Teacher without explicit voiceConfig → use global lecture voice - if (agent.role === 'teacher' && !agent.voiceConfig && globalTtsVoice && globalTtsProviderId) { + // Teacher always uses the global lecture voice (same as course generation) + if (agent.role === 'teacher') { return { providerId: globalTtsProviderId, voiceId: globalTtsVoice }; } const index = agentIndexMap.current.get(agentId) ?? 
0; From 880190e0a7693d63237d6ba38d57384e1301317c Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 23:07:09 +0800 Subject: [PATCH 55/65] fix(tts): sync playback speed to currently playing audio in real-time --- lib/hooks/use-discussion-tts.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/hooks/use-discussion-tts.ts b/lib/hooks/use-discussion-tts.ts index 23510cd65..9b7525ccc 100644 --- a/lib/hooks/use-discussion-tts.ts +++ b/lib/hooks/use-discussion-tts.ts @@ -199,6 +199,13 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus onAudioStateChangeRef.current?.(null, 'idle'); }, []); + // Sync playbackSpeed to currently playing audio in real-time + useEffect(() => { + if (audioRef.current) { + audioRef.current.playbackRate = playbackSpeed; + } + }, [playbackSpeed]); + useEffect(() => cleanup, [cleanup]); /** Returns true when TTS audio is still playing or queued — used by StreamBuffer hold logic. */ From 3491f1b591063b60cb15b27bb904fa4810de267a Mon Sep 17 00:00:00 2001 From: wyuc Date: Sun, 22 Mar 2026 23:17:04 +0800 Subject: [PATCH 56/65] fix(tts): address code review issues - Issue 2: enabled flag now checks ttsEnabled && !ttsMuted in stage.tsx - Issue 4: remove unused browserAvailableVoices from useDiscussionTTS - Issue 5: remove dead code in audio-settings.tsx (Slider, Loader2, handleTTSVoiceChange, handleTTSSpeedChange, handleTestTTS, testingTTS, ttsTestStatus, ttsTestMessage, testText, ttsSpeed, setTTSSpeed, and unused browser-tts-preview imports) - Issue 6: shouldHold now checks queue length in addition to isPlayingRef - Issue 8: hide AgentVoicePill for teacher row in agent-bar.tsx (teacher voice is controlled in Settings) Co-Authored-By: Claude Sonnet 4.6 --- components/agent/agent-bar.tsx | 8 -- components/settings/audio-settings.tsx | 161 +------------------------ components/stage.tsx | 3 +- lib/hooks/use-discussion-tts.ts | 8 +- 4 files changed, 8 insertions(+), 172 deletions(-) diff --git 
a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 4deb2afde..25ba6a0e6 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -487,14 +487,6 @@ export function AgentBar() { {getAgentName(teacherAgent)} - {showVoice && ( - - )}
)} diff --git a/components/settings/audio-settings.tsx b/components/settings/audio-settings.tsx index f40bea301..64a15cf23 100644 --- a/components/settings/audio-settings.tsx +++ b/components/settings/audio-settings.tsx @@ -10,7 +10,6 @@ import { SelectTrigger, SelectValue, } from '@/components/ui/select'; -import { Slider } from '@/components/ui/slider'; import { Switch } from '@/components/ui/switch'; import { Button } from '@/components/ui/button'; import { useI18n } from '@/lib/hooks/use-i18n'; @@ -22,15 +21,10 @@ import { getASRSupportedLanguages, } from '@/lib/audio/constants'; import type { TTSProviderId, ASRProviderId } from '@/lib/audio/types'; -import { Volume2, Mic, MicOff, Loader2, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react'; +import { Volume2, Mic, MicOff, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react'; import { cn } from '@/lib/utils'; import azureVoicesData from '@/lib/audio/azure.json'; import { createLogger } from '@/lib/logger'; -import { - ensureVoicesLoaded, - isBrowserTTSAbortError, - playBrowserTTSPreview, -} from '@/lib/audio/browser-tts-preview'; const log = createLogger('AudioSettings'); @@ -74,11 +68,9 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { // TTS state const ttsProviderId = useSettingsStore((state) => state.ttsProviderId); const ttsVoice = useSettingsStore((state) => state.ttsVoice); - const ttsSpeed = useSettingsStore((state) => state.ttsSpeed); const ttsProvidersConfig = useSettingsStore((state) => state.ttsProvidersConfig); const setTTSProvider = useSettingsStore((state) => state.setTTSProvider); const setTTSVoice = useSettingsStore((state) => state.setTTSVoice); - const setTTSSpeed = useSettingsStore((state) => state.setTTSSpeed); const setTTSProviderConfig = useSettingsStore((state) => state.setTTSProviderConfig); // ASR state @@ -105,16 +97,6 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { onSave?.(); }; - const handleTTSVoiceChange = (voice: string) => 
{ - setTTSVoice(voice); - onSave?.(); - }; - - const handleTTSSpeedChange = (speed: number) => { - setTTSSpeed(speed); - onSave?.(); - }; - const handleTTSProviderConfigChange = ( providerId: TTSProviderId, config: Partial<{ apiKey: string; baseUrl: string; enabled: boolean }>, @@ -149,12 +131,6 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { const [selectedLocale, setSelectedLocale] = useState('all'); // Test state - const [testingTTS, setTestingTTS] = useState(false); - const [testText, setTestText] = useState(t('settings.ttsTestTextDefault')); - const [ttsTestStatus, setTTSTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>( - 'idle', - ); - const [ttsTestMessage, setTTSTestMessage] = useState(''); const [isRecording, setIsRecording] = useState(false); const [asrResult, setASRResult] = useState(''); const [asrTestStatus, setASRTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>( @@ -169,13 +145,6 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { const asrProvider = ASR_PROVIDERS[asrProviderId] ?? 
ASR_PROVIDERS['openai-whisper']; - // Update test text when language changes (derived state pattern) - const [prevT, setPrevT] = useState(() => t); - if (t !== prevT) { - setPrevT(t); - setTestText(t('settings.ttsTestTextDefault')); - } - // Reset locale filter when provider changes (derived state pattern) const [prevTTSProviderId, setPrevTTSProviderId] = useState(ttsProviderId); if (ttsProviderId !== prevTTSProviderId) { @@ -185,7 +154,7 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { } } - const stopTTSPreview = useCallback((resetState = true) => { + const stopTTSPreview = useCallback(() => { ttsTestRequestIdRef.current += 1; browserPreviewCancelRef.current?.(); browserPreviewCancelRef.current = null; @@ -197,9 +166,6 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { URL.revokeObjectURL(audioUrlRef.current); audioUrlRef.current = null; } - if (resetState) { - setTestingTTS(false); - } }, []); // Update voice selection when locale filter changes @@ -221,9 +187,7 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { }, [selectedLocale, ttsProviderId, azureVoices, setTTSVoice]); useEffect(() => { - stopTTSPreview(false); - setTTSTestStatus('idle'); - setTTSTestMessage(''); + stopTTSPreview(); }, [ttsProviderId, stopTTSPreview]); // Initialize and reset TTS voice when provider changes @@ -274,7 +238,7 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { useEffect(() => { return () => { - stopTTSPreview(false); + stopTTSPreview(); }; }, [stopTTSPreview]); @@ -287,123 +251,6 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { setASRResult(''); } - // Test TTS - const handleTestTTS = async () => { - if (!testText.trim()) { - return; - } - - const requestId = ttsTestRequestIdRef.current + 1; - ttsTestRequestIdRef.current = requestId; - - setTestingTTS(true); - setTTSTestStatus('testing'); - setTTSTestMessage(''); - - try { - if (ttsProviderId === 'browser-native-tts') { 
- if (!('speechSynthesis' in window)) { - setTTSTestStatus('error'); - setTTSTestMessage(t('settings.browserTTSNotSupported')); - return; - } - - const voices = await ensureVoicesLoaded(); - if (ttsTestRequestIdRef.current !== requestId) { - return; - } - if (voices.length === 0) { - setTTSTestStatus('error'); - setTTSTestMessage(t('settings.browserTTSNoVoices')); - return; - } - - const controller = playBrowserTTSPreview({ - text: testText, - voice: ttsVoice, - rate: ttsSpeed, - voices, - }); - browserPreviewCancelRef.current = controller.cancel; - await controller.promise; - - if (ttsTestRequestIdRef.current !== requestId) { - return; - } - setTTSTestStatus('success'); - setTTSTestMessage(t('settings.ttsTestSuccess')); - return; - } - - const requestBody: Record = { - text: testText, - audioId: 'tts-test', - ttsProviderId, - ttsVoice: ttsVoice, - ttsSpeed: ttsSpeed, - }; - - const apiKeyValue = ttsProvidersConfig[ttsProviderId]?.apiKey; - if (apiKeyValue && apiKeyValue.trim()) { - requestBody.ttsApiKey = apiKeyValue; - } - - const baseUrlValue = ttsProvidersConfig[ttsProviderId]?.baseUrl; - if (baseUrlValue && baseUrlValue.trim()) { - requestBody.ttsBaseUrl = baseUrlValue; - } - - const response = await fetch('/api/generate/tts', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(requestBody), - }); - - const data = await response - .json() - .catch(() => ({ success: false, error: response.statusText })); - if (ttsTestRequestIdRef.current !== requestId) { - return; - } - if (response.ok && data.success) { - const binaryStr = atob(data.base64); - const bytes = new Uint8Array(binaryStr.length); - for (let i = 0; i < binaryStr.length; i++) bytes[i] = binaryStr.charCodeAt(i); - const audioBlob = new Blob([bytes], { type: `audio/${data.format}` }); - if (audioUrlRef.current) { - URL.revokeObjectURL(audioUrlRef.current); - } - const audioUrl = URL.createObjectURL(audioBlob); - audioUrlRef.current = audioUrl; - if 
(audioRef.current) { - audioRef.current.src = audioUrl; - await audioRef.current.play(); - } - setTTSTestStatus('success'); - setTTSTestMessage(t('settings.ttsTestSuccess')); - } else { - setTTSTestStatus('error'); - setTTSTestMessage(data.error || t('settings.ttsTestFailed')); - } - } catch (error) { - if (ttsTestRequestIdRef.current !== requestId || isBrowserTTSAbortError(error)) { - return; - } - log.error('TTS test failed:', error); - setTTSTestStatus('error'); - setTTSTestMessage( - error instanceof Error && error.message - ? `${t('settings.ttsTestFailed')}: ${error.message}` - : t('settings.ttsTestFailed'), - ); - } finally { - if (ttsTestRequestIdRef.current === requestId) { - browserPreviewCancelRef.current = null; - setTestingTTS(false); - } - } - }; - // Test ASR const handleToggleASRRecording = async () => { if (isRecording) { diff --git a/components/stage.tsx b/components/stage.tsx index e8160371c..4aa21b7d6 100644 --- a/components/stage.tsx +++ b/components/stage.tsx @@ -103,6 +103,7 @@ export function Stage({ // Selected agents from settings store (Zustand) const selectedAgentIds = useSettingsStore((s) => s.selectedAgentIds); const ttsMuted = useSettingsStore((s) => s.ttsMuted); + const ttsEnabled = useSettingsStore((s) => s.ttsEnabled); // Generate participants from selected agents const participants = useMemo( @@ -123,7 +124,7 @@ export function Stage({ const [audioAgentId, setAudioAgentId] = useState(null); const discussionTTS = useDiscussionTTS({ - enabled: !ttsMuted, + enabled: ttsEnabled && !ttsMuted, agents: selectedAgents, onAudioStateChange: (agentId, state) => { setAudioAgentId(agentId); diff --git a/lib/hooks/use-discussion-tts.ts b/lib/hooks/use-discussion-tts.ts index 9b7525ccc..d869e0cf0 100644 --- a/lib/hooks/use-discussion-tts.ts +++ b/lib/hooks/use-discussion-tts.ts @@ -40,11 +40,7 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus onAudioStateChangeRef.current = onAudioStateChange; const 
processQueueRef = useRef<() => void>(() => {}); - const { - speak: browserSpeak, - cancel: browserCancel, - availableVoices: browserAvailableVoices, - } = useBrowserTTS({ + const { speak: browserSpeak, cancel: browserCancel } = useBrowserTTS({ rate: ttsSpeed, onEnd: () => { isPlayingRef.current = false; @@ -209,7 +205,7 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus useEffect(() => cleanup, [cleanup]); /** Returns true when TTS audio is still playing or queued — used by StreamBuffer hold logic. */ - const shouldHold = useCallback(() => isPlayingRef.current, []); + const shouldHold = useCallback(() => isPlayingRef.current || queueRef.current.length > 0, []); return { handleSegmentSealed, From 07fefd21e537890f0384676ae6edad87cf577e34 Mon Sep 17 00:00:00 2001 From: wyuc Date: Mon, 23 Mar 2026 14:38:55 +0800 Subject: [PATCH 57/65] =?UTF-8?q?fix(tts):=20address=20PR=20review=20?= =?UTF-8?q?=E2=80=94=20abort=20preview=20fetch,=20defer=20error=20recovery?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Add AbortController to voice preview server TTS fetch, abort on stopPreview to prevent stale responses on rapid switching 2. Use queueMicrotask for processQueue calls in error/ended handlers to prevent synchronous recursion if multiple items fail consecutively 3. 
Add ordering invariant comment on sealLastText's onSegmentSealed Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 6 ++++++ lib/buffer/stream-buffer.ts | 2 ++ lib/hooks/use-discussion-tts.ts | 6 +++--- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 25ba6a0e6..d3ca54c1d 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -45,6 +45,7 @@ function AgentVoicePill({ const [previewingId, setPreviewingId] = useState(null); const previewCancelRef = useRef<(() => void) | null>(null); const previewAudioRef = useRef(null); + const previewAbortRef = useRef(null); const displayName = (() => { for (const p of availableProviders) { @@ -59,6 +60,8 @@ function AgentVoicePill({ const stopPreview = useCallback(() => { previewCancelRef.current?.(); previewCancelRef.current = null; + previewAbortRef.current?.abort(); + previewAbortRef.current = null; if (previewAudioRef.current) { previewAudioRef.current.pause(); previewAudioRef.current.src = ''; @@ -96,6 +99,8 @@ function AgentVoicePill({ // Server TTS try { + const controller = new AbortController(); + previewAbortRef.current = controller; const providerConfig = ttsProvidersConfig[providerId]; const res = await fetch('/api/generate/tts', { method: 'POST', @@ -109,6 +114,7 @@ function AgentVoicePill({ ttsApiKey: providerConfig?.apiKey, ttsBaseUrl: providerConfig?.serverBaseUrl || providerConfig?.baseUrl, }), + signal: controller.signal, }); if (!res.ok) throw new Error('TTS error'); const data = await res.json(); diff --git a/lib/buffer/stream-buffer.ts b/lib/buffer/stream-buffer.ts index a354a22d9..fb8e21699 100644 --- a/lib/buffer/stream-buffer.ts +++ b/lib/buffer/stream-buffer.ts @@ -417,6 +417,8 @@ export class StreamBuffer { const item = this.items[i]; if (item.kind === 'text' && !item.sealed) { item.sealed = true; + // Ordering invariant: sealLastText() is called BEFORE 
pushAgentEnd/pushAgentStart, + // so this.currentAgentId still refers to the agent whose text is being sealed. this.cb.onSegmentSealed?.(item.messageId, item.partId, item.text, this.currentAgentId); break; } diff --git a/lib/hooks/use-discussion-tts.ts b/lib/hooks/use-discussion-tts.ts index d869e0cf0..9aacb37ca 100644 --- a/lib/hooks/use-discussion-tts.ts +++ b/lib/hooks/use-discussion-tts.ts @@ -145,12 +145,12 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus audio.addEventListener('ended', () => { isPlayingRef.current = false; onAudioStateChangeRef.current?.(item.agentId, 'idle'); - processQueueRef.current(); + queueMicrotask(() => processQueueRef.current()); }); audio.addEventListener('error', () => { isPlayingRef.current = false; onAudioStateChangeRef.current?.(item.agentId, 'idle'); - processQueueRef.current(); + queueMicrotask(() => processQueueRef.current()); }); await audio.play(); } catch (err) { @@ -159,7 +159,7 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus } isPlayingRef.current = false; onAudioStateChangeRef.current?.(item.agentId, 'idle'); - processQueueRef.current(); + queueMicrotask(() => processQueueRef.current()); } }, [enabled, ttsMuted, ttsProvidersConfig, ttsSpeed, playbackSpeed]); From 0e756f4ccb1f0f485703657f1c928f7a87228707 Mon Sep 17 00:00:00 2001 From: wyuc Date: Mon, 23 Mar 2026 14:46:01 +0800 Subject: [PATCH 58/65] fix(tts): restore teacher voice pill, respect voiceConfig override --- components/agent/agent-bar.tsx | 8 ++++++++ lib/hooks/use-discussion-tts.ts | 5 ++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index d3ca54c1d..3f6c9fc18 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -493,6 +493,14 @@ export function AgentBar() { {getAgentName(teacherAgent)} + {showVoice && ( + + )}
)} diff --git a/lib/hooks/use-discussion-tts.ts b/lib/hooks/use-discussion-tts.ts index 9aacb37ca..7596ac1e5 100644 --- a/lib/hooks/use-discussion-tts.ts +++ b/lib/hooks/use-discussion-tts.ts @@ -83,8 +83,11 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus } return { providerId: 'browser-native-tts', voiceId: 'default' }; } - // Teacher always uses the global lecture voice (same as course generation) + // Teacher: use voiceConfig if explicitly set, otherwise fall back to global lecture voice if (agent.role === 'teacher') { + if (agent.voiceConfig) { + return agent.voiceConfig; + } return { providerId: globalTtsProviderId, voiceId: globalTtsVoice }; } const index = agentIndexMap.current.get(agentId) ?? 0; From f489d5f25a750e67fa1e951f17d05f58f0fd09e2 Mon Sep 17 00:00:00 2001 From: wyuc Date: Mon, 23 Mar 2026 14:55:57 +0800 Subject: [PATCH 59/65] fix(tts): sync volume and mute to discussion TTS audio in real-time --- lib/hooks/use-discussion-tts.ts | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/hooks/use-discussion-tts.ts b/lib/hooks/use-discussion-tts.ts index 7596ac1e5..61c14a035 100644 --- a/lib/hooks/use-discussion-tts.ts +++ b/lib/hooks/use-discussion-tts.ts @@ -27,6 +27,7 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig); const ttsSpeed = useSettingsStore((s) => s.ttsSpeed); const ttsMuted = useSettingsStore((s) => s.ttsMuted); + const ttsVolume = useSettingsStore((s) => s.ttsVolume); const playbackSpeed = useSettingsStore((s) => s.playbackSpeed); // Global lecture voice — used as fallback for teacher agent const globalTtsProviderId = useSettingsStore((s) => s.ttsProviderId); @@ -144,6 +145,7 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus const audioUrl = `data:audio/${data.format || 'mp3'};base64,${data.base64}`; const audio = new Audio(audioUrl); 
audio.playbackRate = playbackSpeed; + audio.volume = ttsMuted ? 0 : ttsVolume; audioRef.current = audio; audio.addEventListener('ended', () => { isPlayingRef.current = false; @@ -164,7 +166,7 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus onAudioStateChangeRef.current?.(item.agentId, 'idle'); queueMicrotask(() => processQueueRef.current()); } - }, [enabled, ttsMuted, ttsProvidersConfig, ttsSpeed, playbackSpeed]); + }, [enabled, ttsMuted, ttsVolume, ttsProvidersConfig, ttsSpeed, playbackSpeed]); processQueueRef.current = processQueue; @@ -205,6 +207,13 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus } }, [playbackSpeed]); + // Sync volume and mute to currently playing audio in real-time + useEffect(() => { + if (audioRef.current) { + audioRef.current.volume = ttsMuted ? 0 : ttsVolume; + } + }, [ttsVolume, ttsMuted]); + useEffect(() => cleanup, [cleanup]); /** Returns true when TTS audio is still playing or queued — used by StreamBuffer hold logic. */ From 6f84b5fd740be8d23170cf915c53898553e9aebd Mon Sep 17 00:00:00 2001 From: wyuc Date: Mon, 23 Mar 2026 16:33:27 +0800 Subject: [PATCH 60/65] fix(tts): allow browser-native TTS alongside server providers --- components/agent/agent-bar.tsx | 25 ++++++++++++------------- lib/audio/voice-resolver.ts | 4 ++++ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 3f6c9fc18..1a1b54893 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -270,19 +270,18 @@ export function AgentBar() { const nonTeacherSelected = selectedAgents.filter((a) => a.role !== 'teacher'); const serverProviders = getAvailableProvidersWithVoices(ttsProvidersConfig); - // Only show browser native when no server providers are available - const availableProviders: ProviderWithVoices[] = - serverProviders.length > 0 - ? serverProviders - : browserVoices.length > 0 - ? 
[ - { - providerId: 'browser-native-tts' as TTSProviderId, - providerName: 'Browser Native', - voices: browserVoices.map((v) => ({ id: v.voiceURI, name: v.name })), - }, - ] - : []; + const availableProviders: ProviderWithVoices[] = [ + ...serverProviders, + ...(browserVoices.length > 0 + ? [ + { + providerId: 'browser-native-tts' as TTSProviderId, + providerName: 'Browser Native', + voices: browserVoices.map((v) => ({ id: v.voiceURI, name: v.name })), + }, + ] + : []), + ]; const showVoice = availableProviders.length > 0; useEffect(() => { diff --git a/lib/audio/voice-resolver.ts b/lib/audio/voice-resolver.ts index 8eb167038..2018add3f 100644 --- a/lib/audio/voice-resolver.ts +++ b/lib/audio/voice-resolver.ts @@ -19,6 +19,10 @@ export function resolveAgentVoice( ): ResolvedVoice { // Agent-specific config if (agent.voiceConfig) { + // Browser-native voices are dynamic (not in static registry), so skip validation + if (agent.voiceConfig.providerId === 'browser-native-tts') { + return agent.voiceConfig; + } const list = getServerVoiceList(agent.voiceConfig.providerId); if (list.includes(agent.voiceConfig.voiceId)) { return agent.voiceConfig; From 7292dc7498c03830327b4e8be8397f0d7e762c48 Mon Sep 17 00:00:00 2001 From: wyuc Date: Mon, 23 Mar 2026 16:37:10 +0800 Subject: [PATCH 61/65] fix(tts): remove top padding from voice popover content --- components/agent/agent-bar.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 1a1b54893..0912b6f02 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -172,7 +172,7 @@ function AgentVoicePill({ side="bottom" align="end" sideOffset={4} - className="w-52 p-1 max-h-64 overflow-y-auto" + className="w-52 px-1 pb-1 pt-0 max-h-64 overflow-y-auto" onClick={(e) => e.stopPropagation()} onPointerDown={(e) => e.stopPropagation()} > From 460cbf2555ce9f97f9a9c7bf2394946508954697 Mon Sep 17 00:00:00 2001 From: wyuc Date: Mon, 
23 Mar 2026 17:03:40 +0800 Subject: [PATCH 62/65] fix(tts): make selectedAgents reactive to voiceConfig changes --- components/stage.tsx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/components/stage.tsx b/components/stage.tsx index f7c6f1685..dc174419a 100644 --- a/components/stage.tsx +++ b/components/stage.tsx @@ -112,12 +112,12 @@ export function Stage({ ); // Resolved AgentConfig array for hooks that need full agent objects - const selectedAgents = useMemo(() => { - const registry = useAgentRegistry.getState(); - return selectedAgentIds - .map((id) => registry.getAgent(id)) - .filter((a): a is AgentConfig => a != null); - }, [selectedAgentIds]); + // Subscribe reactively so voiceConfig changes in AgentBar trigger re-resolution + const allAgents = useAgentRegistry((s) => s.listAgents()); + const selectedAgents = useMemo( + () => allAgents.filter((a) => selectedAgentIds.includes(a.id)), + [allAgents, selectedAgentIds], + ); // Discussion TTS: audio indicator state const [audioIndicatorState, setAudioIndicatorState] = useState('idle'); From 88fae236b7c31de957f282251e426738b6e34646 Mon Sep 17 00:00:00 2001 From: wyuc Date: Mon, 23 Mar 2026 17:09:02 +0800 Subject: [PATCH 63/65] fix(tts): use agents record instead of listAgents() to avoid infinite loop --- components/stage.tsx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/components/stage.tsx b/components/stage.tsx index dc174419a..0684afc43 100644 --- a/components/stage.tsx +++ b/components/stage.tsx @@ -112,11 +112,11 @@ export function Stage({ ); // Resolved AgentConfig array for hooks that need full agent objects - // Subscribe reactively so voiceConfig changes in AgentBar trigger re-resolution - const allAgents = useAgentRegistry((s) => s.listAgents()); + // Subscribe to the agents record so voiceConfig changes trigger re-resolution + const agentsRecord = useAgentRegistry((s) => s.agents); const selectedAgents = useMemo( - () => allAgents.filter((a) 
=> selectedAgentIds.includes(a.id)), - [allAgents, selectedAgentIds], + () => selectedAgentIds.map((id) => agentsRecord[id]).filter((a): a is AgentConfig => a != null), + [agentsRecord, selectedAgentIds], ); // Discussion TTS: audio indicator state From 9b2f91c0d51ca6c48f22f7b311095db64bdb039d Mon Sep 17 00:00:00 2001 From: wyuc Date: Mon, 23 Mar 2026 17:15:25 +0800 Subject: [PATCH 64/65] fix(tts): single source of truth for teacher voice Teacher voice pill now reads/writes global ttsProviderId + ttsVoice (same settings used by lecture TTS). This ensures lecture and discussion always use the same teacher voice. Student agents still use per-agent voiceConfig. Co-Authored-By: Claude Opus 4.6 (1M context) --- components/agent/agent-bar.tsx | 212 +++++++++++++++++++++++++++++++- lib/hooks/use-discussion-tts.ts | 5 +- 2 files changed, 210 insertions(+), 7 deletions(-) diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx index 0912b6f02..27585ecab 100644 --- a/components/agent/agent-bar.tsx +++ b/components/agent/agent-bar.tsx @@ -238,6 +238,214 @@ function AgentVoicePill({ ); } +/** + * Teacher voice pill — reads/writes global ttsProviderId + ttsVoice (single source of truth). + * This ensures lecture and discussion use the same voice for the teacher. 
+ */ +function TeacherVoicePill({ + availableProviders, + disabled, +}: { + availableProviders: ProviderWithVoices[]; + disabled?: boolean; +}) { + const ttsProviderId = useSettingsStore((s) => s.ttsProviderId); + const ttsVoice = useSettingsStore((s) => s.ttsVoice); + const setTTSProvider = useSettingsStore((s) => s.setTTSProvider); + const setTTSVoice = useSettingsStore((s) => s.setTTSVoice); + const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig); + const [popoverOpen, setPopoverOpen] = useState(false); + const [previewingId, setPreviewingId] = useState(null); + const previewCancelRef = useRef<(() => void) | null>(null); + const previewAudioRef = useRef(null); + const previewAbortRef = useRef(null); + + const displayName = (() => { + for (const p of availableProviders) { + if (p.providerId === ttsProviderId) { + const v = p.voices.find((voice) => voice.id === ttsVoice); + if (v) return v.name; + } + } + return ttsVoice || 'default'; + })(); + + const stopPreview = useCallback(() => { + previewCancelRef.current?.(); + previewCancelRef.current = null; + previewAbortRef.current?.abort(); + previewAbortRef.current = null; + if (previewAudioRef.current) { + previewAudioRef.current.pause(); + previewAudioRef.current.src = ''; + previewAudioRef.current = null; + } + setPreviewingId(null); + }, []); + + const handlePreview = useCallback( + async (providerId: TTSProviderId, voiceId: string) => { + const key = `${providerId}::${voiceId}`; + if (previewingId === key) { + stopPreview(); + return; + } + stopPreview(); + setPreviewingId(key); + + const courseLanguage = + (typeof localStorage !== 'undefined' && localStorage.getItem('generationLanguage')) || + 'zh-CN'; + const previewText = courseLanguage === 'en-US' ? 
'Welcome to AI Classroom' : '欢迎来到AI课堂'; + + if (providerId === 'browser-native-tts') { + const { promise, cancel } = playBrowserTTSPreview({ text: previewText, voice: voiceId }); + previewCancelRef.current = cancel; + try { + await promise; + } catch { + // ignore abort + } + setPreviewingId(null); + return; + } + + try { + const controller = new AbortController(); + previewAbortRef.current = controller; + const providerConfig = ttsProvidersConfig[providerId]; + const res = await fetch('/api/generate/tts', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + text: previewText, + audioId: 'voice-preview', + ttsProviderId: providerId, + ttsVoice: voiceId, + ttsSpeed: 1, + ttsApiKey: providerConfig?.apiKey, + ttsBaseUrl: providerConfig?.serverBaseUrl || providerConfig?.baseUrl, + }), + signal: controller.signal, + }); + if (!res.ok) throw new Error('TTS error'); + const data = await res.json(); + if (!data.base64) throw new Error('No audio'); + const audio = new Audio(`data:audio/${data.format || 'mp3'};base64,${data.base64}`); + previewAudioRef.current = audio; + audio.addEventListener('ended', () => setPreviewingId(null)); + audio.addEventListener('error', () => setPreviewingId(null)); + await audio.play(); + } catch { + setPreviewingId(null); + } + }, + [previewingId, stopPreview, ttsProvidersConfig], + ); + + useEffect(() => () => stopPreview(), [stopPreview]); + + if (disabled) { + return ( +
e.stopPropagation()} + onPointerDown={(e) => e.stopPropagation()} + className="flex items-center gap-1 h-5 w-[88px] rounded-full bg-muted/40 px-2 text-[10px] text-muted-foreground/30 shrink-0 cursor-not-allowed" + > + + {displayName} +
+ ); + } + + return ( + { + setPopoverOpen(open); + if (!open) stopPreview(); + }} + > + + + + e.stopPropagation()} + onPointerDown={(e) => e.stopPropagation()} + > + {availableProviders.map((provider) => ( +
+
+ {provider.providerName} +
+ {provider.voices.map((voice) => { + const isActive = ttsProviderId === provider.providerId && ttsVoice === voice.id; + const previewKey = `${provider.providerId}::${voice.id}`; + const isPreviewing = previewingId === previewKey; + return ( +
+ + +
+ ); + })} +
+ ))} +
+
+ ); +} + export function AgentBar() { const { t } = useI18n(); const { listAgents } = useAgentRegistry(); @@ -493,9 +701,7 @@ export function AgentBar() { {getAgentName(teacherAgent)} {showVoice && ( - diff --git a/lib/hooks/use-discussion-tts.ts b/lib/hooks/use-discussion-tts.ts index 61c14a035..2075cf053 100644 --- a/lib/hooks/use-discussion-tts.ts +++ b/lib/hooks/use-discussion-tts.ts @@ -84,11 +84,8 @@ export function useDiscussionTTS({ enabled, agents, onAudioStateChange }: Discus } return { providerId: 'browser-native-tts', voiceId: 'default' }; } - // Teacher: use voiceConfig if explicitly set, otherwise fall back to global lecture voice + // Teacher: always use global lecture voice (single source of truth with settings) if (agent.role === 'teacher') { - if (agent.voiceConfig) { - return agent.voiceConfig; - } return { providerId: globalTtsProviderId, voiceId: globalTtsVoice }; } const index = agentIndexMap.current.get(agentId) ?? 0; From 2ecbe8c8514b152c230ac3a4ad9c5021c2d64e43 Mon Sep 17 00:00:00 2001 From: wyuc Date: Mon, 23 Mar 2026 17:25:57 +0800 Subject: [PATCH 65/65] feat: add avatar descriptions for smarter LLM avatar selection Each avatar now has a one-line description (appearance, vibe) sent to the agent-profiles generation API. LLM picks avatars matching agent personality instead of guessing from file paths. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- app/api/generate/agent-profiles/route.ts | 14 +++++- app/generation-preview/page.tsx | 63 +++++++++++++++++++----- 2 files changed, 62 insertions(+), 15 deletions(-) diff --git a/app/api/generate/agent-profiles/route.ts b/app/api/generate/agent-profiles/route.ts index 5d0041903..fbf4d4e46 100644 --- a/app/api/generate/agent-profiles/route.ts +++ b/app/api/generate/agent-profiles/route.ts @@ -36,6 +36,7 @@ interface RequestBody { sceneOutlines?: { title: string; description?: string }[]; language: string; availableAvatars: string[]; + avatarDescriptions?: Array<{ path: string; desc: string }>; availableVoices?: Array<{ providerId: string; voiceId: string; voiceName: string }>; } @@ -51,7 +52,14 @@ function stripCodeFences(text: string): string { export async function POST(req: NextRequest) { try { const body = (await req.json()) as RequestBody; - const { stageInfo, sceneOutlines, language, availableAvatars, availableVoices } = body; + const { + stageInfo, + sceneOutlines, + language, + availableAvatars, + avatarDescriptions, + availableVoices, + } = body; // ── Validate required fields ── if (!stageInfo?.name) { @@ -112,8 +120,10 @@ Requirements: - Priority values: teacher=10 (highest), assistant=7, student=4-6 - Each agent needs: name, role, persona (2-3 sentences describing personality and teaching/learning style) - Names and personas must be in language: ${language} -- Each agent must be assigned one avatar from this list: ${JSON.stringify(availableAvatars)} +- Each agent must be assigned one avatar from this list: ${JSON.stringify(avatarDescriptions && avatarDescriptions.length > 0 ? 
avatarDescriptions.map((a) => ({ path: a.path, description: a.desc })) : availableAvatars)} + - Pick an avatar that visually matches the agent's personality and role - Try to use different avatars for each agent + - Use the "path" value as the avatar field in the output - Each agent must be assigned one color from this list: ${JSON.stringify(COLOR_PALETTE)} - Each agent must have a different color ${voicePrompt} diff --git a/app/generation-preview/page.tsx b/app/generation-preview/page.tsx index 855841d75..9272d0461 100644 --- a/app/generation-preview/page.tsx +++ b/app/generation-preview/page.tsx @@ -380,18 +380,54 @@ function GenerationPreviewContent() { try { const allAvatars = [ - '/avatars/assist.png', - '/avatars/assist-2.png', - '/avatars/clown.png', - '/avatars/clown-2.png', - '/avatars/curious.png', - '/avatars/curious-2.png', - '/avatars/note-taker.png', - '/avatars/note-taker-2.png', - '/avatars/teacher.png', - '/avatars/teacher-2.png', - '/avatars/thinker.png', - '/avatars/thinker-2.png', + { + path: '/avatars/teacher.png', + desc: 'Male teacher with glasses, holding a book, green background', + }, + { + path: '/avatars/teacher-2.png', + desc: 'Female teacher with long dark hair, blue traditional outfit, gentle expression', + }, + { + path: '/avatars/assist.png', + desc: 'Young female assistant with glasses, pink background, friendly smile', + }, + { + path: '/avatars/assist-2.png', + desc: 'Young female in orange top and purple overalls, cheerful and approachable', + }, + { + path: '/avatars/clown.png', + desc: 'Energetic girl with glasses pointing up, green shirt, lively and fun', + }, + { + path: '/avatars/clown-2.png', + desc: 'Playful girl with curly hair doing rock gesture, blue shirt, humorous vibe', + }, + { + path: '/avatars/curious.png', + desc: 'Surprised boy with glasses, hand on cheek, curious expression', + }, + { + path: '/avatars/curious-2.png', + desc: 'Boy with backpack holding a book and question mark bubble, inquisitive', + }, + { + 
path: '/avatars/note-taker.png', + desc: 'Studious boy with glasses, blue shirt, calm and organized', + }, + { + path: '/avatars/note-taker-2.png', + desc: 'Active boy with yellow backpack waving, blue outfit, enthusiastic learner', + }, + { + path: '/avatars/thinker.png', + desc: 'Thoughtful girl with hand on chin, purple background, contemplative', + }, + { + path: '/avatars/thinker-2.png', + desc: 'Girl reading a book intently, long dark hair, intellectual and focused', + }, ]; const getAvailableVoicesForGeneration = () => { @@ -412,7 +448,8 @@ function GenerationPreviewContent() { body: JSON.stringify({ stageInfo: { name: stage.name, description: stage.description }, language: currentSession.requirements.language || 'zh-CN', - availableAvatars: allAvatars, + availableAvatars: allAvatars.map((a) => a.path), + avatarDescriptions: allAvatars.map((a) => ({ path: a.path, desc: a.desc })), availableVoices: getAvailableVoicesForGeneration(), }), signal,