From 496b5d924eff20efb9e87c37e76f140abd5d0ec6 Mon Sep 17 00:00:00 2001 From: Yizuki_Ame Date: Mon, 16 Mar 2026 23:26:05 +0800 Subject: [PATCH 1/4] fix: use browser speechSynthesis for playback when browser-native-tts is selected Previously, selecting browser-native-tts as the TTS provider would produce sound in the settings test but remain silent during classroom playback. This happened because: 1. The scene generator correctly skipped pre-generation for browser TTS (it runs client-side, not via API) 2. The playback engine fell back to a silent reading timer when no pre-generated audio was found, instead of calling speechSynthesis This commit adds Web Speech API integration directly in the PlaybackEngine: - New playBrowserTTS() method speaks text via speechSynthesis - Properly wires onend/onerror to advance to the next action - pause()/resume() now handle speechSynthesis.pause()/resume() - stop() and handleUserInterrupt() cancel browser TTS Fixes #25, fixes #12, fixes #5 --- lib/playback/engine.ts | 87 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 4 deletions(-) diff --git a/lib/playback/engine.ts b/lib/playback/engine.ts index 4746e7fe7..7f2a3bee3 100644 --- a/lib/playback/engine.ts +++ b/lib/playback/engine.ts @@ -36,6 +36,7 @@ import type { import type { AudioPlayer } from '@/lib/utils/audio-player'; import { ActionEngine } from '@/lib/action/engine'; import { useCanvasStore } from '@/lib/store/canvas'; +import { useSettingsStore } from '@/lib/store/settings'; import { createLogger } from '@/lib/logger'; const log = createLogger('PlaybackEngine'); @@ -68,6 +69,8 @@ export class PlaybackEngine { // Reading-time timer for speech actions without pre-generated audio (TTS disabled) private speechTimer: ReturnType | null = null; private speechTimerStart: number = 0; // Date.now() when timer was scheduled + // Browser-native TTS state (Web Speech API) + private browserTTSActive: boolean = false; private speechTimerRemaining: number = 0; 
// remaining ms (set on pause) constructor( @@ -149,8 +152,12 @@ export class PlaybackEngine { } this.setMode('paused'); // Freeze TTS — but skip if waiting on ProactiveCard (no active speech) - if (!this.currentTrigger && this.audioPlayer.isPlaying()) { - this.audioPlayer.pause(); + if (!this.currentTrigger) { + if (this.browserTTSActive) { + window.speechSynthesis?.pause(); + } else if (this.audioPlayer.isPlaying()) { + this.audioPlayer.pause(); + } } } else if (this.mode === 'live') { this.setMode('paused'); @@ -178,7 +185,10 @@ export class PlaybackEngine { } else { // Resume lecture this.setMode('playing'); - if (this.audioPlayer.hasActiveAudio()) { + if (this.browserTTSActive) { + // Browser TTS is paused — resume it; utterance.onend will call processNext + window.speechSynthesis?.resume(); + } else if (this.audioPlayer.hasActiveAudio()) { // Audio is paused — resume it; TTS onend will call processNext this.audioPlayer.resume(); } else if (this.speechTimerRemaining > 0) { @@ -203,6 +213,7 @@ export class PlaybackEngine { // synchronous onend callbacks (see handleUserInterrupt for details). 
this.setMode('idle'); this.audioPlayer.stop(); + this.cancelBrowserTTS(); this.actionEngine.clearEffects(); if (this.triggerDelayTimer) { clearTimeout(this.triggerDelayTimer); @@ -311,6 +322,7 @@ export class PlaybackEngine { this.currentTopicState = 'active'; this.setMode('live'); this.audioPlayer.stop(); + this.cancelBrowserTTS(); this.callbacks.onUserInterrupt?.(text); } @@ -436,7 +448,20 @@ export class PlaybackEngine { this.audioPlayer .play(speechAction.audioId || '') .then((audioStarted) => { - if (!audioStarted) scheduleReadingTimer(); + if (!audioStarted) { + // No pre-generated audio — try browser-native TTS if selected + const settings = useSettingsStore.getState(); + if ( + settings.ttsEnabled && + settings.ttsProviderId === 'browser-native-tts' && + typeof window !== 'undefined' && + window.speechSynthesis + ) { + this.playBrowserTTS(speechAction); + } else { + scheduleReadingTimer(); + } + } }) .catch((err) => { log.error('TTS error:', err); @@ -521,4 +546,58 @@ export class PlaybackEngine { break; } } + + // ==================== Browser Native TTS ==================== + + /** + * Play text using the Web Speech API (browser-native TTS). + * Used when no pre-generated audio exists and browser-native-tts is selected. + */ + private playBrowserTTS(speechAction: SpeechAction): void { + const settings = useSettingsStore.getState(); + const utterance = new SpeechSynthesisUtterance(speechAction.text); + + // Apply settings + const speed = this.callbacks.getPlaybackSpeed?.() ?? 1; + utterance.rate = (settings.ttsSpeed ?? 1) * speed; + utterance.volume = settings.ttsMuted ? 0 : (settings.ttsVolume ?? 
1); + + // Set voice if configured + if (settings.ttsVoice) { + const voices = window.speechSynthesis.getVoices(); + const voice = voices.find((v) => v.voiceURI === settings.ttsVoice); + if (voice) { + utterance.voice = voice; + utterance.lang = voice.lang; + } + } + + this.browserTTSActive = true; + + utterance.onend = () => { + this.browserTTSActive = false; + this.callbacks.onSpeechEnd?.(); + if (this.mode === 'playing') this.processNext(); + }; + + utterance.onerror = (event) => { + this.browserTTSActive = false; + // 'canceled' is expected when stop/pause is called — not a real error + if (event.error !== 'canceled') { + log.warn('Browser TTS error:', event.error); + } + this.callbacks.onSpeechEnd?.(); + if (this.mode === 'playing') this.processNext(); + }; + + window.speechSynthesis.speak(utterance); + } + + /** Cancel any active browser-native TTS */ + private cancelBrowserTTS(): void { + if (this.browserTTSActive) { + this.browserTTSActive = false; + window.speechSynthesis?.cancel(); + } + } } From fded1c8751b249e38a04175159c91a222e3af50e Mon Sep 17 00:00:00 2001 From: Yizuki_Ame <104178195+YizukiAme@users.noreply.github.com> Date: Tue, 17 Mar 2026 20:47:57 +0800 Subject: [PATCH 2/4] fix: handle "default" ttsVoice, extract CJK_LANG_THRESHOLD constant - When ttsVoice is "default" (set by Browser Native TTS which has no voice picker), the voiceURI lookup silently fails and no lang is set, causing Chinese text to be spoken with an English voice. - Extract the 0.3 CJK detection threshold as a named constant CJK_LANG_THRESHOLD with JSDoc explaining the rationale. - Fall through to language auto-detection when voice lookup fails, regardless of the reason (missing voice, "default" sentinel, etc.). 
--- lib/playback/engine.ts | 141 +++++++++++++++++++++++++++++++++++------ 1 file changed, 123 insertions(+), 18 deletions(-) diff --git a/lib/playback/engine.ts b/lib/playback/engine.ts index 7f2a3bee3..4afe52b5e 100644 --- a/lib/playback/engine.ts +++ b/lib/playback/engine.ts @@ -41,6 +41,13 @@ import { createLogger } from '@/lib/logger'; const log = createLogger('PlaybackEngine'); +/** + * If more than 30% of characters are CJK, treat the text as Chinese. + * Intentionally low: mixed Chinese text often contains punctuation, + * numbers, and short Latin fragments (e.g. "AI课堂"). + */ +const CJK_LANG_THRESHOLD = 0.3; + export class PlaybackEngine { private scenes: Scene[] = []; private sceneIndex: number = 0; @@ -71,6 +78,9 @@ export class PlaybackEngine { private speechTimerStart: number = 0; // Date.now() when timer was scheduled // Browser-native TTS state (Web Speech API) private browserTTSActive: boolean = false; + private browserTTSChunks: string[] = []; // sentence-level chunks for sequential playback + private browserTTSChunkIndex: number = 0; // current chunk being spoken + private browserTTSPausedChunks: string[] = []; // remaining chunks saved on pause (for cancel+re-speak) private speechTimerRemaining: number = 0; // remaining ms (set on pause) constructor( @@ -154,7 +164,12 @@ export class PlaybackEngine { // Freeze TTS — but skip if waiting on ProactiveCard (no active speech) if (!this.currentTrigger) { if (this.browserTTSActive) { - window.speechSynthesis?.pause(); + // Cancel+re-speak pattern: save remaining chunks for resume. + // speechSynthesis.pause()/resume() is broken on Firefox, so we + // cancel now and re-speak from current chunk onward on resume. 
+ this.browserTTSPausedChunks = this.browserTTSChunks.slice(this.browserTTSChunkIndex); + window.speechSynthesis?.cancel(); + // Note: cancel fires onerror('canceled'), which we ignore (see playBrowserTTSChunk) } else if (this.audioPlayer.isPlaying()) { this.audioPlayer.pause(); } @@ -185,9 +200,13 @@ export class PlaybackEngine { } else { // Resume lecture this.setMode('playing'); - if (this.browserTTSActive) { - // Browser TTS is paused — resume it; utterance.onend will call processNext - window.speechSynthesis?.resume(); + if (this.browserTTSPausedChunks.length > 0) { + // Browser TTS was paused via cancel — re-speak remaining chunks + this.browserTTSActive = true; + this.browserTTSChunks = this.browserTTSPausedChunks; + this.browserTTSChunkIndex = 0; + this.browserTTSPausedChunks = []; + this.playBrowserTTSChunk(); } else if (this.audioPlayer.hasActiveAudio()) { // Audio is paused — resume it; TTS onend will call processNext this.audioPlayer.resume(); @@ -549,54 +568,140 @@ export class PlaybackEngine { // ==================== Browser Native TTS ==================== + /** + * Split text into sentence-level chunks for sequential playback. + * Chrome has a bug where utterances >~15s are silently cut off and onend + * never fires, causing the engine to hang. Chunking avoids this. + */ + private splitIntoChunks(text: string): string[] { + // Split on sentence-ending punctuation (Latin + CJK) and newlines + const chunks = text + .split(/(?<=[.!?。!?\n])\s*/) + .map((s) => s.trim()) + .filter((s) => s.length > 0); + // If splitting produced nothing (no punctuation), return the original text + return chunks.length > 0 ? chunks : [text]; + } + /** * Play text using the Web Speech API (browser-native TTS). - * Used when no pre-generated audio exists and browser-native-tts is selected. + * Splits text into sentence-level chunks to avoid Chrome's ~15s cutoff. + * Uses cancel+re-speak for pause/resume (Firefox compatibility). 
*/ private playBrowserTTS(speechAction: SpeechAction): void { + this.browserTTSChunks = this.splitIntoChunks(speechAction.text); + this.browserTTSChunkIndex = 0; + this.browserTTSPausedChunks = []; + this.browserTTSActive = true; + this.playBrowserTTSChunk(); + } + + /** Speak the current chunk; on completion, advance to next or finish. */ + private async playBrowserTTSChunk(): Promise<void> { + if (this.browserTTSChunkIndex >= this.browserTTSChunks.length) { + // All chunks done + this.browserTTSActive = false; + this.browserTTSChunks = []; + this.callbacks.onSpeechEnd?.(); + if (this.mode === 'playing') this.processNext(); + return; + } + const settings = useSettingsStore.getState(); - const utterance = new SpeechSynthesisUtterance(speechAction.text); + const chunkText = this.browserTTSChunks[this.browserTTSChunkIndex]; + const utterance = new SpeechSynthesisUtterance(chunkText); // Apply settings const speed = this.callbacks.getPlaybackSpeed?.() ?? 1; utterance.rate = (settings.ttsSpeed ?? 1) * speed; utterance.volume = settings.ttsMuted ? 0 : (settings.ttsVolume ?? 1); - // Set voice if configured - if (settings.ttsVoice) { - const voices = window.speechSynthesis.getVoices(); + // Ensure voices are loaded (Chrome loads them asynchronously) + const voices = await this.ensureVoicesLoaded(); + + // Set voice: try user's configured voice, fall back to auto-detect language + let voiceFound = false; + if (settings.ttsVoice && settings.ttsVoice !== 'default') { const voice = voices.find((v) => v.voiceURI === settings.ttsVoice); if (voice) { utterance.voice = voice; utterance.lang = voice.lang; + voiceFound = true; } } - - this.browserTTSActive = true; + if (!voiceFound) { + // No usable voice configured — detect text language so the browser + // auto-selects an appropriate voice. + const cjkRatio = + (chunkText.match(/[\u4e00-\u9fff\u3400-\u4dbf]/g) || []).length / chunkText.length; + utterance.lang = cjkRatio > CJK_LANG_THRESHOLD ? 
'zh-CN' : 'en-US'; + } utterance.onend = () => { - this.browserTTSActive = false; - this.callbacks.onSpeechEnd?.(); - if (this.mode === 'playing') this.processNext(); + this.browserTTSChunkIndex++; + if (this.mode === 'playing') { + this.playBrowserTTSChunk(); // next chunk + } }; utterance.onerror = (event) => { - this.browserTTSActive = false; // 'canceled' is expected when stop/pause is called — not a real error if (event.error !== 'canceled') { - log.warn('Browser TTS error:', event.error); + log.warn('Browser TTS chunk error:', event.error); + // Skip failed chunk, try next + this.browserTTSChunkIndex++; + if (this.mode === 'playing') { + this.playBrowserTTSChunk(); + } } - this.callbacks.onSpeechEnd?.(); - if (this.mode === 'playing') this.processNext(); + // On 'canceled': do nothing — pause handler already saved state }; window.speechSynthesis.speak(utterance); } + /** + * Wait for speechSynthesis voices to load (Chrome loads them asynchronously). + * Caches result so subsequent calls return immediately. 
+ */ + private cachedVoices: SpeechSynthesisVoice[] | null = null; + private async ensureVoicesLoaded(): Promise<SpeechSynthesisVoice[]> { + if (this.cachedVoices && this.cachedVoices.length > 0) { + return this.cachedVoices; + } + + let voices = window.speechSynthesis.getVoices(); + if (voices.length > 0) { + this.cachedVoices = voices; + return voices; + } + + // Chrome: voices load asynchronously — wait for the voiceschanged event + await new Promise<void>((resolve) => { + const onVoicesChanged = () => { + window.speechSynthesis.removeEventListener('voiceschanged', onVoicesChanged); + resolve(); + }; + window.speechSynthesis.addEventListener('voiceschanged', onVoicesChanged); + // Timeout after 2s to avoid hanging + setTimeout(() => { + window.speechSynthesis.removeEventListener('voiceschanged', onVoicesChanged); + resolve(); + }, 2000); + }); + + voices = window.speechSynthesis.getVoices(); + this.cachedVoices = voices; + return voices; + } + /** Cancel any active browser-native TTS */ private cancelBrowserTTS(): void { if (this.browserTTSActive) { this.browserTTSActive = false; + this.browserTTSChunks = []; + this.browserTTSChunkIndex = 0; + this.browserTTSPausedChunks = []; window.speechSynthesis?.cancel(); } } From 4350a8e92a2bd0e49f947899f783ab878518a5f9 Mon Sep 17 00:00:00 2001 From: Yizuki_Ame Date: Wed, 18 Mar 2026 20:28:14 +0800 Subject: [PATCH 3/4] fix: support browser-native tts previews --- components/audio/tts-config-popover.tsx | 110 ++++++++++-- components/generation/media-popover.tsx | 97 +++++++++-- components/settings/audio-settings.tsx | 110 ++++++++---- components/settings/tts-settings.tsx | 79 +++++++-- lib/audio/browser-tts-preview.ts | 211 ++++++++++++++++++++++++ lib/i18n/settings.ts | 2 + 6 files changed, 542 insertions(+), 67 deletions(-) create mode 100644 lib/audio/browser-tts-preview.ts diff --git a/components/audio/tts-config-popover.tsx b/components/audio/tts-config-popover.tsx index bda090554..176ec3d3a 100644 --- a/components/audio/tts-config-popover.tsx +++ 
b/components/audio/tts-config-popover.tsx @@ -1,7 +1,8 @@ 'use client'; -import { useState, useRef, useCallback, useMemo } from 'react'; +import { useState, useRef, useCallback, useMemo, useEffect } from 'react'; import { Volume2, Play, Loader2 } from 'lucide-react'; +import { toast } from 'sonner'; import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover'; import { Select, @@ -16,6 +17,11 @@ import { cn } from '@/lib/utils'; import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; import { getTTSVoices } from '@/lib/audio/constants'; +import { + ensureVoicesLoaded, + isBrowserTTSAbortError, + playBrowserTTSPreview, +} from '@/lib/audio/browser-tts-preview'; /** Extract the English name from voice name format "ChineseName (English)" */ function getVoiceDisplayName(name: string, lang: string): string { @@ -31,11 +37,14 @@ export function TtsConfigPopover() { const [open, setOpen] = useState(false); const [previewing, setPreviewing] = useState(false); const audioRef = useRef(null); + const browserPreviewCancelRef = useRef<(() => void) | null>(null); + const previewRequestIdRef = useRef(0); const ttsEnabled = useSettingsStore((s) => s.ttsEnabled); const setTTSEnabled = useSettingsStore((s) => s.setTTSEnabled); const ttsProviderId = useSettingsStore((s) => s.ttsProviderId); const ttsVoice = useSettingsStore((s) => s.ttsVoice); + const ttsSpeed = useSettingsStore((s) => s.ttsSpeed); const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig); const setTTSVoice = useSettingsStore((s) => s.setTTSVoice); @@ -52,25 +61,70 @@ export function TtsConfigPopover() { const pillCls = 'inline-flex items-center gap-1.5 rounded-full px-2.5 py-1 text-xs font-medium transition-all cursor-pointer select-none whitespace-nowrap border'; + const stopPreview = useCallback((resetState = true) => { + previewRequestIdRef.current += 1; + browserPreviewCancelRef.current?.(); + browserPreviewCancelRef.current = 
null; + audioRef.current?.pause(); + audioRef.current = null; + if (resetState) { + setPreviewing(false); + } + }, []); + + useEffect(() => { + return () => { + stopPreview(false); + }; + }, [stopPreview]); + const handlePreview = useCallback(async () => { if (previewing) { - audioRef.current?.pause(); - audioRef.current = null; - setPreviewing(false); + stopPreview(); return; } + const requestId = previewRequestIdRef.current + 1; + previewRequestIdRef.current = requestId; + const previewText = t('settings.ttsTestTextDefault'); + setPreviewing(true); try { + if (ttsProviderId === 'browser-native-tts') { + if (!('speechSynthesis' in window)) { + throw new Error(t('settings.browserTTSNotSupported')); + } + + const voices = await ensureVoicesLoaded(); + if (voices.length === 0) { + throw new Error(t('settings.browserTTSNoVoices')); + } + + const controller = playBrowserTTSPreview({ + text: previewText, + voice: ttsVoice, + rate: ttsSpeed, + voices, + }); + browserPreviewCancelRef.current = controller.cancel; + await controller.promise; + if (previewRequestIdRef.current === requestId) { + browserPreviewCancelRef.current = null; + setPreviewing(false); + } + return; + } + const providerConfig = ttsProvidersConfig[ttsProviderId]; const res = await fetch('/api/generate/tts', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ - text: '你好,欢迎来到AI课堂!让我们一起学习吧。', + text: previewText, audioId: 'preview', - ttsProviderId: ttsProviderId, - ttsVoice: ttsVoice, + ttsProviderId, + ttsVoice, + ttsSpeed, ttsApiKey: providerConfig?.apiKey, ttsBaseUrl: providerConfig?.baseUrl, }), @@ -83,22 +137,50 @@ export function TtsConfigPopover() { const audio = new Audio(`data:audio/${data.format || 'mp3'};base64,${data.base64}`); audioRef.current = audio; audio.onended = () => { - setPreviewing(false); - audioRef.current = null; + if (previewRequestIdRef.current === requestId) { + setPreviewing(false); + audioRef.current = null; + } }; audio.onerror = () => 
{ - setPreviewing(false); - audioRef.current = null; + if (previewRequestIdRef.current === requestId) { + setPreviewing(false); + audioRef.current = null; + } }; await audio.play(); + return; } - } catch { + } catch (error) { + if (previewRequestIdRef.current === requestId) { + browserPreviewCancelRef.current = null; + setPreviewing(false); + } + if (!isBrowserTTSAbortError(error)) { + const message = + error instanceof Error && error.message ? error.message : t('settings.ttsTestFailed'); + toast.error(message); + } + return; + } + + if (previewRequestIdRef.current === requestId) { setPreviewing(false); } - }, [ttsProviderId, ttsVoice, ttsProvidersConfig, previewing]); + }, [previewing, stopPreview, t, ttsProviderId, ttsProvidersConfig, ttsSpeed, ttsVoice]); + + const handleOpenChange = useCallback( + (nextOpen: boolean) => { + if (!nextOpen) { + stopPreview(); + } + setOpen(nextOpen); + }, + [stopPreview], + ); return ( - + diff --git a/components/generation/media-popover.tsx b/components/generation/media-popover.tsx index 0a46230f4..85e8a7844 100644 --- a/components/generation/media-popover.tsx +++ b/components/generation/media-popover.tsx @@ -1,6 +1,6 @@ 'use client'; -import { useState, useRef, useCallback, useMemo, Fragment } from 'react'; +import { useState, useRef, useCallback, useMemo, Fragment, useEffect } from 'react'; import type { LucideIcon } from 'lucide-react'; import { Image as ImageIcon, @@ -12,6 +12,7 @@ import { Play, Loader2, } from 'lucide-react'; +import { toast } from 'sonner'; import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover'; import { Select, @@ -28,6 +29,11 @@ import { Switch } from '@/components/ui/switch'; import { cn } from '@/lib/utils'; import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; +import { + ensureVoicesLoaded, + isBrowserTTSAbortError, + playBrowserTTSPreview, +} from '@/lib/audio/browser-tts-preview'; import { IMAGE_PROVIDERS } from 
'@/lib/media/image-providers'; import { VIDEO_PROVIDERS } from '@/lib/media/video-providers'; import { TTS_PROVIDERS, getTTSVoices } from '@/lib/audio/constants'; @@ -77,6 +83,8 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { const [activeTab, setActiveTab] = useState('image'); const [previewing, setPreviewing] = useState(false); const audioRef = useRef(null); + const browserPreviewCancelRef = useRef<(() => void) | null>(null); + const previewRequestIdRef = useRef(0); // ─── Store ─── const imageGenerationEnabled = useSettingsStore((s) => s.imageGenerationEnabled); @@ -180,25 +188,71 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { [ttsProviderId, locale], ); + const stopPreview = useCallback((resetState = true) => { + previewRequestIdRef.current += 1; + browserPreviewCancelRef.current?.(); + browserPreviewCancelRef.current = null; + audioRef.current?.pause(); + audioRef.current = null; + if (resetState) { + setPreviewing(false); + } + }, []); + + useEffect(() => { + return () => { + stopPreview(false); + }; + }, [stopPreview]); + // TTS preview const handlePreview = useCallback(async () => { if (previewing) { - audioRef.current?.pause(); - audioRef.current = null; - setPreviewing(false); + stopPreview(); return; } + + const requestId = previewRequestIdRef.current + 1; + previewRequestIdRef.current = requestId; + const previewText = t('settings.ttsTestTextDefault'); + setPreviewing(true); try { + if (ttsProviderId === 'browser-native-tts') { + if (!('speechSynthesis' in window)) { + throw new Error(t('settings.browserTTSNotSupported')); + } + + const voices = await ensureVoicesLoaded(); + if (voices.length === 0) { + throw new Error(t('settings.browserTTSNoVoices')); + } + + const controller = playBrowserTTSPreview({ + text: previewText, + voice: ttsVoice, + rate: ttsSpeed, + voices, + }); + browserPreviewCancelRef.current = controller.cancel; + await controller.promise; + if (previewRequestIdRef.current === 
requestId) { + browserPreviewCancelRef.current = null; + setPreviewing(false); + } + return; + } + const providerConfig = ttsProvidersConfig[ttsProviderId]; const res = await fetch('/api/generate/tts', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ - text: '你好,欢迎来到AI课堂!让我们一起学习吧。', + text: previewText, audioId: 'preview', ttsProviderId, ttsVoice, + ttsSpeed, ttsApiKey: providerConfig?.apiKey, ttsBaseUrl: providerConfig?.baseUrl, }), @@ -209,19 +263,37 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { const audio = new Audio(`data:audio/${data.format || 'mp3'};base64,${data.base64}`); audioRef.current = audio; audio.onended = () => { - setPreviewing(false); - audioRef.current = null; + if (previewRequestIdRef.current === requestId) { + setPreviewing(false); + audioRef.current = null; + } }; audio.onerror = () => { - setPreviewing(false); - audioRef.current = null; + if (previewRequestIdRef.current === requestId) { + setPreviewing(false); + audioRef.current = null; + } }; await audio.play(); + return; + } + } catch (error) { + if (previewRequestIdRef.current === requestId) { + browserPreviewCancelRef.current = null; + setPreviewing(false); + } + if (!isBrowserTTSAbortError(error)) { + const message = + error instanceof Error && error.message ? 
error.message : t('settings.ttsTestFailed'); + toast.error(message); } - } catch { + return; + } + + if (previewRequestIdRef.current === requestId) { setPreviewing(false); } - }, [ttsProviderId, ttsVoice, ttsProvidersConfig, previewing]); + }, [previewing, stopPreview, t, ttsProviderId, ttsProvidersConfig, ttsSpeed, ttsVoice]); // ASR: only available providers const asrGroups = useMemo( @@ -243,6 +315,9 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { // Auto-select first enabled tab on open const handleOpenChange = (isOpen: boolean) => { + if (!isOpen) { + stopPreview(); + } setOpen(isOpen); if (isOpen) { const first = (['image', 'video', 'tts', 'asr'] as TabId[]).find((id) => enabledMap[id]); diff --git a/components/settings/audio-settings.tsx b/components/settings/audio-settings.tsx index 9a65ef806..92c2670de 100644 --- a/components/settings/audio-settings.tsx +++ b/components/settings/audio-settings.tsx @@ -1,6 +1,6 @@ 'use client'; -import { useState, useRef, useEffect, useMemo } from 'react'; +import { useState, useRef, useEffect, useMemo, useCallback } from 'react'; import { Label } from '@/components/ui/label'; import { Input } from '@/components/ui/input'; import { @@ -26,6 +26,11 @@ import { Volume2, Mic, MicOff, Loader2, CheckCircle2, XCircle, Eye, EyeOff } fro import { cn } from '@/lib/utils'; import azureVoicesData from '@/lib/audio/azure.json'; import { createLogger } from '@/lib/logger'; +import { + ensureVoicesLoaded, + isBrowserTTSAbortError, + playBrowserTTSPreview, +} from '@/lib/audio/browser-tts-preview'; const log = createLogger('AudioSettings'); @@ -157,6 +162,9 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { ); const [asrTestMessage, setASRTestMessage] = useState(''); const audioRef = useRef(null); + const audioUrlRef = useRef(null); + const browserPreviewCancelRef = useRef<(() => void) | null>(null); + const ttsTestRequestIdRef = useRef(0); const mediaRecorderRef = useRef(null); const 
asrProvider = ASR_PROVIDERS[asrProviderId] ?? ASR_PROVIDERS['openai-whisper']; @@ -177,6 +185,23 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { } } + const stopTTSPreview = useCallback((resetState = true) => { + ttsTestRequestIdRef.current += 1; + browserPreviewCancelRef.current?.(); + browserPreviewCancelRef.current = null; + audioRef.current?.pause(); + if (audioRef.current) { + audioRef.current.src = ''; + } + if (audioUrlRef.current) { + URL.revokeObjectURL(audioUrlRef.current); + audioUrlRef.current = null; + } + if (resetState) { + setTestingTTS(false); + } + }, []); + // Update voice selection when locale filter changes useEffect(() => { if (ttsProviderId === 'azure-tts' && selectedLocale !== 'all') { @@ -195,6 +220,12 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { // eslint-disable-next-line react-hooks/exhaustive-deps }, [selectedLocale, ttsProviderId, azureVoices, setTTSVoice]); + useEffect(() => { + stopTTSPreview(false); + setTTSTestStatus('idle'); + setTTSTestMessage(''); + }, [ttsProviderId, stopTTSPreview]); + // Initialize and reset TTS voice when provider changes useEffect(() => { let availableVoices: Array<{ id: string; name: string }> = []; @@ -241,6 +272,12 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { } }, [asrProviderId, asrLanguage, setASRLanguage]); + useEffect(() => { + return () => { + stopTTSPreview(false); + }; + }, [stopTTSPreview]); + // Clear ASR test status when provider changes (derived state pattern) const [prevASRProviderId, setPrevASRProviderId] = useState(asrProviderId); if (asrProviderId !== prevASRProviderId) { @@ -256,48 +293,48 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { return; } + const requestId = ttsTestRequestIdRef.current + 1; + ttsTestRequestIdRef.current = requestId; + setTestingTTS(true); setTTSTestStatus('testing'); setTTSTestMessage(''); try { - // Handle Browser Native TTS with Web Speech API if (ttsProviderId 
=== 'browser-native-tts') { if (!('speechSynthesis' in window)) { setTTSTestStatus('error'); setTTSTestMessage(t('settings.browserTTSNotSupported')); - setTestingTTS(false); return; } - const utterance = new SpeechSynthesisUtterance(testText); - utterance.rate = ttsSpeed; - - // Try to find matching voice - const voices = window.speechSynthesis.getVoices(); - const selectedVoice = voices.find((v) => v.name === ttsVoice || v.lang === ttsVoice); - if (selectedVoice) { - utterance.voice = selectedVoice; + const voices = await ensureVoicesLoaded(); + if (ttsTestRequestIdRef.current !== requestId) { + return; } - - utterance.onend = () => { - setTTSTestStatus('success'); - setTTSTestMessage(t('settings.ttsTestSuccess')); - setTestingTTS(false); - }; - - utterance.onerror = (event) => { - log.error('Browser TTS error:', event); + if (voices.length === 0) { setTTSTestStatus('error'); - setTTSTestMessage(t('settings.ttsTestFailed') + ': ' + event.error); - setTestingTTS(false); - }; + setTTSTestMessage(t('settings.browserTTSNoVoices')); + return; + } - window.speechSynthesis.speak(utterance); + const controller = playBrowserTTSPreview({ + text: testText, + voice: ttsVoice, + rate: ttsSpeed, + voices, + }); + browserPreviewCancelRef.current = controller.cancel; + await controller.promise; + + if (ttsTestRequestIdRef.current !== requestId) { + return; + } + setTTSTestStatus('success'); + setTTSTestMessage(t('settings.ttsTestSuccess')); return; } - // Server-side TTS for other providers const requestBody: Record = { text: testText, audioId: 'tts-test', @@ -325,15 +362,22 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { const data = await response .json() .catch(() => ({ success: false, error: response.statusText })); + if (ttsTestRequestIdRef.current !== requestId) { + return; + } if (response.ok && data.success) { const binaryStr = atob(data.base64); const bytes = new Uint8Array(binaryStr.length); for (let i = 0; i < binaryStr.length; i++) bytes[i] = 
binaryStr.charCodeAt(i); const audioBlob = new Blob([bytes], { type: `audio/${data.format}` }); + if (audioUrlRef.current) { + URL.revokeObjectURL(audioUrlRef.current); + } const audioUrl = URL.createObjectURL(audioBlob); + audioUrlRef.current = audioUrl; if (audioRef.current) { audioRef.current.src = audioUrl; - audioRef.current.play(); + await audioRef.current.play(); } setTTSTestStatus('success'); setTTSTestMessage(t('settings.ttsTestSuccess')); @@ -342,11 +386,21 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { setTTSTestMessage(data.error || t('settings.ttsTestFailed')); } } catch (error) { + if (ttsTestRequestIdRef.current !== requestId || isBrowserTTSAbortError(error)) { + return; + } log.error('TTS test failed:', error); setTTSTestStatus('error'); - setTTSTestMessage(t('settings.ttsTestFailed')); + setTTSTestMessage( + error instanceof Error && error.message + ? `${t('settings.ttsTestFailed')}: ${error.message}` + : t('settings.ttsTestFailed'), + ); } finally { - setTestingTTS(false); + if (ttsTestRequestIdRef.current === requestId) { + browserPreviewCancelRef.current = null; + setTestingTTS(false); + } } }; diff --git a/components/settings/tts-settings.tsx b/components/settings/tts-settings.tsx index b920a455f..31b899247 100644 --- a/components/settings/tts-settings.tsx +++ b/components/settings/tts-settings.tsx @@ -1,6 +1,6 @@ 'use client'; -import { useState, useRef, useEffect } from 'react'; +import { useState, useRef, useEffect, useCallback } from 'react'; import { Label } from '@/components/ui/label'; import { Input } from '@/components/ui/input'; import { Button } from '@/components/ui/button'; @@ -11,6 +11,11 @@ import type { TTSProviderId } from '@/lib/audio/types'; import { Volume2, Loader2, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react'; import { cn } from '@/lib/utils'; import { createLogger } from '@/lib/logger'; +import { + ensureVoicesLoaded, + isBrowserTTSAbortError, + playBrowserTTSPreview, +} from 
'@/lib/audio/browser-tts-preview'; const log = createLogger('TTSSettings'); @@ -43,6 +48,26 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { const [testStatus, setTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>('idle'); const [testMessage, setTestMessage] = useState(''); const audioRef = useRef(null); + const audioUrlRef = useRef(null); + const browserPreviewCancelRef = useRef<(() => void) | null>(null); + const testRequestIdRef = useRef(0); + + const stopPreview = useCallback((resetState = true) => { + testRequestIdRef.current += 1; + browserPreviewCancelRef.current?.(); + browserPreviewCancelRef.current = null; + audioRef.current?.pause(); + if (audioRef.current) { + audioRef.current.src = ''; + } + if (audioUrlRef.current) { + URL.revokeObjectURL(audioUrlRef.current); + audioUrlRef.current = null; + } + if (resetState) { + setTestingTTS(false); + } + }, []); // Update test text when language changes useEffect(() => { @@ -51,13 +76,24 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { // Reset state when provider changes useEffect(() => { + stopPreview(false); setShowApiKey(false); + setTestingTTS(false); setTestStatus('idle'); setTestMessage(''); - }, [selectedProviderId]); + }, [selectedProviderId, stopPreview]); + + useEffect(() => { + return () => { + stopPreview(false); + }; + }, [stopPreview]); const handleTestTTS = async () => { if (!testText.trim()) return; + const requestId = testRequestIdRef.current + 1; + testRequestIdRef.current = requestId; + setTestingTTS(true); setTestStatus('testing'); setTestMessage(''); @@ -70,20 +106,24 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { return; } - const utterance = new SpeechSynthesisUtterance(testText); - utterance.rate = ttsSpeed; - const voices = window.speechSynthesis.getVoices(); - const selectedVoice = voices.find( - (v) => v.name === effectiveVoice || v.lang === effectiveVoice, - ); - if (selectedVoice) 
utterance.voice = selectedVoice; + const voices = await ensureVoicesLoaded(); + if (testRequestIdRef.current !== requestId) return; + if (voices.length === 0) { + setTestStatus('error'); + setTestMessage(t('settings.browserTTSNoVoices')); + return; + } - await new Promise((resolve, reject) => { - utterance.onend = () => resolve(); - utterance.onerror = (event) => reject(new Error(event.error)); - window.speechSynthesis.speak(utterance); + const controller = playBrowserTTSPreview({ + text: testText, + voice: effectiveVoice, + rate: ttsSpeed, + voices, }); + browserPreviewCancelRef.current = controller.cancel; + await controller.promise; + if (testRequestIdRef.current !== requestId) return; setTestStatus('success'); setTestMessage(t('settings.ttsTestSuccess')); return; @@ -109,12 +149,17 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { const data = await response .json() .catch(() => ({ success: false, error: response.statusText })); + if (testRequestIdRef.current !== requestId) return; if (response.ok && data.success) { const binaryStr = atob(data.base64); const bytes = new Uint8Array(binaryStr.length); for (let i = 0; i < binaryStr.length; i++) bytes[i] = binaryStr.charCodeAt(i); const audioBlob = new Blob([bytes], { type: `audio/${data.format}` }); + if (audioUrlRef.current) { + URL.revokeObjectURL(audioUrlRef.current); + } const audioUrl = URL.createObjectURL(audioBlob); + audioUrlRef.current = audioUrl; if (audioRef.current) { audioRef.current.src = audioUrl; await audioRef.current.play(); @@ -126,6 +171,9 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { setTestMessage(data.error || t('settings.ttsTestFailed')); } } catch (error) { + if (testRequestIdRef.current !== requestId || isBrowserTTSAbortError(error)) { + return; + } log.error('TTS test failed:', error); setTestStatus('error'); setTestMessage( @@ -134,7 +182,10 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { : 
t('settings.ttsTestFailed'), ); } finally { - setTestingTTS(false); + if (testRequestIdRef.current === requestId) { + browserPreviewCancelRef.current = null; + setTestingTTS(false); + } } }; diff --git a/lib/audio/browser-tts-preview.ts b/lib/audio/browser-tts-preview.ts new file mode 100644 index 000000000..5da699773 --- /dev/null +++ b/lib/audio/browser-tts-preview.ts @@ -0,0 +1,211 @@ +'use client'; + +const VOICES_LOAD_TIMEOUT_MS = 2000; +const PREVIEW_TIMEOUT_MS = 30000; +const CJK_LANG_THRESHOLD = 0.3; + +type PlayBrowserTTSPreviewOptions = { + text: string; + voice?: string; + rate?: number; + voices?: SpeechSynthesisVoice[]; +}; + +function createAbortError(): Error { + const error = new Error('Browser TTS preview canceled'); + error.name = 'AbortError'; + return error; +} + +function inferPreviewLang(text: string): string { + const cjkCount = (text.match(/[\u4e00-\u9fff\u3400-\u4dbf]/g) || []).length; + const ratio = text.length > 0 ? cjkCount / text.length : 0; + return ratio > CJK_LANG_THRESHOLD ? 'zh-CN' : 'en-US'; +} + +export function isBrowserTTSAbortError(error: unknown): boolean { + return error instanceof Error && error.name === 'AbortError'; +} + +/** Wait for browser voices to load, with a 2s timeout fallback. 
*/ +export async function ensureVoicesLoaded(): Promise { + if (typeof window === 'undefined' || !window.speechSynthesis) { + return []; + } + + const initialVoices = window.speechSynthesis.getVoices(); + if (initialVoices.length > 0) { + return initialVoices; + } + + return new Promise((resolve) => { + let settled = false; + let timeoutId: number | null = null; + + const cleanup = () => { + window.speechSynthesis.removeEventListener('voiceschanged', handleVoicesChanged); + if (timeoutId !== null) { + window.clearTimeout(timeoutId); + } + }; + + const finish = () => { + if (settled) return; + settled = true; + cleanup(); + resolve(window.speechSynthesis.getVoices()); + }; + + const handleVoicesChanged = () => { + const voices = window.speechSynthesis.getVoices(); + if (voices.length > 0) { + finish(); + } + }; + + window.speechSynthesis.addEventListener('voiceschanged', handleVoicesChanged); + timeoutId = window.setTimeout(finish, VOICES_LOAD_TIMEOUT_MS); + }); +} + +/** Resolve a browser voice by voiceURI, name, or lang, with language fallback by text. */ +export function resolveBrowserVoice( + voices: SpeechSynthesisVoice[], + voiceNameOrLang: string, + text: string, +): { voice: SpeechSynthesisVoice | null; lang: string } { + const target = voiceNameOrLang.trim(); + const matchedVoice = + target && target !== 'default' + ? voices.find( + (voice) => voice.voiceURI === target || voice.name === target || voice.lang === target, + ) || null + : null; + + return { + voice: matchedVoice, + lang: matchedVoice?.lang || inferPreviewLang(text), + }; +} + +/** + * Play a short browser-native TTS preview. + * + * Notes: + * - Uses the global speechSynthesis queue, so it must cancel queued utterances + * before starting a new preview. + * - Resolves only after the utterance has started and then ended successfully. 
+ */ +export function playBrowserTTSPreview(options: PlayBrowserTTSPreviewOptions): { + promise: Promise; + cancel: () => void; +} { + const synth = typeof window !== 'undefined' ? window.speechSynthesis : undefined; + + if (!synth) { + return { + promise: Promise.reject(new Error('Browser does not support Speech Synthesis API')), + cancel: () => {}, + }; + } + + let settled = false; + let started = false; + let canceled = false; + let timeoutId: number | null = null; + let rejectPromise: ((reason?: unknown) => void) | null = null; + + const settleResolve = (resolve: () => void) => { + if (settled) return; + settled = true; + if (timeoutId !== null) { + window.clearTimeout(timeoutId); + timeoutId = null; + } + resolve(); + }; + + const settleReject = (reject: (reason?: unknown) => void, reason: unknown) => { + if (settled) return; + settled = true; + if (timeoutId !== null) { + window.clearTimeout(timeoutId); + timeoutId = null; + } + reject(reason); + }; + + const promise = new Promise((resolve, reject) => { + rejectPromise = reject; + + const startPlayback = async () => { + try { + const voices = options.voices ?? (await ensureVoicesLoaded()); + if (canceled) { + settleReject(reject, createAbortError()); + return; + } + if (voices.length === 0) { + settleReject(reject, new Error('No browser TTS voices available')); + return; + } + + const utterance = new SpeechSynthesisUtterance(options.text); + utterance.rate = options.rate ?? 1; + + const { voice, lang } = resolveBrowserVoice(voices, options.voice ?? 
'', options.text); + if (voice) { + utterance.voice = voice; + } + utterance.lang = lang; + + utterance.onstart = () => { + started = true; + }; + + utterance.onend = () => { + if (!started) { + settleReject(reject, new Error('Browser TTS preview ended before playback started')); + return; + } + settleResolve(resolve); + }; + + utterance.onerror = (event) => { + if (canceled || event.error === 'canceled' || event.error === 'interrupted') { + settleReject(reject, createAbortError()); + return; + } + settleReject(reject, new Error(event.error)); + }; + + timeoutId = window.setTimeout(() => { + synth.cancel(); + settleReject(reject, new Error('Browser TTS preview timed out')); + }, PREVIEW_TIMEOUT_MS); + + synth.cancel(); + if (canceled) { + settleReject(reject, createAbortError()); + return; + } + synth.speak(utterance); + } catch (error) { + settleReject(reject, error); + } + }; + + void startPlayback(); + }); + + const cancel = () => { + if (settled || canceled) return; + canceled = true; + synth.cancel(); + if (rejectPromise) { + settleReject(rejectPromise, createAbortError()); + } + }; + + return { promise, cancel }; +} diff --git a/lib/i18n/settings.ts b/lib/i18n/settings.ts index 1d2579c95..1e6918263 100644 --- a/lib/i18n/settings.ts +++ b/lib/i18n/settings.ts @@ -240,6 +240,7 @@ export const settingsZhCN = { asrResult: '识别结果', asrNotSupported: '浏览器不支持语音识别 API', browserTTSNotSupported: '浏览器不支持语音合成 API', + browserTTSNoVoices: '当前浏览器没有可用的 TTS voice', microphoneAccessDenied: '麦克风访问被拒绝', microphoneAccessFailed: '无法访问麦克风', asrResultPlaceholder: '录音后将显示识别结果', @@ -825,6 +826,7 @@ export const settingsEnUS = { asrResult: 'Recognition Result', asrNotSupported: 'Browser does not support Speech Recognition API', browserTTSNotSupported: 'Browser does not support Speech Synthesis API', + browserTTSNoVoices: 'Current browser has no available TTS voices', microphoneAccessDenied: 'Microphone access denied', microphoneAccessFailed: 'Failed to access microphone', 
asrResultPlaceholder: 'Recognition result will be displayed after recording', From 1a582f3ac860a2fcf00a797db864ca0db7588b5b Mon Sep 17 00:00:00 2001 From: yangshen <1322568757@qq.com> Date: Wed, 18 Mar 2026 22:11:36 +0800 Subject: [PATCH 4/4] refactor: extract shared TTS preview logic into useTTSPreview hook The browser-native and API-based TTS preview code was duplicated across tts-config-popover, media-popover, and tts-settings. Extract it into a reusable useTTSPreview hook that handles refs, cancellation, audio lifecycle, and staleness checks in one place. Co-Authored-By: Claude Opus 4.6 (1M context) --- components/audio/tts-config-popover.tsx | 130 ++++---------------- components/generation/media-popover.tsx | 128 ++++--------------- components/settings/tts-settings.tsx | 130 ++------------------ lib/audio/use-tts-preview.ts | 157 ++++++++++++++++++++++++ 4 files changed, 215 insertions(+), 330 deletions(-) create mode 100644 lib/audio/use-tts-preview.ts diff --git a/components/audio/tts-config-popover.tsx b/components/audio/tts-config-popover.tsx index 176ec3d3a..cef2bfec8 100644 --- a/components/audio/tts-config-popover.tsx +++ b/components/audio/tts-config-popover.tsx @@ -1,6 +1,6 @@ 'use client'; -import { useState, useRef, useCallback, useMemo, useEffect } from 'react'; +import { useState, useCallback, useMemo } from 'react'; import { Volume2, Play, Loader2 } from 'lucide-react'; import { toast } from 'sonner'; import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover'; @@ -17,11 +17,7 @@ import { cn } from '@/lib/utils'; import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; import { getTTSVoices } from '@/lib/audio/constants'; -import { - ensureVoicesLoaded, - isBrowserTTSAbortError, - playBrowserTTSPreview, -} from '@/lib/audio/browser-tts-preview'; +import { useTTSPreview } from '@/lib/audio/use-tts-preview'; /** Extract the English name from voice name format "ChineseName 
(English)" */ function getVoiceDisplayName(name: string, lang: string): string { @@ -35,10 +31,7 @@ function getVoiceDisplayName(name: string, lang: string): string { export function TtsConfigPopover() { const { t, locale } = useI18n(); const [open, setOpen] = useState(false); - const [previewing, setPreviewing] = useState(false); - const audioRef = useRef(null); - const browserPreviewCancelRef = useRef<(() => void) | null>(null); - const previewRequestIdRef = useRef(0); + const { previewing, startPreview, stopPreview } = useTTSPreview(); const ttsEnabled = useSettingsStore((s) => s.ttsEnabled); const setTTSEnabled = useSettingsStore((s) => s.setTTSEnabled); @@ -61,113 +54,36 @@ export function TtsConfigPopover() { const pillCls = 'inline-flex items-center gap-1.5 rounded-full px-2.5 py-1 text-xs font-medium transition-all cursor-pointer select-none whitespace-nowrap border'; - const stopPreview = useCallback((resetState = true) => { - previewRequestIdRef.current += 1; - browserPreviewCancelRef.current?.(); - browserPreviewCancelRef.current = null; - audioRef.current?.pause(); - audioRef.current = null; - if (resetState) { - setPreviewing(false); - } - }, []); - - useEffect(() => { - return () => { - stopPreview(false); - }; - }, [stopPreview]); - const handlePreview = useCallback(async () => { if (previewing) { stopPreview(); return; } - - const requestId = previewRequestIdRef.current + 1; - previewRequestIdRef.current = requestId; - const previewText = t('settings.ttsTestTextDefault'); - - setPreviewing(true); try { - if (ttsProviderId === 'browser-native-tts') { - if (!('speechSynthesis' in window)) { - throw new Error(t('settings.browserTTSNotSupported')); - } - - const voices = await ensureVoicesLoaded(); - if (voices.length === 0) { - throw new Error(t('settings.browserTTSNoVoices')); - } - - const controller = playBrowserTTSPreview({ - text: previewText, - voice: ttsVoice, - rate: ttsSpeed, - voices, - }); - browserPreviewCancelRef.current = 
controller.cancel; - await controller.promise; - if (previewRequestIdRef.current === requestId) { - browserPreviewCancelRef.current = null; - setPreviewing(false); - } - return; - } - const providerConfig = ttsProvidersConfig[ttsProviderId]; - const res = await fetch('/api/generate/tts', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - text: previewText, - audioId: 'preview', - ttsProviderId, - ttsVoice, - ttsSpeed, - ttsApiKey: providerConfig?.apiKey, - ttsBaseUrl: providerConfig?.baseUrl, - }), + await startPreview({ + text: t('settings.ttsTestTextDefault'), + providerId: ttsProviderId, + voice: ttsVoice, + speed: ttsSpeed, + apiKey: providerConfig?.apiKey, + baseUrl: providerConfig?.baseUrl, }); - - if (!res.ok) throw new Error('TTS failed'); - - const data = await res.json(); - if (data.base64) { - const audio = new Audio(`data:audio/${data.format || 'mp3'};base64,${data.base64}`); - audioRef.current = audio; - audio.onended = () => { - if (previewRequestIdRef.current === requestId) { - setPreviewing(false); - audioRef.current = null; - } - }; - audio.onerror = () => { - if (previewRequestIdRef.current === requestId) { - setPreviewing(false); - audioRef.current = null; - } - }; - await audio.play(); - return; - } } catch (error) { - if (previewRequestIdRef.current === requestId) { - browserPreviewCancelRef.current = null; - setPreviewing(false); - } - if (!isBrowserTTSAbortError(error)) { - const message = - error instanceof Error && error.message ? error.message : t('settings.ttsTestFailed'); - toast.error(message); - } - return; - } - - if (previewRequestIdRef.current === requestId) { - setPreviewing(false); + const message = + error instanceof Error && error.message ? 
error.message : t('settings.ttsTestFailed'); + toast.error(message); } - }, [previewing, stopPreview, t, ttsProviderId, ttsProvidersConfig, ttsSpeed, ttsVoice]); + }, [ + previewing, + startPreview, + stopPreview, + t, + ttsProviderId, + ttsProvidersConfig, + ttsSpeed, + ttsVoice, + ]); const handleOpenChange = useCallback( (nextOpen: boolean) => { diff --git a/components/generation/media-popover.tsx b/components/generation/media-popover.tsx index 85e8a7844..821c0ec97 100644 --- a/components/generation/media-popover.tsx +++ b/components/generation/media-popover.tsx @@ -1,6 +1,6 @@ 'use client'; -import { useState, useRef, useCallback, useMemo, Fragment, useEffect } from 'react'; +import { useState, useCallback, useMemo, Fragment } from 'react'; import type { LucideIcon } from 'lucide-react'; import { Image as ImageIcon, @@ -29,11 +29,7 @@ import { Switch } from '@/components/ui/switch'; import { cn } from '@/lib/utils'; import { useI18n } from '@/lib/hooks/use-i18n'; import { useSettingsStore } from '@/lib/store/settings'; -import { - ensureVoicesLoaded, - isBrowserTTSAbortError, - playBrowserTTSPreview, -} from '@/lib/audio/browser-tts-preview'; +import { useTTSPreview } from '@/lib/audio/use-tts-preview'; import { IMAGE_PROVIDERS } from '@/lib/media/image-providers'; import { VIDEO_PROVIDERS } from '@/lib/media/video-providers'; import { TTS_PROVIDERS, getTTSVoices } from '@/lib/audio/constants'; @@ -81,10 +77,7 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { const { t, locale } = useI18n(); const [open, setOpen] = useState(false); const [activeTab, setActiveTab] = useState('image'); - const [previewing, setPreviewing] = useState(false); - const audioRef = useRef(null); - const browserPreviewCancelRef = useRef<(() => void) | null>(null); - const previewRequestIdRef = useRef(0); + const { previewing, startPreview, stopPreview } = useTTSPreview(); // ─── Store ─── const imageGenerationEnabled = useSettingsStore((s) => 
s.imageGenerationEnabled); @@ -188,112 +181,37 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) { [ttsProviderId, locale], ); - const stopPreview = useCallback((resetState = true) => { - previewRequestIdRef.current += 1; - browserPreviewCancelRef.current?.(); - browserPreviewCancelRef.current = null; - audioRef.current?.pause(); - audioRef.current = null; - if (resetState) { - setPreviewing(false); - } - }, []); - - useEffect(() => { - return () => { - stopPreview(false); - }; - }, [stopPreview]); - // TTS preview const handlePreview = useCallback(async () => { if (previewing) { stopPreview(); return; } - - const requestId = previewRequestIdRef.current + 1; - previewRequestIdRef.current = requestId; - const previewText = t('settings.ttsTestTextDefault'); - - setPreviewing(true); try { - if (ttsProviderId === 'browser-native-tts') { - if (!('speechSynthesis' in window)) { - throw new Error(t('settings.browserTTSNotSupported')); - } - - const voices = await ensureVoicesLoaded(); - if (voices.length === 0) { - throw new Error(t('settings.browserTTSNoVoices')); - } - - const controller = playBrowserTTSPreview({ - text: previewText, - voice: ttsVoice, - rate: ttsSpeed, - voices, - }); - browserPreviewCancelRef.current = controller.cancel; - await controller.promise; - if (previewRequestIdRef.current === requestId) { - browserPreviewCancelRef.current = null; - setPreviewing(false); - } - return; - } - const providerConfig = ttsProvidersConfig[ttsProviderId]; - const res = await fetch('/api/generate/tts', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - text: previewText, - audioId: 'preview', - ttsProviderId, - ttsVoice, - ttsSpeed, - ttsApiKey: providerConfig?.apiKey, - ttsBaseUrl: providerConfig?.baseUrl, - }), + await startPreview({ + text: t('settings.ttsTestTextDefault'), + providerId: ttsProviderId, + voice: ttsVoice, + speed: ttsSpeed, + apiKey: providerConfig?.apiKey, + baseUrl: 
providerConfig?.baseUrl, }); - if (!res.ok) throw new Error('TTS failed'); - const data = await res.json(); - if (data.base64) { - const audio = new Audio(`data:audio/${data.format || 'mp3'};base64,${data.base64}`); - audioRef.current = audio; - audio.onended = () => { - if (previewRequestIdRef.current === requestId) { - setPreviewing(false); - audioRef.current = null; - } - }; - audio.onerror = () => { - if (previewRequestIdRef.current === requestId) { - setPreviewing(false); - audioRef.current = null; - } - }; - await audio.play(); - return; - } } catch (error) { - if (previewRequestIdRef.current === requestId) { - browserPreviewCancelRef.current = null; - setPreviewing(false); - } - if (!isBrowserTTSAbortError(error)) { - const message = - error instanceof Error && error.message ? error.message : t('settings.ttsTestFailed'); - toast.error(message); - } - return; - } - - if (previewRequestIdRef.current === requestId) { - setPreviewing(false); + const message = + error instanceof Error && error.message ? 
error.message : t('settings.ttsTestFailed'); + toast.error(message); } - }, [previewing, stopPreview, t, ttsProviderId, ttsProvidersConfig, ttsSpeed, ttsVoice]); + }, [ + previewing, + startPreview, + stopPreview, + t, + ttsProviderId, + ttsProvidersConfig, + ttsSpeed, + ttsVoice, + ]); // ASR: only available providers const asrGroups = useMemo( diff --git a/components/settings/tts-settings.tsx b/components/settings/tts-settings.tsx index 31b899247..5315345e1 100644 --- a/components/settings/tts-settings.tsx +++ b/components/settings/tts-settings.tsx @@ -1,6 +1,6 @@ 'use client'; -import { useState, useRef, useEffect, useCallback } from 'react'; +import { useState, useEffect } from 'react'; import { Label } from '@/components/ui/label'; import { Input } from '@/components/ui/input'; import { Button } from '@/components/ui/button'; @@ -11,11 +11,7 @@ import type { TTSProviderId } from '@/lib/audio/types'; import { Volume2, Loader2, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react'; import { cn } from '@/lib/utils'; import { createLogger } from '@/lib/logger'; -import { - ensureVoicesLoaded, - isBrowserTTSAbortError, - playBrowserTTSPreview, -} from '@/lib/audio/browser-tts-preview'; +import { useTTSPreview } from '@/lib/audio/use-tts-preview'; const log = createLogger('TTSSettings'); @@ -43,31 +39,10 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { const isServerConfigured = !!ttsProvidersConfig[selectedProviderId]?.isServerConfigured; const [showApiKey, setShowApiKey] = useState(false); - const [testingTTS, setTestingTTS] = useState(false); const [testText, setTestText] = useState(t('settings.ttsTestTextDefault')); const [testStatus, setTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>('idle'); const [testMessage, setTestMessage] = useState(''); - const audioRef = useRef(null); - const audioUrlRef = useRef(null); - const browserPreviewCancelRef = useRef<(() => void) | null>(null); - const testRequestIdRef = useRef(0); 
- - const stopPreview = useCallback((resetState = true) => { - testRequestIdRef.current += 1; - browserPreviewCancelRef.current?.(); - browserPreviewCancelRef.current = null; - audioRef.current?.pause(); - if (audioRef.current) { - audioRef.current.src = ''; - } - if (audioUrlRef.current) { - URL.revokeObjectURL(audioUrlRef.current); - audioUrlRef.current = null; - } - if (resetState) { - setTestingTTS(false); - } - }, []); + const { previewing: testingTTS, startPreview, stopPreview } = useTTSPreview(); // Update test text when language changes useEffect(() => { @@ -76,104 +51,30 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { // Reset state when provider changes useEffect(() => { - stopPreview(false); + stopPreview(); setShowApiKey(false); - setTestingTTS(false); setTestStatus('idle'); setTestMessage(''); }, [selectedProviderId, stopPreview]); - useEffect(() => { - return () => { - stopPreview(false); - }; - }, [stopPreview]); - const handleTestTTS = async () => { if (!testText.trim()) return; - const requestId = testRequestIdRef.current + 1; - testRequestIdRef.current = requestId; - setTestingTTS(true); setTestStatus('testing'); setTestMessage(''); try { - if (selectedProviderId === 'browser-native-tts') { - if (!('speechSynthesis' in window)) { - setTestStatus('error'); - setTestMessage(t('settings.browserTTSNotSupported')); - return; - } - - const voices = await ensureVoicesLoaded(); - if (testRequestIdRef.current !== requestId) return; - if (voices.length === 0) { - setTestStatus('error'); - setTestMessage(t('settings.browserTTSNoVoices')); - return; - } - - const controller = playBrowserTTSPreview({ - text: testText, - voice: effectiveVoice, - rate: ttsSpeed, - voices, - }); - browserPreviewCancelRef.current = controller.cancel; - await controller.promise; - - if (testRequestIdRef.current !== requestId) return; - setTestStatus('success'); - setTestMessage(t('settings.ttsTestSuccess')); - return; - } - - const requestBody: Record = { 
+ await startPreview({ text: testText, - audioId: 'tts-test', - ttsProviderId: selectedProviderId, - ttsVoice: effectiveVoice, - ttsSpeed: ttsSpeed, - }; - const apiKeyValue = ttsProvidersConfig[selectedProviderId]?.apiKey; - if (apiKeyValue?.trim()) requestBody.ttsApiKey = apiKeyValue; - const baseUrlValue = ttsProvidersConfig[selectedProviderId]?.baseUrl; - if (baseUrlValue?.trim()) requestBody.ttsBaseUrl = baseUrlValue; - - const response = await fetch('/api/generate/tts', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(requestBody), + providerId: selectedProviderId, + voice: effectiveVoice, + speed: ttsSpeed, + apiKey: ttsProvidersConfig[selectedProviderId]?.apiKey, + baseUrl: ttsProvidersConfig[selectedProviderId]?.baseUrl, }); - const data = await response - .json() - .catch(() => ({ success: false, error: response.statusText })); - if (testRequestIdRef.current !== requestId) return; - if (response.ok && data.success) { - const binaryStr = atob(data.base64); - const bytes = new Uint8Array(binaryStr.length); - for (let i = 0; i < binaryStr.length; i++) bytes[i] = binaryStr.charCodeAt(i); - const audioBlob = new Blob([bytes], { type: `audio/${data.format}` }); - if (audioUrlRef.current) { - URL.revokeObjectURL(audioUrlRef.current); - } - const audioUrl = URL.createObjectURL(audioBlob); - audioUrlRef.current = audioUrl; - if (audioRef.current) { - audioRef.current.src = audioUrl; - await audioRef.current.play(); - } - setTestStatus('success'); - setTestMessage(t('settings.ttsTestSuccess')); - } else { - setTestStatus('error'); - setTestMessage(data.error || t('settings.ttsTestFailed')); - } + setTestStatus('success'); + setTestMessage(t('settings.ttsTestSuccess')); } catch (error) { - if (testRequestIdRef.current !== requestId || isBrowserTTSAbortError(error)) { - return; - } log.error('TTS test failed:', error); setTestStatus('error'); setTestMessage( @@ -181,11 +82,6 @@ export function TTSSettings({ 
selectedProviderId }: TTSSettingsProps) { ? `${t('settings.ttsTestFailed')}: ${error.message}` : t('settings.ttsTestFailed'), ); - } finally { - if (testRequestIdRef.current === requestId) { - browserPreviewCancelRef.current = null; - setTestingTTS(false); - } } }; @@ -328,8 +224,6 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { )} - -