diff --git a/app/api/generate/tts/route.ts b/app/api/generate/tts/route.ts
index 4ae820c78..b1de8c2d6 100644
--- a/app/api/generate/tts/route.ts
+++ b/app/api/generate/tts/route.ts
@@ -21,15 +21,17 @@ export const maxDuration = 30;
export async function POST(req: NextRequest) {
try {
const body = await req.json();
- const { text, audioId, ttsProviderId, ttsVoice, ttsSpeed, ttsApiKey, ttsBaseUrl } = body as {
- text: string;
- audioId: string;
- ttsProviderId: TTSProviderId;
- ttsVoice: string;
- ttsSpeed?: number;
- ttsApiKey?: string;
- ttsBaseUrl?: string;
- };
+ const { text, audioId, ttsProviderId, ttsVoice, ttsSpeed, ttsApiKey, ttsBaseUrl, ttsModelId } =
+ body as {
+ text: string;
+ audioId: string;
+ ttsProviderId: TTSProviderId;
+ ttsVoice: string;
+ ttsSpeed?: number;
+ ttsApiKey?: string;
+ ttsBaseUrl?: string;
+ ttsModelId?: string;
+ };
// Validate required fields
if (!text || !audioId || !ttsProviderId || !ttsVoice) {
@@ -56,6 +58,7 @@ export async function POST(req: NextRequest) {
speed: ttsSpeed ?? 1.0,
apiKey,
baseUrl,
+ modelId: ttsModelId || undefined,
};
log.info(
diff --git a/components/settings/audio-settings.tsx b/components/settings/audio-settings.tsx
index 9a65ef806..d95de195b 100644
--- a/components/settings/audio-settings.tsx
+++ b/components/settings/audio-settings.tsx
@@ -20,6 +20,7 @@ import {
getTTSVoices,
ASR_PROVIDERS,
getASRSupportedLanguages,
+ DEFAULT_TTS_MODELS,
} from '@/lib/audio/constants';
import type { TTSProviderId, ASRProviderId } from '@/lib/audio/types';
import { Volume2, Mic, MicOff, Loader2, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react';
@@ -112,7 +113,7 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
const handleTTSProviderConfigChange = (
providerId: TTSProviderId,
- config: Partial<{ apiKey: string; baseUrl: string; enabled: boolean }>,
+ config: Partial<{ apiKey: string; baseUrl: string; enabled: boolean; modelId: string }>,
) => {
setTTSProviderConfig(providerId, config);
onSave?.();
@@ -316,6 +317,11 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
requestBody.ttsBaseUrl = baseUrlValue;
}
+ const modelIdValue = ttsProvidersConfig[ttsProviderId]?.modelId;
+ if (modelIdValue && modelIdValue.trim()) {
+ requestBody.ttsModelId = modelIdValue;
+ }
+
const response = await fetch('/api/generate/tts', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
@@ -591,6 +597,23 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
/>
+
+ {/* Model ID input - shown only for providers that have a non-empty default model ID */}
+ {DEFAULT_TTS_MODELS[ttsProviderId] && (
+
+
+
+ handleTTSProviderConfigChange(ttsProviderId, {
+ modelId: e.target.value,
+ })
+ }
+ className="text-sm"
+ />
+
+ )}
{(() => {
const effectiveBaseUrl =
ttsProvidersConfig[ttsProviderId]?.baseUrl || ttsProvider.defaultBaseUrl || '';
diff --git a/components/settings/tts-settings.tsx b/components/settings/tts-settings.tsx
index 45a03f51b..3a44429cb 100644
--- a/components/settings/tts-settings.tsx
+++ b/components/settings/tts-settings.tsx
@@ -6,7 +6,7 @@ import { Input } from '@/components/ui/input';
import { Button } from '@/components/ui/button';
import { useI18n } from '@/lib/hooks/use-i18n';
import { useSettingsStore } from '@/lib/store/settings';
-import { TTS_PROVIDERS, DEFAULT_TTS_VOICES } from '@/lib/audio/constants';
+import { TTS_PROVIDERS, DEFAULT_TTS_VOICES, DEFAULT_TTS_MODELS } from '@/lib/audio/constants';
import type { TTSProviderId } from '@/lib/audio/types';
import { Volume2, Loader2, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react';
import { cn } from '@/lib/utils';
@@ -190,6 +190,24 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) {
/>
+
+ {/* Model ID input - shown only for providers that have a non-empty default model ID */}
+ {DEFAULT_TTS_MODELS[selectedProviderId] && (
+
+
+
+ setTTSProviderConfig(selectedProviderId, {
+ modelId: e.target.value,
+ })
+ }
+ className="text-sm"
+ />
+
+ )}
+
{/* Request URL Preview */}
{(() => {
const effectiveBaseUrl =
diff --git a/lib/audio/constants.ts b/lib/audio/constants.ts
index 55a5cbb36..6203a3b31 100644
--- a/lib/audio/constants.ts
+++ b/lib/audio/constants.ts
@@ -836,6 +836,19 @@ export const DEFAULT_TTS_VOICES: Record = {
'browser-native-tts': 'default',
};
+/**
+ * Default model ID for each TTS provider, keyed by provider ID.
+ * Used as the fallback when the user has not configured a custom model ID.
+ * An empty string means the provider takes no model ID; the settings UI then hides the input.
+ */
+export const DEFAULT_TTS_MODELS: Record = {
+ 'openai-tts': 'gpt-4o-mini-tts',
+ 'azure-tts': '',
+ 'glm-tts': 'glm-tts',
+ 'qwen-tts': 'qwen3-tts-flash',
+ 'browser-native-tts': '',
+};
+
/**
* Get voices for a specific TTS provider
*/
diff --git a/lib/audio/tts-providers.ts b/lib/audio/tts-providers.ts
index bf1f2c128..302db6608 100644
--- a/lib/audio/tts-providers.ts
+++ b/lib/audio/tts-providers.ts
@@ -90,7 +90,7 @@
*/
import type { TTSModelConfig } from './types';
-import { TTS_PROVIDERS } from './constants';
+import { TTS_PROVIDERS, DEFAULT_TTS_MODELS } from './constants';
/**
* Result of TTS generation
@@ -149,7 +149,7 @@ async function generateOpenAITTS(
): Promise {
const baseUrl = config.baseUrl || TTS_PROVIDERS['openai-tts'].defaultBaseUrl;
- // Use gpt-4o-mini-tts for best quality and intelligent realtime applications
+ // Use the configured model ID, falling back to the provider's default model
const response = await fetch(`${baseUrl}/audio/speech`, {
method: 'POST',
headers: {
@@ -157,7 +157,7 @@ async function generateOpenAITTS(
'Content-Type': 'application/json; charset=utf-8',
},
body: JSON.stringify({
- model: 'gpt-4o-mini-tts',
+ model: config.modelId || DEFAULT_TTS_MODELS['openai-tts'],
input: text,
voice: config.voice,
speed: config.speed || 1.0,
@@ -229,7 +229,7 @@ async function generateGLMTTS(config: TTSModelConfig, text: string): Promise {
baseUrl: providerConfig?.baseUrl,
voice: ttsVoice,
speed: ttsSpeed,
+ modelId: providerConfig?.modelId,
};
}
diff --git a/lib/audio/types.ts b/lib/audio/types.ts
index 43c37087f..88cf53ce9 100644
--- a/lib/audio/types.ts
+++ b/lib/audio/types.ts
@@ -129,6 +129,7 @@ export interface TTSModelConfig {
voice: string;
speed?: number;
format?: string;
+ modelId?: string;
}
// ============================================================================
@@ -143,10 +144,8 @@ export interface TTSModelConfig {
*/
export type ASRProviderId = 'openai-whisper' | 'browser-native' | 'qwen-asr';
// Add new ASR providers below (uncomment and modify):
-// | 'elevenlabs-asr'
-// | 'assemblyai-asr'
-// | 'deepgram-asr'
-// | 'azure-asr'
+// | 'assemblyai'
+// | 'deepgram'
/**
* ASR Provider Configuration
diff --git a/lib/export/latex-to-omml.ts b/lib/export/latex-to-omml.ts
index 0aa6f926e..f21d31e88 100644
--- a/lib/export/latex-to-omml.ts
+++ b/lib/export/latex-to-omml.ts
@@ -71,7 +71,9 @@ export function latexToOmml(latex: string, fontSize?: number): string | null {
try {
const mathml = temml.renderToString(latex);
const cleaned = stripUnsupportedMathML(mathml);
- const omml = mml2omml(cleaned);
+ const ommlOutput = mml2omml(cleaned);
+ // mml2omml may not return a string; String() coerces it via the value's toString()
+ const omml = typeof ommlOutput === 'string' ? ommlOutput : String(ommlOutput);
const szHundredths = fontSize ? Math.round(fontSize * 100) : undefined;
return postProcessOmml(omml, szHundredths);
} catch {
diff --git a/lib/hooks/use-scene-generator.ts b/lib/hooks/use-scene-generator.ts
index 1c7d540fa..67019d775 100644
--- a/lib/hooks/use-scene-generator.ts
+++ b/lib/hooks/use-scene-generator.ts
@@ -225,6 +225,7 @@ export async function generateAndStoreTTS(
ttsSpeed: settings.ttsSpeed,
ttsApiKey: ttsProviderConfig?.apiKey || undefined,
ttsBaseUrl: ttsProviderConfig?.baseUrl || undefined,
+ ttsModelId: ttsProviderConfig?.modelId || undefined,
}),
signal,
});
diff --git a/lib/i18n/settings.ts b/lib/i18n/settings.ts
index 1d2579c95..1d10ed19e 100644
--- a/lib/i18n/settings.ts
+++ b/lib/i18n/settings.ts
@@ -209,6 +209,7 @@ export const settingsZhCN = {
ttsSpeed: '语速',
ttsBaseUrl: 'Base URL',
ttsApiKey: 'API 密钥',
+ ttsModelId: 'Model ID',
asrProvider: 'ASR 提供商',
asrLanguage: '识别语言',
asrBaseUrl: 'Base URL',
@@ -793,6 +794,7 @@ export const settingsEnUS = {
ttsSpeed: 'Speed',
ttsBaseUrl: 'Base URL',
ttsApiKey: 'API Key',
+ ttsModelId: 'Model ID',
asrProvider: 'ASR Provider',
asrLanguage: 'Recognition Language',
asrBaseUrl: 'Base URL',
diff --git a/lib/store/settings.ts b/lib/store/settings.ts
index 2ffc49ed8..7d229640b 100644
--- a/lib/store/settings.ts
+++ b/lib/store/settings.ts
@@ -48,6 +48,7 @@ export interface SettingsState {
apiKey: string;
baseUrl: string;
enabled: boolean;
+ modelId?: string;
isServerConfigured?: boolean;
serverBaseUrl?: string;
}
@@ -175,7 +176,7 @@ export interface SettingsState {
setASRLanguage: (language: string) => void;
setTTSProviderConfig: (
providerId: TTSProviderId,
- config: Partial<{ apiKey: string; baseUrl: string; enabled: boolean }>,
+ config: Partial<{ apiKey: string; baseUrl: string; enabled: boolean; modelId: string }>,
) => void;
setASRProviderConfig: (
providerId: ASRProviderId,