Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions app/api/generate/tts/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,17 @@ export const maxDuration = 30;
export async function POST(req: NextRequest) {
try {
const body = await req.json();
const { text, audioId, ttsProviderId, ttsVoice, ttsSpeed, ttsApiKey, ttsBaseUrl } = body as {
text: string;
audioId: string;
ttsProviderId: TTSProviderId;
ttsVoice: string;
ttsSpeed?: number;
ttsApiKey?: string;
ttsBaseUrl?: string;
};
const { text, audioId, ttsProviderId, ttsVoice, ttsSpeed, ttsApiKey, ttsBaseUrl, ttsModelId } =
body as {
text: string;
audioId: string;
ttsProviderId: TTSProviderId;
ttsVoice: string;
ttsSpeed?: number;
ttsApiKey?: string;
ttsBaseUrl?: string;
ttsModelId?: string;
};

// Validate required fields
if (!text || !audioId || !ttsProviderId || !ttsVoice) {
Expand All @@ -56,6 +58,7 @@ export async function POST(req: NextRequest) {
speed: ttsSpeed ?? 1.0,
apiKey,
baseUrl,
modelId: ttsModelId || undefined,
};

log.info(
Expand Down
25 changes: 24 additions & 1 deletion components/settings/audio-settings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import {
getTTSVoices,
ASR_PROVIDERS,
getASRSupportedLanguages,
DEFAULT_TTS_MODELS,
} from '@/lib/audio/constants';
import type { TTSProviderId, ASRProviderId } from '@/lib/audio/types';
import { Volume2, Mic, MicOff, Loader2, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react';
Expand Down Expand Up @@ -112,7 +113,7 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {

const handleTTSProviderConfigChange = (
providerId: TTSProviderId,
config: Partial<{ apiKey: string; baseUrl: string; enabled: boolean }>,
config: Partial<{ apiKey: string; baseUrl: string; enabled: boolean; modelId: string }>,
) => {
setTTSProviderConfig(providerId, config);
onSave?.();
Expand Down Expand Up @@ -316,6 +317,11 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
requestBody.ttsBaseUrl = baseUrlValue;
}

const modelIdValue = ttsProvidersConfig[ttsProviderId]?.modelId;
if (modelIdValue && modelIdValue.trim()) {
requestBody.ttsModelId = modelIdValue;
}

const response = await fetch('/api/generate/tts', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
Expand Down Expand Up @@ -591,6 +597,23 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
/>
</div>
</div>

{/* Model ID input - only show for providers that use model ID */}
{DEFAULT_TTS_MODELS[ttsProviderId] && (
<div className="space-y-2">
<Label className="text-sm">{t('settings.ttsModelId')}</Label>
<Input
placeholder={DEFAULT_TTS_MODELS[ttsProviderId]}
value={ttsProvidersConfig[ttsProviderId]?.modelId || ''}
onChange={(e) =>
handleTTSProviderConfigChange(ttsProviderId, {
modelId: e.target.value,
})
}
className="text-sm"
/>
</div>
)}
{(() => {
const effectiveBaseUrl =
ttsProvidersConfig[ttsProviderId]?.baseUrl || ttsProvider.defaultBaseUrl || '';
Expand Down
20 changes: 19 additions & 1 deletion components/settings/tts-settings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { Input } from '@/components/ui/input';
import { Button } from '@/components/ui/button';
import { useI18n } from '@/lib/hooks/use-i18n';
import { useSettingsStore } from '@/lib/store/settings';
import { TTS_PROVIDERS, DEFAULT_TTS_VOICES } from '@/lib/audio/constants';
import { TTS_PROVIDERS, DEFAULT_TTS_VOICES, DEFAULT_TTS_MODELS } from '@/lib/audio/constants';
import type { TTSProviderId } from '@/lib/audio/types';
import { Volume2, Loader2, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react';
import { cn } from '@/lib/utils';
Expand Down Expand Up @@ -190,6 +190,24 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) {
/>
</div>
</div>

{/* Model ID input - only show for providers that use model ID */}
{DEFAULT_TTS_MODELS[selectedProviderId] && (
<div className="space-y-2">
<Label className="text-sm">{t('settings.ttsModelId')}</Label>
<Input
placeholder={DEFAULT_TTS_MODELS[selectedProviderId]}
value={ttsProvidersConfig[selectedProviderId]?.modelId || ''}
onChange={(e) =>
setTTSProviderConfig(selectedProviderId, {
modelId: e.target.value,
})
}
className="text-sm"
/>
</div>
)}

{/* Request URL Preview */}
{(() => {
const effectiveBaseUrl =
Expand Down
13 changes: 13 additions & 0 deletions lib/audio/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,19 @@ export const DEFAULT_TTS_VOICES: Record<TTSProviderId, string> = {
'browser-native-tts': 'default',
};

/**
 * Default model ID for each TTS provider, keyed by provider ID.
 *
 * Used as the fallback when the user has not configured a custom model ID:
 * the provider request code sends `config.modelId || DEFAULT_TTS_MODELS[id]`
 * as the `model` field.
 *
 * An empty string means the provider does not use a model ID parameter.
 * The settings UI depends on that convention: it renders the "Model ID"
 * input only when the entry for the selected provider is truthy, and shows
 * the entry as the input's placeholder.
 */
export const DEFAULT_TTS_MODELS: Record<TTSProviderId, string> = {
'openai-tts': 'gpt-4o-mini-tts',
// Empty: no model ID parameter, so no Model ID input is shown.
'azure-tts': '',
'glm-tts': 'glm-tts',
'qwen-tts': 'qwen3-tts-flash',
// Empty: no model ID parameter, so no Model ID input is shown.
'browser-native-tts': '',
};

/**
* Get voices for a specific TTS provider
*/
Expand Down
11 changes: 6 additions & 5 deletions lib/audio/tts-providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@
*/

import type { TTSModelConfig } from './types';
import { TTS_PROVIDERS } from './constants';
import { TTS_PROVIDERS, DEFAULT_TTS_MODELS } from './constants';

/**
* Result of TTS generation
Expand Down Expand Up @@ -149,15 +149,15 @@ async function generateOpenAITTS(
): Promise<TTSGenerationResult> {
const baseUrl = config.baseUrl || TTS_PROVIDERS['openai-tts'].defaultBaseUrl;

// Use gpt-4o-mini-tts for best quality and intelligent realtime applications
// Use configurable model, fallback to default
const response = await fetch(`${baseUrl}/audio/speech`, {
method: 'POST',
headers: {
Authorization: `Bearer ${config.apiKey}`,
'Content-Type': 'application/json; charset=utf-8',
},
body: JSON.stringify({
model: 'gpt-4o-mini-tts',
model: config.modelId || DEFAULT_TTS_MODELS['openai-tts'],
input: text,
voice: config.voice,
speed: config.speed || 1.0,
Expand Down Expand Up @@ -229,7 +229,7 @@ async function generateGLMTTS(config: TTSModelConfig, text: string): Promise<TTS
'Content-Type': 'application/json; charset=utf-8',
},
body: JSON.stringify({
model: 'glm-tts',
model: config.modelId || DEFAULT_TTS_MODELS['glm-tts'],
input: text,
voice: config.voice,
speed: config.speed || 1.0,
Expand Down Expand Up @@ -276,7 +276,7 @@ async function generateQwenTTS(config: TTSModelConfig, text: string): Promise<TT
'Content-Type': 'application/json; charset=utf-8',
},
body: JSON.stringify({
model: 'qwen3-tts-flash',
model: config.modelId || DEFAULT_TTS_MODELS['qwen-tts'],
input: {
text,
voice: config.voice,
Expand Down Expand Up @@ -337,6 +337,7 @@ export async function getCurrentTTSConfig(): Promise<TTSModelConfig> {
baseUrl: providerConfig?.baseUrl,
voice: ttsVoice,
speed: ttsSpeed,
modelId: providerConfig?.modelId,
};
}

Expand Down
7 changes: 3 additions & 4 deletions lib/audio/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ export interface TTSModelConfig {
voice: string;
speed?: number;
format?: string;
modelId?: string;
}

// ============================================================================
Expand All @@ -143,10 +144,8 @@ export interface TTSModelConfig {
*/
export type ASRProviderId = 'openai-whisper' | 'browser-native' | 'qwen-asr';
// Add new ASR providers below (uncomment and modify):
// | 'elevenlabs-asr'
// | 'assemblyai-asr'
// | 'deepgram-asr'
// | 'azure-asr'
// | 'assemblyai'
// | 'deepgram'

/**
* ASR Provider Configuration
Expand Down
4 changes: 3 additions & 1 deletion lib/export/latex-to-omml.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ export function latexToOmml(latex: string, fontSize?: number): string | null {
try {
const mathml = temml.renderToString(latex);
const cleaned = stripUnsupportedMathML(mathml);
const omml = mml2omml(cleaned);
const ommlOutput = mml2omml(cleaned);
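// mml2omml may not return a plain string; coerce the result with String() so postProcessOmml
// always receives a string. NOTE(review): a plain object would become "[object Object]" here — confirm the actual return type.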
const omml = typeof ommlOutput === 'string' ? ommlOutput : String(ommlOutput);
const szHundredths = fontSize ? Math.round(fontSize * 100) : undefined;
return postProcessOmml(omml, szHundredths);
} catch {
Expand Down
1 change: 1 addition & 0 deletions lib/hooks/use-scene-generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ export async function generateAndStoreTTS(
ttsSpeed: settings.ttsSpeed,
ttsApiKey: ttsProviderConfig?.apiKey || undefined,
ttsBaseUrl: ttsProviderConfig?.baseUrl || undefined,
ttsModelId: ttsProviderConfig?.modelId || undefined,
}),
signal,
});
Expand Down
2 changes: 2 additions & 0 deletions lib/i18n/settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ export const settingsZhCN = {
ttsSpeed: '语速',
ttsBaseUrl: 'Base URL',
ttsApiKey: 'API 密钥',
ttsModelId: 'Model ID',
asrProvider: 'ASR 提供商',
asrLanguage: '识别语言',
asrBaseUrl: 'Base URL',
Expand Down Expand Up @@ -793,6 +794,7 @@ export const settingsEnUS = {
ttsSpeed: 'Speed',
ttsBaseUrl: 'Base URL',
ttsApiKey: 'API Key',
ttsModelId: 'Model ID',
asrProvider: 'ASR Provider',
asrLanguage: 'Recognition Language',
asrBaseUrl: 'Base URL',
Expand Down
3 changes: 2 additions & 1 deletion lib/store/settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ export interface SettingsState {
apiKey: string;
baseUrl: string;
enabled: boolean;
modelId?: string;
isServerConfigured?: boolean;
serverBaseUrl?: string;
}
Expand Down Expand Up @@ -175,7 +176,7 @@ export interface SettingsState {
setASRLanguage: (language: string) => void;
setTTSProviderConfig: (
providerId: TTSProviderId,
config: Partial<{ apiKey: string; baseUrl: string; enabled: boolean }>,
config: Partial<{ apiKey: string; baseUrl: string; enabled: boolean; modelId: string }>,
) => void;
setASRProviderConfig: (
providerId: ASRProviderId,
Expand Down
Loading