Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
38f74fb
fix: add configurable models for tts and asr
ShaojieLiu Mar 17, 2026
568ac52
style: format audio model settings changes
ShaojieLiu Mar 18, 2026
15dffc9
Merge branch 'main' into lsj/fix-configurable-tts-asr
ShaojieLiu Mar 19, 2026
3fdded0
Merge branch 'main' into lsj/fix-configurable-tts-asr
ShaojieLiu Mar 19, 2026
4b5e11a
Merge branch 'main' into lsj/fix-configurable-tts-asr
ShaojieLiu Mar 19, 2026
3d4d65e
Merge remote-tracking branch 'origin/main' into lsj/fix-configurable-…
ShaojieLiu Mar 19, 2026
eacfc5c
Merge remote-tracking branch 'github/lsj/fix-configurable-tts-asr' in…
ShaojieLiu Mar 19, 2026
a34884d
Merge branch 'main' into lsj/fix-configurable-tts-asr
ShaojieLiu Mar 20, 2026
ef3adcb
Merge remote-tracking branch 'origin/main' into lsj/fix-configurable-…
ShaojieLiu Mar 23, 2026
091b8e6
Merge remote-tracking branch 'github/lsj/fix-configurable-tts-asr' in…
ShaojieLiu Mar 23, 2026
5c852b2
fix: hide audio model selectors for unsupported providers
ShaojieLiu Mar 23, 2026
5193da9
chore: remove unused audio model helpers
ShaojieLiu Mar 23, 2026
bcab3e3
chore: restore settings comments and stable model keys
ShaojieLiu Mar 23, 2026
76751d1
Merge branch 'main' into lsj/fix-configurable-tts-asr
ShaojieLiu Mar 24, 2026
71e3255
Merge remote-tracking branch 'origin/main' into lsj/fix-configurable-…
ShaojieLiu Mar 24, 2026
f181a6e
test: update settings sync audio provider mocks
ShaojieLiu Mar 24, 2026
ea05f44
Merge remote-tracking branch 'github/lsj/fix-configurable-tts-asr' in…
ShaojieLiu Mar 24, 2026
20d8421
Merge remote-tracking branch 'origin/main' into lsj/fix-configurable-…
ShaojieLiu Mar 24, 2026
56eb2ef
Merge branch 'main' into lsj/fix-configurable-tts-asr
ShaojieLiu Mar 24, 2026
3feb22a
Merge branch 'main' into lsj/fix-configurable-tts-asr
ShaojieLiu Mar 25, 2026
7251b79
Merge remote-tracking branch 'origin/main' into lsj/fix-configurable-…
ShaojieLiu Mar 26, 2026
af9f7e6
Merge remote-tracking branch 'github/lsj/fix-configurable-tts-asr' in…
ShaojieLiu Mar 26, 2026
919c702
Merge branch 'main' into lsj/fix-configurable-tts-asr
ShaojieLiu Mar 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 13 additions & 10 deletions app/api/generate/tts/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,17 @@ export const maxDuration = 30;
export async function POST(req: NextRequest) {
try {
const body = await req.json();
const { text, audioId, ttsProviderId, ttsVoice, ttsSpeed, ttsApiKey, ttsBaseUrl } = body as {
text: string;
audioId: string;
ttsProviderId: TTSProviderId;
ttsVoice: string;
ttsSpeed?: number;
ttsApiKey?: string;
ttsBaseUrl?: string;
};
const { text, audioId, ttsProviderId, ttsModelId, ttsVoice, ttsSpeed, ttsApiKey, ttsBaseUrl } =
body as {
text: string;
audioId: string;
ttsProviderId: TTSProviderId;
ttsModelId?: string;
ttsVoice: string;
ttsSpeed?: number;
ttsApiKey?: string;
ttsBaseUrl?: string;
};

// Validate required fields
if (!text || !audioId || !ttsProviderId || !ttsVoice) {
Expand Down Expand Up @@ -64,14 +66,15 @@ export async function POST(req: NextRequest) {
// Build TTS config
const config = {
providerId: ttsProviderId,
modelId: ttsModelId,
voice: ttsVoice,
speed: ttsSpeed ?? 1.0,
apiKey,
baseUrl,
};

log.info(
`Generating TTS: provider=${ttsProviderId}, voice=${ttsVoice}, audioId=${audioId}, textLen=${text.length}`,
`Generating TTS: provider=${ttsProviderId}, model=${ttsModelId || 'default'}, voice=${ttsVoice}, audioId=${audioId}, textLen=${text.length}`,
);

// Generate audio
Expand Down
2 changes: 2 additions & 0 deletions app/api/transcription/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export async function POST(req: NextRequest) {
const formData = await req.formData();
const audioFile = formData.get('audio') as File;
const providerId = formData.get('providerId') as ASRProviderId | null;
const modelId = formData.get('modelId') as string | null;
const language = formData.get('language') as string | null;
const apiKey = formData.get('apiKey') as string | null;
const baseUrl = formData.get('baseUrl') as string | null;
Expand All @@ -35,6 +36,7 @@ export async function POST(req: NextRequest) {

const config = {
providerId: effectiveProviderId,
modelId: modelId || undefined,
language: language || 'auto',
apiKey: clientBaseUrl
? apiKey || ''
Expand Down
1 change: 1 addition & 0 deletions app/generation-preview/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,7 @@ function GenerationPreviewContent() {
text: action.text,
audioId,
ttsProviderId: settings.ttsProviderId,
ttsModelId: settings.ttsModelId,
ttsVoice: settings.ttsVoice,
ttsSpeed: settings.ttsSpeed,
ttsApiKey: ttsProviderConfig?.apiKey || undefined,
Expand Down
3 changes: 3 additions & 0 deletions components/audio/tts-config-popover.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export function TtsConfigPopover() {
const ttsEnabled = useSettingsStore((s) => s.ttsEnabled);
const setTTSEnabled = useSettingsStore((s) => s.setTTSEnabled);
const ttsProviderId = useSettingsStore((s) => s.ttsProviderId);
const ttsModelId = useSettingsStore((s) => s.ttsModelId);
const ttsVoice = useSettingsStore((s) => s.ttsVoice);
const ttsSpeed = useSettingsStore((s) => s.ttsSpeed);
const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig);
Expand Down Expand Up @@ -64,6 +65,7 @@ export function TtsConfigPopover() {
await startPreview({
text: t('settings.ttsTestTextDefault'),
providerId: ttsProviderId,
modelId: ttsModelId,
voice: ttsVoice,
speed: ttsSpeed,
apiKey: providerConfig?.apiKey,
Expand All @@ -79,6 +81,7 @@ export function TtsConfigPopover() {
startPreview,
stopPreview,
t,
ttsModelId,
ttsProviderId,
ttsProvidersConfig,
ttsSpeed,
Expand Down
3 changes: 3 additions & 0 deletions components/generation/media-popover.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
Mic,
SlidersHorizontal,
ChevronRight,
Play,

Check warning on line 12 in components/generation/media-popover.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

'Play' is defined but never used. Allowed unused vars must match /^_/u
Loader2,

Check warning on line 13 in components/generation/media-popover.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

'Loader2' is defined but never used. Allowed unused vars must match /^_/u
} from 'lucide-react';
import { toast } from 'sonner';
import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover';
Expand All @@ -24,7 +24,7 @@
SelectTrigger,
SelectValue,
} from '@/components/ui/select';
import { Slider } from '@/components/ui/slider';

Check warning on line 27 in components/generation/media-popover.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

'Slider' is defined but never used. Allowed unused vars must match /^_/u
import { Switch } from '@/components/ui/switch';
import { cn } from '@/lib/utils';
import { useI18n } from '@/lib/hooks/use-i18n';
Expand Down Expand Up @@ -132,12 +132,13 @@
const setVideoModelId = useSettingsStore((s) => s.setVideoModelId);

const ttsProviderId = useSettingsStore((s) => s.ttsProviderId);
const ttsModelId = useSettingsStore((s) => s.ttsModelId);
const ttsVoice = useSettingsStore((s) => s.ttsVoice);
const ttsSpeed = useSettingsStore((s) => s.ttsSpeed);
const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig);
const setTTSProvider = useSettingsStore((s) => s.setTTSProvider);

Check warning on line 139 in components/generation/media-popover.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

'setTTSProvider' is assigned a value but never used. Allowed unused vars must match /^_/u
const setTTSVoice = useSettingsStore((s) => s.setTTSVoice);

Check warning on line 140 in components/generation/media-popover.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

'setTTSVoice' is assigned a value but never used. Allowed unused vars must match /^_/u
const setTTSSpeed = useSettingsStore((s) => s.setTTSSpeed);

Check warning on line 141 in components/generation/media-popover.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

'setTTSSpeed' is assigned a value but never used. Allowed unused vars must match /^_/u

const asrProviderId = useSettingsStore((s) => s.asrProviderId);
const asrLanguage = useSettingsStore((s) => s.asrLanguage);
Expand Down Expand Up @@ -165,7 +166,7 @@
needsKey: boolean,
) => !needsKey || !!configs[id]?.apiKey || !!configs[id]?.isServerConfigured;

const ttsSpeedRange = TTS_PROVIDERS[ttsProviderId]?.speedRange;

Check warning on line 169 in components/generation/media-popover.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

'ttsSpeedRange' is assigned a value but never used. Allowed unused vars must match /^_/u

// ─── Dynamic browser voices ───
const [browserVoices, setBrowserVoices] = useState<SpeechSynthesisVoice[]>([]);
Expand Down Expand Up @@ -214,7 +215,7 @@

// TTS: grouped by provider, voices as items (matching Image/Video pattern)
// Browser-native voices are split into sub-groups by language.
const ttsGroups = useMemo(() => {

Check warning on line 218 in components/generation/media-popover.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

'ttsGroups' is assigned a value but never used. Allowed unused vars must match /^_/u
const groups: SelectGroupData[] = [];

for (const p of Object.values(TTS_PROVIDERS)) {
Expand Down Expand Up @@ -259,7 +260,7 @@
}, [ttsProvidersConfig, locale, browserVoices, t]);

// TTS preview
const handlePreview = useCallback(async () => {

Check warning on line 263 in components/generation/media-popover.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

'handlePreview' is assigned a value but never used. Allowed unused vars must match /^_/u
if (previewing) {
stopPreview();
return;
Expand All @@ -269,6 +270,7 @@
await startPreview({
text: t('settings.ttsTestTextDefault'),
providerId: ttsProviderId,
modelId: ttsModelId,
voice: ttsVoice,
speed: ttsSpeed,
apiKey: providerConfig?.apiKey,
Expand All @@ -284,6 +286,7 @@
startPreview,
stopPreview,
t,
ttsModelId,
ttsProviderId,
ttsProvidersConfig,
ttsSpeed,
Expand Down
230 changes: 228 additions & 2 deletions components/settings/asr-settings.tsx
Original file line number Diff line number Diff line change
@@ -1,14 +1,27 @@
'use client';

import { useState, useRef } from 'react';
import { useState, useRef, useEffect, useCallback, useMemo } from 'react';
import { Label } from '@/components/ui/label';
import { Input } from '@/components/ui/input';
import { Button } from '@/components/ui/button';
import { Dialog, DialogContent, DialogDescription, DialogTitle } from '@/components/ui/dialog';
import { useI18n } from '@/lib/hooks/use-i18n';
import { useSettingsStore } from '@/lib/store/settings';
import { ASR_PROVIDERS } from '@/lib/audio/constants';
import type { ASRProviderId } from '@/lib/audio/types';
import { Mic, MicOff, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react';
import {
Mic,
MicOff,
CheckCircle2,
XCircle,
Eye,
EyeOff,
Plus,
Settings2,
Trash2,
Circle,
CircleDot,
} from 'lucide-react';
import { cn } from '@/lib/utils';
import { createLogger } from '@/lib/logger';

Expand All @@ -21,18 +34,33 @@ interface ASRSettingsProps {
export function ASRSettings({ selectedProviderId }: ASRSettingsProps) {
const { t } = useI18n();

const asrModelId = useSettingsStore((state) => state.asrModelId);
const asrLanguage = useSettingsStore((state) => state.asrLanguage);
const asrProvidersConfig = useSettingsStore((state) => state.asrProvidersConfig);
const setASRProviderConfig = useSettingsStore((state) => state.setASRProviderConfig);
const setASRModelId = useSettingsStore((state) => state.setASRModelId);

const asrProvider = ASR_PROVIDERS[selectedProviderId] ?? ASR_PROVIDERS['openai-whisper'];
const supportsModelSelection = asrProvider.supportsModelSelection;
const builtInModels = useMemo(
() => (supportsModelSelection ? asrProvider.models : []),
[asrProvider.models, supportsModelSelection],
);
const customModels = useMemo(
() =>
supportsModelSelection ? asrProvidersConfig[selectedProviderId]?.customModels || [] : [],
[selectedProviderId, asrProvidersConfig, supportsModelSelection],
);
const isServerConfigured = !!asrProvidersConfig[selectedProviderId]?.isServerConfigured;

const [showApiKey, setShowApiKey] = useState(false);
const [isRecording, setIsRecording] = useState(false);
const [asrResult, setASRResult] = useState('');
const [testStatus, setTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>('idle');
const [testMessage, setTestMessage] = useState('');
const [showModelDialog, setShowModelDialog] = useState(false);
const [editingModelIndex, setEditingModelIndex] = useState<number | null>(null);
const [modelForm, setModelForm] = useState({ id: '', name: '' });
const mediaRecorderRef = useRef<MediaRecorder | null>(null);

// Reset state when provider changes (derived state pattern)
Expand All @@ -45,6 +73,67 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) {
setASRResult('');
}

// Keep the selected ASR model id consistent with what the current provider offers.
useEffect(() => {
  // Providers without model selection must not keep a leftover model id.
  if (!supportsModelSelection) {
    if (asrModelId) {
      setASRModelId('');
    }
    return;
  }

  const availableModelIds = new Set(
    [...builtInModels, ...customModels].map((model) => model.id),
  );

  // Nothing to choose from, or the current choice is still valid: leave it untouched.
  if (availableModelIds.size === 0 || availableModelIds.has(asrModelId)) {
    return;
  }

  // Fall back to the first built-in model, then to the first custom model.
  const fallbackModelId = builtInModels[0]?.id || customModels[0]?.id || '';
  if (fallbackModelId) {
    setASRModelId(fallbackModelId);
  }
}, [asrModelId, builtInModels, customModels, setASRModelId, supportsModelSelection]);

// Open the model dialog in "add" mode with a blank form.
const handleOpenAddModel = () => {
  setModelForm({ id: '', name: '' });
  // A null editing index makes the save handler append a new custom model.
  setEditingModelIndex(null);
  setShowModelDialog(true);
};

// Open the model dialog in "edit" mode, pre-filled from the custom model at `index`.
const handleOpenEditModel = (index: number) => {
  const model = customModels[index];
  // Ignore a stale or out-of-range index: spreading `undefined` into the form would
  // drop `id`/`name`, and the dialog reads `modelForm.id.trim()` while rendering.
  if (!model) return;
  setEditingModelIndex(index);
  // Copy only the fields the form edits, so unrelated model fields never enter form state.
  setModelForm({ id: model.id, name: model.name });
  setShowModelDialog(true);
};

// Persist the model dialog form as a custom model for the current provider,
// select it, and close the dialog.
//
// - A blank (whitespace-only) id is rejected.
// - A blank name falls back to the id.
// - When editing, the entry at `editingModelIndex` is replaced; otherwise the
//   model is appended.
// - Any other custom entry with the same id is removed, so the custom list never
//   holds two models with one id (the list is rendered with `custom-${id}` keys
//   and selection is matched by id).
const handleSaveModel = useCallback(() => {
  const trimmedId = modelForm.id.trim();
  if (!trimmedId) return;

  const savedModel = {
    id: trimmedId,
    name: modelForm.name.trim() || trimmedId,
  };

  const nextCustomModels = [...customModels];
  let savedIndex: number;
  if (editingModelIndex !== null) {
    savedIndex = editingModelIndex;
    nextCustomModels[savedIndex] = savedModel;
  } else {
    savedIndex = nextCustomModels.length;
    nextCustomModels.push(savedModel);
  }

  // Walk backwards so splicing does not disturb the indices still to be visited.
  for (let i = nextCustomModels.length - 1; i >= 0; i--) {
    if (i !== savedIndex && nextCustomModels[i]?.id === savedModel.id) {
      nextCustomModels.splice(i, 1);
      // Removing an earlier entry shifts the saved entry one slot to the left.
      if (i < savedIndex) savedIndex--;
    }
  }

  setASRProviderConfig(selectedProviderId, { customModels: nextCustomModels });
  setASRModelId(savedModel.id);
  setShowModelDialog(false);
}, [
  customModels,
  editingModelIndex,
  modelForm,
  selectedProviderId,
  setASRModelId,
  setASRProviderConfig,
]);

// Remove the custom model at `index`; if it was the selected model, choose a replacement.
const handleDeleteModel = (index: number) => {
  const removedModelId = customModels[index]?.id;
  const remainingModels = customModels.filter((_, i) => i !== index);
  setASRProviderConfig(selectedProviderId, { customModels: remainingModels });

  // Selection is unaffected unless the deleted model was the active one.
  if (asrModelId !== removedModelId) return;

  // Prefer the first built-in model, then the first remaining custom model, else clear.
  setASRModelId(builtInModels[0]?.id || remainingModels[0]?.id || '');
};

const handleToggleASRRecording = async () => {
if (isRecording) {
if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
Expand Down Expand Up @@ -104,6 +193,7 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) {
const formData = new FormData();
formData.append('audio', audioBlob, 'recording.webm');
formData.append('providerId', selectedProviderId);
formData.append('modelId', asrModelId);
formData.append('language', asrLanguage);
const apiKeyValue = asrProvidersConfig[selectedProviderId]?.apiKey;
if (apiKeyValue?.trim()) formData.append('apiKey', apiKeyValue);
Expand Down Expand Up @@ -281,6 +371,142 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) {
</div>
</div>
)}

{/* Model Management */}
{supportsModelSelection && (
<div className="space-y-3">
<div className="flex items-center justify-between flex-wrap gap-2">
<Label className="text-base">{t('settings.models')}</Label>
<Button variant="outline" size="sm" onClick={handleOpenAddModel} className="gap-1.5">
<Plus className="h-3.5 w-3.5" />
{t('settings.addNewModel')}
</Button>
</div>

<div className="space-y-1.5">
{builtInModels.map((model) => {
const selected = asrModelId === model.id;
return (
<button
key={model.id}
type="button"
onClick={() => setASRModelId(model.id)}
className={cn(
'w-full flex items-center gap-3 p-3 rounded-lg border text-left transition-colors',
selected
? 'border-primary/50 bg-primary/5'
: 'border-border/50 bg-card hover:bg-muted/40',
)}
>
{selected ? (
<CircleDot className="h-4 w-4 shrink-0 text-primary" />
) : (
<Circle className="h-4 w-4 shrink-0 text-muted-foreground" />
)}
<div className="flex-1 min-w-0">
<div className="font-mono text-sm font-medium">{model.name}</div>
<div className="text-xs text-muted-foreground font-mono mt-0.5">{model.id}</div>
</div>
</button>
);
})}

{customModels.map((model, index) => {
const selected = asrModelId === model.id;
return (
<div
key={`custom-${model.id}`}
className={cn(
'flex items-center gap-3 p-3 rounded-lg border transition-colors',
selected
? 'border-primary/50 bg-primary/5'
: 'border-border/50 bg-card hover:bg-muted/40',
)}
>
<button
type="button"
onClick={() => setASRModelId(model.id)}
className="flex items-center gap-3 flex-1 min-w-0 text-left"
>
{selected ? (
<CircleDot className="h-4 w-4 shrink-0 text-primary" />
) : (
<Circle className="h-4 w-4 shrink-0 text-muted-foreground" />
)}
<div className="flex-1 min-w-0">
<div className="font-mono text-sm font-medium">{model.name}</div>
<div className="text-xs text-muted-foreground font-mono mt-0.5">
{model.id}
</div>
</div>
</button>
<div className="flex items-center gap-1">
<Button
variant="outline"
size="sm"
className="h-8 px-2"
onClick={() => handleOpenEditModel(index)}
title={t('settings.editModel')}
>
<Settings2 className="h-3.5 w-3.5" />
</Button>
<Button
variant="outline"
size="sm"
className="h-8 px-2 text-destructive hover:text-destructive hover:bg-destructive/10"
onClick={() => handleDeleteModel(index)}
title={t('settings.deleteModel')}
>
<Trash2 className="h-3.5 w-3.5" />
</Button>
</div>
</div>
);
})}
</div>
</div>
)}

{supportsModelSelection && (
<Dialog open={showModelDialog} onOpenChange={setShowModelDialog}>
<DialogContent className="sm:max-w-md">
<DialogTitle>
{editingModelIndex !== null ? t('settings.editModel') : t('settings.addNewModel')}
</DialogTitle>
<DialogDescription className="sr-only">
{editingModelIndex !== null ? t('settings.editModel') : t('settings.addNewModel')}
</DialogDescription>
<div className="space-y-4 pt-2">
<div className="space-y-2">
<Label>{t('settings.modelId')}</Label>
<Input
value={modelForm.id}
onChange={(e) => setModelForm((prev) => ({ ...prev, id: e.target.value }))}
placeholder="e.g. my-custom-asr-model"
className="h-8 font-mono text-sm"
/>
</div>
<div className="space-y-2">
<Label>{t('settings.modelName')}</Label>
<Input
value={modelForm.name}
onChange={(e) => setModelForm((prev) => ({ ...prev, name: e.target.value }))}
placeholder="e.g. My Custom ASR Model"
className="h-8 text-sm"
/>
</div>
<div className="flex justify-end gap-2">
<Button variant="outline" size="sm" onClick={() => setShowModelDialog(false)}>
{t('common.cancel')}
</Button>
<Button size="sm" onClick={handleSaveModel} disabled={!modelForm.id.trim()}>
{t('common.save')}
</Button>
</div>
</div>
</DialogContent>
</Dialog>
)}
</div>
);
}
Loading
Loading