Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 13 additions & 10 deletions app/api/generate/tts/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,17 @@ export const maxDuration = 30;
export async function POST(req: NextRequest) {
try {
const body = await req.json();
const { text, audioId, ttsProviderId, ttsVoice, ttsSpeed, ttsApiKey, ttsBaseUrl } = body as {
text: string;
audioId: string;
ttsProviderId: TTSProviderId;
ttsVoice: string;
ttsSpeed?: number;
ttsApiKey?: string;
ttsBaseUrl?: string;
};
const { text, audioId, ttsProviderId, ttsModelId, ttsVoice, ttsSpeed, ttsApiKey, ttsBaseUrl } =
body as {
text: string;
audioId: string;
ttsProviderId: TTSProviderId;
ttsModelId?: string;
ttsVoice: string;
ttsSpeed?: number;
ttsApiKey?: string;
ttsBaseUrl?: string;
};

// Validate required fields
if (!text || !audioId || !ttsProviderId || !ttsVoice) {
Expand Down Expand Up @@ -64,14 +66,15 @@ export async function POST(req: NextRequest) {
// Build TTS config
const config = {
providerId: ttsProviderId,
modelId: ttsModelId,
voice: ttsVoice,
speed: ttsSpeed ?? 1.0,
apiKey,
baseUrl,
};

log.info(
`Generating TTS: provider=${ttsProviderId}, voice=${ttsVoice}, audioId=${audioId}, textLen=${text.length}`,
`Generating TTS: provider=${ttsProviderId}, model=${ttsModelId || 'default'}, voice=${ttsVoice}, audioId=${audioId}, textLen=${text.length}`,
);

// Generate audio
Expand Down
2 changes: 2 additions & 0 deletions app/api/transcription/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export async function POST(req: NextRequest) {
const formData = await req.formData();
const audioFile = formData.get('audio') as File;
const providerId = formData.get('providerId') as ASRProviderId | null;
const modelId = formData.get('modelId') as string | null;
const language = formData.get('language') as string | null;
const apiKey = formData.get('apiKey') as string | null;
const baseUrl = formData.get('baseUrl') as string | null;
Expand All @@ -35,6 +36,7 @@ export async function POST(req: NextRequest) {

const config = {
providerId: effectiveProviderId,
modelId: modelId || undefined,
language: language || 'auto',
apiKey: clientBaseUrl
? apiKey || ''
Expand Down
1 change: 1 addition & 0 deletions app/generation-preview/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,7 @@ function GenerationPreviewContent() {
text: action.text,
audioId,
ttsProviderId: settings.ttsProviderId,
ttsModelId: settings.ttsModelId,
ttsVoice: settings.ttsVoice,
ttsSpeed: settings.ttsSpeed,
ttsApiKey: ttsProviderConfig?.apiKey || undefined,
Expand Down
4 changes: 3 additions & 1 deletion components/audio/tts-config-popover.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ export function TtsConfigPopover() {
const ttsEnabled = useSettingsStore((s) => s.ttsEnabled);
const setTTSEnabled = useSettingsStore((s) => s.setTTSEnabled);
const ttsProviderId = useSettingsStore((s) => s.ttsProviderId);
const ttsModelId = useSettingsStore((s) => s.ttsModelId);
const ttsVoice = useSettingsStore((s) => s.ttsVoice);
const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig);
const setTTSVoice = useSettingsStore((s) => s.setTTSVoice);
Expand Down Expand Up @@ -70,6 +71,7 @@ export function TtsConfigPopover() {
text: '你好,欢迎来到AI课堂!让我们一起学习吧。',
audioId: 'preview',
ttsProviderId: ttsProviderId,
ttsModelId: ttsModelId,
ttsVoice: ttsVoice,
ttsApiKey: providerConfig?.apiKey,
ttsBaseUrl: providerConfig?.baseUrl,
Expand All @@ -95,7 +97,7 @@ export function TtsConfigPopover() {
} catch {
setPreviewing(false);
}
}, [ttsProviderId, ttsVoice, ttsProvidersConfig, previewing]);
}, [ttsProviderId, ttsModelId, ttsVoice, ttsProvidersConfig, previewing]);

return (
<Popover open={open} onOpenChange={setOpen}>
Expand Down
4 changes: 3 additions & 1 deletion components/generation/media-popover.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) {
const setVideoModelId = useSettingsStore((s) => s.setVideoModelId);

const ttsProviderId = useSettingsStore((s) => s.ttsProviderId);
const ttsModelId = useSettingsStore((s) => s.ttsModelId);
const ttsVoice = useSettingsStore((s) => s.ttsVoice);
const ttsSpeed = useSettingsStore((s) => s.ttsSpeed);
const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig);
Expand Down Expand Up @@ -198,6 +199,7 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) {
text: '你好,欢迎来到AI课堂!让我们一起学习吧。',
audioId: 'preview',
ttsProviderId,
ttsModelId,
ttsVoice,
ttsApiKey: providerConfig?.apiKey,
ttsBaseUrl: providerConfig?.baseUrl,
Expand All @@ -221,7 +223,7 @@ export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) {
} catch {
setPreviewing(false);
}
}, [ttsProviderId, ttsVoice, ttsProvidersConfig, previewing]);
}, [ttsProviderId, ttsModelId, ttsVoice, ttsProvidersConfig, previewing]);

// ASR: only available providers
const asrGroups = useMemo(
Expand Down
214 changes: 212 additions & 2 deletions components/settings/asr-settings.tsx
Original file line number Diff line number Diff line change
@@ -1,14 +1,27 @@
'use client';

import { useState, useRef } from 'react';
import { useState, useRef, useEffect, useCallback, useMemo } from 'react';
import { Label } from '@/components/ui/label';
import { Input } from '@/components/ui/input';
import { Button } from '@/components/ui/button';
import { Dialog, DialogContent, DialogDescription, DialogTitle } from '@/components/ui/dialog';
import { useI18n } from '@/lib/hooks/use-i18n';
import { useSettingsStore } from '@/lib/store/settings';
import { ASR_PROVIDERS } from '@/lib/audio/constants';
import type { ASRProviderId } from '@/lib/audio/types';
import { Mic, MicOff, CheckCircle2, XCircle, Eye, EyeOff } from 'lucide-react';
import {
Mic,
MicOff,
CheckCircle2,
XCircle,
Eye,
EyeOff,
Plus,
Settings2,
Trash2,
Circle,
CircleDot,
} from 'lucide-react';
import { cn } from '@/lib/utils';
import { createLogger } from '@/lib/logger';

Expand All @@ -21,18 +34,28 @@ interface ASRSettingsProps {
export function ASRSettings({ selectedProviderId }: ASRSettingsProps) {
const { t } = useI18n();

const asrModelId = useSettingsStore((state) => state.asrModelId);
const asrLanguage = useSettingsStore((state) => state.asrLanguage);
const asrProvidersConfig = useSettingsStore((state) => state.asrProvidersConfig);
const setASRProviderConfig = useSettingsStore((state) => state.setASRProviderConfig);
const setASRModelId = useSettingsStore((state) => state.setASRModelId);

const asrProvider = ASR_PROVIDERS[selectedProviderId] ?? ASR_PROVIDERS['openai-whisper'];
const builtInModels = useMemo(() => asrProvider.models || [], [asrProvider.models]);
const customModels = useMemo(
() => asrProvidersConfig[selectedProviderId]?.customModels || [],
[selectedProviderId, asrProvidersConfig],
);
const isServerConfigured = !!asrProvidersConfig[selectedProviderId]?.isServerConfigured;

const [showApiKey, setShowApiKey] = useState(false);
const [isRecording, setIsRecording] = useState(false);
const [asrResult, setASRResult] = useState('');
const [testStatus, setTestStatus] = useState<'idle' | 'testing' | 'success' | 'error'>('idle');
const [testMessage, setTestMessage] = useState('');
const [showModelDialog, setShowModelDialog] = useState(false);
const [editingModelIndex, setEditingModelIndex] = useState<number | null>(null);
const [modelForm, setModelForm] = useState({ id: '', name: '' });
const mediaRecorderRef = useRef<MediaRecorder | null>(null);

// Reset state when provider changes (derived state pattern)
Expand All @@ -45,6 +68,63 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) {
setASRResult('');
}

// Keep the selected ASR model consistent with the models currently offered
// (built-in plus custom). When the selection is not among them, fall back to
// the first built-in model, or else the first custom model. If no models are
// offered at all, the current selection is left untouched.
useEffect(() => {
  const offeredIds = new Set(
    [...builtInModels, ...customModels].map((entry) => entry.id),
  );
  if (offeredIds.size === 0 || offeredIds.has(asrModelId)) return;

  const fallbackId = builtInModels[0]?.id || customModels[0]?.id || '';
  if (fallbackId) {
    setASRModelId(fallbackId);
  }
}, [asrModelId, builtInModels, customModels, setASRModelId]);

// Opens the model dialog in "add" mode: no entry is being edited and the form
// starts out blank.
const handleOpenAddModel = () => {
  setModelForm({ id: '', name: '' });
  setEditingModelIndex(null);
  setShowModelDialog(true);
};

/**
 * Opens the model dialog in "edit" mode, pre-filled from the custom model at
 * `index`.
 *
 * Does nothing when there is no custom model at that index. Without this
 * guard the form state would become an empty object, and later
 * `modelForm.id.trim()` calls would throw.
 *
 * @param index Position of the model within `customModels`.
 */
const handleOpenEditModel = (index: number) => {
  const model = customModels[index];
  if (!model) return;

  setEditingModelIndex(index);
  // Copy only the fields the form edits so no extra properties leak into form state.
  setModelForm({ id: model.id, name: model.name });
  setShowModelDialog(true);
};

/**
 * Saves the model dialog form as a custom model for the selected provider,
 * selects that model, and closes the dialog.
 *
 * - A blank id (after trimming) is ignored.
 * - The display name falls back to the id when left blank.
 * - If the id already belongs to a built-in model or to a different custom
 *   model, no duplicate entry is written; the existing model with that id is
 *   selected instead. Duplicate ids would make several rows render as selected
 *   and would make deleting one of them reset the selection even though its
 *   twin remains.
 * - Re-saving an entry without changing its id is always allowed, so an
 *   existing entry can still be renamed.
 */
const handleSaveModel = useCallback(() => {
  const id = modelForm.id.trim();
  if (!id) return;

  // Treat a stale or out-of-range edit index as "add" so the array never becomes sparse.
  const editedIndex =
    editingModelIndex !== null &&
    editingModelIndex >= 0 &&
    editingModelIndex < customModels.length
      ? editingModelIndex
      : null;

  const idUnchanged = editedIndex !== null && customModels[editedIndex]?.id === id;
  const idTaken =
    !idUnchanged &&
    (builtInModels.some((model) => model.id === id) ||
      customModels.some((model, index) => index !== editedIndex && model.id === id));

  if (idTaken) {
    // Reuse the existing entry rather than storing a second model with the same id.
    setASRModelId(id);
    setShowModelDialog(false);
    return;
  }

  const normalizedModel = { id, name: modelForm.name.trim() || id };
  const nextCustomModels = [...customModels];
  if (editedIndex !== null) {
    nextCustomModels[editedIndex] = normalizedModel;
  } else {
    nextCustomModels.push(normalizedModel);
  }

  setASRProviderConfig(selectedProviderId, { customModels: nextCustomModels });
  setASRModelId(normalizedModel.id);
  setShowModelDialog(false);
}, [
  builtInModels,
  customModels,
  editingModelIndex,
  modelForm,
  selectedProviderId,
  setASRModelId,
  setASRProviderConfig,
]);

/**
 * Removes the custom model at `index` from the selected provider's config.
 * If the removed model was the selected one, the selection moves to the first
 * built-in model, else to the first remaining custom model, else to ''.
 */
const handleDeleteModel = (index: number) => {
  const removed = customModels[index];
  const remaining = customModels.filter((_model, position) => position !== index);
  setASRProviderConfig(selectedProviderId, { customModels: remaining });

  // Only touch the selection when it pointed at the model just removed.
  if (asrModelId !== removed?.id) return;

  const fallbackId = builtInModels[0]?.id || remaining[0]?.id || '';
  setASRModelId(fallbackId);
};

const handleToggleASRRecording = async () => {
if (isRecording) {
if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
Expand Down Expand Up @@ -104,6 +184,7 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) {
const formData = new FormData();
formData.append('audio', audioBlob, 'recording.webm');
formData.append('providerId', selectedProviderId);
formData.append('modelId', asrModelId);
formData.append('language', asrLanguage);
const apiKeyValue = asrProvidersConfig[selectedProviderId]?.apiKey;
if (apiKeyValue?.trim()) formData.append('apiKey', apiKeyValue);
Expand Down Expand Up @@ -281,6 +362,135 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) {
</div>
</div>
)}

<div className="space-y-3">
<div className="flex items-center justify-between flex-wrap gap-2">
<Label className="text-base">{t('settings.models')}</Label>
<Button variant="outline" size="sm" onClick={handleOpenAddModel} className="gap-1.5">
<Plus className="h-3.5 w-3.5" />
{t('settings.addNewModel')}
</Button>
</div>

<div className="space-y-1.5">
{builtInModels.map((model) => {
const selected = asrModelId === model.id;
return (
<button
key={model.id}
type="button"
onClick={() => setASRModelId(model.id)}
className={cn(
'w-full flex items-center gap-3 p-3 rounded-lg border text-left transition-colors',
selected
? 'border-primary/50 bg-primary/5'
: 'border-border/50 bg-card hover:bg-muted/40',
)}
>
{selected ? (
<CircleDot className="h-4 w-4 shrink-0 text-primary" />
) : (
<Circle className="h-4 w-4 shrink-0 text-muted-foreground" />
)}
<div className="flex-1 min-w-0">
<div className="font-mono text-sm font-medium">{model.name}</div>
<div className="text-xs text-muted-foreground font-mono mt-0.5">{model.id}</div>
</div>
</button>
);
})}

{customModels.map((model, index) => {
const selected = asrModelId === model.id;
return (
<div
key={`custom-${index}`}
className={cn(
'flex items-center gap-3 p-3 rounded-lg border transition-colors',
selected
? 'border-primary/50 bg-primary/5'
: 'border-border/50 bg-card hover:bg-muted/40',
)}
>
<button
type="button"
onClick={() => setASRModelId(model.id)}
className="flex items-center gap-3 flex-1 min-w-0 text-left"
>
{selected ? (
<CircleDot className="h-4 w-4 shrink-0 text-primary" />
) : (
<Circle className="h-4 w-4 shrink-0 text-muted-foreground" />
)}
<div className="flex-1 min-w-0">
<div className="font-mono text-sm font-medium">{model.name}</div>
<div className="text-xs text-muted-foreground font-mono mt-0.5">{model.id}</div>
</div>
</button>
<div className="flex items-center gap-1">
<Button
variant="outline"
size="sm"
className="h-8 px-2"
onClick={() => handleOpenEditModel(index)}
title={t('settings.editModel')}
>
<Settings2 className="h-3.5 w-3.5" />
</Button>
<Button
variant="outline"
size="sm"
className="h-8 px-2 text-destructive hover:text-destructive hover:bg-destructive/10"
onClick={() => handleDeleteModel(index)}
title={t('settings.deleteModel')}
>
<Trash2 className="h-3.5 w-3.5" />
</Button>
</div>
</div>
);
})}
</div>
</div>

<Dialog open={showModelDialog} onOpenChange={setShowModelDialog}>
<DialogContent className="sm:max-w-md">
<DialogTitle>
{editingModelIndex !== null ? t('settings.editModel') : t('settings.addNewModel')}
</DialogTitle>
<DialogDescription className="sr-only">
{editingModelIndex !== null ? t('settings.editModel') : t('settings.addNewModel')}
</DialogDescription>
<div className="space-y-4 pt-2">
<div className="space-y-2">
<Label>{t('settings.modelId')}</Label>
<Input
value={modelForm.id}
onChange={(e) => setModelForm((prev) => ({ ...prev, id: e.target.value }))}
placeholder="e.g. my-custom-asr-model"
className="h-8 font-mono text-sm"
/>
</div>
<div className="space-y-2">
<Label>{t('settings.modelName')}</Label>
<Input
value={modelForm.name}
onChange={(e) => setModelForm((prev) => ({ ...prev, name: e.target.value }))}
placeholder="e.g. My Custom ASR Model"
className="h-8 text-sm"
/>
</div>
<div className="flex justify-end gap-2">
<Button variant="outline" size="sm" onClick={() => setShowModelDialog(false)}>
{t('common.cancel')}
</Button>
<Button size="sm" onClick={handleSaveModel} disabled={!modelForm.id.trim()}>
{t('common.save')}
</Button>
</div>
</div>
</DialogContent>
</Dialog>
</div>
);
}
Loading
Loading