@@ -276,6 +282,7 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) {
} else {
switch (selectedProviderId) {
case 'openai-whisper':
+ case 'lemonade-asr':
endpointPath = '/audio/transcriptions';
break;
case 'qwen-asr':
diff --git a/components/settings/audio-settings.tsx b/components/settings/audio-settings.tsx
index d0e351ea8..d225f19bb 100644
--- a/components/settings/audio-settings.tsx
+++ b/components/settings/audio-settings.tsx
@@ -28,6 +28,7 @@ import azureVoicesData from '@/lib/audio/azure.json';
import { createLogger } from '@/lib/logger';
import { getVoxCPMVoiceOptions, useVoxCPMVoiceProfiles } from '@/lib/audio/voxcpm-voices';
import { normalizeVoxCPMBackend, voxCPMBackendSupportsReferenceAudio } from '@/lib/audio/voxcpm';
+import { normalizeASRUploadAudio } from '@/lib/audio/wav-utils';
const log = createLogger('AudioSettings');
@@ -44,6 +45,7 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin
'doubao-tts': t('settings.providerDoubaoTTS'),
'elevenlabs-tts': t('settings.providerElevenLabsTTS'),
'minimax-tts': t('settings.providerMiniMaxTTS'),
+ 'lemonade-tts': t('settings.providerLemonadeTTS'),
'browser-native-tts': t('settings.providerBrowserNativeTTS'),
};
return names[providerId];
@@ -54,6 +56,7 @@ function getASRProviderName(providerId: ASRProviderId, t: (key: string) => strin
'openai-whisper': t('settings.providerOpenAIWhisper'),
'browser-native': t('settings.providerBrowserNative'),
'qwen-asr': t('settings.providerQwenASR'),
+ 'lemonade-asr': t('settings.providerLemonadeASR'),
};
return names[providerId];
}
@@ -329,26 +332,27 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) {
mediaRecorder.onstop = async () => {
stream.getTracks().forEach((track) => track.stop());
- const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
- const formData = new FormData();
- formData.append('audio', audioBlob, 'recording.webm');
- formData.append('providerId', asrProviderId);
- formData.append('language', asrLanguage);
-
- // Only append non-empty values
- const apiKeyValue = asrProvidersConfig[asrProviderId]?.apiKey;
- if (apiKeyValue && apiKeyValue.trim()) {
- formData.append('apiKey', apiKeyValue);
- }
- const baseUrlValue =
- asrProvidersConfig[asrProviderId]?.baseUrl ||
- asrProvidersConfig[asrProviderId]?.customDefaultBaseUrl ||
- '';
- if (baseUrlValue && baseUrlValue.trim()) {
- formData.append('baseUrl', baseUrlValue);
- }
-
try {
+ const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
+ const uploadAudio = await normalizeASRUploadAudio(asrProviderId, audioBlob);
+ const formData = new FormData();
+ formData.append('audio', uploadAudio.blob, uploadAudio.fileName);
+ formData.append('providerId', asrProviderId);
+ formData.append('language', asrLanguage);
+
+ // Only append non-empty values
+ const apiKeyValue = asrProvidersConfig[asrProviderId]?.apiKey;
+ if (apiKeyValue && apiKeyValue.trim()) {
+ formData.append('apiKey', apiKeyValue);
+ }
+ const baseUrlValue =
+ asrProvidersConfig[asrProviderId]?.baseUrl ||
+ asrProvidersConfig[asrProviderId]?.customDefaultBaseUrl ||
+ '';
+ if (baseUrlValue && baseUrlValue.trim()) {
+ formData.append('baseUrl', baseUrlValue);
+ }
+
const response = await fetch('/api/transcription', {
method: 'POST',
body: formData,
diff --git a/components/settings/image-settings.tsx b/components/settings/image-settings.tsx
index 0931cb2b1..d0bdc619c 100644
--- a/components/settings/image-settings.tsx
+++ b/components/settings/image-settings.tsx
@@ -60,6 +60,7 @@ export function ImageSettings({ selectedProviderId }: ImageSettingsProps) {
[currentConfig?.customModels],
);
const isServerConfigured = !!currentConfig?.isServerConfigured;
+ const requiresApiKey = currentProvider?.requiresApiKey ?? true;
const handleApiKeyChange = (apiKey: string) => {
setImageProviderConfig(selectedProviderId, { apiKey });
@@ -179,7 +180,9 @@ export function ImageSettings({ selectedProviderId }: ImageSettingsProps) {
variant="outline"
size="sm"
onClick={handleTest}
- disabled={testLoading || (!currentConfig?.apiKey && !isServerConfigured)}
+ disabled={
+ testLoading || (requiresApiKey && !currentConfig?.apiKey && !isServerConfigured)
+ }
className="gap-1.5"
>
{testLoading ? (
diff --git a/components/settings/index.tsx b/components/settings/index.tsx
index 0a2ebac41..3d9f483b4 100644
--- a/components/settings/index.tsx
+++ b/components/settings/index.tsx
@@ -145,6 +145,7 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin
'doubao-tts': t('settings.providerDoubaoTTS'),
'elevenlabs-tts': t('settings.providerElevenLabsTTS'),
'minimax-tts': t('settings.providerMiniMaxTTS'),
+ 'lemonade-tts': t('settings.providerLemonadeTTS'),
'browser-native-tts': t('settings.providerBrowserNativeTTS'),
};
return names[providerId] || providerId;
@@ -159,6 +160,7 @@ function getASRProviderName(providerId: ASRProviderId, t: (key: string) => strin
'openai-whisper': t('settings.providerOpenAIWhisper'),
'browser-native': t('settings.providerBrowserNative'),
'qwen-asr': t('settings.providerQwenASR'),
+ 'lemonade-asr': t('settings.providerLemonadeASR'),
};
return names[providerId] || providerId;
}
@@ -171,6 +173,7 @@ const IMAGE_PROVIDER_NAMES: Record
= {
'nano-banana': 'providerNanoBanana',
'minimax-image': 'providerMiniMaxImage',
'grok-image': 'providerGrokImage',
+ lemonade: 'providerLemonadeImage',
};
const IMAGE_PROVIDER_ICONS: Record<ImageProviderId, string> = {
@@ -180,6 +183,7 @@ const IMAGE_PROVIDER_ICONS: Record = {
'nano-banana': '/logos/gemini.svg',
'minimax-image': '/logos/minimax.svg',
'grok-image': '/logos/grok.svg',
+ lemonade: '/logos/lemonade.svg',
};
const VIDEO_PROVIDER_NAMES: Record = {
diff --git a/components/settings/model-selector.tsx b/components/settings/model-selector.tsx
index 57df7f58b..ef0ba443a 100644
--- a/components/settings/model-selector.tsx
+++ b/components/settings/model-selector.tsx
@@ -75,7 +75,7 @@ export function ModelSelector({
([, config]) =>
(config.requiresApiKey
? config.apiKey || config.isServerConfigured
- : config.isServerConfigured || config.baseUrl) &&
+ : config.isServerConfigured || config.baseUrl || config.defaultBaseUrl) &&
config.models.length >= 1 &&
(config.baseUrl || config.defaultBaseUrl || config.serverBaseUrl),
)
diff --git a/components/settings/tts-settings.tsx b/components/settings/tts-settings.tsx
index 010358aa0..ac28e2e51 100644
--- a/components/settings/tts-settings.tsx
+++ b/components/settings/tts-settings.tsx
@@ -93,6 +93,7 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) {
const requiresApiKey = isCustom
? !!providerConfig?.requiresApiKey
: !!ttsProvider?.requiresApiKey;
+ const isKeylessLocalProvider = !isCustom && !requiresApiKey && !!ttsProvider?.defaultBaseUrl;
// When testing a non-active provider, use that provider's default voice
// instead of the active provider's voice (which may be incompatible).
@@ -192,6 +193,7 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) {
switch (selectedProviderId) {
case 'openai-tts':
case 'glm-tts':
+ case 'lemonade-tts':
return '/audio/speech';
case 'azure-tts':
return '/cognitiveservices/v1';
@@ -225,7 +227,7 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) {
)}
{/* API Key & Base URL */}
- {(requiresApiKey || isServerConfigured || isCustom || isVoxCPM) &&
+ {(requiresApiKey || isServerConfigured || isCustom || isVoxCPM || isKeylessLocalProvider) &&
(isVoxCPM ? (
diff --git a/lib/ai/model-metadata.ts b/lib/ai/model-metadata.ts
index 425cf9b1e..3a08cc6d7 100644
--- a/lib/ai/model-metadata.ts
+++ b/lib/ai/model-metadata.ts
@@ -163,6 +163,12 @@ const hunyuanHy3Effort: ThinkingCapability = {
defaultEnabled: false,
};
+const lemonadeToggleBudget = toggleBudgetCapability(
+ 'lemonade',
+ { min: 0, max: 81920, step: 1024, disableValue: 0 },
+ false,
+);
+
const qwenBudgetEnabled = toggleBudgetCapability(
'qwen',
{ min: 0, max: 81920, step: 1024, disableValue: 0 },
@@ -328,13 +334,25 @@ const THINKING_CAPABILITIES: Record
= {
[getModelMetadataKey('xiaomi', 'mimo-v2.5-pro')]: toggleCapability('xiaomi'),
[getModelMetadataKey('xiaomi', 'mimo-v2.5')]: toggleCapability('xiaomi'),
+
+ [getModelMetadataKey('lemonade', 'Qwen3-4B-GGUF')]: lemonadeToggleBudget,
+ [getModelMetadataKey('lemonade', 'Qwen3.5-4B-GGUF')]: lemonadeToggleBudget,
+ [getModelMetadataKey('lemonade', 'gpt-oss-20b')]: lemonadeToggleBudget,
+ [getModelMetadataKey('lemonade', 'GPT-OSS-20B-GGUF')]: lemonadeToggleBudget,
};
export function getCatalogThinkingCapability(
providerId: string,
modelId: string,
): ThinkingCapability | undefined {
- return THINKING_CAPABILITIES[getModelMetadataKey(providerId, modelId)];
+ const exact = THINKING_CAPABILITIES[getModelMetadataKey(providerId, modelId)];
+ if (exact) return exact;
+
+ if (providerId === 'lemonade') {
+ return lemonadeToggleBudget;
+ }
+
+ return undefined;
}
export function applyModelMetadata(providers: Record): void {
diff --git a/lib/ai/providers.ts b/lib/ai/providers.ts
index 011089b30..348cd62c6 100644
--- a/lib/ai/providers.ts
+++ b/lib/ai/providers.ts
@@ -6,7 +6,7 @@
* - Anthropic Claude (native)
* - Google Gemini (native)
* - MiniMax (Anthropic-compatible, recommended by official)
- * - OpenAI-compatible providers (DeepSeek, Qwen, Kimi, GLM, SiliconFlow, Doubao, Tencent, Xiaomi, etc.)
+ * - OpenAI-compatible providers (DeepSeek, Qwen, Kimi, GLM, SiliconFlow, Doubao, Tencent, Xiaomi, Lemonade, etc.)
*
* Sources:
* - https://platform.openai.com/docs/models
@@ -35,7 +35,7 @@ import type {
ThinkingConfig,
} from '@/lib/types/provider';
import { applyModelMetadata, getCatalogThinkingCapability } from './model-metadata';
-import { getThinkingMode, pickThinkingBudget } from './thinking-config';
+import { getDefaultThinkingConfig, getThinkingMode, pickThinkingBudget } from './thinking-config';
import { createLogger } from '@/lib/logger';
// NOTE: Do NOT import thinking-context.ts here — it uses node:async_hooks
// which is server-only, and this file is also used on the client via
@@ -980,6 +980,37 @@ export const PROVIDERS: Record = {
},
],
},
+
+ lemonade: {
+ id: 'lemonade',
+ name: 'Lemonade',
+ type: 'openai',
+ defaultBaseUrl: 'http://localhost:13305/v1',
+ requiresApiKey: false,
+ icon: '/logos/lemonade.svg',
+ models: [
+ {
+ id: 'Qwen3.5-4B-GGUF',
+ name: 'Qwen3.5 4B GGUF',
+ capabilities: { streaming: true, tools: true, vision: true },
+ },
+ {
+ id: 'Qwen3-4B-GGUF',
+ name: 'Qwen3 4B GGUF',
+ capabilities: { streaming: true, tools: true, vision: false },
+ },
+ {
+ id: 'gpt-oss-20b',
+ name: 'GPT-OSS 20B',
+ capabilities: { streaming: true, tools: true, vision: false },
+ },
+ {
+ id: 'Gemma-4-26B-A4B-it-GGUF',
+ name: 'Gemma 4 26B A4B IT GGUF',
+ capabilities: { streaming: true, tools: true, vision: false },
+ },
+ ],
+ },
};
applyModelMetadata(PROVIDERS);
@@ -1127,6 +1158,19 @@ function getCompatThinkingBodyParams(
: undefined;
}
+ case 'lemonade': {
+      const chatTemplateKwargs: Record<string, unknown> = {};
+ if (mode === 'enabled') {
+ chatTemplateKwargs.enable_thinking = true;
+ } else {
+ chatTemplateKwargs.enable_thinking = false;
+ }
+ if (mode === 'enabled' && budget !== undefined) {
+ chatTemplateKwargs.thinking_budget = budget;
+ }
+ return { chat_template_kwargs: chatTemplateKwargs };
+ }
+
default:
return undefined;
}
@@ -1217,12 +1261,20 @@ export function getModel(config: ModelConfig): ModelWithInfo {
const thinkingCtx = (globalThis as Record).__thinkingContext as
| { getStore?: () => unknown }
| undefined;
- const thinking = thinkingCtx?.getStore?.() as ThinkingConfig | undefined;
+ const thinkingFromContext = thinkingCtx?.getStore?.() as ThinkingConfig | undefined;
+ const thinking =
+ thinkingFromContext ??
+ (providerId === 'lemonade'
+ ? getDefaultThinkingConfig(getCatalogThinkingCapability(providerId, config.modelId))
+ : undefined);
if (thinking && init?.body && typeof init.body === 'string') {
const extra = getCompatThinkingBodyParams(providerId, config.modelId, thinking);
if (extra) {
try {
const body = JSON.parse(init.body);
+ if (providerId === 'lemonade' && 'stream_options' in body) {
+ delete body.stream_options;
+ }
Object.assign(body, extra);
init = { ...init, body: JSON.stringify(body) };
} catch {
@@ -1230,7 +1282,44 @@ export function getModel(config: ModelConfig): ModelWithInfo {
}
}
}
- return globalThis.fetch(url, init);
+ const response = await globalThis.fetch(url, init);
+
+ if (providerId !== 'lemonade') {
+ return response;
+ }
+
+ const contentType = response.headers.get('content-type') || '';
+ let isStreamingRequest = false;
+ if (init?.body && typeof init.body === 'string') {
+ try {
+ const requestBody = JSON.parse(init.body);
+ isStreamingRequest = requestBody?.stream === true;
+ } catch {
+ /* ignore request-body inspection failure */
+ }
+ }
+
+ if (isStreamingRequest) {
+ return response;
+ }
+
+ try {
+ const cloned = response.clone();
+ const text = await cloned.text();
+
+ try {
+ JSON.parse(text);
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ log.warn(
+ `[Lemonade] Invalid JSON response from OpenAI-compatible path: status=${response.status}, contentType=${contentType || 'n/a'}, bodyLen=${text.length}, first=${JSON.stringify(text.slice(0, 500))}, last=${JSON.stringify(text.slice(Math.max(0, text.length - 500)))}, parseError=${message}`,
+ );
+ }
+ } catch (error) {
+ log.warn('[Lemonade] Failed to inspect JSON response body:', error);
+ }
+
+ return response;
};
}
diff --git a/lib/audio/asr-providers.ts b/lib/audio/asr-providers.ts
index 0fec13dc0..7de04e653 100644
--- a/lib/audio/asr-providers.ts
+++ b/lib/audio/asr-providers.ts
@@ -182,6 +182,9 @@ export async function transcribeAudio(
case 'qwen-asr':
return await transcribeQwenASR(config, audioBuffer);
+ case 'lemonade-asr':
+ return await transcribeLemonadeASR(config, audioBuffer);
+
default:
if (isCustomASRProvider(config.providerId)) {
return await transcribeOpenAIWhisper(config, audioBuffer);
@@ -190,6 +193,101 @@ export async function transcribeAudio(
}
}
+/**
+ * Lemonade ASR implementation (OpenAI-compatible multipart transcription).
+ *
+ * Lemonade currently supports WAV input and JSON response format.
+ */
+async function transcribeLemonadeASR(
+ config: ASRModelConfig,
+ audioBuffer: Buffer | Blob,
+): Promise {
+ const baseUrl = (config.baseUrl || ASR_PROVIDERS['lemonade-asr'].defaultBaseUrl || '').replace(
+ /\/$/,
+ '',
+ );
+
+ const audioBlob = await toAudioBlob(audioBuffer);
+ if (!(await isWavAudio(audioBlob))) {
+ throw new Error(
+ 'Lemonade ASR currently supports WAV input only. Recordings should be converted to WAV before upload.',
+ );
+ }
+
+ const formData = new FormData();
+ formData.set('file', audioBlob, 'audio.wav');
+ formData.set('model', config.modelId || ASR_PROVIDERS['lemonade-asr'].defaultModelId);
+ formData.set('response_format', 'json');
+ if (config.language && config.language !== 'auto') {
+ formData.set('language', config.language);
+ }
+
+ const response = await fetch(`${baseUrl}/audio/transcriptions`, {
+ method: 'POST',
+ headers: getOptionalBearerAuthHeaders(config.apiKey),
+ body: formData,
+ });
+
+ if (!response.ok) {
+ const errorText = await response.text().catch(() => response.statusText);
+ if (errorText.includes('audio is empty') || errorText.includes('too short')) {
+ return { text: '' };
+ }
+ throw new Error(`Lemonade ASR API error: ${errorText || response.statusText}`);
+ }
+
+ const data = await response.json();
+ return { text: typeof data.text === 'string' ? data.text : '' };
+}
+
+async function toAudioBlob(audioBuffer: Buffer | Blob): Promise<Blob> {
+ if (audioBuffer instanceof Blob) {
+ return audioBuffer;
+ }
+ if (audioBuffer instanceof Buffer) {
+ const arrayBuffer = audioBuffer.buffer.slice(
+ audioBuffer.byteOffset,
+ audioBuffer.byteOffset + audioBuffer.byteLength,
+ ) as ArrayBuffer;
+ return new Blob([arrayBuffer], { type: detectWavBuffer(audioBuffer) ? 'audio/wav' : '' });
+ }
+ throw new Error('Invalid audio buffer type');
+}
+
+async function isWavAudio(blob: Blob): Promise<boolean> {
+ if (blob.type.includes('audio/wav') || blob.type.includes('audio/x-wav')) {
+ return true;
+ }
+
+ if (blob instanceof File && /\.wav$/i.test(blob.name)) {
+ return true;
+ }
+
+ const header = await blob.slice(0, 12).arrayBuffer();
+ return detectWavBytes(new Uint8Array(header));
+}
+
+function detectWavBuffer(buffer: Buffer): boolean {
+ return (
+ buffer.byteLength >= 12 &&
+ buffer.toString('ascii', 0, 4) === 'RIFF' &&
+ buffer.toString('ascii', 8, 12) === 'WAVE'
+ );
+}
+
+function detectWavBytes(bytes: Uint8Array): boolean {
+ return (
+ bytes.byteLength >= 12 &&
+ String.fromCharCode(...bytes.slice(0, 4)) === 'RIFF' &&
+ String.fromCharCode(...bytes.slice(8, 12)) === 'WAVE'
+ );
+}
+
+function getOptionalBearerAuthHeaders(apiKey?: string): Record<string, string> {
+ const key = apiKey?.trim();
+ return key ? { Authorization: `Bearer ${key}` } : {};
+}
+
/**
* OpenAI Whisper implementation (using Vercel AI SDK)
*/
diff --git a/lib/audio/constants.ts b/lib/audio/constants.ts
index 1f5167b01..52966e09d 100644
--- a/lib/audio/constants.ts
+++ b/lib/audio/constants.ts
@@ -943,6 +943,86 @@ export const TTS_PROVIDERS: Record = {
supportedFormats: ['browser'], // Browser native audio
speedRange: { min: 0.1, max: 10.0, default: 1.0 },
},
+
+ 'lemonade-tts': {
+ id: 'lemonade-tts',
+ name: 'Lemonade TTS',
+ requiresApiKey: false,
+ defaultBaseUrl: 'http://localhost:13305/v1',
+ icon: '/logos/lemonade.svg',
+ models: [{ id: 'kokoro-v1', name: 'Kokoro v1' }],
+ defaultModelId: 'kokoro-v1',
+ voices: [
+ // American English — female
+ { id: 'af_alloy', name: 'Alloy', language: 'en-US', gender: 'female' },
+ { id: 'af_aoede', name: 'Aoede', language: 'en-US', gender: 'female' },
+ { id: 'af_bella', name: 'Bella', language: 'en-US', gender: 'female' },
+ { id: 'af_heart', name: 'Heart', language: 'en-US', gender: 'female' },
+ { id: 'af_jessica', name: 'Jessica', language: 'en-US', gender: 'female' },
+ { id: 'af_kore', name: 'Kore', language: 'en-US', gender: 'female' },
+ { id: 'af_nicole', name: 'Nicole', language: 'en-US', gender: 'female' },
+ { id: 'af_nova', name: 'Nova', language: 'en-US', gender: 'female' },
+ { id: 'af_river', name: 'River', language: 'en-US', gender: 'female' },
+ { id: 'af_sarah', name: 'Sarah', language: 'en-US', gender: 'female' },
+ { id: 'af_sky', name: 'Sky', language: 'en-US', gender: 'female' },
+ // American English — male
+ { id: 'am_adam', name: 'Adam', language: 'en-US', gender: 'male' },
+ { id: 'am_echo', name: 'Echo', language: 'en-US', gender: 'male' },
+ { id: 'am_eric', name: 'Eric', language: 'en-US', gender: 'male' },
+ { id: 'am_fenrir', name: 'Fenrir', language: 'en-US', gender: 'male' },
+ { id: 'am_liam', name: 'Liam', language: 'en-US', gender: 'male' },
+ { id: 'am_michael', name: 'Michael', language: 'en-US', gender: 'male' },
+ { id: 'am_onyx', name: 'Onyx', language: 'en-US', gender: 'male' },
+ { id: 'am_puck', name: 'Puck', language: 'en-US', gender: 'male' },
+ // British English — female
+ { id: 'bf_alice', name: 'Alice', language: 'en-GB', gender: 'female' },
+ { id: 'bf_emma', name: 'Emma', language: 'en-GB', gender: 'female' },
+ { id: 'bf_isabella', name: 'Isabella', language: 'en-GB', gender: 'female' },
+ { id: 'bf_lily', name: 'Lily', language: 'en-GB', gender: 'female' },
+ // British English — male
+ { id: 'bm_daniel', name: 'Daniel', language: 'en-GB', gender: 'male' },
+ { id: 'bm_fable', name: 'Fable', language: 'en-GB', gender: 'male' },
+ { id: 'bm_george', name: 'George', language: 'en-GB', gender: 'male' },
+ { id: 'bm_lewis', name: 'Lewis', language: 'en-GB', gender: 'male' },
+ // Mandarin Chinese — female
+ { id: 'zf_xiaobei', name: '晓贝', language: 'zh-CN', gender: 'female' },
+ { id: 'zf_xiaoni', name: '晓妮', language: 'zh-CN', gender: 'female' },
+ { id: 'zf_xiaoxiao', name: '晓晓', language: 'zh-CN', gender: 'female' },
+ { id: 'zf_xiaoyi', name: '晓伊', language: 'zh-CN', gender: 'female' },
+ // Mandarin Chinese — male
+ { id: 'zm_yunjian', name: '云健', language: 'zh-CN', gender: 'male' },
+ { id: 'zm_yunxi', name: '云希', language: 'zh-CN', gender: 'male' },
+ { id: 'zm_yunxia', name: '云夏', language: 'zh-CN', gender: 'male' },
+ { id: 'zm_yunyang', name: '云扬', language: 'zh-CN', gender: 'male' },
+ // Japanese — female
+ { id: 'jf_alpha', name: 'Alpha', language: 'ja-JP', gender: 'female' },
+ { id: 'jf_gongitsune', name: 'Gongitsune', language: 'ja-JP', gender: 'female' },
+ { id: 'jf_nezumi', name: 'Nezumi', language: 'ja-JP', gender: 'female' },
+ { id: 'jf_tebukuro', name: 'Tebukuro', language: 'ja-JP', gender: 'female' },
+ // Japanese — male
+ { id: 'jm_kumo', name: 'Kumo', language: 'ja-JP', gender: 'male' },
+ // Spanish
+ { id: 'ef_dora', name: 'Dora', language: 'es-ES', gender: 'female' },
+ { id: 'em_alex', name: 'Alex', language: 'es-ES', gender: 'male' },
+ { id: 'em_santa', name: 'Santa', language: 'es-ES', gender: 'male' },
+ // French
+ { id: 'ff_siwis', name: 'Siwis', language: 'fr-FR', gender: 'female' },
+ // Hindi
+ { id: 'hf_alpha', name: 'Alpha', language: 'hi-IN', gender: 'female' },
+ { id: 'hf_beta', name: 'Beta', language: 'hi-IN', gender: 'female' },
+ { id: 'hm_omega', name: 'Omega', language: 'hi-IN', gender: 'male' },
+ { id: 'hm_psi', name: 'Psi', language: 'hi-IN', gender: 'male' },
+ // Italian
+ { id: 'if_sara', name: 'Sara', language: 'it-IT', gender: 'female' },
+ { id: 'im_nicola', name: 'Nicola', language: 'it-IT', gender: 'male' },
+ // Brazilian Portuguese
+ { id: 'pf_dora', name: 'Dora', language: 'pt-BR', gender: 'female' },
+ { id: 'pm_alex', name: 'Alex', language: 'pt-BR', gender: 'male' },
+ { id: 'pm_santa', name: 'Santa', language: 'pt-BR', gender: 'male' },
+ ],
+ supportedFormats: ['wav'],
+ speedRange: { min: 0.25, max: 4.0, default: 1.0 },
+ },
};
/**
@@ -1142,6 +1222,25 @@ export const ASR_PROVIDERS: Record = {
],
supportedFormats: ['webm'], // MediaRecorder format
},
+
+ 'lemonade-asr': {
+ id: 'lemonade-asr',
+ name: 'Lemonade ASR',
+ requiresApiKey: false,
+ defaultBaseUrl: 'http://localhost:13305/v1',
+ icon: '/logos/lemonade.svg',
+ models: [
+ { id: 'Whisper-Base', name: 'Whisper Base' },
+ { id: 'Whisper-Large-v3', name: 'Whisper Large v3' },
+ { id: 'Whisper-Large-v3-Turbo', name: 'Whisper Large v3 Turbo' },
+ { id: 'Whisper-Medium', name: 'Whisper Medium' },
+ { id: 'Whisper-Small', name: 'Whisper Small' },
+ { id: 'Whisper-Tiny', name: 'Whisper Tiny' },
+ ],
+ defaultModelId: 'Whisper-Base',
+ supportedLanguages: CUSTOM_ASR_DEFAULT_LANGUAGES,
+ supportedFormats: ['wav'],
+ },
};
/**
@@ -1157,6 +1256,7 @@ export const DEFAULT_TTS_VOICES: Record = {
'doubao-tts': 'zh_female_vv_uranus_bigtts',
'elevenlabs-tts': 'EXAVITQu4vr4xnSDxMaL',
'minimax-tts': 'female-yujie',
+ 'lemonade-tts': 'af_heart',
'browser-native-tts': 'default',
};
@@ -1169,6 +1269,7 @@ export const DEFAULT_TTS_MODELS: Record = {
'doubao-tts': '',
'elevenlabs-tts': 'eleven_multilingual_v2',
'minimax-tts': 'speech-2.8-hd',
+ 'lemonade-tts': 'kokoro-v1',
'browser-native-tts': '',
};
diff --git a/lib/audio/tts-providers.ts b/lib/audio/tts-providers.ts
index 3019e940f..788e29d19 100644
--- a/lib/audio/tts-providers.ts
+++ b/lib/audio/tts-providers.ts
@@ -164,6 +164,9 @@ export async function generateTTS(
case 'elevenlabs-tts':
return await generateElevenLabsTTS(config, text);
+ case 'lemonade-tts':
+ return await generateLemonadeTTS(config, text);
+
case 'browser-native-tts':
throw new Error(
'Browser Native TTS must be handled client-side using Web Speech API. This provider cannot be used on the server.',
@@ -215,6 +218,47 @@ async function generateOpenAITTS(
};
}
+/**
+ * Lemonade TTS implementation (OpenAI-compatible /v1/audio/speech).
+ */
+async function generateLemonadeTTS(
+ config: TTSModelConfig,
+ text: string,
+): Promise {
+ const baseUrl = (config.baseUrl || TTS_PROVIDERS['lemonade-tts'].defaultBaseUrl || '').replace(
+ /\/$/,
+ '',
+ );
+ const modelId = config.modelId || TTS_PROVIDERS['lemonade-tts'].defaultModelId;
+ const voice = config.voice || 'af_heart';
+
+ const response = await fetch(`${baseUrl}/audio/speech`, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json; charset=utf-8',
+ ...getBackendAuthHeaders(config.apiKey),
+ },
+ body: JSON.stringify({
+ model: modelId,
+ input: text,
+ voice,
+ speed: config.speed || 1.0,
+ response_format: config.format || 'wav',
+ }),
+ });
+
+ if (!response.ok) {
+ throw new Error(`Lemonade TTS API error: ${await readTTSApiError(response)}`);
+ }
+
+ const arrayBuffer = await response.arrayBuffer();
+ const contentType = response.headers.get('content-type') || '';
+ return {
+ audio: new Uint8Array(arrayBuffer),
+ format: getAudioResponseFormat(contentType),
+ };
+}
+
/**
* VoxCPM2 TTS implementation.
*
diff --git a/lib/audio/types.ts b/lib/audio/types.ts
index 5e4cde562..d57960692 100644
--- a/lib/audio/types.ts
+++ b/lib/audio/types.ts
@@ -87,6 +87,7 @@ export type BuiltInTTSProviderId =
| 'doubao-tts'
| 'elevenlabs-tts'
| 'minimax-tts'
+ | 'lemonade-tts'
| 'browser-native-tts';
export type TTSProviderId = BuiltInTTSProviderId | `custom-tts-${string}`;
@@ -151,7 +152,11 @@ export interface TTSModelConfig {
* Add new ASR providers here as union members.
* Keep in sync with ASR_PROVIDERS registry in constants.ts
*/
-export type BuiltInASRProviderId = 'openai-whisper' | 'browser-native' | 'qwen-asr';
+export type BuiltInASRProviderId =
+ | 'openai-whisper'
+ | 'browser-native'
+ | 'qwen-asr'
+ | 'lemonade-asr';
export type ASRProviderId = BuiltInASRProviderId | `custom-asr-${string}`;
diff --git a/lib/audio/voice-resolver.ts b/lib/audio/voice-resolver.ts
index ca3c42932..c6051eaa1 100644
--- a/lib/audio/voice-resolver.ts
+++ b/lib/audio/voice-resolver.ts
@@ -130,6 +130,13 @@ export function getAvailableProvidersWithVoices(
const providerConfig = ttsProvidersConfig[providerId];
const hasApiKey = providerConfig?.apiKey && providerConfig.apiKey.trim().length > 0;
const isServerConfigured = providerConfig?.isServerConfigured === true;
+ const isKeylessLocalProvider =
+ !config.requiresApiKey &&
+ !!(
+ providerConfig?.serverBaseUrl?.trim() ||
+ providerConfig?.baseUrl?.trim() ||
+ config.defaultBaseUrl
+ );
const isLocalVoxCPM =
providerId === VOXCPM_TTS_PROVIDER_ID &&
!!(providerConfig?.serverBaseUrl?.trim() || providerConfig?.baseUrl?.trim());
@@ -141,7 +148,7 @@ export function getAvailableProvidersWithVoices(
})
: [];
- if (hasApiKey || isServerConfigured || isLocalVoxCPM) {
+ if (hasApiKey || isServerConfigured || isLocalVoxCPM || isKeylessLocalProvider) {
const allVoices = [
...config.voices.map((v) => ({
id: v.id,
diff --git a/lib/audio/wav-utils.ts b/lib/audio/wav-utils.ts
new file mode 100644
index 000000000..1a81a37f9
--- /dev/null
+++ b/lib/audio/wav-utils.ts
@@ -0,0 +1,84 @@
+'use client';
+
+function writeAscii(view: DataView, offset: number, value: string): void {
+ for (let i = 0; i < value.length; i++) {
+ view.setUint8(offset + i, value.charCodeAt(i));
+ }
+}
+
+function audioBufferToMonoWav(audioBuffer: AudioBuffer): ArrayBuffer {
+ const sampleRate = audioBuffer.sampleRate;
+ const sampleCount = audioBuffer.length;
+ const dataSize = sampleCount * 2;
+ const buffer = new ArrayBuffer(44 + dataSize);
+ const view = new DataView(buffer);
+
+ writeAscii(view, 0, 'RIFF');
+ view.setUint32(4, 36 + dataSize, true);
+ writeAscii(view, 8, 'WAVE');
+ writeAscii(view, 12, 'fmt ');
+ view.setUint32(16, 16, true);
+ view.setUint16(20, 1, true);
+ view.setUint16(22, 1, true);
+ view.setUint32(24, sampleRate, true);
+ view.setUint32(28, sampleRate * 2, true);
+ view.setUint16(32, 2, true);
+ view.setUint16(34, 16, true);
+ writeAscii(view, 36, 'data');
+ view.setUint32(40, dataSize, true);
+
+ const channels = Array.from({ length: audioBuffer.numberOfChannels }, (_, index) =>
+ audioBuffer.getChannelData(index),
+ );
+ let offset = 44;
+ for (let i = 0; i < sampleCount; i++) {
+ let mixed = 0;
+ for (const channel of channels) mixed += channel[i];
+ const sample = Math.max(-1, Math.min(1, mixed / channels.length));
+ view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7fff, true);
+ offset += 2;
+ }
+
+ return buffer;
+}
+
+export function isWavBlob(blob: Blob, fileName?: string): boolean {
+ return (
+ blob.type.includes('audio/wav') ||
+ blob.type.includes('audio/x-wav') ||
+ /\.wav$/i.test(fileName || '')
+ );
+}
+
+export async function audioBlobToWav(blob: Blob): Promise {
+ if (isWavBlob(blob)) return blob;
+ if (typeof window === 'undefined') {
+ throw new Error('Audio conversion requires a browser environment');
+ }
+
+ const AudioContextConstructor =
+ window.AudioContext ||
+ (window as typeof window & { webkitAudioContext?: typeof AudioContext }).webkitAudioContext;
+ if (!AudioContextConstructor) {
+ throw new Error('This browser does not support audio conversion');
+ }
+
+ const audioContext = new AudioContextConstructor();
+ try {
+ const arrayBuffer = await blob.arrayBuffer();
+ const audioBuffer = await audioContext.decodeAudioData(arrayBuffer.slice(0));
+ return new Blob([audioBufferToMonoWav(audioBuffer)], { type: 'audio/wav' });
+ } finally {
+ await audioContext.close().catch(() => undefined);
+ }
+}
+
+export async function normalizeASRUploadAudio(
+ providerId: string,
+ audioBlob: Blob,
+): Promise<{ blob: Blob; fileName: string }> {
+ if (providerId !== 'lemonade-asr') {
+ return { blob: audioBlob, fileName: 'recording.webm' };
+ }
+ return { blob: await audioBlobToWav(audioBlob), fileName: 'recording.wav' };
+}
diff --git a/lib/generation/json-repair.ts b/lib/generation/json-repair.ts
index 89f7fa0b4..a754c00fe 100644
--- a/lib/generation/json-repair.ts
+++ b/lib/generation/json-repair.ts
@@ -6,6 +6,32 @@ import { jsonrepair } from 'jsonrepair';
import { createLogger } from '@/lib/logger';
const log = createLogger('Generation');
+function repairQuotedPropertyFragments(jsonStr: string): string {
+ return jsonStr.replace(
+ /([,{]\s*)"([A-Za-z_][A-Za-z0-9_]*)\s*:\s*(true|false|null|[+-]?\d+(?:\.\d+)?)"(?=\s*[,}])/g,
+ (_match, prefix, key, value) => `${prefix}"${key}": ${value}`,
+ );
+}
+
+function logJsonParseError(stage: string, jsonStr: string, error: unknown): void {
+ const message = error instanceof Error ? error.message : String(error);
+ const positionMatch = message.match(/position\s+(\d+)/i);
+ const position = positionMatch ? Number(positionMatch[1]) : undefined;
+
+ if (typeof position === 'number' && Number.isFinite(position)) {
+ const start = Math.max(0, position - 120);
+ const end = Math.min(jsonStr.length, position + 120);
+ log.warn(
+ `${stage} parse error at position ${position}: ${message}. Context: ${jsonStr
+ .slice(start, end)
+ .replace(/\n/g, '\\n')}`,
+ );
+ return;
+ }
+
+ log.warn(`${stage} parse error: ${message}`);
+}
+
export function parseJsonResponse<T>(response: string): T | null {
// Strategy 1: Try to extract JSON from markdown code blocks (may have multiple)
const codeBlockMatches = response.matchAll(/```(?:json)?\s*([\s\S]*?)```/g);
@@ -90,6 +116,10 @@ export function parseJsonResponse(response: string): T | null {
log.error('Failed to parse JSON from response');
log.error('Raw response (first 500 chars):', response.substring(0, 500));
+ log.error(
+ 'Raw response (last 500 chars):',
+ response.substring(Math.max(0, response.length - 500)),
+ );
return null;
}
@@ -101,7 +131,8 @@ export function tryParseJson(jsonStr: string): T | null {
// Attempt 1: Try parsing as-is
try {
return JSON.parse(jsonStr) as T;
- } catch {
+ } catch (error) {
+ logJsonParseError('Attempt 1', jsonStr, error);
// Continue to fix attempts
}
@@ -109,6 +140,13 @@ export function tryParseJson(jsonStr: string): T | null {
try {
let fixed = jsonStr;
+ // Fix 0: Recover malformed property fragments that were accidentally
+ // emitted as standalone strings inside an object, such as:
+ // `"height: 76"` -> `"height": 76`
+ // `"fixedRatio: false"` -> `"fixedRatio": false`
+ // The object-context prefix/suffix guards keep valid JSON strings intact.
+ fixed = repairQuotedPropertyFragments(fixed);
+
// Fix 1: Handle LaTeX-style escapes that break JSON (e.g., \frac, \left, \right, \times, etc.)
// These are common in math content and need to be double-escaped
// Match backslash followed by letters (LaTeX commands) inside strings,
@@ -152,7 +190,8 @@ export function tryParseJson<T>(jsonStr: string): T | null {
}
return JSON.parse(fixed) as T;
- } catch {
+ } catch (error) {
+ logJsonParseError('Attempt 2', jsonStr, error);
// Continue to next attempt
}
@@ -160,7 +199,8 @@ export function tryParseJson<T>(jsonStr: string): T | null {
try {
const repaired = jsonrepair(jsonStr);
return JSON.parse(repaired) as T;
- } catch {
+ } catch (error) {
+ logJsonParseError('Attempt 3', jsonStr, error);
// Continue to next attempt
}
@@ -183,7 +223,8 @@ export function tryParseJson<T>(jsonStr: string): T | null {
});
return JSON.parse(fixed) as T;
- } catch {
+ } catch (error) {
+ logJsonParseError('Attempt 4', jsonStr, error);
return null;
}
}
diff --git a/lib/hooks/use-audio-recorder.ts b/lib/hooks/use-audio-recorder.ts
index 327985bb2..a19286e34 100644
--- a/lib/hooks/use-audio-recorder.ts
+++ b/lib/hooks/use-audio-recorder.ts
@@ -1,5 +1,6 @@
import { useState, useRef, useCallback } from 'react';
import { ASR_PROVIDERS } from '@/lib/audio/constants';
+import { normalizeASRUploadAudio } from '@/lib/audio/wav-utils';
import { createLogger } from '@/lib/logger';
const log = createLogger('AudioRecorder');
@@ -41,13 +42,14 @@ export function useAudioRecorder(options: UseAudioRecorderOptions = {}) {
try {
const formData = new FormData();
- formData.append('audio', audioBlob, 'recording.webm');
// Get current ASR configuration from settings store
// Note: This requires importing useSettingsStore in browser context
if (typeof window !== 'undefined') {
const { useSettingsStore } = await import('@/lib/store/settings');
const { asrProviderId, asrLanguage, asrProvidersConfig } = useSettingsStore.getState();
+ const uploadAudio = await normalizeASRUploadAudio(asrProviderId, audioBlob);
+ formData.append('audio', uploadAudio.blob, uploadAudio.fileName);
formData.append('providerId', asrProviderId);
formData.append(
@@ -68,6 +70,8 @@ export function useAudioRecorder(options: UseAudioRecorderOptions = {}) {
if (effectiveBaseUrl) {
formData.append('baseUrl', effectiveBaseUrl);
}
+ } else {
+ formData.append('audio', audioBlob, 'recording.webm');
}
const response = await fetch('/api/transcription', {
diff --git a/lib/i18n/locales/ar-SA.json b/lib/i18n/locales/ar-SA.json
index 5c178050c..92c3aacc5 100644
--- a/lib/i18n/locales/ar-SA.json
+++ b/lib/i18n/locales/ar-SA.json
@@ -454,6 +454,7 @@
"grok": "Grok",
"tencent-hunyuan": "Tencent Hunyuan",
"xiaomi": "Xiaomi MiMo",
+ "lemonade": "Lemonade (محلي)",
"ollama": "Ollama (محلي)",
"tavily": "Tavily",
"bocha": "Bocha"
@@ -603,6 +604,7 @@
"providerDoubaoTTS": "Doubao TTS 2.0 (فولكينجين)",
"providerElevenLabsTTS": "ElevenLabs TTS",
"providerMiniMaxTTS": "MiniMax TTS",
+ "providerLemonadeTTS": "Lemonade TTS (محلي)",
"providerBrowserNativeTTS": "تحويل النص إلى كلام المدمج في المتصفح",
"voxcpmBackend": "الخلفية",
"voxcpmBaseUrlPending": "أدخل Base URL لإنشاء عنوان الطلب",
@@ -649,6 +651,7 @@
"providerOpenAIWhisper": "OpenAI ASR (gpt-4o-mini-transcribe)",
"providerBrowserNative": "التعرّف على الكلام المدمج في المتصفح",
"providerQwenASR": "Qwen ASR (سحابة علي بابا بايليان)",
+ "providerLemonadeASR": "Lemonade ASR (محلي)",
"providerUnpdf": "unpdf (مُدمج)",
"providerMinerU": "MinerU",
"providerMinerUCloud": "MinerU (السحابي)",
@@ -900,6 +903,7 @@
"providerNanoBanana": "Nano Banana (Gemini)",
"providerMiniMaxImage": "MiniMax Image",
"providerGrokImage": "Grok Image (xAI)",
+ "providerLemonadeImage": "Lemonade Image (محلي)",
"testImageGeneration": "اختبار توليد الصور",
"testImageConnectivity": "اختبار الاتصال",
"imageConnectivitySuccess": "تم الاتصال بخدمة الصور بنجاح",
diff --git a/lib/i18n/locales/en-US.json b/lib/i18n/locales/en-US.json
index a36b0f15c..19b7282f2 100644
--- a/lib/i18n/locales/en-US.json
+++ b/lib/i18n/locales/en-US.json
@@ -454,6 +454,7 @@
"grok": "Grok",
"tencent-hunyuan": "Tencent Hunyuan",
"xiaomi": "Xiaomi MiMo",
+ "lemonade": "Lemonade (Local)",
"ollama": "Ollama (Local)",
"tavily": "Tavily",
"bocha": "Bocha"
@@ -603,6 +604,7 @@
"providerDoubaoTTS": "Doubao TTS 2.0 (Volcengine)",
"providerElevenLabsTTS": "ElevenLabs TTS",
"providerMiniMaxTTS": "MiniMax TTS",
+ "providerLemonadeTTS": "Lemonade TTS (Local)",
"providerBrowserNativeTTS": "Browser Native TTS",
"voxcpmBackend": "Backend",
"voxcpmBaseUrlPending": "Enter a Base URL to generate the request URL",
@@ -649,6 +651,7 @@
"providerOpenAIWhisper": "OpenAI ASR (gpt-4o-mini-transcribe)",
"providerBrowserNative": "Browser Native ASR",
"providerQwenASR": "Qwen ASR (Alibaba Cloud Bailian)",
+ "providerLemonadeASR": "Lemonade ASR (Local)",
"providerUnpdf": "unpdf (Built-in)",
"providerMinerU": "MinerU",
"providerMinerUCloud": "MinerU (Cloud)",
@@ -900,6 +903,7 @@
"providerNanoBanana": "Nano Banana (Gemini)",
"providerMiniMaxImage": "MiniMax Image",
"providerGrokImage": "Grok Image (xAI)",
+ "providerLemonadeImage": "Lemonade Image (Local)",
"testImageGeneration": "Test Image Generation",
"testImageConnectivity": "Test Connection",
"imageConnectivitySuccess": "Image service connected successfully",
diff --git a/lib/i18n/locales/ja-JP.json b/lib/i18n/locales/ja-JP.json
index d59022bb1..bd26eac12 100644
--- a/lib/i18n/locales/ja-JP.json
+++ b/lib/i18n/locales/ja-JP.json
@@ -454,6 +454,7 @@
"grok": "Grok",
"tencent-hunyuan": "Tencent Hunyuan",
"xiaomi": "Xiaomi MiMo",
+ "lemonade": "Lemonade(ローカル)",
"ollama": "Ollama(ローカルモデル)",
"tavily": "Tavily",
"bocha": "Bocha"
@@ -603,6 +604,7 @@
"providerDoubaoTTS": "Doubao TTS 2.0(火山エンジン)",
"providerElevenLabsTTS": "ElevenLabs TTS",
"providerMiniMaxTTS": "MiniMax TTS",
+ "providerLemonadeTTS": "Lemonade TTS(ローカル)",
"providerBrowserNativeTTS": "ブラウザネイティブTTS",
"voxcpmBackend": "バックエンド",
"voxcpmBaseUrlPending": "Base URL を入力するとリクエスト URL が生成されます",
@@ -649,6 +651,7 @@
"providerOpenAIWhisper": "OpenAI ASR (gpt-4o-mini-transcribe)",
"providerBrowserNative": "ブラウザネイティブASR",
"providerQwenASR": "Qwen ASR(Alibaba Cloud百錬)",
+ "providerLemonadeASR": "Lemonade ASR(ローカル)",
"providerUnpdf": "unpdf(組み込み)",
"providerMinerU": "MinerU",
"providerMinerUCloud": "MinerU(クラウド)",
@@ -900,6 +903,7 @@
"providerNanoBanana": "Nano Banana(Gemini)",
"providerMiniMaxImage": "MiniMax Image",
"providerGrokImage": "Grok Image(xAI)",
+ "providerLemonadeImage": "Lemonade Image(ローカル)",
"testImageGeneration": "画像生成をテスト",
"testImageConnectivity": "接続テスト",
"imageConnectivitySuccess": "画像サービスへの接続に成功しました",
diff --git a/lib/i18n/locales/ru-RU.json b/lib/i18n/locales/ru-RU.json
index c4be26124..fb5eca058 100644
--- a/lib/i18n/locales/ru-RU.json
+++ b/lib/i18n/locales/ru-RU.json
@@ -454,6 +454,7 @@
"grok": "Grok",
"tencent-hunyuan": "Tencent Hunyuan",
"xiaomi": "Xiaomi MiMo",
+ "lemonade": "Lemonade (Локальный)",
"ollama": "Ollama (Локальный)",
"tavily": "Tavily",
"bocha": "Bocha"
@@ -603,6 +604,7 @@
"providerDoubaoTTS": "Doubao TTS 2.0 (Volcengine)",
"providerElevenLabsTTS": "ElevenLabs TTS",
"providerMiniMaxTTS": "MiniMax TTS",
+ "providerLemonadeTTS": "Lemonade TTS (Локальный)",
"providerBrowserNativeTTS": "Встроенный TTS браузера",
"voxcpmBackend": "Бэкенд",
"voxcpmBaseUrlPending": "Введите Base URL, чтобы сформировать URL запроса",
@@ -649,6 +651,7 @@
"providerOpenAIWhisper": "OpenAI ASR (gpt-4o-mini-transcribe)",
"providerBrowserNative": "Встроенный ASR браузера",
"providerQwenASR": "Qwen ASR (Alibaba Cloud Bailian)",
+ "providerLemonadeASR": "Lemonade ASR (Локальный)",
"providerUnpdf": "unpdf (встроенный)",
"providerMinerU": "MinerU",
"providerMinerUCloud": "MinerU (Облако)",
@@ -900,6 +903,7 @@
"providerNanoBanana": "Nano Banana (Gemini)",
"providerMiniMaxImage": "MiniMax Image",
"providerGrokImage": "Grok Image (xAI)",
+ "providerLemonadeImage": "Lemonade Image (Локальный)",
"testImageGeneration": "Тест генерации изображений",
"testImageConnectivity": "Тест подключения",
"imageConnectivitySuccess": "Подключение к сервису изображений успешно",
diff --git a/lib/i18n/locales/zh-CN.json b/lib/i18n/locales/zh-CN.json
index 79892a759..84471f846 100644
--- a/lib/i18n/locales/zh-CN.json
+++ b/lib/i18n/locales/zh-CN.json
@@ -454,6 +454,7 @@
"grok": "Grok",
"tencent-hunyuan": "腾讯混元",
"xiaomi": "小米 MiMo",
+ "lemonade": "Lemonade(本地)",
"ollama": "Ollama(本地模型)",
"tavily": "Tavily",
"bocha": "博查"
@@ -603,6 +604,7 @@
"providerDoubaoTTS": "豆包 TTS 2.0(火山引擎)",
"providerElevenLabsTTS": "ElevenLabs TTS",
"providerMiniMaxTTS": "MiniMax TTS",
+ "providerLemonadeTTS": "Lemonade TTS(本地)",
"providerBrowserNativeTTS": "浏览器原生 TTS",
"voxcpmBackend": "Backend",
"voxcpmBaseUrlPending": "填写 Base URL 后生成",
@@ -649,6 +651,7 @@
"providerOpenAIWhisper": "OpenAI ASR (gpt-4o-mini-transcribe)",
"providerBrowserNative": "浏览器原生 ASR",
"providerQwenASR": "Qwen ASR(阿里云百炼)",
+ "providerLemonadeASR": "Lemonade ASR(本地)",
"providerUnpdf": "unpdf(内置)",
"providerMinerU": "MinerU",
"providerMinerUCloud": "MinerU(云端)",
@@ -900,6 +903,7 @@
"providerNanoBanana": "Nano Banana(Gemini)",
"providerMiniMaxImage": "MiniMax 图像",
"providerGrokImage": "Grok Image(xAI)",
+ "providerLemonadeImage": "Lemonade 图像(本地)",
"testImageGeneration": "测试图像生成",
"testImageConnectivity": "测试连接",
"imageConnectivitySuccess": "图像服务连接成功",
diff --git a/lib/i18n/locales/zh-TW.json b/lib/i18n/locales/zh-TW.json
index 9ca932ad3..d7b263e53 100644
--- a/lib/i18n/locales/zh-TW.json
+++ b/lib/i18n/locales/zh-TW.json
@@ -440,6 +440,7 @@
"openrouter": "OpenRouter",
"tencent-hunyuan": "騰訊混元",
"xiaomi": "小米 MiMo",
+ "lemonade": "Lemonade(本機)",
"tavily": "Tavily",
"bocha": "Bocha"
},
@@ -583,9 +584,11 @@
"providerDoubaoTTS": "豆包 TTS 2.0(火山引擎)",
"providerElevenLabsTTS": "ElevenLabs TTS",
"providerMiniMaxTTS": "MiniMax TTS",
+ "providerLemonadeTTS": "Lemonade TTS(本機)",
"providerBrowserNativeTTS": "瀏覽器原生 TTS",
"providerOpenAIWhisper": "OpenAI ASR (gpt-4o-mini-transcribe)",
"providerBrowserNative": "瀏覽器原生 ASR",
+ "providerLemonadeASR": "Lemonade ASR(本機)",
"providerQwenASR": "Qwen ASR(阿里雲百煉)",
"providerUnpdf": "unpdf(內建)",
"providerMinerU": "MinerU",
@@ -814,6 +817,7 @@
"providerQwenImage": "Qwen Image(阿里通義)",
"providerNanoBanana": "Nano Banana(Gemini)",
"providerMiniMaxImage": "MiniMax 圖像",
+ "providerLemonadeImage": "Lemonade 圖像(本機)",
"providerGrokImage": "Grok Image(xAI)",
"testImageGeneration": "測試圖像生成",
"testImageConnectivity": "測試連線",
diff --git a/lib/media/adapters/lemonade-image-adapter.ts b/lib/media/adapters/lemonade-image-adapter.ts
new file mode 100644
index 000000000..82d9ed83b
--- /dev/null
+++ b/lib/media/adapters/lemonade-image-adapter.ts
@@ -0,0 +1,90 @@
+/**
+ * Lemonade Image Generation Adapter
+ *
+ * Lemonade exposes OpenAI-compatible image generation at /v1/images/generations.
+ */
+
+import type {
+ ImageGenerationConfig,
+ ImageGenerationOptions,
+ ImageGenerationResult,
+} from '../types';
+
+const DEFAULT_MODEL = 'Qwen-Image-GGUF';
+const DEFAULT_BASE_URL = 'http://localhost:13305/v1';
+
+function normalizeBaseUrl(baseUrl?: string): string {
+ return (baseUrl || DEFAULT_BASE_URL).replace(/\/$/, '');
+}
+
+function authHeaders(apiKey?: string): Record<string, string> {
+ const key = apiKey?.trim();
+ return key ? { Authorization: `Bearer ${key}` } : {};
+}
+
+function resolveSize(options: ImageGenerationOptions): string {
+ return `${options.width || 1024}x${options.height || 1024}`;
+}
+
+export async function testLemonadeImageConnectivity(
+ config: ImageGenerationConfig,
+): Promise<{ success: boolean; message: string }> {
+ const baseUrl = normalizeBaseUrl(config.baseUrl);
+
+ try {
+ const response = await fetch(`${baseUrl}/models`, {
+ headers: authHeaders(config.apiKey),
+ });
+
+ if (response.ok) {
+ return { success: true, message: 'Connected to Lemonade image generation' };
+ }
+
+ const text = await response.text().catch(() => response.statusText);
+ return { success: false, message: `Lemonade API error (${response.status}): ${text}` };
+ } catch (err) {
+ return { success: false, message: `Lemonade connectivity error: ${err}` };
+ }
+}
+
+export async function generateWithLemonadeImage(
+ config: ImageGenerationConfig,
+ options: ImageGenerationOptions,
+): Promise<ImageGenerationResult> {
+ const baseUrl = normalizeBaseUrl(config.baseUrl);
+ const width = options.width || 1024;
+ const height = options.height || 1024;
+
+ const response = await fetch(`${baseUrl}/images/generations`, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ ...authHeaders(config.apiKey),
+ },
+ body: JSON.stringify({
+ model: config.model || DEFAULT_MODEL,
+ prompt: options.prompt,
+ n: 1,
+ size: resolveSize(options),
+ response_format: 'b64_json',
+ }),
+ });
+
+ if (!response.ok) {
+ const text = await response.text().catch(() => response.statusText);
+ throw new Error(`Lemonade image generation failed (${response.status}): ${text}`);
+ }
+
+ const data = await response.json();
+ const imageData = data.data?.[0];
+ if (!imageData?.url && !imageData?.b64_json) {
+ throw new Error('Lemonade returned empty image response');
+ }
+
+ return {
+ url: imageData.url,
+ base64: imageData.b64_json,
+ width,
+ height,
+ };
+}
diff --git a/lib/media/image-providers.ts b/lib/media/image-providers.ts
index f2102df27..6a8ea817f 100644
--- a/lib/media/image-providers.ts
+++ b/lib/media/image-providers.ts
@@ -21,6 +21,10 @@ import {
testMiniMaxImageConnectivity,
} from './adapters/minimax-image-adapter';
import { generateWithGrokImage, testGrokImageConnectivity } from './adapters/grok-image-adapter';
+import {
+ generateWithLemonadeImage,
+ testLemonadeImageConnectivity,
+} from './adapters/lemonade-image-adapter';
export const IMAGE_PROVIDERS: Record = {
seedream: {
@@ -116,6 +120,19 @@ export const IMAGE_PROVIDERS: Record = {
],
supportedAspectRatios: ['16:9', '4:3', '1:1', '9:16'],
},
+ lemonade: {
+ id: 'lemonade',
+ name: 'Lemonade',
+ requiresApiKey: false,
+ defaultBaseUrl: 'http://localhost:13305/v1',
+ icon: '/logos/lemonade.svg',
+ models: [
+ { id: 'Qwen-Image-GGUF', name: 'Qwen Image GGUF' },
+ { id: 'sd-cpp', name: 'Stable Diffusion (sd-cpp)' },
+ ],
+ supportedAspectRatios: ['16:9', '4:3', '1:1', '9:16'],
+ maxResolution: { width: 1024, height: 1024 },
+ },
};
export async function testImageConnectivity(
@@ -134,6 +151,8 @@ export async function testImageConnectivity(
return testMiniMaxImageConnectivity(config);
case 'grok-image':
return testGrokImageConnectivity(config);
+ case 'lemonade':
+ return testLemonadeImageConnectivity(config);
default:
return {
success: false,
@@ -159,6 +178,8 @@ export async function generateImage(
return generateWithMiniMaxImage(config, options);
case 'grok-image':
return generateWithGrokImage(config, options);
+ case 'lemonade':
+ return generateWithLemonadeImage(config, options);
default:
throw new Error(`Unsupported image provider: ${config.providerId}`);
}
diff --git a/lib/media/types.ts b/lib/media/types.ts
index 8a2f8e5f2..47c80ac93 100644
--- a/lib/media/types.ts
+++ b/lib/media/types.ts
@@ -76,7 +76,8 @@ export type ImageProviderId =
| 'qwen-image'
| 'nano-banana'
| 'minimax-image'
- | 'grok-image';
+ | 'grok-image'
+ | 'lemonade';
// Add new image providers below (uncomment and modify):
// | 'dall-e'
// | 'midjourney'
diff --git a/lib/server/classroom-media-generation.ts b/lib/server/classroom-media-generation.ts
index bd307d150..7848c9af2 100644
--- a/lib/server/classroom-media-generation.ts
+++ b/lib/server/classroom-media-generation.ts
@@ -95,11 +95,11 @@ export async function generateMediaForClassroom(
try {
const providerId = imageProviderIds[0] as ImageProviderId;
const apiKey = resolveImageApiKey(providerId);
- if (!apiKey) {
+ const providerConfig = IMAGE_PROVIDERS[providerId];
+ if (providerConfig?.requiresApiKey && !apiKey) {
log.warn(`No API key for image provider "${providerId}", skipping ${req.elementId}`);
continue;
}
- const providerConfig = IMAGE_PROVIDERS[providerId];
const model = providerConfig?.models?.[0]?.id;
const result = await generateImage(
@@ -221,16 +221,14 @@ export async function generateTTSForClassroom(
const providerId = ttsProviderIds[0] as TTSProviderId;
const apiKey = resolveTTSApiKey(providerId);
- if (!apiKey) {
+ const ttsProvider = TTS_PROVIDERS[providerId as keyof typeof TTS_PROVIDERS];
+ if (ttsProvider?.requiresApiKey && !apiKey) {
log.warn(`No API key for TTS provider "${providerId}", skipping TTS generation`);
return;
}
- const ttsBaseUrl =
- resolveTTSBaseUrl(providerId) ||
- TTS_PROVIDERS[providerId as keyof typeof TTS_PROVIDERS]?.defaultBaseUrl;
+ const ttsBaseUrl = resolveTTSBaseUrl(providerId) || ttsProvider?.defaultBaseUrl;
const voice = DEFAULT_TTS_VOICES[providerId as keyof typeof DEFAULT_TTS_VOICES] || 'default';
- const format =
- TTS_PROVIDERS[providerId as keyof typeof TTS_PROVIDERS]?.supportedFormats?.[0] || 'mp3';
+ const format = ttsProvider?.supportedFormats?.[0] || 'mp3';
if (providerId === VOXCPM_TTS_PROVIDER_ID && voice === VOXCPM_AUTO_VOICE_ID) {
log.warn('VoxCPM Auto Voice requires agent context; skipping server-side TTS generation');
return;
diff --git a/lib/server/provider-config.ts b/lib/server/provider-config.ts
index ff1129909..81f758233 100644
--- a/lib/server/provider-config.ts
+++ b/lib/server/provider-config.ts
@@ -55,6 +55,7 @@ const LLM_ENV_MAP: Record = {
XIAOMI: 'xiaomi',
MIMO: 'xiaomi',
OLLAMA: 'ollama',
+ LEMONADE: 'lemonade',
};
const TTS_ENV_MAP: Record = {
@@ -66,11 +67,13 @@ const TTS_ENV_MAP: Record = {
TTS_DOUBAO: 'doubao-tts',
TTS_ELEVENLABS: 'elevenlabs-tts',
TTS_MINIMAX: 'minimax-tts',
+ TTS_LEMONADE: 'lemonade-tts',
};
const ASR_ENV_MAP: Record = {
ASR_OPENAI: 'openai-whisper',
ASR_QWEN: 'qwen-asr',
+ ASR_LEMONADE: 'lemonade-asr',
};
const PDF_ENV_MAP: Record = {
@@ -86,6 +89,7 @@ const IMAGE_ENV_MAP: Record = {
IMAGE_NANO_BANANA: 'nano-banana',
IMAGE_MINIMAX: 'minimax-image',
IMAGE_GROK: 'grok-image',
+ IMAGE_LEMONADE: 'lemonade',
};
const VIDEO_ENV_MAP: Record = {
@@ -231,18 +235,22 @@ function applyOpenAIImageFallback(
function buildConfig(yamlData: YamlData): ServerConfig {
const image = applyOpenAIImageFallback(
- loadEnvSection(IMAGE_ENV_MAP, yamlData.image),
+ loadEnvSection(IMAGE_ENV_MAP, yamlData.image, {
+ keylessProviders: new Set(['lemonade']),
+ }),
yamlData.image,
);
return {
providers: loadEnvSection(LLM_ENV_MAP, yamlData.providers, {
- keylessProviders: new Set(['ollama']),
+ keylessProviders: new Set(['ollama', 'lemonade']),
}),
tts: loadEnvSection(TTS_ENV_MAP, yamlData.tts, {
- keylessProviders: new Set(['voxcpm-tts']),
+ keylessProviders: new Set(['voxcpm-tts', 'lemonade-tts']),
+ }),
+ asr: loadEnvSection(ASR_ENV_MAP, yamlData.asr, {
+ keylessProviders: new Set(['lemonade-asr']),
}),
- asr: loadEnvSection(ASR_ENV_MAP, yamlData.asr),
pdf: loadEnvSection(PDF_ENV_MAP, yamlData.pdf, { requiresBaseUrl: true }),
image,
video: loadEnvSection(VIDEO_ENV_MAP, yamlData.video),
diff --git a/lib/server/resolve-model.ts b/lib/server/resolve-model.ts
index 8887ae43b..8553176be 100644
--- a/lib/server/resolve-model.ts
+++ b/lib/server/resolve-model.ts
@@ -16,8 +16,12 @@ export interface ResolvedModel extends ModelWithInfo {
modelString: string;
/** Resolved provider ID (e.g. "openai", "ollama") */
providerId: string;
+ /** Resolved model ID (e.g. "gpt-4o-mini") */
+ modelId: string;
/** Effective API key after server-side fallback resolution */
apiKey: string;
+ /** Effective base URL after server/client resolution */
+ baseUrl?: string;
/** Optional per-request thinking configuration from the client. */
thinkingConfig?: ThinkingConfig;
}
@@ -67,7 +71,9 @@ export async function resolveModel(params: {
modelInfo,
modelString,
providerId,
+ modelId,
apiKey,
+ baseUrl,
thinkingConfig: params.thinkingConfig,
};
}
diff --git a/lib/store/settings.ts b/lib/store/settings.ts
index e8e82996a..d810c4498 100644
--- a/lib/store/settings.ts
+++ b/lib/store/settings.ts
@@ -356,6 +356,12 @@ const getDefaultAudioConfig = () => ({
'doubao-tts': { apiKey: '', baseUrl: '', enabled: false },
'elevenlabs-tts': { apiKey: '', baseUrl: '', enabled: false },
'minimax-tts': { apiKey: '', baseUrl: '', modelId: 'speech-2.8-hd', enabled: false },
+ 'lemonade-tts': {
+ apiKey: '',
+ baseUrl: '',
+ modelId: 'kokoro-v1',
+ enabled: false,
+ },
'browser-native-tts': { apiKey: '', baseUrl: '', enabled: true },
} as Record<
TTSProviderId,
@@ -365,6 +371,7 @@ const getDefaultAudioConfig = () => ({
'openai-whisper': { apiKey: '', baseUrl: '', enabled: true },
'browser-native': { apiKey: '', baseUrl: '', enabled: true },
'qwen-asr': { apiKey: '', baseUrl: '', enabled: false },
+ 'lemonade-asr': { apiKey: '', baseUrl: '', enabled: false },
} as Record,
});
@@ -389,6 +396,7 @@ const getDefaultImageConfig = () => ({
'nano-banana': { apiKey: '', baseUrl: '', enabled: false },
'minimax-image': { apiKey: '', baseUrl: '', enabled: false },
'grok-image': { apiKey: '', baseUrl: '', enabled: false },
+ lemonade: { apiKey: '', baseUrl: '', enabled: false },
} as Record,
});
@@ -867,7 +875,14 @@ export const useSettingsStore = create()(
})),
// Image Generation actions
- setImageProvider: (providerId) => set({ imageProviderId: providerId }),
+ setImageProvider: (providerId) =>
+ set(() => {
+ const models = IMAGE_PROVIDERS[providerId]?.models || [];
+ return {
+ imageProviderId: providerId,
+ imageModelId: models[0]?.id || '',
+ };
+ }),
setImageModelId: (modelId) => set({ imageModelId: modelId }),
setImageProviderConfig: (providerId, config) =>
diff --git a/lib/types/provider.ts b/lib/types/provider.ts
index 9437e2998..bbbe58b6b 100644
--- a/lib/types/provider.ts
+++ b/lib/types/provider.ts
@@ -20,6 +20,7 @@ export type BuiltInProviderId =
| 'grok'
| 'tencent-hunyuan'
| 'xiaomi'
+ | 'lemonade'
| 'ollama';
/**
@@ -59,7 +60,8 @@ export type ThinkingRequestAdapter =
| 'doubao'
| 'openrouter'
| 'hunyuan'
- | 'xiaomi';
+ | 'xiaomi'
+ | 'lemonade';
/**
* Describes a model's thinking/reasoning API control capability.
diff --git a/public/logos/lemonade.svg b/public/logos/lemonade.svg
new file mode 100644
index 000000000..5b51d7a65
--- /dev/null
+++ b/public/logos/lemonade.svg
@@ -0,0 +1,53 @@
+
diff --git a/tests/ai/openai-provider.test.ts b/tests/ai/openai-provider.test.ts
index 949d8c074..def4781f6 100644
--- a/tests/ai/openai-provider.test.ts
+++ b/tests/ai/openai-provider.test.ts
@@ -16,7 +16,7 @@ import type { ProviderId } from '@/lib/types/provider';
async function captureInjectedRequestBody(
providerId: ProviderId,
modelId: string,
- thinkingConfig: Record<string, unknown>,
+ thinkingConfig?: Record<string, unknown>,
) {
const originalFetch = globalThis.fetch;
const globalRecord = globalThis as Record<string, unknown>;
@@ -145,6 +145,12 @@ describe('OpenAI provider defaults', () => {
{ mode: 'enabled', effort: 'high' },
{ chat_template_kwargs: { reasoning_effort: 'high' } },
],
+ [
+ 'lemonade',
+ 'Qwen3.5-4B-GGUF',
+ { mode: 'enabled', budgetTokens: 4096 },
+ { chat_template_kwargs: { enable_thinking: true, thinking_budget: 4096 } },
+ ],
] as const)(
'injects %s thinking params into the OpenAI-compatible request body',
async (providerId, modelId, thinkingConfig, expected) => {
@@ -152,4 +158,81 @@ describe('OpenAI provider defaults', () => {
expect(body).toMatchObject(expected);
},
);
+
+ it('disables Lemonade thinking by default for recognized local reasoning models', async () => {
+ const body = await captureInjectedRequestBody('lemonade', 'Qwen3.5-4B-GGUF');
+
+ expect(body).toMatchObject({
+ chat_template_kwargs: { enable_thinking: false },
+ });
+ });
+
+ it('recognizes manually added Lemonade reasoning model IDs', async () => {
+ const body = await captureInjectedRequestBody('lemonade', 'custom-gpt-oss-20b-q4');
+
+ expect(body).toMatchObject({
+ chat_template_kwargs: { enable_thinking: false },
+ });
+ });
+
+ it('disables Lemonade thinking by default for non-catalog local models too', async () => {
+ const body = await captureInjectedRequestBody('lemonade', 'Gemma-4-26B-A4B-it-GGUF');
+
+ expect(body).toMatchObject({
+ chat_template_kwargs: { enable_thinking: false },
+ });
+ });
+
+ it('strips unsupported Lemonade stream_options while preserving thinking overrides', async () => {
+ const originalFetch = globalThis.fetch;
+ const globalRecord = globalThis as Record<string, unknown>;
+ const originalThinkingContext = globalRecord.__thinkingContext;
+ const fetchMock = vi.fn(async (_url: RequestInfo | URL, _init?: RequestInit) => {
+ return new Response(JSON.stringify({ ok: true }), {
+ status: 200,
+ headers: { 'content-type': 'application/json' },
+ });
+ });
+
+ try {
+ globalThis.fetch = fetchMock as typeof fetch;
+ globalRecord.__thinkingContext = {
+ getStore: () => ({ mode: 'disabled' }),
+ };
+
+ getModel({
+ providerId: 'lemonade',
+ modelId: 'Gemma-4-26B-A4B-it-GGUF',
+ apiKey: '',
+ });
+
+ const lastCall = openAiMock.createOpenAI.mock.calls.at(-1);
+ const options = lastCall?.[0] as { fetch?: typeof fetch } | undefined;
+
+ await options?.fetch?.('https://example.test/v1/chat/completions', {
+ method: 'POST',
+ body: JSON.stringify({
+ model: 'Gemma-4-26B-A4B-it-GGUF',
+ messages: [{ role: 'user', content: 'hi' }],
+ stream: true,
+ stream_options: { include_usage: true },
+ }),
+ });
+
+ const init = fetchMock.mock.calls[0]?.[1] as RequestInit;
+ const body = JSON.parse(init.body as string);
+
+ expect(body.stream_options).toBeUndefined();
+ expect(body).toMatchObject({
+ chat_template_kwargs: { enable_thinking: false },
+ });
+ } finally {
+ globalThis.fetch = originalFetch;
+ if (originalThinkingContext === undefined) {
+ delete globalRecord.__thinkingContext;
+ } else {
+ globalRecord.__thinkingContext = originalThinkingContext;
+ }
+ }
+ });
});
diff --git a/tests/ai/thinking-config.test.ts b/tests/ai/thinking-config.test.ts
index 283d41a07..9dd980485 100644
--- a/tests/ai/thinking-config.test.ts
+++ b/tests/ai/thinking-config.test.ts
@@ -152,6 +152,21 @@ describe('thinking config normalization', () => {
expect(thinking?.effortValues).toEqual(['none', 'low', 'high']);
});
+ it('normalizes Lemonade reasoning models as disabled-by-default token budgets', () => {
+ const thinking = getThinking('lemonade', 'Qwen3.5-4B-GGUF');
+
+ expect(supportsConfigurableThinking(thinking)).toBe(true);
+ expect(thinking?.requestAdapter).toBe('lemonade');
+ expect(getDefaultThinkingConfig(thinking)).toEqual({
+ mode: 'disabled',
+ budgetTokens: undefined,
+ });
+ expect(normalizeThinkingConfig(thinking, { mode: 'enabled', budgetTokens: 4096 })).toEqual({
+ mode: 'enabled',
+ budgetTokens: 4096,
+ });
+ });
+
it('normalizes Doubao Seed 2.0 thinking as reasoning effort levels', () => {
const thinking = getThinking('doubao', 'doubao-seed-2-0-pro-260215');
diff --git a/tests/audio/lemonade-asr.test.ts b/tests/audio/lemonade-asr.test.ts
new file mode 100644
index 000000000..f53617272
--- /dev/null
+++ b/tests/audio/lemonade-asr.test.ts
@@ -0,0 +1,127 @@
+import { beforeEach, describe, expect, it, vi, type Mock } from 'vitest';
+import { transcribeAudio } from '@/lib/audio/asr-providers';
+
+const mockFetch = vi.fn() as Mock;
+vi.stubGlobal('fetch', mockFetch);
+
+function wavBuffer(): Buffer {
+ const buf = Buffer.alloc(16);
+ buf.write('RIFF', 0, 'ascii');
+ buf.writeUInt32LE(8, 4);
+ buf.write('WAVE', 8, 'ascii');
+ return buf;
+}
+
+function wavArrayBuffer(): ArrayBuffer {
+ const buffer = wavBuffer();
+ const arrayBuffer = new ArrayBuffer(buffer.byteLength);
+ new Uint8Array(arrayBuffer).set(buffer);
+ return arrayBuffer;
+}
+
+describe('Lemonade ASR', () => {
+ beforeEach(() => {
+ mockFetch.mockReset();
+ });
+
+ it('posts WAV audio to /audio/transcriptions with the configured model', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ json: async () => ({ text: 'hello' }),
+ });
+
+ const result = await transcribeAudio(
+ {
+ providerId: 'lemonade-asr',
+ baseUrl: 'http://localhost:13305/v1/',
+ modelId: 'Whisper-Base',
+ },
+ wavBuffer(),
+ );
+
+ expect(mockFetch).toHaveBeenCalledWith(
+ 'http://localhost:13305/v1/audio/transcriptions',
+ expect.objectContaining({ method: 'POST' }),
+ );
+ const formData = mockFetch.mock.calls[0][1].body as FormData;
+ expect(formData.get('model')).toBe('Whisper-Base');
+ expect(formData.get('response_format')).toBe('json');
+ expect(formData.get('file')).toBeInstanceOf(Blob);
+ expect(result).toEqual({ text: 'hello' });
+ });
+
+ it('forwards an explicit language but not when set to "auto"', async () => {
+ mockFetch.mockResolvedValue({
+ ok: true,
+ json: async () => ({ text: '' }),
+ });
+
+ await transcribeAudio({ providerId: 'lemonade-asr', language: 'en' }, wavBuffer());
+ let formData = mockFetch.mock.calls[0][1].body as FormData;
+ expect(formData.get('language')).toBe('en');
+
+ mockFetch.mockClear();
+
+ await transcribeAudio({ providerId: 'lemonade-asr', language: 'auto' }, wavBuffer());
+ formData = mockFetch.mock.calls[0][1].body as FormData;
+ expect(formData.get('language')).toBeNull();
+ });
+
+ it('rejects non-WAV audio buffers', async () => {
+ const notWav = Buffer.from('IDXX' + '\0'.repeat(12));
+
+ await expect(transcribeAudio({ providerId: 'lemonade-asr' }, notWav)).rejects.toThrow(
+ /WAV input only/,
+ );
+ expect(mockFetch).not.toHaveBeenCalled();
+ });
+
+ it('accepts WAV files even when the MIME type is missing', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ json: async () => ({ text: 'hello' }),
+ });
+
+ const audioFile = new File([wavArrayBuffer()], 'recording.wav');
+ const result = await transcribeAudio({ providerId: 'lemonade-asr' }, audioFile);
+
+ expect(result).toEqual({ text: 'hello' });
+ expect(mockFetch).toHaveBeenCalledTimes(1);
+ });
+
+ it('returns empty text gracefully when upstream reports empty audio', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: false,
+ status: 400,
+ text: async () => 'audio is empty',
+ statusText: 'Bad Request',
+ });
+
+ const result = await transcribeAudio({ providerId: 'lemonade-asr' }, wavBuffer());
+ expect(result).toEqual({ text: '' });
+ });
+
+ it('throws on unrecognized error payloads', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: false,
+ status: 500,
+ text: async () => 'model crashed',
+ statusText: 'Internal Server Error',
+ });
+
+ await expect(transcribeAudio({ providerId: 'lemonade-asr' }, wavBuffer())).rejects.toThrow(
+ /Lemonade ASR API error.*model crashed/,
+ );
+ });
+
+ it('falls back to default model id when not provided', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ json: async () => ({ text: 'ok' }),
+ });
+
+ await transcribeAudio({ providerId: 'lemonade-asr' }, wavBuffer());
+ const formData = mockFetch.mock.calls[0][1].body as FormData;
+ expect(formData.get('model')).toBe('Whisper-Base');
+ });
+});
diff --git a/tests/audio/lemonade-tts.test.ts b/tests/audio/lemonade-tts.test.ts
new file mode 100644
index 000000000..bf9e033ef
--- /dev/null
+++ b/tests/audio/lemonade-tts.test.ts
@@ -0,0 +1,121 @@
+import { beforeEach, describe, expect, it, vi, type Mock } from 'vitest';
+import { generateTTS } from '@/lib/audio/tts-providers';
+
+const mockFetch = vi.fn() as Mock;
+vi.stubGlobal('fetch', mockFetch);
+
+function wavBytes(): ArrayBuffer {
+ const data = new Uint8Array(16);
+ data[0] = 0x52; // 'R'
+ data[1] = 0x49; // 'I'
+ data[2] = 0x46; // 'F'
+ data[3] = 0x46; // 'F'
+ data[8] = 0x57; // 'W'
+ data[9] = 0x41; // 'A'
+ data[10] = 0x56; // 'V'
+ data[11] = 0x45; // 'E'
+ return data.buffer;
+}
+
+describe('Lemonade TTS', () => {
+ beforeEach(() => {
+ mockFetch.mockReset();
+ });
+
+ it('posts to /audio/speech with kokoro-v1 + wav and bubble-up audio bytes', async () => {
+ const buffer = wavBytes();
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ arrayBuffer: async () => buffer,
+ headers: { get: () => 'audio/wav' },
+ });
+
+ const result = await generateTTS(
+ {
+ providerId: 'lemonade-tts',
+ baseUrl: 'http://localhost:13305/v1/',
+ voice: 'af_heart',
+ },
+ 'hello world',
+ );
+
+ expect(mockFetch).toHaveBeenCalledWith(
+ 'http://localhost:13305/v1/audio/speech',
+ expect.objectContaining({ method: 'POST' }),
+ );
+ const body = JSON.parse(mockFetch.mock.calls[0][1].body);
+ expect(body).toEqual({
+ model: 'kokoro-v1',
+ input: 'hello world',
+ voice: 'af_heart',
+ speed: 1.0,
+ response_format: 'wav',
+ });
+ expect(result.audio).toBeInstanceOf(Uint8Array);
+ expect(result.audio.byteLength).toBe(16);
+ expect(result.format).toBe('wav');
+ });
+
+ it('falls back to af_heart when no voice is provided', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ arrayBuffer: async () => wavBytes(),
+ headers: { get: () => 'audio/wav' },
+ });
+
+ await generateTTS({ providerId: 'lemonade-tts', voice: '' }, 'hi');
+
+ const body = JSON.parse(mockFetch.mock.calls[0][1].body);
+ expect(body.voice).toBe('af_heart');
+ });
+
+ it('uses the selected voice consistently regardless of text language', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ arrayBuffer: async () => wavBytes(),
+ headers: { get: () => 'audio/wav' },
+ });
+
+ await generateTTS({ providerId: 'lemonade-tts', voice: 'af_heart' }, '给我讲讲 Python');
+
+ const body = JSON.parse(mockFetch.mock.calls[0][1].body);
+ expect(body.voice).toBe('af_heart');
+ });
+
+ it('does not require an API key (keyless provider)', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ arrayBuffer: async () => wavBytes(),
+ headers: { get: () => 'audio/wav' },
+ });
+
+ await generateTTS({ providerId: 'lemonade-tts', voice: 'af_heart' }, 'hi');
+
+ expect(mockFetch.mock.calls[0][1].headers.Authorization).toBeUndefined();
+ });
+
+ it('attaches Bearer auth when apiKey is provided', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ arrayBuffer: async () => wavBytes(),
+ headers: { get: () => 'audio/wav' },
+ });
+
+ await generateTTS({ providerId: 'lemonade-tts', apiKey: 'sk-lm', voice: 'af_heart' }, 'hi');
+
+ expect(mockFetch.mock.calls[0][1].headers.Authorization).toBe('Bearer sk-lm');
+ });
+
+ it('throws on non-OK responses', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: false,
+ status: 400,
+ text: async () => 'bad voice',
+ statusText: 'Bad Request',
+ });
+
+ await expect(generateTTS({ providerId: 'lemonade-tts', voice: 'foo' }, 'hi')).rejects.toThrow(
+ /Lemonade TTS API error/,
+ );
+ });
+});
diff --git a/tests/audio/wav-utils.test.ts b/tests/audio/wav-utils.test.ts
new file mode 100644
index 000000000..cbddc46c9
--- /dev/null
+++ b/tests/audio/wav-utils.test.ts
@@ -0,0 +1,42 @@
+import { describe, expect, it } from 'vitest';
+import { isWavBlob, normalizeASRUploadAudio } from '@/lib/audio/wav-utils';
+
+describe('isWavBlob', () => {
+ it('detects audio/wav MIME type', () => {
+ const blob = new Blob([new Uint8Array(4)], { type: 'audio/wav' });
+ expect(isWavBlob(blob)).toBe(true);
+ });
+
+ it('detects audio/x-wav MIME type', () => {
+ const blob = new Blob([new Uint8Array(4)], { type: 'audio/x-wav' });
+ expect(isWavBlob(blob)).toBe(true);
+ });
+
+ it('detects .wav file extension when MIME is missing', () => {
+ const blob = new Blob([new Uint8Array(4)]);
+ expect(isWavBlob(blob, 'recording.wav')).toBe(true);
+ expect(isWavBlob(blob, 'recording.WAV')).toBe(true);
+ });
+
+ it('returns false for non-WAV blobs without a wav filename', () => {
+ const blob = new Blob([new Uint8Array(4)], { type: 'audio/webm' });
+ expect(isWavBlob(blob)).toBe(false);
+ expect(isWavBlob(blob, 'recording.webm')).toBe(false);
+ });
+});
+
+describe('normalizeASRUploadAudio', () => {
+ it('passes through non-lemonade providers unchanged', async () => {
+ const input = new Blob([new Uint8Array([1, 2, 3])], { type: 'audio/webm' });
+ const result = await normalizeASRUploadAudio('openai-whisper', input);
+ expect(result.blob).toBe(input);
+ expect(result.fileName).toBe('recording.webm');
+ });
+
+ it('keeps WAV blobs unchanged for lemonade-asr', async () => {
+ const input = new Blob([new Uint8Array([1, 2, 3])], { type: 'audio/wav' });
+ const result = await normalizeASRUploadAudio('lemonade-asr', input);
+ expect(result.blob).toBe(input);
+ expect(result.fileName).toBe('recording.wav');
+ });
+});
diff --git a/tests/generation/json-repair.test.ts b/tests/generation/json-repair.test.ts
new file mode 100644
index 000000000..a401982ab
--- /dev/null
+++ b/tests/generation/json-repair.test.ts
@@ -0,0 +1,57 @@
+import { describe, expect, it } from 'vitest';
+
+import { parseJsonResponse } from '@/lib/generation/json-repair';
+
+describe('json-repair targeted fixes', () => {
+ it('repairs quoted key-value fragments such as "height: 76"', () => {
+ const raw = `{
+ "background": {
+ "type": "solid",
+ "color": "#ffffff"
+ },
+ "elements": [
+ {
+ "id": "code_text",
+ "type": "text",
+ "left": 80,
+ "top": 420,
+ "width": 840,
+ "height: 76",
+ "content": "age = 25\n",
+ "defaultFontName": "",
+ "defaultColor": "#333333"
+ }
+ ]
+}`;
+
+ const parsed = parseJsonResponse<{
+ elements: Array<{ height: number; content: string }>;
+ }>(raw);
+
+ expect(parsed).not.toBeNull();
+ expect(parsed?.elements[0]?.height).toBe(76);
+ expect(parsed?.elements[0]?.content).toContain('age = 25');
+ });
+
+ it('repairs boolean property fragments without touching valid string values', () => {
+ const raw = `{
+ "elements": [
+ {
+ "id": "shape_1",
+ "fixedRatio: false",
+ "height: 58",
+ "content": "literal text: height: 58\n"
+ }
+ ]
+}`;
+
+ const parsed = parseJsonResponse<{
+ elements: Array<{ fixedRatio: boolean; height: number; content: string }>;
+ }>(raw);
+
+ expect(parsed).not.toBeNull();
+ expect(parsed?.elements[0]?.fixedRatio).toBe(false);
+ expect(parsed?.elements[0]?.height).toBe(58);
+ expect(parsed?.elements[0]?.content).toBe('literal text: height: 58\n');
+ });
+});
diff --git a/tests/media/lemonade-image-adapter.test.ts b/tests/media/lemonade-image-adapter.test.ts
new file mode 100644
index 000000000..43d291c14
--- /dev/null
+++ b/tests/media/lemonade-image-adapter.test.ts
@@ -0,0 +1,153 @@
+import { beforeEach, describe, expect, it, vi, type Mock } from 'vitest';
+import {
+ generateWithLemonadeImage,
+ testLemonadeImageConnectivity,
+} from '@/lib/media/adapters/lemonade-image-adapter';
+
+const mockFetch = vi.fn() as Mock;
+vi.stubGlobal('fetch', mockFetch);
+
+describe('lemonade-image-adapter', () => {
+ beforeEach(() => {
+ mockFetch.mockReset();
+ });
+
+ it('posts generation requests to /images/generations with b64_json response_format', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ json: async () => ({ data: [{ b64_json: 'aW1n' }] }),
+ });
+
+ const result = await generateWithLemonadeImage(
+ { providerId: 'lemonade', apiKey: '', baseUrl: 'http://localhost:13305/v1/' },
+ { prompt: 'a fox', width: 768, height: 768 },
+ );
+
+ expect(mockFetch).toHaveBeenCalledWith(
+ 'http://localhost:13305/v1/images/generations',
+ expect.objectContaining({
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ }),
+ );
+ const body = JSON.parse(mockFetch.mock.calls[0][1].body);
+ expect(body).toEqual({
+ model: 'Qwen-Image-GGUF',
+ prompt: 'a fox',
+ n: 1,
+ size: '768x768',
+ response_format: 'b64_json',
+ });
+ expect(result).toEqual({
+ url: undefined,
+ base64: 'aW1n',
+ width: 768,
+ height: 768,
+ });
+ });
+
+ it('falls back to default base URL and 1024x1024 when not provided', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ json: async () => ({ data: [{ b64_json: 'eA==' }] }),
+ });
+
+ await generateWithLemonadeImage({ providerId: 'lemonade', apiKey: '' }, { prompt: 'tile' });
+
+ expect(mockFetch.mock.calls[0][0]).toBe('http://localhost:13305/v1/images/generations');
+ const body = JSON.parse(mockFetch.mock.calls[0][1].body);
+ expect(body.size).toBe('1024x1024');
+ });
+
+ it('forwards custom model id when provided', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ json: async () => ({ data: [{ b64_json: 'eA==' }] }),
+ });
+
+ await generateWithLemonadeImage(
+ { providerId: 'lemonade', apiKey: '', model: 'flux-schnell' },
+ { prompt: 'p' },
+ );
+
+ const body = JSON.parse(mockFetch.mock.calls[0][1].body);
+ expect(body.model).toBe('flux-schnell');
+ });
+
+ it('attaches Bearer auth header when apiKey is provided', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ json: async () => ({ data: [{ b64_json: 'eA==' }] }),
+ });
+
+ await generateWithLemonadeImage({ providerId: 'lemonade', apiKey: 'sk-lm' }, { prompt: 'p' });
+
+ expect(mockFetch.mock.calls[0][1].headers).toEqual({
+ 'Content-Type': 'application/json',
+ Authorization: 'Bearer sk-lm',
+ });
+ });
+
+ it('omits auth header when apiKey is empty (keyless)', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ json: async () => ({ data: [{ b64_json: 'eA==' }] }),
+ });
+
+ await generateWithLemonadeImage({ providerId: 'lemonade', apiKey: '' }, { prompt: 'p' });
+
+ expect(mockFetch.mock.calls[0][1].headers).toEqual({
+ 'Content-Type': 'application/json',
+ });
+ });
+
+ it('throws a useful error on failed generation responses', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: false,
+ status: 500,
+ text: async () => 'model unavailable',
+ statusText: 'Internal Server Error',
+ });
+
+ await expect(
+ generateWithLemonadeImage({ providerId: 'lemonade', apiKey: '' }, { prompt: 'p' }),
+ ).rejects.toThrow('Lemonade image generation failed (500): model unavailable');
+ });
+
+ it('throws when response payload contains no image data', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ json: async () => ({ data: [{}] }),
+ });
+
+ await expect(
+ generateWithLemonadeImage({ providerId: 'lemonade', apiKey: '' }, { prompt: 'p' }),
+ ).rejects.toThrow('Lemonade returned empty image response');
+ });
+
+ it('reports connectivity success against /models endpoint', async () => {
+ mockFetch.mockResolvedValueOnce({ ok: true });
+
+ const result = await testLemonadeImageConnectivity({ providerId: 'lemonade', apiKey: '' });
+
+ expect(mockFetch).toHaveBeenCalledWith(
+ 'http://localhost:13305/v1/models',
+ expect.objectContaining({ headers: {} }),
+ );
+ expect(result.success).toBe(true);
+ });
+
+ it('reports connectivity failure with response text', async () => {
+ mockFetch.mockResolvedValueOnce({
+ ok: false,
+ status: 503,
+ text: async () => 'unavailable',
+ statusText: 'Service Unavailable',
+ });
+
+ const result = await testLemonadeImageConnectivity({ providerId: 'lemonade', apiKey: '' });
+
+ expect(result.success).toBe(false);
+ expect(result.message).toBe('Lemonade API error (503): unavailable');
+ });
+});