diff --git a/.env.example b/.env.example index 737a338f4..e84189c48 100644 --- a/.env.example +++ b/.env.example @@ -79,6 +79,10 @@ XIAOMI_MODELS= # OLLAMA_BASE_URL=http://localhost:11434/v1 # OLLAMA_MODELS=llama3.3,llama3.2,qwen2.5,mistral,gemma3 +# Lemonade local server (OpenAI-compatible, no API key required) +# LEMONADE_BASE_URL=http://localhost:13305/v1 +# LEMONADE_MODELS=Qwen3-0.6B-GGUF,Llama-3.2-1B-Instruct-Hybrid,Qwen2.5-VL-7B-Instruct + # --- TTS (Text-to-Speech) ---------------------------------------------------- TTS_OPENAI_API_KEY= @@ -99,6 +103,9 @@ TTS_MINIMAX_BASE_URL=https://api.minimaxi.com TTS_ELEVENLABS_API_KEY= TTS_ELEVENLABS_BASE_URL= +# Lemonade TTS (local, no API key required) +# TTS_LEMONADE_BASE_URL=http://localhost:13305/v1 + # --- ASR (Automatic Speech Recognition) -------------------------------------- ASR_OPENAI_API_KEY= @@ -107,6 +114,9 @@ ASR_OPENAI_BASE_URL= ASR_QWEN_API_KEY= ASR_QWEN_BASE_URL= +# Lemonade ASR (local, WAV input only, no API key required) +# ASR_LEMONADE_BASE_URL=http://localhost:13305/v1 + # --- PDF Processing ----------------------------------------------------------- PDF_UNPDF_API_KEY= @@ -136,6 +146,9 @@ IMAGE_MINIMAX_BASE_URL=https://api.minimaxi.com IMAGE_GROK_API_KEY= IMAGE_GROK_BASE_URL= +# Lemonade image generation (local, no API key required) +# IMAGE_LEMONADE_BASE_URL=http://localhost:13305/v1 + # --- Video Generation --------------------------------------------------------- VIDEO_SEEDANCE_API_KEY= diff --git a/README-zh.md b/README-zh.md index 4b9e58986..93a651c2e 100644 --- a/README-zh.md +++ b/README-zh.md @@ -16,6 +16,7 @@ Live Demo Deploy with Vercel OpenClaw 集成 + Lemonade Local AI Stars
Discord @@ -32,7 +33,7 @@

English | 简体中文
- 在线体验 · 快速开始 · 功能特性 · 使用场景 · OpenClaw + 在线体验 · 快速开始 · Lemonade · 功能特性 · 使用场景 · OpenClaw

@@ -120,7 +121,22 @@ providers: apiKey: sk-ant-... ``` -支持的服务商:**OpenAI**、**Anthropic**、**Google Gemini**、**DeepSeek**、**通义千问 Qwen**、**Kimi**、**MiniMax**、**Grok (xAI)**、**OpenRouter**、**豆包**、**腾讯混元 / TokenHub**、**小米 MiMo**、**智谱 GLM**、**Ollama**(本地)以及任何兼容 OpenAI API 的服务。 +支持的服务商:**OpenAI**、**Anthropic**、**Google Gemini**、**DeepSeek**、**通义千问 Qwen**、**Kimi**、**MiniMax**、**Grok (xAI)**、**OpenRouter**、**豆包**、**腾讯混元 / TokenHub**、**小米 MiMo**、**智谱 GLM**、**Ollama**(本地)、**Lemonade**(本地 LLM / 图像 / TTS / ASR)以及任何兼容 OpenAI API 的服务。 + + + +### 可选:Lemonade(本地 AI 服务商) + +OpenMAIC 支持将 Lemonade 作为本地 OpenAI 兼容服务商使用,可用于 LLM、图像生成、TTS 和 ASR,不需要 API Key。 + +本地启动 Lemonade 后,在 OpenMAIC 中配置: + +```env +LEMONADE_BASE_URL=http://localhost:13305/v1 +TTS_LEMONADE_BASE_URL=http://localhost:13305/v1 +ASR_LEMONADE_BASE_URL=http://localhost:13305/v1 +IMAGE_LEMONADE_BASE_URL=http://localhost:13305/v1 +``` OpenAI 快速示例: diff --git a/README.md b/README.md index b28967dda..0ec2fa192 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ Live Demo Deploy with Vercel OpenClaw Integration + Lemonade Local AI Stars
Discord @@ -32,7 +33,7 @@

English | 简体中文
- Live Demo · Quick Start · Features · Use Cases · OpenClaw + Live Demo · Quick Start · Lemonade · Features · Use Cases · OpenClaw

@@ -120,7 +121,22 @@ providers: apiKey: sk-ant-... ``` -Supported providers: **OpenAI**, **Anthropic**, **Google Gemini**, **DeepSeek**, **Qwen**, **Kimi**, **MiniMax**, **Grok (xAI)**, **OpenRouter**, **Doubao**, **Tencent Hunyuan/TokenHub**, **Xiaomi MiMo**, **GLM (Zhipu)**, **Ollama** (local), and any OpenAI-compatible API. +Supported providers: **OpenAI**, **Anthropic**, **Google Gemini**, **DeepSeek**, **Qwen**, **Kimi**, **MiniMax**, **Grok (xAI)**, **OpenRouter**, **Doubao**, **Tencent Hunyuan/TokenHub**, **Xiaomi MiMo**, **GLM (Zhipu)**, **Ollama** (local), **Lemonade** (local LLM / image / TTS / ASR), and any OpenAI-compatible API. + + + +### Optional: Lemonade (Local AI Provider) + +OpenMAIC supports Lemonade as a local, OpenAI-compatible provider for LLMs, image generation, TTS, and ASR. No API key is required. + +Run Lemonade locally, then point OpenMAIC to it: + +```env +LEMONADE_BASE_URL=http://localhost:13305/v1 +TTS_LEMONADE_BASE_URL=http://localhost:13305/v1 +ASR_LEMONADE_BASE_URL=http://localhost:13305/v1 +IMAGE_LEMONADE_BASE_URL=http://localhost:13305/v1 +``` OpenAI quick example: diff --git a/app/api/generate/agent-profiles/route.ts b/app/api/generate/agent-profiles/route.ts index 523fae819..b9d4710f6 100644 --- a/app/api/generate/agent-profiles/route.ts +++ b/app/api/generate/agent-profiles/route.ts @@ -23,7 +23,12 @@ interface RequestBody { languageDirective: string; availableAvatars: string[]; avatarDescriptions?: Array<{ path: string; desc: string }>; - availableVoices?: Array<{ providerId: string; voiceId: string; voiceName: string }>; + availableVoices?: Array<{ + providerId: string; + voiceId: string; + voiceName: string; + voiceLanguage?: string; + }>; } function stripCodeFences(text: string): string { @@ -89,12 +94,14 @@ export async function POST(req: NextRequest) { availableVoices.map((v) => ({ id: `${v.providerId}::${v.voiceId}`, name: v.voiceName, + language: v.voiceLanguage || 'unknown', })), ) : ''; const voicePrompt = 
voiceListStr ? `- Each agent should be assigned a voice that matches their persona from this list: ${voiceListStr} + - Prefer a voice whose language matches the course language directive - Pick a voice that suits the agent's personality and role (e.g. authoritative voice for teacher, lively voice for energetic student) - Try to use different voices for each agent` : ''; @@ -139,19 +146,21 @@ Return a JSON object with this exact structure: log.info(`Generating agent profiles for "${stageInfo.name}" [model=${modelString}]`); - const result = await callLLM( - { - model: languageModel, - system: systemPrompt, - prompt: userPrompt, - }, - 'agent-profiles', - undefined, - thinkingConfig, - ); + const rawResult = ( + await callLLM( + { + model: languageModel, + system: systemPrompt, + prompt: userPrompt, + }, + 'agent-profiles', + undefined, + thinkingConfig, + ) + ).text; // ── Parse LLM response ── - const rawText = stripCodeFences(result.text); + const rawText = stripCodeFences(rawResult); let parsed: { agents: Array<{ name: string; diff --git a/app/api/generate/image/route.ts b/app/api/generate/image/route.ts index efcbbb5c0..8629406ac 100644 --- a/app/api/generate/image/route.ts +++ b/app/api/generate/image/route.ts @@ -16,7 +16,11 @@ */ import { NextRequest } from 'next/server'; -import { generateImage, aspectRatioToDimensions } from '@/lib/media/image-providers'; +import { + generateImage, + aspectRatioToDimensions, + IMAGE_PROVIDERS, +} from '@/lib/media/image-providers'; import { resolveImageApiKey, resolveImageBaseUrl } from '@/lib/server/provider-config'; import type { ImageProviderId, ImageGenerationOptions } from '@/lib/media/types'; import { createLogger } from '@/lib/logger'; @@ -50,7 +54,8 @@ export async function POST(request: NextRequest) { const apiKey = clientBaseUrl ? 
clientApiKey || '' : resolveImageApiKey(providerId, clientApiKey); - if (!apiKey) { + const provider = IMAGE_PROVIDERS[providerId]; + if (provider?.requiresApiKey && !apiKey) { return apiError( 'MISSING_API_KEY', 401, diff --git a/app/api/generate/scene-outlines-stream/route.ts b/app/api/generate/scene-outlines-stream/route.ts index df6d545e9..ce1c3469a 100644 --- a/app/api/generate/scene-outlines-stream/route.ts +++ b/app/api/generate/scene-outlines-stream/route.ts @@ -284,13 +284,16 @@ export async function POST(req: NextRequest) { for (let attempt = 1; attempt <= MAX_STREAM_RETRIES + 1; attempt++) { try { - const result = streamLLM(streamParams, 'scene-outlines-stream', thinkingConfig); - let fullText = ''; parsedOutlines = []; languageDirective = null; + const textStream = streamLLM( + streamParams, + 'scene-outlines-stream', + thinkingConfig, + ).textStream; - for await (const chunk of result.textStream) { + for await (const chunk of textStream) { fullText += chunk; // Try to extract language directive early @@ -332,6 +335,9 @@ export async function POST(req: NextRequest) { lastError = fullText.trim() ? 'LLM response could not be parsed into outlines' : 'LLM returned empty response'; + log.warn( + `Outlines attempt ${attempt} diagnostics: textLen=${fullText.length}, outlines=${parsedOutlines.length}, languageDirective=${languageDirective ? 'yes' : 'no'}, preview=${JSON.stringify(fullText.slice(0, 240))}`, + ); if (attempt <= MAX_STREAM_RETRIES) { log.warn( @@ -347,6 +353,9 @@ export async function POST(req: NextRequest) { } } catch (error) { lastError = error instanceof Error ? 
error.message : String(error); + log.warn( + `Outlines stream error detail (attempt ${attempt}/${MAX_STREAM_RETRIES + 1}): ${lastError}`, + ); if (attempt <= MAX_STREAM_RETRIES) { log.warn( diff --git a/app/api/transcription/route.ts b/app/api/transcription/route.ts index 708384b63..c1837fae7 100644 --- a/app/api/transcription/route.ts +++ b/app/api/transcription/route.ts @@ -50,12 +50,8 @@ export async function POST(req: NextRequest) { : resolveASRBaseUrl(effectiveProviderId, baseUrl || undefined), }; - // Convert audio file to buffer - const arrayBuffer = await audioFile.arrayBuffer(); - const buffer = Buffer.from(arrayBuffer); - // Transcribe using the provider system - const result = await transcribeAudio(config, buffer); + const result = await transcribeAudio(config, audioFile); return apiSuccess({ text: result.text }); } catch (error) { diff --git a/app/api/verify-image-provider/route.ts b/app/api/verify-image-provider/route.ts index bea247415..4ad581b4b 100644 --- a/app/api/verify-image-provider/route.ts +++ b/app/api/verify-image-provider/route.ts @@ -15,7 +15,7 @@ */ import { NextRequest } from 'next/server'; -import { testImageConnectivity } from '@/lib/media/image-providers'; +import { IMAGE_PROVIDERS, testImageConnectivity } from '@/lib/media/image-providers'; import { resolveImageApiKey, resolveImageBaseUrl } from '@/lib/server/provider-config'; import type { ImageProviderId } from '@/lib/media/types'; import { apiError, apiSuccess } from '@/lib/server/api-response'; @@ -43,7 +43,8 @@ export async function POST(request: NextRequest) { : resolveImageApiKey(providerId, clientApiKey); const baseUrl = clientBaseUrl ? 
clientBaseUrl : resolveImageBaseUrl(providerId, clientBaseUrl); - if (!apiKey) { + const provider = IMAGE_PROVIDERS[providerId]; + if (provider?.requiresApiKey && !apiKey) { return apiError('MISSING_API_KEY', 400, 'No API key configured'); } diff --git a/app/generation-preview/page.tsx b/app/generation-preview/page.tsx index 2f7754af7..bcd2b007e 100644 --- a/app/generation-preview/page.tsx +++ b/app/generation-preview/page.tsx @@ -577,6 +577,7 @@ function GenerationPreviewContent() { providerId: p.providerId, voiceId: v.id, voiceName: v.name, + voiceLanguage: v.language, })), ); }; diff --git a/components/settings/asr-settings.tsx b/components/settings/asr-settings.tsx index 67f67bfbe..17ae2b401 100644 --- a/components/settings/asr-settings.tsx +++ b/components/settings/asr-settings.tsx @@ -30,6 +30,7 @@ import { Mic, MicOff, CheckCircle2, XCircle, Eye, EyeOff, Plus, Loader2 } from ' import { cn } from '@/lib/utils'; import { toast } from 'sonner'; import { createLogger } from '@/lib/logger'; +import { normalizeASRUploadAudio } from '@/lib/audio/wav-utils'; const log = createLogger('ASRSettings'); @@ -52,6 +53,7 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) { const requiresApiKey = isCustom ? 
!!providerConfig?.requiresApiKey : !!asrProvider?.requiresApiKey; + const isKeylessLocalProvider = !isCustom && !requiresApiKey && !!asrProvider?.defaultBaseUrl; const [showApiKey, setShowApiKey] = useState(false); const [showDeleteConfirm, setShowDeleteConfirm] = useState(false); @@ -128,24 +130,28 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) { mediaRecorder.onstop = async () => { stream.getTracks().forEach((track) => track.stop()); setIsProcessing(true); - const audioBlob = new Blob(audioChunks, { type: 'audio/webm' }); - const formData = new FormData(); - formData.append('audio', audioBlob, 'recording.webm'); - formData.append('providerId', selectedProviderId); - formData.append( - 'modelId', - asrProvidersConfig[selectedProviderId]?.modelId || asrProvider?.defaultModelId || '', - ); - formData.append('language', asrLanguage); - const apiKeyValue = asrProvidersConfig[selectedProviderId]?.apiKey; - if (apiKeyValue?.trim()) formData.append('apiKey', apiKeyValue); - const baseUrlValue = - asrProvidersConfig[selectedProviderId]?.baseUrl || - providerConfig?.customDefaultBaseUrl || - ''; - if (baseUrlValue?.trim()) formData.append('baseUrl', baseUrlValue); try { + const audioBlob = new Blob(audioChunks, { type: 'audio/webm' }); + const uploadAudio = await normalizeASRUploadAudio(selectedProviderId, audioBlob); + const formData = new FormData(); + formData.append('audio', uploadAudio.blob, uploadAudio.fileName); + formData.append('providerId', selectedProviderId); + formData.append( + 'modelId', + asrProvidersConfig[selectedProviderId]?.modelId || + asrProvider?.defaultModelId || + '', + ); + formData.append('language', asrLanguage); + const apiKeyValue = asrProvidersConfig[selectedProviderId]?.apiKey; + if (apiKeyValue?.trim()) formData.append('apiKey', apiKeyValue); + const baseUrlValue = + asrProvidersConfig[selectedProviderId]?.baseUrl || + providerConfig?.customDefaultBaseUrl || + ''; + if (baseUrlValue?.trim()) 
formData.append('baseUrl', baseUrlValue); + const response = await fetch('/api/transcription', { method: 'POST', body: formData, @@ -207,7 +213,7 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) { )} {/* API Key & Base URL */} - {(requiresApiKey || isServerConfigured || isCustom) && ( + {(requiresApiKey || isServerConfigured || isCustom || isKeylessLocalProvider) && ( <>
@@ -276,6 +282,7 @@ export function ASRSettings({ selectedProviderId }: ASRSettingsProps) { } else { switch (selectedProviderId) { case 'openai-whisper': + case 'lemonade-asr': endpointPath = '/audio/transcriptions'; break; case 'qwen-asr': diff --git a/components/settings/audio-settings.tsx b/components/settings/audio-settings.tsx index d0e351ea8..d225f19bb 100644 --- a/components/settings/audio-settings.tsx +++ b/components/settings/audio-settings.tsx @@ -28,6 +28,7 @@ import azureVoicesData from '@/lib/audio/azure.json'; import { createLogger } from '@/lib/logger'; import { getVoxCPMVoiceOptions, useVoxCPMVoiceProfiles } from '@/lib/audio/voxcpm-voices'; import { normalizeVoxCPMBackend, voxCPMBackendSupportsReferenceAudio } from '@/lib/audio/voxcpm'; +import { normalizeASRUploadAudio } from '@/lib/audio/wav-utils'; const log = createLogger('AudioSettings'); @@ -44,6 +45,7 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin 'doubao-tts': t('settings.providerDoubaoTTS'), 'elevenlabs-tts': t('settings.providerElevenLabsTTS'), 'minimax-tts': t('settings.providerMiniMaxTTS'), + 'lemonade-tts': t('settings.providerLemonadeTTS'), 'browser-native-tts': t('settings.providerBrowserNativeTTS'), }; return names[providerId]; @@ -54,6 +56,7 @@ function getASRProviderName(providerId: ASRProviderId, t: (key: string) => strin 'openai-whisper': t('settings.providerOpenAIWhisper'), 'browser-native': t('settings.providerBrowserNative'), 'qwen-asr': t('settings.providerQwenASR'), + 'lemonade-asr': t('settings.providerLemonadeASR'), }; return names[providerId]; } @@ -329,26 +332,27 @@ export function AudioSettings({ onSave }: AudioSettingsProps = {}) { mediaRecorder.onstop = async () => { stream.getTracks().forEach((track) => track.stop()); - const audioBlob = new Blob(audioChunks, { type: 'audio/webm' }); - const formData = new FormData(); - formData.append('audio', audioBlob, 'recording.webm'); - formData.append('providerId', asrProviderId); - 
formData.append('language', asrLanguage); - - // Only append non-empty values - const apiKeyValue = asrProvidersConfig[asrProviderId]?.apiKey; - if (apiKeyValue && apiKeyValue.trim()) { - formData.append('apiKey', apiKeyValue); - } - const baseUrlValue = - asrProvidersConfig[asrProviderId]?.baseUrl || - asrProvidersConfig[asrProviderId]?.customDefaultBaseUrl || - ''; - if (baseUrlValue && baseUrlValue.trim()) { - formData.append('baseUrl', baseUrlValue); - } - try { + const audioBlob = new Blob(audioChunks, { type: 'audio/webm' }); + const uploadAudio = await normalizeASRUploadAudio(asrProviderId, audioBlob); + const formData = new FormData(); + formData.append('audio', uploadAudio.blob, uploadAudio.fileName); + formData.append('providerId', asrProviderId); + formData.append('language', asrLanguage); + + // Only append non-empty values + const apiKeyValue = asrProvidersConfig[asrProviderId]?.apiKey; + if (apiKeyValue && apiKeyValue.trim()) { + formData.append('apiKey', apiKeyValue); + } + const baseUrlValue = + asrProvidersConfig[asrProviderId]?.baseUrl || + asrProvidersConfig[asrProviderId]?.customDefaultBaseUrl || + ''; + if (baseUrlValue && baseUrlValue.trim()) { + formData.append('baseUrl', baseUrlValue); + } + const response = await fetch('/api/transcription', { method: 'POST', body: formData, diff --git a/components/settings/image-settings.tsx b/components/settings/image-settings.tsx index 0931cb2b1..d0bdc619c 100644 --- a/components/settings/image-settings.tsx +++ b/components/settings/image-settings.tsx @@ -60,6 +60,7 @@ export function ImageSettings({ selectedProviderId }: ImageSettingsProps) { [currentConfig?.customModels], ); const isServerConfigured = !!currentConfig?.isServerConfigured; + const requiresApiKey = currentProvider?.requiresApiKey ?? 
true; const handleApiKeyChange = (apiKey: string) => { setImageProviderConfig(selectedProviderId, { apiKey }); @@ -179,7 +180,9 @@ export function ImageSettings({ selectedProviderId }: ImageSettingsProps) { variant="outline" size="sm" onClick={handleTest} - disabled={testLoading || (!currentConfig?.apiKey && !isServerConfigured)} + disabled={ + testLoading || (requiresApiKey && !currentConfig?.apiKey && !isServerConfigured) + } className="gap-1.5" > {testLoading ? ( diff --git a/components/settings/index.tsx b/components/settings/index.tsx index 0a2ebac41..3d9f483b4 100644 --- a/components/settings/index.tsx +++ b/components/settings/index.tsx @@ -145,6 +145,7 @@ function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => strin 'doubao-tts': t('settings.providerDoubaoTTS'), 'elevenlabs-tts': t('settings.providerElevenLabsTTS'), 'minimax-tts': t('settings.providerMiniMaxTTS'), + 'lemonade-tts': t('settings.providerLemonadeTTS'), 'browser-native-tts': t('settings.providerBrowserNativeTTS'), }; return names[providerId] || providerId; @@ -159,6 +160,7 @@ function getASRProviderName(providerId: ASRProviderId, t: (key: string) => strin 'openai-whisper': t('settings.providerOpenAIWhisper'), 'browser-native': t('settings.providerBrowserNative'), 'qwen-asr': t('settings.providerQwenASR'), + 'lemonade-asr': t('settings.providerLemonadeASR'), }; return names[providerId] || providerId; } @@ -171,6 +173,7 @@ const IMAGE_PROVIDER_NAMES: Record = { 'nano-banana': 'providerNanoBanana', 'minimax-image': 'providerMiniMaxImage', 'grok-image': 'providerGrokImage', + lemonade: 'providerLemonadeImage', }; const IMAGE_PROVIDER_ICONS: Record = { @@ -180,6 +183,7 @@ const IMAGE_PROVIDER_ICONS: Record = { 'nano-banana': '/logos/gemini.svg', 'minimax-image': '/logos/minimax.svg', 'grok-image': '/logos/grok.svg', + lemonade: '/logos/lemonade.svg', }; const VIDEO_PROVIDER_NAMES: Record = { diff --git a/components/settings/model-selector.tsx 
b/components/settings/model-selector.tsx index 57df7f58b..ef0ba443a 100644 --- a/components/settings/model-selector.tsx +++ b/components/settings/model-selector.tsx @@ -75,7 +75,7 @@ export function ModelSelector({ ([, config]) => (config.requiresApiKey ? config.apiKey || config.isServerConfigured - : config.isServerConfigured || config.baseUrl) && + : config.isServerConfigured || config.baseUrl || config.defaultBaseUrl) && config.models.length >= 1 && (config.baseUrl || config.defaultBaseUrl || config.serverBaseUrl), ) diff --git a/components/settings/tts-settings.tsx b/components/settings/tts-settings.tsx index 010358aa0..ac28e2e51 100644 --- a/components/settings/tts-settings.tsx +++ b/components/settings/tts-settings.tsx @@ -93,6 +93,7 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { const requiresApiKey = isCustom ? !!providerConfig?.requiresApiKey : !!ttsProvider?.requiresApiKey; + const isKeylessLocalProvider = !isCustom && !requiresApiKey && !!ttsProvider?.defaultBaseUrl; // When testing a non-active provider, use that provider's default voice // instead of the active provider's voice (which may be incompatible). @@ -192,6 +193,7 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { switch (selectedProviderId) { case 'openai-tts': case 'glm-tts': + case 'lemonade-tts': return '/audio/speech'; case 'azure-tts': return '/cognitiveservices/v1'; @@ -225,7 +227,7 @@ export function TTSSettings({ selectedProviderId }: TTSSettingsProps) { )} {/* API Key & Base URL */} - {(requiresApiKey || isServerConfigured || isCustom || isVoxCPM) && + {(requiresApiKey || isServerConfigured || isCustom || isVoxCPM || isKeylessLocalProvider) && (isVoxCPM ? (
diff --git a/lib/ai/model-metadata.ts b/lib/ai/model-metadata.ts index 425cf9b1e..3a08cc6d7 100644 --- a/lib/ai/model-metadata.ts +++ b/lib/ai/model-metadata.ts @@ -163,6 +163,12 @@ const hunyuanHy3Effort: ThinkingCapability = { defaultEnabled: false, }; +const lemonadeToggleBudget = toggleBudgetCapability( + 'lemonade', + { min: 0, max: 81920, step: 1024, disableValue: 0 }, + false, +); + const qwenBudgetEnabled = toggleBudgetCapability( 'qwen', { min: 0, max: 81920, step: 1024, disableValue: 0 }, @@ -328,13 +334,25 @@ const THINKING_CAPABILITIES: Record = { [getModelMetadataKey('xiaomi', 'mimo-v2.5-pro')]: toggleCapability('xiaomi'), [getModelMetadataKey('xiaomi', 'mimo-v2.5')]: toggleCapability('xiaomi'), + + [getModelMetadataKey('lemonade', 'Qwen3-4B-GGUF')]: lemonadeToggleBudget, + [getModelMetadataKey('lemonade', 'Qwen3.5-4B-GGUF')]: lemonadeToggleBudget, + [getModelMetadataKey('lemonade', 'gpt-oss-20b')]: lemonadeToggleBudget, + [getModelMetadataKey('lemonade', 'GPT-OSS-20B-GGUF')]: lemonadeToggleBudget, }; export function getCatalogThinkingCapability( providerId: string, modelId: string, ): ThinkingCapability | undefined { - return THINKING_CAPABILITIES[getModelMetadataKey(providerId, modelId)]; + const exact = THINKING_CAPABILITIES[getModelMetadataKey(providerId, modelId)]; + if (exact) return exact; + + if (providerId === 'lemonade') { + return lemonadeToggleBudget; + } + + return undefined; } export function applyModelMetadata(providers: Record): void { diff --git a/lib/ai/providers.ts b/lib/ai/providers.ts index 011089b30..348cd62c6 100644 --- a/lib/ai/providers.ts +++ b/lib/ai/providers.ts @@ -6,7 +6,7 @@ * - Anthropic Claude (native) * - Google Gemini (native) * - MiniMax (Anthropic-compatible, recommended by official) - * - OpenAI-compatible providers (DeepSeek, Qwen, Kimi, GLM, SiliconFlow, Doubao, Tencent, Xiaomi, etc.) + * - OpenAI-compatible providers (DeepSeek, Qwen, Kimi, GLM, SiliconFlow, Doubao, Tencent, Xiaomi, Lemonade, etc.) 
* * Sources: * - https://platform.openai.com/docs/models @@ -35,7 +35,7 @@ import type { ThinkingConfig, } from '@/lib/types/provider'; import { applyModelMetadata, getCatalogThinkingCapability } from './model-metadata'; -import { getThinkingMode, pickThinkingBudget } from './thinking-config'; +import { getDefaultThinkingConfig, getThinkingMode, pickThinkingBudget } from './thinking-config'; import { createLogger } from '@/lib/logger'; // NOTE: Do NOT import thinking-context.ts here — it uses node:async_hooks // which is server-only, and this file is also used on the client via @@ -980,6 +980,37 @@ export const PROVIDERS: Record = { }, ], }, + + lemonade: { + id: 'lemonade', + name: 'Lemonade', + type: 'openai', + defaultBaseUrl: 'http://localhost:13305/v1', + requiresApiKey: false, + icon: '/logos/lemonade.svg', + models: [ + { + id: 'Qwen3.5-4B-GGUF', + name: 'Qwen3.5 4B GGUF', + capabilities: { streaming: true, tools: true, vision: true }, + }, + { + id: 'Qwen3-4B-GGUF', + name: 'Qwen3 4B GGUF', + capabilities: { streaming: true, tools: true, vision: false }, + }, + { + id: 'gpt-oss-20b', + name: 'GPT-OSS 20B', + capabilities: { streaming: true, tools: true, vision: false }, + }, + { + id: 'Gemma-4-26B-A4B-it-GGUF', + name: 'Gemma 4 26B A4B IT GGUF', + capabilities: { streaming: true, tools: true, vision: false }, + }, + ], + }, }; applyModelMetadata(PROVIDERS); @@ -1127,6 +1158,19 @@ function getCompatThinkingBodyParams( : undefined; } + case 'lemonade': { + const chatTemplateKwargs: Record = {}; + if (mode === 'enabled') { + chatTemplateKwargs.enable_thinking = true; + } else { + chatTemplateKwargs.enable_thinking = false; + } + if (mode === 'enabled' && budget !== undefined) { + chatTemplateKwargs.thinking_budget = budget; + } + return { chat_template_kwargs: chatTemplateKwargs }; + } + default: return undefined; } @@ -1217,12 +1261,20 @@ export function getModel(config: ModelConfig): ModelWithInfo { const thinkingCtx = (globalThis as 
Record).__thinkingContext as | { getStore?: () => unknown } | undefined; - const thinking = thinkingCtx?.getStore?.() as ThinkingConfig | undefined; + const thinkingFromContext = thinkingCtx?.getStore?.() as ThinkingConfig | undefined; + const thinking = + thinkingFromContext ?? + (providerId === 'lemonade' + ? getDefaultThinkingConfig(getCatalogThinkingCapability(providerId, config.modelId)) + : undefined); if (thinking && init?.body && typeof init.body === 'string') { const extra = getCompatThinkingBodyParams(providerId, config.modelId, thinking); if (extra) { try { const body = JSON.parse(init.body); + if (providerId === 'lemonade' && 'stream_options' in body) { + delete body.stream_options; + } Object.assign(body, extra); init = { ...init, body: JSON.stringify(body) }; } catch { @@ -1230,7 +1282,44 @@ export function getModel(config: ModelConfig): ModelWithInfo { } } } - return globalThis.fetch(url, init); + const response = await globalThis.fetch(url, init); + + if (providerId !== 'lemonade') { + return response; + } + + const contentType = response.headers.get('content-type') || ''; + let isStreamingRequest = false; + if (init?.body && typeof init.body === 'string') { + try { + const requestBody = JSON.parse(init.body); + isStreamingRequest = requestBody?.stream === true; + } catch { + /* ignore request-body inspection failure */ + } + } + + if (isStreamingRequest) { + return response; + } + + try { + const cloned = response.clone(); + const text = await cloned.text(); + + try { + JSON.parse(text); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + log.warn( + `[Lemonade] Invalid JSON response from OpenAI-compatible path: status=${response.status}, contentType=${contentType || 'n/a'}, bodyLen=${text.length}, first=${JSON.stringify(text.slice(0, 500))}, last=${JSON.stringify(text.slice(Math.max(0, text.length - 500)))}, parseError=${message}`, + ); + } + } catch (error) { + log.warn('[Lemonade] Failed to inspect JSON response body:', error); + } + + return response; }; } diff --git a/lib/audio/asr-providers.ts b/lib/audio/asr-providers.ts index 0fec13dc0..7de04e653 100644 --- a/lib/audio/asr-providers.ts +++ b/lib/audio/asr-providers.ts @@ -182,6 +182,9 @@ export async function transcribeAudio( case 'qwen-asr': return await transcribeQwenASR(config, audioBuffer); + case 'lemonade-asr': + return await transcribeLemonadeASR(config, audioBuffer); + default: if (isCustomASRProvider(config.providerId)) { return await transcribeOpenAIWhisper(config, audioBuffer); @@ -190,6 +193,101 @@ export async function transcribeAudio( } } +/** + * Lemonade ASR implementation (OpenAI-compatible multipart transcription). + * + * Lemonade currently supports WAV input and JSON response format. + */ +async function transcribeLemonadeASR( + config: ASRModelConfig, + audioBuffer: Buffer | Blob, +): Promise { + const baseUrl = (config.baseUrl || ASR_PROVIDERS['lemonade-asr'].defaultBaseUrl || '').replace( + /\/$/, + '', + ); + + const audioBlob = await toAudioBlob(audioBuffer); + if (!(await isWavAudio(audioBlob))) { + throw new Error( + 'Lemonade ASR currently supports WAV input only. 
Recordings should be converted to WAV before upload.', + ); + } + + const formData = new FormData(); + formData.set('file', audioBlob, 'audio.wav'); + formData.set('model', config.modelId || ASR_PROVIDERS['lemonade-asr'].defaultModelId); + formData.set('response_format', 'json'); + if (config.language && config.language !== 'auto') { + formData.set('language', config.language); + } + + const response = await fetch(`${baseUrl}/audio/transcriptions`, { + method: 'POST', + headers: getOptionalBearerAuthHeaders(config.apiKey), + body: formData, + }); + + if (!response.ok) { + const errorText = await response.text().catch(() => response.statusText); + if (errorText.includes('audio is empty') || errorText.includes('too short')) { + return { text: '' }; + } + throw new Error(`Lemonade ASR API error: ${errorText || response.statusText}`); + } + + const data = await response.json(); + return { text: typeof data.text === 'string' ? data.text : '' }; +} + +async function toAudioBlob(audioBuffer: Buffer | Blob): Promise { + if (audioBuffer instanceof Blob) { + return audioBuffer; + } + if (audioBuffer instanceof Buffer) { + const arrayBuffer = audioBuffer.buffer.slice( + audioBuffer.byteOffset, + audioBuffer.byteOffset + audioBuffer.byteLength, + ) as ArrayBuffer; + return new Blob([arrayBuffer], { type: detectWavBuffer(audioBuffer) ? 
'audio/wav' : '' }); + } + throw new Error('Invalid audio buffer type'); +} + +async function isWavAudio(blob: Blob): Promise { + if (blob.type.includes('audio/wav') || blob.type.includes('audio/x-wav')) { + return true; + } + + if (blob instanceof File && /\.wav$/i.test(blob.name)) { + return true; + } + + const header = await blob.slice(0, 12).arrayBuffer(); + return detectWavBytes(new Uint8Array(header)); +} + +function detectWavBuffer(buffer: Buffer): boolean { + return ( + buffer.byteLength >= 12 && + buffer.toString('ascii', 0, 4) === 'RIFF' && + buffer.toString('ascii', 8, 12) === 'WAVE' + ); +} + +function detectWavBytes(bytes: Uint8Array): boolean { + return ( + bytes.byteLength >= 12 && + String.fromCharCode(...bytes.slice(0, 4)) === 'RIFF' && + String.fromCharCode(...bytes.slice(8, 12)) === 'WAVE' + ); +} + +function getOptionalBearerAuthHeaders(apiKey?: string): Record { + const key = apiKey?.trim(); + return key ? { Authorization: `Bearer ${key}` } : {}; +} + /** * OpenAI Whisper implementation (using Vercel AI SDK) */ diff --git a/lib/audio/constants.ts b/lib/audio/constants.ts index 1f5167b01..52966e09d 100644 --- a/lib/audio/constants.ts +++ b/lib/audio/constants.ts @@ -943,6 +943,86 @@ export const TTS_PROVIDERS: Record = { supportedFormats: ['browser'], // Browser native audio speedRange: { min: 0.1, max: 10.0, default: 1.0 }, }, + + 'lemonade-tts': { + id: 'lemonade-tts', + name: 'Lemonade TTS', + requiresApiKey: false, + defaultBaseUrl: 'http://localhost:13305/v1', + icon: '/logos/lemonade.svg', + models: [{ id: 'kokoro-v1', name: 'Kokoro v1' }], + defaultModelId: 'kokoro-v1', + voices: [ + // American English — female + { id: 'af_alloy', name: 'Alloy', language: 'en-US', gender: 'female' }, + { id: 'af_aoede', name: 'Aoede', language: 'en-US', gender: 'female' }, + { id: 'af_bella', name: 'Bella', language: 'en-US', gender: 'female' }, + { id: 'af_heart', name: 'Heart', language: 'en-US', gender: 'female' }, + { id: 'af_jessica', name: 
'Jessica', language: 'en-US', gender: 'female' }, + { id: 'af_kore', name: 'Kore', language: 'en-US', gender: 'female' }, + { id: 'af_nicole', name: 'Nicole', language: 'en-US', gender: 'female' }, + { id: 'af_nova', name: 'Nova', language: 'en-US', gender: 'female' }, + { id: 'af_river', name: 'River', language: 'en-US', gender: 'female' }, + { id: 'af_sarah', name: 'Sarah', language: 'en-US', gender: 'female' }, + { id: 'af_sky', name: 'Sky', language: 'en-US', gender: 'female' }, + // American English — male + { id: 'am_adam', name: 'Adam', language: 'en-US', gender: 'male' }, + { id: 'am_echo', name: 'Echo', language: 'en-US', gender: 'male' }, + { id: 'am_eric', name: 'Eric', language: 'en-US', gender: 'male' }, + { id: 'am_fenrir', name: 'Fenrir', language: 'en-US', gender: 'male' }, + { id: 'am_liam', name: 'Liam', language: 'en-US', gender: 'male' }, + { id: 'am_michael', name: 'Michael', language: 'en-US', gender: 'male' }, + { id: 'am_onyx', name: 'Onyx', language: 'en-US', gender: 'male' }, + { id: 'am_puck', name: 'Puck', language: 'en-US', gender: 'male' }, + // British English — female + { id: 'bf_alice', name: 'Alice', language: 'en-GB', gender: 'female' }, + { id: 'bf_emma', name: 'Emma', language: 'en-GB', gender: 'female' }, + { id: 'bf_isabella', name: 'Isabella', language: 'en-GB', gender: 'female' }, + { id: 'bf_lily', name: 'Lily', language: 'en-GB', gender: 'female' }, + // British English — male + { id: 'bm_daniel', name: 'Daniel', language: 'en-GB', gender: 'male' }, + { id: 'bm_fable', name: 'Fable', language: 'en-GB', gender: 'male' }, + { id: 'bm_george', name: 'George', language: 'en-GB', gender: 'male' }, + { id: 'bm_lewis', name: 'Lewis', language: 'en-GB', gender: 'male' }, + // Mandarin Chinese — female + { id: 'zf_xiaobei', name: '晓贝', language: 'zh-CN', gender: 'female' }, + { id: 'zf_xiaoni', name: '晓妮', language: 'zh-CN', gender: 'female' }, + { id: 'zf_xiaoxiao', name: '晓晓', language: 'zh-CN', gender: 'female' }, + { id: 
'zf_xiaoyi', name: '晓伊', language: 'zh-CN', gender: 'female' }, + // Mandarin Chinese — male + { id: 'zm_yunjian', name: '云健', language: 'zh-CN', gender: 'male' }, + { id: 'zm_yunxi', name: '云希', language: 'zh-CN', gender: 'male' }, + { id: 'zm_yunxia', name: '云夏', language: 'zh-CN', gender: 'male' }, + { id: 'zm_yunyang', name: '云扬', language: 'zh-CN', gender: 'male' }, + // Japanese — female + { id: 'jf_alpha', name: 'Alpha', language: 'ja-JP', gender: 'female' }, + { id: 'jf_gongitsune', name: 'Gongitsune', language: 'ja-JP', gender: 'female' }, + { id: 'jf_nezumi', name: 'Nezumi', language: 'ja-JP', gender: 'female' }, + { id: 'jf_tebukuro', name: 'Tebukuro', language: 'ja-JP', gender: 'female' }, + // Japanese — male + { id: 'jm_kumo', name: 'Kumo', language: 'ja-JP', gender: 'male' }, + // Spanish + { id: 'ef_dora', name: 'Dora', language: 'es-ES', gender: 'female' }, + { id: 'em_alex', name: 'Alex', language: 'es-ES', gender: 'male' }, + { id: 'em_santa', name: 'Santa', language: 'es-ES', gender: 'male' }, + // French + { id: 'ff_siwis', name: 'Siwis', language: 'fr-FR', gender: 'female' }, + // Hindi + { id: 'hf_alpha', name: 'Alpha', language: 'hi-IN', gender: 'female' }, + { id: 'hf_beta', name: 'Beta', language: 'hi-IN', gender: 'female' }, + { id: 'hm_omega', name: 'Omega', language: 'hi-IN', gender: 'male' }, + { id: 'hm_psi', name: 'Psi', language: 'hi-IN', gender: 'male' }, + // Italian + { id: 'if_sara', name: 'Sara', language: 'it-IT', gender: 'female' }, + { id: 'im_nicola', name: 'Nicola', language: 'it-IT', gender: 'male' }, + // Brazilian Portuguese + { id: 'pf_dora', name: 'Dora', language: 'pt-BR', gender: 'female' }, + { id: 'pm_alex', name: 'Alex', language: 'pt-BR', gender: 'male' }, + { id: 'pm_santa', name: 'Santa', language: 'pt-BR', gender: 'male' }, + ], + supportedFormats: ['wav'], + speedRange: { min: 0.25, max: 4.0, default: 1.0 }, + }, }; /** @@ -1142,6 +1222,25 @@ export const ASR_PROVIDERS: Record = { ], supportedFormats: 
['webm'], // MediaRecorder format }, + + 'lemonade-asr': { + id: 'lemonade-asr', + name: 'Lemonade ASR', + requiresApiKey: false, + defaultBaseUrl: 'http://localhost:13305/v1', + icon: '/logos/lemonade.svg', + models: [ + { id: 'Whisper-Base', name: 'Whisper Base' }, + { id: 'Whisper-Large-v3', name: 'Whisper Large v3' }, + { id: 'Whisper-Large-v3-Turbo', name: 'Whisper Large v3 Turbo' }, + { id: 'Whisper-Medium', name: 'Whisper Medium' }, + { id: 'Whisper-Small', name: 'Whisper Small' }, + { id: 'Whisper-Tiny', name: 'Whisper Tiny' }, + ], + defaultModelId: 'Whisper-Base', + supportedLanguages: CUSTOM_ASR_DEFAULT_LANGUAGES, + supportedFormats: ['wav'], + }, }; /** @@ -1157,6 +1256,7 @@ export const DEFAULT_TTS_VOICES: Record = { 'doubao-tts': 'zh_female_vv_uranus_bigtts', 'elevenlabs-tts': 'EXAVITQu4vr4xnSDxMaL', 'minimax-tts': 'female-yujie', + 'lemonade-tts': 'af_heart', 'browser-native-tts': 'default', }; @@ -1169,6 +1269,7 @@ export const DEFAULT_TTS_MODELS: Record = { 'doubao-tts': '', 'elevenlabs-tts': 'eleven_multilingual_v2', 'minimax-tts': 'speech-2.8-hd', + 'lemonade-tts': 'kokoro-v1', 'browser-native-tts': '', }; diff --git a/lib/audio/tts-providers.ts b/lib/audio/tts-providers.ts index 3019e940f..788e29d19 100644 --- a/lib/audio/tts-providers.ts +++ b/lib/audio/tts-providers.ts @@ -164,6 +164,9 @@ export async function generateTTS( case 'elevenlabs-tts': return await generateElevenLabsTTS(config, text); + case 'lemonade-tts': + return await generateLemonadeTTS(config, text); + case 'browser-native-tts': throw new Error( 'Browser Native TTS must be handled client-side using Web Speech API. This provider cannot be used on the server.', @@ -215,6 +218,47 @@ async function generateOpenAITTS( }; } +/** + * Lemonade TTS implementation (OpenAI-compatible /v1/audio/speech). 
+ */ +async function generateLemonadeTTS( + config: TTSModelConfig, + text: string, +): Promise { + const baseUrl = (config.baseUrl || TTS_PROVIDERS['lemonade-tts'].defaultBaseUrl || '').replace( + /\/$/, + '', + ); + const modelId = config.modelId || TTS_PROVIDERS['lemonade-tts'].defaultModelId; + const voice = config.voice || 'af_heart'; + + const response = await fetch(`${baseUrl}/audio/speech`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json; charset=utf-8', + ...getBackendAuthHeaders(config.apiKey), + }, + body: JSON.stringify({ + model: modelId, + input: text, + voice, + speed: config.speed || 1.0, + response_format: config.format || 'wav', + }), + }); + + if (!response.ok) { + throw new Error(`Lemonade TTS API error: ${await readTTSApiError(response)}`); + } + + const arrayBuffer = await response.arrayBuffer(); + const contentType = response.headers.get('content-type') || ''; + return { + audio: new Uint8Array(arrayBuffer), + format: getAudioResponseFormat(contentType), + }; +} + /** * VoxCPM2 TTS implementation. * diff --git a/lib/audio/types.ts b/lib/audio/types.ts index 5e4cde562..d57960692 100644 --- a/lib/audio/types.ts +++ b/lib/audio/types.ts @@ -87,6 +87,7 @@ export type BuiltInTTSProviderId = | 'doubao-tts' | 'elevenlabs-tts' | 'minimax-tts' + | 'lemonade-tts' | 'browser-native-tts'; export type TTSProviderId = BuiltInTTSProviderId | `custom-tts-${string}`; @@ -151,7 +152,11 @@ export interface TTSModelConfig { * Add new ASR providers here as union members. 
'use client';

/** Size in bytes of the RIFF/WAVE header written by `audioBufferToMonoWav`. */
const WAV_HEADER_SIZE = 44;
/** Output is 16-bit PCM: two bytes per sample. */
const BYTES_PER_SAMPLE = 2;

/** Write `value` into `view` one character code per byte, starting at `offset`. */
function writeAscii(view: DataView, offset: number, value: string): void {
  for (let i = 0; i < value.length; i++) {
    view.setUint8(offset + i, value.charCodeAt(i));
  }
}

/**
 * Encode a decoded audio buffer as a mono, 16-bit PCM WAV file.
 *
 * All channels are averaged into one. The source sample rate is kept as-is.
 *
 * @param audioBuffer - Decoded audio to encode.
 * @returns The complete WAV file (44-byte header followed by sample data).
 */
function audioBufferToMonoWav(audioBuffer: AudioBuffer): ArrayBuffer {
  const { sampleRate, length: sampleCount, numberOfChannels } = audioBuffer;
  const dataSize = sampleCount * BYTES_PER_SAMPLE;
  const buffer = new ArrayBuffer(WAV_HEADER_SIZE + dataSize);
  const view = new DataView(buffer);

  writeAscii(view, 0, 'RIFF');
  view.setUint32(4, 36 + dataSize, true); // bytes remaining after this field
  writeAscii(view, 8, 'WAVE');
  writeAscii(view, 12, 'fmt ');
  view.setUint32(16, 16, true); // fmt chunk length
  view.setUint16(20, 1, true); // format tag 1 (PCM)
  view.setUint16(22, 1, true); // one channel
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * BYTES_PER_SAMPLE, true); // bytes per second
  view.setUint16(32, BYTES_PER_SAMPLE, true); // bytes per sample frame
  view.setUint16(34, 16, true); // bits per sample
  writeAscii(view, 36, 'data');
  view.setUint32(40, dataSize, true);

  const channels = Array.from({ length: numberOfChannels }, (_, index) =>
    audioBuffer.getChannelData(index),
  );
  let offset = WAV_HEADER_SIZE;
  for (let i = 0; i < sampleCount; i++) {
    let mixed = 0;
    for (const channel of channels) mixed += channel[i] ?? 0;
    // Guard the average so a buffer reporting zero channels encodes silence
    // instead of dividing by zero.
    const average = channels.length > 0 ? mixed / channels.length : 0;
    const sample = Math.max(-1, Math.min(1, average));
    // Scale [-1, 1] onto the asymmetric int16 range [-32768, 32767].
    view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7fff, true);
    offset += BYTES_PER_SAMPLE;
  }

  return buffer;
}

/**
 * Report whether a blob looks like WAV audio based on its MIME type or name.
 *
 * @param blob - Audio payload to inspect.
 * @param fileName - Optional name to test for a `.wav` extension. When omitted
 *   (or empty) and `blob` is a `File`, the file's own name is used.
 */
export function isWavBlob(blob: Blob, fileName?: string): boolean {
  if (blob.type.includes('audio/wav') || blob.type.includes('audio/x-wav')) {
    return true;
  }
  const ownName = typeof File !== 'undefined' && blob instanceof File ? blob.name : '';
  return /\.wav$/i.test(fileName || ownName);
}

/**
 * Convert an audio blob to a mono 16-bit WAV blob using the Web Audio API.
 *
 * Blobs already identified as WAV by `isWavBlob` are returned untouched.
 *
 * @param blob - Recorded or uploaded audio.
 * @returns A blob of type `audio/wav`.
 * @throws Error when no `window` exists or the browser has no `AudioContext`.
 */
export async function audioBlobToWav(blob: Blob): Promise<Blob> {
  if (isWavBlob(blob)) return blob;
  if (typeof window === 'undefined') {
    throw new Error('Audio conversion requires a browser environment');
  }

  const AudioContextConstructor =
    window.AudioContext ||
    (window as typeof window & { webkitAudioContext?: typeof AudioContext }).webkitAudioContext;
  if (!AudioContextConstructor) {
    throw new Error('This browser does not support audio conversion');
  }

  const audioContext = new AudioContextConstructor();
  try {
    const arrayBuffer = await blob.arrayBuffer();
    const audioBuffer = await audioContext.decodeAudioData(arrayBuffer.slice(0));
    return new Blob([audioBufferToMonoWav(audioBuffer)], { type: 'audio/wav' });
  } finally {
    // Best-effort cleanup: a failure to close must not mask the real outcome.
    await audioContext.close().catch(() => undefined);
  }
}

/**
 * Prepare recorded audio for upload to the selected ASR provider.
 *
 * Only `lemonade-asr` gets a WAV conversion; every other provider receives the
 * blob unchanged under the name `recording.webm`.
 *
 * @param providerId - Active ASR provider id.
 * @param audioBlob - Recorded audio.
 * @returns The blob to upload and the file name to send with it.
 */
export async function normalizeASRUploadAudio(
  providerId: string,
  audioBlob: Blob,
): Promise<{ blob: Blob; fileName: string }> {
  if (providerId !== 'lemonade-asr') {
    return { blob: audioBlob, fileName: 'recording.webm' };
  }
  return { blob: await audioBlobToWav(audioBlob), fileName: 'recording.wav' };
}
Context: ${jsonStr + .slice(start, end) + .replace(/\n/g, '\\n')}`, + ); + return; + } + + log.warn(`${stage} parse error: ${message}`); +} + export function parseJsonResponse(response: string): T | null { // Strategy 1: Try to extract JSON from markdown code blocks (may have multiple) const codeBlockMatches = response.matchAll(/```(?:json)?\s*([\s\S]*?)```/g); @@ -90,6 +116,10 @@ export function parseJsonResponse(response: string): T | null { log.error('Failed to parse JSON from response'); log.error('Raw response (first 500 chars):', response.substring(0, 500)); + log.error( + 'Raw response (last 500 chars):', + response.substring(Math.max(0, response.length - 500)), + ); return null; } @@ -101,7 +131,8 @@ export function tryParseJson(jsonStr: string): T | null { // Attempt 1: Try parsing as-is try { return JSON.parse(jsonStr) as T; - } catch { + } catch (error) { + logJsonParseError('Attempt 1', jsonStr, error); // Continue to fix attempts } @@ -109,6 +140,13 @@ export function tryParseJson(jsonStr: string): T | null { try { let fixed = jsonStr; + // Fix 0: Recover malformed property fragments that were accidentally + // emitted as standalone strings inside an object, such as: + // `"height: 76"` -> `"height": 76` + // `"fixedRatio: false"` -> `"fixedRatio": false` + // The object-context prefix/suffix guards keep valid JSON strings intact. + fixed = repairQuotedPropertyFragments(fixed); + // Fix 1: Handle LaTeX-style escapes that break JSON (e.g., \frac, \left, \right, \times, etc.) 
// These are common in math content and need to be double-escaped // Match backslash followed by letters (LaTeX commands) inside strings, @@ -152,7 +190,8 @@ export function tryParseJson(jsonStr: string): T | null { } return JSON.parse(fixed) as T; - } catch { + } catch (error) { + logJsonParseError('Attempt 2', jsonStr, error); // Continue to next attempt } @@ -160,7 +199,8 @@ export function tryParseJson(jsonStr: string): T | null { try { const repaired = jsonrepair(jsonStr); return JSON.parse(repaired) as T; - } catch { + } catch (error) { + logJsonParseError('Attempt 3', jsonStr, error); // Continue to next attempt } @@ -183,7 +223,8 @@ export function tryParseJson(jsonStr: string): T | null { }); return JSON.parse(fixed) as T; - } catch { + } catch (error) { + logJsonParseError('Attempt 4', jsonStr, error); return null; } } diff --git a/lib/hooks/use-audio-recorder.ts b/lib/hooks/use-audio-recorder.ts index 327985bb2..a19286e34 100644 --- a/lib/hooks/use-audio-recorder.ts +++ b/lib/hooks/use-audio-recorder.ts @@ -1,5 +1,6 @@ import { useState, useRef, useCallback } from 'react'; import { ASR_PROVIDERS } from '@/lib/audio/constants'; +import { normalizeASRUploadAudio } from '@/lib/audio/wav-utils'; import { createLogger } from '@/lib/logger'; const log = createLogger('AudioRecorder'); @@ -41,13 +42,14 @@ export function useAudioRecorder(options: UseAudioRecorderOptions = {}) { try { const formData = new FormData(); - formData.append('audio', audioBlob, 'recording.webm'); // Get current ASR configuration from settings store // Note: This requires importing useSettingsStore in browser context if (typeof window !== 'undefined') { const { useSettingsStore } = await import('@/lib/store/settings'); const { asrProviderId, asrLanguage, asrProvidersConfig } = useSettingsStore.getState(); + const uploadAudio = await normalizeASRUploadAudio(asrProviderId, audioBlob); + formData.append('audio', uploadAudio.blob, uploadAudio.fileName); formData.append('providerId', 
asrProviderId); formData.append( @@ -68,6 +70,8 @@ export function useAudioRecorder(options: UseAudioRecorderOptions = {}) { if (effectiveBaseUrl) { formData.append('baseUrl', effectiveBaseUrl); } + } else { + formData.append('audio', audioBlob, 'recording.webm'); } const response = await fetch('/api/transcription', { diff --git a/lib/i18n/locales/ar-SA.json b/lib/i18n/locales/ar-SA.json index 5c178050c..92c3aacc5 100644 --- a/lib/i18n/locales/ar-SA.json +++ b/lib/i18n/locales/ar-SA.json @@ -454,6 +454,7 @@ "grok": "Grok", "tencent-hunyuan": "Tencent Hunyuan", "xiaomi": "Xiaomi MiMo", + "lemonade": "Lemonade (محلي)", "ollama": "Ollama (محلي)", "tavily": "Tavily", "bocha": "Bocha" @@ -603,6 +604,7 @@ "providerDoubaoTTS": "Doubao TTS 2.0 (فولكينجين)", "providerElevenLabsTTS": "ElevenLabs TTS", "providerMiniMaxTTS": "MiniMax TTS", + "providerLemonadeTTS": "Lemonade TTS (محلي)", "providerBrowserNativeTTS": "تحويل النص إلى كلام المدمج في المتصفح", "voxcpmBackend": "الخلفية", "voxcpmBaseUrlPending": "أدخل Base URL لإنشاء عنوان الطلب", @@ -649,6 +651,7 @@ "providerOpenAIWhisper": "OpenAI ASR (gpt-4o-mini-transcribe)", "providerBrowserNative": "التعرّف على الكلام المدمج في المتصفح", "providerQwenASR": "Qwen ASR (سحابة علي بابا بايليان)", + "providerLemonadeASR": "Lemonade ASR (محلي)", "providerUnpdf": "unpdf (مُدمج)", "providerMinerU": "MinerU", "providerMinerUCloud": "MinerU (السحابي)", @@ -900,6 +903,7 @@ "providerNanoBanana": "Nano Banana (Gemini)", "providerMiniMaxImage": "MiniMax Image", "providerGrokImage": "Grok Image (xAI)", + "providerLemonadeImage": "Lemonade Image (محلي)", "testImageGeneration": "اختبار توليد الصور", "testImageConnectivity": "اختبار الاتصال", "imageConnectivitySuccess": "تم الاتصال بخدمة الصور بنجاح", diff --git a/lib/i18n/locales/en-US.json b/lib/i18n/locales/en-US.json index a36b0f15c..19b7282f2 100644 --- a/lib/i18n/locales/en-US.json +++ b/lib/i18n/locales/en-US.json @@ -454,6 +454,7 @@ "grok": "Grok", "tencent-hunyuan": "Tencent Hunyuan", 
"xiaomi": "Xiaomi MiMo", + "lemonade": "Lemonade (Local)", "ollama": "Ollama (Local)", "tavily": "Tavily", "bocha": "Bocha" @@ -603,6 +604,7 @@ "providerDoubaoTTS": "Doubao TTS 2.0 (Volcengine)", "providerElevenLabsTTS": "ElevenLabs TTS", "providerMiniMaxTTS": "MiniMax TTS", + "providerLemonadeTTS": "Lemonade TTS (Local)", "providerBrowserNativeTTS": "Browser Native TTS", "voxcpmBackend": "Backend", "voxcpmBaseUrlPending": "Enter a Base URL to generate the request URL", @@ -649,6 +651,7 @@ "providerOpenAIWhisper": "OpenAI ASR (gpt-4o-mini-transcribe)", "providerBrowserNative": "Browser Native ASR", "providerQwenASR": "Qwen ASR (Alibaba Cloud Bailian)", + "providerLemonadeASR": "Lemonade ASR (Local)", "providerUnpdf": "unpdf (Built-in)", "providerMinerU": "MinerU", "providerMinerUCloud": "MinerU (Cloud)", @@ -900,6 +903,7 @@ "providerNanoBanana": "Nano Banana (Gemini)", "providerMiniMaxImage": "MiniMax Image", "providerGrokImage": "Grok Image (xAI)", + "providerLemonadeImage": "Lemonade Image (Local)", "testImageGeneration": "Test Image Generation", "testImageConnectivity": "Test Connection", "imageConnectivitySuccess": "Image service connected successfully", diff --git a/lib/i18n/locales/ja-JP.json b/lib/i18n/locales/ja-JP.json index d59022bb1..bd26eac12 100644 --- a/lib/i18n/locales/ja-JP.json +++ b/lib/i18n/locales/ja-JP.json @@ -454,6 +454,7 @@ "grok": "Grok", "tencent-hunyuan": "Tencent Hunyuan", "xiaomi": "Xiaomi MiMo", + "lemonade": "Lemonade(ローカル)", "ollama": "Ollama(ローカルモデル)", "tavily": "Tavily", "bocha": "Bocha" @@ -603,6 +604,7 @@ "providerDoubaoTTS": "Doubao TTS 2.0(火山エンジン)", "providerElevenLabsTTS": "ElevenLabs TTS", "providerMiniMaxTTS": "MiniMax TTS", + "providerLemonadeTTS": "Lemonade TTS(ローカル)", "providerBrowserNativeTTS": "ブラウザネイティブTTS", "voxcpmBackend": "バックエンド", "voxcpmBaseUrlPending": "Base URL を入力するとリクエスト URL が生成されます", @@ -649,6 +651,7 @@ "providerOpenAIWhisper": "OpenAI ASR (gpt-4o-mini-transcribe)", "providerBrowserNative": "ブラウザネイティブASR", 
"providerQwenASR": "Qwen ASR(Alibaba Cloud百錬)", + "providerLemonadeASR": "Lemonade ASR(ローカル)", "providerUnpdf": "unpdf(組み込み)", "providerMinerU": "MinerU", "providerMinerUCloud": "MinerU(クラウド)", @@ -900,6 +903,7 @@ "providerNanoBanana": "Nano Banana(Gemini)", "providerMiniMaxImage": "MiniMax Image", "providerGrokImage": "Grok Image(xAI)", + "providerLemonadeImage": "Lemonade Image(ローカル)", "testImageGeneration": "画像生成をテスト", "testImageConnectivity": "接続テスト", "imageConnectivitySuccess": "画像サービスへの接続に成功しました", diff --git a/lib/i18n/locales/ru-RU.json b/lib/i18n/locales/ru-RU.json index c4be26124..fb5eca058 100644 --- a/lib/i18n/locales/ru-RU.json +++ b/lib/i18n/locales/ru-RU.json @@ -454,6 +454,7 @@ "grok": "Grok", "tencent-hunyuan": "Tencent Hunyuan", "xiaomi": "Xiaomi MiMo", + "lemonade": "Lemonade (Локальный)", "ollama": "Ollama (Локальный)", "tavily": "Tavily", "bocha": "Bocha" @@ -603,6 +604,7 @@ "providerDoubaoTTS": "Doubao TTS 2.0 (Volcengine)", "providerElevenLabsTTS": "ElevenLabs TTS", "providerMiniMaxTTS": "MiniMax TTS", + "providerLemonadeTTS": "Lemonade TTS (Локальный)", "providerBrowserNativeTTS": "Встроенный TTS браузера", "voxcpmBackend": "Бэкенд", "voxcpmBaseUrlPending": "Введите Base URL, чтобы сформировать URL запроса", @@ -649,6 +651,7 @@ "providerOpenAIWhisper": "OpenAI ASR (gpt-4o-mini-transcribe)", "providerBrowserNative": "Встроенный ASR браузера", "providerQwenASR": "Qwen ASR (Alibaba Cloud Bailian)", + "providerLemonadeASR": "Lemonade ASR (Локальный)", "providerUnpdf": "unpdf (встроенный)", "providerMinerU": "MinerU", "providerMinerUCloud": "MinerU (Облако)", @@ -900,6 +903,7 @@ "providerNanoBanana": "Nano Banana (Gemini)", "providerMiniMaxImage": "MiniMax Image", "providerGrokImage": "Grok Image (xAI)", + "providerLemonadeImage": "Lemonade Image (Локальный)", "testImageGeneration": "Тест генерации изображений", "testImageConnectivity": "Тест подключения", "imageConnectivitySuccess": "Подключение к сервису изображений успешно", diff --git 
a/lib/i18n/locales/zh-CN.json b/lib/i18n/locales/zh-CN.json index 79892a759..84471f846 100644 --- a/lib/i18n/locales/zh-CN.json +++ b/lib/i18n/locales/zh-CN.json @@ -454,6 +454,7 @@ "grok": "Grok", "tencent-hunyuan": "腾讯混元", "xiaomi": "小米 MiMo", + "lemonade": "Lemonade(本地)", "ollama": "Ollama(本地模型)", "tavily": "Tavily", "bocha": "博查" @@ -603,6 +604,7 @@ "providerDoubaoTTS": "豆包 TTS 2.0(火山引擎)", "providerElevenLabsTTS": "ElevenLabs TTS", "providerMiniMaxTTS": "MiniMax TTS", + "providerLemonadeTTS": "Lemonade TTS(本地)", "providerBrowserNativeTTS": "浏览器原生 TTS", "voxcpmBackend": "Backend", "voxcpmBaseUrlPending": "填写 Base URL 后生成", @@ -649,6 +651,7 @@ "providerOpenAIWhisper": "OpenAI ASR (gpt-4o-mini-transcribe)", "providerBrowserNative": "浏览器原生 ASR", "providerQwenASR": "Qwen ASR(阿里云百炼)", + "providerLemonadeASR": "Lemonade ASR(本地)", "providerUnpdf": "unpdf(内置)", "providerMinerU": "MinerU", "providerMinerUCloud": "MinerU(云端)", @@ -900,6 +903,7 @@ "providerNanoBanana": "Nano Banana(Gemini)", "providerMiniMaxImage": "MiniMax 图像", "providerGrokImage": "Grok Image(xAI)", + "providerLemonadeImage": "Lemonade 图像(本地)", "testImageGeneration": "测试图像生成", "testImageConnectivity": "测试连接", "imageConnectivitySuccess": "图像服务连接成功", diff --git a/lib/i18n/locales/zh-TW.json b/lib/i18n/locales/zh-TW.json index 9ca932ad3..d7b263e53 100644 --- a/lib/i18n/locales/zh-TW.json +++ b/lib/i18n/locales/zh-TW.json @@ -440,6 +440,7 @@ "openrouter": "OpenRouter", "tencent-hunyuan": "騰訊混元", "xiaomi": "小米 MiMo", + "lemonade": "Lemonade(本機)", "tavily": "Tavily", "bocha": "Bocha" }, @@ -583,9 +584,11 @@ "providerDoubaoTTS": "豆包 TTS 2.0(火山引擎)", "providerElevenLabsTTS": "ElevenLabs TTS", "providerMiniMaxTTS": "MiniMax TTS", + "providerLemonadeTTS": "Lemonade TTS(本機)", "providerBrowserNativeTTS": "瀏覽器原生 TTS", "providerOpenAIWhisper": "OpenAI ASR (gpt-4o-mini-transcribe)", "providerBrowserNative": "瀏覽器原生 ASR", + "providerLemonadeASR": "Lemonade ASR(本機)", "providerQwenASR": "Qwen ASR(阿里雲百煉)", "providerUnpdf": 
"unpdf(內建)", "providerMinerU": "MinerU", @@ -814,6 +817,7 @@ "providerQwenImage": "Qwen Image(阿里通義)", "providerNanoBanana": "Nano Banana(Gemini)", "providerMiniMaxImage": "MiniMax 圖像", + "providerLemonadeImage": "Lemonade 圖像(本機)", "providerGrokImage": "Grok Image(xAI)", "testImageGeneration": "測試圖像生成", "testImageConnectivity": "測試連線", diff --git a/lib/media/adapters/lemonade-image-adapter.ts b/lib/media/adapters/lemonade-image-adapter.ts new file mode 100644 index 000000000..82d9ed83b --- /dev/null +++ b/lib/media/adapters/lemonade-image-adapter.ts @@ -0,0 +1,90 @@ +/** + * Lemonade Image Generation Adapter + * + * Lemonade exposes OpenAI-compatible image generation at /v1/images/generations. + */ + +import type { + ImageGenerationConfig, + ImageGenerationOptions, + ImageGenerationResult, +} from '../types'; + +const DEFAULT_MODEL = 'Qwen-Image-GGUF'; +const DEFAULT_BASE_URL = 'http://localhost:13305/v1'; + +function normalizeBaseUrl(baseUrl?: string): string { + return (baseUrl || DEFAULT_BASE_URL).replace(/\/$/, ''); +} + +function authHeaders(apiKey?: string): Record { + const key = apiKey?.trim(); + return key ? 
{ Authorization: `Bearer ${key}` } : {}; +} + +function resolveSize(options: ImageGenerationOptions): string { + return `${options.width || 1024}x${options.height || 1024}`; +} + +export async function testLemonadeImageConnectivity( + config: ImageGenerationConfig, +): Promise<{ success: boolean; message: string }> { + const baseUrl = normalizeBaseUrl(config.baseUrl); + + try { + const response = await fetch(`${baseUrl}/models`, { + headers: authHeaders(config.apiKey), + }); + + if (response.ok) { + return { success: true, message: 'Connected to Lemonade image generation' }; + } + + const text = await response.text().catch(() => response.statusText); + return { success: false, message: `Lemonade API error (${response.status}): ${text}` }; + } catch (err) { + return { success: false, message: `Lemonade connectivity error: ${err}` }; + } +} + +export async function generateWithLemonadeImage( + config: ImageGenerationConfig, + options: ImageGenerationOptions, +): Promise { + const baseUrl = normalizeBaseUrl(config.baseUrl); + const width = options.width || 1024; + const height = options.height || 1024; + + const response = await fetch(`${baseUrl}/images/generations`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + ...authHeaders(config.apiKey), + }, + body: JSON.stringify({ + model: config.model || DEFAULT_MODEL, + prompt: options.prompt, + n: 1, + size: resolveSize(options), + response_format: 'b64_json', + }), + }); + + if (!response.ok) { + const text = await response.text().catch(() => response.statusText); + throw new Error(`Lemonade image generation failed (${response.status}): ${text}`); + } + + const data = await response.json(); + const imageData = data.data?.[0]; + if (!imageData?.url && !imageData?.b64_json) { + throw new Error('Lemonade returned empty image response'); + } + + return { + url: imageData.url, + base64: imageData.b64_json, + width, + height, + }; +} diff --git a/lib/media/image-providers.ts 
b/lib/media/image-providers.ts index f2102df27..6a8ea817f 100644 --- a/lib/media/image-providers.ts +++ b/lib/media/image-providers.ts @@ -21,6 +21,10 @@ import { testMiniMaxImageConnectivity, } from './adapters/minimax-image-adapter'; import { generateWithGrokImage, testGrokImageConnectivity } from './adapters/grok-image-adapter'; +import { + generateWithLemonadeImage, + testLemonadeImageConnectivity, +} from './adapters/lemonade-image-adapter'; export const IMAGE_PROVIDERS: Record = { seedream: { @@ -116,6 +120,19 @@ export const IMAGE_PROVIDERS: Record = { ], supportedAspectRatios: ['16:9', '4:3', '1:1', '9:16'], }, + lemonade: { + id: 'lemonade', + name: 'Lemonade', + requiresApiKey: false, + defaultBaseUrl: 'http://localhost:13305/v1', + icon: '/logos/lemonade.svg', + models: [ + { id: 'Qwen-Image-GGUF', name: 'Qwen Image GGUF' }, + { id: 'sd-cpp', name: 'Stable Diffusion (sd-cpp)' }, + ], + supportedAspectRatios: ['16:9', '4:3', '1:1', '9:16'], + maxResolution: { width: 1024, height: 1024 }, + }, }; export async function testImageConnectivity( @@ -134,6 +151,8 @@ export async function testImageConnectivity( return testMiniMaxImageConnectivity(config); case 'grok-image': return testGrokImageConnectivity(config); + case 'lemonade': + return testLemonadeImageConnectivity(config); default: return { success: false, @@ -159,6 +178,8 @@ export async function generateImage( return generateWithMiniMaxImage(config, options); case 'grok-image': return generateWithGrokImage(config, options); + case 'lemonade': + return generateWithLemonadeImage(config, options); default: throw new Error(`Unsupported image provider: ${config.providerId}`); } diff --git a/lib/media/types.ts b/lib/media/types.ts index 8a2f8e5f2..47c80ac93 100644 --- a/lib/media/types.ts +++ b/lib/media/types.ts @@ -76,7 +76,8 @@ export type ImageProviderId = | 'qwen-image' | 'nano-banana' | 'minimax-image' - | 'grok-image'; + | 'grok-image' + | 'lemonade'; // Add new image providers below (uncomment and 
modify): // | 'dall-e' // | 'midjourney' diff --git a/lib/server/classroom-media-generation.ts b/lib/server/classroom-media-generation.ts index bd307d150..7848c9af2 100644 --- a/lib/server/classroom-media-generation.ts +++ b/lib/server/classroom-media-generation.ts @@ -95,11 +95,11 @@ export async function generateMediaForClassroom( try { const providerId = imageProviderIds[0] as ImageProviderId; const apiKey = resolveImageApiKey(providerId); - if (!apiKey) { + const providerConfig = IMAGE_PROVIDERS[providerId]; + if (providerConfig?.requiresApiKey && !apiKey) { log.warn(`No API key for image provider "${providerId}", skipping ${req.elementId}`); continue; } - const providerConfig = IMAGE_PROVIDERS[providerId]; const model = providerConfig?.models?.[0]?.id; const result = await generateImage( @@ -221,16 +221,14 @@ export async function generateTTSForClassroom( const providerId = ttsProviderIds[0] as TTSProviderId; const apiKey = resolveTTSApiKey(providerId); - if (!apiKey) { + const ttsProvider = TTS_PROVIDERS[providerId as keyof typeof TTS_PROVIDERS]; + if (ttsProvider?.requiresApiKey && !apiKey) { log.warn(`No API key for TTS provider "${providerId}", skipping TTS generation`); return; } - const ttsBaseUrl = - resolveTTSBaseUrl(providerId) || - TTS_PROVIDERS[providerId as keyof typeof TTS_PROVIDERS]?.defaultBaseUrl; + const ttsBaseUrl = resolveTTSBaseUrl(providerId) || ttsProvider?.defaultBaseUrl; const voice = DEFAULT_TTS_VOICES[providerId as keyof typeof DEFAULT_TTS_VOICES] || 'default'; - const format = - TTS_PROVIDERS[providerId as keyof typeof TTS_PROVIDERS]?.supportedFormats?.[0] || 'mp3'; + const format = ttsProvider?.supportedFormats?.[0] || 'mp3'; if (providerId === VOXCPM_TTS_PROVIDER_ID && voice === VOXCPM_AUTO_VOICE_ID) { log.warn('VoxCPM Auto Voice requires agent context; skipping server-side TTS generation'); return; diff --git a/lib/server/provider-config.ts b/lib/server/provider-config.ts index ff1129909..81f758233 100644 --- 
a/lib/server/provider-config.ts +++ b/lib/server/provider-config.ts @@ -55,6 +55,7 @@ const LLM_ENV_MAP: Record = { XIAOMI: 'xiaomi', MIMO: 'xiaomi', OLLAMA: 'ollama', + LEMONADE: 'lemonade', }; const TTS_ENV_MAP: Record = { @@ -66,11 +67,13 @@ const TTS_ENV_MAP: Record = { TTS_DOUBAO: 'doubao-tts', TTS_ELEVENLABS: 'elevenlabs-tts', TTS_MINIMAX: 'minimax-tts', + TTS_LEMONADE: 'lemonade-tts', }; const ASR_ENV_MAP: Record = { ASR_OPENAI: 'openai-whisper', ASR_QWEN: 'qwen-asr', + ASR_LEMONADE: 'lemonade-asr', }; const PDF_ENV_MAP: Record = { @@ -86,6 +89,7 @@ const IMAGE_ENV_MAP: Record = { IMAGE_NANO_BANANA: 'nano-banana', IMAGE_MINIMAX: 'minimax-image', IMAGE_GROK: 'grok-image', + IMAGE_LEMONADE: 'lemonade', }; const VIDEO_ENV_MAP: Record = { @@ -231,18 +235,22 @@ function applyOpenAIImageFallback( function buildConfig(yamlData: YamlData): ServerConfig { const image = applyOpenAIImageFallback( - loadEnvSection(IMAGE_ENV_MAP, yamlData.image), + loadEnvSection(IMAGE_ENV_MAP, yamlData.image, { + keylessProviders: new Set(['lemonade']), + }), yamlData.image, ); return { providers: loadEnvSection(LLM_ENV_MAP, yamlData.providers, { - keylessProviders: new Set(['ollama']), + keylessProviders: new Set(['ollama', 'lemonade']), }), tts: loadEnvSection(TTS_ENV_MAP, yamlData.tts, { - keylessProviders: new Set(['voxcpm-tts']), + keylessProviders: new Set(['voxcpm-tts', 'lemonade-tts']), + }), + asr: loadEnvSection(ASR_ENV_MAP, yamlData.asr, { + keylessProviders: new Set(['lemonade-asr']), }), - asr: loadEnvSection(ASR_ENV_MAP, yamlData.asr), pdf: loadEnvSection(PDF_ENV_MAP, yamlData.pdf, { requiresBaseUrl: true }), image, video: loadEnvSection(VIDEO_ENV_MAP, yamlData.video), diff --git a/lib/server/resolve-model.ts b/lib/server/resolve-model.ts index 8887ae43b..8553176be 100644 --- a/lib/server/resolve-model.ts +++ b/lib/server/resolve-model.ts @@ -16,8 +16,12 @@ export interface ResolvedModel extends ModelWithInfo { modelString: string; /** Resolved provider ID (e.g. 
"openai", "ollama") */ providerId: string; + /** Resolved model ID (e.g. "gpt-4o-mini") */ + modelId: string; /** Effective API key after server-side fallback resolution */ apiKey: string; + /** Effective base URL after server/client resolution */ + baseUrl?: string; /** Optional per-request thinking configuration from the client. */ thinkingConfig?: ThinkingConfig; } @@ -67,7 +71,9 @@ export async function resolveModel(params: { modelInfo, modelString, providerId, + modelId, apiKey, + baseUrl, thinkingConfig: params.thinkingConfig, }; } diff --git a/lib/store/settings.ts b/lib/store/settings.ts index e8e82996a..d810c4498 100644 --- a/lib/store/settings.ts +++ b/lib/store/settings.ts @@ -356,6 +356,12 @@ const getDefaultAudioConfig = () => ({ 'doubao-tts': { apiKey: '', baseUrl: '', enabled: false }, 'elevenlabs-tts': { apiKey: '', baseUrl: '', enabled: false }, 'minimax-tts': { apiKey: '', baseUrl: '', modelId: 'speech-2.8-hd', enabled: false }, + 'lemonade-tts': { + apiKey: '', + baseUrl: '', + modelId: 'kokoro-v1', + enabled: false, + }, 'browser-native-tts': { apiKey: '', baseUrl: '', enabled: true }, } as Record< TTSProviderId, @@ -365,6 +371,7 @@ const getDefaultAudioConfig = () => ({ 'openai-whisper': { apiKey: '', baseUrl: '', enabled: true }, 'browser-native': { apiKey: '', baseUrl: '', enabled: true }, 'qwen-asr': { apiKey: '', baseUrl: '', enabled: false }, + 'lemonade-asr': { apiKey: '', baseUrl: '', enabled: false }, } as Record, }); @@ -389,6 +396,7 @@ const getDefaultImageConfig = () => ({ 'nano-banana': { apiKey: '', baseUrl: '', enabled: false }, 'minimax-image': { apiKey: '', baseUrl: '', enabled: false }, 'grok-image': { apiKey: '', baseUrl: '', enabled: false }, + lemonade: { apiKey: '', baseUrl: '', enabled: false }, } as Record, }); @@ -867,7 +875,14 @@ export const useSettingsStore = create()( })), // Image Generation actions - setImageProvider: (providerId) => set({ imageProviderId: providerId }), + setImageProvider: (providerId) => + set(() 
=> { + const models = IMAGE_PROVIDERS[providerId]?.models || []; + return { + imageProviderId: providerId, + imageModelId: models[0]?.id || '', + }; + }), setImageModelId: (modelId) => set({ imageModelId: modelId }), setImageProviderConfig: (providerId, config) => diff --git a/lib/types/provider.ts b/lib/types/provider.ts index 9437e2998..bbbe58b6b 100644 --- a/lib/types/provider.ts +++ b/lib/types/provider.ts @@ -20,6 +20,7 @@ export type BuiltInProviderId = | 'grok' | 'tencent-hunyuan' | 'xiaomi' + | 'lemonade' | 'ollama'; /** @@ -59,7 +60,8 @@ export type ThinkingRequestAdapter = | 'doubao' | 'openrouter' | 'hunyuan' - | 'xiaomi'; + | 'xiaomi' + | 'lemonade'; /** * Describes a model's thinking/reasoning API control capability. diff --git a/public/logos/lemonade.svg b/public/logos/lemonade.svg new file mode 100644 index 000000000..5b51d7a65 --- /dev/null +++ b/public/logos/lemonade.svg @@ -0,0 +1,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/ai/openai-provider.test.ts b/tests/ai/openai-provider.test.ts index 949d8c074..def4781f6 100644 --- a/tests/ai/openai-provider.test.ts +++ b/tests/ai/openai-provider.test.ts @@ -16,7 +16,7 @@ import type { ProviderId } from '@/lib/types/provider'; async function captureInjectedRequestBody( providerId: ProviderId, modelId: string, - thinkingConfig: Record, + thinkingConfig?: Record, ) { const originalFetch = globalThis.fetch; const globalRecord = globalThis as Record; @@ -145,6 +145,12 @@ describe('OpenAI provider defaults', () => { { mode: 'enabled', effort: 'high' }, { chat_template_kwargs: { reasoning_effort: 'high' } }, ], + [ + 'lemonade', + 'Qwen3.5-4B-GGUF', + { mode: 'enabled', budgetTokens: 4096 }, + { chat_template_kwargs: { enable_thinking: true, thinking_budget: 4096 } }, + ], ] as const)( 'injects %s thinking params into the OpenAI-compatible request body', async (providerId, modelId, thinkingConfig, expected) => { @@ -152,4 
+158,81 @@ describe('OpenAI provider defaults', () => { expect(body).toMatchObject(expected); }, ); + + it('disables Lemonade thinking by default for recognized local reasoning models', async () => { + const body = await captureInjectedRequestBody('lemonade', 'Qwen3.5-4B-GGUF'); + + expect(body).toMatchObject({ + chat_template_kwargs: { enable_thinking: false }, + }); + }); + + it('recognizes manually added Lemonade reasoning model IDs', async () => { + const body = await captureInjectedRequestBody('lemonade', 'custom-gpt-oss-20b-q4'); + + expect(body).toMatchObject({ + chat_template_kwargs: { enable_thinking: false }, + }); + }); + + it('disables Lemonade thinking by default for non-catalog local models too', async () => { + const body = await captureInjectedRequestBody('lemonade', 'Gemma-4-26B-A4B-it-GGUF'); + + expect(body).toMatchObject({ + chat_template_kwargs: { enable_thinking: false }, + }); + }); + + it('strips unsupported Lemonade stream_options while preserving thinking overrides', async () => { + const originalFetch = globalThis.fetch; + const globalRecord = globalThis as Record; + const originalThinkingContext = globalRecord.__thinkingContext; + const fetchMock = vi.fn(async (_url: RequestInfo | URL, _init?: RequestInit) => { + return new Response(JSON.stringify({ ok: true }), { + status: 200, + headers: { 'content-type': 'application/json' }, + }); + }); + + try { + globalThis.fetch = fetchMock as typeof fetch; + globalRecord.__thinkingContext = { + getStore: () => ({ mode: 'disabled' }), + }; + + getModel({ + providerId: 'lemonade', + modelId: 'Gemma-4-26B-A4B-it-GGUF', + apiKey: '', + }); + + const lastCall = openAiMock.createOpenAI.mock.calls.at(-1); + const options = lastCall?.[0] as { fetch?: typeof fetch } | undefined; + + await options?.fetch?.('https://example.test/v1/chat/completions', { + method: 'POST', + body: JSON.stringify({ + model: 'Gemma-4-26B-A4B-it-GGUF', + messages: [{ role: 'user', content: 'hi' }], + stream: true, + 
stream_options: { include_usage: true }, + }), + }); + + const init = fetchMock.mock.calls[0]?.[1] as RequestInit; + const body = JSON.parse(init.body as string); + + expect(body.stream_options).toBeUndefined(); + expect(body).toMatchObject({ + chat_template_kwargs: { enable_thinking: false }, + }); + } finally { + globalThis.fetch = originalFetch; + if (originalThinkingContext === undefined) { + delete globalRecord.__thinkingContext; + } else { + globalRecord.__thinkingContext = originalThinkingContext; + } + } + }); }); diff --git a/tests/ai/thinking-config.test.ts b/tests/ai/thinking-config.test.ts index 283d41a07..9dd980485 100644 --- a/tests/ai/thinking-config.test.ts +++ b/tests/ai/thinking-config.test.ts @@ -152,6 +152,21 @@ describe('thinking config normalization', () => { expect(thinking?.effortValues).toEqual(['none', 'low', 'high']); }); + it('normalizes Lemonade reasoning models as disabled-by-default token budgets', () => { + const thinking = getThinking('lemonade', 'Qwen3.5-4B-GGUF'); + + expect(supportsConfigurableThinking(thinking)).toBe(true); + expect(thinking?.requestAdapter).toBe('lemonade'); + expect(getDefaultThinkingConfig(thinking)).toEqual({ + mode: 'disabled', + budgetTokens: undefined, + }); + expect(normalizeThinkingConfig(thinking, { mode: 'enabled', budgetTokens: 4096 })).toEqual({ + mode: 'enabled', + budgetTokens: 4096, + }); + }); + it('normalizes Doubao Seed 2.0 thinking as reasoning effort levels', () => { const thinking = getThinking('doubao', 'doubao-seed-2-0-pro-260215'); diff --git a/tests/audio/lemonade-asr.test.ts b/tests/audio/lemonade-asr.test.ts new file mode 100644 index 000000000..f53617272 --- /dev/null +++ b/tests/audio/lemonade-asr.test.ts @@ -0,0 +1,127 @@ +import { beforeEach, describe, expect, it, vi, type Mock } from 'vitest'; +import { transcribeAudio } from '@/lib/audio/asr-providers'; + +const mockFetch = vi.fn() as Mock; +vi.stubGlobal('fetch', mockFetch); + +function wavBuffer(): Buffer { + const buf = 
Buffer.alloc(16); + buf.write('RIFF', 0, 'ascii'); + buf.writeUInt32LE(8, 4); + buf.write('WAVE', 8, 'ascii'); + return buf; +} + +function wavArrayBuffer(): ArrayBuffer { + const buffer = wavBuffer(); + const arrayBuffer = new ArrayBuffer(buffer.byteLength); + new Uint8Array(arrayBuffer).set(buffer); + return arrayBuffer; +} + +describe('Lemonade ASR', () => { + beforeEach(() => { + mockFetch.mockReset(); + }); + + it('posts WAV audio to /audio/transcriptions with the configured model', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ text: 'hello' }), + }); + + const result = await transcribeAudio( + { + providerId: 'lemonade-asr', + baseUrl: 'http://localhost:13305/v1/', + modelId: 'Whisper-Base', + }, + wavBuffer(), + ); + + expect(mockFetch).toHaveBeenCalledWith( + 'http://localhost:13305/v1/audio/transcriptions', + expect.objectContaining({ method: 'POST' }), + ); + const formData = mockFetch.mock.calls[0][1].body as FormData; + expect(formData.get('model')).toBe('Whisper-Base'); + expect(formData.get('response_format')).toBe('json'); + expect(formData.get('file')).toBeInstanceOf(Blob); + expect(result).toEqual({ text: 'hello' }); + }); + + it('forwards an explicit language but not when set to "auto"', async () => { + mockFetch.mockResolvedValue({ + ok: true, + json: async () => ({ text: '' }), + }); + + await transcribeAudio({ providerId: 'lemonade-asr', language: 'en' }, wavBuffer()); + let formData = mockFetch.mock.calls[0][1].body as FormData; + expect(formData.get('language')).toBe('en'); + + mockFetch.mockClear(); + + await transcribeAudio({ providerId: 'lemonade-asr', language: 'auto' }, wavBuffer()); + formData = mockFetch.mock.calls[0][1].body as FormData; + expect(formData.get('language')).toBeNull(); + }); + + it('rejects non-WAV audio buffers', async () => { + const notWav = Buffer.from('IDXX' + '\0'.repeat(12)); + + await expect(transcribeAudio({ providerId: 'lemonade-asr' }, notWav)).rejects.toThrow( + /WAV 
input only/, + ); + expect(mockFetch).not.toHaveBeenCalled(); + }); + + it('accepts WAV files even when the MIME type is missing', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ text: 'hello' }), + }); + + const audioFile = new File([wavArrayBuffer()], 'recording.wav'); + const result = await transcribeAudio({ providerId: 'lemonade-asr' }, audioFile); + + expect(result).toEqual({ text: 'hello' }); + expect(mockFetch).toHaveBeenCalledTimes(1); + }); + + it('returns empty text gracefully when upstream reports empty audio', async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 400, + text: async () => 'audio is empty', + statusText: 'Bad Request', + }); + + const result = await transcribeAudio({ providerId: 'lemonade-asr' }, wavBuffer()); + expect(result).toEqual({ text: '' }); + }); + + it('throws on unrecognized error payloads', async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 500, + text: async () => 'model crashed', + statusText: 'Internal Server Error', + }); + + await expect(transcribeAudio({ providerId: 'lemonade-asr' }, wavBuffer())).rejects.toThrow( + /Lemonade ASR API error.*model crashed/, + ); + }); + + it('falls back to default model id when not provided', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ text: 'ok' }), + }); + + await transcribeAudio({ providerId: 'lemonade-asr' }, wavBuffer()); + const formData = mockFetch.mock.calls[0][1].body as FormData; + expect(formData.get('model')).toBe('Whisper-Base'); + }); +}); diff --git a/tests/audio/lemonade-tts.test.ts b/tests/audio/lemonade-tts.test.ts new file mode 100644 index 000000000..bf9e033ef --- /dev/null +++ b/tests/audio/lemonade-tts.test.ts @@ -0,0 +1,121 @@ +import { beforeEach, describe, expect, it, vi, type Mock } from 'vitest'; +import { generateTTS } from '@/lib/audio/tts-providers'; + +const mockFetch = vi.fn() as Mock; +vi.stubGlobal('fetch', mockFetch); + +function 
wavBytes(): ArrayBuffer { + const data = new Uint8Array(16); + data[0] = 0x52; // 'R' + data[1] = 0x49; // 'I' + data[2] = 0x46; // 'F' + data[3] = 0x46; // 'F' + data[8] = 0x57; // 'W' + data[9] = 0x41; // 'A' + data[10] = 0x56; // 'V' + data[11] = 0x45; // 'E' + return data.buffer; +} + +describe('Lemonade TTS', () => { + beforeEach(() => { + mockFetch.mockReset(); + }); + + it('posts to /audio/speech with kokoro-v1 + wav and bubble-up audio bytes', async () => { + const buffer = wavBytes(); + mockFetch.mockResolvedValueOnce({ + ok: true, + arrayBuffer: async () => buffer, + headers: { get: () => 'audio/wav' }, + }); + + const result = await generateTTS( + { + providerId: 'lemonade-tts', + baseUrl: 'http://localhost:13305/v1/', + voice: 'af_heart', + }, + 'hello world', + ); + + expect(mockFetch).toHaveBeenCalledWith( + 'http://localhost:13305/v1/audio/speech', + expect.objectContaining({ method: 'POST' }), + ); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body).toEqual({ + model: 'kokoro-v1', + input: 'hello world', + voice: 'af_heart', + speed: 1.0, + response_format: 'wav', + }); + expect(result.audio).toBeInstanceOf(Uint8Array); + expect(result.audio.byteLength).toBe(16); + expect(result.format).toBe('wav'); + }); + + it('falls back to af_heart when no voice is provided', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + arrayBuffer: async () => wavBytes(), + headers: { get: () => 'audio/wav' }, + }); + + await generateTTS({ providerId: 'lemonade-tts', voice: '' }, 'hi'); + + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.voice).toBe('af_heart'); + }); + + it('uses the selected voice consistently regardless of text language', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + arrayBuffer: async () => wavBytes(), + headers: { get: () => 'audio/wav' }, + }); + + await generateTTS({ providerId: 'lemonade-tts', voice: 'af_heart' }, '给我讲讲 Python'); + + const body = 
JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.voice).toBe('af_heart'); + }); + + it('does not require an API key (keyless provider)', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + arrayBuffer: async () => wavBytes(), + headers: { get: () => 'audio/wav' }, + }); + + await generateTTS({ providerId: 'lemonade-tts', voice: 'af_heart' }, 'hi'); + + expect(mockFetch.mock.calls[0][1].headers.Authorization).toBeUndefined(); + }); + + it('attaches Bearer auth when apiKey is provided', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + arrayBuffer: async () => wavBytes(), + headers: { get: () => 'audio/wav' }, + }); + + await generateTTS({ providerId: 'lemonade-tts', apiKey: 'sk-lm', voice: 'af_heart' }, 'hi'); + + expect(mockFetch.mock.calls[0][1].headers.Authorization).toBe('Bearer sk-lm'); + }); + + it('throws on non-OK responses', async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 400, + text: async () => 'bad voice', + statusText: 'Bad Request', + }); + + await expect(generateTTS({ providerId: 'lemonade-tts', voice: 'foo' }, 'hi')).rejects.toThrow( + /Lemonade TTS API error/, + ); + }); +}); diff --git a/tests/audio/wav-utils.test.ts b/tests/audio/wav-utils.test.ts new file mode 100644 index 000000000..cbddc46c9 --- /dev/null +++ b/tests/audio/wav-utils.test.ts @@ -0,0 +1,42 @@ +import { describe, expect, it } from 'vitest'; +import { isWavBlob, normalizeASRUploadAudio } from '@/lib/audio/wav-utils'; + +describe('isWavBlob', () => { + it('detects audio/wav MIME type', () => { + const blob = new Blob([new Uint8Array(4)], { type: 'audio/wav' }); + expect(isWavBlob(blob)).toBe(true); + }); + + it('detects audio/x-wav MIME type', () => { + const blob = new Blob([new Uint8Array(4)], { type: 'audio/x-wav' }); + expect(isWavBlob(blob)).toBe(true); + }); + + it('detects .wav file extension when MIME is missing', () => { + const blob = new Blob([new Uint8Array(4)]); + expect(isWavBlob(blob, 
'recording.wav')).toBe(true); + expect(isWavBlob(blob, 'recording.WAV')).toBe(true); + }); + + it('returns false for non-WAV blobs without a wav filename', () => { + const blob = new Blob([new Uint8Array(4)], { type: 'audio/webm' }); + expect(isWavBlob(blob)).toBe(false); + expect(isWavBlob(blob, 'recording.webm')).toBe(false); + }); +}); + +describe('normalizeASRUploadAudio', () => { + it('passes through non-lemonade providers unchanged', async () => { + const input = new Blob([new Uint8Array([1, 2, 3])], { type: 'audio/webm' }); + const result = await normalizeASRUploadAudio('openai-whisper', input); + expect(result.blob).toBe(input); + expect(result.fileName).toBe('recording.webm'); + }); + + it('keeps WAV blobs unchanged for lemonade-asr', async () => { + const input = new Blob([new Uint8Array([1, 2, 3])], { type: 'audio/wav' }); + const result = await normalizeASRUploadAudio('lemonade-asr', input); + expect(result.blob).toBe(input); + expect(result.fileName).toBe('recording.wav'); + }); +}); diff --git a/tests/generation/json-repair.test.ts b/tests/generation/json-repair.test.ts new file mode 100644 index 000000000..a401982ab --- /dev/null +++ b/tests/generation/json-repair.test.ts @@ -0,0 +1,57 @@ +import { describe, expect, it } from 'vitest'; + +import { parseJsonResponse } from '@/lib/generation/json-repair'; + +describe('json-repair targeted fixes', () => { + it('repairs quoted key-value fragments such as "height: 76"', () => { + const raw = `{ + "background": { + "type": "solid", + "color": "#ffffff" + }, + "elements": [ + { + "id": "code_text", + "type": "text", + "left": 80, + "top": 420, + "width": 840, + "height: 76", + "content": "

age = 25

", + "defaultFontName": "", + "defaultColor": "#333333" + } + ] +}`; + + const parsed = parseJsonResponse<{ + elements: Array<{ height: number; content: string }>; + }>(raw); + + expect(parsed).not.toBeNull(); + expect(parsed?.elements[0]?.height).toBe(76); + expect(parsed?.elements[0]?.content).toContain('age = 25'); + }); + + it('repairs boolean property fragments without touching valid string values', () => { + const raw = `{ + "elements": [ + { + "id": "shape_1", + "fixedRatio: false", + "height: 58", + "content": "

literal text: height: 58

" + } + ] +}`; + + const parsed = parseJsonResponse<{ + elements: Array<{ fixedRatio: boolean; height: number; content: string }>; + }>(raw); + + expect(parsed).not.toBeNull(); + expect(parsed?.elements[0]?.fixedRatio).toBe(false); + expect(parsed?.elements[0]?.height).toBe(58); + expect(parsed?.elements[0]?.content).toBe('

literal text: height: 58

'); + }); +}); diff --git a/tests/media/lemonade-image-adapter.test.ts b/tests/media/lemonade-image-adapter.test.ts new file mode 100644 index 000000000..43d291c14 --- /dev/null +++ b/tests/media/lemonade-image-adapter.test.ts @@ -0,0 +1,153 @@ +import { beforeEach, describe, expect, it, vi, type Mock } from 'vitest'; +import { + generateWithLemonadeImage, + testLemonadeImageConnectivity, +} from '@/lib/media/adapters/lemonade-image-adapter'; + +const mockFetch = vi.fn() as Mock; +vi.stubGlobal('fetch', mockFetch); + +describe('lemonade-image-adapter', () => { + beforeEach(() => { + mockFetch.mockReset(); + }); + + it('posts generation requests to /images/generations with b64_json response_format', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ data: [{ b64_json: 'aW1n' }] }), + }); + + const result = await generateWithLemonadeImage( + { providerId: 'lemonade', apiKey: '', baseUrl: 'http://localhost:13305/v1/' }, + { prompt: 'a fox', width: 768, height: 768 }, + ); + + expect(mockFetch).toHaveBeenCalledWith( + 'http://localhost:13305/v1/images/generations', + expect.objectContaining({ + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + }), + ); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body).toEqual({ + model: 'Qwen-Image-GGUF', + prompt: 'a fox', + n: 1, + size: '768x768', + response_format: 'b64_json', + }); + expect(result).toEqual({ + url: undefined, + base64: 'aW1n', + width: 768, + height: 768, + }); + }); + + it('falls back to default base URL and 1024x1024 when not provided', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ data: [{ b64_json: 'eA==' }] }), + }); + + await generateWithLemonadeImage({ providerId: 'lemonade', apiKey: '' }, { prompt: 'tile' }); + + expect(mockFetch.mock.calls[0][0]).toBe('http://localhost:13305/v1/images/generations'); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + 
expect(body.size).toBe('1024x1024'); + }); + + it('forwards custom model id when provided', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ data: [{ b64_json: 'eA==' }] }), + }); + + await generateWithLemonadeImage( + { providerId: 'lemonade', apiKey: '', model: 'flux-schnell' }, + { prompt: 'p' }, + ); + + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.model).toBe('flux-schnell'); + }); + + it('attaches Bearer auth header when apiKey is provided', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ data: [{ b64_json: 'eA==' }] }), + }); + + await generateWithLemonadeImage({ providerId: 'lemonade', apiKey: 'sk-lm' }, { prompt: 'p' }); + + expect(mockFetch.mock.calls[0][1].headers).toEqual({ + 'Content-Type': 'application/json', + Authorization: 'Bearer sk-lm', + }); + }); + + it('omits auth header when apiKey is empty (keyless)', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ data: [{ b64_json: 'eA==' }] }), + }); + + await generateWithLemonadeImage({ providerId: 'lemonade', apiKey: '' }, { prompt: 'p' }); + + expect(mockFetch.mock.calls[0][1].headers).toEqual({ + 'Content-Type': 'application/json', + }); + }); + + it('throws a useful error on failed generation responses', async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 500, + text: async () => 'model unavailable', + statusText: 'Internal Server Error', + }); + + await expect( + generateWithLemonadeImage({ providerId: 'lemonade', apiKey: '' }, { prompt: 'p' }), + ).rejects.toThrow('Lemonade image generation failed (500): model unavailable'); + }); + + it('throws when response payload contains no image data', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ data: [{}] }), + }); + + await expect( + generateWithLemonadeImage({ providerId: 'lemonade', apiKey: '' }, { prompt: 'p' }), + ).rejects.toThrow('Lemonade returned empty 
image response'); + }); + + it('reports connectivity success against /models endpoint', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const result = await testLemonadeImageConnectivity({ providerId: 'lemonade', apiKey: '' }); + + expect(mockFetch).toHaveBeenCalledWith( + 'http://localhost:13305/v1/models', + expect.objectContaining({ headers: {} }), + ); + expect(result.success).toBe(true); + }); + + it('reports connectivity failure with response text', async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 503, + text: async () => 'unavailable', + statusText: 'Service Unavailable', + }); + + const result = await testLemonadeImageConnectivity({ providerId: 'lemonade', apiKey: '' }); + + expect(result.success).toBe(false); + expect(result.message).toBe('Lemonade API error (503): unavailable'); + }); +});