/**
 * Removes a Markdown code fence that a model wrapped around its JSON output
 * (e.g. "```json\n{...}\n```") and returns the bare payload.
 *
 * Supported shapes:
 *  - a complete fence pair, on separate lines or all on one line;
 *  - an opening fence only (streamed output whose closing fence has not
 *    arrived yet);
 *  - a closing fence that has only partially arrived, i.e. one or two
 *    trailing backticks at the end of a streamed prefix.
 *
 * The optional `json` language tag is matched case-insensitively, so
 * "```JSON" is handled like "```json". Text that does not start with a fence
 * is returned trimmed and otherwise untouched, so backticks inside a JSON
 * string value are never interpreted as a fence.
 *
 * Only plain string operations are used (no backtracking regex), so the cost
 * stays linear in the input length even though this runs again on every
 * streamed chunk.
 *
 * NOTE: the OpenAI provider carries its own copy of this helper; keep the two
 * in sync (or hoist them into a shared module).
 *
 * @param text Raw model output, either complete or a streamed prefix.
 * @returns The text without the surrounding fence and surrounding whitespace.
 */
function stripMarkdownFences(text: string): string {
  const fence = '```';
  const languageTag = 'json';

  const trimmed = text.trim();
  if (!trimmed.startsWith(fence)) return trimmed;

  // Skip the opening fence and, when present, the language tag (any casing).
  let start = fence.length;
  const tagCandidate = trimmed.slice(start, start + languageTag.length);
  if (tagCandidate.toLowerCase() === languageTag) {
    start += languageTag.length;
  }

  const body = trimmed.slice(start);

  // Complete fence pair: drop the closing fence. Checking `body` (not
  // `trimmed`) guarantees the closing fence never overlaps the opening one.
  if (body.endsWith(fence)) {
    return body.slice(0, -fence.length).trim();
  }

  // Opening fence only. While streaming, the closing fence may have arrived
  // only in part; drop those stray backticks so the partial-JSON parser is not
  // handed trailing garbage. Any backtick removed by mistake is restored on
  // the next chunk, because the accumulated text is re-processed every time.
  const partialFenceLength = body.endsWith('``')
    ? 2
    : body.endsWith('`')
      ? 1
      : 0;
  return body.slice(0, body.length - partialFenceLength).trim();
}
/**
 * Removes a Markdown code fence that a model wrapped around its JSON output
 * (e.g. "```json\n{...}\n```") and returns the bare payload.
 *
 * Supported shapes:
 *  - a complete fence pair, on separate lines or all on one line;
 *  - an opening fence only (streamed output whose closing fence has not
 *    arrived yet);
 *  - a closing fence that has only partially arrived, i.e. one or two
 *    trailing backticks at the end of a streamed prefix.
 *
 * The optional `json` language tag is matched case-insensitively, so
 * "```JSON" is handled like "```json". Text that does not start with a fence
 * is returned trimmed and otherwise untouched, so backticks inside a JSON
 * string value are never interpreted as a fence.
 *
 * Only plain string operations are used (no backtracking regex), so the cost
 * stays linear in the input length even though this runs again on every
 * streamed chunk.
 *
 * NOTE: the Ollama provider carries its own copy of this helper; keep the two
 * in sync (or hoist them into a shared module).
 *
 * @param text Raw model output, either complete or a streamed prefix.
 * @returns The text without the surrounding fence and surrounding whitespace.
 */
function stripMarkdownFences(text: string): string {
  const fence = '```';
  const languageTag = 'json';

  const trimmed = text.trim();
  if (!trimmed.startsWith(fence)) return trimmed;

  // Skip the opening fence and, when present, the language tag (any casing).
  let start = fence.length;
  const tagCandidate = trimmed.slice(start, start + languageTag.length);
  if (tagCandidate.toLowerCase() === languageTag) {
    start += languageTag.length;
  }

  const body = trimmed.slice(start);

  // Complete fence pair: drop the closing fence. Checking `body` (not
  // `trimmed`) guarantees the closing fence never overlaps the opening one.
  if (body.endsWith(fence)) {
    return body.slice(0, -fence.length).trim();
  }

  // Opening fence only. While streaming, the closing fence may have arrived
  // only in part; drop those stray backticks so the partial-JSON parser is not
  // handed trailing garbage. Any backtick removed by mistake is restored on
  // the next chunk, because the accumulated text is re-processed every time.
  const partialFenceLength = body.endsWith('``')
    ? 2
    : body.endsWith('`')
      ? 1
      : 0;
  return body.slice(0, body.length - partialFenceLength).trim();
}