diff --git a/libs/langchain-ollama/src/chat_models.ts b/libs/langchain-ollama/src/chat_models.ts
index 5c0b361aeed2..c6ada11271cf 100644
--- a/libs/langchain-ollama/src/chat_models.ts
+++ b/libs/langchain-ollama/src/chat_models.ts
@@ -56,6 +56,11 @@ export interface ChatOllamaCallOptions extends BaseChatModelCallOptions {
   tools?: BindToolsInput[];
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
   format?: string | Record<string, any>;
+  /**
+   * Whether to enable thinking mode for this specific invocation.
+   * Can be a boolean (true/false) or a string intensity level ("high", "medium", "low").
+   */
+  think?: boolean | "high" | "medium" | "low";
 }
 
 export interface PullModelOptions {
@@ -109,7 +114,13 @@ export interface ChatOllamaInput
    * @default fetch
    */
   fetch?: typeof fetch;
-  think?: boolean;
+  /**
+   * Whether to enable thinking mode for supported models.
+   * Can be a boolean (true/false) or a string intensity level ("high", "medium", "low").
+   * When enabled, the model's reasoning process is captured separately.
+   * @default false
+   */
+  think?: boolean | "high" | "medium" | "low";
 }
 
 /**
@@ -490,7 +501,7 @@ export class ChatOllama
 
   baseUrl = "http://127.0.0.1:11434";
 
-  think?: boolean;
+  think?: boolean | "high" | "medium" | "low";
 
   constructor(fields?: ChatOllamaInput) {
     super(fields ?? {});
@@ -692,6 +703,7 @@ export class ChatOllama
     const nonChunkMessage = new AIMessage({
       id: finalChunk?.id,
       content: finalChunk?.content ?? "",
+      additional_kwargs: finalChunk?.additional_kwargs,
       tool_calls: finalChunk?.tool_calls,
       response_metadata: finalChunk?.response_metadata,
       usage_metadata: finalChunk?.usage_metadata,
@@ -751,10 +763,8 @@ export class ChatOllama
           usageMetadata.input_tokens + usageMetadata.output_tokens;
         lastMetadata = rest;
 
-        // when think is enabled, try thinking first
-        const token = this.think
-          ? responseMessage.thinking ?? responseMessage.content ?? ""
-          : responseMessage.content ?? "";
+        // Use actual content as the token; thinking content is handled in additional_kwargs
+        const token = responseMessage.content ?? "";
 
         yield new ChatGenerationChunk({
           text: token,
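
// Usage sketch of the widened `think` option (editorial illustration, not
// part of the patch). Assumes an Ollama server on the default port and a
// thinking-capable model such as deepseek-r1:32b pulled locally.
import { HumanMessage } from "@langchain/core/messages";
import { ChatOllama } from "@langchain/ollama";

const model = new ChatOllama({
  model: "deepseek-r1:32b",
  think: "high", // now accepts boolean | "high" | "medium" | "low"
});

const res = await model.invoke([new HumanMessage("Why is the sky blue?")]);

// `content` now carries only the final answer...
console.log(res.content);
// ...while the reasoning trace, when present, lands in additional_kwargs.
console.log(res.additional_kwargs.thinking_content);

// ChatOllamaCallOptions also declares `think`, so a per-invocation override
// like this should type-check:
await model.invoke([new HumanMessage("2 + 2?")], { think: false });
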
""; yield new ChatGenerationChunk({ text: token, diff --git a/libs/langchain-ollama/src/tests/chat_models_think.int.test.ts b/libs/langchain-ollama/src/tests/chat_models_think.int.test.ts index 2b11a47bf5e6..37df5bd0af0b 100644 --- a/libs/langchain-ollama/src/tests/chat_models_think.int.test.ts +++ b/libs/langchain-ollama/src/tests/chat_models_think.int.test.ts @@ -1,5 +1,5 @@ import { test, expect } from "@jest/globals"; -import { HumanMessage } from "@langchain/core/messages"; +import { HumanMessage, AIMessageChunk } from "@langchain/core/messages"; import { ChatOllama } from "../chat_models.js"; test("test deep seek model with think=false", async () => { @@ -52,3 +52,175 @@ test("test deep seek model with think=true (default)", async () => { expect(responseContent).toMatch(/photosynthesis/i); // Check it includes the topic expect(responseContent.length).toBeGreaterThan(1); }); + +test("test type safety for thinking parameter values", async () => { + // Test that TypeScript accepts all valid string and boolean values + const ollamaHigh = new ChatOllama({ + model: "deepseek-r1:32b", + think: "high" as const, + maxRetries: 1, + }); + + const ollamaMedium = new ChatOllama({ + model: "deepseek-r1:32b", + think: "medium" as const, + maxRetries: 1, + }); + + const ollamaLow = new ChatOllama({ + model: "deepseek-r1:32b", + think: "low" as const, + maxRetries: 1, + }); + + const ollamaTrue = new ChatOllama({ + model: "deepseek-r1:32b", + think: true, + maxRetries: 1, + }); + + const ollamaFalse = new ChatOllama({ + model: "deepseek-r1:32b", + think: false, + maxRetries: 1, + }); + + // All should be properly instantiated + expect(ollamaHigh).toBeDefined(); + expect(ollamaMedium).toBeDefined(); + expect(ollamaLow).toBeDefined(); + expect(ollamaTrue).toBeDefined(); + expect(ollamaFalse).toBeDefined(); + + // Quick test that string values work in practice + const res = await ollamaHigh.invoke([ + new HumanMessage({ content: "How many r in the word strawberry?" 
+
+  expect(res).toBeDefined();
+  expect(typeof res.content).toBe("string");
+  expect(res.content.length).toBeGreaterThan(0);
+});
+
+test.each([
+  { thinkLevel: "high" as const },
+  { thinkLevel: "medium" as const },
+  { thinkLevel: "low" as const },
+])("test string thinking parameter '$thinkLevel'", async ({ thinkLevel }) => {
+  const ollama = new ChatOllama({
+    model: "deepseek-r1:32b",
+    think: thinkLevel,
+    maxRetries: 1,
+  });
+
+  const res = await ollama.invoke([
+    new HumanMessage({
+      content: "How many r in the word strawberry?",
+    }),
+  ]);
+
+  // Ensure the response is defined
+  expect(res).toBeDefined();
+  expect(res.content).toBeDefined();
+  expect(res.additional_kwargs).toBeDefined();
+
+  // Validate basic functionality - response should exist and not contain thinking tags
+  expect(typeof res.content).toBe("string");
+  expect(res.content.length).toBeGreaterThan(0);
+  expect(res.content).not.toMatch(/<think>.*?<\/think>/is); // No thinking tags in content
+
+  // For string thinking levels, validate thinking content if present
+  if (res.additional_kwargs?.thinking_content) {
+    const thinkingContent = res.additional_kwargs.thinking_content as string;
+    expect(typeof thinkingContent).toBe("string");
+    expect(thinkingContent.length).toBeGreaterThan(0);
+    // Thinking should not be duplicated/corrupted
+    expect(thinkingContent).not.toMatch(/(.+)\1{3,}/); // No excessive repetition
+  }
+});
+
+test("test content separation and deduplication", async () => {
+  const ollama = new ChatOllama({
+    model: "deepseek-r1:32b",
+    think: "high",
+    maxRetries: 1,
+  });
+
+  const res = await ollama.invoke([
+    new HumanMessage({
+      content: "How many r in the word strawberry?",
+    }),
+  ]);
+
+  // Ensure proper content separation
+  expect(res).toBeDefined();
+  expect(res.content).toBeDefined();
+  expect(res.additional_kwargs).toBeDefined();
+
+  // Main content should exist
+  expect(typeof res.content).toBe("string");
+  expect(res.content.length).toBeGreaterThan(0);
+
+  // Thinking content should be properly separated and clean if present
+  if (res.additional_kwargs?.thinking_content) {
+    const thinkingContent = res.additional_kwargs.thinking_content as string;
+    expect(typeof thinkingContent).toBe("string");
+    expect(thinkingContent.length).toBeGreaterThan(10); // Should have substantial thinking
+
+    // Validate comprehensive deduplication - no repetitive patterns
+    expect(thinkingContent).not.toMatch(/^(.+?)\1+$/); // Not entirely repeated content
+
+    // Should be reasonable length
+    expect(thinkingContent.length).toBeLessThan(5000);
+
+    // Advanced duplication detection
+    const words = thinkingContent.split(" ");
+    const uniqueWords = new Set(words);
+    const repetitionRatio = words.length / uniqueWords.size;
+    expect(repetitionRatio).toBeLessThan(3); // Reasonable repetition threshold
+  }
+});
+
+test("test streaming with thinking content separation", async () => {
+  const ollama = new ChatOllama({
+    model: "deepseek-r1:32b",
+    think: "high",
+    maxRetries: 1,
+  });
+
+  const chunks: AIMessageChunk[] = [];
+  const stream = await ollama.stream([
+    new HumanMessage({
+      content: "How many r in the word strawberry?",
+    }),
+  ]);
+
+  for await (const chunk of stream) {
+    chunks.push(chunk);
+  }
+
+  expect(chunks.length).toBeGreaterThan(0);
+
+  // Validate that streaming chunks maintain proper content separation
+  let hasThinkingContent = false;
+  let finalContent = "";
+
+  for (const chunk of chunks) {
+    if (chunk.content) {
+      finalContent += chunk.content;
+    }
+
+    // Check if any chunk has thinking content in additional_kwargs
+    if (chunk.additional_kwargs?.thinking_content) {
+      hasThinkingContent = true;
+      expect(typeof chunk.additional_kwargs.thinking_content).toBe("string");
+    }
+  }
+
+  // Final content should exist and be non-empty
+  expect(typeof finalContent).toBe("string");
+  expect(finalContent.length).toBeGreaterThan(0);
+
+  // At least one chunk should have had thinking content
+  expect(hasThinkingContent).toBe(true);
+});
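
// Consumer-side sketch of the streaming contract exercised by these tests
// (editorial illustration, not part of the patch): answer tokens arrive in
// `content` and reasoning deltas in `additional_kwargs.thinking_content`,
// so both can be accumulated independently with no <think> tag parsing.
import { HumanMessage } from "@langchain/core/messages";
import { ChatOllama } from "@langchain/ollama";

const ollama = new ChatOllama({ model: "deepseek-r1:32b", think: "high" });

let answer = "";
let thinking = "";

const stream = await ollama.stream([
  new HumanMessage("How many r in the word strawberry?"),
]);

for await (const chunk of stream) {
  if (typeof chunk.content === "string") answer += chunk.content;
  if (typeof chunk.additional_kwargs?.thinking_content === "string") {
    thinking += chunk.additional_kwargs.thinking_content;
  }
}

console.log({ answer, thinking });
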
diff --git a/libs/langchain-ollama/src/utils.ts b/libs/langchain-ollama/src/utils.ts
index 231d5dc3d217..c686e5e4008e 100644
--- a/libs/langchain-ollama/src/utils.ts
+++ b/libs/langchain-ollama/src/utils.ts
@@ -14,6 +14,9 @@ import type {
 } from "ollama";
 import { v4 as uuidv4 } from "uuid";
 
+// Track previous thinking content to calculate incremental changes
+let previousThinkingContent = "";
+
 export function convertOllamaMessagesToLangChain(
   messages: OllamaMessage,
   extra?: {
@@ -22,8 +25,24 @@ export function convertOllamaMessagesToLangChain(
     usageMetadata?: UsageMetadata;
   }
 ): AIMessageChunk {
+  // Prepare additional_kwargs to include thinking content if it exists
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  const additionalKwargs: Record<string, any> = {};
+  if (messages.thinking) {
+    // Calculate incremental thinking content
+    let incrementalThinking = messages.thinking;
+    if (messages.thinking.startsWith(previousThinkingContent)) {
+      incrementalThinking = messages.thinking.slice(previousThinkingContent.length);
+    }
+    previousThinkingContent = messages.thinking;
+
+    if (incrementalThinking) {
+      additionalKwargs.thinking_content = incrementalThinking;
+    }
+  }
+
   return new AIMessageChunk({
-    content: messages.thinking ?? messages.content ?? "",
+    content: messages.content ?? "",
     tool_call_chunks: messages.tool_calls?.map((tc) => ({
       name: tc.function.name,
       args: JSON.stringify(tc.function.arguments),
@@ -31,6 +50,7 @@ export function convertOllamaMessagesToLangChain(
       index: 0,
       id: uuidv4(),
     })),
+    additional_kwargs: additionalKwargs,
     response_metadata: extra?.responseMetadata,
     usage_metadata: extra?.usageMetadata,
   });
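
// Standalone illustration of the incremental-thinking calculation above
// (editorial sketch; same logic, renamed locals). The slice handles the case
// where Ollama reports `thinking` cumulatively per chunk: stripping the
// previously seen prefix leaves only the new delta, so consumers can
// concatenate chunks without duplication.
let previous = "";

function thinkingDelta(cumulative: string): string {
  const delta = cumulative.startsWith(previous)
    ? cumulative.slice(previous.length)
    : cumulative; // prefix mismatch (e.g. a fresh stream): pass through as-is
  previous = cumulative;
  return delta;
}

thinkingDelta("Let me count the");      // => "Let me count the"
thinkingDelta("Let me count the r's."); // => " r's."

// Design note: like `previousThinkingContent` in utils.ts, this module-level
// buffer is shared state. Two concurrent streams would interleave updates and
// corrupt each other's deltas, and the buffer is never reset between
// conversations; a per-stream accumulator would avoid both issues.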