langchain-ai · JamesMustafa · Sep 20, 2025 · Sep 20, 2025 · Sep 20, 2025 · Sep 20, 2025
diff --git a/libs/langchain-ollama/src/chat_models.ts b/libs/langchain-ollama/src/chat_models.ts
@@ -56,6 +56,11 @@ export interface ChatOllamaCallOptions extends BaseChatModelCallOptions {
   tools?: BindToolsInput[];
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
   format?: string | Record<string, any>;
+  /**
+   * Thinking mode for this invocation: a boolean, or an intensity level ("high", "medium", "low").
+   * NOTE(review): confirm this per-call value is actually forwarded to the Ollama request.
+   */
+  think?: boolean | "high" | "medium" | "low";
 }
 
 export interface PullModelOptions {
@@ -109,7 +114,13 @@ export interface ChatOllamaInput
    * @default fetch
    */
   fetch?: typeof fetch;
-  think?: boolean;
+  /**
+   * Whether to enable thinking mode for supported models.
+   * Can be a boolean (true/false) or a string intensity level ("high", "medium", "low").
+   * Reasoning returned by the model is exposed as `additional_kwargs.thinking_content`, not in `content`.
+   * @default false
+   */
+  think?: boolean | "high" | "medium" | "low";
 }
 
 /**
@@ -490,7 +501,7 @@ export class ChatOllama
 
   baseUrl = "http://127.0.0.1:11434";
 
-  think?: boolean;
+  think?: boolean | "high" | "medium" | "low"; // same union as ChatOllamaInput.think
 
   constructor(fields?: ChatOllamaInput) {
     super(fields ?? {});
@@ -692,6 +703,7 @@ export class ChatOllama
     const nonChunkMessage = new AIMessage({
       id: finalChunk?.id,
       content: finalChunk?.content ?? "",
+      additional_kwargs: finalChunk?.additional_kwargs,
       tool_calls: finalChunk?.tool_calls,
       response_metadata: finalChunk?.response_metadata,
       usage_metadata: finalChunk?.usage_metadata,
@@ -751,10 +763,8 @@ export class ChatOllama
         usageMetadata.input_tokens + usageMetadata.output_tokens;
       lastMetadata = rest;
 
-      // when think is enabled, try thinking first
-      const token = this.think
-        ? responseMessage.thinking ?? responseMessage.content ?? ""
-        : responseMessage.content ?? "";
+      // Use actual content as token, thinking content is handled in additional_kwargs
+      const token = responseMessage.content ?? "";
 
       yield new ChatGenerationChunk({
         text: token,

diff --git a/libs/langchain-ollama/src/tests/chat_models_think.int.test.ts b/libs/langchain-ollama/src/tests/chat_models_think.int.test.ts
@@ -1,5 +1,5 @@
 import { test, expect } from "@jest/globals";
-import { HumanMessage } from "@langchain/core/messages";
+import { HumanMessage, AIMessageChunk } from "@langchain/core/messages";
 import { ChatOllama } from "../chat_models.js";
 
 test("test deep seek model with think=false", async () => {
@@ -52,3 +52,175 @@ test("test deep seek model with think=true (default)", async () => {
   expect(responseContent).toMatch(/photosynthesis/i); // Check it includes the topic
   expect(responseContent.length).toBeGreaterThan(1);
 });
+
+test("test type safety for thinking parameter values", async () => {
+  // One client per accepted `think` value; an unsupported value would be a compile-time type error
+  const ollamaHigh = new ChatOllama({
+    model: "deepseek-r1:32b",
+    think: "high" as const,
+    maxRetries: 1,
+  });
+
+  const ollamaMedium = new ChatOllama({
+    model: "deepseek-r1:32b",
+    think: "medium" as const,
+    maxRetries: 1,
+  });
+
+  const ollamaLow = new ChatOllama({
+    model: "deepseek-r1:32b",
+    think: "low" as const,
+    maxRetries: 1,
+  });
+
+  const ollamaTrue = new ChatOllama({
+    model: "deepseek-r1:32b",
+    think: true,
+    maxRetries: 1,
+  });
+
+  const ollamaFalse = new ChatOllama({
+    model: "deepseek-r1:32b",
+    think: false,
+    maxRetries: 1,
+  });
+
+  // All should be properly instantiated
+  expect(ollamaHigh).toBeDefined();
+  expect(ollamaMedium).toBeDefined();
+  expect(ollamaLow).toBeDefined();
+  expect(ollamaTrue).toBeDefined();
+  expect(ollamaFalse).toBeDefined();
+
+  // Smoke-test one string level end to end; this performs a real model invocation
+  const res = await ollamaHigh.invoke([
+    new HumanMessage({ content: "How many r in the word strawberry?" }),
+  ]);
+
+  expect(res).toBeDefined();
+  expect(typeof res.content).toBe("string");
+  expect(res.content.length).toBeGreaterThan(0);
+});
+
+test.each([
+  { thinkLevel: "high" as const },
+  { thinkLevel: "medium" as const },
+  { thinkLevel: "low" as const },
+])("test string thinking parameter '$thinkLevel'", async ({ thinkLevel }) => {
+  const ollama = new ChatOllama({
+    model: "deepseek-r1:32b",
+    think: thinkLevel,
+    maxRetries: 1,
+  });
+
+  const res = await ollama.invoke([
+    new HumanMessage({
+      content: "How many r in the word strawberry?",
+    }),
+  ]);
+
+  // Ensure the response is defined
+  expect(res).toBeDefined();
+  expect(res.content).toBeDefined();
+  expect(res.additional_kwargs).toBeDefined();
+
+  // Validate basic functionality - response should exist and not contain thinking tags
+  expect(typeof res.content).toBe("string");
+  expect(res.content.length).toBeGreaterThan(0);
+  expect(res.content).not.toMatch(/<think>.*?<\/think>/is); // No thinking tags in content
+
+  // For string thinking levels, validate thinking content if present
+  if (res.additional_kwargs?.thinking_content) {
+    const thinkingContent = res.additional_kwargs.thinking_content as string;
+    expect(typeof thinkingContent).toBe("string");
+    expect(thinkingContent.length).toBeGreaterThan(0);
+    // No 20+ character segment repeated 4+ times back-to-back; short runs such as "...." are legitimate
+    expect(thinkingContent).not.toMatch(/(.{20,200})\1{3,}/s);
+  }
+});
+
+test("test content separation and deduplication", async () => {
+  const ollama = new ChatOllama({
+    model: "deepseek-r1:32b",
+    think: "high",
+    maxRetries: 1,
+  });
+
+  const res = await ollama.invoke([
+    new HumanMessage({
+      content: "How many r in the word strawberry?",
+    }),
+  ]);
+
+  // Ensure proper content separation
+  expect(res).toBeDefined();
+  expect(res.content).toBeDefined();
+  expect(res.additional_kwargs).toBeDefined();
+
+  // Main content should exist
+  expect(typeof res.content).toBe("string");
+  expect(res.content.length).toBeGreaterThan(0);
+
+  // Thinking content should be properly separated and clean if present
+  if (res.additional_kwargs?.thinking_content) {
+    const thinkingContent = res.additional_kwargs.thinking_content as string;
+    expect(typeof thinkingContent).toBe("string");
+    expect(thinkingContent.length).toBeGreaterThan(10); // Should have substantial thinking
+
+    // The whole text must not be one unit repeated; `s` lets `.` span newlines in multi-line reasoning
+    expect(thinkingContent).not.toMatch(/^(.+?)\1+$/s);
+
+    // Should be reasonable length
+    expect(thinkingContent.length).toBeLessThan(5000);
+
+    // Coarse duplication check: ratio of total words to distinct words (split on any whitespace)
+    const words = thinkingContent.trim().split(/\s+/);
+    const uniqueWords = new Set(words);
+    const repetitionRatio = words.length / uniqueWords.size;
+    expect(repetitionRatio).toBeLessThan(3); // Reasonable repetition threshold
+  }
+});
+
+test("test streaming with thinking content separation", async () => {
+  const ollama = new ChatOllama({
+    model: "deepseek-r1:32b",
+    think: "high",
+    maxRetries: 1,
+  });
+
+  const chunks: AIMessageChunk[] = [];
+  const stream = await ollama.stream([
+    new HumanMessage({
+      content: "How many r in the word strawberry?",
+    }),
+  ]);
+
+  for await (const chunk of stream) {
+    chunks.push(chunk);
+  }
+
+  expect(chunks.length).toBeGreaterThan(0);
+
+  // Walk the collected chunks: rebuild the answer text and record whether any chunk carried thinking_content
+  let hasThinkingContent = false;
+  let finalContent = "";
+
+  for (const chunk of chunks) {
+    if (chunk.content) {
+      finalContent += chunk.content;
+    }
+
+    // Thinking text, when present on a chunk, must arrive as a string in additional_kwargs
+    if (chunk.additional_kwargs?.thinking_content) {
+      hasThinkingContent = true;
+      expect(typeof chunk.additional_kwargs.thinking_content).toBe("string");
+    }
+  }
+
+  // The concatenated answer text must be non-empty
+  expect(typeof finalContent).toBe("string");
+  expect(finalContent.length).toBeGreaterThan(0);
+
+  // Unlike the invoke-based tests, thinking content is mandatory here: at least one chunk must carry it
+  expect(hasThinkingContent).toBe(true);
+});
diff --git a/libs/langchain-ollama/src/utils.ts b/libs/langchain-ollama/src/utils.ts
@@ -22,15 +22,30 @@ export function convertOllamaMessagesToLangChain(
     usageMetadata?: UsageMetadata;
   }
 ): AIMessageChunk {
+  // Keep the model's reasoning apart from the answer text: `content` carries
+  // only `messages.content`, and any `messages.thinking` text is exposed as
+  // `additional_kwargs.thinking_content`.
+  //
+  // The thinking text is forwarded exactly as received. This converter must
+  // stay stateless: module-level state would be shared by every caller in the
+  // process, so remembering the previous chunk there lets concurrent or
+  // consecutive requests drop or truncate each other's thinking tokens.
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  const additionalKwargs: Record<string, any> = {};
+  if (messages.thinking) {
+    additionalKwargs.thinking_content = messages.thinking;
+  }
+
   return new AIMessageChunk({
-    content: messages.thinking ?? messages.content ?? "",
+    content: messages.content ?? "",
     tool_call_chunks: messages.tool_calls?.map((tc) => ({
       name: tc.function.name,
       args: JSON.stringify(tc.function.arguments),
       type: "tool_call_chunk",
       index: 0,
       id: uuidv4(),
     })),
+    additional_kwargs: additionalKwargs,
     response_metadata: extra?.responseMetadata,
     usage_metadata: extra?.usageMetadata,
   });