fix(google-common): set function calling mode to auto when responseMimeType is application/json

athan37 · athan37 · commit b88f8bcddd1c · 2025-10-18T10:25:32.000-05:00
diff --git a/libs/providers/langchain-google-common/src/chat_models.ts b/libs/providers/langchain-google-common/src/chat_models.ts
@@ -39,6 +39,7 @@ import {
   GoogleSearchToolSetting,
   GoogleSpeechConfig,
   GeminiJsonSchema,
+  GoogleAIResponseMimeType,
 } from "./types.js";
 import {
   convertToGeminiTools,
@@ -220,6 +221,8 @@ export abstract class ChatGoogleBase<AuthOptions>
 
   responseModalities?: GoogleAIModelModality[];
 
+  responseMimeType?: GoogleAIResponseMimeType;
+
   // May intentionally be undefined, meaning to compute this.
   convertSystemMessageToHumanContent: boolean | undefined;
 
@@ -455,6 +458,10 @@ export abstract class ChatGoogleBase<AuthOptions>
     config?: StructuredOutputMethodOptions<true>
   ): Runnable<BaseLanguageModelInput, { raw: BaseMessage; parsed: RunOutput }>;
 
+  /**
+   * Creates a structured output version of the model that enforces the given schema.
+   * Binds responseMimeType "application/json", so the function calling mode is sent as "auto" instead of "any".
+   */
   withStructuredOutput<
     // eslint-disable-next-line @typescript-eslint/no-explicit-any
     RunOutput extends Record<string, any> = Record<string, any>
@@ -530,7 +537,13 @@ export abstract class ChatGoogleBase<AuthOptions>
         keyName: functionName,
       });
     }
-    const llm = this.bindTools(tools).withConfig({ tool_choice: functionName });
+    // Configure for structured output: set responseMimeType to "application/json"
+    // This ensures the function calling mode is set to "auto" instead of "any"
+    // for optimal performance when extracting structured data
+    const llm = this.bindTools(tools).withConfig({ 
+      tool_choice: functionName,
+      responseMimeType: "application/json"
+    });
 
     if (!includeRaw) {
       return llm.pipe(outputParser).withConfig({
diff --git a/libs/providers/langchain-google-common/src/tests/chat_models.test.ts b/libs/providers/langchain-google-common/src/tests/chat_models.test.ts
@@ -1721,6 +1721,69 @@ describe("Mock ChatGoogle - Gemini", () => {
     expect(func.parameters?.properties?.greeterName?.nullable).toEqual(true);
   });
 
+  test("4. Functions withStructuredOutput - default behavior", async () => {
+    const record: Record<string, any> = {};
+    const projectId = mockId();
+    const authOptions: MockClientAuthInfo = {
+      record,
+      projectId,
+      resultFile: "chat-4-mock.json",
+    };
+
+    const schema = {
+      type: "object",
+      properties: {
+        name: { type: "string", description: "The person's name" },
+        age: { type: "number", description: "The person's age" },
+      },
+      required: ["name", "age"],
+    };
+
+    // No responseMimeType set by the user - withStructuredOutput should set it to application/json
+    const model = new ChatGoogle({ authOptions }).withStructuredOutput(schema);
+    await model.invoke("Extract info about John Doe, software engineer");
+
+    const toolConfig = record?.opts?.data?.toolConfig;
+    expect(toolConfig.functionCallingConfig.mode).toEqual("auto");
+    expect(toolConfig.functionCallingConfig.allowedFunctionNames).toEqual(["extract"]);
+
+    const generationConfig = record?.opts?.data?.generationConfig;
+    expect(generationConfig.responseMimeType).toEqual("application/json");
+  });
+
+  test("4. Functions withStructuredOutput - overrides user setting", async () => {
+    const record: Record<string, any> = {};
+    const projectId = mockId();
+    const authOptions: MockClientAuthInfo = {
+      record,
+      projectId,
+      resultFile: "chat-4-mock.json",
+    };
+
+    const schema = {
+      type: "object",
+      properties: {
+        sender: { type: "string", description: "Email sender" },
+        subject: { type: "string", description: "Email subject" },
+      },
+      required: ["sender", "subject"],
+    };
+
+    // User explicitly sets text/plain - withStructuredOutput should override it
+    const model = new ChatGoogle({ 
+      authOptions,
+      responseMimeType: "text/plain"
+    }).withStructuredOutput(schema);
+    await model.invoke("Analyze email from sarah@company.com, subject: Project Update");
+
+    const toolConfig = record?.opts?.data?.toolConfig;
+    expect(toolConfig.functionCallingConfig.mode).toEqual("auto");
+    expect(toolConfig.functionCallingConfig.allowedFunctionNames).toEqual(["extract"]);
+
+    const generationConfig = record?.opts?.data?.generationConfig;
+    expect(generationConfig.responseMimeType).toEqual("application/json");
+  });
+
   test("4. Functions - results", async () => {
     const record: Record<string, any> = {};
     const projectId = mockId();
diff --git a/libs/providers/langchain-google-common/src/utils/common.ts b/libs/providers/langchain-google-common/src/utils/common.ts
@@ -31,14 +31,17 @@ export function copyAIModelParams(
 }
 
 function processToolChoice(
-  toolChoice: GoogleAIBaseLanguageModelCallOptions["tool_choice"],
-  allowedFunctionNames: GoogleAIBaseLanguageModelCallOptions["allowed_function_names"]
+  options: GoogleAIBaseLanguageModelCallOptions | undefined
 ):
   | {
       tool_choice: "any" | "auto" | "none";
       allowed_function_names?: string[];
     }
   | undefined {
+  const toolChoice = options?.tool_choice;
+  const allowedFunctionNames = options?.allowed_function_names;
+  const responseMimeType = options?.responseMimeType;
+  
   if (!toolChoice) {
     if (allowedFunctionNames) {
       // Allowed func names is passed, return 'any' so it forces the model to use a tool.
@@ -58,9 +61,11 @@ function processToolChoice(
   }
   if (typeof toolChoice === "string") {
     // String representing the function name.
-    // Return any to force the model to predict the specified function call.
+    // Use "auto" when responseMimeType is "application/json"; otherwise "any" forces the model to call the named function.
+    const isJsonMode = responseMimeType === "application/json";
+    const mode = isJsonMode ? "auto" : "any";
     return {
-      tool_choice: "any",
+      tool_choice: mode,
       allowed_function_names: [...(allowedFunctionNames ?? []), toolChoice],
     };
   }
@@ -205,10 +210,10 @@ export function copyAIModelParamsInto(
     options?.speechConfig ?? params?.speechConfig ?? target?.speechConfig
   );
   ret.streaming = options?.streaming ?? params?.streaming ?? target?.streaming;
-  const toolChoice = processToolChoice(
-    options?.tool_choice,
-    options?.allowed_function_names
-  );
+  const toolChoice = processToolChoice({
+    ...options,
+    responseMimeType: options?.responseMimeType ?? params?.responseMimeType ?? target?.responseMimeType
+  });
   if (toolChoice) {
     ret.tool_choice = toolChoice.tool_choice;
     ret.allowed_function_names = toolChoice.allowed_function_names;
diff --git a/libs/providers/langchain-google-common/src/utils/gemini.ts b/libs/providers/langchain-google-common/src/utils/gemini.ts
@@ -1833,10 +1833,12 @@ export function getGeminiAPI(config?: GeminiAPIConfig): GoogleAIAPI {
       };
     }
 
-    // force tool choice to be a single function name in case of structured output
+    const isJsonMode = parameters.responseMimeType === "application/json";
+    const mode = isJsonMode ? "auto" : "any";
+    
     return {
       functionCallingConfig: {
-        mode: "any",
+        mode,
         allowedFunctionNames: [parameters.tool_choice],
       },
     };
diff --git a/libs/providers/langchain-google-genai/src/chat_models.ts b/libs/providers/langchain-google-genai/src/chat_models.ts
@@ -201,6 +201,8 @@ export interface GoogleGenerativeAIChatInput
 
 /**
  * Google Generative AI chat model integration.
+ * 
+ * Supports JSON mode, structured output, function calling, and multimodal inputs.
  *
  * Setup:
  * Install `@langchain/google-genai` and set an environment variable named `GOOGLE_API_KEY`.
@@ -455,6 +457,29 @@ export interface GoogleGenerativeAIChatInput
  * <br />
  *
  * <details>
+ * <summary><strong>JSON Mode</strong></summary>
+ *
+ * ```typescript
+ * const jsonLlm = llm.withConfig({ 
+ *   responseMimeType: "application/json" 
+ * });
+ * const jsonResponse = await jsonLlm.invoke(
+ *   "Return a JSON object with key 'randomInts' and a value of 10 random integers between 0-99"
+ * );
+ * console.log(jsonResponse.content);
+ * ```
+ *
+ * ```txt
+ * {
+ *   "randomInts": [23, 87, 45, 12, 78, 34, 56, 90, 11, 67]
+ * }
+ * ```
+ * </details>
+ *
+ * <br />
+ *
+ *
+ * <details>
  * <summary><strong>Multimodal</strong></summary>
  *
  * ```typescript
diff --git a/libs/providers/langchain-google-vertexai/src/chat_models.ts b/libs/providers/langchain-google-vertexai/src/chat_models.ts
@@ -7,6 +7,8 @@ export interface ChatVertexAIInput extends ChatGoogleInput {}
 
 /**
  * Integration with Google Vertex AI chat models.
+ * 
+ * Supports JSON mode, structured output, function calling, and multimodal inputs.
  *
  * Setup:
  * Install `@langchain/google-vertexai` and set your stringified
@@ -249,6 +251,29 @@ export interface ChatVertexAIInput extends ChatGoogleInput {}
  * <br />
  *
  * <details>
+ * <summary><strong>JSON Mode</strong></summary>
+ *
+ * ```typescript
+ * const jsonLlm = llm.withConfig({ 
+ *   responseMimeType: "application/json" 
+ * });
+ * const jsonResponse = await jsonLlm.invoke(
+ *   "Return a JSON object with key 'randomInts' and a value of 10 random integers between 0-99"
+ * );
+ * console.log(jsonResponse.content);
+ * ```
+ *
+ * ```txt
+ * {
+ *   "randomInts": [23, 87, 45, 12, 78, 34, 56, 90, 11, 67]
+ * }
+ * ```
+ * </details>
+ *
+ * <br />
+ *
+ *
+ * <details>
  * <summary><strong>Usage Metadata</strong></summary>
  *
  * ```typescript