diff --git a/docs/core_docs/docs/integrations/chat/google_generativeai.ipynb b/docs/core_docs/docs/integrations/chat/google_generativeai.ipynb index a7d64bedc25a..70cd45e4c3f7 100644 --- a/docs/core_docs/docs/integrations/chat/google_generativeai.ipynb +++ b/docs/core_docs/docs/integrations/chat/google_generativeai.ipynb @@ -35,7 +35,7 @@ "\n", "| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n", "| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n", - "| ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | \n", + "| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | \n", "\n", "## Setup\n", "\n", diff --git a/docs/core_docs/docs/integrations/chat/google_vertex_ai.ipynb b/docs/core_docs/docs/integrations/chat/google_vertex_ai.ipynb index f6495d6409c3..e817fe1bfb55 100644 --- a/docs/core_docs/docs/integrations/chat/google_vertex_ai.ipynb +++ b/docs/core_docs/docs/integrations/chat/google_vertex_ai.ipynb @@ -41,7 +41,7 @@ "\n", "| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n", "| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n", - "| ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | \n", + "| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | \n", "\n", "Note that while logprobs are supported, Gemini has fairly restricted usage of them.\n", "\n", diff --git a/docs/core_docs/docs/integrations/chat/index.mdx b/docs/core_docs/docs/integrations/chat/index.mdx index c366d6d70585..f90eec547336 100644 --- 
a/docs/core_docs/docs/integrations/chat/index.mdx +++ b/docs/core_docs/docs/integrations/chat/index.mdx @@ -30,8 +30,8 @@ await model.invoke("Hello, world!") | [ChatCloudflareWorkersAI](/docs/integrations/chat/cloudflare_workersai/) | ✅ | ❌ | ❌ | ❌ | ❌ | | [ChatCohere](/docs/integrations/chat/cohere/) | ✅ | ❌ | ✅ | ✅ | ✅ | | [ChatFireworks](/docs/integrations/chat/fireworks/) | ✅ | ✅ | ✅ | ✅ | ✅ | -| [ChatGoogleGenerativeAI](/docs/integrations/chat/google_generativeai/) | ✅ | ❌ | ✅ | ✅ | ✅ | -| [ChatVertexAI](/docs/integrations/chat/google_vertex_ai/) | ✅ | ❌ | ✅ | ✅ | ✅ | +| [ChatGoogleGenerativeAI](/docs/integrations/chat/google_generativeai/) | ✅ | ✅ | ✅ | ✅ | ✅ | +| [ChatVertexAI](/docs/integrations/chat/google_vertex_ai/) | ✅ | ✅ | ✅ | ✅ | ✅ | | [ChatGroq](/docs/integrations/chat/groq/) | ✅ | ✅ | ✅ | ✅ | ✅ | | [ChatMistralAI](/docs/integrations/chat/mistral/) | ✅ | ✅ | ✅ | ✅ | ✅ | | [ChatOllama](/docs/integrations/chat/ollama/) | ✅ | ✅ | ✅ | ✅ | ✅ | diff --git a/libs/langchain-google-common/src/chat_models.ts b/libs/langchain-google-common/src/chat_models.ts index 61f6bd2049e0..cbd74f33bb44 100644 --- a/libs/langchain-google-common/src/chat_models.ts +++ b/libs/langchain-google-common/src/chat_models.ts @@ -39,6 +39,7 @@ import { GoogleSearchToolSetting, GoogleSpeechConfig, GeminiJsonSchema, + GoogleAIResponseMimeType, } from "./types.js"; import { convertToGeminiTools, @@ -220,6 +221,8 @@ export abstract class ChatGoogleBase responseModalities?: GoogleAIModelModality[]; + responseMimeType?: GoogleAIResponseMimeType; + // May intentionally be undefined, meaning to compute this. convertSystemMessageToHumanContent: boolean | undefined; @@ -455,6 +458,10 @@ export abstract class ChatGoogleBase config?: StructuredOutputMethodOptions ): Runnable; + /** + * Creates a structured output version that enforces a specific schema. + * Automatically sets responseMimeType to "application/json" for optimal function calling. 
+ */ withStructuredOutput< // eslint-disable-next-line @typescript-eslint/no-explicit-any RunOutput extends Record = Record @@ -530,7 +537,13 @@ export abstract class ChatGoogleBase keyName: functionName, }); } - const llm = this.bindTools(tools).withConfig({ tool_choice: functionName }); + // Configure for structured output: set responseMimeType to "application/json" + // This ensures the function calling mode is set to "auto" instead of "any" + // for optimal performance when extracting structured data + const llm = this.bindTools(tools).withConfig({ + tool_choice: functionName, + responseMimeType: "application/json" + }); if (!includeRaw) { return llm.pipe(outputParser).withConfig({ diff --git a/libs/langchain-google-common/src/tests/chat_models.test.ts b/libs/langchain-google-common/src/tests/chat_models.test.ts index ae33e6261262..86a14fd47fd7 100644 --- a/libs/langchain-google-common/src/tests/chat_models.test.ts +++ b/libs/langchain-google-common/src/tests/chat_models.test.ts @@ -1739,6 +1739,69 @@ describe("Mock ChatGoogle - Gemini", () => { expect(func.parameters?.properties?.greeterName?.nullable).toEqual(true); }); + test("4. 
Functions withStructuredOutput - default behavior", async () => { + const record: Record = {}; + const projectId = mockId(); + const authOptions: MockClientAuthInfo = { + record, + projectId, + resultFile: "chat-4-mock.json", + }; + + const schema = { + type: "object", + properties: { + name: { type: "string", description: "The person's name" }, + age: { type: "number", description: "The person's age" }, + }, + required: ["name", "age"], + }; + + // No responseMimeType set by the caller - withStructuredOutput should apply "application/json" itself + const model = new ChatGoogle({ authOptions }).withStructuredOutput(schema); + await model.invoke("Extract info about John Doe, software engineer"); + + const toolConfig = record?.opts?.data?.toolConfig; + expect(toolConfig.functionCallingConfig.mode).toEqual("auto"); + expect(toolConfig.functionCallingConfig.allowedFunctionNames).toEqual(["extract"]); + + const generationConfig = record?.opts?.data?.generationConfig; + expect(generationConfig.responseMimeType).toEqual("application/json"); + }); + + test("4. 
Functions withStructuredOutput - overrides user setting", async () => { + const record: Record = {}; + const projectId = mockId(); + const authOptions: MockClientAuthInfo = { + record, + projectId, + resultFile: "chat-4-mock.json", + }; + + const schema = { + type: "object", + properties: { + sender: { type: "string", description: "Email sender" }, + subject: { type: "string", description: "Email subject" }, + }, + required: ["sender", "subject"], + }; + + // User explicitly sets text/plain - withStructuredOutput should override it + const model = new ChatGoogle({ + authOptions, + responseMimeType: "text/plain" + }).withStructuredOutput(schema); + await model.invoke("Analyze email from sarah@company.com, subject: Project Update"); + + const toolConfig = record?.opts?.data?.toolConfig; + expect(toolConfig.functionCallingConfig.mode).toEqual("auto"); + expect(toolConfig.functionCallingConfig.allowedFunctionNames).toEqual(["extract"]); + + const generationConfig = record?.opts?.data?.generationConfig; + expect(generationConfig.responseMimeType).toEqual("application/json"); + }); + test("4. 
Functions - results", async () => { const record: Record = {}; const projectId = mockId(); diff --git a/libs/langchain-google-common/src/utils/common.ts b/libs/langchain-google-common/src/utils/common.ts index b09b602ac028..bafe660e98dd 100644 --- a/libs/langchain-google-common/src/utils/common.ts +++ b/libs/langchain-google-common/src/utils/common.ts @@ -31,14 +31,17 @@ export function copyAIModelParams( } function processToolChoice( - toolChoice: GoogleAIBaseLanguageModelCallOptions["tool_choice"], - allowedFunctionNames: GoogleAIBaseLanguageModelCallOptions["allowed_function_names"] + options: GoogleAIBaseLanguageModelCallOptions | undefined ): | { tool_choice: "any" | "auto" | "none"; allowed_function_names?: string[]; } | undefined { + const toolChoice = options?.tool_choice; + const allowedFunctionNames = options?.allowed_function_names; + const responseMimeType = options?.responseMimeType; + if (!toolChoice) { if (allowedFunctionNames) { // Allowed func names is passed, return 'any' so it forces the model to use a tool. @@ -58,9 +61,11 @@ function processToolChoice( } if (typeof toolChoice === "string") { // String representing the function name. - // Return any to force the model to predict the specified function call. + // Use "auto" mode when responseMimeType is "application/json" for optimal performance + const isJsonMode = responseMimeType === "application/json"; + const mode = isJsonMode ? "auto" : "any"; return { - tool_choice: "any", + tool_choice: mode, allowed_function_names: [...(allowedFunctionNames ?? []), toolChoice], }; } @@ -205,10 +210,10 @@ export function copyAIModelParamsInto( options?.speechConfig ?? params?.speechConfig ?? target?.speechConfig ); ret.streaming = options?.streaming ?? params?.streaming ?? target?.streaming; - const toolChoice = processToolChoice( - options?.tool_choice, - options?.allowed_function_names - ); + const toolChoice = processToolChoice({ + ...options, + responseMimeType: options?.responseMimeType ?? 
params?.responseMimeType ?? target?.responseMimeType + }); if (toolChoice) { ret.tool_choice = toolChoice.tool_choice; ret.allowed_function_names = toolChoice.allowed_function_names; diff --git a/libs/langchain-google-common/src/utils/gemini.ts b/libs/langchain-google-common/src/utils/gemini.ts index a52a6fe0956f..99ef2de97a98 100644 --- a/libs/langchain-google-common/src/utils/gemini.ts +++ b/libs/langchain-google-common/src/utils/gemini.ts @@ -1843,10 +1843,12 @@ export function getGeminiAPI(config?: GeminiAPIConfig): GoogleAIAPI { }; } - // force tool choice to be a single function name in case of structured output + const isJsonMode = parameters.responseMimeType === "application/json"; + const mode = isJsonMode ? "auto" : "any"; + return { functionCallingConfig: { - mode: "any", + mode, allowedFunctionNames: [parameters.tool_choice], }, }; diff --git a/libs/langchain-google-genai/src/chat_models.ts b/libs/langchain-google-genai/src/chat_models.ts index 6acf223129f6..7c83f4bd8ccc 100644 --- a/libs/langchain-google-genai/src/chat_models.ts +++ b/libs/langchain-google-genai/src/chat_models.ts @@ -201,6 +201,8 @@ export interface GoogleGenerativeAIChatInput /** * Google Generative AI chat model integration. + * + * Supports JSON mode, structured output, function calling, and multimodal inputs. * * Setup: * Install `@langchain/google-genai` and set an environment variable named `GOOGLE_API_KEY`. @@ -455,6 +457,29 @@ export interface GoogleGenerativeAIChatInput *
* *
+ * JSON Mode + * + * ```typescript + * const jsonLlm = llm.withConfig({ + * responseMimeType: "application/json" + * }); + * const jsonResponse = await jsonLlm.invoke( + * "Return a JSON object with key 'randomInts' and a value of 10 random integers between 0-99" + * ); + * console.log(jsonResponse.content); + * ``` + * + * ```txt + * { + * "randomInts": [23, 87, 45, 12, 78, 34, 56, 90, 11, 67] + * } + * ``` + *
+ * + *
+ * + * + *
* Multimodal * * ```typescript diff --git a/libs/langchain-google-vertexai/src/chat_models.ts b/libs/langchain-google-vertexai/src/chat_models.ts index c1866ae2b3ca..159fbe5eb9bb 100644 --- a/libs/langchain-google-vertexai/src/chat_models.ts +++ b/libs/langchain-google-vertexai/src/chat_models.ts @@ -7,6 +7,8 @@ export interface ChatVertexAIInput extends ChatGoogleInput {} /** * Integration with Google Vertex AI chat models. + * + * Supports JSON mode, structured output, function calling, and multimodal inputs. * * Setup: * Install `@langchain/google-vertexai` and set your stringified @@ -249,6 +251,29 @@ export interface ChatVertexAIInput extends ChatGoogleInput {} *
* *
+ * JSON Mode + * + * ```typescript + * const jsonLlm = llm.withConfig({ + * responseMimeType: "application/json" + * }); + * const jsonResponse = await jsonLlm.invoke( + * "Return a JSON object with key 'randomInts' and a value of 10 random integers between 0-99" + * ); + * console.log(jsonResponse.content); + * ``` + * + * ```txt + * { + * "randomInts": [23, 87, 45, 12, 78, 34, 56, 90, 11, 67] + * } + * ``` + *
+ * + *
+ * + * + *
* Usage Metadata * * ```typescript