feat(openai): Adds prediction support to OpenAI (langchain-ai#7162)
jacoblee93 authored and cinqisap committed Nov 6, 2024
1 parent 4d18c01 commit 938a702
Showing 5 changed files with 196 additions and 7 deletions.
128 changes: 127 additions & 1 deletion docs/core_docs/docs/integrations/chat/openai.ipynb
@@ -1028,6 +1028,132 @@
"console.log(\"USAGE:\", resWitCaching.response_metadata.usage);"
]
},
{
"cell_type": "markdown",
"id": "f755a0b3",
"metadata": {},
"source": [
"## Predicted output\n",
"\n",
"Some OpenAI models (such as their `gpt-4o` and `gpt-4o-mini` series) support [Predicted Outputs](https://platform.openai.com/docs/guides/latency-optimization#use-predicted-outputs), which allow you to pass in a known portion of the LLM's expected output ahead of time to reduce latency. This is useful for cases such as editing text or code, where only a small part of the model's output will change.\n",
"\n",
"Here's an example:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4d5a5582",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AIMessage {\n",
" \"id\": \"chatcmpl-AQLyQKnazr7lEV7ejLTo1UqhzHDBl\",\n",
" \"content\": \"/// <summary>\\n/// Represents a user with a first name, last name, and email.\\n/// </summary>\\npublic class User\\n{\\n/// <summary>\\n/// Gets or sets the user's first name.\\n/// </summary>\\npublic string FirstName { get; set; }\\n\\n/// <summary>\\n/// Gets or sets the user's last name.\\n/// </summary>\\npublic string LastName { get; set; }\\n\\n/// <summary>\\n/// Gets or sets the user's email.\\n/// </summary>\\npublic string Email { get; set; }\\n}\",\n",
" \"additional_kwargs\": {},\n",
" \"response_metadata\": {\n",
" \"tokenUsage\": {\n",
" \"promptTokens\": 148,\n",
" \"completionTokens\": 217,\n",
" \"totalTokens\": 365\n",
" },\n",
" \"finish_reason\": \"stop\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 148,\n",
" \"completion_tokens\": 217,\n",
" \"total_tokens\": 365,\n",
" \"prompt_tokens_details\": {\n",
" \"cached_tokens\": 0\n",
" },\n",
" \"completion_tokens_details\": {\n",
" \"reasoning_tokens\": 0,\n",
" \"accepted_prediction_tokens\": 36,\n",
" \"rejected_prediction_tokens\": 116\n",
" }\n",
" },\n",
" \"system_fingerprint\": \"fp_0ba0d124f1\"\n",
" },\n",
" \"tool_calls\": [],\n",
" \"invalid_tool_calls\": [],\n",
" \"usage_metadata\": {\n",
" \"output_tokens\": 217,\n",
" \"input_tokens\": 148,\n",
" \"total_tokens\": 365,\n",
" \"input_token_details\": {\n",
" \"cache_read\": 0\n",
" },\n",
" \"output_token_details\": {\n",
" \"reasoning\": 0\n",
" }\n",
" }\n",
"}\n"
]
}
],
"source": [
"import { ChatOpenAI } from \"@langchain/openai\";\n",
"\n",
"const modelWithPredictions = new ChatOpenAI({\n",
" model: \"gpt-4o-mini\",\n",
"});\n",
"\n",
"const codeSample = `\n",
"/// <summary>\n",
"/// Represents a user with a first name, last name, and username.\n",
"/// </summary>\n",
"public class User\n",
"{\n",
"/// <summary>\n",
"/// Gets or sets the user's first name.\n",
"/// </summary>\n",
"public string FirstName { get; set; }\n",
"\n",
"/// <summary>\n",
"/// Gets or sets the user's last name.\n",
"/// </summary>\n",
"public string LastName { get; set; }\n",
"\n",
"/// <summary>\n",
"/// Gets or sets the user's username.\n",
"/// </summary>\n",
"public string Username { get; set; }\n",
"}\n",
"`;\n",
"\n",
"// Can also be attached ahead of time\n",
"// using `model.bind({ prediction: {...} })`;\n",
"await modelWithPredictions.invoke(\n",
" [\n",
" {\n",
" role: \"user\",\n",
" content:\n",
" \"Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.\",\n",
" },\n",
" {\n",
" role: \"user\",\n",
" content: codeSample,\n",
" },\n",
" ],\n",
" {\n",
" prediction: {\n",
" type: \"content\",\n",
" content: codeSample,\n",
" },\n",
" }\n",
");"
]
},
{
"cell_type": "markdown",
"id": "81f901e4",
"metadata": {},
"source": [
"Note that currently predictions are billed as additional tokens and will increase your usage and costs in exchange for this reduced latency."
]
},
{
"cell_type": "markdown",
"id": "cc8b3c94",
@@ -1212,4 +1338,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
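The usage output above reports both `accepted_prediction_tokens` and `rejected_prediction_tokens`. As a rough way to gauge how well a prediction matched, you can compute the fraction of predicted tokens the model accepted. Below is a minimal sketch, assuming the `completion_tokens_details` shape printed above; the helper name is hypothetical:

```typescript
// Hypothetical helper: summarize how much of a predicted output was reused,
// based on the `completion_tokens_details` shape shown in the usage output above.
interface CompletionTokensDetails {
  accepted_prediction_tokens: number;
  rejected_prediction_tokens: number;
}

function predictionAcceptanceRate(details: CompletionTokensDetails): number {
  const predicted =
    details.accepted_prediction_tokens + details.rejected_prediction_tokens;
  // If the request carried no prediction, report 0 rather than dividing by zero.
  if (predicted === 0) return 0;
  return details.accepted_prediction_tokens / predicted;
}

// Using the numbers from the example response above: 36 accepted, 116 rejected.
const rate = predictionAcceptanceRate({
  accepted_prediction_tokens: 36,
  rejected_prediction_tokens: 116,
});
console.log(rate.toFixed(2)); // 0.24
```

A low acceptance rate like this one suggests the model rewrote most of the prediction, which (given that rejected prediction tokens are still billed) may not be worth the latency trade-off for that request.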
2 changes: 1 addition & 1 deletion libs/langchain-openai/package.json
@@ -36,7 +36,7 @@
"license": "MIT",
"dependencies": {
"js-tiktoken": "^1.0.12",
"openai": "^4.68.0",
"openai": "^4.71.0",
"zod": "^3.22.4",
"zod-to-json-schema": "^3.22.3"
},
8 changes: 8 additions & 0 deletions libs/langchain-openai/src/chat_models.ts
@@ -423,6 +423,11 @@ export interface ChatOpenAICallOptions
* [Learn more](https://platform.openai.com/docs/guides/audio).
*/
audio?: OpenAIClient.Chat.ChatCompletionAudioParam;
/**
* Static predicted output content, such as the content of a text file that is being regenerated.
* [Learn more](https://platform.openai.com/docs/guides/latency-optimization#use-predicted-outputs).
*/
prediction?: OpenAIClient.ChatCompletionPredictionContent;
}

export interface ChatOpenAIFields
@@ -1329,6 +1334,9 @@ export class ChatOpenAI<
: {}),
...this.modelKwargs,
};
if (options?.prediction !== undefined) {
params.prediction = options.prediction;
}
return params;
}

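The hunk above threads the `prediction` call option into the outgoing request parameters only when the caller actually set it. A minimal standalone sketch of that conditional-copy pattern (the names here are hypothetical, not the actual LangChain internals):

```typescript
// Minimal sketch of the pattern in the diff above: copy an optional call option
// into the outgoing request params only when the caller provided it.
// These names are hypothetical, not the actual LangChain internals.
interface PredictionContent {
  type: "content";
  content: string;
}

interface CallOptions {
  prediction?: PredictionContent;
}

function buildRequestParams(
  base: Record<string, unknown>,
  options?: CallOptions
): Record<string, unknown> {
  const params: Record<string, unknown> = { ...base };
  // Only attach `prediction` when set, so requests without it are unchanged.
  if (options?.prediction !== undefined) {
    params.prediction = options.prediction;
  }
  return params;
}

const withPrediction = buildRequestParams(
  { model: "gpt-4o-mini" },
  { prediction: { type: "content", content: "public class User {}" } }
);
console.log("prediction" in withPrediction); // true
```

Guarding on `undefined` rather than spreading unconditionally keeps the serialized request free of a `prediction: undefined` key for callers that never use the feature.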
55 changes: 55 additions & 0 deletions libs/langchain-openai/src/tests/chat_models-extended.int.test.ts
@@ -636,3 +636,58 @@ test.skip("system prompt caching", async () => {
aggregate?.response_metadata?.usage.prompt_tokens_details.cached_tokens
).toBeGreaterThan(0);
});

test("predicted output", async () => {
const model = new ChatOpenAI({
model: "gpt-4o-mini",
});
const code = `
/// <summary>
/// Represents a user with a first name, last name, and username.
/// </summary>
public class User
{
/// <summary>
/// Gets or sets the user's first name.
/// </summary>
public string FirstName { get; set; }
/// <summary>
/// Gets or sets the user's last name.
/// </summary>
public string LastName { get; set; }
/// <summary>
/// Gets or sets the user's username.
/// </summary>
public string Username { get; set; }
}
`;
const res = await model.invoke(
[
{
role: "user",
content:
"Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
},
{
role: "user",
content: code,
},
],
{
prediction: {
type: "content",
content: code,
},
}
);
expect(
typeof res.response_metadata?.usage?.completion_tokens_details
.accepted_prediction_tokens
).toBe("number");
expect(
typeof res.response_metadata?.usage?.completion_tokens_details
.rejected_prediction_tokens
).toBe("number");
});
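For reference, the Chat Completions request body that a call like the test above produces carries the prediction alongside the messages. The sketch below is abbreviated (the short code string stands in for the full C# snippet), with the `prediction` field shape following OpenAI's Predicted Outputs API:

```typescript
// Abbreviated sketch of the Chat Completions request body produced by the
// test above. The `prediction` field shape follows OpenAI's Predicted
// Outputs API; the short string here stands in for the full C# snippet.
const codeSample = "public class User { public string Username { get; set; } }";

const requestBody = {
  model: "gpt-4o-mini",
  messages: [
    {
      role: "user",
      content:
        "Replace the Username property with an Email property. Respond only with code, and with no markdown formatting.",
    },
    { role: "user", content: codeSample },
  ],
  // The predicted output is simply the current file contents: most of it
  // should survive the edit, so the model can reuse those tokens.
  prediction: {
    type: "content",
    content: codeSample,
  },
};

console.log(Object.keys(requestBody)); // [ 'model', 'messages', 'prediction' ]
```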
10 changes: 5 additions & 5 deletions yarn.lock
@@ -12552,7 +12552,7 @@ __metadata:
jest: ^29.5.0
jest-environment-node: ^29.6.4
js-tiktoken: ^1.0.12
-openai: ^4.68.0
+openai: ^4.71.0
prettier: ^2.8.3
release-it: ^17.6.0
rimraf: ^5.0.1
@@ -35695,9 +35695,9 @@ __metadata:
languageName: node
linkType: hard

"openai@npm:^4.68.0":
version: 4.68.0
resolution: "openai@npm:4.68.0"
"openai@npm:^4.71.0":
version: 4.71.0
resolution: "openai@npm:4.71.0"
dependencies:
"@types/node": ^18.11.18
"@types/node-fetch": ^2.6.4
@@ -35713,7 +35713,7 @@
optional: true
bin:
openai: bin/cli
-checksum: 2866e54ac1b34e074055dde7cc809bcc33d1172f0ab289dacd54ced04a62ab3c2b9f584fdb84ece981edc5c30939497af4e91fe33646f71d5c6ced5d7106a797
+checksum: ba4b3772e806c59b1ea1235a40486392c797906e45dd97914f2cd819b4be2996e207c7b7c67d43236692300354f4e9ffa8ebfca6e97d3555655ebf0f3f01e3f2
languageName: node
linkType: hard

