Enable include_reasoning in request (#3067)
timothycarambat authored Jan 30, 2025
1 parent e86ad82 commit 8eac83b
Showing 1 changed file with 72 additions and 4 deletions.
76 changes: 72 additions & 4 deletions server/utils/AiProviders/openRouter/index.js
@@ -167,6 +167,18 @@ class OpenRouterLLM {
return content.flat();
}

/**
* Parses reasoning from the response message and prepends it to the
* text response inside <think> tags, returning the full text response.
* @param {Object} choice - A single choice object from the completion response body.
* @returns {string}
*/
#parseReasoningFromResponse({ message }) {
let textResponse = message?.content;
if (!!message?.reasoning && message.reasoning.trim().length > 0)
textResponse = `<think>${message.reasoning}</think>${textResponse}`;
return textResponse;
}
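// Hypothetical input/output illustration (not part of this diff):
//   #parseReasoningFromResponse({
//     message: { reasoning: "Compare both options.", content: "Use option B." },
//   })
//   returns "<think>Compare both options.</think>Use option B."
// A message with empty or whitespace-only reasoning passes through unchanged.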

constructPrompt({
systemPrompt = "",
contextTexts = [],
@@ -200,20 +212,25 @@ class OpenRouterLLM {
model: this.model,
messages,
temperature,
// This is an OpenRouter-specific option that allows us to get the reasoning text
// before the token text.
include_reasoning: true,
})
.catch((e) => {
throw new Error(e.message);
})
);
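// Illustrative response shape (hypothetical values, not from the commit):
// with include_reasoning enabled, a returned choice may look like
//   { message: { role: "assistant", reasoning: "Check the context first.", content: "Here is the answer." } }
// which is folded into a single string via this.#parseReasoningFromResponse().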

if (
-  !result.output.hasOwnProperty("choices") ||
-  result.output.choices.length === 0
+  !result?.output?.hasOwnProperty("choices") ||
+  result?.output?.choices?.length === 0
)
-  return null;
+  throw new Error(
+    `Invalid response body returned from OpenRouter: ${result.output?.error?.message || "Unknown error"} ${result.output?.error?.code || "Unknown code"}`
+  );

return {
-  textResponse: result.output.choices[0].message.content,
+  textResponse: this.#parseReasoningFromResponse(result.output.choices[0]),
metrics: {
prompt_tokens: result.output.usage.prompt_tokens || 0,
completion_tokens: result.output.usage.completion_tokens || 0,
@@ -236,6 +253,9 @@
stream: true,
messages,
temperature,
// This is an OpenRouter-specific option that allows us to get the reasoning text
// before the token text.
include_reasoning: true,
}),
messages
// We have to manually count the tokens
@@ -262,6 +282,7 @@

return new Promise(async (resolve) => {
let fullText = "";
let reasoningText = "";
let lastChunkTime = null; // null when first token is still not received.

// Establish listener to early-abort a streaming response
@@ -313,8 +334,55 @@
for await (const chunk of stream) {
const message = chunk?.choices?.[0];
const token = message?.delta?.content;
const reasoningToken = message?.delta?.reasoning;
lastChunkTime = Number(new Date());

// Reasoning models will always return the reasoning text before the token text.
// The reasoning delta can be null or an empty string ('').
if (reasoningToken) {
// If no reasoning text has been accumulated yet, open the <think> block
// and send the first chunk of reasoning text.
if (reasoningText.length === 0) {
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: `<think>${reasoningToken}`,
close: false,
error: false,
});
reasoningText += `<think>${reasoningToken}`;
continue;
} else {
// Otherwise, append the reasoning token to the reasoning text
// accumulated so far.
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: reasoningToken,
close: false,
error: false,
});
reasoningText += reasoningToken;
}
}

// If the reasoning text is not empty, but the reasoning token is empty
// and the token text is not empty, close the reasoning block and begin
// sending the token text.
if (!!reasoningText && !reasoningToken && token) {
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: `</think>`,
close: false,
error: false,
});
fullText += `${reasoningText}</think>`;
reasoningText = "";
}

if (token) {
fullText += token;
writeResponseChunk(response, {
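For readers tracing the streaming branch above, the following is a minimal standalone sketch of the same open/append/close <think> state machine. The chunks array and emit() helper are hypothetical stand-ins for the OpenRouter delta stream and writeResponseChunk(); this is an illustration of the commit's logic under those assumptions, not code from the commit.

// Hypothetical chunk stream: reasoning deltas arrive before content deltas.
const chunks = [
  { choices: [{ delta: { reasoning: "Compare both options. " } }] },
  { choices: [{ delta: { reasoning: "Option B is simpler." } }] },
  { choices: [{ delta: { content: "Use option B." } }] },
];

// Stand-in for writeResponseChunk(response, {...}).
function emit(textResponse) {
  process.stdout.write(textResponse);
}

let fullText = "";
let reasoningText = "";

for (const chunk of chunks) {
  const delta = chunk?.choices?.[0]?.delta;
  const token = delta?.content;
  const reasoningToken = delta?.reasoning;

  if (reasoningToken) {
    // First reasoning token opens the <think> block; later tokens append.
    const text =
      reasoningText.length === 0 ? `<think>${reasoningToken}` : reasoningToken;
    emit(text);
    reasoningText += text;
    continue;
  }

  // Reasoning finished streaming: close the block before sending content.
  if (!!reasoningText && !reasoningToken && token) {
    emit("</think>");
    fullText += `${reasoningText}</think>`;
    reasoningText = "";
  }

  if (token) {
    emit(token);
    fullText += token;
  }
}

// fullText is now:
// "<think>Compare both options. Option B is simpler.</think>Use option B."

The <think> wrapper keeps the wire format a single string; downstream consumers can presumably strip the tags or render the tagged reasoning separately.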
