Enable include_reasoning in request (#3067)
timothycarambat authored Jan 30, 2025
1 parent e86ad82 commit 8eac83b
Showing 1 changed file with 72 additions and 4 deletions.
76 changes: 72 additions & 4 deletions server/utils/AiProviders/openRouter/index.js
@@ -167,6 +167,18 @@ class OpenRouterLLM {
return content.flat();
}

/**
* Parses reasoning from the response message and prepends it to the
* text response inside <think> tags, returning the full text response.
* @param {Object} choice - A single choice object from the completion response body.
* @returns {string}
*/
#parseReasoningFromResponse({ message }) {
let textResponse = message?.content;
if (!!message?.reasoning && message.reasoning.trim().length > 0)
textResponse = `<think>${message.reasoning}</think>${textResponse}`;
return textResponse;
}
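// Hypothetical input/output illustration (not part of this diff):
//   #parseReasoningFromResponse({
//     message: { reasoning: "Compare both options.", content: "Use option B." },
//   })
//   returns "<think>Compare both options.</think>Use option B."
// A message with empty or whitespace-only reasoning passes through unchanged.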

constructPrompt({
systemPrompt = "",
contextTexts = [],
@@ -200,20 +212,25 @@ class OpenRouterLLM {
model: this.model,
messages,
temperature,
// This is an OpenRouter-specific option that allows us to get the reasoning text
// before the token text.
include_reasoning: true,
})
.catch((e) => {
throw new Error(e.message);
})
);
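// Illustrative response shape (hypothetical values, not from the commit):
// with include_reasoning enabled, a returned choice may look like
//   { message: { role: "assistant", reasoning: "Check the context first.", content: "Here is the answer." } }
// which is folded into a single string via this.#parseReasoningFromResponse().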

if (
-  !result.output.hasOwnProperty("choices") ||
-  result.output.choices.length === 0
+  !result?.output?.hasOwnProperty("choices") ||
+  result?.output?.choices?.length === 0
)
-  return null;
+  throw new Error(
+    `Invalid response body returned from OpenRouter: ${result.output?.error?.message || "Unknown error"} ${result.output?.error?.code || "Unknown code"}`
+  );

return {
-  textResponse: result.output.choices[0].message.content,
+  textResponse: this.#parseReasoningFromResponse(result.output.choices[0]),
metrics: {
prompt_tokens: result.output.usage.prompt_tokens || 0,
completion_tokens: result.output.usage.completion_tokens || 0,
@@ -236,6 +253,9 @@
stream: true,
messages,
temperature,
// This is an OpenRouter-specific option that allows us to get the reasoning text
// before the token text.
include_reasoning: true,
}),
messages
// We have to manually count the tokens
@@ -262,6 +282,7 @@

return new Promise(async (resolve) => {
let fullText = "";
let reasoningText = "";
let lastChunkTime = null; // null when first token is still not received.

// Establish listener to early-abort a streaming response
@@ -313,8 +334,55 @@
for await (const chunk of stream) {
const message = chunk?.choices?.[0];
const token = message?.delta?.content;
const reasoningToken = message?.delta?.reasoning;
lastChunkTime = Number(new Date());

// Reasoning models will always return the reasoning text before the token text.
// The reasoning delta can be null or an empty string ('').
if (reasoningToken) {
// If no reasoning text has been accumulated yet, open the <think> block
// and send the first chunk of reasoning text.
if (reasoningText.length === 0) {
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: `<think>${reasoningToken}`,
close: false,
error: false,
});
reasoningText += `<think>${reasoningToken}`;
continue;
} else {
// Otherwise, append the reasoning token to the reasoning text
// accumulated so far.
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: reasoningToken,
close: false,
error: false,
});
reasoningText += reasoningToken;
}
}

// If the reasoning text is not empty, but the reasoning token is empty
// and the token text is not empty, close the reasoning block and begin
// sending the token text.
if (!!reasoningText && !reasoningToken && token) {
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: `</think>`,
close: false,
error: false,
});
fullText += `${reasoningText}</think>`;
reasoningText = "";
}

if (token) {
fullText += token;
writeResponseChunk(response, {
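For readers tracing the streaming branch above, the following is a minimal standalone sketch of the same open/append/close <think> state machine. The chunks array and emit() helper are hypothetical stand-ins for the OpenRouter delta stream and writeResponseChunk(); this is an illustration of the commit's logic under those assumptions, not code from the commit.

// Hypothetical chunk stream: reasoning deltas arrive before content deltas.
const chunks = [
  { choices: [{ delta: { reasoning: "Compare both options. " } }] },
  { choices: [{ delta: { reasoning: "Option B is simpler." } }] },
  { choices: [{ delta: { content: "Use option B." } }] },
];

// Stand-in for writeResponseChunk(response, {...}).
function emit(textResponse) {
  process.stdout.write(textResponse);
}

let fullText = "";
let reasoningText = "";

for (const chunk of chunks) {
  const delta = chunk?.choices?.[0]?.delta;
  const token = delta?.content;
  const reasoningToken = delta?.reasoning;

  if (reasoningToken) {
    // First reasoning token opens the <think> block; later tokens append.
    const text =
      reasoningText.length === 0 ? `<think>${reasoningToken}` : reasoningToken;
    emit(text);
    reasoningText += text;
    continue;
  }

  // Reasoning finished streaming: close the block before sending content.
  if (!!reasoningText && !reasoningToken && token) {
    emit("</think>");
    fullText += `${reasoningText}</think>`;
    reasoningText = "";
  }

  if (token) {
    emit(token);
    fullText += token;
  }
}

// fullText is now:
// "<think>Compare both options. Option B is simpler.</think>Use option B."

The <think> wrapper keeps the wire format a single string; downstream consumers can presumably strip the tags or render the tagged reasoning separately.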
