From c92a1f1b6d254b9cd688a70f98b1ef172473c936 Mon Sep 17 00:00:00 2001 From: Christian Bromann Date: Mon, 6 Oct 2025 13:23:21 +0200 Subject: [PATCH 1/5] feat(middleware): document new retry_model_request hook --- src/oss/langchain/middleware.mdx | 194 ++++++++++++++++++++++++++++++- 1 file changed, 190 insertions(+), 4 deletions(-) diff --git a/src/oss/langchain/middleware.mdx b/src/oss/langchain/middleware.mdx index 71b074cc7..58ddd5062 100644 --- a/src/oss/langchain/middleware.mdx +++ b/src/oss/langchain/middleware.mdx @@ -44,11 +44,12 @@ graph TD Middleware provides control over what happens before and after those steps. -Each middleware can add in three different types of modifiers: +Each middleware can add in four different types of modifiers: :::python - `before_model`: Runs before model execution. Can update state or jump to a different node (`model`, `tools`, `end`) - `modify_model_request`: Runs before model execution, to prepare the model request object. Can only modify the current model request object (no permanent state updates) and cannot jump to a different node. +- `retry_model_request`: Runs when model execution fails. Can modify the model request to retry with different parameters or return None to propagate the error. - `after_model`: Runs after model execution, before tools are executed. Can update state or jump to a different node (`model`, `tools`, `END`) In addition to that, each middleware can define the following static properties: @@ -59,6 +60,7 @@ In addition to that, each middleware can define the following static properties: :::js - `beforeModel`: Runs before model execution. Can update state or jump to a different node (`model`, `tools`, `end`) - `modifyModelRequest`: Runs before model execution, to prepare the model request object. Can only modify the current model request object (no permanent state updates) and cannot jump to a different node. +- `retryModelRequest`: Runs when model execution fails. 
Can modify the model request to retry with different parameters or return undefined to propagate the error. - `afterModel`: Runs after model execution, before tools are executed. Can update state or jump to a different node (`model`, `tools`, `__end__`) In addition to that, each middleware can define the following static properties: @@ -69,10 +71,10 @@ In addition to that, each middleware can define the following static properties: ::: :::python -An agent can contain `before_model`, `modify_model_request`, or `after_model` middleware. All three do not need to be implemented. +An agent can contain `before_model`, `modify_model_request`, `retry_model_request`, or `after_model` middleware. Not all four need to be implemented. ::: :::js -An agent can contain multiple middleware. Each middleware does not need to implement all three of `beforeModel`, `modifyModelRequest`, `afterModel`. +An agent can contain multiple middleware. Each middleware does not need to implement all four of `beforeModel`, `modifyModelRequest`, `retryModelRequest`, `afterModel`. ::: @@ -166,6 +168,7 @@ LangChain provides several built in middleware to use off-the-shelf - [Human-in-the-loop](#human-in-the-loop) - [Anthropic prompt caching](#anthropic-prompt-caching) - [Dynamic system prompt](#dynamic-system-prompt) +- [Model fallback](#model-fallback) ### Summarization @@ -453,20 +456,112 @@ const agent = createAgent({ ``` ::: +### Model fallback + +The `ModelFallbackMiddleware` provides automatic model fallback on errors. This middleware attempts to retry failed model calls with alternative models in sequence. When a model call fails, it tries the next model in the fallback list until either a call succeeds or all models have been exhausted. 
+ +**Key features:** + +- Automatic retry with fallback models when primary model fails +- Sequential fallback through multiple models +- Preserves original request parameters while switching models +- Configurable with any combination of model strings or instances + +**Use Cases:** + +- Handling model outages or rate limits +- Cost optimization by trying cheaper models first +- Ensuring high availability for critical applications + +:::python +```python +from langchain.agents import create_agent +from langchain.agents.middleware import ModelFallbackMiddleware + +agent = create_agent( + model="openai:gpt-4o", # Primary model + tools=[weather_tool, calculator_tool], + middleware=[ + ModelFallbackMiddleware( + "openai:gpt-4o-mini", # First fallback + "anthropic:claude-3-5-sonnet-20241022", # Second fallback + ), + ], +) + +# If gpt-4o fails, automatically tries gpt-4o-mini, then claude +result = agent.invoke({"messages": [HumanMessage("Hello")]}) +``` +::: + +:::js +```typescript +import { createAgent, modelFallbackMiddleware, HumanMessage } from "langchain"; + +const agent = createAgent({ + model: "openai:gpt-4o", // Primary model + tools: [weatherTool, calculatorTool], + middleware: [ + modelFallbackMiddleware( + "openai:gpt-4o-mini", // First fallback + "anthropic:claude-3-5-sonnet-20241022" // Second fallback + ), + ], +}); + +// If gpt-4o fails, automatically tries gpt-4o-mini, then claude +const result = await agent.invoke({ + messages: [new HumanMessage("Hello")] +}); +``` +::: + +**Configuration:** + +:::python +The `ModelFallbackMiddleware` constructor accepts fallback models in order of preference: + +- `first_model`: The first fallback model (required) +- `*additional_models`: Additional fallback models in order + +Models can be specified as: +- Model name strings (e.g., `"openai:gpt-4o-mini"`) +- `BaseChatModel` instances for pre-configured models +::: + +:::js +The `modelFallbackMiddleware` function accepts fallback models in order of preference: + +- 
`...fallbackModels`: Fallback models in order of preference + +Models can be specified as: +- Model name strings (e.g., `"openai:gpt-4o-mini"`) +- `LanguageModelLike` instances for pre-configured models +::: + +The middleware works by: + +1. When the primary model fails, the first fallback model is tried +2. If that fails, the next fallback model is attempted +3. This continues until a model succeeds or all fallbacks are exhausted +4. If all models fail, the original error from the last attempt is raised + ## Custom Middleware Middleware for agents are subclasses of `AgentMiddleware`, which implement one or more of its hooks. -`AgentMiddleware` currently provides three different ways to modify the core agent loop: +`AgentMiddleware` currently provides four different ways to modify the core agent loop: :::python - `before_model`: runs before the model is run. Can update state or exit early with a jump. - `modify_model_request`: runs before the model is run. Cannot update state or exit early with a jump. +- `retry_model_request`: runs when the model call fails. Can modify the request to retry or return None to propagate the error. - `after_model`: runs after the model is run. Can update state or exit early with a jump. ::: :::js - `beforeModel`: runs before the model is run. Can update state or exit early with a jump. - `modifyModelRequest`: runs before the model is run. Cannot update state or exit early with a jump. +- `retryModelRequest`: runs when the model call fails. Can modify the request to retry or return undefined to propagate the error. - `afterModel`: runs after the model is run. Can update state or exit early with a jump. ::: @@ -607,6 +702,95 @@ const myMiddleware = createMiddleware({ ``` ::: +:::python +### `retry_model_request` +::: +:::js +### `retryModelRequest` +::: + +Runs when a model call fails with an exception. This hook allows middleware to handle errors and optionally retry the model call with modified parameters. 
+ +:::python +The `retry_model_request` hook is called with the following parameters: +- `error` (`Exception`): The exception that occurred during model invocation +- `request` (`ModelRequest`): The original model request that failed +- `state` (`AgentState`): The current agent state +- `runtime` (`Runtime`): The runtime context +- `attempt` (`int`): The current attempt number (1-indexed) + +The hook can return: +- `ModelRequest`: A modified request to retry with +- `None`: Propagate the error (re-raise the exception) +::: + +:::js +The `retryModelRequest` hook is called with the following parameters: +- `error` (`Error`): The exception that occurred during model invocation +- `request` (`ModelRequest`): The original model request that failed +- `state` (agent state): The current agent state +- `runtime` (`Runtime`): The runtime context +- `attempt` (`number`): The current attempt number (1-indexed) + +The hook can return: +- `ModelRequest`: A modified request to retry with +- `undefined`: Propagate the error (re-raise the exception) +::: + +**Key behaviors:** + +- Multiple middleware with `retryModelRequest` are processed in order +- The first middleware that returns a modified request will trigger a retry +- Subsequent middleware in the chain are not called for that attempt +- If no middleware wants to retry, the original error is propagated +- There's a hard limit of 100 attempts to prevent infinite loops + +Signature: +:::python +```python +from langchain.agents.middleware import AgentState, ModelRequest, AgentMiddleware +from langgraph.runtime import Runtime + +class RetryMiddleware(AgentMiddleware): + def retry_model_request( + self, + error: Exception, + request: ModelRequest, + state: AgentState, + runtime: Runtime, + attempt: int + ) -> ModelRequest | None: + # Example: Switch to a fallback model on the first retry + if attempt == 1: + # Modify the request to use a different model + request.model = "openai:gpt-4o-mini" + return request + # Don't retry 
after first attempt + return None +``` +::: +:::js +```typescript +import { createMiddleware } from "langchain"; + +const retryMiddleware = createMiddleware({ + name: "RetryMiddleware", + retryModelRequest: (error, request, state, runtime, attempt) => { + // Example: Switch to a fallback model on the first retry + if (attempt === 1) { + // Modify the request to use a different model + return { + ...request, + model: "openai:gpt-4o-mini", + }; + } + // Don't retry after first attempt + return undefined; + }, +}); +``` +::: + :::python ### `after_model` ::: @@ -897,11 +1081,13 @@ You can provide multiple middlewares. They are executed in the following logic: :::python **`before_model`**: Are run in the order they are passed in. If an earlier middleware exits early, then following middleware are not run **`modify_model_request`**: Are run in the order they are passed in. +**`retry_model_request`**: Are run in the order they are passed in when a model call fails. The first middleware that returns a modified request triggers a retry, and subsequent middleware are not called for that attempt. **`after_model`**: Are run in the _reverse_ order that they are passed in. If an earlier middleware exits early, then following middleware are not run ::: :::js **`beforeModel`**: Are run in the order they are passed in. If an earlier middleware exits early, then following middleware are not run **`modifyModelRequest`**: Are run in the order they are passed in. +**`retryModelRequest`**: Are run in the order they are passed in when a model call fails. The first middleware that returns a modified request triggers a retry, and subsequent middleware are not called for that attempt. **`afterModel`**: Are run in the _reverse_ order that they are passed in. 
If an earlier middleware exits early, then following middleware are not run ::: From d5616b3ae03409730880113ee8a0106b7bdaf09b Mon Sep 17 00:00:00 2001 From: Christian Bromann Date: Tue, 7 Oct 2025 01:28:28 -0700 Subject: [PATCH 2/5] Update src/oss/langchain/middleware.mdx Co-authored-by: Lauren Hirata Singh --- src/oss/langchain/middleware.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/oss/langchain/middleware.mdx b/src/oss/langchain/middleware.mdx index 58ddd5062..43e10e7dd 100644 --- a/src/oss/langchain/middleware.mdx +++ b/src/oss/langchain/middleware.mdx @@ -467,7 +467,7 @@ The `ModelFallbackMiddleware` provides automatic model fallback on errors. This - Preserves original request parameters while switching models - Configurable with any combination of model strings or instances -**Use Cases:** +**Use cases:** - Handling model outages or rate limits - Cost optimization by trying cheaper models first From 9bb46591cb845a5094a2a129d43faf396dd470bb Mon Sep 17 00:00:00 2001 From: Christian Bromann Date: Tue, 7 Oct 2025 01:28:41 -0700 Subject: [PATCH 3/5] Update src/oss/langchain/middleware.mdx Co-authored-by: Lauren Hirata Singh --- src/oss/langchain/middleware.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/oss/langchain/middleware.mdx b/src/oss/langchain/middleware.mdx index 43e10e7dd..3e7a2a52e 100644 --- a/src/oss/langchain/middleware.mdx +++ b/src/oss/langchain/middleware.mdx @@ -546,7 +546,7 @@ The middleware works by: 3. This continues until a model succeeds or all fallbacks are exhausted 4. If all models fail, the original error from the last attempt is raised -## Custom Middleware +## Custom middleware Middleware for agents are subclasses of `AgentMiddleware`, which implement one or more of its hooks. 
From bb571c54225aa7f5f2e44a0dd4988805677ac023 Mon Sep 17 00:00:00 2001 From: Christian Bromann Date: Tue, 7 Oct 2025 10:33:40 +0200 Subject: [PATCH 4/5] cr --- src/oss/langchain/middleware.mdx | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/src/oss/langchain/middleware.mdx b/src/oss/langchain/middleware.mdx index 3e7a2a52e..97edc4430 100644 --- a/src/oss/langchain/middleware.mdx +++ b/src/oss/langchain/middleware.mdx @@ -458,14 +458,7 @@ const agent = createAgent({ ### Model fallback -The `ModelFallbackMiddleware` provides automatic model fallback on errors. This middleware attempts to retry failed model calls with alternative models in sequence. When a model call fails, it tries the next model in the fallback list until either a call succeeds or all models have been exhausted. - -**Key features:** - -- Automatic retry with fallback models when primary model fails -- Sequential fallback through multiple models -- Preserves original request parameters while switching models -- Configurable with any combination of model strings or instances +The `ModelFallbackMiddleware` provides automatic model fallback on errors. This middleware attempts to retry failed model calls with alternative models in sequence. When a model call fails, it tries the next model in the fallback list until either a call succeeds or all models have been exhausted. If all fallback models fail, the error from the last attempt is raised. **Use cases:** @@ -539,13 +532,6 @@ Models can be specified as: - `LanguageModelLike` instances for pre-configured models ::: -The middleware works by: - -1. When the primary model fails, the first fallback model is tried -2. If that fails, the next fallback model is attempted -3. This continues until a model succeeds or all fallbacks are exhausted -4. 
If all models fail, the original error from the last attempt is raised - ## Custom middleware Middleware for agents are subclasses of `AgentMiddleware`, which implement one or more of its hooks. From 6f09d32d1e019dff2b21c98a976b6d4e2ad681fe Mon Sep 17 00:00:00 2001 From: Christian Bromann Date: Tue, 7 Oct 2025 10:37:30 +0200 Subject: [PATCH 5/5] fix python imports --- src/oss/langchain/middleware.mdx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/oss/langchain/middleware.mdx b/src/oss/langchain/middleware.mdx index 97edc4430..98c1f2495 100644 --- a/src/oss/langchain/middleware.mdx +++ b/src/oss/langchain/middleware.mdx @@ -734,7 +734,8 @@ The hook can return: Signature: :::python ```python -from langchain.agents.middleware import AgentState, ModelRequest, AgentMiddleware +from langchain.agents import AgentState +from langchain.agents.middleware import ModelRequest, AgentMiddleware from langgraph.runtime import Runtime class RetryMiddleware(AgentMiddleware): @@ -1149,7 +1150,7 @@ Use middleware to dynamically select which tools are available at runtime based ```python from langchain.agents import create_agent -from langchain.agents.middleware import AgentState, ModelRequest, modify_model_request +from langchain.agents.middleware.types import AgentState, ModelRequest, modify_model_request @modify_model_request def tool_selector(state: AgentState, request: ModelRequest) -> ModelRequest: