fix(json): handle double-encoded tool call arguments in openai request serialization (#2764)

amitksingh1490 · autofix-ci[bot] · web-flow · commit 5a6f3c8ebc77 · 2026-04-01T13:20:55.000Z
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/forge_app/Cargo.toml b/crates/forge_app/Cargo.toml
@@ -43,6 +43,7 @@ reqwest-eventsource.workspace = true
 schemars.workspace = true
 glob.workspace = true
 lazy_static.workspace = true
+forge_json_repair.workspace = true
 
 tonic.workspace = true
 
diff --git a/crates/forge_app/src/dto/openai/request.rs b/crates/forge_app/src/dto/openai/request.rs
@@ -2,13 +2,15 @@ use std::vec;
 
 use derive_more::derive::Display;
 use derive_setters::Setters;
+use forge_json_repair::coerce_to_schema;
 use serde::{Deserialize, Serialize};
+use strum::IntoEnumIterator;
 
 use super::response::{ExtraContent, FunctionCall, ToolCall};
 use super::tool_choice::{FunctionType, ToolChoice};
 use crate::domain::{
-    Context, ContextMessage, ModelId, ToolCallFull, ToolCallId, ToolDefinition, ToolName,
-    ToolResult, ToolValue,
+    Context, ContextMessage, ModelId, ToolCallFull, ToolCallId, ToolCatalog, ToolDefinition,
+    ToolName, ToolResult, ToolValue,
 };
 use crate::dto::openai::ReasoningDetail;
 
@@ -406,17 +408,30 @@ impl From<Context> for Request {
     }
 }
 
+fn serialize_tool_call_arguments(tool_call: &ToolCallFull) -> String {
+    let serialized_arguments = || serde_json::to_string(&tool_call.arguments).unwrap();
+
+    let Ok(parsed_arguments) = tool_call.arguments.parse() else {
+        return serialized_arguments();
+    };
+
+    let normalized_arguments = ToolCatalog::iter()
+        .find(|tool| tool.definition().name == tool_call.name)
+        .map(|tool| coerce_to_schema(parsed_arguments.clone(), &tool.definition().input_schema))
+        .unwrap_or(parsed_arguments);
+
+    serde_json::to_string(&normalized_arguments).unwrap_or_else(|_| serialized_arguments())
+}
+
 impl From<ToolCallFull> for ToolCall {
     fn from(value: ToolCallFull) -> Self {
+        let arguments = serialize_tool_call_arguments(&value);
         let extra_content = value.thought_signature.map(ExtraContent::from);
 
         Self {
             id: value.call_id,
             r#type: FunctionType,
-            function: FunctionCall {
-                arguments: serde_json::to_string(&value.arguments).unwrap(),
-                name: Some(value.name),
-            },
+            function: FunctionCall { arguments, name: Some(value.name) },
             extra_content,
         }
     }
@@ -681,7 +696,8 @@ mod tests {
     }
 
     use forge_domain::{
-        ContextMessage, Role, TextMessage, ToolCallFull, ToolCallId, ToolName, ToolResult,
+        ContextMessage, Role, TextMessage, ToolCallFull, ToolCallId, ToolCatalog, ToolName,
+        ToolResult,
     };
     use insta::assert_json_snapshot;
 
@@ -731,6 +747,43 @@ mod tests {
         assert_json_snapshot!(router_message);
     }
 
+    #[test]
+    fn test_assistant_message_with_dump_style_tool_call_arguments_conversion() {
+        let fixture = ToolCatalog::tool_call_patch(
+            "/tmp/file.txt",
+            "new text",
+            "old text",
+            false,
+        )
+        .arguments(
+            serde_json::from_str::<forge_domain::ToolCallArguments>(
+                r#""{\"file_path\":\"/tmp/file.txt\",\"old_string\":\"old text\",\"new_string\":\"new text\",\"replace_all\":false}""#,
+            )
+            .unwrap(),
+        )
+        .call_id(ToolCallId::new("123"));
+
+        let assistant_message = ContextMessage::Text(
+            TextMessage::new(Role::Assistant, "Using tool")
+                .tool_calls(vec![fixture])
+                .model(ModelId::new("gpt-3.5-turbo")),
+        );
+        let actual = Message::from(assistant_message);
+        let actual =
+            serde_json::to_value(actual.tool_calls.expect("Tool calls should exist")).unwrap();
+        let expected = serde_json::json!([
+            {
+                "id": "123",
+                "type": "function",
+                "function": {
+                    "arguments": "{\"file_path\":\"/tmp/file.txt\",\"new_string\":\"new text\",\"old_string\":\"old text\",\"replace_all\":false}",
+                    "name": "patch"
+                }
+            }
+        ]);
+        assert_eq!(actual, expected);
+    }
+
     #[test]
     fn test_tool_message_conversion() {
         let tool_result = ToolResult::new(ToolName::new("test_tool"))
diff --git a/crates/forge_json_repair/src/schema_coercion.rs b/crates/forge_json_repair/src/schema_coercion.rs
@@ -2,6 +2,8 @@ use schemars::Schema;
 use serde::de::Error as _;
 use serde_json::Value;
 
+use crate::json_repair;
+
 /// Coerces a JSON value to match the expected types defined in a JSON schema.
 ///
 /// This function recursively traverses the JSON value and the schema,
@@ -382,17 +384,40 @@ fn coerce_array_value(
     }
 }
 
-/// Attempts to parse a string as JSON, handling both valid JSON and JSON5
-/// (Python-style) syntax
+/// Attempts to parse a string as JSON, JSON5, or repairable JSON, and unwraps
+/// nested JSON strings when needed.
 fn try_parse_json_string(s: &str) -> Result<Value, serde_json::Error> {
+    let mut parsed = parse_json_like_value(s)?;
+
+    for _ in 0..4 {
+        let Value::String(inner) = &parsed else {
+            return Ok(parsed);
+        };
+
+        let Ok(next) = parse_json_like_value(inner) else {
+            return Ok(parsed);
+        };
+
+        parsed = next;
+    }
+
+    Ok(parsed)
+}
+
+fn parse_json_like_value(s: &str) -> Result<Value, serde_json::Error> {
     // First try parsing as-is (valid JSON)
     if let Ok(parsed) = serde_json::from_str::<Value>(s) {
         return Ok(parsed);
     }
 
     // If that fails, try parsing as JSON5 (handles single quotes, comments, etc.)
-    // Convert serde_json5::Error to serde_json::Error
-    serde_json5::from_str::<Value>(s).map_err(|e| serde_json::Error::custom(e.to_string()))
+    if let Ok(parsed) = serde_json5::from_str::<Value>(s) {
+        return Ok(parsed);
+    }
+
+    // Finally, fall back to Forge's JSON repair for malformed-but-recoverable
+    // payloads such as persisted double-encoded tool arguments.
+    json_repair(s).map_err(|e| serde_json::Error::custom(e.to_string()))
 }
 
 /// Extracts an array from a string that may contain garbage before/after the
@@ -944,12 +969,22 @@ mod tests {
     }
 
     #[test]
-    fn test_preserve_invalid_json_string() {
-        // Test that invalid JSON strings are preserved
+    fn test_coerce_double_encoded_string_to_object() {
+        let fixture = json!({"config": r#""{\"key\":\"value\",\"number\":42}""#});
+        let schema = schema_for!(ConfigData);
+        let actual = coerce_to_schema(fixture, &schema);
+        let expected = json!({"config": {"key": "value", "number": 42}});
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_repairs_invalid_json_string_when_schema_expects_array() {
+        // Invalid JSON-like array strings are repaired into arrays when the schema
+        // expects one.
         let fixture = json!({"data": "[invalid json"});
         let schema = schema_for!(DataArray);
         let actual = coerce_to_schema(fixture, &schema);
-        let expected = json!({"data": "[invalid json"});
+        let expected = json!({"data": ["invalid json"]});
         assert_eq!(actual, expected);
     }