Skip to content

Commit 5a6f3c8

Browse files
fix(json): handle double-encoded tool call arguments in openai request serialization (#2764)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
1 parent 26007d1 commit 5a6f3c8

File tree

4 files changed

+104
-14
lines changed

4 files changed

+104
-14
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/forge_app/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ reqwest-eventsource.workspace = true
4343
schemars.workspace = true
4444
glob.workspace = true
4545
lazy_static.workspace = true
46+
forge_json_repair.workspace = true
4647

4748
tonic.workspace = true
4849

crates/forge_app/src/dto/openai/request.rs

Lines changed: 60 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@ use std::vec;
22

33
use derive_more::derive::Display;
44
use derive_setters::Setters;
5+
use forge_json_repair::coerce_to_schema;
56
use serde::{Deserialize, Serialize};
7+
use strum::IntoEnumIterator;
68

79
use super::response::{ExtraContent, FunctionCall, ToolCall};
810
use super::tool_choice::{FunctionType, ToolChoice};
911
use crate::domain::{
10-
Context, ContextMessage, ModelId, ToolCallFull, ToolCallId, ToolDefinition, ToolName,
11-
ToolResult, ToolValue,
12+
Context, ContextMessage, ModelId, ToolCallFull, ToolCallId, ToolCatalog, ToolDefinition,
13+
ToolName, ToolResult, ToolValue,
1214
};
1315
use crate::dto::openai::ReasoningDetail;
1416

@@ -406,17 +408,30 @@ impl From<Context> for Request {
406408
}
407409
}
408410

411+
fn serialize_tool_call_arguments(tool_call: &ToolCallFull) -> String {
412+
let serialized_arguments = || serde_json::to_string(&tool_call.arguments).unwrap();
413+
414+
let Ok(parsed_arguments) = tool_call.arguments.parse() else {
415+
return serialized_arguments();
416+
};
417+
418+
let normalized_arguments = ToolCatalog::iter()
419+
.find(|tool| tool.definition().name == tool_call.name)
420+
.map(|tool| coerce_to_schema(parsed_arguments.clone(), &tool.definition().input_schema))
421+
.unwrap_or(parsed_arguments);
422+
423+
serde_json::to_string(&normalized_arguments).unwrap_or_else(|_| serialized_arguments())
424+
}
425+
409426
impl From<ToolCallFull> for ToolCall {
410427
fn from(value: ToolCallFull) -> Self {
428+
let arguments = serialize_tool_call_arguments(&value);
411429
let extra_content = value.thought_signature.map(ExtraContent::from);
412430

413431
Self {
414432
id: value.call_id,
415433
r#type: FunctionType,
416-
function: FunctionCall {
417-
arguments: serde_json::to_string(&value.arguments).unwrap(),
418-
name: Some(value.name),
419-
},
434+
function: FunctionCall { arguments, name: Some(value.name) },
420435
extra_content,
421436
}
422437
}
@@ -681,7 +696,8 @@ mod tests {
681696
}
682697

683698
use forge_domain::{
684-
ContextMessage, Role, TextMessage, ToolCallFull, ToolCallId, ToolName, ToolResult,
699+
ContextMessage, Role, TextMessage, ToolCallFull, ToolCallId, ToolCatalog, ToolName,
700+
ToolResult,
685701
};
686702
use insta::assert_json_snapshot;
687703

@@ -731,6 +747,43 @@ mod tests {
731747
assert_json_snapshot!(router_message);
732748
}
733749

750+
#[test]
751+
fn test_assistant_message_with_dump_style_tool_call_arguments_conversion() {
752+
let fixture = ToolCatalog::tool_call_patch(
753+
"/tmp/file.txt",
754+
"new text",
755+
"old text",
756+
false,
757+
)
758+
.arguments(
759+
serde_json::from_str::<forge_domain::ToolCallArguments>(
760+
r#""{\"file_path\":\"/tmp/file.txt\",\"old_string\":\"old text\",\"new_string\":\"new text\",\"replace_all\":false}""#,
761+
)
762+
.unwrap(),
763+
)
764+
.call_id(ToolCallId::new("123"));
765+
766+
let assistant_message = ContextMessage::Text(
767+
TextMessage::new(Role::Assistant, "Using tool")
768+
.tool_calls(vec![fixture])
769+
.model(ModelId::new("gpt-3.5-turbo")),
770+
);
771+
let actual = Message::from(assistant_message);
772+
let actual =
773+
serde_json::to_value(actual.tool_calls.expect("Tool calls should exist")).unwrap();
774+
let expected = serde_json::json!([
775+
{
776+
"id": "123",
777+
"type": "function",
778+
"function": {
779+
"arguments": "{\"file_path\":\"/tmp/file.txt\",\"new_string\":\"new text\",\"old_string\":\"old text\",\"replace_all\":false}",
780+
"name": "patch"
781+
}
782+
}
783+
]);
784+
assert_eq!(actual, expected);
785+
}
786+
734787
#[test]
735788
fn test_tool_message_conversion() {
736789
let tool_result = ToolResult::new(ToolName::new("test_tool"))

crates/forge_json_repair/src/schema_coercion.rs

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ use schemars::Schema;
22
use serde::de::Error as _;
33
use serde_json::Value;
44

5+
use crate::json_repair;
6+
57
/// Coerces a JSON value to match the expected types defined in a JSON schema.
68
///
79
/// This function recursively traverses the JSON value and the schema,
@@ -382,17 +384,40 @@ fn coerce_array_value(
382384
}
383385
}
384386

385-
/// Attempts to parse a string as JSON, handling both valid JSON and JSON5
386-
/// (Python-style) syntax
387+
/// Attempts to parse a string as JSON, JSON5, or repairable JSON, and unwraps
388+
/// nested JSON strings when needed.
387389
fn try_parse_json_string(s: &str) -> Result<Value, serde_json::Error> {
390+
let mut parsed = parse_json_like_value(s)?;
391+
392+
for _ in 0..4 {
393+
let Value::String(inner) = &parsed else {
394+
return Ok(parsed);
395+
};
396+
397+
let Ok(next) = parse_json_like_value(inner) else {
398+
return Ok(parsed);
399+
};
400+
401+
parsed = next;
402+
}
403+
404+
Ok(parsed)
405+
}
406+
407+
fn parse_json_like_value(s: &str) -> Result<Value, serde_json::Error> {
388408
// First try parsing as-is (valid JSON)
389409
if let Ok(parsed) = serde_json::from_str::<Value>(s) {
390410
return Ok(parsed);
391411
}
392412

393413
// If that fails, try parsing as JSON5 (handles single quotes, comments, etc.)
394-
// Convert serde_json5::Error to serde_json::Error
395-
serde_json5::from_str::<Value>(s).map_err(|e| serde_json::Error::custom(e.to_string()))
414+
if let Ok(parsed) = serde_json5::from_str::<Value>(s) {
415+
return Ok(parsed);
416+
}
417+
418+
// Finally, fall back to Forge's JSON repair for malformed-but-recoverable
419+
// payloads such as persisted double-encoded tool arguments.
420+
json_repair(s).map_err(|e| serde_json::Error::custom(e.to_string()))
396421
}
397422

398423
/// Extracts an array from a string that may contain garbage before/after the
@@ -944,12 +969,22 @@ mod tests {
944969
}
945970

946971
#[test]
947-
fn test_preserve_invalid_json_string() {
948-
// Test that invalid JSON strings are preserved
972+
fn test_coerce_double_encoded_string_to_object() {
973+
let fixture = json!({"config": r#""{\"key\":\"value\",\"number\":42}""#});
974+
let schema = schema_for!(ConfigData);
975+
let actual = coerce_to_schema(fixture, &schema);
976+
let expected = json!({"config": {"key": "value", "number": 42}});
977+
assert_eq!(actual, expected);
978+
}
979+
980+
#[test]
981+
fn test_repairs_invalid_json_string_when_schema_expects_array() {
982+
// Invalid JSON-like array strings are repaired into arrays when the schema
983+
// expects one.
949984
let fixture = json!({"data": "[invalid json"});
950985
let schema = schema_for!(DataArray);
951986
let actual = coerce_to_schema(fixture, &schema);
952-
let expected = json!({"data": "[invalid json"});
987+
let expected = json!({"data": ["invalid json"]});
953988
assert_eq!(actual, expected);
954989
}
955990

0 commit comments

Comments
 (0)