Skip to content

Commit 5b18cce

Browse files
fix(responses): add message phase support and preserve reasoning (#2722)
1 parent 483bd44 commit 5b18cce

16 files changed

Lines changed: 482 additions & 14 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ gray_matter = "0.3.2"
131131
num-format = "0.4"
132132
humantime = "2.1.0"
133133
dashmap = "7.0.0-rc2"
134-
async-openai = { version = "0.33.1", default-features = false, features = ["response-types"] } # Using only types, not the API client - reduces dependencies
134+
async-openai = { version = "0.34.0", default-features = false, features = ["response-types"] } # Using only types, not the API client - reduces dependencies
135135
google-cloud-auth = "1.7.0" # Google Cloud authentication with automatic token refresh
136136

137137
# Internal crates

crates/forge_app/src/hooks/doom_loop.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ mod tests {
268268
model: None,
269269
reasoning_details: None,
270270
droppable: false,
271+
phase: None,
271272
}
272273
}
273274

@@ -403,6 +404,7 @@ mod tests {
403404
model: None,
404405
reasoning_details: None,
405406
droppable: false,
407+
phase: None,
406408
};
407409

408410
let user_msg = TextMessage {
@@ -414,6 +416,7 @@ mod tests {
414416
model: None,
415417
reasoning_details: None,
416418
droppable: false,
419+
phase: None,
417420
};
418421

419422
let assistant_msg_2 = TextMessage {
@@ -425,6 +428,7 @@ mod tests {
425428
model: None,
426429
reasoning_details: None,
427430
droppable: false,
431+
phase: None,
428432
};
429433

430434
let messages = [

crates/forge_app/src/hooks/tracing.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ mod tests {
204204
tool_calls: vec![],
205205
usage: Default::default(),
206206
finish_reason: None,
207+
phase: None,
207208
};
208209
let event = EventData::new(test_agent(), test_model_id(), ResponsePayload::new(message));
209210

crates/forge_app/src/orch.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ impl<S: AgentService> Orchestrator<S> {
311311
message.reasoning_details.clone(),
312312
message.usage,
313313
tool_call_records,
314+
message.phase,
314315
);
315316

316317
if self.error_tracker.limit_reached() {

crates/forge_app/src/user_prompt.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ impl<S: AttachmentService> UserPromptGenerator<S> {
7676
reasoning_details: None,
7777
model: Some(self.agent.model.clone()),
7878
droppable: true, // Droppable so it can be removed during context compression
79+
phase: None,
7980
};
8081
context = context.add_message(ContextMessage::Text(todo_message));
8182
}
@@ -121,6 +122,7 @@ impl<S: AttachmentService> UserPromptGenerator<S> {
121122
reasoning_details: None,
122123
model: Some(self.agent.model.clone()),
123124
droppable: true, // Piped input is droppable
125+
phase: None,
124126
};
125127
context = context.add_message(ContextMessage::Text(piped_message));
126128
}
@@ -197,6 +199,7 @@ impl<S: AttachmentService> UserPromptGenerator<S> {
197199
reasoning_details: None,
198200
model: Some(self.agent.model.clone()),
199201
droppable: false,
202+
phase: None,
200203
};
201204
context = context.add_message(ContextMessage::Text(message));
202205
}

crates/forge_domain/src/context.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ use crate::temperature::Temperature;
1818
use crate::top_k::TopK;
1919
use crate::top_p::TopP;
2020
use crate::{
21-
Attachment, AttachmentContent, ConversationId, EventValue, Image, ModelId, ReasoningFull,
22-
ToolChoice, ToolDefinition, ToolOutput, ToolValue, Usage,
21+
Attachment, AttachmentContent, ConversationId, EventValue, Image, MessagePhase, ModelId,
22+
ReasoningFull, ToolChoice, ToolDefinition, ToolOutput, ToolValue, Usage,
2323
};
2424

2525
/// Response format for structured output
@@ -169,6 +169,7 @@ impl ContextMessage {
169169
reasoning_details: None,
170170
model,
171171
droppable: false,
172+
phase: None,
172173
}
173174
.into()
174175
}
@@ -183,6 +184,7 @@ impl ContextMessage {
183184
model: None,
184185
reasoning_details: None,
185186
droppable: false,
187+
phase: None,
186188
}
187189
.into()
188190
}
@@ -204,6 +206,7 @@ impl ContextMessage {
204206
reasoning_details,
205207
model: None,
206208
droppable: false,
209+
phase: None,
207210
}
208211
.into()
209212
}
@@ -311,6 +314,11 @@ pub struct TextMessage {
311314
/// Indicates whether this message can be dropped during context compaction
312315
#[serde(default, skip_serializing_if = "is_false")]
313316
pub droppable: bool,
317+
/// Phase label for assistant messages (`Commentary` or `FinalAnswer`).
318+
/// Preserved from OpenAI Responses API and replayed back on subsequent
319+
/// requests.
320+
#[serde(default, skip_serializing_if = "Option::is_none")]
321+
pub phase: Option<MessagePhase>,
314322
}
315323

316324
impl TextMessage {
@@ -325,6 +333,7 @@ impl TextMessage {
325333
model: None,
326334
reasoning_details: None,
327335
droppable: false,
336+
phase: None,
328337
}
329338
}
330339

@@ -346,6 +355,7 @@ impl TextMessage {
346355
reasoning_details,
347356
model,
348357
droppable: false,
358+
phase: None,
349359
}
350360
}
351361
}
@@ -554,6 +564,7 @@ impl Context {
554564
reasoning_details: Option<Vec<ReasoningFull>>,
555565
usage: Usage,
556566
tool_records: Vec<(ToolCallFull, ToolResult)>,
567+
phase: Option<MessagePhase>,
557568
) -> Self {
558569
// Convert flat reasoning string to reasoning_details if present
559570
let merged_reasoning_details = if let Some(reasoning_text) = reasoning {
@@ -573,7 +584,7 @@ impl Context {
573584
};
574585

575586
// Adding tool calls
576-
let message: MessageEntry = ContextMessage::assistant(
587+
let mut message: MessageEntry = ContextMessage::assistant(
577588
content,
578589
thought_signature,
579590
merged_reasoning_details,
@@ -586,6 +597,11 @@ impl Context {
586597
)
587598
.into();
588599

600+
// Set phase on the assistant TextMessage if provided
601+
if let ContextMessage::Text(ref mut text_msg) = message.message {
602+
text_msg.phase = phase;
603+
}
604+
589605
let tool_results = tool_records
590606
.iter()
591607
.map(|record| record.1.clone())

crates/forge_domain/src/hook.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,7 @@ mod tests {
640640
reasoning_details: None,
641641
usage: crate::Usage::default(),
642642
finish_reason: None,
643+
phase: None,
643644
}),
644645
)),
645646
LifecycleEvent::ToolcallStart(EventData::new(

crates/forge_domain/src/message.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,20 @@ use super::{ToolCall, ToolCallFull};
77
use crate::TokenCount;
88
use crate::reasoning::{Reasoning, ReasoningFull};
99

10+
/// Labels an assistant message as intermediate commentary or the final answer.
11+
///
12+
/// For models like `gpt-5.3-codex` and beyond, when sending follow-up requests,
13+
/// preserve and resend phase on all assistant messages -- dropping it can
14+
/// degrade performance.
15+
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)]
16+
#[serde(rename_all = "snake_case")]
17+
pub enum MessagePhase {
18+
/// Intermediate commentary produced while the model is reasoning.
19+
Commentary,
20+
/// The final answer from the model.
21+
FinalAnswer,
22+
}
23+
1024
#[derive(Default, Clone, Copy, Debug, Serialize, Deserialize, PartialEq)]
1125
pub struct Usage {
1226
pub prompt_tokens: TokenCount,
@@ -47,6 +61,9 @@ pub struct ChatCompletionMessage {
4761
pub tool_calls: Vec<ToolCall>,
4862
pub finish_reason: Option<FinishReason>,
4963
pub usage: Option<Usage>,
64+
/// Phase label for assistant messages (e.g. `Commentary` or `FinalAnswer`).
65+
/// Preserved from the response and replayed back on subsequent requests.
66+
pub phase: Option<MessagePhase>,
5067
}
5168

5269
impl From<FinishReason> for ChatCompletionMessage {
@@ -176,6 +193,9 @@ pub struct ChatCompletionMessageFull {
176193
pub reasoning_details: Option<Vec<ReasoningFull>>,
177194
pub usage: Usage,
178195
pub finish_reason: Option<FinishReason>,
196+
/// Phase label for the assistant message (e.g. `Commentary` or
197+
/// `FinalAnswer`).
198+
pub phase: Option<MessagePhase>,
179199
}
180200

181201
#[cfg(test)]

crates/forge_domain/src/result_stream_ext.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,9 @@ impl ResultStreamExt<anyhow::Error> for crate::BoxStream<ChatCompletionMessage,
245245
.rev()
246246
.find_map(|message| message.thought_signature.clone());
247247

248+
// Get phase from the last message that has one
249+
let phase = messages.iter().rev().find_map(|message| message.phase);
250+
248251
// Check for empty completion - map to retryable error for retry
249252
if content.trim().is_empty()
250253
&& tool_calls.is_empty()
@@ -263,6 +266,7 @@ impl ResultStreamExt<anyhow::Error> for crate::BoxStream<ChatCompletionMessage,
263266
reasoning_details: (!total_reasoning_details.is_empty())
264267
.then_some(total_reasoning_details),
265268
finish_reason,
269+
phase,
266270
})
267271
}
268272
}
@@ -315,6 +319,7 @@ mod tests {
315319
reasoning: None,
316320
reasoning_details: None,
317321
finish_reason: None,
322+
phase: None,
318323
};
319324

320325
assert_eq!(actual, expected);
@@ -368,6 +373,7 @@ mod tests {
368373
reasoning: None,
369374
reasoning_details: None,
370375
finish_reason: None,
376+
phase: None,
371377
};
372378

373379
assert_eq!(actual, expected);
@@ -419,6 +425,7 @@ mod tests {
419425
reasoning: None,
420426
reasoning_details: None,
421427
finish_reason: None,
428+
phase: None,
422429
};
423430

424431
assert_eq!(actual, expected);
@@ -471,6 +478,7 @@ mod tests {
471478
reasoning: None,
472479
reasoning_details: None,
473480
finish_reason: None,
481+
phase: None,
474482
};
475483

476484
assert_eq!(actual, expected);
@@ -525,6 +533,7 @@ mod tests {
525533
reasoning: None,
526534
reasoning_details: None,
527535
finish_reason: Some(FinishReason::Stop),
536+
phase: None,
528537
};
529538

530539
assert_eq!(actual, expected);
@@ -652,6 +661,7 @@ mod tests {
652661
reasoning: None,
653662
reasoning_details: None,
654663
finish_reason: None,
664+
phase: None,
655665
};
656666

657667
assert_eq!(actual, expected);
@@ -718,6 +728,7 @@ mod tests {
718728
reasoning: Some("First reasoning: thinking deeply about this...".to_string()),
719729
reasoning_details: None,
720730
finish_reason: None,
731+
phase: None,
721732
};
722733

723734
assert_eq!(actual, expected);
@@ -773,6 +784,7 @@ mod tests {
773784
reasoning: None,
774785
reasoning_details: Some(expected_reasoning_details),
775786
finish_reason: None,
787+
phase: None,
776788
};
777789

778790
assert_eq!(actual, expected);
@@ -803,6 +815,7 @@ mod tests {
803815
reasoning: None, // Empty reasoning should be None
804816
reasoning_details: None,
805817
finish_reason: None,
818+
phase: None,
806819
};
807820

808821
assert_eq!(actual, expected);
@@ -891,6 +904,7 @@ mod tests {
891904
reasoning: None,
892905
reasoning_details: None,
893906
finish_reason: None,
907+
phase: None,
894908
};
895909

896910
assert_eq!(actual, expected);
@@ -933,6 +947,7 @@ mod tests {
933947
reasoning: None,
934948
reasoning_details: None,
935949
finish_reason: None,
950+
phase: None,
936951
};
937952

938953
assert_eq!(actual, expected);
@@ -973,6 +988,7 @@ mod tests {
973988
reasoning_details: None,
974989
finish_reason: Some(FinishReason::Stop), /* Should be from the last message with a
975990
* finish reason */
991+
phase: None,
976992
};
977993

978994
assert_eq!(actual, expected);
@@ -1002,6 +1018,7 @@ mod tests {
10021018
reasoning: None,
10031019
reasoning_details: None,
10041020
finish_reason: Some(FinishReason::ToolCalls),
1021+
phase: None,
10051022
};
10061023

10071024
assert_eq!(actual, expected);
@@ -1030,6 +1047,7 @@ mod tests {
10301047
reasoning: None,
10311048
reasoning_details: None,
10321049
finish_reason: None,
1050+
phase: None,
10331051
};
10341052

10351053
assert_eq!(actual, expected);
@@ -1121,6 +1139,7 @@ mod tests {
11211139
reasoning: None,
11221140
reasoning_details: None,
11231141
finish_reason: Some(FinishReason::Stop),
1142+
phase: None,
11241143
};
11251144

11261145
assert_eq!(actual, expected);
@@ -1155,6 +1174,7 @@ mod tests {
11551174
reasoning: None,
11561175
reasoning_details: None,
11571176
finish_reason: None,
1177+
phase: None,
11581178
};
11591179

11601180
assert_eq!(actual, expected);

0 commit comments

Comments
 (0)