fix(responses): add message phase support and preserve reasoning (#2722)

amitksingh1490 · web-flow · commit 5b18cce776d5 · 2026-03-28T19:53:31.000+05:30
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -131,7 +131,7 @@ gray_matter = "0.3.2"
 num-format = "0.4"
 humantime = "2.1.0"
 dashmap = "7.0.0-rc2"
-async-openai = { version = "0.33.1", default-features = false, features = ["response-types"] } # Using only types, not the API client - reduces dependencies
+async-openai = { version = "0.34.0", default-features = false, features = ["response-types"] } # Using only types, not the API client - reduces dependencies
 google-cloud-auth = "1.7.0" # Google Cloud authentication with automatic token refresh
 
 # Internal crates
diff --git a/crates/forge_app/src/hooks/doom_loop.rs b/crates/forge_app/src/hooks/doom_loop.rs
@@ -268,6 +268,7 @@ mod tests {
             model: None,
             reasoning_details: None,
             droppable: false,
+            phase: None,
         }
     }
 
@@ -403,6 +404,7 @@ mod tests {
             model: None,
             reasoning_details: None,
             droppable: false,
+            phase: None,
         };
 
         let user_msg = TextMessage {
@@ -414,6 +416,7 @@ mod tests {
             model: None,
             reasoning_details: None,
             droppable: false,
+            phase: None,
         };
 
         let assistant_msg_2 = TextMessage {
@@ -425,6 +428,7 @@ mod tests {
             model: None,
             reasoning_details: None,
             droppable: false,
+            phase: None,
         };
 
         let messages = [
diff --git a/crates/forge_app/src/hooks/tracing.rs b/crates/forge_app/src/hooks/tracing.rs
@@ -204,6 +204,7 @@ mod tests {
             tool_calls: vec![],
             usage: Default::default(),
             finish_reason: None,
+            phase: None,
         };
         let event = EventData::new(test_agent(), test_model_id(), ResponsePayload::new(message));
 
diff --git a/crates/forge_app/src/orch.rs b/crates/forge_app/src/orch.rs
@@ -311,6 +311,7 @@ impl<S: AgentService> Orchestrator<S> {
                 message.reasoning_details.clone(),
                 message.usage,
                 tool_call_records,
+                message.phase,
             );
 
             if self.error_tracker.limit_reached() {
diff --git a/crates/forge_app/src/user_prompt.rs b/crates/forge_app/src/user_prompt.rs
@@ -76,6 +76,7 @@ impl<S: AttachmentService> UserPromptGenerator<S> {
                 reasoning_details: None,
                 model: Some(self.agent.model.clone()),
                 droppable: true, // Droppable so it can be removed during context compression
+                phase: None,
             };
             context = context.add_message(ContextMessage::Text(todo_message));
         }
@@ -121,6 +122,7 @@ impl<S: AttachmentService> UserPromptGenerator<S> {
                 reasoning_details: None,
                 model: Some(self.agent.model.clone()),
                 droppable: true, // Piped input is droppable
+                phase: None,
             };
             context = context.add_message(ContextMessage::Text(piped_message));
         }
@@ -197,6 +199,7 @@ impl<S: AttachmentService> UserPromptGenerator<S> {
                 reasoning_details: None,
                 model: Some(self.agent.model.clone()),
                 droppable: false,
+                phase: None,
             };
             context = context.add_message(ContextMessage::Text(message));
         }
diff --git a/crates/forge_domain/src/context.rs b/crates/forge_domain/src/context.rs
@@ -18,8 +18,8 @@ use crate::temperature::Temperature;
 use crate::top_k::TopK;
 use crate::top_p::TopP;
 use crate::{
-    Attachment, AttachmentContent, ConversationId, EventValue, Image, ModelId, ReasoningFull,
-    ToolChoice, ToolDefinition, ToolOutput, ToolValue, Usage,
+    Attachment, AttachmentContent, ConversationId, EventValue, Image, MessagePhase, ModelId,
+    ReasoningFull, ToolChoice, ToolDefinition, ToolOutput, ToolValue, Usage,
 };
 
 /// Response format for structured output
@@ -169,6 +169,7 @@ impl ContextMessage {
             reasoning_details: None,
             model,
             droppable: false,
+            phase: None,
         }
         .into()
     }
@@ -183,6 +184,7 @@ impl ContextMessage {
             model: None,
             reasoning_details: None,
             droppable: false,
+            phase: None,
         }
         .into()
     }
@@ -204,6 +206,7 @@ impl ContextMessage {
             reasoning_details,
             model: None,
             droppable: false,
+            phase: None,
         }
         .into()
     }
@@ -311,6 +314,11 @@ pub struct TextMessage {
     /// Indicates whether this message can be dropped during context compaction
     #[serde(default, skip_serializing_if = "is_false")]
     pub droppable: bool,
+    /// Phase label for assistant messages (`Commentary` or `FinalAnswer`).
+    /// Preserved from OpenAI Responses API and replayed back on subsequent
+    /// requests.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub phase: Option<MessagePhase>,
 }
 
 impl TextMessage {
@@ -325,6 +333,7 @@ impl TextMessage {
             model: None,
             reasoning_details: None,
             droppable: false,
+            phase: None,
         }
     }
 
@@ -346,6 +355,7 @@ impl TextMessage {
             reasoning_details,
             model,
             droppable: false,
+            phase: None,
         }
     }
 }
@@ -554,6 +564,7 @@ impl Context {
         reasoning_details: Option<Vec<ReasoningFull>>,
         usage: Usage,
         tool_records: Vec<(ToolCallFull, ToolResult)>,
+        phase: Option<MessagePhase>,
     ) -> Self {
         // Convert flat reasoning string to reasoning_details if present
         let merged_reasoning_details = if let Some(reasoning_text) = reasoning {
@@ -573,7 +584,7 @@ impl Context {
         };
 
         // Adding tool calls
-        let message: MessageEntry = ContextMessage::assistant(
+        let mut message: MessageEntry = ContextMessage::assistant(
             content,
             thought_signature,
             merged_reasoning_details,
@@ -586,6 +597,11 @@ impl Context {
         )
         .into();
 
+        // Copy the phase (which may be `None`) onto the assistant TextMessage
+        if let ContextMessage::Text(ref mut text_msg) = message.message {
+            text_msg.phase = phase;
+        }
+
         let tool_results = tool_records
             .iter()
             .map(|record| record.1.clone())
diff --git a/crates/forge_domain/src/hook.rs b/crates/forge_domain/src/hook.rs
@@ -640,6 +640,7 @@ mod tests {
                     reasoning_details: None,
                     usage: crate::Usage::default(),
                     finish_reason: None,
+                    phase: None,
                 }),
             )),
             LifecycleEvent::ToolcallStart(EventData::new(
diff --git a/crates/forge_domain/src/message.rs b/crates/forge_domain/src/message.rs
@@ -7,6 +7,20 @@ use super::{ToolCall, ToolCallFull};
 use crate::TokenCount;
 use crate::reasoning::{Reasoning, ReasoningFull};
 
+/// Labels an assistant message as intermediate commentary or the final answer.
+///
+/// Serde form is snake_case (`commentary`, `final_answer`). For models like
+/// `gpt-5.3-codex` and beyond, resend the phase on every assistant message in
+/// follow-up requests; dropping it can degrade performance.
+#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum MessagePhase {
+    /// Intermediate commentary produced while the model is reasoning.
+    Commentary,
+    /// The final answer from the model.
+    FinalAnswer,
+}
+
 #[derive(Default, Clone, Copy, Debug, Serialize, Deserialize, PartialEq)]
 pub struct Usage {
     pub prompt_tokens: TokenCount,
@@ -47,6 +61,9 @@ pub struct ChatCompletionMessage {
     pub tool_calls: Vec<ToolCall>,
     pub finish_reason: Option<FinishReason>,
     pub usage: Option<Usage>,
+    /// Phase label for assistant messages (e.g. `Commentary` or `FinalAnswer`).
+    /// Preserved from the response and replayed back on subsequent requests.
+    pub phase: Option<MessagePhase>,
 }
 
 impl From<FinishReason> for ChatCompletionMessage {
@@ -176,6 +193,9 @@ pub struct ChatCompletionMessageFull {
     pub reasoning_details: Option<Vec<ReasoningFull>>,
     pub usage: Usage,
     pub finish_reason: Option<FinishReason>,
+    /// Phase label for the assistant message (e.g. `Commentary` or
+    /// `FinalAnswer`).
+    pub phase: Option<MessagePhase>,
 }
 
 #[cfg(test)]
diff --git a/crates/forge_domain/src/result_stream_ext.rs b/crates/forge_domain/src/result_stream_ext.rs
@@ -245,6 +245,9 @@ impl ResultStreamExt<anyhow::Error> for crate::BoxStream<ChatCompletionMessage,
             .rev()
             .find_map(|message| message.thought_signature.clone());
 
+        // Get phase from the last message that has one
+        let phase = messages.iter().rev().find_map(|message| message.phase);
+
         // Check for empty completion - map to retryable error for retry
         if content.trim().is_empty()
             && tool_calls.is_empty()
@@ -263,6 +266,7 @@ impl ResultStreamExt<anyhow::Error> for crate::BoxStream<ChatCompletionMessage,
             reasoning_details: (!total_reasoning_details.is_empty())
                 .then_some(total_reasoning_details),
             finish_reason,
+            phase,
         })
     }
 }
@@ -315,6 +319,7 @@ mod tests {
             reasoning: None,
             reasoning_details: None,
             finish_reason: None,
+            phase: None,
         };
 
         assert_eq!(actual, expected);
@@ -368,6 +373,7 @@ mod tests {
             reasoning: None,
             reasoning_details: None,
             finish_reason: None,
+            phase: None,
         };
 
         assert_eq!(actual, expected);
@@ -419,6 +425,7 @@ mod tests {
             reasoning: None,
             reasoning_details: None,
             finish_reason: None,
+            phase: None,
         };
 
         assert_eq!(actual, expected);
@@ -471,6 +478,7 @@ mod tests {
             reasoning: None,
             reasoning_details: None,
             finish_reason: None,
+            phase: None,
         };
 
         assert_eq!(actual, expected);
@@ -525,6 +533,7 @@ mod tests {
             reasoning: None,
             reasoning_details: None,
             finish_reason: Some(FinishReason::Stop),
+            phase: None,
         };
 
         assert_eq!(actual, expected);
@@ -652,6 +661,7 @@ mod tests {
             reasoning: None,
             reasoning_details: None,
             finish_reason: None,
+            phase: None,
         };
 
         assert_eq!(actual, expected);
@@ -718,6 +728,7 @@ mod tests {
             reasoning: Some("First reasoning: thinking deeply about this...".to_string()),
             reasoning_details: None,
             finish_reason: None,
+            phase: None,
         };
 
         assert_eq!(actual, expected);
@@ -773,6 +784,7 @@ mod tests {
             reasoning: None,
             reasoning_details: Some(expected_reasoning_details),
             finish_reason: None,
+            phase: None,
         };
 
         assert_eq!(actual, expected);
@@ -803,6 +815,7 @@ mod tests {
             reasoning: None, // Empty reasoning should be None
             reasoning_details: None,
             finish_reason: None,
+            phase: None,
         };
 
         assert_eq!(actual, expected);
@@ -891,6 +904,7 @@ mod tests {
             reasoning: None,
             reasoning_details: None,
             finish_reason: None,
+            phase: None,
         };
 
         assert_eq!(actual, expected);
@@ -933,6 +947,7 @@ mod tests {
             reasoning: None,
             reasoning_details: None,
             finish_reason: None,
+            phase: None,
         };
 
         assert_eq!(actual, expected);
@@ -973,6 +988,7 @@ mod tests {
             reasoning_details: None,
             finish_reason: Some(FinishReason::Stop), /* Should be from the last message with a
                                                       * finish reason */
+            phase: None,
         };
 
         assert_eq!(actual, expected);
@@ -1002,6 +1018,7 @@ mod tests {
             reasoning: None,
             reasoning_details: None,
             finish_reason: Some(FinishReason::ToolCalls),
+            phase: None,
         };
 
         assert_eq!(actual, expected);
@@ -1030,6 +1047,7 @@ mod tests {
             reasoning: None,
             reasoning_details: None,
             finish_reason: None,
+            phase: None,
         };
 
         assert_eq!(actual, expected);
@@ -1121,6 +1139,7 @@ mod tests {
             reasoning: None,
             reasoning_details: None,
             finish_reason: Some(FinishReason::Stop),
+            phase: None,
         };
 
         assert_eq!(actual, expected);
@@ -1155,6 +1174,7 @@ mod tests {
             reasoning: None,
             reasoning_details: None,
             finish_reason: None,
+            phase: None,
         };
 
         assert_eq!(actual, expected);
diff --git a/crates/forge_domain/src/transformer/transform_tool_calls.rs b/crates/forge_domain/src/transformer/transform_tool_calls.rs
@@ -43,6 +43,7 @@ impl Transformer for TransformToolCalls {
                             reasoning_details: text_msg.reasoning_details.clone(),
                             model: text_msg.model.clone(),
                             droppable: text_msg.droppable,
+                            phase: text_msg.phase,
                         })
                         .into(),
                     );
diff --git a/crates/forge_repo/src/conversation/conversation_record.rs b/crates/forge_repo/src/conversation/conversation_record.rs
@@ -362,6 +362,7 @@ impl TryFrom<TextMessageRecord> for forge_domain::TextMessage {
                 .reasoning_details
                 .map(|details| details.into_iter().map(Into::into).collect()),
             droppable: record.droppable,
+            phase: None,
         })
     }
 }
diff --git a/crates/forge_repo/src/conversation/conversation_repo.rs b/crates/forge_repo/src/conversation/conversation_repo.rs
diff --git a/crates/forge_repo/src/provider/openai_responses/repository.rs b/crates/forge_repo/src/provider/openai_responses/repository.rs
diff --git a/crates/forge_repo/src/provider/openai_responses/request.rs b/crates/forge_repo/src/provider/openai_responses/request.rs
diff --git a/crates/forge_repo/src/provider/openai_responses/response.rs b/crates/forge_repo/src/provider/openai_responses/response.rs