diff --git a/codex-rs/app-server-protocol/src/protocol/v2.rs b/codex-rs/app-server-protocol/src/protocol/v2.rs index 2d8a9113f1f..d3d6aac25d2 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2.rs @@ -1379,6 +1379,9 @@ pub enum TurnStatus { pub struct TurnStartParams { pub thread_id: String, pub input: Vec, + /// Helpful information about the user's IDE state. Use judiciously to provide the model with + /// useful context. + pub user_ide_context: Option, /// Override the working directory for this turn and subsequent turns. pub cwd: Option, /// Override the approval policy for this turn and subsequent turns. diff --git a/codex-rs/app-server/src/codex_message_processor.rs b/codex-rs/app-server/src/codex_message_processor.rs index d17dc76b4b2..11a05dc00e9 100644 --- a/codex-rs/app-server/src/codex_message_processor.rs +++ b/codex-rs/app-server/src/codex_message_processor.rs @@ -131,7 +131,9 @@ use codex_core::find_thread_path_by_id_str; use codex_core::git_info::git_diff_to_remote; use codex_core::mcp::collect_mcp_snapshot; use codex_core::mcp::group_tools_by_server; +use codex_core::normalize_user_ide_context; use codex_core::parse_cursor; +use codex_core::prepend_user_ide_context; use codex_core::protocol::EventMsg; use codex_core::protocol::Op; use codex_core::protocol::ReviewDelivery as CoreReviewDelivery; @@ -1604,6 +1606,7 @@ impl CodexMessageProcessor { config: request_overrides, base_instructions, developer_instructions, + .. } = params; let overrides_requested = model.is_some() @@ -2744,12 +2747,17 @@ impl CodexMessageProcessor { }; // Map v2 input items to core input items. - let mapped_items: Vec = params + let mut mapped_items: Vec = params .input .into_iter() .map(V2UserInput::into_core) .collect(); + if let Some(user_ide_context) = params.user_ide_context.and_then(normalize_user_ide_context) + { + prepend_user_ide_context(&mut mapped_items, &user_ide_context); + } + let has_any_overrides = params.cwd.is_some() || params.approval_policy.is_some() || params.sandbox_policy.is_some() diff --git a/codex-rs/app-server/tests/suite/v2/turn_start.rs b/codex-rs/app-server/tests/suite/v2/turn_start.rs index ab450ea832c..a2321eda46d 100644 --- a/codex-rs/app-server/tests/suite/v2/turn_start.rs +++ b/codex-rs/app-server/tests/suite/v2/turn_start.rs @@ -35,12 +35,22 @@ use codex_core::protocol_config_types::ReasoningSummary; use codex_protocol::openai_models::ReasoningEffort; use core_test_support::skip_if_no_network; use pretty_assertions::assert_eq; +use serde_json::Value; use std::path::Path; use tempfile::TempDir; use tokio::time::timeout; const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); +fn value_contains_str(value: &Value, needle: &str) -> bool { + match value { + Value::Null | Value::Bool(_) | Value::Number(_) => false, + Value::String(s) => s.contains(needle), + Value::Array(values) => values.iter().any(|v| value_contains_str(v, needle)), + Value::Object(map) => map.values().any(|v| value_contains_str(v, needle)), + } +} + #[tokio::test] async fn turn_start_emits_notifications_and_accepts_model_override() -> Result<()> { // Provide a mock server and config so model wiring is valid. @@ -148,6 +158,71 @@ async fn turn_start_emits_notifications_and_accepts_model_override() -> Result<( Ok(()) } +#[tokio::test] +async fn turn_start_includes_user_ide_context_in_model_request() -> Result<()> { + let user_ide_context = "some ide context"; + let tagged = format!("{user_ide_context}"); + + // Two Codex turns hit the mock model (session start + turn/start). + let responses = vec![ + create_final_assistant_message_sse_response("Done")?, + create_final_assistant_message_sse_response("Done")?, + ]; + let server = create_mock_chat_completions_server_unchecked(responses).await; + + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), &server.uri(), "never")?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let thread_req = mcp + .send_thread_start_request(ThreadStartParams { + model: Some("mock-model".to_string()), + ..Default::default() + }) + .await?; + let thread_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(thread_req)), + ) + .await??; + let ThreadStartResponse { thread, .. } = to_response::(thread_resp)?; + + let turn_req = mcp + .send_turn_start_request(TurnStartParams { + thread_id: thread.id.clone(), + input: vec![V2UserInput::Text { + text: "Hello".to_string(), + }], + user_ide_context: Some(user_ide_context.to_string()), + ..Default::default() + }) + .await?; + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn_req)), + ) + .await??; + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("turn/completed"), + ) + .await??; + + let requests = server.received_requests().await.expect("received requests"); + assert!( + requests.iter().any(|request| { + request + .body_json::() + .is_ok_and(|body| value_contains_str(&body, &tagged)) + }), + "expected request body to contain tagged user_ide_context" + ); + + Ok(()) +} + #[tokio::test] async fn turn_start_accepts_local_image_input() -> Result<()> { // Two Codex turns hit the mock model (session start + turn/start). @@ -530,6 +605,7 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> { input: vec![V2UserInput::Text { text: "first turn".to_string(), }], + user_ide_context: None, cwd: Some(first_cwd.clone()), approval_policy: Some(codex_app_server_protocol::AskForApproval::Never), sandbox_policy: Some(codex_app_server_protocol::SandboxPolicy::WorkspaceWrite { @@ -562,6 +638,7 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> { input: vec![V2UserInput::Text { text: "second turn".to_string(), }], + user_ide_context: None, cwd: Some(second_cwd.clone()), approval_policy: Some(codex_app_server_protocol::AskForApproval::Never), sandbox_policy: Some(codex_app_server_protocol::SandboxPolicy::DangerFullAccess), diff --git a/codex-rs/core/src/environment_context.rs b/codex-rs/core/src/environment_context.rs index 6a0e0f26cd9..532bc5e7cb7 100644 --- a/codex-rs/core/src/environment_context.rs +++ b/codex-rs/core/src/environment_context.rs @@ -8,11 +8,40 @@ use codex_protocol::models::ContentItem; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::ENVIRONMENT_CONTEXT_CLOSE_TAG; use codex_protocol::protocol::ENVIRONMENT_CONTEXT_OPEN_TAG; +use codex_protocol::protocol::USER_IDE_CONTEXT_CLOSE_TAG; +use codex_protocol::protocol::USER_IDE_CONTEXT_OPEN_TAG; +use codex_protocol::user_input::UserInput; use codex_utils_absolute_path::AbsolutePathBuf; use serde::Deserialize; use serde::Serialize; use std::path::PathBuf; +pub(crate) fn is_user_ide_context(text: &str) -> bool { + text.starts_with(USER_IDE_CONTEXT_OPEN_TAG) && text.ends_with(USER_IDE_CONTEXT_CLOSE_TAG) +} + +pub fn normalize_user_ide_context(user_ide_context: String) -> Option { + let trimmed = user_ide_context.trim(); + if trimmed.is_empty() { + None + } else { + Some(trimmed.to_string()) + } +} + +pub fn prepend_user_ide_context(items: &mut Vec, user_ide_context: &str) { + // Bundle IDE context as its own content item so downstream request builders can place it + // into the model input while core can still filter it from user-visible turn items. + items.insert( + 0, + UserInput::Text { + text: format!( + "{USER_IDE_CONTEXT_OPEN_TAG}{user_ide_context}{USER_IDE_CONTEXT_CLOSE_TAG}" + ), + }, + ); +} + #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[serde(rename = "environment_context", rename_all = "snake_case")] pub(crate) struct EnvironmentContext { @@ -210,6 +239,36 @@ mod tests { } } + #[test] + fn normalize_user_ide_context_trims_and_drops_empty() { + assert_eq!( + normalize_user_ide_context(" some context \n".to_string()), + Some("some context".to_string()) + ); + assert_eq!(normalize_user_ide_context(" \n\t".to_string()), None); + } + + #[test] + fn prepend_user_ide_context_inserts_tagged_text_first() { + let mut items = vec![UserInput::Text { + text: "Hello world".to_string(), + }]; + + prepend_user_ide_context(&mut items, "ctx"); + + assert_eq!( + items, + vec![ + UserInput::Text { + text: "ctx".to_string(), + }, + UserInput::Text { + text: "Hello world".to_string(), + }, + ] + ); + } + #[test] fn serialize_workspace_write_environment_context() { let cwd = test_path_buf("/repo"); diff --git a/codex-rs/core/src/event_mapping.rs b/codex-rs/core/src/event_mapping.rs index 6ab6291a4bb..6bc7174f5b2 100644 --- a/codex-rs/core/src/event_mapping.rs +++ b/codex-rs/core/src/event_mapping.rs @@ -13,6 +13,7 @@ use codex_protocol::user_input::UserInput; use tracing::warn; use uuid::Uuid; +use crate::environment_context::is_user_ide_context; use crate::user_instructions::SkillInstructions; use crate::user_instructions::UserInstructions; use crate::user_shell_command::is_user_shell_command_text; @@ -38,6 +39,11 @@ fn parse_user_message(message: &[ContentItem]) -> Option { if is_session_prefix(text) || is_user_shell_command_text(text) { return None; } + // user_ide_context is bundled with the user's prompt in its own content item. + // skip over it, but there might be subsequent content items to include + if is_user_ide_context(text) { + continue; + } content.push(UserInput::Text { text: text.clone() }); } ContentItem::InputImage { image_url } => { @@ -49,12 +55,16 @@ fn parse_user_message(message: &[ContentItem]) -> Option { if is_session_prefix(text) { return None; } - warn!("Output text in user message: {}", text); + warn!("Output text in user message: {text}"); } } } - Some(UserMessageItem::new(&content)) + if content.is_empty() { + None + } else { + Some(UserMessageItem::new(&content)) + } } fn parse_agent_message(id: Option<&String>, message: &[ContentItem]) -> AgentMessageItem { @@ -131,14 +141,26 @@ mod tests { use super::parse_turn_item; use codex_protocol::items::AgentMessageContent; use codex_protocol::items::TurnItem; + use codex_protocol::models::ContentItem; use codex_protocol::models::ReasoningItemContent; use codex_protocol::models::ReasoningItemReasoningSummary; use codex_protocol::models::ResponseItem; use codex_protocol::models::WebSearchAction; + use codex_protocol::protocol::USER_IDE_CONTEXT_CLOSE_TAG; + use codex_protocol::protocol::USER_IDE_CONTEXT_OPEN_TAG; use codex_protocol::user_input::UserInput; use pretty_assertions::assert_eq; + fn assert_eq_user_message_content(actual: TurnItem, expected_content: &[UserInput]) { + match actual { + TurnItem::UserMessage(user) => { + assert_eq!(user.content, expected_content); + } + other => panic!("expected TurnItem::UserMessage, got {other:?}"), + } + } + #[test] fn parses_user_message_with_text_and_two_images() { let img1 = "https://example.com/one.png".to_string(); @@ -162,19 +184,14 @@ mod tests { let turn_item = parse_turn_item(&item).expect("expected user message turn item"); - match turn_item { - TurnItem::UserMessage(user) => { - let expected_content = vec![ - UserInput::Text { - text: "Hello world".to_string(), - }, - UserInput::Image { image_url: img1 }, - UserInput::Image { image_url: img2 }, - ]; - assert_eq!(user.content, expected_content); - } - other => panic!("expected TurnItem::UserMessage, got {other:?}"), - } + let expected_content = vec![ + UserInput::Text { + text: "Hello world".to_string(), + }, + UserInput::Image { image_url: img1 }, + UserInput::Image { image_url: img2 }, + ]; + assert_eq_user_message_content(turn_item, &expected_content); } #[test] @@ -216,6 +233,13 @@ mod tests { text: "echo 42".to_string(), }], }, + ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: "echo 42".to_string(), + }], + }, ]; for item in items { @@ -224,6 +248,137 @@ mod tests { } } + #[test] + fn leaves_tags_in_user_message() { + let test_cases = vec![ + "stuff test_text", + "stuff test_text", + "stuff # AGENTS.md instructions for test_directory\n\n\ntest_text\n", + "stuff \ndemo\nskills/demo/SKILL.md\nbody\n", + "stuff echo 42", + "stuff echo 42", + ]; + + for test_case in test_cases { + let item = ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: test_case.to_string(), + }], + }; + let turn_item = parse_turn_item(&item); + match turn_item { + Some(TurnItem::UserMessage(user)) => { + assert_eq!( + user.content, + vec![UserInput::Text { + text: test_case.to_string(), + }] + ); + } + other => panic!("expected TurnItem::UserMessage, got {other:?}"), + } + } + } + + #[test] + fn trims_user_ide_context_item_from_user_message() { + let message = ResponseItem::Message { + id: Some("user-1".to_string()), + role: "user".to_string(), + content: vec![ + ContentItem::InputText { + text: format!( + "{USER_IDE_CONTEXT_OPEN_TAG}some context{USER_IDE_CONTEXT_CLOSE_TAG}" + ), + }, + ContentItem::InputText { + text: "Hello world".to_string(), + }, + ], + }; + + let turn_item = parse_turn_item(&message).expect("expected user message turn item"); + + assert_eq_user_message_content( + turn_item, + &[UserInput::Text { + text: "Hello world".to_string(), + }], + ); + } + + #[test] + fn parses_user_message_with_only_images() { + let img1 = "https://example.com/one.png".to_string(); + let img2 = "https://example.com/two.jpg".to_string(); + let message = ResponseItem::Message { + id: Some("user-2".to_string()), + role: "user".to_string(), + content: vec![ + ContentItem::InputImage { + image_url: img1.clone(), + }, + ContentItem::InputImage { + image_url: img2.clone(), + }, + ], + }; + + let turn_item = parse_turn_item(&message).expect("expected user message turn item"); + + let expected_content = vec![ + UserInput::Image { image_url: img1 }, + UserInput::Image { image_url: img2 }, + ]; + assert_eq_user_message_content(turn_item, &expected_content); + } + + #[test] + fn ignores_output_text_in_user_message() { + let message = ResponseItem::Message { + id: Some("user-3".to_string()), + role: "user".to_string(), + content: vec![ + ContentItem::OutputText { + text: "server echo".to_string(), + }, + ContentItem::InputText { + text: "Hello world".to_string(), + }, + ], + }; + + let turn_item = parse_turn_item(&message).expect("expected user message turn item"); + + assert_eq_user_message_content( + turn_item, + &[UserInput::Text { + text: "Hello world".to_string(), + }], + ); + } + + #[test] + fn drops_user_message_when_shell_command_present() { + let message = ResponseItem::Message { + id: Some("user-4".to_string()), + role: "user".to_string(), + content: vec![ + ContentItem::InputText { + text: "echo 42".to_string(), + }, + ContentItem::InputImage { + image_url: "https://example.com/one.png".to_string(), + }, + ], + }; + + let turn_item = parse_turn_item(&message); + assert!(turn_item.is_none(), "expected none, got {turn_item:?}"); + } + #[test] fn parses_agent_message() { let item = ResponseItem::Message { diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 370c1ecb97e..6ee79e5ddfd 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -111,6 +111,8 @@ pub mod util; pub use apply_patch::CODEX_APPLY_PATCH_ARG1; pub use command_safety::is_dangerous_command; pub use command_safety::is_safe_command; +pub use environment_context::normalize_user_ide_context; +pub use environment_context::prepend_user_ide_context; pub use exec_policy::ExecPolicyError; pub use exec_policy::load_exec_policy; pub use safety::get_platform_sandbox; diff --git a/codex-rs/core/src/rollout/truncation.rs b/codex-rs/core/src/rollout/truncation.rs index b8127f0345b..81c1fe3f01b 100644 --- a/codex-rs/core/src/rollout/truncation.rs +++ b/codex-rs/core/src/rollout/truncation.rs @@ -77,7 +77,7 @@ mod tests { ResponseItem::Message { id: None, role: "user".to_string(), - content: vec![ContentItem::OutputText { + content: vec![ContentItem::InputText { text: text.to_string(), }], } diff --git a/codex-rs/core/src/thread_manager.rs b/codex-rs/core/src/thread_manager.rs index d82b242fe5c..fa202ea6c88 100644 --- a/codex-rs/core/src/thread_manager.rs +++ b/codex-rs/core/src/thread_manager.rs @@ -310,7 +310,7 @@ mod tests { ResponseItem::Message { id: None, role: "user".to_string(), - content: vec![ContentItem::OutputText { + content: vec![ContentItem::InputText { text: text.to_string(), }], } diff --git a/codex-rs/protocol/src/items.rs b/codex-rs/protocol/src/items.rs index 36ee0be07ce..3d34a753bb4 100644 --- a/codex-rs/protocol/src/items.rs +++ b/codex-rs/protocol/src/items.rs @@ -10,7 +10,7 @@ use serde::Deserialize; use serde::Serialize; use ts_rs::TS; -#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)] +#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq)] #[serde(tag = "type")] #[ts(tag = "type")] pub enum TurnItem { @@ -20,26 +20,26 @@ pub enum TurnItem { WebSearch(WebSearchItem), } -#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)] +#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq)] pub struct UserMessageItem { pub id: String, pub content: Vec, } -#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)] +#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq)] #[serde(tag = "type")] #[ts(tag = "type")] pub enum AgentMessageContent { Text { text: String }, } -#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)] +#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq)] pub struct AgentMessageItem { pub id: String, pub content: Vec, } -#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)] +#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq)] pub struct ReasoningItem { pub id: String, pub summary_text: Vec, @@ -47,7 +47,7 @@ pub struct ReasoningItem { pub raw_content: Vec, } -#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)] +#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq)] pub struct WebSearchItem { pub id: String, pub query: String, diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index e3748bafc6b..835e29a1b24 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -49,6 +49,8 @@ pub const USER_INSTRUCTIONS_OPEN_TAG: &str = ""; pub const USER_INSTRUCTIONS_CLOSE_TAG: &str = ""; pub const ENVIRONMENT_CONTEXT_OPEN_TAG: &str = ""; pub const ENVIRONMENT_CONTEXT_CLOSE_TAG: &str = ""; +pub const USER_IDE_CONTEXT_OPEN_TAG: &str = ""; +pub const USER_IDE_CONTEXT_CLOSE_TAG: &str = ""; pub const USER_MESSAGE_BEGIN: &str = "## My request for Codex:"; /// Submission Queue Entry - requests from user