Skip to content

Commit c6f68c9

Browse files
authored
feat: declare server capability in shell-tool-mcp (#7112)
This introduces a new feature to Codex when it operates as an MCP _client_: if an MCP _server_ replies that it has an entry named `"codex/sandbox-state"` in its _server capabilities_, then Codex will send it an MCP notification with the following structure:

```json
{
  "method": "codex/sandbox-state/update",
  "params": {
    "sandboxPolicy": {
      "type": "workspace-write",
      "network-access": false,
      "exclude-tmpdir-env-var": false,
      "exclude-slash-tmp": false
    },
    "codexLinuxSandboxExe": null,
    "sandboxCwd": "/Users/mbolin/code/codex2"
  }
}
```

or with whatever values are appropriate for the initial `sandboxPolicy`.

**NOTE:** Codex _should_ continue to send the MCP server notifications of the same format if these things change over the lifetime of the thread, but that isn't wired up yet.

The result is that `shell-tool-mcp` can consume these values so that when it calls `codex_core::exec::process_exec_tool_call()` in `codex-rs/exec-server/src/posix/escalate_server.rs`, it now passes the correct values (whereas previously we relied on hardcoded values).

While I would argue this is a supported use case within the MCP protocol, the `rmcp` crate that we are using today does not support custom notifications. As such, I had to patch it and I submitted the patch for review, so hopefully it will be accepted in some form: modelcontextprotocol/rust-sdk#556

To test this change end-to-end:

- I ran `cargo build` in `~/code/codex2/codex-rs/exec-server`
- I built the fork of Bash in `~/code/bash/bash`
- I added the following to my `~/.codex/config.toml`:

```toml
# Use with `codex --disable shell_tool`.
[mcp_servers.execshell]
args = ["--bash", "/Users/mbolin/code/bash/bash"]
command = "/Users/mbolin/code/codex2/codex-rs/target/debug/codex-exec-mcp-server"
```

- From `~/code/codex2/codex-rs`, I ran `just codex --disable shell_tool`
- When the TUI started up, I verified that the sandbox mode is `workspace-write`
- I ran `/mcp` to verify that the shell tool from the MCP is there:

<img width="1387" height="1400" alt="image" src="https://github.com/user-attachments/assets/1a8addcc-5005-4e16-b59f-95cfd06fd4ab" />

- Then I asked it:

> what is the output of `gh issue list`

because this should be auto-approved with our existing dummy policy:

https://github.com/openai/codex/blob/af63e6eccc35783f1bf4dca3c61adb090efb6b8a/codex-rs/exec-server/src/posix.rs#L157-L164

And it worked:

<img width="1387" height="1400" alt="image" src="https://github.com/user-attachments/assets/7568d2f7-80da-4d68-86d0-c265a6f5e6c1" />
1 parent af63e6e commit c6f68c9

File tree

8 files changed

+190
-27
lines changed

8 files changed

+190
-27
lines changed

codex-rs/Cargo.lock

Lines changed: 5 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

codex-rs/Cargo.toml

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ async-trait = "0.1.89"
108108
axum = { version = "0.8", default-features = false }
109109
base64 = "0.22.1"
110110
bytes = "1.10.1"
111-
chrono = "0.4.42"
112111
chardetng = "0.1.17"
112+
chrono = "0.4.42"
113113
clap = "4"
114114
clap_complete = "4"
115115
color-eyre = "0.6.3"
@@ -120,9 +120,9 @@ diffy = "0.4.2"
120120
dirs = "6"
121121
dotenvy = "0.15.7"
122122
dunce = "1.0.4"
123+
encoding_rs = "0.8.35"
123124
env-flags = "0.1.1"
124125
env_logger = "0.11.5"
125-
encoding_rs = "0.8.35"
126126
escargot = "0.5"
127127
eventsource-stream = "0.2.3"
128128
futures = { version = "0.3", default-features = false }
@@ -167,7 +167,7 @@ ratatui-macros = "0.6.0"
167167
regex-lite = "0.1.7"
168168
regex = "1.11.1"
169169
reqwest = "0.12"
170-
rmcp = { version = "0.8.5", default-features = false }
170+
rmcp = { version = "0.9.0", default-features = false }
171171
schemars = "0.8.22"
172172
seccompiler = "0.5.0"
173173
serde = "1"
@@ -261,11 +261,7 @@ unwrap_used = "deny"
261261
# cargo-shear cannot see the platform-specific openssl-sys usage, so we
262262
# silence the false positive here instead of deleting a real dependency.
263263
[workspace.metadata.cargo-shear]
264-
ignored = [
265-
"icu_provider",
266-
"openssl-sys",
267-
"codex-utils-readiness",
268-
]
264+
ignored = ["icu_provider", "openssl-sys", "codex-utils-readiness"]
269265

270266
[profile.release]
271267
lto = "fat"
@@ -286,6 +282,7 @@ opt-level = 0
286282
# ratatui = { path = "../../ratatui" }
287283
crossterm = { git = "https://github.com/nornagon/crossterm", branch = "nornagon/color-query" }
288284
ratatui = { git = "https://github.com/nornagon/ratatui", branch = "nornagon-v0.29.0-patch" }
285+
rmcp = { git = "https://github.com/bolinfest/rust-sdk", branch = "pr556" }
289286

290287
# Uncomment to debug local changes.
291288
# rmcp = { path = "../../rust-sdk/crates/rmcp" }

codex-rs/core/src/codex.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::sync::Arc;
55
use std::sync::atomic::AtomicU64;
66

77
use crate::AuthManager;
8+
use crate::SandboxState;
89
use crate::client_common::REVIEW_PROMPT;
910
use crate::compact;
1011
use crate::compact::run_inline_auto_compact_task;
@@ -614,6 +615,22 @@ impl Session {
614615
)
615616
.await;
616617

618+
let sandbox_state = SandboxState {
619+
sandbox_policy: session_configuration.sandbox_policy.clone(),
620+
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
621+
sandbox_cwd: session_configuration.cwd.clone(),
622+
};
623+
if let Err(e) = sess
624+
.services
625+
.mcp_connection_manager
626+
.read()
627+
.await
628+
.notify_sandbox_state_change(&sandbox_state)
629+
.await
630+
{
631+
tracing::error!("Failed to notify sandbox state change: {e}");
632+
}
633+
617634
// record_initial_history can emit events. We record only after the SessionConfiguredEvent is emitted.
618635
sess.record_initial_history(initial_history).await;
619636

codex-rs/core/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ pub mod git_info;
3232
pub mod landlock;
3333
pub mod mcp;
3434
mod mcp_connection_manager;
35+
pub use mcp_connection_manager::MCP_SANDBOX_STATE_CAPABILITY;
36+
pub use mcp_connection_manager::MCP_SANDBOX_STATE_NOTIFICATION;
37+
pub use mcp_connection_manager::SandboxState;
3538
mod mcp_tool_call;
3639
mod message_history;
3740
mod model_provider_info;

codex-rs/core/src/mcp_connection_manager.rs

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use std::collections::HashMap;
1010
use std::collections::HashSet;
1111
use std::env;
1212
use std::ffi::OsString;
13+
use std::path::PathBuf;
1314
use std::sync::Arc;
1415
use std::sync::Mutex;
1516
use std::time::Duration;
@@ -28,6 +29,7 @@ use codex_protocol::protocol::McpStartupCompleteEvent;
2829
use codex_protocol::protocol::McpStartupFailure;
2930
use codex_protocol::protocol::McpStartupStatus;
3031
use codex_protocol::protocol::McpStartupUpdateEvent;
32+
use codex_protocol::protocol::SandboxPolicy;
3133
use codex_rmcp_client::ElicitationResponse;
3234
use codex_rmcp_client::OAuthCredentialsStoreMode;
3335
use codex_rmcp_client::RmcpClient;
@@ -48,6 +50,8 @@ use mcp_types::Resource;
4850
use mcp_types::ResourceTemplate;
4951
use mcp_types::Tool;
5052

53+
use serde::Deserialize;
54+
use serde::Serialize;
5155
use serde_json::json;
5256
use sha1::Digest;
5357
use sha1::Sha1;
@@ -174,6 +178,7 @@ struct ManagedClient {
174178
tools: Vec<ToolInfo>,
175179
tool_filter: ToolFilter,
176180
tool_timeout: Option<Duration>,
181+
server_supports_sandbox_state_capability: bool,
177182
}
178183

179184
#[derive(Clone)]
@@ -222,6 +227,35 @@ impl AsyncManagedClient {
222227
async fn client(&self) -> Result<ManagedClient, StartupOutcomeError> {
223228
self.client.clone().await
224229
}
230+
231+
async fn notify_sandbox_state_change(&self, sandbox_state: &SandboxState) -> Result<()> {
232+
let managed = self.client().await?;
233+
if !managed.server_supports_sandbox_state_capability {
234+
return Ok(());
235+
}
236+
237+
managed
238+
.client
239+
.send_custom_notification(
240+
MCP_SANDBOX_STATE_NOTIFICATION,
241+
Some(serde_json::to_value(sandbox_state)?),
242+
)
243+
.await
244+
}
245+
}
246+
247+
pub const MCP_SANDBOX_STATE_CAPABILITY: &str = "codex/sandbox-state";
248+
249+
/// Custom MCP notification for sandbox state updates.
250+
/// When used, the `params` field of the notification is [`SandboxState`].
251+
pub const MCP_SANDBOX_STATE_NOTIFICATION: &str = "codex/sandbox-state/update";
252+
253+
#[derive(Debug, Clone, Serialize, Deserialize)]
254+
#[serde(rename_all = "camelCase")]
255+
pub struct SandboxState {
256+
pub sandbox_policy: SandboxPolicy,
257+
pub codex_linux_sandbox_exe: Option<PathBuf>,
258+
pub sandbox_cwd: PathBuf,
225259
}
226260

227261
/// A thin wrapper around a set of running [`RmcpClient`] instances.
@@ -567,6 +601,34 @@ impl McpConnectionManager {
567601
.get(tool_name)
568602
.map(|tool| (tool.server_name.clone(), tool.tool_name.clone()))
569603
}
604+
605+
pub async fn notify_sandbox_state_change(&self, sandbox_state: &SandboxState) -> Result<()> {
606+
let mut join_set = JoinSet::new();
607+
608+
for async_managed_client in self.clients.values() {
609+
let sandbox_state = sandbox_state.clone();
610+
let async_managed_client = async_managed_client.clone();
611+
join_set.spawn(async move {
612+
async_managed_client
613+
.notify_sandbox_state_change(&sandbox_state)
614+
.await
615+
});
616+
}
617+
618+
while let Some(join_res) = join_set.join_next().await {
619+
match join_res {
620+
Ok(Ok(())) => {}
621+
Ok(Err(err)) => {
622+
warn!("Failed to notify sandbox state change to MCP server: {err:#}");
623+
}
624+
Err(err) => {
625+
warn!("Task panic when notifying sandbox state change to MCP server: {err:#}");
626+
}
627+
}
628+
}
629+
630+
Ok(())
631+
}
570632
}
571633

572634
async fn emit_update(
@@ -700,7 +762,7 @@ async fn start_server_task(
700762

701763
let send_elicitation = elicitation_requests.make_sender(server_name.clone(), tx_event);
702764

703-
client
765+
let initialize_result = client
704766
.initialize(params, startup_timeout, send_elicitation)
705767
.await
706768
.map_err(StartupOutcomeError::from)?;
@@ -709,11 +771,19 @@ async fn start_server_task(
709771
.await
710772
.map_err(StartupOutcomeError::from)?;
711773

774+
let server_supports_sandbox_state_capability = initialize_result
775+
.capabilities
776+
.experimental
777+
.as_ref()
778+
.and_then(|exp| exp.get(MCP_SANDBOX_STATE_CAPABILITY))
779+
.is_some();
780+
712781
let managed = ManagedClient {
713782
client: Arc::clone(&client),
714783
tools,
715784
tool_timeout: Some(tool_timeout),
716785
tool_filter,
786+
server_supports_sandbox_state_capability,
717787
};
718788

719789
Ok(managed)

codex-rs/exec-server/src/posix/escalate_server.rs

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ use std::time::Duration;
88
use anyhow::Context as _;
99
use path_absolutize::Absolutize as _;
1010

11+
use codex_core::SandboxState;
1112
use codex_core::exec::process_exec_tool_call;
12-
use codex_core::protocol::SandboxPolicy;
1313
use tokio::process::Command;
1414
use tokio_util::sync::CancellationToken;
1515

@@ -48,6 +48,7 @@ impl EscalateServer {
4848
&self,
4949
params: ExecParams,
5050
cancel_rx: CancellationToken,
51+
sandbox_state: &SandboxState,
5152
) -> anyhow::Result<ExecResult> {
5253
let (escalate_server, escalate_client) = AsyncDatagramSocket::pair()?;
5354
let client_socket = escalate_client.into_inner();
@@ -64,12 +65,6 @@ impl EscalateServer {
6465
self.execve_wrapper.to_string_lossy().to_string(),
6566
);
6667

67-
// TODO: use the sandbox policy and cwd from the calling client.
68-
// Note that sandbox_cwd is ignored for ReadOnly, but needs to be legit
69-
// for `SandboxPolicy::WorkspaceWrite`.
70-
let sandbox_policy = SandboxPolicy::ReadOnly;
71-
let sandbox_cwd = PathBuf::from("/__NONEXISTENT__");
72-
7368
let ExecParams {
7469
command,
7570
workdir,
@@ -94,9 +89,9 @@ impl EscalateServer {
9489
justification: None,
9590
arg0: None,
9691
},
97-
&sandbox_policy,
98-
&sandbox_cwd,
99-
&None,
92+
&sandbox_state.sandbox_policy,
93+
&sandbox_state.sandbox_cwd,
94+
&sandbox_state.codex_linux_sandbox_exe,
10095
None,
10196
)
10297
.await?;

0 commit comments

Comments
 (0)