diff --git a/codex-rs/app-server/tests/suite/send_message.rs b/codex-rs/app-server/tests/suite/send_message.rs index f57b5f2ee4a..83e809f48eb 100644 --- a/codex-rs/app-server/tests/suite/send_message.rs +++ b/codex-rs/app-server/tests/suite/send_message.rs @@ -13,11 +13,15 @@ use codex_app_server_protocol::SendUserMessageParams; use codex_app_server_protocol::SendUserMessageResponse; use codex_protocol::ThreadId; use codex_protocol::models::ContentItem; +use codex_protocol::models::DeveloperInstructions; use codex_protocol::models::ResponseItem; +use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::RawResponseItemEvent; +use codex_protocol::protocol::SandboxPolicy; use core_test_support::responses; use pretty_assertions::assert_eq; use std::path::Path; +use std::path::PathBuf; use tempfile::TempDir; use tokio::time::timeout; @@ -194,6 +198,9 @@ async fn test_send_message_raw_notifications_opt_in() -> Result<()> { }) .await?; + let permissions = read_raw_response_item(&mut mcp, conversation_id).await; + assert_permissions_message(&permissions); + let developer = read_raw_response_item(&mut mcp, conversation_id).await; assert_developer_message(&developer, "Use the test harness tools."); @@ -340,6 +347,27 @@ fn assert_instructions_message(item: &ResponseItem) { } } +fn assert_permissions_message(item: &ResponseItem) { + match item { + ResponseItem::Message { role, content, .. } => { + assert_eq!(role, "developer"); + let texts = content_texts(content); + let expected = DeveloperInstructions::from_policy( + &SandboxPolicy::DangerFullAccess, + AskForApproval::Never, + &PathBuf::from("/tmp"), + ) + .into_text(); + assert_eq!( + texts, + vec![expected.as_str()], + "expected permissions developer message, got {texts:?}" + ); + } + other => panic!("expected permissions message, got {other:?}"), + } +} + fn assert_developer_message(item: &ResponseItem, expected_text: &str) { match item { ResponseItem::Message { role, content, .. 
} => { diff --git a/codex-rs/core/gpt-5.1-codex-max_prompt.md b/codex-rs/core/gpt-5.1-codex-max_prompt.md index a8227c893f0..8e3f08fb514 100644 --- a/codex-rs/core/gpt-5.1-codex-max_prompt.md +++ b/codex-rs/core/gpt-5.1-codex-max_prompt.md @@ -25,43 +25,6 @@ When using the planning tool: - Do not make single-step plans. - When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan. -## Codex CLI harness, sandboxing, and approvals - -The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from. - -Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are: -- **read-only**: The sandbox only permits reading files. -- **workspace-write**: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. -- **danger-full-access**: No filesystem sandboxing - all commands are permitted. - -Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are: -- **restricted**: Requires approval -- **enabled**: No approval needed - -Approvals are your mechanism to get user consent to run shell commands without the sandbox. Possible configuration options for `approval_policy` are -- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands. -- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox. -- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for it in the `shell` command description.) 
-- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding. - -When you are running with `approval_policy == on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval: -- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var) -- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files. -- You are running sandboxed and need to run a command that requires network access (e.g. installing packages) -- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command. -- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for -- (for all of these, you should weigh alternative paths that do not require approval) - -When `sandbox_mode` is set to read-only, you'll need to request approval for any command that isn't a read. - -You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing enabled, and approval on-failure. 
- -Although they introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless it is set to "never", in which case never ask for approvals. - -When requesting approval to execute a command that will require escalated privileges: - - Provide the `sandbox_permissions` parameter with the value `"require_escalated"` - - Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter - ## Special user requests - If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so. diff --git a/codex-rs/core/gpt-5.2-codex_prompt.md b/codex-rs/core/gpt-5.2-codex_prompt.md index a8227c893f0..8e3f08fb514 100644 --- a/codex-rs/core/gpt-5.2-codex_prompt.md +++ b/codex-rs/core/gpt-5.2-codex_prompt.md @@ -25,43 +25,6 @@ When using the planning tool: - Do not make single-step plans. - When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan. -## Codex CLI harness, sandboxing, and approvals - -The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from. - -Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are: -- **read-only**: The sandbox only permits reading files. -- **workspace-write**: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. -- **danger-full-access**: No filesystem sandboxing - all commands are permitted. - -Network sandboxing defines whether network can be accessed without approval. 
Options for `network_access` are: -- **restricted**: Requires approval -- **enabled**: No approval needed - -Approvals are your mechanism to get user consent to run shell commands without the sandbox. Possible configuration options for `approval_policy` are -- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands. -- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox. -- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for it in the `shell` command description.) -- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding. - -When you are running with `approval_policy == on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval: -- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var) -- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files. -- You are running sandboxed and need to run a command that requires network access (e.g. 
installing packages) -- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command. -- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for -- (for all of these, you should weigh alternative paths that do not require approval) - -When `sandbox_mode` is set to read-only, you'll need to request approval for any command that isn't a read. - -You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing enabled, and approval on-failure. - -Although they introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless it is set to "never", in which case never ask for approvals. - -When requesting approval to execute a command that will require escalated privileges: - - Provide the `sandbox_permissions` parameter with the value `"require_escalated"` - - Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter - ## Special user requests - If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so. 
diff --git a/codex-rs/core/gpt_5_1_prompt.md b/codex-rs/core/gpt_5_1_prompt.md index a4492c6acbc..440422ae6ae 100644 --- a/codex-rs/core/gpt_5_1_prompt.md +++ b/codex-rs/core/gpt_5_1_prompt.md @@ -159,43 +159,6 @@ If completing the user's task requires writing or modifying files, your code and - Do not use one-letter variable names unless explicitly requested. - NEVER output inline citations like "【F:README.md†L5-L14】" in your outputs. The CLI is not able to render these so they will just be broken in the UI. Instead, if you output valid filepaths, users will be able to click on them to open the files in their editor. -## Codex CLI harness, sandboxing, and approvals - -The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from. - -Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are: -- **read-only**: The sandbox only permits reading files. -- **workspace-write**: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. -- **danger-full-access**: No filesystem sandboxing - all commands are permitted. - -Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are: -- **restricted**: Requires approval -- **enabled**: No approval needed - -Approvals are your mechanism to get user consent to run shell commands without the sandbox. Possible configuration options for `approval_policy` are -- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands. -- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox. 
-- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for escalating in the tool definition.) -- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding. - -When you are running with `approval_policy == on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval: -- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var) -- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files. -- You are running sandboxed and need to run a command that requires network access (e.g. installing packages) -- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters. Within this harness, prefer requesting approval via the tool over asking in natural language. -- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for -- (for all of these, you should weigh alternative paths that do not require approval) - -When `sandbox_mode` is set to read-only, you'll need to request approval for any command that isn't a read. 
- -You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing enabled, and approval on-failure. - -Although they introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless it is set to "never", in which case never ask for approvals. - -When requesting approval to execute a command that will require escalated privileges: - - Provide the `sandbox_permissions` parameter with the value `"require_escalated"` - - Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter - ## Validating your work If the codebase has tests or the ability to build or run, consider using them to verify changes once your work is complete. diff --git a/codex-rs/core/gpt_5_2_prompt.md b/codex-rs/core/gpt_5_2_prompt.md index cfbb220849c..7dd684bf061 100644 --- a/codex-rs/core/gpt_5_2_prompt.md +++ b/codex-rs/core/gpt_5_2_prompt.md @@ -133,43 +133,6 @@ If completing the user's task requires writing or modifying files, your code and - Do not use one-letter variable names unless explicitly requested. - NEVER output inline citations like "【F:README.md†L5-L14】" in your outputs. The CLI is not able to render these so they will just be broken in the UI. Instead, if you output valid filepaths, users will be able to click on them to open the files in their editor. -## Codex CLI harness, sandboxing, and approvals - -The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from. - -Filesystem sandboxing defines which files can be read or written. 
The options for `sandbox_mode` are: -- **read-only**: The sandbox only permits reading files. -- **workspace-write**: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. -- **danger-full-access**: No filesystem sandboxing - all commands are permitted. - -Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are: -- **restricted**: Requires approval -- **enabled**: No approval needed - -Approvals are your mechanism to get user consent to run shell commands without the sandbox. Possible configuration options for `approval_policy` are -- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands. -- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox. -- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for escalating in the tool definition.) -- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding. 
- -When you are running with `approval_policy == on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval: -- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var) -- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files. -- You are running sandboxed and need to run a command that requires network access (e.g. installing packages) -- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command. -- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for -- (for all of these, you should weigh alternative paths that do not require approval) - -When `sandbox_mode` is set to read-only, you'll need to request approval for any command that isn't a read. - -You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing enabled, and approval on-failure. - -Although they introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless it is set to "never", in which case never ask for approvals. 
- -When requesting approval to execute a command that will require escalated privileges: - - Provide the `sandbox_permissions` parameter with the value `"require_escalated"` - - Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter - ## Validating your work If the codebase has tests, or the ability to build or run tests, consider using them to verify changes once your work is complete. diff --git a/codex-rs/core/gpt_5_codex_prompt.md b/codex-rs/core/gpt_5_codex_prompt.md index e2f9017874a..88a569fa723 100644 --- a/codex-rs/core/gpt_5_codex_prompt.md +++ b/codex-rs/core/gpt_5_codex_prompt.md @@ -25,43 +25,6 @@ When using the planning tool: - Do not make single-step plans. - When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan. -## Codex CLI harness, sandboxing, and approvals - -The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from. - -Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are: -- **read-only**: The sandbox only permits reading files. -- **workspace-write**: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. -- **danger-full-access**: No filesystem sandboxing - all commands are permitted. - -Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are: -- **restricted**: Requires approval -- **enabled**: No approval needed - -Approvals are your mechanism to get user consent to run shell commands without the sandbox. Possible configuration options for `approval_policy` are -- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands. 
-- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox. -- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for it in the `shell` command description.) -- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding. - -When you are running with `approval_policy == on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval: -- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var) -- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files. -- You are running sandboxed and need to run a command that requires network access (e.g. installing packages) -- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command. 
-- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for -- (for all of these, you should weigh alternative paths that do not require approval) - -When `sandbox_mode` is set to read-only, you'll need to request approval for any command that isn't a read. - -You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing enabled, and approval on-failure. - -Although they introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless it is set to "never", in which case never ask for approvals. - -When requesting approval to execute a command that will require escalated privileges: - - Provide the `sandbox_permissions` parameter with the value `"require_escalated"` - - Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter - ## Special user requests - If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so. diff --git a/codex-rs/core/prompt.md b/codex-rs/core/prompt.md index d8bebc371b2..4886c7ef445 100644 --- a/codex-rs/core/prompt.md +++ b/codex-rs/core/prompt.md @@ -146,41 +146,6 @@ If completing the user's task requires writing or modifying files, your code and - Do not use one-letter variable names unless explicitly requested. - NEVER output inline citations like "【F:README.md†L5-L14】" in your outputs. The CLI is not able to render these so they will just be broken in the UI. 
Instead, if you output valid filepaths, users will be able to click on them to open the files in their editor. -## Sandbox and approvals - -The Codex CLI harness supports several different sandboxing, and approval configurations that the user can choose from. - -Filesystem sandboxing prevents you from editing files without user approval. The options are: - -- **read-only**: You can only read files. -- **workspace-write**: You can read files. You can write to files in your workspace folder, but not outside it. -- **danger-full-access**: No filesystem sandboxing. - -Network sandboxing prevents you from accessing network without approval. Options are - -- **restricted** -- **enabled** - -Approvals are your mechanism to get user consent to perform more privileged actions. Although they introduce friction to the user because your work is paused until the user responds, you should leverage them to accomplish your important work. Do not let these settings or the sandbox deter you from attempting to accomplish the user's task. Approval options are - -- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands. -- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox. -- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for it in the `shell` command description.) -- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. 
If this mode is pared with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding. - -When you are running with approvals `on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval: - -- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /tmp) -- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files. -- You are running sandboxed and need to run a command that requires network access (e.g. installing packages) -- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. -- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for -- (For all of these, you should weigh alternative paths that do not require approval.) - -Note that when sandboxing is set to read-only, you'll need to request approval for any command that isn't a read. - -You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing ON, and approval on-failure. - ## Validating your work If the codebase has tests or the ability to build or run, consider using them to verify that your work is complete. 
diff --git a/codex-rs/core/prompt_with_apply_patch_instructions.md b/codex-rs/core/prompt_with_apply_patch_instructions.md index af5537c924d..f9c308fbd15 100644 --- a/codex-rs/core/prompt_with_apply_patch_instructions.md +++ b/codex-rs/core/prompt_with_apply_patch_instructions.md @@ -146,41 +146,6 @@ If completing the user's task requires writing or modifying files, your code and - Do not use one-letter variable names unless explicitly requested. - NEVER output inline citations like "【F:README.md†L5-L14】" in your outputs. The CLI is not able to render these so they will just be broken in the UI. Instead, if you output valid filepaths, users will be able to click on them to open the files in their editor. -## Sandbox and approvals - -The Codex CLI harness supports several different sandboxing, and approval configurations that the user can choose from. - -Filesystem sandboxing prevents you from editing files without user approval. The options are: - -- **read-only**: You can only read files. -- **workspace-write**: You can read files. You can write to files in your workspace folder, but not outside it. -- **danger-full-access**: No filesystem sandboxing. - -Network sandboxing prevents you from accessing network without approval. Options are - -- **restricted** -- **enabled** - -Approvals are your mechanism to get user consent to perform more privileged actions. Although they introduce friction to the user because your work is paused until the user responds, you should leverage them to accomplish your important work. Do not let these settings or the sandbox deter you from attempting to accomplish the user's task. Approval options are - -- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands. -- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox. 
-- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for it in the `shell` command description.) -- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is pared with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding. - -When you are running with approvals `on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval: - -- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /tmp) -- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files. -- You are running sandboxed and need to run a command that requires network access (e.g. installing packages) -- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. -- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for -- (For all of these, you should weigh alternative paths that do not require approval.) - -Note that when sandboxing is set to read-only, you'll need to request approval for any command that isn't a read. - -You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. 
If you are not told about this, assume that you are running with workspace-write, network sandboxing ON, and approval on-failure. - ## Validating your work If the codebase has tests or the ability to build or run, consider using them to verify that your work is complete. diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index f411cf9aab0..2d96106743b 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -148,7 +148,6 @@ use crate::tools::spec::ToolsConfig; use crate::tools::spec::ToolsConfigParams; use crate::turn_diff_tracker::TurnDiffTracker; use crate::unified_exec::UnifiedExecProcessManager; -use crate::user_instructions::DeveloperInstructions; use crate::user_instructions::UserInstructions; use crate::user_notification::UserNotification; use crate::util::backoff; @@ -156,6 +155,7 @@ use codex_async_utils::OrCancelExt; use codex_otel::OtelManager; use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig; use codex_protocol::models::ContentItem; +use codex_protocol::models::DeveloperInstructions; use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig; @@ -852,6 +852,11 @@ impl Session { if persist && !rollout_items.is_empty() { self.persist_rollout_items(&rollout_items).await; } + + // Append the current session's initial context after the reconstructed history. + let initial_context = self.build_initial_context(&turn_context); + self.record_conversation_items(&turn_context, &initial_context) + .await; // Flush after seeding history and any persisted rollout copy. 
self.flush_rollout().await; } @@ -1004,6 +1009,28 @@ impl Session { ))) } + fn build_permissions_update_item( + &self, + previous: Option<&Arc>, + next: &TurnContext, + ) -> Option { + let prev = previous?; + if prev.sandbox_policy == next.sandbox_policy + && prev.approval_policy == next.approval_policy + { + return None; + } + + Some( + DeveloperInstructions::from_policy( + &next.sandbox_policy, + next.approval_policy, + &next.cwd, + ) + .into(), + ) + } + /// Persist the event to rollout and send it to clients. pub(crate) async fn send_event(&self, turn_context: &TurnContext, msg: EventMsg) { let legacy_source = msg.clone(); @@ -1335,8 +1362,16 @@ impl Session { } pub(crate) fn build_initial_context(&self, turn_context: &TurnContext) -> Vec { - let mut items = Vec::::with_capacity(3); + let mut items = Vec::::with_capacity(4); let shell = self.user_shell(); + items.push( + DeveloperInstructions::from_policy( + &turn_context.sandbox_policy, + turn_context.approval_policy, + &turn_context.cwd, + ) + .into(), + ); if let Some(developer_instructions) = turn_context.developer_instructions.as_deref() { items.push(DeveloperInstructions::new(developer_instructions.to_string()).into()); } @@ -1351,8 +1386,6 @@ impl Session { } items.push(ResponseItem::from(EnvironmentContext::new( Some(turn_context.cwd.clone()), - Some(turn_context.approval_policy), - Some(turn_context.sandbox_policy.clone()), shell.as_ref().clone(), ))); items @@ -1853,10 +1886,19 @@ mod handlers { // Attempt to inject input into current task if let Err(items) = sess.inject_input(items).await { + let mut update_items = Vec::new(); if let Some(env_item) = sess.build_environment_update_item(previous_context.as_ref(), ¤t_context) { - sess.record_conversation_items(¤t_context, std::slice::from_ref(&env_item)) + update_items.push(env_item); + } + if let Some(permissions_item) = + sess.build_permissions_update_item(previous_context.as_ref(), ¤t_context) + { + update_items.push(permissions_item); + } + if 
!update_items.is_empty() { + sess.record_conversation_items(¤t_context, &update_items) .await; } @@ -2927,7 +2969,7 @@ mod tests { #[tokio::test] async fn record_initial_history_reconstructs_resumed_transcript() { let (session, turn_context) = make_session_and_context().await; - let (rollout_items, expected) = sample_rollout(&session, &turn_context); + let (rollout_items, mut expected) = sample_rollout(&session, &turn_context); session .record_initial_history(InitialHistory::Resumed(ResumedHistory { @@ -2937,6 +2979,7 @@ mod tests { })) .await; + expected.extend(session.build_initial_context(&turn_context)); let history = session.state.lock().await.clone_history(); assert_eq!(expected, history.raw_items()); } @@ -3021,12 +3064,13 @@ mod tests { #[tokio::test] async fn record_initial_history_reconstructs_forked_transcript() { let (session, turn_context) = make_session_and_context().await; - let (rollout_items, expected) = sample_rollout(&session, &turn_context); + let (rollout_items, mut expected) = sample_rollout(&session, &turn_context); session .record_initial_history(InitialHistory::Forked(rollout_items)) .await; + expected.extend(session.build_initial_context(&turn_context)); let history = session.state.lock().await.clone_history(); assert_eq!(expected, history.raw_items()); } diff --git a/codex-rs/core/src/environment_context.rs b/codex-rs/core/src/environment_context.rs index 6a0e0f26cd9..3e340ebbd30 100644 --- a/codex-rs/core/src/environment_context.rs +++ b/codex-rs/core/src/environment_context.rs @@ -1,14 +1,9 @@ use crate::codex::TurnContext; -use crate::protocol::AskForApproval; -use crate::protocol::NetworkAccess; -use crate::protocol::SandboxPolicy; use crate::shell::Shell; -use codex_protocol::config_types::SandboxMode; use codex_protocol::models::ContentItem; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::ENVIRONMENT_CONTEXT_CLOSE_TAG; use codex_protocol::protocol::ENVIRONMENT_CONTEXT_OPEN_TAG; -use 
codex_utils_absolute_path::AbsolutePathBuf; use serde::Deserialize; use serde::Serialize; use std::path::PathBuf; @@ -17,55 +12,12 @@ use std::path::PathBuf; #[serde(rename = "environment_context", rename_all = "snake_case")] pub(crate) struct EnvironmentContext { pub cwd: Option, - pub approval_policy: Option, - pub sandbox_mode: Option, - pub network_access: Option, - pub writable_roots: Option>, pub shell: Shell, } impl EnvironmentContext { - pub fn new( - cwd: Option, - approval_policy: Option, - sandbox_policy: Option, - shell: Shell, - ) -> Self { - Self { - cwd, - approval_policy, - sandbox_mode: match sandbox_policy { - Some(SandboxPolicy::DangerFullAccess) => Some(SandboxMode::DangerFullAccess), - Some(SandboxPolicy::ReadOnly) => Some(SandboxMode::ReadOnly), - Some(SandboxPolicy::ExternalSandbox { .. }) => Some(SandboxMode::DangerFullAccess), - Some(SandboxPolicy::WorkspaceWrite { .. }) => Some(SandboxMode::WorkspaceWrite), - None => None, - }, - network_access: match sandbox_policy { - Some(SandboxPolicy::DangerFullAccess) => Some(NetworkAccess::Enabled), - Some(SandboxPolicy::ReadOnly) => Some(NetworkAccess::Restricted), - Some(SandboxPolicy::ExternalSandbox { network_access }) => Some(network_access), - Some(SandboxPolicy::WorkspaceWrite { network_access, .. }) => { - if network_access { - Some(NetworkAccess::Enabled) - } else { - Some(NetworkAccess::Restricted) - } - } - None => None, - }, - writable_roots: match sandbox_policy { - Some(SandboxPolicy::WorkspaceWrite { writable_roots, .. }) => { - if writable_roots.is_empty() { - None - } else { - Some(writable_roots) - } - } - _ => None, - }, - shell, - } + pub fn new(cwd: Option, shell: Shell) -> Self { + Self { cwd, shell } } /// Compares two environment contexts, ignoring the shell. 
Useful when @@ -74,19 +26,11 @@ impl EnvironmentContext { pub fn equals_except_shell(&self, other: &EnvironmentContext) -> bool { let EnvironmentContext { cwd, - approval_policy, - sandbox_mode, - network_access, - writable_roots, // should compare all fields except shell shell: _, } = other; self.cwd == *cwd - && self.approval_policy == *approval_policy - && self.sandbox_mode == *sandbox_mode - && self.network_access == *network_access - && self.writable_roots == *writable_roots } pub fn diff(before: &TurnContext, after: &TurnContext, shell: &Shell) -> Self { @@ -95,26 +39,11 @@ impl EnvironmentContext { } else { None }; - let approval_policy = if before.approval_policy != after.approval_policy { - Some(after.approval_policy) - } else { - None - }; - let sandbox_policy = if before.sandbox_policy != after.sandbox_policy { - Some(after.sandbox_policy.clone()) - } else { - None - }; - EnvironmentContext::new(cwd, approval_policy, sandbox_policy, shell.clone()) + EnvironmentContext::new(cwd, shell.clone()) } pub fn from_turn_context(turn_context: &TurnContext, shell: &Shell) -> Self { - Self::new( - Some(turn_context.cwd.clone()), - Some(turn_context.approval_policy), - Some(turn_context.sandbox_policy.clone()), - shell.clone(), - ) + Self::new(Some(turn_context.cwd.clone()), shell.clone()) } } @@ -126,10 +55,6 @@ impl EnvironmentContext { /// ```xml /// /// ... - /// ... - /// ... - /// ... - /// ... /// ... 
/// /// ``` @@ -138,29 +63,6 @@ impl EnvironmentContext { if let Some(cwd) = self.cwd { lines.push(format!(" {}", cwd.to_string_lossy())); } - if let Some(approval_policy) = self.approval_policy { - lines.push(format!( - " {approval_policy}" - )); - } - if let Some(sandbox_mode) = self.sandbox_mode { - lines.push(format!(" {sandbox_mode}")); - } - if let Some(network_access) = self.network_access { - lines.push(format!( - " {network_access}" - )); - } - if let Some(writable_roots) = self.writable_roots { - lines.push(" ".to_string()); - for writable_root in writable_roots { - lines.push(format!( - " {}", - writable_root.to_string_lossy() - )); - } - lines.push(" ".to_string()); - } let shell_name = self.shell.name(); lines.push(format!(" {shell_name}")); @@ -187,7 +89,6 @@ mod tests { use super::*; use core_test_support::test_path_buf; - use core_test_support::test_tmp_path_buf; use pretty_assertions::assert_eq; fn fake_shell() -> Shell { @@ -198,50 +99,17 @@ mod tests { } } - fn workspace_write_policy(writable_roots: Vec<&str>, network_access: bool) -> SandboxPolicy { - SandboxPolicy::WorkspaceWrite { - writable_roots: writable_roots - .into_iter() - .map(|s| AbsolutePathBuf::try_from(s).unwrap()) - .collect(), - network_access, - exclude_tmpdir_env_var: false, - exclude_slash_tmp: false, - } - } - #[test] fn serialize_workspace_write_environment_context() { let cwd = test_path_buf("/repo"); - let writable_root = test_tmp_path_buf(); - let cwd_str = cwd.to_str().expect("cwd is valid utf-8"); - let writable_root_str = writable_root - .to_str() - .expect("writable root is valid utf-8"); - let context = EnvironmentContext::new( - Some(cwd.clone()), - Some(AskForApproval::OnRequest), - Some(workspace_write_policy( - vec![cwd_str, writable_root_str], - false, - )), - fake_shell(), - ); + let context = EnvironmentContext::new(Some(cwd.clone()), fake_shell()); let expected = format!( r#" {cwd} - on-request - workspace-write - restricted - - {cwd} - {writable_root} - bash 
"#, cwd = cwd.display(), - writable_root = writable_root.display(), ); assert_eq!(context.serialize_to_xml(), expected); @@ -249,17 +117,9 @@ mod tests { #[test] fn serialize_read_only_environment_context() { - let context = EnvironmentContext::new( - None, - Some(AskForApproval::Never), - Some(SandboxPolicy::ReadOnly), - fake_shell(), - ); + let context = EnvironmentContext::new(None, fake_shell()); let expected = r#" - never - read-only - restricted bash "#; @@ -268,19 +128,9 @@ mod tests { #[test] fn serialize_external_sandbox_environment_context() { - let context = EnvironmentContext::new( - None, - Some(AskForApproval::OnRequest), - Some(SandboxPolicy::ExternalSandbox { - network_access: NetworkAccess::Enabled, - }), - fake_shell(), - ); + let context = EnvironmentContext::new(None, fake_shell()); let expected = r#" - on-request - danger-full-access - enabled bash "#; @@ -289,19 +139,9 @@ mod tests { #[test] fn serialize_external_sandbox_with_restricted_network_environment_context() { - let context = EnvironmentContext::new( - None, - Some(AskForApproval::OnRequest), - Some(SandboxPolicy::ExternalSandbox { - network_access: NetworkAccess::Restricted, - }), - fake_shell(), - ); + let context = EnvironmentContext::new(None, fake_shell()); let expected = r#" - on-request - danger-full-access - restricted bash "#; @@ -310,17 +150,9 @@ mod tests { #[test] fn serialize_full_access_environment_context() { - let context = EnvironmentContext::new( - None, - Some(AskForApproval::OnFailure), - Some(SandboxPolicy::DangerFullAccess), - fake_shell(), - ); + let context = EnvironmentContext::new(None, fake_shell()); let expected = r#" - on-failure - danger-full-access - enabled bash "#; @@ -328,55 +160,24 @@ mod tests { } #[test] - fn equals_except_shell_compares_approval_policy() { - // Approval policy - let context1 = EnvironmentContext::new( - Some(PathBuf::from("/repo")), - Some(AskForApproval::OnRequest), - Some(workspace_write_policy(vec!["/repo"], false)), - 
fake_shell(), - ); - let context2 = EnvironmentContext::new( - Some(PathBuf::from("/repo")), - Some(AskForApproval::Never), - Some(workspace_write_policy(vec!["/repo"], true)), - fake_shell(), - ); - assert!(!context1.equals_except_shell(&context2)); + fn equals_except_shell_compares_cwd() { + let context1 = EnvironmentContext::new(Some(PathBuf::from("/repo")), fake_shell()); + let context2 = EnvironmentContext::new(Some(PathBuf::from("/repo")), fake_shell()); + assert!(context1.equals_except_shell(&context2)); } #[test] - fn equals_except_shell_compares_sandbox_policy() { - let context1 = EnvironmentContext::new( - Some(PathBuf::from("/repo")), - Some(AskForApproval::OnRequest), - Some(SandboxPolicy::new_read_only_policy()), - fake_shell(), - ); - let context2 = EnvironmentContext::new( - Some(PathBuf::from("/repo")), - Some(AskForApproval::OnRequest), - Some(SandboxPolicy::new_workspace_write_policy()), - fake_shell(), - ); + fn equals_except_shell_ignores_sandbox_policy() { + let context1 = EnvironmentContext::new(Some(PathBuf::from("/repo")), fake_shell()); + let context2 = EnvironmentContext::new(Some(PathBuf::from("/repo")), fake_shell()); - assert!(!context1.equals_except_shell(&context2)); + assert!(context1.equals_except_shell(&context2)); } #[test] - fn equals_except_shell_compares_workspace_write_policy() { - let context1 = EnvironmentContext::new( - Some(PathBuf::from("/repo")), - Some(AskForApproval::OnRequest), - Some(workspace_write_policy(vec!["/repo", "/tmp", "/var"], false)), - fake_shell(), - ); - let context2 = EnvironmentContext::new( - Some(PathBuf::from("/repo")), - Some(AskForApproval::OnRequest), - Some(workspace_write_policy(vec!["/repo", "/tmp"], true)), - fake_shell(), - ); + fn equals_except_shell_compares_cwd_differences() { + let context1 = EnvironmentContext::new(Some(PathBuf::from("/repo1")), fake_shell()); + let context2 = EnvironmentContext::new(Some(PathBuf::from("/repo2")), fake_shell()); 
assert!(!context1.equals_except_shell(&context2)); } @@ -385,8 +186,6 @@ mod tests { fn equals_except_shell_ignores_shell() { let context1 = EnvironmentContext::new( Some(PathBuf::from("/repo")), - Some(AskForApproval::OnRequest), - Some(workspace_write_policy(vec!["/repo"], false)), Shell { shell_type: ShellType::Bash, shell_path: "/bin/bash".into(), @@ -395,8 +194,6 @@ mod tests { ); let context2 = EnvironmentContext::new( Some(PathBuf::from("/repo")), - Some(AskForApproval::OnRequest), - Some(workspace_write_policy(vec!["/repo"], false)), Shell { shell_type: ShellType::Zsh, shell_path: "/bin/zsh".into(), diff --git a/codex-rs/core/src/rollout/truncation.rs b/codex-rs/core/src/rollout/truncation.rs index cd222403246..1f70be46f33 100644 --- a/codex-rs/core/src/rollout/truncation.rs +++ b/codex-rs/core/src/rollout/truncation.rs @@ -206,6 +206,7 @@ mod tests { RolloutItem::ResponseItem(items[0].clone()), RolloutItem::ResponseItem(items[1].clone()), RolloutItem::ResponseItem(items[2].clone()), + RolloutItem::ResponseItem(items[3].clone()), ]; assert_eq!( diff --git a/codex-rs/core/src/thread_manager.rs b/codex-rs/core/src/thread_manager.rs index a4e8f9c34cf..507b068c9d6 100644 --- a/codex-rs/core/src/thread_manager.rs +++ b/codex-rs/core/src/thread_manager.rs @@ -402,6 +402,7 @@ mod tests { RolloutItem::ResponseItem(items[0].clone()), RolloutItem::ResponseItem(items[1].clone()), RolloutItem::ResponseItem(items[2].clone()), + RolloutItem::ResponseItem(items[3].clone()), ]; assert_eq!( diff --git a/codex-rs/core/src/user_instructions.rs b/codex-rs/core/src/user_instructions.rs index 22b5ad7bbe5..9c563c29c5b 100644 --- a/codex-rs/core/src/user_instructions.rs +++ b/codex-rs/core/src/user_instructions.rs @@ -75,34 +75,6 @@ impl From for ResponseItem { } } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[serde(rename = "developer_instructions", rename_all = "snake_case")] -pub(crate) struct DeveloperInstructions { - text: String, -} - -impl 
DeveloperInstructions { - pub fn new>(text: T) -> Self { - Self { text: text.into() } - } - - pub fn into_text(self) -> String { - self.text - } -} - -impl From for ResponseItem { - fn from(di: DeveloperInstructions) -> Self { - ResponseItem::Message { - id: None, - role: "developer".to_string(), - content: vec![ContentItem::InputText { - text: di.into_text(), - }], - } - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs index 06846c46aba..51e4edb41d3 100644 --- a/codex-rs/core/tests/suite/client.rs +++ b/codex-rs/core/tests/suite/client.rs @@ -284,7 +284,7 @@ async fn resume_includes_initial_messages_and_sends_prior_items() { let expected_initial_json = json!([]); assert_eq!(initial_json, expected_initial_json); - // 2) Submit new input; the request body must include the prior item followed by the new user input. + // 2) Submit new input; the request body must include the prior items, then initial context, then new user input. codex .submit(Op::UserInput { items: vec![UserInput::Text { @@ -298,24 +298,55 @@ async fn resume_includes_initial_messages_and_sends_prior_items() { let request = resp_mock.single_request(); let request_body = request.body_json(); - let expected_input = json!([ - { - "type": "message", - "role": "user", - "content": [{ "type": "input_text", "text": "resumed user message" }] - }, - { - "type": "message", - "role": "assistant", - "content": [{ "type": "output_text", "text": "resumed assistant message" }] - }, - { - "type": "message", - "role": "user", - "content": [{ "type": "input_text", "text": "hello" }] - } - ]); - assert_eq!(request_body["input"], expected_input); + let input = request_body["input"].as_array().expect("input array"); + let messages: Vec<(String, String)> = input + .iter() + .filter_map(|item| { + let role = item.get("role")?.as_str()?; + let text = item + .get("content")? + .as_array()? + .first()? + .get("text")? 
+ .as_str()?; + Some((role.to_string(), text.to_string())) + }) + .collect(); + let pos_prior_user = messages + .iter() + .position(|(role, text)| role == "user" && text == "resumed user message") + .expect("prior user message"); + let pos_prior_assistant = messages + .iter() + .position(|(role, text)| role == "assistant" && text == "resumed assistant message") + .expect("prior assistant message"); + let pos_permissions = messages + .iter() + .position(|(role, text)| role == "developer" && text.contains("`approval_policy`")) + .expect("permissions message"); + let pos_user_instructions = messages + .iter() + .position(|(role, text)| { + role == "user" + && text.contains("be nice") + && (text.starts_with("# AGENTS.md instructions for ") + || text.starts_with("")) + }) + .expect("user instructions"); + let pos_environment = messages + .iter() + .position(|(role, text)| role == "user" && text.contains("")) + .expect("environment context"); + let pos_new_user = messages + .iter() + .position(|(role, text)| role == "user" && text == "hello") + .expect("new user message"); + + assert!(pos_prior_user < pos_prior_assistant); + assert!(pos_prior_assistant < pos_permissions); + assert!(pos_permissions < pos_user_instructions); + assert!(pos_user_instructions < pos_environment); + assert!(pos_environment < pos_new_user); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -618,17 +649,26 @@ async fn includes_user_instructions_message_in_request() { .unwrap() .contains("be nice") ); - assert_message_role(&request_body["input"][0], "user"); - assert_message_starts_with(&request_body["input"][0], "# AGENTS.md instructions for "); - assert_message_ends_with(&request_body["input"][0], ""); - let ui_text = request_body["input"][0]["content"][0]["text"] + assert_message_role(&request_body["input"][0], "developer"); + let permissions_text = request_body["input"][0]["content"][0]["text"] + .as_str() + .expect("invalid permissions message content"); + assert!( + 
permissions_text.contains("`sandbox_mode`"), + "expected permissions message to mention sandbox_mode, got {permissions_text:?}" + ); + + assert_message_role(&request_body["input"][1], "user"); + assert_message_starts_with(&request_body["input"][1], "# AGENTS.md instructions for "); + assert_message_ends_with(&request_body["input"][1], ""); + let ui_text = request_body["input"][1]["content"][0]["text"] .as_str() .expect("invalid message content"); assert!(ui_text.contains("")); assert!(ui_text.contains("be nice")); - assert_message_role(&request_body["input"][1], "user"); - assert_message_starts_with(&request_body["input"][1], ""); - assert_message_ends_with(&request_body["input"][1], ""); + assert_message_role(&request_body["input"][2], "user"); + assert_message_starts_with(&request_body["input"][2], ""); + assert_message_ends_with(&request_body["input"][2], ""); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -682,8 +722,10 @@ async fn skills_append_to_instructions() { let request = resp_mock.single_request(); let request_body = request.body_json(); - assert_message_role(&request_body["input"][0], "user"); - let instructions_text = request_body["input"][0]["content"][0]["text"] + assert_message_role(&request_body["input"][0], "developer"); + + assert_message_role(&request_body["input"][1], "user"); + let instructions_text = request_body["input"][1]["content"][0]["text"] .as_str() .expect("instructions text"); assert!( @@ -1049,6 +1091,10 @@ async fn includes_developer_instructions_message_in_request() { let request = resp_mock.single_request(); let request_body = request.body_json(); + let permissions_text = request_body["input"][0]["content"][0]["text"] + .as_str() + .expect("invalid permissions message content"); + assert!( !request_body["instructions"] .as_str() @@ -1056,18 +1102,24 @@ async fn includes_developer_instructions_message_in_request() { .contains("be nice") ); assert_message_role(&request_body["input"][0], "developer"); - 
assert_message_equals(&request_body["input"][0], "be useful"); - assert_message_role(&request_body["input"][1], "user"); - assert_message_starts_with(&request_body["input"][1], "# AGENTS.md instructions for "); - assert_message_ends_with(&request_body["input"][1], ""); - let ui_text = request_body["input"][1]["content"][0]["text"] + assert!( + permissions_text.contains("`sandbox_mode`"), + "expected permissions message to mention sandbox_mode, got {permissions_text:?}" + ); + + assert_message_role(&request_body["input"][1], "developer"); + assert_message_equals(&request_body["input"][1], "be useful"); + assert_message_role(&request_body["input"][2], "user"); + assert_message_starts_with(&request_body["input"][2], "# AGENTS.md instructions for "); + assert_message_ends_with(&request_body["input"][2], ""); + let ui_text = request_body["input"][2]["content"][0]["text"] .as_str() .expect("invalid message content"); assert!(ui_text.contains("")); assert!(ui_text.contains("be nice")); - assert_message_role(&request_body["input"][2], "user"); - assert_message_starts_with(&request_body["input"][2], ""); - assert_message_ends_with(&request_body["input"][2], ""); + assert_message_role(&request_body["input"][3], "user"); + assert_message_starts_with(&request_body["input"][3], ""); + assert_message_ends_with(&request_body["input"][3], ""); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index ee583997086..e3c8e0b7c23 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -604,8 +604,14 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() { .and_then(|item| item.get("text")) .and_then(|text| text.as_str()); - // Ignore the cached UI prefix (project docs + skills) since it is not relevant to - // compaction behavior and can change as bundled skills evolve. 
+ // Ignore cached prefix messages (project docs + permissions) since they are not + // relevant to compaction behavior and can change as bundled prompts evolve. + let role = value.get("role").and_then(|role| role.as_str()); + if role == Some("developer") + && text.is_some_and(|text| text.contains("`sandbox_mode`")) + { + return false; + } !text.is_some_and(|text| text.starts_with("# AGENTS.md instructions for ")) }) .cloned() @@ -1726,9 +1732,11 @@ async fn manual_compact_twice_preserves_latest_user_messages() { .into_iter() .collect::>(); - // System prompt + // Permissions developer message + final_output.pop_front(); + // User instructions (project docs/skills) final_output.pop_front(); - // Developer instructions + // Environment context final_output.pop_front(); let _ = final_output diff --git a/codex-rs/core/tests/suite/compact_resume_fork.rs b/codex-rs/core/tests/suite/compact_resume_fork.rs index ad2e0e65ad6..a8de9e5c152 100644 --- a/codex-rs/core/tests/suite/compact_resume_fork.rs +++ b/codex-rs/core/tests/suite/compact_resume_fork.rs @@ -216,11 +216,12 @@ async fn compact_resume_and_fork_preserve_model_history_view() { .as_str() .unwrap_or_default() .to_string(); - let user_instructions = requests[0]["input"][0]["content"][0]["text"] + let permissions_message = requests[0]["input"][0].clone(); + let user_instructions = requests[0]["input"][1]["content"][0]["text"] .as_str() .unwrap_or_default() .to_string(); - let environment_context = requests[0]["input"][1]["content"][0]["text"] + let environment_context = requests[0]["input"][2]["content"][0]["text"] .as_str() .unwrap_or_default() .to_string(); @@ -241,6 +242,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() { "model": expected_model, "instructions": prompt, "input": [ + permissions_message, { "type": "message", "role": "user", @@ -290,6 +292,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() { "model": expected_model, "instructions": prompt, "input": [ + 
permissions_message, { "type": "message", "role": "user", @@ -359,6 +362,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() { "model": expected_model, "instructions": prompt, "input": [ + permissions_message, { "type": "message", "role": "user", @@ -419,6 +423,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() { "model": expected_model, "instructions": prompt, "input": [ + permissions_message, { "type": "message", "role": "user", @@ -470,6 +475,27 @@ async fn compact_resume_and_fork_preserve_model_history_view() { } ] }, + permissions_message, + { + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": user_instructions + } + ] + }, + { + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": environment_context + } + ] + }, { "type": "message", "role": "user", @@ -499,6 +525,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() { "model": expected_model, "instructions": prompt, "input": [ + permissions_message, { "type": "message", "role": "user", @@ -550,6 +577,48 @@ async fn compact_resume_and_fork_preserve_model_history_view() { } ] }, + permissions_message, + { + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": user_instructions + } + ] + }, + { + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": environment_context + } + ] + }, + permissions_message, + { + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": user_instructions + } + ] + }, + { + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": environment_context + } + ] + }, { "type": "message", "role": "user", @@ -664,11 +733,12 @@ async fn compact_resume_after_second_compaction_preserves_history() { .as_str() .unwrap_or_default() .to_string(); - let user_instructions = requests[0]["input"][0]["content"][0]["text"] + let 
permissions_message = requests[0]["input"][0].clone(); + let user_instructions = requests[0]["input"][1]["content"][0]["text"] .as_str() .unwrap_or_default() .to_string(); - let environment_instructions = requests[0]["input"][1]["content"][0]["text"] + let environment_instructions = requests[0]["input"][2]["content"][0]["text"] .as_str() .unwrap_or_default() .to_string(); @@ -682,6 +752,7 @@ async fn compact_resume_after_second_compaction_preserves_history() { { "instructions": prompt, "input": [ + permissions_message, { "type": "message", "role": "user", @@ -723,6 +794,27 @@ async fn compact_resume_after_second_compaction_preserves_history() { } ] }, + permissions_message, + { + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": user_instructions + } + ] + }, + { + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": environment_instructions + } + ] + }, { "type": "message", "role": "user", diff --git a/codex-rs/core/tests/suite/fork_thread.rs b/codex-rs/core/tests/suite/fork_thread.rs index 98f1dafd5b1..e2e6535752c 100644 --- a/codex-rs/core/tests/suite/fork_thread.rs +++ b/codex-rs/core/tests/suite/fork_thread.rs @@ -138,8 +138,9 @@ async fn fork_thread_twice_drops_to_first_message() { // GetHistory on fork1 flushed; the file is ready. 
let fork1_items = read_items(&fork1_path); + assert!(fork1_items.len() > expected_after_first.len()); pretty_assertions::assert_eq!( - serde_json::to_value(&fork1_items).unwrap(), + serde_json::to_value(&fork1_items[..expected_after_first.len()]).unwrap(), serde_json::to_value(&expected_after_first).unwrap() ); @@ -162,8 +163,9 @@ async fn fork_thread_twice_drops_to_first_message() { .unwrap_or(0); let expected_after_second: Vec = fork1_items[..cut_last_on_fork1].to_vec(); let fork2_items = read_items(&fork2_path); + assert!(fork2_items.len() > expected_after_second.len()); pretty_assertions::assert_eq!( - serde_json::to_value(&fork2_items).unwrap(), + serde_json::to_value(&fork2_items[..expected_after_second.len()]).unwrap(), serde_json::to_value(&expected_after_second).unwrap() ); } diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index effbc8a9316..e2708db70ce 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -40,6 +40,7 @@ mod model_overrides; mod model_tools; mod models_etag_responses; mod otel; +mod permissions_messages; mod prompt_caching; mod quota_exceeded; mod read_file; diff --git a/codex-rs/core/tests/suite/permissions_messages.rs b/codex-rs/core/tests/suite/permissions_messages.rs new file mode 100644 index 00000000000..3f8a9b49034 --- /dev/null +++ b/codex-rs/core/tests/suite/permissions_messages.rs @@ -0,0 +1,448 @@ +use anyhow::Result; +use codex_core::config::Constrained; +use codex_core::protocol::AskForApproval; +use codex_core::protocol::EventMsg; +use codex_core::protocol::Op; +use codex_core::protocol::SandboxPolicy; +use codex_protocol::user_input::UserInput; +use codex_utils_absolute_path::AbsolutePathBuf; +use core_test_support::responses::ev_completed; +use core_test_support::responses::ev_response_created; +use core_test_support::responses::mount_sse_once; +use core_test_support::responses::sse; +use core_test_support::responses::start_mock_server; +use 
core_test_support::skip_if_no_network; +use core_test_support::test_codex::test_codex; +use core_test_support::wait_for_event; +use pretty_assertions::assert_eq; +use std::collections::HashSet; +use tempfile::TempDir; + +fn permissions_texts(input: &[serde_json::Value]) -> Vec { + input + .iter() + .filter_map(|item| { + let role = item.get("role")?.as_str()?; + if role != "developer" { + return None; + } + let text = item + .get("content")? + .as_array()? + .first()? + .get("text")? + .as_str()?; + if text.contains("`approval_policy`") { + Some(text.to_string()) + } else { + None + } + }) + .collect() +} + +fn sse_completed(id: &str) -> String { + sse(vec![ev_response_created(id), ev_completed(id)]) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn permissions_message_sent_once_on_start() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let req = mount_sse_once(&server, sse_completed("resp-1")).await; + + let mut builder = test_codex().with_config(move |config| { + config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest); + }); + let test = builder.build(&server).await?; + + test.codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "hello".into(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + + let request = req.single_request(); + let body = request.body_json(); + let input = body["input"].as_array().expect("input array"); + let permissions = permissions_texts(input); + assert_eq!(permissions.len(), 1); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn permissions_message_added_on_override_change() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let req1 = mount_sse_once(&server, sse_completed("resp-1")).await; + let req2 = mount_sse_once(&server, sse_completed("resp-2")).await; + + 
let mut builder = test_codex().with_config(move |config| { + config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest); + }); + let test = builder.build(&server).await?; + + test.codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "hello 1".into(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + + test.codex + .submit(Op::OverrideTurnContext { + cwd: None, + approval_policy: Some(AskForApproval::Never), + sandbox_policy: None, + model: None, + effort: None, + summary: None, + }) + .await?; + + test.codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "hello 2".into(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + + let body1 = req1.single_request().body_json(); + let body2 = req2.single_request().body_json(); + let input1 = body1["input"].as_array().expect("input array"); + let input2 = body2["input"].as_array().expect("input array"); + let permissions_1 = permissions_texts(input1); + let permissions_2 = permissions_texts(input2); + + assert_eq!(permissions_1.len(), 1); + assert_eq!(permissions_2.len(), 2); + let unique = permissions_2.into_iter().collect::>(); + assert_eq!(unique.len(), 2); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn permissions_message_not_added_when_no_change() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let req1 = mount_sse_once(&server, sse_completed("resp-1")).await; + let req2 = mount_sse_once(&server, sse_completed("resp-2")).await; + + let mut builder = test_codex().with_config(move |config| { + config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest); + }); + let test = builder.build(&server).await?; + + test.codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "hello 
1".into(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + + test.codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "hello 2".into(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + + let body1 = req1.single_request().body_json(); + let body2 = req2.single_request().body_json(); + let input1 = body1["input"].as_array().expect("input array"); + let input2 = body2["input"].as_array().expect("input array"); + let permissions_1 = permissions_texts(input1); + let permissions_2 = permissions_texts(input2); + + assert_eq!(permissions_1.len(), 1); + assert_eq!(permissions_2.len(), 1); + assert_eq!(permissions_1, permissions_2); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn resume_replays_permissions_messages() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let _req1 = mount_sse_once(&server, sse_completed("resp-1")).await; + let _req2 = mount_sse_once(&server, sse_completed("resp-2")).await; + let req3 = mount_sse_once(&server, sse_completed("resp-3")).await; + + let mut builder = test_codex().with_config(|config| { + config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest); + }); + let initial = builder.build(&server).await?; + let rollout_path = initial.session_configured.rollout_path.clone(); + let home = initial.home.clone(); + + initial + .codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "hello 1".into(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + + initial + .codex + .submit(Op::OverrideTurnContext { + cwd: None, + approval_policy: Some(AskForApproval::Never), + sandbox_policy: None, + model: None, + effort: None, + summary: None, + }) + 
.await?; + + initial + .codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "hello 2".into(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + + let resumed = builder.resume(&server, home, rollout_path).await?; + resumed + .codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "after resume".into(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&resumed.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + + let body3 = req3.single_request().body_json(); + let input = body3["input"].as_array().expect("input array"); + let permissions = permissions_texts(input); + assert_eq!(permissions.len(), 3); + let unique = permissions.into_iter().collect::>(); + assert_eq!(unique.len(), 2); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn resume_and_fork_append_permissions_messages() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let _req1 = mount_sse_once(&server, sse_completed("resp-1")).await; + let req2 = mount_sse_once(&server, sse_completed("resp-2")).await; + let req3 = mount_sse_once(&server, sse_completed("resp-3")).await; + let req4 = mount_sse_once(&server, sse_completed("resp-4")).await; + + let mut builder = test_codex().with_config(|config| { + config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest); + }); + let initial = builder.build(&server).await?; + let rollout_path = initial.session_configured.rollout_path.clone(); + let home = initial.home.clone(); + + initial + .codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "hello 1".into(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + + initial + .codex + .submit(Op::OverrideTurnContext { + cwd: None, + approval_policy: 
Some(AskForApproval::Never), + sandbox_policy: None, + model: None, + effort: None, + summary: None, + }) + .await?; + + initial + .codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "hello 2".into(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + + let body2 = req2.single_request().body_json(); + let input2 = body2["input"].as_array().expect("input array"); + let permissions_base = permissions_texts(input2); + assert_eq!(permissions_base.len(), 2); + + builder = builder.with_config(|config| { + config.approval_policy = Constrained::allow_any(AskForApproval::UnlessTrusted); + }); + let resumed = builder.resume(&server, home, rollout_path.clone()).await?; + resumed + .codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "after resume".into(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&resumed.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + + let body3 = req3.single_request().body_json(); + let input3 = body3["input"].as_array().expect("input array"); + let permissions_resume = permissions_texts(input3); + assert_eq!(permissions_resume.len(), permissions_base.len() + 1); + assert_eq!( + &permissions_resume[..permissions_base.len()], + permissions_base.as_slice() + ); + assert!(!permissions_base.contains(permissions_resume.last().expect("new permissions"))); + + let mut fork_config = initial.config.clone(); + fork_config.approval_policy = Constrained::allow_any(AskForApproval::UnlessTrusted); + let forked = initial + .thread_manager + .fork_thread(usize::MAX, fork_config, rollout_path) + .await?; + forked + .thread + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "after fork".into(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&forked.thread, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + + let body4 = req4.single_request().body_json(); 
+ let input4 = body4["input"].as_array().expect("input array"); + let permissions_fork = permissions_texts(input4); + assert_eq!(permissions_fork.len(), permissions_base.len() + 2); + assert_eq!( + &permissions_fork[..permissions_base.len()], + permissions_base.as_slice() + ); + let new_permissions = &permissions_fork[permissions_base.len()..]; + assert_eq!(new_permissions.len(), 2); + assert_eq!(new_permissions[0], new_permissions[1]); + assert!(!permissions_base.contains(&new_permissions[0])); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn permissions_message_includes_writable_roots() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let req = mount_sse_once(&server, sse_completed("resp-1")).await; + let writable = TempDir::new()?; + let writable_root = AbsolutePathBuf::try_from(writable.path())?; + let sandbox_policy = SandboxPolicy::WorkspaceWrite { + writable_roots: vec![writable_root], + network_access: false, + exclude_tmpdir_env_var: false, + exclude_slash_tmp: false, + }; + + let mut builder = test_codex().with_config(move |config| { + config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest); + config.sandbox_policy = Constrained::allow_any(sandbox_policy); + }); + let test = builder.build(&server).await?; + + test.codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "hello".into(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + + let body = req.single_request().body_json(); + let input = body["input"].as_array().expect("input array"); + let permissions = permissions_texts(input); + let sandbox_text = "Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `workspace-write`: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. 
Network access is restricted."; + let approval_text = " Approvals are your mechanism to get user consent to run shell commands without the sandbox. `approval_policy` is `on-request`: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task.\n\nHere are scenarios where you'll need to request approval:\n- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)\n- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.\n- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)\n- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command.\n- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for.\n\nWhen requesting approval to execute a command that will require escalated privileges:\n - Provide the `sandbox_permissions` parameter with the value `\"require_escalated\"`\n - Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter"; + // Normalize paths by removing trailing slashes to match AbsolutePathBuf behavior + let normalize_path = + |p: &std::path::Path| -> String { p.to_string_lossy().trim_end_matches('/').to_string() }; + let mut roots = vec![ + normalize_path(writable.path()), + normalize_path(test.config.cwd.as_path()), + ]; + if cfg!(unix) && std::path::Path::new("/tmp").is_dir() { + roots.push("/tmp".to_string()); + } + if let Some(tmpdir) 
= std::env::var_os("TMPDIR") { + let tmpdir_path = std::path::PathBuf::from(&tmpdir); + if tmpdir_path.is_absolute() && !tmpdir.is_empty() { + roots.push(normalize_path(&tmpdir_path)); + } + } + let roots_text = if roots.len() == 1 { + format!(" The writable root is `{}`.", roots[0]) + } else { + format!( + " The writable roots are {}.", + roots + .iter() + .map(|root| format!("`{root}`")) + .collect::>() + .join(", ") + ) + }; + let expected = format!( + "{sandbox_text}{approval_text}{roots_text}" + ); + // Normalize line endings to handle Windows vs Unix differences + let normalize_line_endings = |s: &str| s.replace("\r\n", "\n"); + let expected_normalized = normalize_line_endings(&expected); + let actual_normalized: Vec = permissions + .iter() + .map(|s| normalize_line_endings(s)) + .collect(); + assert_eq!(actual_normalized, vec![expected_normalized]); + + Ok(()) +} diff --git a/codex-rs/core/tests/suite/prompt_caching.rs b/codex-rs/core/tests/suite/prompt_caching.rs index 160b623c6b7..79c9d6e19ca 100644 --- a/codex-rs/core/tests/suite/prompt_caching.rs +++ b/codex-rs/core/tests/suite/prompt_caching.rs @@ -36,9 +36,6 @@ fn default_env_context_str(cwd: &str, shell: &Shell) -> String { format!( r#" {cwd} - on-request - read-only - restricted {shell_name} "# ) @@ -252,9 +249,13 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests let body1 = req1.single_request().body_json(); let input1 = body1["input"].as_array().expect("input array"); - assert_eq!(input1.len(), 3, "expected cached prefix + env + user msg"); + assert_eq!( + input1.len(), + 4, + "expected permissions + cached prefix + env + user msg" + ); - let ui_text = input1[0]["content"][0]["text"] + let ui_text = input1[1]["content"][0]["text"] .as_str() .expect("ui message text"); assert!( @@ -266,11 +267,11 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests let cwd_str = config.cwd.to_string_lossy(); let expected_env_text = 
default_env_context_str(&cwd_str, &shell); assert_eq!( - input1[1], + input1[2], text_user_input(expected_env_text), "expected environment context after UI message" ); - assert_eq!(input1[2], text_user_input("hello 1".to_string())); + assert_eq!(input1[3], text_user_input("hello 1".to_string())); let body2 = req2.single_request().body_json(); let input2 = body2["input"].as_array().expect("input array"); @@ -312,16 +313,17 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() -> an wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; let writable = TempDir::new().unwrap(); + let new_policy = SandboxPolicy::WorkspaceWrite { + writable_roots: vec![writable.path().try_into().unwrap()], + network_access: true, + exclude_tmpdir_env_var: true, + exclude_slash_tmp: true, + }; codex .submit(Op::OverrideTurnContext { cwd: None, approval_policy: Some(AskForApproval::Never), - sandbox_policy: Some(SandboxPolicy::WorkspaceWrite { - writable_roots: vec![writable.path().try_into().unwrap()], - network_access: true, - exclude_tmpdir_env_var: true, - exclude_slash_tmp: true, - }), + sandbox_policy: Some(new_policy.clone()), model: Some("o3".to_string()), effort: Some(Some(ReasoningEffort::High)), summary: Some(ReasoningSummary::Detailed), @@ -354,36 +356,18 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() -> an "role": "user", "content": [ { "type": "input_text", "text": "hello 2" } ] }); - // After overriding the turn context, the environment context should be emitted again - // reflecting the new approval policy and sandbox settings. Omit cwd because it did - // not change. - let shell = default_user_shell(); - let expected_env_text_2 = format!( - r#" - never - workspace-write - enabled - - {} - - {} -"#, - writable.path().display(), - shell.name() + let expected_permissions_msg = body1["input"][0].clone(); + // After overriding the turn context, emit a new permissions message. 
+ let body1_input = body1["input"].as_array().expect("input array"); + let expected_permissions_msg_2 = body2["input"][body1_input.len()].clone(); + assert_ne!( + expected_permissions_msg_2, expected_permissions_msg, + "expected updated permissions message after override" ); - let expected_env_msg_2 = serde_json::json!({ - "type": "message", - "role": "user", - "content": [ { "type": "input_text", "text": expected_env_text_2 } ] - }); - let expected_body2 = serde_json::json!( - [ - body1["input"].as_array().unwrap().as_slice(), - [expected_env_msg_2, expected_user_message_2].as_slice(), - ] - .concat() - ); - assert_eq!(body2["input"], expected_body2); + let mut expected_body2 = body1["input"].as_array().expect("input array").to_vec(); + expected_body2.push(expected_permissions_msg_2); + expected_body2.push(expected_user_message_2); + assert_eq!(body2["input"], serde_json::Value::Array(expected_body2)); Ok(()) } @@ -439,10 +423,8 @@ async fn override_before_first_turn_emits_environment_context() -> anyhow::Resul .filter(|text| text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG)) .collect(); assert!( - env_texts - .iter() - .any(|text| text.contains("never")), - "environment context should reflect overridden approval policy: {env_texts:?}" + !env_texts.is_empty(), + "expected environment context to be emitted: {env_texts:?}" ); let env_count = input @@ -462,9 +444,29 @@ async fn override_before_first_turn_emits_environment_context() -> anyhow::Resul .is_some() }) .count(); - assert_eq!( - env_count, 2, - "environment context should appear exactly twice, found {env_count}" + assert!( + env_count >= 1, + "environment context should appear at least once, found {env_count}" + ); + + let permissions_texts: Vec<&str> = input + .iter() + .filter_map(|msg| { + let role = msg["role"].as_str()?; + if role != "developer" { + return None; + } + msg["content"] + .as_array() + .and_then(|content| content.first()) + .and_then(|item| item["text"].as_str()) + }) + .collect(); + assert!( 
+ permissions_texts + .iter() + .any(|text| text.contains("`approval_policy` is `never`")), + "permissions message should reflect overridden approval policy: {permissions_texts:?}" ); let user_texts: Vec<&str> = input @@ -514,6 +516,12 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() -> anyhow::Res // Second turn using per-turn overrides via UserTurn let new_cwd = TempDir::new().unwrap(); let writable = TempDir::new().unwrap(); + let new_policy = SandboxPolicy::WorkspaceWrite { + writable_roots: vec![AbsolutePathBuf::try_from(writable.path()).unwrap()], + network_access: true, + exclude_tmpdir_env_var: true, + exclude_slash_tmp: true, + }; codex .submit(Op::UserTurn { items: vec![UserInput::Text { @@ -521,12 +529,7 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() -> anyhow::Res }], cwd: new_cwd.path().to_path_buf(), approval_policy: AskForApproval::Never, - sandbox_policy: SandboxPolicy::WorkspaceWrite { - writable_roots: vec![AbsolutePathBuf::try_from(writable.path()).unwrap()], - network_access: true, - exclude_tmpdir_env_var: true, - exclude_slash_tmp: true, - }, + sandbox_policy: new_policy.clone(), model: "o3".to_string(), effort: Some(ReasoningEffort::High), summary: ReasoningSummary::Detailed, @@ -556,31 +559,28 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() -> anyhow::Res let expected_env_text_2 = format!( r#" {} - never - workspace-write - enabled - - {} - {} "#, new_cwd.path().display(), - writable.path().display(), - shell.name(), + shell.name() ); let expected_env_msg_2 = serde_json::json!({ "type": "message", "role": "user", "content": [ { "type": "input_text", "text": expected_env_text_2 } ] }); - let expected_body2 = serde_json::json!( - [ - body1["input"].as_array().unwrap().as_slice(), - [expected_env_msg_2, expected_user_message_2].as_slice(), - ] - .concat() + let expected_permissions_msg = body1["input"][0].clone(); + let body1_input = body1["input"].as_array().expect("input array"); 
+ let expected_permissions_msg_2 = body2["input"][body1_input.len() + 1].clone(); + assert_ne!( + expected_permissions_msg_2, expected_permissions_msg, + "expected updated permissions message after per-turn override" ); - assert_eq!(body2["input"], expected_body2); + let mut expected_body2 = body1_input.to_vec(); + expected_body2.push(expected_env_msg_2); + expected_body2.push(expected_permissions_msg_2); + expected_body2.push(expected_user_message_2); + assert_eq!(body2["input"], serde_json::Value::Array(expected_body2)); Ok(()) } @@ -648,7 +648,8 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() -> a let body1 = req1.single_request().body_json(); let body2 = req2.single_request().body_json(); - let expected_ui_msg = body1["input"][0].clone(); + let expected_permissions_msg = body1["input"][0].clone(); + let expected_ui_msg = body1["input"][1].clone(); let shell = default_user_shell(); let default_cwd_lossy = default_cwd.to_string_lossy(); @@ -657,6 +658,7 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() -> a let expected_user_message_1 = text_user_input("hello 1".to_string()); let expected_input_1 = serde_json::Value::Array(vec![ + expected_permissions_msg.clone(), expected_ui_msg.clone(), expected_env_msg_1.clone(), expected_user_message_1.clone(), @@ -665,6 +667,7 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() -> a let expected_user_message_2 = text_user_input("hello 2".to_string()); let expected_input_2 = serde_json::Value::Array(vec![ + expected_permissions_msg, expected_ui_msg, expected_env_msg_1, expected_user_message_1, @@ -738,34 +741,34 @@ async fn send_user_turn_with_changes_sends_environment_context() -> anyhow::Resu let body1 = req1.single_request().body_json(); let body2 = req2.single_request().body_json(); - let expected_ui_msg = body1["input"][0].clone(); + let expected_permissions_msg = body1["input"][0].clone(); + let expected_ui_msg = 
body1["input"][1].clone(); let shell = default_user_shell(); let expected_env_text_1 = default_env_context_str(&default_cwd.to_string_lossy(), &shell); let expected_env_msg_1 = text_user_input(expected_env_text_1); let expected_user_message_1 = text_user_input("hello 1".to_string()); let expected_input_1 = serde_json::Value::Array(vec![ + expected_permissions_msg.clone(), expected_ui_msg.clone(), expected_env_msg_1.clone(), expected_user_message_1.clone(), ]); assert_eq!(body1["input"], expected_input_1); - let shell_name = shell.name(); - let expected_env_msg_2 = text_user_input(format!( - r#" - never - danger-full-access - enabled - {shell_name} -"# - )); + let body1_input = body1["input"].as_array().expect("input array"); + let expected_permissions_msg_2 = body2["input"][body1_input.len()].clone(); + assert_ne!( + expected_permissions_msg_2, expected_permissions_msg, + "expected updated permissions message after policy change" + ); let expected_user_message_2 = text_user_input("hello 2".to_string()); let expected_input_2 = serde_json::Value::Array(vec![ + expected_permissions_msg, expected_ui_msg, expected_env_msg_1, expected_user_message_1, - expected_env_msg_2, + expected_permissions_msg_2, expected_user_message_2, ]); assert_eq!(body2["input"], expected_input_2); diff --git a/codex-rs/mcp-server/tests/suite/codex_tool.rs b/codex-rs/mcp-server/tests/suite/codex_tool.rs index d0a78ae3927..c0f9310c51a 100644 --- a/codex-rs/mcp-server/tests/suite/codex_tool.rs +++ b/codex-rs/mcp-server/tests/suite/codex_tool.rs @@ -381,23 +381,26 @@ async fn codex_tool_passes_base_instructions() -> anyhow::Result<()> { let instructions = request["messages"][0]["content"].as_str().unwrap(); assert!(instructions.starts_with("You are a helpful assistant.")); - let developer_msg = request["messages"] + let developer_messages: Vec<&serde_json::Value> = request["messages"] .as_array() - .and_then(|messages| { - messages - .iter() - .find(|msg| msg.get("role").and_then(|role| 
role.as_str()) == Some("developer")) - }) - .unwrap(); - let developer_content = developer_msg - .get("content") - .and_then(|value| value.as_str()) - .unwrap(); + .unwrap() + .iter() + .filter(|msg| msg.get("role").and_then(|role| role.as_str()) == Some("developer")) + .collect(); + let developer_contents: Vec<&str> = developer_messages + .iter() + .filter_map(|msg| msg.get("content").and_then(|value| value.as_str())) + .collect(); + assert!( + developer_contents + .iter() + .any(|content| content.contains("`sandbox_mode`")), + "expected permissions developer message, got {developer_contents:?}" + ); assert!( - !developer_content.contains('<'), - "expected developer instructions without XML tags, got `{developer_content}`" + developer_contents.contains(&"Foreshadow upcoming tool calls."), + "expected developer instructions in developer messages, got {developer_contents:?}" ); - assert_eq!(developer_content, "Foreshadow upcoming tool calls."); Ok(()) } diff --git a/codex-rs/protocol/BUILD.bazel b/codex-rs/protocol/BUILD.bazel index e699c7bf9ad..2c00d076bce 100644 --- a/codex-rs/protocol/BUILD.bazel +++ b/codex-rs/protocol/BUILD.bazel @@ -3,4 +3,5 @@ load("//:defs.bzl", "codex_rust_crate") codex_rust_crate( name = "protocol", crate_name = "codex_protocol", + compile_data = glob(["src/prompts/permissions/**/*.md"]), ) diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index 69a682f2df4..475faa82a07 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::path::Path; use codex_utils_image::load_and_resize_to_fit; use mcp_types::CallToolResult; @@ -9,6 +10,11 @@ use serde::Serialize; use serde::ser::Serializer; use ts_rs::TS; +use crate::config_types::SandboxMode; +use crate::protocol::AskForApproval; +use crate::protocol::NetworkAccess; +use crate::protocol::SandboxPolicy; +use crate::protocol::WritableRoot; use crate::user_input::UserInput; use 
codex_git::GhostCommit; use codex_utils_image::error::ImageProcessingError; @@ -158,6 +164,159 @@ pub enum ResponseItem { Other, } +/// Developer-provided guidance that is injected into a turn as a developer role +/// message. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] +#[serde(rename = "developer_instructions", rename_all = "snake_case")] +pub struct DeveloperInstructions { + text: String, +} + +const APPROVAL_POLICY_NEVER: &str = include_str!("prompts/permissions/approval_policy/never.md"); +const APPROVAL_POLICY_UNLESS_TRUSTED: &str = + include_str!("prompts/permissions/approval_policy/unless_trusted.md"); +const APPROVAL_POLICY_ON_FAILURE: &str = + include_str!("prompts/permissions/approval_policy/on_failure.md"); +const APPROVAL_POLICY_ON_REQUEST: &str = + include_str!("prompts/permissions/approval_policy/on_request.md"); + +const SANDBOX_MODE_DANGER_FULL_ACCESS: &str = + include_str!("prompts/permissions/sandbox_mode/danger_full_access.md"); +const SANDBOX_MODE_WORKSPACE_WRITE: &str = + include_str!("prompts/permissions/sandbox_mode/workspace_write.md"); +const SANDBOX_MODE_READ_ONLY: &str = include_str!("prompts/permissions/sandbox_mode/read_only.md"); + +impl DeveloperInstructions { + pub fn new>(text: T) -> Self { + Self { text: text.into() } + } + + pub fn into_text(self) -> String { + self.text + } + + pub fn concat(self, other: impl Into) -> Self { + let mut text = self.text; + text.push_str(&other.into().text); + Self { text } + } + + pub fn from_policy( + sandbox_policy: &SandboxPolicy, + approval_policy: AskForApproval, + cwd: &Path, + ) -> Self { + let network_access = if sandbox_policy.has_full_network_access() { + NetworkAccess::Enabled + } else { + NetworkAccess::Restricted + }; + + let (sandbox_mode, writable_roots) = match sandbox_policy { + SandboxPolicy::DangerFullAccess => (SandboxMode::DangerFullAccess, None), + SandboxPolicy::ReadOnly => (SandboxMode::ReadOnly, None), + SandboxPolicy::ExternalSandbox { .. 
} => (SandboxMode::DangerFullAccess, None), + SandboxPolicy::WorkspaceWrite { .. } => { + let roots = sandbox_policy.get_writable_roots_with_cwd(cwd); + (SandboxMode::WorkspaceWrite, Some(roots)) + } + }; + + DeveloperInstructions::from_permissions_with_network( + sandbox_mode, + network_access, + approval_policy, + writable_roots, + ) + } + + fn from_permissions_with_network( + sandbox_mode: SandboxMode, + network_access: NetworkAccess, + approval_policy: AskForApproval, + writable_roots: Option>, + ) -> Self { + let start_tag = DeveloperInstructions::new(""); + let end_tag = DeveloperInstructions::new(""); + start_tag + .concat(DeveloperInstructions::sandbox_text( + sandbox_mode, + network_access, + )) + .concat(DeveloperInstructions::from(approval_policy)) + .concat(DeveloperInstructions::from_writable_roots(writable_roots)) + .concat(end_tag) + } + + fn from_writable_roots(writable_roots: Option>) -> Self { + let Some(roots) = writable_roots else { + return DeveloperInstructions::new(""); + }; + + if roots.is_empty() { + return DeveloperInstructions::new(""); + } + + let roots_list: Vec = roots + .iter() + .map(|r| format!("`{}`", r.root.to_string_lossy())) + .collect(); + let text = if roots_list.len() == 1 { + format!(" The writable root is {}.", roots_list[0]) + } else { + format!(" The writable roots are {}.", roots_list.join(", ")) + }; + DeveloperInstructions::new(text) + } + + fn sandbox_text(mode: SandboxMode, network_access: NetworkAccess) -> DeveloperInstructions { + let template = match mode { + SandboxMode::DangerFullAccess => SANDBOX_MODE_DANGER_FULL_ACCESS.trim_end(), + SandboxMode::WorkspaceWrite => SANDBOX_MODE_WORKSPACE_WRITE.trim_end(), + SandboxMode::ReadOnly => SANDBOX_MODE_READ_ONLY.trim_end(), + }; + let text = template.replace("{network_access}", &network_access.to_string()); + + DeveloperInstructions::new(text) + } +} + +impl From for ResponseItem { + fn from(di: DeveloperInstructions) -> Self { + ResponseItem::Message { + id: None, + 
role: "developer".to_string(), + content: vec![ContentItem::InputText { + text: di.into_text(), + }], + } + } +} + +impl From for DeveloperInstructions { + fn from(mode: SandboxMode) -> Self { + let network_access = match mode { + SandboxMode::DangerFullAccess => NetworkAccess::Enabled, + SandboxMode::WorkspaceWrite | SandboxMode::ReadOnly => NetworkAccess::Restricted, + }; + + DeveloperInstructions::sandbox_text(mode, network_access) + } +} + +impl From for DeveloperInstructions { + fn from(mode: AskForApproval) -> Self { + let text = match mode { + AskForApproval::Never => APPROVAL_POLICY_NEVER.trim_end(), + AskForApproval::UnlessTrusted => APPROVAL_POLICY_UNLESS_TRUSTED.trim_end(), + AskForApproval::OnFailure => APPROVAL_POLICY_ON_FAILURE.trim_end(), + AskForApproval::OnRequest => APPROVAL_POLICY_ON_REQUEST.trim_end(), + }; + + DeveloperInstructions::new(text) + } +} + fn should_serialize_reasoning_content(content: &Option>) -> bool { match content { Some(content) => !content @@ -550,12 +709,71 @@ impl std::ops::Deref for FunctionCallOutputPayload { #[cfg(test)] mod tests { use super::*; + use crate::config_types::SandboxMode; + use crate::protocol::AskForApproval; use anyhow::Result; use mcp_types::ImageContent; use mcp_types::TextContent; use pretty_assertions::assert_eq; + use std::path::PathBuf; use tempfile::tempdir; + #[test] + fn converts_sandbox_mode_into_developer_instructions() { + assert_eq!( + DeveloperInstructions::from(SandboxMode::WorkspaceWrite), + DeveloperInstructions::new( + "Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `workspace-write`: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. Network access is restricted." + ) + ); + + assert_eq!( + DeveloperInstructions::from(SandboxMode::ReadOnly), + DeveloperInstructions::new( + "Filesystem sandboxing defines which files can be read or written. 
`sandbox_mode` is `read-only`: The sandbox only permits reading files. Network access is restricted." + ) + ); + } + + #[test] + fn builds_permissions_with_network_access_override() { + let instructions = DeveloperInstructions::from_permissions_with_network( + SandboxMode::WorkspaceWrite, + NetworkAccess::Enabled, + AskForApproval::OnRequest, + None, + ); + + let text = instructions.into_text(); + assert!( + text.contains("Network access is enabled."), + "expected network access to be enabled in message" + ); + assert!( + text.contains("`approval_policy` is `on-request`"), + "expected approval guidance to be included" + ); + } + + #[test] + fn builds_permissions_from_policy() { + let policy = SandboxPolicy::WorkspaceWrite { + writable_roots: vec![], + network_access: true, + exclude_tmpdir_env_var: false, + exclude_slash_tmp: false, + }; + + let instructions = DeveloperInstructions::from_policy( + &policy, + AskForApproval::UnlessTrusted, + &PathBuf::from("/tmp"), + ); + let text = instructions.into_text(); + assert!(text.contains("Network access is enabled.")); + assert!(text.contains("`approval_policy` is `unless-trusted`")); + } + #[test] fn serializes_success_as_plain_string() -> Result<()> { let item = ResponseInputItem::FunctionCallOutput { diff --git a/codex-rs/protocol/src/prompts/permissions/approval_policy/never.md b/codex-rs/protocol/src/prompts/permissions/approval_policy/never.md new file mode 100644 index 00000000000..be8fe34e64e --- /dev/null +++ b/codex-rs/protocol/src/prompts/permissions/approval_policy/never.md @@ -0,0 +1 @@ + Approvals are your mechanism to get user consent to run shell commands without the sandbox. `approval_policy` is `never`: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. 
If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding. diff --git a/codex-rs/protocol/src/prompts/permissions/approval_policy/on_failure.md b/codex-rs/protocol/src/prompts/permissions/approval_policy/on_failure.md new file mode 100644 index 00000000000..7ee26dbd494 --- /dev/null +++ b/codex-rs/protocol/src/prompts/permissions/approval_policy/on_failure.md @@ -0,0 +1 @@ + Approvals are your mechanism to get user consent to run shell commands without the sandbox. `approval_policy` is `on-failure`: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox. diff --git a/codex-rs/protocol/src/prompts/permissions/approval_policy/on_request.md b/codex-rs/protocol/src/prompts/permissions/approval_policy/on_request.md new file mode 100644 index 00000000000..16c90e8012f --- /dev/null +++ b/codex-rs/protocol/src/prompts/permissions/approval_policy/on_request.md @@ -0,0 +1,12 @@ + Approvals are your mechanism to get user consent to run shell commands without the sandbox. `approval_policy` is `on-request`: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. If completing the task requires escalated permissions, do not let these settings or the sandbox deter you from attempting to accomplish the user's task. + +Here are scenarios where you'll need to request approval: +- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var) +- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.
+- You are running sandboxed and need to run a command that requires network access (e.g. installing packages) +- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command. +- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for. + +When requesting approval to execute a command that will require escalated privileges: + - Provide the `sandbox_permissions` parameter with the value `"require_escalated"` + - Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter \ No newline at end of file diff --git a/codex-rs/protocol/src/prompts/permissions/approval_policy/unless_trusted.md b/codex-rs/protocol/src/prompts/permissions/approval_policy/unless_trusted.md new file mode 100644 index 00000000000..039f7026568 --- /dev/null +++ b/codex-rs/protocol/src/prompts/permissions/approval_policy/unless_trusted.md @@ -0,0 +1 @@ + Approvals are your mechanism to get user consent to run shell commands without the sandbox. `approval_policy` is `unless-trusted`: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands. diff --git a/codex-rs/protocol/src/prompts/permissions/sandbox_mode/danger_full_access.md b/codex-rs/protocol/src/prompts/permissions/sandbox_mode/danger_full_access.md new file mode 100644 index 00000000000..4a5cfa9fb14 --- /dev/null +++ b/codex-rs/protocol/src/prompts/permissions/sandbox_mode/danger_full_access.md @@ -0,0 +1 @@ +Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `danger-full-access`: No filesystem sandboxing - all commands are permitted. Network access is {network_access}. 
diff --git a/codex-rs/protocol/src/prompts/permissions/sandbox_mode/read_only.md b/codex-rs/protocol/src/prompts/permissions/sandbox_mode/read_only.md new file mode 100644 index 00000000000..729264a11f1 --- /dev/null +++ b/codex-rs/protocol/src/prompts/permissions/sandbox_mode/read_only.md @@ -0,0 +1 @@ +Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `read-only`: The sandbox only permits reading files. Network access is {network_access}. diff --git a/codex-rs/protocol/src/prompts/permissions/sandbox_mode/workspace_write.md b/codex-rs/protocol/src/prompts/permissions/sandbox_mode/workspace_write.md new file mode 100644 index 00000000000..ae74b5f7628 --- /dev/null +++ b/codex-rs/protocol/src/prompts/permissions/sandbox_mode/workspace_write.md @@ -0,0 +1 @@ +Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `workspace-write`: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. Network access is {network_access}.