feat: exec policy integration in shell mcp (#7609)

zhao-oai · bolinfest · web-flow · commit b1c918d8f72e · 2025-12-04T21:55:54.000-08:00
adding execpolicy support into the `posix` mcp

Co-authored-by: Michael Bolin &lt;mbolin@openai.com&gt;
diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
@@ -97,7 +97,10 @@ mod user_shell_command;
 pub mod util;
 
 pub use apply_patch::CODEX_APPLY_PATCH_ARG1;
+pub use command_safety::is_dangerous_command;
 pub use command_safety::is_safe_command;
+pub use exec_policy::ExecPolicyError;
+pub use exec_policy::load_exec_policy;
 pub use safety::get_platform_sandbox;
 pub use safety::set_windows_sandbox_enabled;
 // Re-export the protocol types from the standalone `codex-protocol` crate so existing
diff --git a/codex-rs/exec-server/Cargo.toml b/codex-rs/exec-server/Cargo.toml
@@ -24,6 +24,7 @@ anyhow = { workspace = true }
 async-trait = { workspace = true }
 clap = { workspace = true, features = ["derive"] }
 codex-core = { workspace = true }
+codex-execpolicy = { workspace = true }
 libc = { workspace = true }
 path-absolutize = { workspace = true }
 rmcp = { workspace = true, default-features = false, features = [
diff --git a/codex-rs/exec-server/src/posix.rs b/codex-rs/exec-server/src/posix.rs
@@ -57,8 +57,17 @@
 //!
 use std::path::Path;
 use std::path::PathBuf;
+use std::sync::Arc;
 
+use anyhow::Context as _;
 use clap::Parser;
+use codex_core::config::find_codex_home;
+use codex_core::is_dangerous_command::command_might_be_dangerous;
+use codex_execpolicy::Decision;
+use codex_execpolicy::Policy;
+use codex_execpolicy::RuleMatch;
+use rmcp::ErrorData as McpError;
+use tokio::sync::RwLock;
 use tracing_subscriber::EnvFilter;
 use tracing_subscriber::{self};
 
@@ -87,6 +96,11 @@ struct McpServerCli {
     /// Path to Bash that has been patched to support execve() wrapping.
     #[arg(long = "bash")]
     bash_path: Option<PathBuf>,
+
+    /// Preserve program paths when applying execpolicy (e.g., keep /usr/bin/echo instead of echo).
+    /// Note: this does change the actual program being run.
+    #[arg(long)]
+    preserve_program_paths: bool,
 }
 
 #[tokio::main]
@@ -113,13 +127,19 @@ pub async fn main_mcp_server() -> anyhow::Result<()> {
         Some(path) => path,
         None => mcp::get_bash_path()?,
     };
+    let policy = Arc::new(RwLock::new(load_exec_policy().await?));
 
     tracing::info!("Starting MCP server");
-    let service = mcp::serve(bash_path, execve_wrapper, dummy_exec_policy)
-        .await
-        .inspect_err(|e| {
-            tracing::error!("serving error: {:?}", e);
-        })?;
+    let service = mcp::serve(
+        bash_path,
+        execve_wrapper,
+        policy,
+        cli.preserve_program_paths,
+    )
+    .await
+    .inspect_err(|e| {
+        tracing::error!("serving error: {:?}", e);
+    })?;
 
     service.waiting().await?;
     Ok(())
@@ -146,26 +166,116 @@ pub async fn main_execve_wrapper() -> anyhow::Result<()> {
     std::process::exit(exit_code);
 }
 
-// TODO: replace with execpolicy
-
-fn dummy_exec_policy(file: &Path, argv: &[String], _workdir: &Path) -> ExecPolicyOutcome {
-    if file.ends_with("rm") {
-        ExecPolicyOutcome::Forbidden
-    } else if file.ends_with("git") {
-        ExecPolicyOutcome::Prompt {
-            run_with_escalated_permissions: false,
-        }
-    } else if file == Path::new("/opt/homebrew/bin/gh")
-        && let [_, arg1, arg2, ..] = argv
-        && arg1 == "issue"
-        && arg2 == "list"
-    {
-        ExecPolicyOutcome::Allow {
-            run_with_escalated_permissions: true,
+/// Decide how to handle an exec() call for a specific command.
+///
+/// `file` is the absolute, canonical path to the executable to run, i.e. the first arg to exec.
+/// `argv` is the argv, including the program name (`argv[0]`).
+pub(crate) fn evaluate_exec_policy(
+    policy: &Policy,
+    file: &Path,
+    argv: &[String],
+    preserve_program_paths: bool,
+) -> Result<ExecPolicyOutcome, McpError> {
+    let program_name = format_program_name(file, preserve_program_paths).ok_or_else(|| {
+        McpError::internal_error(
+            format!("failed to format program name for `{}`", file.display()),
+            None,
+        )
+    })?;
+    let command: Vec<String> = std::iter::once(program_name)
+        // Use the normalized program name instead of argv[0].
+        .chain(argv.iter().skip(1).cloned())
+        .collect();
+    let evaluation = policy.check(&command, &|cmd| {
+        if command_might_be_dangerous(cmd) {
+            Decision::Prompt
+        } else {
+            Decision::Allow
         }
+    });
+
+    // decisions driven by policy should run outside sandbox
+    let decision_driven_by_policy = evaluation.matched_rules.iter().any(|rule_match| {
+        !matches!(rule_match, RuleMatch::HeuristicsRuleMatch { .. })
+            && rule_match.decision() == evaluation.decision
+    });
+
+    Ok(match evaluation.decision {
+        Decision::Forbidden => ExecPolicyOutcome::Forbidden,
+        Decision::Prompt => ExecPolicyOutcome::Prompt {
+            run_with_escalated_permissions: decision_driven_by_policy,
+        },
+        Decision::Allow => ExecPolicyOutcome::Allow {
+            run_with_escalated_permissions: decision_driven_by_policy,
+        },
+    })
+}
+
+fn format_program_name(path: &Path, preserve_program_paths: bool) -> Option<String> {
+    if preserve_program_paths {
+        path.to_str().map(str::to_string)
     } else {
-        ExecPolicyOutcome::Allow {
-            run_with_escalated_permissions: false,
-        }
+        path.file_name()?.to_str().map(str::to_string)
+    }
+}
+
+async fn load_exec_policy() -> anyhow::Result<Policy> {
+    let codex_home = find_codex_home().context("failed to resolve codex_home for execpolicy")?;
+    codex_core::load_exec_policy(&codex_home)
+        .await
+        .map_err(anyhow::Error::from)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use codex_execpolicy::Decision;
+    use codex_execpolicy::Policy;
+    use pretty_assertions::assert_eq;
+    use std::path::Path;
+
+    #[test]
+    fn evaluate_exec_policy_uses_heuristics_for_dangerous_commands() {
+        let policy = Policy::empty();
+        let file = Path::new("/bin/rm");
+        let argv = vec!["rm".to_string(), "-rf".to_string(), "/".to_string()];
+
+        let outcome = evaluate_exec_policy(&policy, file, &argv, false).expect("policy evaluation");
+
+        assert_eq!(
+            outcome,
+            ExecPolicyOutcome::Prompt {
+                run_with_escalated_permissions: false
+            }
+        );
+    }
+
+    #[test]
+    fn evaluate_exec_policy_respects_preserve_program_paths() {
+        let mut policy = Policy::empty();
+        policy
+            .add_prefix_rule(
+                &[
+                    "/usr/local/bin/custom-cmd".to_string(),
+                    "--flag".to_string(),
+                ],
+                Decision::Allow,
+            )
+            .expect("policy rule should be added");
+        let file = Path::new("/usr/local/bin/custom-cmd");
+        let argv = vec![
+            "/usr/local/bin/custom-cmd".to_string(),
+            "--flag".to_string(),
+            "value".to_string(),
+        ];
+
+        let outcome = evaluate_exec_policy(&policy, file, &argv, true).expect("policy evaluation");
+
+        assert_eq!(
+            outcome,
+            ExecPolicyOutcome::Allow {
+                run_with_escalated_permissions: true
+            }
+        );
     }
 }
diff --git a/codex-rs/exec-server/src/posix/mcp.rs b/codex-rs/exec-server/src/posix/mcp.rs
@@ -8,6 +8,7 @@ use codex_core::MCP_SANDBOX_STATE_CAPABILITY;
 use codex_core::MCP_SANDBOX_STATE_NOTIFICATION;
 use codex_core::SandboxState;
 use codex_core::protocol::SandboxPolicy;
+use codex_execpolicy::Policy;
 use rmcp::ErrorData as McpError;
 use rmcp::RoleServer;
 use rmcp::ServerHandler;
@@ -27,7 +28,6 @@ use tracing::debug;
 
 use crate::posix::escalate_server::EscalateServer;
 use crate::posix::escalate_server::{self};
-use crate::posix::mcp_escalation_policy::ExecPolicy;
 use crate::posix::mcp_escalation_policy::McpEscalationPolicy;
 use crate::posix::stopwatch::Stopwatch;
 
@@ -78,18 +78,25 @@ pub struct ExecTool {
     tool_router: ToolRouter<ExecTool>,
     bash_path: PathBuf,
     execve_wrapper: PathBuf,
-    policy: ExecPolicy,
+    policy: Arc<RwLock<Policy>>,
+    preserve_program_paths: bool,
     sandbox_state: Arc<RwLock<Option<SandboxState>>>,
 }
 
 #[tool_router]
 impl ExecTool {
-    pub fn new(bash_path: PathBuf, execve_wrapper: PathBuf, policy: ExecPolicy) -> Self {
+    pub fn new(
+        bash_path: PathBuf,
+        execve_wrapper: PathBuf,
+        policy: Arc<RwLock<Policy>>,
+        preserve_program_paths: bool,
+    ) -> Self {
         Self {
             tool_router: Self::tool_router(),
             bash_path,
             execve_wrapper,
             policy,
+            preserve_program_paths,
             sandbox_state: Arc::new(RwLock::new(None)),
         }
     }
@@ -121,7 +128,12 @@ impl ExecTool {
         let escalate_server = EscalateServer::new(
             self.bash_path.clone(),
             self.execve_wrapper.clone(),
-            McpEscalationPolicy::new(self.policy, context, stopwatch.clone()),
+            McpEscalationPolicy::new(
+                self.policy.clone(),
+                context,
+                stopwatch.clone(),
+                self.preserve_program_paths,
+            ),
         );
 
         let result = escalate_server
@@ -198,9 +210,10 @@ impl ServerHandler for ExecTool {
 pub(crate) async fn serve(
     bash_path: PathBuf,
     execve_wrapper: PathBuf,
-    policy: ExecPolicy,
+    policy: Arc<RwLock<Policy>>,
+    preserve_program_paths: bool,
 ) -> Result<RunningService<RoleServer, ExecTool>, rmcp::service::ServerInitializeError> {
-    let tool = ExecTool::new(bash_path, execve_wrapper, policy);
+    let tool = ExecTool::new(bash_path, execve_wrapper, policy, preserve_program_paths);
     tool.serve(stdio()).await
 }
 
diff --git a/codex-rs/exec-server/src/posix/mcp_escalation_policy.rs b/codex-rs/exec-server/src/posix/mcp_escalation_policy.rs
@@ -1,5 +1,6 @@
 use std::path::Path;
 
+use codex_execpolicy::Policy;
 use rmcp::ErrorData as McpError;
 use rmcp::RoleServer;
 use rmcp::model::CreateElicitationRequestParam;
@@ -11,15 +12,10 @@ use rmcp::service::RequestContext;
 use crate::posix::escalate_protocol::EscalateAction;
 use crate::posix::escalation_policy::EscalationPolicy;
 use crate::posix::stopwatch::Stopwatch;
+use std::sync::Arc;
+use tokio::sync::RwLock;
 
-/// This is the policy which decides how to handle an exec() call.
-///
-/// `file` is the absolute, canonical path to the executable to run, i.e. the first arg to exec.
-/// `argv` is the argv, including the program name (`argv[0]`).
-/// `workdir` is the absolute, canonical path to the working directory in which to execute the
-/// command.
-pub(crate) type ExecPolicy = fn(file: &Path, argv: &[String], workdir: &Path) -> ExecPolicyOutcome;
-
+#[derive(Debug, PartialEq, Eq)]
 pub(crate) enum ExecPolicyOutcome {
     Allow {
         run_with_escalated_permissions: bool,
@@ -33,21 +29,25 @@ pub(crate) enum ExecPolicyOutcome {
 /// ExecPolicy with access to the MCP RequestContext so that it can leverage
 /// elicitations.
 pub(crate) struct McpEscalationPolicy {
-    policy: ExecPolicy,
+    /// In-memory execpolicy rules that drive how to handle an exec() call.
+    policy: Arc<RwLock<Policy>>,
     context: RequestContext<RoleServer>,
     stopwatch: Stopwatch,
+    preserve_program_paths: bool,
 }
 
 impl McpEscalationPolicy {
     pub(crate) fn new(
-        policy: ExecPolicy,
+        policy: Arc<RwLock<Policy>>,
         context: RequestContext<RoleServer>,
         stopwatch: Stopwatch,
+        preserve_program_paths: bool,
     ) -> Self {
         Self {
             policy,
             context,
             stopwatch,
+            preserve_program_paths,
         }
     }
 
@@ -103,7 +103,9 @@ impl EscalationPolicy for McpEscalationPolicy {
         argv: &[String],
         workdir: &Path,
     ) -> Result<EscalateAction, rmcp::ErrorData> {
-        let outcome = (self.policy)(file, argv, workdir);
+        let policy = self.policy.read().await;
+        let outcome =
+            crate::posix::evaluate_exec_policy(&policy, file, argv, self.preserve_program_paths)?;
         let action = match outcome {
             ExecPolicyOutcome::Allow {
                 run_with_escalated_permissions,