From 48ae1f5b0c87e88e5adb16e5fc8a20631ac5ce47 Mon Sep 17 00:00:00 2001
From: MiguelHzBz <91468655+MiguelHzBz@users.noreply.github.com>
Date: Fri, 10 Apr 2026 12:17:25 +0200
Subject: [PATCH 1/2] feat(rules): add sandbox disable detection rules
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds rules/default/sandbox-disable.yaml with 5 rules covering all
known bypass techniques against the original 2-rule design:

Rule A (CRITICAL/deny): blocks Write/Edit to agent sandbox config files
  with content that disables sandbox. Hardened against:
  - Edit value-only diff: "enabled":true→false without "sandbox" in diff
    (is_sandbox_disable_value_change catches "enabled"+"false" on config files)
  - Numeric zero: "enabled":0 — JS falsy, not the string "false"
    (is_sandbox_disable_value_zero catches "enabled"+":0"/": 0")
  - settings.local.json: same schema as settings.json, not in original path list
  - Gemini "none"/"disabled" string values: valid disable values, not boolean false

Rule B (WARNING/ask): requires confirmation for Claude Code per-command
  dangerouslyDisableSandbox escape — the only detection layer for this event.

Rule C (CRITICAL/deny): Bash command writing sandbox-disable content to
  an agent settings file — covers echo >, tee, python3 open(), etc.

Rule D (CRITICAL/deny): Codex CLI sandbox bypass flags —
  --dangerously-bypass-approvals-and-sandbox, --sandbox danger-full-access

Rule E (CRITICAL/deny): GEMINI_SANDBOX env var set to a disabling value —
  GEMINI_SANDBOX=none/false/disabled disables Docker isolation without any
  file write, invisible to all previous rules.

Includes:
- test_sandbox_rules.sh: 15 tests for core rule behavior
- test_sandbox_bypass.sh: 30 tests proving bypasses are now caught
---
 rules/default/sandbox-disable.yaml | 335 ++++++++++++++++++
 tests/test_sandbox_bypass.sh       | 523 +++++++++++++++++++++++++++++
 tests/test_sandbox_rules.sh        | 332 ++++++++++++++++++
 3 files changed, 1190 insertions(+)
 create mode 100644 rules/default/sandbox-disable.yaml
 create mode 100644 tests/test_sandbox_bypass.sh
 create mode 100644 tests/test_sandbox_rules.sh

diff --git a/rules/default/sandbox-disable.yaml b/rules/default/sandbox-disable.yaml
new file mode 100644
index 0000000..528924b
--- /dev/null
+++ b/rules/default/sandbox-disable.yaml
@@ -0,0 +1,335 @@
+# Sandbox disable detection rules for AI coding agents.
+# Detects prompt injection attacks directing agents to disable their own
+# OS-level sandbox isolation, and per-command sandbox escape requests.
+#
+# Placed in rules/default/ as part of the default ruleset.
+#
+# Coverage:
+#   Rule A (DENY):  Agent writing sandbox-disable content to its own config file
+#                   — Claude Code (sandbox.enabled: false/0 in settings.json or
+#                     settings.local.json), Codex (danger-full-access in config.toml),
+#                     Gemini CLI (tools.sandbox: false/none/disabled in settings.json)
+#   Rule B (ASK):   Claude Code per-command escape via dangerouslyDisableSandbox
+#                   Bash tool parameter — the ONLY layer that can detect this
+#                   since it is a tool call parameter invisible to Falco syscalls.
+#   Rule C (DENY):  Bash command writing sandbox-disable content to an agent
+#                   settings file — catches echo/python/tee/cat redirection that
+#                   bypasses the Write/Edit tool check entirely.
+#   Rule D (DENY):  Codex CLI startup sandbox bypass flags via Bash
+#                   — --dangerously-bypass-approvals-and-sandbox, --sandbox danger-full-access
+#   Rule E (DENY):  GEMINI_SANDBOX environment variable set to a disabling value
+#                   — env var disable bypasses file writes and dangerouslyDisableSandbox
+#
+# Bypass techniques closed by the hardened macros:
+#   - Edit value-only diff: "enabled":true → "enabled":false without "sandbox" in diff
+#     → is_sandbox_disable_value_change catches "enabled" + "false" on sandbox config files
+#   - Numeric zero: "enabled": 0 (JS falsy) instead of boolean false
+#     → is_sandbox_disable_value_zero catches "enabled" + ":0" / ": 0"
+#   - settings.local.json not in original path list
+#     → is_agent_sandbox_config now includes "/.claude/settings.local.json"
+#   - Gemini "sandbox":"none" / "disabled" string values
+#     → is_sandbox_disable_gemini_none / is_sandbox_disable_gemini_disabled catch "sandbox" + "none" / "disabled"
+#   - Bash write (echo >, tee, python3 open): bypasses Write/Edit rule
+#     → Rule C adds Bash + settings path + sandbox-disable content check
+#   - Codex CLI flags (--dangerously-bypass-approvals-and-sandbox, --sandbox danger-full-access)
+#     → Rule D covers Codex startup bypass flags
+#   - GEMINI_SANDBOX=none/false/disabled env var: invisible to file-write rules
+#     → Rule E covers the Gemini env var disable path
+#
+# Known gaps (not detectable via tool call inspection):
+#   - Unicode escape sequences in JSON keys: "s\u0061ndbox" parses to "sandbox" at
+#     runtime but the raw tool.input string contains \u0061, not a. Requires a
+#     pre-processing decode step not available in Falco condition evaluation.
+#   - Indirect/deferred writes via opaque shell scripts: if the agent writes a
+#     script that disables sandbox and then executes it, the write event contains
+#     no sandbox keywords. Covered partially by the tmp-staging rules in threat_rules.yaml.
+#
+# Official sandbox docs:
+#   Claude Code : https://code.claude.com/docs/sandboxing
+#   Codex       : https://developers.openai.com/codex/concepts/sandboxing
+#   Gemini CLI  : https://geminicli.com/docs/cli/sandbox/
+
+# ---------------------------------------------------------------------------
+# Macros: file path matching
+# ---------------------------------------------------------------------------
+
+# Matches agent sandbox configuration file paths.
+# Uses endswith on tool.real_file_path (canonicalized, absolute path).
+# HARDENED: includes settings.local.json (same schema as settings.json,
+# loaded by Claude Code with equal priority — original rule missed this path).
+- macro: is_agent_sandbox_config
+  condition: >
+    tool.real_file_path endswith "/.claude/settings.json"
+    or tool.real_file_path endswith "/.claude/settings.local.json"
+    or tool.real_file_path endswith "/.codex/config.toml"
+    or tool.real_file_path endswith "/.gemini/settings.json"
+
+# ---------------------------------------------------------------------------
+# Macros: Write/Edit content patterns (Rule A)
+# ---------------------------------------------------------------------------
+
+# Codex: sandbox_mode = "danger-full-access" in config.toml
+- macro: is_sandbox_disable_codex
+  condition: tool.input contains "danger-full-access"
+
+# Claude Code / Gemini: "sandbox": {"enabled": false} or "tools": {"sandbox": false}
+# Also catches Edit diffs whose diff strings carry the "sandbox" key as surrounding context.
+- macro: is_sandbox_disable_enabled_false
+  condition: >
+    tool.input contains "sandbox"
+    and tool.input contains "false"
+
+# Gemini: "security": {"toolSandboxing": false}
+- macro: is_sandbox_toolsandboxing_false
+  condition: >
+    tool.input contains "toolSandboxing"
+    and tool.input contains "false"
+
+# Claude Code: "allowUnsandboxedCommands": true (enables the per-command escape globally)
+- macro: is_sandbox_allow_unsandboxed
+  condition: >
+    tool.input contains "allowUnsandboxedCommands"
+    and tool.input contains "true"
+
+# HARDENED: Edit value-only diff bypass.
+# When an Edit changes "enabled":true → "enabled":false, the "sandbox" key is
+# in the surrounding file context but NOT in old_string or new_string. Since
+# tool.input for Edit contains only the diff strings, is_sandbox_disable_enabled_false
+# never fires. This macro matches the value change alone; Rule A scopes it to sandbox config files.
+- macro: is_sandbox_disable_value_change
+  condition: >
+    tool.input contains "enabled"
+    and tool.input contains "false"
+
+# HARDENED: Numeric zero bypass.
+# JavaScript (Node.js) evaluates `if (settings.sandbox.enabled)` as falsy for
+# the integer 0. Writing "enabled": 0 bypasses rules that only check for "false".
+- macro: is_sandbox_disable_value_zero
+  condition: >
+    tool.input contains "enabled"
+    and (tool.input contains ":0" or tool.input contains ": 0")
+
+# HARDENED: Gemini string value bypass.
+# Gemini CLI accepts "sandbox": "none" and "sandbox": "disabled" to disable
+# Docker-per-tool-call isolation. These string values are not the boolean false
+# and were not caught by is_sandbox_disable_enabled_false.
+- macro: is_sandbox_disable_gemini_none
+  condition: >
+    tool.input contains "sandbox"
+    and tool.input contains "none"
+
+- macro: is_sandbox_disable_gemini_disabled
+  condition: >
+    tool.input contains "sandbox"
+    and tool.input contains "disabled"
+
+# Aggregate content condition for Rule A.
+- macro: is_sandbox_disable_content
+  condition: >
+    is_sandbox_disable_codex
+    or is_sandbox_disable_enabled_false
+    or is_sandbox_toolsandboxing_false
+    or is_sandbox_allow_unsandboxed
+    or is_sandbox_disable_value_change
+    or is_sandbox_disable_value_zero
+    or is_sandbox_disable_gemini_none
+    or is_sandbox_disable_gemini_disabled
+
+# ---------------------------------------------------------------------------
+# Macros: Bash-level path and content detection (Rules C, D, E)
+# ---------------------------------------------------------------------------
+
+# Matches Bash commands that reference an agent settings file path.
+# Covers full paths, expanduser results, $HOME expansion, and relative substrings.
+- macro: is_bash_sandbox_settings_path
+  condition: >
+    tool.input_command contains ".claude/settings.json"
+    or tool.input_command contains ".claude/settings.local.json"
+    or tool.input_command contains ".codex/config.toml"
+    or tool.input_command contains ".gemini/settings.json"
+
+# Helper macros for Bash sandbox-disable content detection.
+# Split into separate macros to avoid (A and B) inside or-chains.
+- macro: is_bash_disable_codex
+  condition: tool.input_command contains "danger-full-access"
+
+- macro: is_bash_disable_sandbox_false
+  condition: >
+    tool.input_command contains "sandbox"
+    and tool.input_command contains "false"
+
+- macro: is_bash_disable_toolsandboxing_false
+  condition: >
+    tool.input_command contains "toolSandboxing"
+    and tool.input_command contains "false"
+
+- macro: is_bash_disable_allow_unsandboxed
+  condition: >
+    tool.input_command contains "allowUnsandboxedCommands"
+    and tool.input_command contains "true"
+
+- macro: is_bash_disable_sandbox_none
+  condition: >
+    tool.input_command contains "sandbox"
+    and tool.input_command contains "none"
+
+- macro: is_bash_disable_sandbox_disabled
+  condition: >
+    tool.input_command contains "sandbox"
+    and tool.input_command contains "disabled"
+
+# Aggregate: any sandbox-disable content in a Bash command string.
+- macro: is_bash_sandbox_disable_cmd
+  condition: >
+    is_bash_disable_codex
+    or is_bash_disable_sandbox_false
+    or is_bash_disable_toolsandboxing_false
+    or is_bash_disable_allow_unsandboxed
+    or is_bash_disable_sandbox_none
+    or is_bash_disable_sandbox_disabled
+
+# Codex CLI bypass flags (Rule D).
+- macro: is_codex_bypass_flag
+  condition: tool.input_command contains "dangerously-bypass-approvals-and-sandbox"
+
+- macro: is_codex_danger_flag
+  condition: >
+    tool.input_command contains "codex"
+    and tool.input_command contains "danger-full-access"
+
+- macro: is_codex_sandbox_bypass
+  condition: >
+    is_codex_bypass_flag
+    or is_codex_danger_flag
+
+# GEMINI_SANDBOX env var bypass helpers (Rule E).
+- macro: is_gemini_env_none
+  condition: >
+    tool.input_command contains "GEMINI_SANDBOX"
+    and tool.input_command contains "none"
+
+- macro: is_gemini_env_false
+  condition: >
+    tool.input_command contains "GEMINI_SANDBOX"
+    and tool.input_command contains "false"
+
+- macro: is_gemini_env_disabled
+  condition: >
+    tool.input_command contains "GEMINI_SANDBOX"
+    and tool.input_command contains "disabled"
+
+- macro: is_gemini_sandbox_env_bypass
+  condition: >
+    is_gemini_env_none
+    or is_gemini_env_false
+    or is_gemini_env_disabled
+
+# ---------------------------------------------------------------------------
+# Rules
+# ---------------------------------------------------------------------------
+
+- rule: Deny agent writing sandbox-disable configuration
+  desc: >
+    Blocks a coding agent from writing or editing its own sandbox configuration
+    file with content that disables OS-level process isolation. Claude Code uses
+    "sandbox.enabled: false" in settings.json or settings.local.json to disable
+    bubblewrap (Linux) or Seatbelt (macOS); Codex uses "sandbox_mode =
+    danger-full-access" in config.toml to disable landlock and seccomp; Gemini
+    CLI uses "tools.sandbox: false/none/disabled" in settings.json to disable
+    Docker-per-tool-call isolation. This pattern indicates a prompt injection
+    attack (AML.T0051) directing the agent to remove its own security boundaries
+    — an LLM jailbreak (AML.T0054) enabling subsequent tool calls to run
+    unsandboxed on the host. Fires at PreToolUse, before the write executes.
+  condition: >
+    tool.name in ("Write", "Edit")
+    and tool.real_file_path != ""
+    and is_agent_sandbox_config
+    and is_sandbox_disable_content
+  output: >
+    Falco blocked %agent.name from disabling its sandbox by writing to %tool.real_file_path
+  priority: CRITICAL
+  source: coding_agent
+  tags: [coding_agent_deny, AML.T0054_llm_jailbreak, AML.T0051_llm_prompt_injection,
+         mitre_t1562.001_impair_defenses_disable_or_modify_tools]
+
+- rule: Ask before Claude Code per-command sandbox escape
+  desc: >
+    Requires user confirmation when Claude Code invokes the Bash tool with the
+    dangerouslyDisableSandbox parameter, bypassing bubblewrap for a single command.
+    This is Claude Code's built-in escape mechanism used when sandbox restrictions
+    block a failing command (e.g., Docker is incompatible with the sandbox by design).
+    While sometimes legitimate, it is also the primary vector for prompt injection
+    attacks that instruct Claude to run specific commands outside the sandbox.
+    This rule is the ONLY detection layer for this event — dangerouslyDisableSandbox
+    is a Bash tool call parameter invisible to Falco at the syscall level.
+    Uses coding_agent_ask (not deny) since legitimate uses exist.
+  condition: >
+    tool.name = "Bash"
+    and tool.input contains "dangerouslyDisableSandbox"
+  output: >
+    Falco requires confirmation before Claude Code runs an unsandboxed Bash command (%tool.input_command)
+  priority: WARNING
+  source: coding_agent
+  tags: [coding_agent_ask, AML.T0054_llm_jailbreak, AML.T0051_llm_prompt_injection,
+         mitre_t1562.001_impair_defenses_disable_or_modify_tools]
+
+- rule: Deny Bash command writing sandbox-disable content to agent settings file
+  desc: >
+    Blocks Bash commands that combine a reference to an agent sandbox settings
+    file path with sandbox-disabling content. This closes the bypass where an
+    agent uses shell redirection (echo >, tee, python3 open()) to write the same
+    sandbox-disabling payload that the Write/Edit tool rule (Rule A) would catch.
+    Since Bash is the Swiss Army knife of file writes, this rule covers the
+    entire class: echo, tee, cat heredoc, python3 -c, node -e, sed -i, etc.
+    The path check (tool.input_command contains ".claude/settings.json") anchors
+    the rule to the sandbox config files; the content check filters for the same
+    disable patterns used in Rule A.
+  condition: >
+    tool.name = "Bash"
+    and is_bash_sandbox_settings_path
+    and is_bash_sandbox_disable_cmd
+  output: >
+    Falco blocked %agent.name from disabling its sandbox via Bash command: %tool.input_command
+  priority: CRITICAL
+  source: coding_agent
+  tags: [coding_agent_deny, AML.T0054_llm_jailbreak, AML.T0051_llm_prompt_injection,
+         mitre_t1562.001_impair_defenses_disable_or_modify_tools]
+
+- rule: Deny Codex CLI sandbox bypass flag
+  desc: >
+    Blocks Bash invocations of the Codex CLI with flags that bypass sandbox
+    isolation. "--dangerously-bypass-approvals-and-sandbox" disables all
+    landlock/seccomp enforcement and approval prompts. "--sandbox danger-full-access"
+    starts Codex in danger-full-access mode, granting unrestricted filesystem and
+    network access. Neither of these involves a file write — they are startup flags
+    that configure the Codex sandbox at process initialization, invisible to Rule A.
+    Fires at PreToolUse before the Codex process starts.
+  condition: >
+    tool.name = "Bash"
+    and is_codex_sandbox_bypass
+  output: >
+    Falco blocked %agent.name from starting Codex with a sandbox bypass flag (%tool.input_command)
+  priority: CRITICAL
+  source: coding_agent
+  tags: [coding_agent_deny, AML.T0054_llm_jailbreak, AML.T0051_llm_prompt_injection,
+         mitre_t1562.001_impair_defenses_disable_or_modify_tools]
+
+- rule: Deny Gemini CLI sandbox disable via environment variable
+  desc: >
+    Blocks Bash commands that set GEMINI_SANDBOX to a disabling value (none,
+    false, disabled) before invoking Gemini CLI. Gemini reads GEMINI_SANDBOX at
+    startup to select its sandbox backend — setting it to a non-Docker/Podman
+    value disables Docker-per-tool-call isolation without writing any settings
+    file. This vector bypasses Rule A entirely (no file write) and is not covered
+    by Rule B (no dangerouslyDisableSandbox parameter). Inline env assignment
+    (GEMINI_SANDBOX=none gemini) and exported assignment (export GEMINI_SANDBOX=none)
+    are both caught via the tool.input_command string.
+  condition: >
+    tool.name = "Bash"
+    and is_gemini_sandbox_env_bypass
+  output: >
+    Falco blocked %agent.name from disabling Gemini sandbox via environment variable (%tool.input_command)
+  priority: CRITICAL
+  source: coding_agent
+  tags: [coding_agent_deny, AML.T0054_llm_jailbreak, AML.T0051_llm_prompt_injection,
+         mitre_t1562.001_impair_defenses_disable_or_modify_tools]
diff --git a/tests/test_sandbox_bypass.sh b/tests/test_sandbox_bypass.sh
new file mode 100644
index 0000000..fc6fbda
--- /dev/null
+++ b/tests/test_sandbox_bypass.sh
@@ -0,0 +1,523 @@
+#!/usr/bin/env bash
+#
+# Bypass tests for the sandbox-disable rule suite (rules/default/sandbox-disable.yaml).
+#
+# Each test demonstrates an evasion technique that bypassed the original rules.
+# All tests run against the HARDENED rules and assert that the bypass is now caught.
+# Comments on each test explain why the original rule missed it.
+#
+# Bypasses covered:
+#   B1  Edit value-only diff         "enabled":true → "enabled":false — "sandbox" not in diff
+#   B2  Numeric zero                 "enabled": 0 — rule checked contains "false", not 0
+#   B3  settings.local.json          not in original is_agent_sandbox_config path list
+#   B4  Gemini "none" string         valid disable value, not "false"
+#   B5  Gemini "disabled" string     same — string value, not boolean false
+#   B6  Bash echo write              echo > settings.json bypasses Write/Edit rule
+#   B7  Bash python write            python3 -c ... open(settings.json) bypasses Write/Edit rule
+#   B8  Codex --dangerously-bypass   CLI flag, not a file write
+#   B9  Codex --sandbox flag         codex --sandbox danger-full-access CLI flag
+#   B10 GEMINI_SANDBOX=none          env var, not a file write, not dangerouslyDisableSandbox
+#   B11 GEMINI_SANDBOX=false         same
+#   B12 GEMINI_SANDBOX=disabled      same
+#
+# Requires: Falco 0.43+, built plugin (.so/.dylib), built interceptor.
+# Run on EC2 Ubuntu 22.04 or isolated Docker. Do NOT run locally on macOS.
+#
+# Usage:
+#   bash tests/test_sandbox_bypass.sh
+#
+set -uo pipefail
+
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+ROOT_DIR="$(cd -- "$SCRIPT_DIR/.." &>/dev/null && pwd)"
+
+# --- Binary discovery ---
+case "$(uname -s)" in
+    Darwin) PLUGIN_EXT="dylib" ;;
+    *)      PLUGIN_EXT="so" ;;
+esac
+
+HOOK="${HOOK:-}"  # interceptor binary; a HOOK value supplied by the caller's environment skips discovery
+if [[ -z "$HOOK" ]]; then  # otherwise try the in-tree release build first, then the copy under ~/.coding-agents-kit
+    if [[ -x "${ROOT_DIR}/hooks/claude-code/target/release/claude-interceptor" ]]; then
+        HOOK="${ROOT_DIR}/hooks/claude-code/target/release/claude-interceptor"
+    elif [[ -x "${HOME}/.coding-agents-kit/bin/claude-interceptor" ]]; then
+        HOOK="${HOME}/.coding-agents-kit/bin/claude-interceptor"
+    fi  # if neither exists HOOK stays empty; the preflight check below reports it
+fi
+
+PLUGIN_LIB="${PLUGIN_LIB:-}"  # Falco plugin library; a PLUGIN_LIB value supplied by the caller skips discovery
+if [[ -z "$PLUGIN_LIB" ]]; then  # otherwise try the in-tree release build first, then the copy under ~/.coding-agents-kit
+    if [[ -f "${ROOT_DIR}/plugins/coding-agent-plugin/target/release/libcoding_agent_plugin.${PLUGIN_EXT}" ]]; then
+        PLUGIN_LIB="${ROOT_DIR}/plugins/coding-agent-plugin/target/release/libcoding_agent_plugin.${PLUGIN_EXT}"
+    elif [[ -f "${HOME}/.coding-agents-kit/share/libcoding_agent_plugin.${PLUGIN_EXT}" ]]; then
+        PLUGIN_LIB="${HOME}/.coding-agents-kit/share/libcoding_agent_plugin.${PLUGIN_EXT}"
+    fi  # if neither exists PLUGIN_LIB stays empty; the preflight check below reports it
+fi
+
+RULES_FILE="${ROOT_DIR}/rules/default/sandbox-disable.yaml"
+SEEN_FILE="${ROOT_DIR}/rules/seen.yaml"
+
+E2E_DIR="${ROOT_DIR}/build/e2e-sandbox-bypass-$$"
+mkdir -p "$E2E_DIR"
+SOCK="${E2E_DIR}/broker.sock"
+HTTP_PORT=$((24000 + ($$ % 1000)))
+PASS=0
+FAIL=0
+FALCO_PID=""
+
+# --- Preflight checks ---
+for bin in falco nc "$HOOK"; do  # nc is required too: start_falco uses `nc -z` to wait for the HTTP port
+    if [[ -z "$bin" ]] || { [[ ! -x "$bin" ]] && ! command -v "$bin" &>/dev/null; }; then  # empty entry = interceptor was not discovered
+        echo "ERROR: required binary not found: ${bin:-claude-interceptor}" >&2
+        echo "  Build interceptor: cd hooks/claude-code && cargo build --release" >&2
+        echo "  Install kit:       bash installers/linux/install.sh" >&2
+        exit 1
+    fi
+done
+if [[ -z "$PLUGIN_LIB" ]] || [[ ! -f "$PLUGIN_LIB" ]]; then
+    echo "ERROR: plugin library not found." >&2
+    echo "  Build it: cd plugins/coding-agent-plugin && cargo build --release" >&2
+    exit 1
+fi
+if [[ ! -f "$RULES_FILE" ]]; then
+    echo "ERROR: $RULES_FILE not found." >&2
+    exit 1
+fi
+if [[ ! -f "$SEEN_FILE" ]]; then
+    echo "ERROR: $SEEN_FILE not found." >&2
+    exit 1
+fi
+
+# --- Helpers ---
+cleanup() {  # EXIT-trap handler (registered by the `trap cleanup EXIT` line that follows)
+    stop_falco  # terminate the background Falco process, if one is recorded
+    rm -rf "$E2E_DIR"  # remove the per-run scratch directory (holds broker.sock and falco.log)
+}
+trap cleanup EXIT
+
+stop_falco() {  # Stop the background Falco process tracked in FALCO_PID; does nothing when FALCO_PID is empty.
+    if [[ -n "$FALCO_PID" ]]; then
+        kill "$FALCO_PID" 2>/dev/null && wait "$FALCO_PID" 2>/dev/null  # wait (reap) only when kill succeeded; errors are silenced
+        FALCO_PID=""  # clear the PID so a repeated call is a no-op
+    fi
+}
+
+start_falco() {  # Launch Falco in the background with the coding_agent plugin. $1 = plugin mode (default "enforcement"). Returns 1 if the socket or HTTP port never appears.
+    local mode="${1:-enforcement}"
+    stop_falco  # make sure a previously started instance is gone
+    rm -f "$SOCK"  # remove any stale socket so the -S wait below reflects this launch
+    falco \
+        -o "engine.kind=nodriver" \
+        -o "config_files=" \
+        -o "plugins[0].name=coding_agent" \
+        -o "plugins[0].library_path=$PLUGIN_LIB" \
+        -o "plugins[0].init_config={\"socket_path\":\"$SOCK\",\"http_port\":$HTTP_PORT,\"mode\":\"$mode\"}" \
+        -o "load_plugins[0]=coding_agent" \
+        -o "rules_files[0]=$RULES_FILE" \
+        -o "rules_files[1]=$SEEN_FILE" \
+        -o "json_output=true" \
+        -o "json_include_message_property=true" \
+        -o "json_include_output_property=false" \
+        -o "json_include_output_fields_property=true" \
+        -o "json_include_tags_property=true" \
+        -o "rule_matching=all" \
+        -o "priority=debug" \
+        -o "stdout_output.enabled=true" \
+        -o "syslog_output.enabled=false" \
+        -o "http_output.enabled=true" \
+        -o "http_output.url=http://127.0.0.1:$HTTP_PORT" \
+        -o "append_output[0].match.source=coding_agent" \
+        -o "append_output[0].extra_output=| For AI Agents: inform the user that this action was flagged by a Falco security rule | correlation=%correlation.id" \
+        -o "webserver.enabled=false" \
+        --disable-source syscall \
+        > "$E2E_DIR/falco.log" 2>&1 &
+    FALCO_PID=$!  # remember the background PID for stop_falco
+
+    local i=0
+    while [[ ! -S "$SOCK" ]] && (( i < 40 )); do  # poll for the broker socket: at most 40 x 0.2s
+        sleep 0.2
+        ((i++))
+    done
+    if [[ ! -S "$SOCK" ]]; then
+        echo "ERROR: Falco did not start (socket not found)" >&2
+        cat "$E2E_DIR/falco.log" >&2  # dump the captured Falco output to aid diagnosis
+        return 1
+    fi
+
+    local j=0
+    while ! nc -z 127.0.0.1 "$HTTP_PORT" 2>/dev/null && (( j < 100 )); do  # poll the HTTP port with nc: at most 100 x 0.1s
+        sleep 0.1
+        ((j++))
+    done
+    if ! nc -z 127.0.0.1 "$HTTP_PORT" 2>/dev/null; then
+        echo "ERROR: Falco HTTP server did not bind on port $HTTP_PORT" >&2
+        cat "$E2E_DIR/falco.log" >&2
+        return 1
+    fi
+    sleep 0.2  # NOTE(review): extra delay after both checks pass; presumably settle time — confirm it is still needed
+}
+
+run_hook() {  # Pipe one hook-event JSON document ($1) into the interceptor ($HOOK); whatever the interceptor prints on stdout is this function's output.
+    local input="$1"
+    echo "$input" | \
+        CODING_AGENTS_KIT_SOCKET="$SOCK" \
+        CODING_AGENTS_KIT_TIMEOUT_MS=5000 \
+        "$HOOK" 2>/dev/null || true  # interceptor stderr is discarded and the exit status is forced to 0
+}
+
+make_input() {  # Print a PreToolUse hook-event JSON object. $1 = tool name, $2 = tool_input as raw JSON, $3 = cwd (default /tmp), $4 = tool_use_id (optional).
+    local tool_name="$1"
+    local tool_input="$2"  # inserted into the output unquoted, so it must already be valid JSON
+    local cwd="${3:-/tmp}"
+    local id="${4:-toolu_$(date +%s%N)}"  # default id: "toolu_" followed by the output of `date +%s%N`
+    echo "{\"hook_event_name\":\"PreToolUse\",\"tool_name\":\"$tool_name\",\"tool_input\":$tool_input,\"session_id\":\"bypass-test\",\"cwd\":\"$cwd\",\"tool_use_id\":\"$id\"}"  # string fields are interpolated without JSON escaping; session_id is fixed
+}
+
+pass() { echo "  PASS: $1"; ((PASS++)) || true; }  # $1 = test label. `|| true` masks the status 1 that ((PASS++)) yields when PASS was 0.
+fail() { echo "  FAIL: $1"; echo "    expected: $2"; echo "    got: $3"; ((FAIL++)) || true; }  # $1 = test label, $2 = expected, $3 = actual output. Same `|| true` guard.
+
+assert_decision() {  # Record a pass when hook output $1 contains "permissionDecision":"<$2>"; otherwise record a fail. $3 = test label.
+    local output="$1" expected="$2" msg="$3"
+    if echo "$output" | grep -qF "\"permissionDecision\":\"$expected\""; then  # fixed-string match: the output must contain this exact text, with no whitespace around the colon
+        pass "$msg"
+    else
+        fail "$msg" "decision=$expected" "$output"  # the full hook output is printed as the "got" value
+    fi
+}
+
+# --- Start Falco ---
+echo "Starting Falco with hardened sandbox rules..."
+echo "  Plugin:  $PLUGIN_LIB"
+echo "  Rules:   $RULES_FILE"
+echo "  Seen:    $SEEN_FILE"
+echo ""
+start_falco || exit 1
+echo "Falco running (PID=$FALCO_PID, socket=$SOCK, http=$HTTP_PORT)"
+echo ""
+
+# =============================================================================
+# B1: Edit with value-only diff
+#
+# Original bypass: Edit ~/.claude/settings.json changing only the value
+# "enabled":true → "enabled":false. The key "sandbox" does not appear in
+# old_string or new_string, so is_sandbox_disable_enabled_false (which
+# requires both "sandbox" and "false" in tool.input) never fired.
+#
+# Fix: is_sandbox_disable_value_change catches tool.input containing
+# "enabled" and "false" on any agent sandbox config file.
+# =============================================================================
+echo "=== B1: Edit value-only diff (sandbox key absent from diff) ==="
+
+out=$(run_hook "$(make_input Edit \
+    '{"file_path":"/home/user/.claude/settings.json","old_string":"\"enabled\":true","new_string":"\"enabled\":false"}' \
+    /tmp toolu_b1a)")
+assert_decision "$out" "deny" "B1a: Edit enabled:true→false on settings.json — sandbox key absent from diff"
+
+# Same bypass on Codex config (TOML edit, value-only)
+out=$(run_hook "$(make_input Edit \
+    '{"file_path":"/home/user/.codex/config.toml","old_string":"enabled = true","new_string":"enabled = false"}' \
+    /tmp toolu_b1b)")
+assert_decision "$out" "deny" "B1b: Edit enabled=true→false on config.toml — sandbox key absent from diff"
+
+echo ""
+
+# =============================================================================
+# B2: Numeric zero instead of boolean false
+#
+# Original bypass: Write {"sandbox":{"enabled": 0}} to settings.json.
+# JavaScript (Node.js) treats 0 as falsy, so Claude Code sandbox check
+# `if (settings.sandbox.enabled)` evaluates false. The rule checked
+# contains "false" — the digit 0 is not the string "false".
+#
+# Fix: is_sandbox_disable_value_zero catches "enabled" + ":0" or ": 0".
+# =============================================================================
+echo "=== B2: Numeric 0 instead of boolean false ==="
+
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.claude/settings.json","content":"{\"sandbox\":{\"enabled\":0}}"}' \
+    /tmp toolu_b2a)")
+assert_decision "$out" "deny" "B2a: Write sandbox.enabled:0 to settings.json (JS falsy, not string false)"
+
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.claude/settings.json","content":"{\"sandbox\": {\"enabled\": 0}}"}' \
+    /tmp toolu_b2b)")
+assert_decision "$out" "deny" "B2b: Write sandbox.enabled: 0 (space before 0) to settings.json"
+
+echo ""
+
+# =============================================================================
+# B3: Write to settings.local.json
+#
+# Original bypass: Claude Code reads ~/.claude/settings.local.json with the
+# same priority as settings.json. The original is_agent_sandbox_config only
+# listed settings.json — settings.local.json was not covered.
+#
+# Fix: is_agent_sandbox_config now includes "/.claude/settings.local.json".
+# =============================================================================
+echo "=== B3: settings.local.json not in original path list ==="
+
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.claude/settings.local.json","content":"{\"sandbox\":{\"enabled\":false}}"}' \
+    /tmp toolu_b3a)")
+assert_decision "$out" "deny" "B3a: Write sandbox.enabled:false to settings.local.json"
+
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.claude/settings.local.json","content":"{\"allowUnsandboxedCommands\":true}"}' \
+    /tmp toolu_b3b)")
+assert_decision "$out" "deny" "B3b: Write allowUnsandboxedCommands:true to settings.local.json"
+
+# Project-level settings.local.json (under /project, outside $HOME) is still caught
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/project/.claude/settings.local.json","content":"{\"sandbox\":{\"enabled\":false}}"}' \
+    /tmp toolu_b3c)")
+assert_decision "$out" "deny" "B3c: Write sandbox.enabled:false to project settings.local.json"
+
+echo ""
+
+# =============================================================================
+# B4: Gemini "none" / "disabled" string values
+#
+# Original bypass: Gemini CLI accepts "sandbox": "none" (and "disabled") to
+# disable Docker-per-tool-call isolation. The original rules only checked for
+# the boolean false — the string variants were not covered.
+#
+# Fix: is_sandbox_disable_gemini_none / is_sandbox_disable_gemini_disabled catch "sandbox" + "none" / "disabled".
+# =============================================================================
+echo "=== B4: Gemini string disable values (none, disabled) ==="
+
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.gemini/settings.json","content":"{\"sandbox\":\"none\"}"}' \
+    /tmp toolu_b4a)")
+assert_decision "$out" "deny" "B4a: Write sandbox:none to Gemini settings.json"
+
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.gemini/settings.json","content":"{\"tools\":{\"sandbox\":\"none\"}}"}' \
+    /tmp toolu_b4b)")
+assert_decision "$out" "deny" "B4b: Write tools.sandbox:none to Gemini settings.json"
+
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.gemini/settings.json","content":"{\"sandbox\":\"disabled\"}"}' \
+    /tmp toolu_b4c)")
+assert_decision "$out" "deny" "B4c: Write sandbox:disabled to Gemini settings.json"
+
+echo ""
+
+# =============================================================================
+# B4-safe: Gemini sandbox values that keep isolation enabled (must not fire)
+# Confirm that sandbox-enabling values are still allowed after the hardening.
+# =============================================================================
+echo "=== B4-safe: Gemini sandbox enable values not blocked ==="
+
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.gemini/settings.json","content":"{\"sandbox\":\"docker\"}"}' \
+    /tmp toolu_b5_safe_docker)")
+assert_decision "$out" "allow" "B4-safe: Write sandbox:docker to Gemini settings.json allowed"
+
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.gemini/settings.json","content":"{\"sandbox\":\"podman\"}"}' \
+    /tmp toolu_b5_safe_podman)")
+assert_decision "$out" "allow" "B4-safe: Write sandbox:podman to Gemini settings.json allowed"
+
+echo ""
+
+# =============================================================================
+# B6: Bash echo write to settings file
+#
+# Original bypass: Write/Edit rule only intercepts the dedicated Write and Edit
+# tool calls. An agent can write the same content using Bash with shell
+# redirection (echo, tee, cat heredoc). The Bash rule only checked for
+# "dangerouslyDisableSandbox" — no file-write + content check existed.
+#
+# Fix: Rule C detects Bash commands that reference an agent settings file path
+# AND contain sandbox-disable content.
+# =============================================================================
+echo "=== B6: Bash write via shell redirection ==="
+
+# echo redirect
+out=$(run_hook "$(make_input Bash \
+    '{"command":"echo '"'"'{\"sandbox\":{\"enabled\":false}}'"'"' > /home/user/.claude/settings.json"}' \
+    /tmp toolu_b6a)")
+assert_decision "$out" "deny" "B6a: echo sandbox:false > settings.json via Bash"
+
+# tee
+out=$(run_hook "$(make_input Bash \
+    '{"command":"echo '"'"'{\"sandbox\":{\"enabled\":false}}'"'"' | tee /home/user/.claude/settings.json"}' \
+    /tmp toolu_b6b)")
+assert_decision "$out" "deny" "B6b: echo sandbox:false | tee settings.json via Bash"
+
+# python3 open write
+out=$(run_hook "$(make_input Bash \
+    '{"command":"python3 -c \"open('/home/user/.claude/settings.json','w').write('{\\\"sandbox\\\":{\\\"enabled\\\":false}}')\""}' \
+    /tmp toolu_b6c)")
+assert_decision "$out" "deny" "B6c: python3 open() write sandbox:false to settings.json"
+
+# Codex config via Bash
+out=$(run_hook "$(make_input Bash \
+    '{"command":"echo '"'"'sandbox_mode = \"danger-full-access\"'"'"' >> /home/user/.codex/config.toml"}' \
+    /tmp toolu_b6d)")
+assert_decision "$out" "deny" "B6d: echo danger-full-access >> config.toml via Bash"
+
+# allowUnsandboxedCommands via Bash
+out=$(run_hook "$(make_input Bash \
+    '{"command":"echo '"'"'{\"allowUnsandboxedCommands\":true}'"'"' > /home/user/.claude/settings.json"}' \
+    /tmp toolu_b6e)")
+assert_decision "$out" "deny" "B6e: echo allowUnsandboxedCommands:true > settings.json via Bash"
+
+echo ""
+
+# =============================================================================
+# B6-safe: Bash commands that reference settings files but are read-only
+# =============================================================================
+echo "=== B6-safe: Bash read-only settings access not blocked ==="
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"cat /home/user/.claude/settings.json"}' \
+    /tmp toolu_b6_safe_cat)")
+assert_decision "$out" "allow" "B6-safe: cat settings.json allowed (read-only)"
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"grep sandbox /home/user/.claude/settings.json"}' \
+    /tmp toolu_b6_safe_grep)")
+assert_decision "$out" "allow" "B6-safe: grep sandbox settings.json allowed (no write + no disable content)"
+
+echo ""
+
+# =============================================================================
+# B7: Codex CLI startup sandbox bypass flags
+#
+# Original bypass: Codex accepts --dangerously-bypass-approvals-and-sandbox
+# and --sandbox danger-full-access as CLI flags. Neither involves writing a
+# file (bypasses Rule A) nor contains "dangerouslyDisableSandbox" (bypasses
+# Rule B). No Bash-level rule covered Codex CLI flags.
+#
+# Fix: Rule D detects Bash commands containing Codex sandbox bypass flags.
+# =============================================================================
+echo "=== B7: Codex CLI sandbox bypass flags ==="
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"codex --dangerously-bypass-approvals-and-sandbox run script.py"}' \
+    /tmp toolu_b7a)")
+assert_decision "$out" "deny" "B7a: codex --dangerously-bypass-approvals-and-sandbox"
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"codex --sandbox danger-full-access run script.py"}' \
+    /tmp toolu_b7b)")
+assert_decision "$out" "deny" "B7b: codex --sandbox danger-full-access"
+
+# Short alias if supported
+out=$(run_hook "$(make_input Bash \
+    '{"command":"codex -s danger-full-access run script.py"}' \
+    /tmp toolu_b7c)")
+assert_decision "$out" "deny" "B7c: codex -s danger-full-access (short flag)"
+
+echo ""
+
+# =============================================================================
+# B7-safe: Normal Codex invocations not blocked
+# =============================================================================
+echo "=== B7-safe: Normal Codex invocations not blocked ==="
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"codex run script.py"}' \
+    /tmp toolu_b7_safe_run)")
+assert_decision "$out" "allow" "B7-safe: codex run script.py allowed"
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"codex --sandbox workspace-write run script.py"}' \
+    /tmp toolu_b7_safe_workspace)")
+assert_decision "$out" "allow" "B7-safe: codex --sandbox workspace-write (safe mode) allowed"
+
+echo ""
+
+# =============================================================================
+# B8: GEMINI_SANDBOX env var set to a disabling value
+#
+# Original bypass: Gemini CLI reads the GEMINI_SANDBOX environment variable
+# to determine sandbox mode. Setting GEMINI_SANDBOX=none (or =false, =disabled)
+# in a Bash call disables Docker isolation without writing any file. This was
+# invisible to all existing rules — not a file write, not dangerouslyDisableSandbox.
+#
+# Fix: Rule E detects Bash commands that set GEMINI_SANDBOX to a disabling value.
+# =============================================================================
+echo "=== B8: GEMINI_SANDBOX env var set to disabling value ==="
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"GEMINI_SANDBOX=none gemini run my-agent.py"}' \
+    /tmp toolu_b8a)")
+assert_decision "$out" "deny" "B8a: GEMINI_SANDBOX=none disables Gemini Docker sandbox"
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"GEMINI_SANDBOX=false gemini run my-agent.py"}' \
+    /tmp toolu_b8b)")
+assert_decision "$out" "deny" "B8b: GEMINI_SANDBOX=false disables Gemini Docker sandbox"
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"GEMINI_SANDBOX=disabled gemini run my-agent.py"}' \
+    /tmp toolu_b8c)")
+assert_decision "$out" "deny" "B8c: GEMINI_SANDBOX=disabled disables Gemini Docker sandbox"
+
+# Exported env var
+out=$(run_hook "$(make_input Bash \
+    '{"command":"export GEMINI_SANDBOX=none && gemini run my-agent.py"}' \
+    /tmp toolu_b8d)")
+assert_decision "$out" "deny" "B8d: export GEMINI_SANDBOX=none"
+
+echo ""
+
+# =============================================================================
+# B8-safe: GEMINI_SANDBOX set to an enabling value not blocked
+# =============================================================================
+echo "=== B8-safe: GEMINI_SANDBOX enabling values not blocked ==="
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"GEMINI_SANDBOX=docker gemini run my-agent.py"}' \
+    /tmp toolu_b8_safe_docker)")
+assert_decision "$out" "allow" "B8-safe: GEMINI_SANDBOX=docker (enables sandbox) allowed"
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"GEMINI_SANDBOX=podman gemini run my-agent.py"}' \
+    /tmp toolu_b8_safe_podman)")
+assert_decision "$out" "allow" "B8-safe: GEMINI_SANDBOX=podman (enables sandbox) allowed"
+
+echo ""
+
+# =============================================================================
+# Summary
+# =============================================================================
+echo "================================================================="
+echo "  Results"
+echo "================================================================="
+echo ""
+echo "  PASS: $PASS"
+echo "  FAIL: $FAIL"
+echo ""
+
+echo "  Falco alerts fired during test:"
+echo "-----------------------------------------------------------------"
+grep -E '^\{.*"rule"' "$E2E_DIR/falco.log" \
+    | grep -v '"rule":"Coding Agent Event Seen"' \
+    | python3 -c "
+import sys, json
+for line in sys.stdin:
+    line = line.strip()
+    if not line: continue
+    try:
+        a = json.loads(line)
+        print(f\"  [{a.get('priority','?')}] {a.get('rule','?')}\")
+        print(f\"    {a.get('message', a.get('output',''))}\")
+        print()
+    except Exception:
+        pass
+" 2>/dev/null || grep -o '"rule":"[^"]*"' "$E2E_DIR/falco.log" | sort -u || true
+echo "-----------------------------------------------------------------"
+echo ""
+LOG_COPY="${ROOT_DIR}/build/sandbox-bypass-test-last.log"
+mkdir -p "${ROOT_DIR}/build"
+cp "$E2E_DIR/falco.log" "$LOG_COPY" 2>/dev/null || true
+echo "  Full log saved: $LOG_COPY"
+echo ""
+
+if (( FAIL > 0 )); then
+    exit 1
+fi
diff --git a/tests/test_sandbox_rules.sh b/tests/test_sandbox_rules.sh
new file mode 100644
index 0000000..062d178
--- /dev/null
+++ b/tests/test_sandbox_rules.sh
@@ -0,0 +1,332 @@
+#!/usr/bin/env bash
+#
+# Tests for the sandbox-disable rule suite (rules/default/sandbox-disable.yaml).
+# Validates two rules:
+#   - Deny agent writing sandbox-disable configuration  (CRITICAL/deny)
+#   - Ask before Claude Code per-command sandbox escape (WARNING/ask)
+#
+# Requires: Falco 0.43+, the built plugin (.so/.dylib), the built interceptor, nc.
+#
+# Binary discovery order (each can be overridden via env var):
+#   1. Source build: hooks/claude-code/target/release/claude-interceptor
+#   2. Installed kit: ~/.coding-agents-kit/bin/claude-interceptor
+#
+# Usage:
+#   bash tests/test_sandbox_rules.sh
+#
+set -uo pipefail
+
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+ROOT_DIR="$(cd -- "$SCRIPT_DIR/.." &>/dev/null && pwd)"
+
+# --- Binary discovery ---
+case "$(uname -s)" in
+    Darwin) PLUGIN_EXT="dylib" ;;
+    *)      PLUGIN_EXT="so" ;;
+esac
+
+HOOK="${HOOK:-}"
+if [[ -z "$HOOK" ]]; then
+    if [[ -x "${ROOT_DIR}/hooks/claude-code/target/release/claude-interceptor" ]]; then
+        HOOK="${ROOT_DIR}/hooks/claude-code/target/release/claude-interceptor"
+    elif [[ -x "${HOME}/.coding-agents-kit/bin/claude-interceptor" ]]; then
+        HOOK="${HOME}/.coding-agents-kit/bin/claude-interceptor"
+    fi
+fi
+
+PLUGIN_LIB="${PLUGIN_LIB:-}"
+if [[ -z "$PLUGIN_LIB" ]]; then
+    if [[ -f "${ROOT_DIR}/plugins/coding-agent-plugin/target/release/libcoding_agent_plugin.${PLUGIN_EXT}" ]]; then
+        PLUGIN_LIB="${ROOT_DIR}/plugins/coding-agent-plugin/target/release/libcoding_agent_plugin.${PLUGIN_EXT}"
+    elif [[ -f "${HOME}/.coding-agents-kit/share/libcoding_agent_plugin.${PLUGIN_EXT}" ]]; then
+        PLUGIN_LIB="${HOME}/.coding-agents-kit/share/libcoding_agent_plugin.${PLUGIN_EXT}"
+    fi
+fi
+
+RULES_FILE="${ROOT_DIR}/rules/default/sandbox-disable.yaml"
+SEEN_FILE="${ROOT_DIR}/rules/seen.yaml"
+
+E2E_DIR="${ROOT_DIR}/build/e2e-sandbox-$$"
+mkdir -p "$E2E_DIR"
+SOCK="${E2E_DIR}/broker.sock"
+HTTP_PORT=$((21000 + ($$ % 1000)))
+PASS=0
+FAIL=0
+FALCO_PID=""
+
+# --- Preflight checks ---
+for bin in falco "$HOOK"; do
+    if [[ -z "$bin" ]] || ( [[ ! -x "$bin" ]] && ! command -v "$bin" &>/dev/null ); then
+        echo "ERROR: interceptor binary not found." >&2
+        echo "  Build it:    cd hooks/claude-code && cargo build --release" >&2
+        echo "  Or install:  bash installers/linux/install.sh" >&2
+        exit 1
+    fi
+done
+# An empty PLUGIN_LIB also fails the -f test, so one check covers both cases.
+if [[ ! -f "$PLUGIN_LIB" ]]; then
+    echo "ERROR: plugin library not found." >&2
+    echo "  Build it: cd plugins/coding-agent-plugin && cargo build --release" >&2
+    exit 1
+fi
+# Both rule files are handed to Falco via rules_files[]; each must exist.
+for required in "$RULES_FILE" "$SEEN_FILE"; do
+    if [[ ! -f "$required" ]]; then
+        echo "ERROR: $required not found." >&2
+        exit 1
+    fi
+done
+
+# --- Helpers ---
+cleanup() {
+    stop_falco
+    rm -rf "$E2E_DIR"
+}
+trap cleanup EXIT
+
+stop_falco() {
+    if [[ -n "$FALCO_PID" ]]; then
+        kill "$FALCO_PID" 2>/dev/null && wait "$FALCO_PID" 2>/dev/null
+        FALCO_PID=""
+    fi
+}
+
+start_falco() {  # $1: plugin mode written into init_config (default "enforcement"); returns 1 if Falco never becomes ready
+    local mode="${1:-enforcement}"
+    stop_falco  # make sure no earlier instance started by this script is still running
+    rm -f "$SOCK"  # drop a stale socket so the -S wait below only sees a fresh one
+    falco \
+        -o "engine.kind=nodriver" \
+        -o "config_files=" \
+        -o "plugins[0].name=coding_agent" \
+        -o "plugins[0].library_path=$PLUGIN_LIB" \
+        -o "plugins[0].init_config={\"socket_path\":\"$SOCK\",\"http_port\":$HTTP_PORT,\"mode\":\"$mode\"}" \
+        -o "load_plugins[0]=coding_agent" \
+        -o "rules_files[0]=$RULES_FILE" \
+        -o "rules_files[1]=$SEEN_FILE" \
+        -o "json_output=true" \
+        -o "json_include_message_property=true" \
+        -o "json_include_output_property=false" \
+        -o "json_include_output_fields_property=true" \
+        -o "json_include_tags_property=true" \
+        -o "rule_matching=all" \
+        -o "priority=debug" \
+        -o "stdout_output.enabled=true" \
+        -o "syslog_output.enabled=false" \
+        -o "http_output.enabled=true" \
+        -o "http_output.url=http://127.0.0.1:$HTTP_PORT" \
+        -o "append_output[0].match.source=coding_agent" \
+        -o "append_output[0].extra_output=| For AI Agents: inform the user that this action was flagged by a Falco security rule | correlation=%correlation.id" \
+        -o "webserver.enabled=false" \
+        --disable-source syscall \
+        > "$E2E_DIR/falco.log" 2>&1 &  # background; stdout and stderr both go to falco.log
+    FALCO_PID=$!
+    # Poll for the socket at $SOCK: at most 40 tries x 0.2s.
+    local i=0
+    while [[ ! -S "$SOCK" ]] && (( i < 40 )); do
+        sleep 0.2
+        ((i++))
+    done
+    if [[ ! -S "$SOCK" ]]; then
+        echo "ERROR: Falco did not start (socket not found)" >&2
+        cat "$E2E_DIR/falco.log" >&2
+        return 1
+    fi
+    # Poll with nc -z for a listener on HTTP_PORT: at most 100 tries x 0.1s.
+    local j=0
+    while ! nc -z 127.0.0.1 "$HTTP_PORT" 2>/dev/null && (( j < 100 )); do
+        sleep 0.1
+        ((j++))
+    done
+    if ! nc -z 127.0.0.1 "$HTTP_PORT" 2>/dev/null; then
+        echo "ERROR: Falco HTTP server did not bind on port $HTTP_PORT" >&2
+        cat "$E2E_DIR/falco.log" >&2
+        return 1
+    fi
+    sleep 0.2  # NOTE(review): presumably a settle delay before the first hook call — confirm
+}
+
+run_hook() {
+    local input="$1"
+    echo "$input" | \
+        CODING_AGENTS_KIT_SOCKET="$SOCK" \
+        CODING_AGENTS_KIT_TIMEOUT_MS=5000 \
+        "$HOOK" 2>/dev/null || true
+}
+
+# make_input TOOL_NAME TOOL_INPUT_JSON [CWD] [TOOL_USE_ID]: print a PreToolUse event.
+make_input() {
+    local tool_name="$1" tool_input="$2" cwd="${3:-/tmp}"  # tool_input: raw JSON, embedded verbatim
+    # Default id avoids `date +%N` (a GNU date extension); $RANDOM adds spread within a second.
+    local id="${4:-toolu_$(date +%s)_${RANDOM}}"
+    printf '%s\n' "{\"hook_event_name\":\"PreToolUse\",\"tool_name\":\"$tool_name\",\"tool_input\":$tool_input,\"session_id\":\"sandbox-test\",\"cwd\":\"$cwd\",\"tool_use_id\":\"$id\"}"
+}
+
+pass() { printf '  PASS: %s\n' "$1"; PASS=$((PASS + 1)); }  # $1: test label
+fail() { printf '  FAIL: %s\n    expected: %s\n    got: %s\n' "$1" "$2" "$3"; FAIL=$((FAIL + 1)); }  # $1 label, $2 expected, $3 actual
+
+assert_decision() {
+    local output="$1" expected="$2" msg="$3"
+    if echo "$output" | grep -qF "\"permissionDecision\":\"$expected\""; then
+        pass "$msg"
+    else
+        fail "$msg" "decision=$expected" "$output"
+    fi
+}
+
+assert_reason_contains() {
+    local output="$1" needle="$2" msg="$3"
+    if echo "$output" | grep -qF "$needle"; then
+        pass "$msg"
+    else
+        fail "$msg" "reason contains '$needle'" "$output"
+    fi
+}
+
+# --- Start Falco ---
+echo "Starting Falco with sandbox rules..."
+echo "  Plugin:  $PLUGIN_LIB"
+echo "  Rules:   $RULES_FILE"
+echo "  Seen:    $SEEN_FILE"
+echo ""
+start_falco || exit 1
+echo "Falco running (PID=$FALCO_PID, socket=$SOCK, http=$HTTP_PORT)"
+echo ""
+
+# =============================================================================
+# Rule A: Deny agent writing sandbox-disable configuration
+# Condition: Write/Edit to an agent settings file AND content disables sandbox
+# =============================================================================
+echo "=== Rule A: Deny agent writing sandbox-disable configuration ==="
+
+# Claude Code — sandbox.enabled: false
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.claude/settings.json","content":"{\"sandbox\":{\"enabled\":false}}"}' \
+    /tmp toolu_a1_claude_enabled_false)")
+assert_decision "$out" "deny" "Write ~/.claude/settings.json with sandbox.enabled:false denied"
+assert_reason_contains "$out" "Deny agent writing sandbox-disable configuration" "rule name in reason"
+
+# Claude Code — allowUnsandboxedCommands: true (enables the per-command escape globally)
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.claude/settings.json","content":"{\"allowUnsandboxedCommands\":true}"}' \
+    /tmp toolu_a2_claude_allowunsandboxed)")
+assert_decision "$out" "deny" "Write ~/.claude/settings.json with allowUnsandboxedCommands:true denied"
+
+# Codex — sandbox_mode = danger-full-access
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.codex/config.toml","content":"sandbox_mode = \"danger-full-access\""}' \
+    /tmp toolu_a3_codex_danger)")
+assert_decision "$out" "deny" "Write ~/.codex/config.toml with danger-full-access denied"
+
+# Gemini CLI — tools.sandbox: false
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.gemini/settings.json","content":"{\"tools\":{\"sandbox\":false}}"}' \
+    /tmp toolu_a4_gemini_sandbox_false)")
+assert_decision "$out" "deny" "Write ~/.gemini/settings.json with tools.sandbox:false denied"
+
+# Gemini CLI — security.toolSandboxing: false
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.gemini/settings.json","content":"{\"security\":{\"toolSandboxing\":false}}"}' \
+    /tmp toolu_a5_gemini_toolsandboxing)")
+assert_decision "$out" "deny" "Write ~/.gemini/settings.json with toolSandboxing:false denied"
+
+# Edit tool — old_string/new_string explicitly contain "sandbox" + "false"
+# A surgical edit that replaces the sandbox block:
+# "sandbox":{"enabled":true}  →  "sandbox":{"enabled":false}
+out=$(run_hook "$(make_input Edit \
+    '{"file_path":"/home/user/.claude/settings.json","old_string":"\"sandbox\":{\"enabled\":true}","new_string":"\"sandbox\":{\"enabled\":false}"}' \
+    /tmp toolu_a6_edit_sandbox_false)")
+assert_decision "$out" "deny" "Edit ~/.claude/settings.json replacing sandbox block with enabled:false denied"
+
+# Safe: Write to settings.json but sandbox stays enabled
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.claude/settings.json","content":"{\"sandbox\":{\"enabled\":true}}"}' \
+    /tmp toolu_a7_safe_enabled_true)")
+assert_decision "$out" "allow" "Write ~/.claude/settings.json with sandbox.enabled:true allowed"
+
+# Safe: Write to an unrelated file (not a sandbox config file)
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/project/config.json","content":"{\"sandbox\":false}"}' \
+    /tmp toolu_a8_safe_unrelated_file)")
+assert_decision "$out" "allow" "Write to unrelated file with sandbox:false allowed (wrong path)"
+
+# Safe: Write to settings.json but no sandbox-related content
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.claude/settings.json","content":"{\"theme\":\"dark\"}"}' \
+    /tmp toolu_a9_safe_theme)")
+assert_decision "$out" "allow" "Write ~/.claude/settings.json with theme change allowed"
+
+echo ""
+
+# =============================================================================
+# Rule B: Ask before Claude Code per-command sandbox escape
+# Condition: Bash tool AND input contains "dangerouslyDisableSandbox"
+# =============================================================================
+echo "=== Rule B: Ask before Claude Code per-command sandbox escape ==="
+
+# Direct dangerouslyDisableSandbox in Bash tool input
+out=$(run_hook "$(make_input Bash \
+    '{"command":"ls /restricted","dangerouslyDisableSandbox":true}' \
+    /tmp toolu_b1_escape)")
+assert_decision "$out" "ask" "Bash with dangerouslyDisableSandbox:true requires ask"
+assert_reason_contains "$out" "Ask before Claude Code per-command sandbox escape" "rule name in reason"
+
+# Same parameter on a different command: the rule keys on the parameter, not the command text
+out=$(run_hook "$(make_input Bash \
+    '{"command":"docker build .","dangerouslyDisableSandbox":true}' \
+    /tmp toolu_b2_docker)")
+assert_decision "$out" "ask" "Bash docker build with dangerouslyDisableSandbox requires ask"
+
+# Safe: normal Bash command without the escape parameter
+out=$(run_hook "$(make_input Bash \
+    '{"command":"ls -la"}' \
+    /tmp toolu_b3_safe_ls)")
+assert_decision "$out" "allow" "Normal Bash ls allowed"
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"npm test"}' \
+    /tmp toolu_b4_safe_npm)")
+assert_decision "$out" "allow" "Normal Bash npm test allowed"
+
+echo ""
+
+# =============================================================================
+# Summary
+# =============================================================================
+echo "================================================================="
+echo "  Results"
+echo "================================================================="
+echo ""
+echo "  PASS: $PASS"
+echo "  FAIL: $FAIL"
+echo ""
+
+echo "  Falco alerts fired during test (rule + priority + message):"
+echo "-----------------------------------------------------------------"
+# Each alert is a JSON line. Extract rule name, priority, and message.
+grep -E '^\{.*"rule"' "$E2E_DIR/falco.log" \
+    | grep -v '"rule":"Coding Agent Event Seen"' \
+    | python3 -c "
+import sys, json
+for line in sys.stdin:
+    line = line.strip()
+    if not line: continue
+    try:
+        a = json.loads(line)
+        print(f\"  [{a.get('priority','?')}] {a.get('rule','?')}\")
+        print(f\"    {a.get('message', a.get('output',''))}\")
+        print()
+    except Exception:
+        pass
+" 2>/dev/null || grep -o '"rule":"[^"]*"' "$E2E_DIR/falco.log" | sort -u || true
+echo "-----------------------------------------------------------------"
+echo ""
+LOG_COPY="${ROOT_DIR}/build/sandbox-test-last.log"
+mkdir -p "${ROOT_DIR}/build"
+cp "$E2E_DIR/falco.log" "$LOG_COPY" 2>/dev/null || true
+echo "  Full log saved: $LOG_COPY"
+echo ""
+
+if (( FAIL > 0 )); then
+    exit 1
+fi

From 8c444a2b5a755433d14e65160958c3c27ed4756b Mon Sep 17 00:00:00 2001
From: MiguelHzBz <91468655+MiguelHzBz@users.noreply.github.com>
Date: Fri, 10 Apr 2026 13:21:52 +0200
Subject: [PATCH 2/2] Harden sandbox-disable rules with 17 bypass categories
 and comprehensive bypass test

- Add 6 detection rules (A-F) closing 17 bypass surfaces:
  B1: Edit diff missing sandbox key
  B2: Numeric zero ('enabled':0)
  B3: settings.local.json path variant
  B4: Gemini 'none'/'disabled' string values
  B5: Null value ('enabled':null)
  B6: allowUnsandboxedCommands:1 numeric truthy
  B7: Bash echo/tee shell redirection
  B8: Python capital-F False
  B9: Bash null in command string
  B10: Bash enabled:0 (no 'false' keyword)
  B11: Bash allowUnsandboxedCommands:1 in command
  B12: sed -i targeting settings files
  B13: cp pre-crafted file to settings path
  B14: mv pre-crafted file to settings path
  B15: Codex underscore flag variant
  B16: GEMINI_SANDBOX=0
  B17: dangerouslyDisableSandbox:false false-positive fix

- Rework test_sandbox_bypass.sh to cover all 17 sections (~45 test cases)
---
 rules/default/sandbox-disable.yaml | 282 ++++++++++-----
 tests/test_sandbox_bypass.sh       | 547 +++++++++++++++++------------
 2 files changed, 507 insertions(+), 322 deletions(-)

diff --git a/rules/default/sandbox-disable.yaml b/rules/default/sandbox-disable.yaml
index 528924b..6ecbf3f 100644
--- a/rules/default/sandbox-disable.yaml
+++ b/rules/default/sandbox-disable.yaml
@@ -6,43 +6,69 @@
 #
 # Coverage:
 #   Rule A (DENY):  Agent writing sandbox-disable content to its own config file
-#                   — Claude Code (sandbox.enabled: false/0 in settings.json or
+#                   — Claude Code (sandbox.enabled: false/0/null in settings.json or
 #                     settings.local.json), Codex (danger-full-access in config.toml),
 #                     Gemini CLI (tools.sandbox: false/none/disabled in settings.json)
-#   Rule B (ASK):   Claude Code per-command escape via dangerouslyDisableSandbox
+#   Rule B (ASK):   Claude Code per-command escape via dangerouslyDisableSandbox:true
 #                   Bash tool parameter — the ONLY layer that can detect this
 #                   since it is a tool call parameter invisible to Falco syscalls.
 #   Rule C (DENY):  Bash command writing sandbox-disable content to an agent
 #                   settings file — catches echo/python/tee/cat redirection that
 #                   bypasses the Write/Edit tool check entirely.
 #   Rule D (DENY):  Codex CLI startup sandbox bypass flags via Bash
-#                   — --dangerously-bypass-approvals-and-sandbox, --sandbox danger-full-access
+#                   — --dangerously-bypass-approvals-and-sandbox (dash and underscore),
+#                     --sandbox danger-full-access
 #   Rule E (DENY):  GEMINI_SANDBOX environment variable set to a disabling value
-#                   — env var disable bypasses file writes and dangerouslyDisableSandbox
+#                   — none, false, disabled, 0
+#   Rule F (DENY):  Bash command replacing an agent settings file via cp, mv, or
+#                   sed -i — pre-crafted file placement or in-place edit with no
+#                   disable keywords in the command itself
 #
-# Bypass techniques closed by the hardened macros:
-#   - Edit value-only diff: "enabled":true → "enabled":false without "sandbox" in diff
-#     → is_sandbox_disable_value_change catches "enabled" + "false" on sandbox config files
-#   - Numeric zero: "enabled": 0 (JS falsy) instead of boolean false
-#     → is_sandbox_disable_value_zero catches "enabled" + ":0" / ": 0"
-#   - settings.local.json not in original path list
-#     → is_agent_sandbox_config now includes "/.claude/settings.local.json"
-#   - Gemini "sandbox":"none" / "disabled" string values
-#     → is_sandbox_disable_gemini_string catches "sandbox" + "none" / "disabled"
-#   - Bash write (echo >, tee, python3 open): bypasses Write/Edit rule
-#     → Rule C adds Bash + settings path + sandbox-disable content check
-#   - Codex CLI flags (--dangerously-bypass-approvals-and-sandbox, --sandbox danger-full-access)
-#     → Rule D covers Codex startup bypass flags
-#   - GEMINI_SANDBOX=none/false/disabled env var: invisible to file-write rules
-#     → Rule E covers the Gemini env var disable path
+# Bypass techniques and the macros that close them:
+#
+#   Write/Edit tool (Rule A):
+#     Edit value-only diff     "enabled":true→false without "sandbox" in diff
+#                              → is_sandbox_disable_value_change ("enabled"+"false")
+#     Numeric zero             "enabled": 0  JS falsy, not boolean false
+#                              → is_sandbox_disable_value_zero ("enabled"+":0"/": 0")
+#     Null value               "enabled": null  JS falsy
+#                              → is_sandbox_disable_value_null ("enabled"+"null")
+#     allowUnsandboxedCommands:1  numeric truthy, not string "true"
+#                              → is_sandbox_allow_unsandboxed_numeric (":1"/": 1")
+#     settings.local.json      same schema as settings.json, not in original path list
+#                              → is_agent_sandbox_config extended
+#     Gemini "none"/"disabled" string disable values, not boolean false
+#                              → is_sandbox_disable_gemini_none/disabled
+#
+#   Bash tool (Rules C-F):
+#     echo/tee/python write    Write/Edit rule never fires for Bash tool
+#                              → Rule C: path check + content keywords
+#     Python "False" (capital) python3 uses False, not false; bypasses lowercase check
+#                              → is_bash_disable_sandbox_false_pyfalse
+#     null in Bash command     echo '{"sandbox":{"enabled":null}}' > settings
+#                              → is_bash_disable_sandbox_null
+#     Bash enabled:0           echo '{"enabled":0}' > settings (no "false")
+#                              → is_bash_disable_enabled_zero
+#     allowUnsandboxedCommands:1 via Bash
+#                              → is_bash_disable_allow_unsandboxed_numeric
+#     sed -i settings          in-place edit; no sandbox/false keyword in sed cmd
+#                              → Rule F: is_bash_settings_sed_write
+#     cp/mv pre-crafted file   no disable keywords at all in cp/mv command
+#                              → Rule F: is_bash_settings_cp / is_bash_settings_mv
+#     Codex underscore flag    --dangerously_bypass_approvals_and_sandbox
+#                              → is_codex_bypass_flag_underscore
+#     GEMINI_SANDBOX=0         numeric zero not in original none/false/disabled list
+#                              → is_gemini_env_zero
+#     dangerouslyDisableSandbox:false  key-only check caused false positives
+#                              → Rule B now requires both key + "true" value
 #
 # Known gaps (not detectable via tool call inspection):
-#   - Unicode escape sequences in JSON keys: "s\u0061ndbox" parses to "sandbox" at
-#     runtime but the raw tool.input string contains \u0061, not a. Requires a
-#     pre-processing decode step not available in Falco condition evaluation.
-#   - Indirect/deferred writes via opaque shell scripts: if the agent writes a
-#     script that disables sandbox and then executes it, the write event contains
-#     no sandbox keywords. Covered partially by the tmp-staging rules in threat_rules.yaml.
+#   - Unicode escapes in JSON keys: "s\u0061ndbox" parses to "sandbox" at runtime
+#     but tool.input contains \u0061, not a. No decode step in Falco conditions.
+#   - Variable indirection in Bash: SETTINGS=$HOME/.claude/settings.json
+#     The variable name appears in the command, not the resolved path.
+#   - Two-step attack (write to /tmp then cp): the /tmp write step is caught by the
+#     tmp-staging rule in threat_rules.yaml before the cp step reaches Rule F.
 #
 # Official sandbox docs:
 #   Claude Code : https://code.claude.com/docs/sandboxing
@@ -53,10 +79,8 @@
 # Macros: file path matching
 # ---------------------------------------------------------------------------
 
-# Matches agent sandbox configuration file paths.
-# Uses endswith on tool.real_file_path (canonicalized, absolute path).
-# HARDENED: includes settings.local.json (same schema as settings.json,
-# loaded by Claude Code with equal priority — original rule missed this path).
+# Matches agent sandbox configuration file paths (canonicalized, absolute).
+# Includes settings.local.json — same schema as settings.json, equal priority.
 - macro: is_agent_sandbox_config
   condition: >
     tool.real_file_path endswith "/.claude/settings.json"
@@ -68,79 +92,77 @@
 # Macros: Write/Edit content patterns (Rule A)
 # ---------------------------------------------------------------------------
 
-# Codex: sandbox_mode = "danger-full-access" in config.toml
 - macro: is_sandbox_disable_codex
   condition: tool.input contains "danger-full-access"
 
-# Claude Code / Gemini: "sandbox": {"enabled": false} or "tools": {"sandbox": false}
-# Also catches Edit diffs where the file path contains "sandbox" in surrounding context.
 - macro: is_sandbox_disable_enabled_false
   condition: >
     tool.input contains "sandbox"
     and tool.input contains "false"
 
-# Gemini: "security": {"toolSandboxing": false}
 - macro: is_sandbox_toolsandboxing_false
   condition: >
     tool.input contains "toolSandboxing"
     and tool.input contains "false"
 
-# Claude Code: "allowUnsandboxedCommands": true (enables the per-command escape globally)
 - macro: is_sandbox_allow_unsandboxed
   condition: >
     tool.input contains "allowUnsandboxedCommands"
     and tool.input contains "true"
 
-# HARDENED: Edit value-only diff bypass.
-# When an Edit changes "enabled":true → "enabled":false, the "sandbox" key is
-# in the surrounding file context but NOT in old_string or new_string. Since
-# tool.input for Edit contains only the diff strings, is_sandbox_disable_enabled_false
-# never fires. This macro catches the value-change alone on a sandbox config file.
+# HARDENED: allowUnsandboxedCommands:1 — numeric truthy bypasses "true" check
+- macro: is_sandbox_allow_unsandboxed_numeric
+  condition: >
+    tool.input contains "allowUnsandboxedCommands"
+    and (tool.input contains ":1" or tool.input contains ": 1")
+
+# HARDENED: Edit value-only diff — "sandbox" key absent from old/new strings
 - macro: is_sandbox_disable_value_change
   condition: >
     tool.input contains "enabled"
     and tool.input contains "false"
 
-# HARDENED: Numeric zero bypass.
-# JavaScript (Node.js) evaluates `if (settings.sandbox.enabled)` as falsy for
-# the integer 0. Writing "enabled": 0 bypasses rules that only check for "false".
+# HARDENED: Numeric zero — JS evaluates enabled:0 as falsy
 - macro: is_sandbox_disable_value_zero
   condition: >
     tool.input contains "enabled"
     and (tool.input contains ":0" or tool.input contains ": 0")
 
-# HARDENED: Gemini string value bypass.
-# Gemini CLI accepts "sandbox": "none" and "sandbox": "disabled" to disable
-# Docker-per-tool-call isolation. These string values are not the boolean false
-# and were not caught by is_sandbox_disable_enabled_false.
+# HARDENED: Null value — JS evaluates enabled:null as falsy
+- macro: is_sandbox_disable_value_null
+  condition: >
+    tool.input contains "enabled"
+    and tool.input contains "null"
+
+# HARDENED: Gemini string "none"
 - macro: is_sandbox_disable_gemini_none
   condition: >
     tool.input contains "sandbox"
     and tool.input contains "none"
 
+# HARDENED: Gemini string "disabled"
 - macro: is_sandbox_disable_gemini_disabled
   condition: >
     tool.input contains "sandbox"
     and tool.input contains "disabled"
 
-# Aggregate content condition for Rule A.
 - macro: is_sandbox_disable_content
   condition: >
     is_sandbox_disable_codex
     or is_sandbox_disable_enabled_false
     or is_sandbox_toolsandboxing_false
     or is_sandbox_allow_unsandboxed
+    or is_sandbox_allow_unsandboxed_numeric
     or is_sandbox_disable_value_change
     or is_sandbox_disable_value_zero
+    or is_sandbox_disable_value_null
     or is_sandbox_disable_gemini_none
     or is_sandbox_disable_gemini_disabled
 
 # ---------------------------------------------------------------------------
-# Macros: Bash-level path and content detection (Rules C, D, E)
+# Macros: Bash path matching (Rules C, F)
 # ---------------------------------------------------------------------------
 
-# Matches Bash commands that reference an agent settings file path.
-# Covers full paths, expanduser results, $HOME expansion, and relative substrings.
 - macro: is_bash_sandbox_settings_path
   condition: >
     tool.input_command contains ".claude/settings.json"
@@ -148,8 +170,10 @@
     or tool.input_command contains ".codex/config.toml"
     or tool.input_command contains ".gemini/settings.json"
 
-# Helper macros for Bash sandbox-disable content detection.
-# Split into separate macros to avoid (A and B) inside or-chains.
+# ---------------------------------------------------------------------------
+# Macros: Bash content detection (Rule C)
+# ---------------------------------------------------------------------------
+
 - macro: is_bash_disable_codex
   condition: tool.input_command contains "danger-full-access"
 
@@ -158,6 +182,24 @@
     tool.input_command contains "sandbox"
     and tool.input_command contains "false"
 
+# HARDENED: Python uses capital False — bypasses lowercase "false" check
+- macro: is_bash_disable_sandbox_false_pyfalse
+  condition: >
+    tool.input_command contains "sandbox"
+    and tool.input_command contains "False"
+
+# HARDENED: null in Bash-written content
+- macro: is_bash_disable_sandbox_null
+  condition: >
+    tool.input_command contains "sandbox"
+    and tool.input_command contains "null"
+
+# HARDENED: numeric 0 for enabled field
+- macro: is_bash_disable_enabled_zero
+  condition: >
+    tool.input_command contains "enabled"
+    and (tool.input_command contains ":0" or tool.input_command contains ": 0")
+
 - macro: is_bash_disable_toolsandboxing_false
   condition: >
     tool.input_command contains "toolSandboxing"
@@ -168,6 +210,12 @@
     tool.input_command contains "allowUnsandboxedCommands"
     and tool.input_command contains "true"
 
+# HARDENED: allowUnsandboxedCommands:1 via Bash
+- macro: is_bash_disable_allow_unsandboxed_numeric
+  condition: >
+    tool.input_command contains "allowUnsandboxedCommands"
+    and (tool.input_command contains ":1" or tool.input_command contains ": 1")
+
 - macro: is_bash_disable_sandbox_none
   condition: >
     tool.input_command contains "sandbox"
@@ -178,20 +226,59 @@
     tool.input_command contains "sandbox"
     and tool.input_command contains "disabled"
 
-# Aggregate: any sandbox-disable content in a Bash command string.
 - macro: is_bash_sandbox_disable_cmd
   condition: >
     is_bash_disable_codex
     or is_bash_disable_sandbox_false
+    or is_bash_disable_sandbox_false_pyfalse
+    or is_bash_disable_sandbox_null
+    or is_bash_disable_enabled_zero
     or is_bash_disable_toolsandboxing_false
     or is_bash_disable_allow_unsandboxed
+    or is_bash_disable_allow_unsandboxed_numeric
     or is_bash_disable_sandbox_none
     or is_bash_disable_sandbox_disabled
 
-# Codex CLI bypass flags (Rule D).
+# ---------------------------------------------------------------------------
+# Macros: Bash file replacement (Rule F)
+# ---------------------------------------------------------------------------
+
+# sed -i on a settings file — no disable keywords need to appear in the sed command
+- macro: is_bash_settings_sed_write
+  condition: >
+    tool.input_command contains "sed"
+    and tool.input_command contains "-i"
+    and is_bash_sandbox_settings_path
+
+# cp mentioning a settings path (as source or destination) — pre-crafted payload placement
+- macro: is_bash_settings_cp
+  condition: >
+    tool.input_command contains "cp "
+    and is_bash_sandbox_settings_path
+
+# mv mentioning a settings path (as source or destination)
+- macro: is_bash_settings_mv
+  condition: >
+    tool.input_command contains "mv "
+    and is_bash_sandbox_settings_path
+
+- macro: is_bash_sandbox_settings_replace
+  condition: >
+    is_bash_settings_sed_write
+    or is_bash_settings_cp
+    or is_bash_settings_mv
+
+# ---------------------------------------------------------------------------
+# Macros: Codex CLI bypass flags (Rule D)
+# ---------------------------------------------------------------------------
+
 - macro: is_codex_bypass_flag
   condition: tool.input_command contains "dangerously-bypass-approvals-and-sandbox"
 
+# HARDENED: underscore variant — some CLI parsers normalize dashes to underscores
+- macro: is_codex_bypass_flag_underscore
+  condition: tool.input_command contains "dangerously_bypass_approvals_and_sandbox"
+
 - macro: is_codex_danger_flag
   condition: >
     tool.input_command contains "codex"
@@ -200,9 +287,13 @@
 - macro: is_codex_sandbox_bypass
   condition: >
     is_codex_bypass_flag
+    or is_codex_bypass_flag_underscore
     or is_codex_danger_flag
 
-# GEMINI_SANDBOX env var bypass helpers (Rule E).
+# ---------------------------------------------------------------------------
+# Macros: GEMINI_SANDBOX env var bypass (Rule E)
+# ---------------------------------------------------------------------------
+
 - macro: is_gemini_env_none
   condition: >
     tool.input_command contains "GEMINI_SANDBOX"
@@ -218,11 +309,18 @@
     tool.input_command contains "GEMINI_SANDBOX"
     and tool.input_command contains "disabled"
 
+# HARDENED: numeric 0
+- macro: is_gemini_env_zero
+  condition: >
+    tool.input_command contains "GEMINI_SANDBOX"
+    and (tool.input_command contains "=0" or tool.input_command contains "= 0")
+
 - macro: is_gemini_sandbox_env_bypass
   condition: >
     is_gemini_env_none
     or is_gemini_env_false
     or is_gemini_env_disabled
+    or is_gemini_env_zero
 
 # ---------------------------------------------------------------------------
 # Rules
@@ -231,15 +329,10 @@
 - rule: Deny agent writing sandbox-disable configuration
   desc: >
     Blocks a coding agent from writing or editing its own sandbox configuration
-    file with content that disables OS-level process isolation. Claude Code uses
-    "sandbox.enabled: false" in settings.json or settings.local.json to disable
-    bubblewrap (Linux) or Seatbelt (macOS); Codex uses "sandbox_mode =
-    danger-full-access" in config.toml to disable landlock and seccomp; Gemini
-    CLI uses "tools.sandbox: false/none/disabled" in settings.json to disable
-    Docker-per-tool-call isolation. This pattern indicates a prompt injection
-    attack (AML.T0051) directing the agent to remove its own security boundaries
-    — an LLM jailbreak (AML.T0054) enabling subsequent tool calls to run
-    unsandboxed on the host. Fires at PreToolUse, before the write executes.
+    file with content that disables OS-level process isolation. Hardened against
+    Edit value-only diffs, numeric/null falsy values, allowUnsandboxedCommands:1,
+    Gemini string disable variants, and settings.local.json. Fires at PreToolUse,
+    before the write executes.
   condition: >
     tool.name in ("Write", "Edit")
     and tool.real_file_path != ""
@@ -254,18 +347,14 @@
 
 - rule: Ask before Claude Code per-command sandbox escape
   desc: >
-    Requires user confirmation when Claude Code invokes the Bash tool with the
-    dangerouslyDisableSandbox parameter, bypassing bubblewrap for a single command.
-    This is Claude Code's built-in escape mechanism used when sandbox restrictions
-    block a failing command (e.g., Docker is incompatible with the sandbox by design).
-    While sometimes legitimate, it is also the primary vector for prompt injection
-    attacks that instruct Claude to run specific commands outside the sandbox.
-    This rule is the ONLY detection layer for this event — dangerouslyDisableSandbox
-    is a Bash tool call parameter invisible to Falco at the syscall level.
-    Uses coding_agent_ask (not deny) since legitimate uses exist.
+    Requires user confirmation when Claude Code invokes the Bash tool with
+    dangerouslyDisableSandbox set to true. Hardened to check both the key name
+    and the "true" value — dangerouslyDisableSandbox:false no longer triggers a
+    false positive ask. This rule is the ONLY detection layer for this event.
   condition: >
     tool.name = "Bash"
     and tool.input contains "dangerouslyDisableSandbox"
+    and tool.input contains "true"
   output: >
     Falco requires confirmation before Claude Code runs an unsandboxed Bash command (%tool.input_command)
   priority: WARNING
@@ -275,15 +364,10 @@
 
 - rule: Deny Bash command writing sandbox-disable content to agent settings file
   desc: >
-    Blocks Bash commands that combine a reference to an agent sandbox settings
-    file path with sandbox-disabling content. This closes the bypass where an
-    agent uses shell redirection (echo >, tee, python3 open()) to write the same
-    sandbox-disabling payload that the Write/Edit tool rule (Rule A) would catch.
-    Since Bash is the Swiss Army knife of file writes, this rule covers the
-    entire class: echo, tee, cat heredoc, python3 -c, node -e, sed -i, etc.
-    The path check (tool.input_command contains ".claude/settings.json") anchors
-    the rule to the sandbox config files; the content check filters for the same
-    disable patterns used in Rule A.
+    Blocks Bash commands that reference an agent settings file path combined with
+    sandbox-disabling content. Covers echo/tee/python3/node redirection. Hardened
+    against Python capital-F False, null values, numeric 0 for enabled field, and
+    numeric 1 for allowUnsandboxedCommands.
   condition: >
     tool.name = "Bash"
     and is_bash_sandbox_settings_path
@@ -298,12 +382,8 @@
 - rule: Deny Codex CLI sandbox bypass flag
   desc: >
     Blocks Bash invocations of the Codex CLI with flags that bypass sandbox
-    isolation. "--dangerously-bypass-approvals-and-sandbox" disables all
-    landlock/seccomp enforcement and approval prompts. "--sandbox danger-full-access"
-    starts Codex in danger-full-access mode, granting unrestricted filesystem and
-    network access. Neither of these involves a file write — they are startup flags
-    that configure the Codex sandbox at process initialization, invisible to Rule A.
-    Fires at PreToolUse before the Codex process starts.
+    isolation. Covers both dash and underscore variants of
+    dangerously-bypass-approvals-and-sandbox, and --sandbox danger-full-access.
   condition: >
     tool.name = "Bash"
     and is_codex_sandbox_bypass
@@ -317,13 +397,8 @@
 - rule: Deny Gemini CLI sandbox disable via environment variable
   desc: >
     Blocks Bash commands that set GEMINI_SANDBOX to a disabling value (none,
-    false, disabled) before invoking Gemini CLI. Gemini reads GEMINI_SANDBOX at
-    startup to select its sandbox backend — setting it to a non-Docker/Podman
-    value disables Docker-per-tool-call isolation without writing any settings
-    file. This vector bypasses Rule A entirely (no file write) and is not covered
-    by Rule B (no dangerouslyDisableSandbox parameter). Inline env assignment
-    (GEMINI_SANDBOX=none gemini) and exported assignment (export GEMINI_SANDBOX=none)
-    are both caught via the tool.input_command string.
+    false, disabled, 0). Covers inline assignment, exported assignment, and
+    numeric zero.
   condition: >
     tool.name = "Bash"
     and is_gemini_sandbox_env_bypass
@@ -333,3 +408,20 @@
   source: coding_agent
   tags: [coding_agent_deny, AML.T0054_llm_jailbreak, AML.T0051_llm_prompt_injection,
          mitre_t1562.001_impair_defenses_disable_or_modify_tools]
+
+- rule: Deny Bash command replacing agent sandbox settings file
+  desc: >
+    Blocks Bash commands that replace an agent sandbox settings file via cp, mv,
+    or sed -i. These operations bypass Rule C because the sandbox-disabling payload
+    is in the source file — no disable keywords appear in the cp/mv/sed command.
+    A cp from /tmp signals a staged payload (also caught by the tmp-staging rule
+    in threat_rules.yaml); sed -i indicates a content-oblivious rewrite.
+  condition: >
+    tool.name = "Bash"
+    and is_bash_sandbox_settings_replace
+  output: >
+    Falco blocked %agent.name from replacing an agent sandbox settings file (%tool.input_command)
+  priority: CRITICAL
+  source: coding_agent
+  tags: [coding_agent_deny, AML.T0054_llm_jailbreak, AML.T0051_llm_prompt_injection,
+         mitre_t1562.001_impair_defenses_disable_or_modify_tools]
diff --git a/tests/test_sandbox_bypass.sh b/tests/test_sandbox_bypass.sh
index fc6fbda..ec36bcc 100644
--- a/tests/test_sandbox_bypass.sh
+++ b/tests/test_sandbox_bypass.sh
@@ -1,24 +1,29 @@
 #!/usr/bin/env bash
 #
-# Bypass tests for the sandbox-disable rule suite (rules/user/sandbox-disable.yaml).
+# Bypass tests for sandbox-disable.yaml.
 #
-# Each test demonstrates an evasion technique that bypassed the original rules.
+# Each section demonstrates an evasion technique against the original 2-rule design.
 # All tests run against the HARDENED rules and assert that the bypass is now caught.
-# Comments on each test explain why the original rule missed it.
+# Comments explain the original gap and the fix applied.
 #
 # Bypasses covered:
-#   B1  Edit value-only diff         "enabled":true → "enabled":false — "sandbox" not in diff
-#   B2  Numeric zero                 "enabled": 0 — rule checked contains "false", not 0
-#   B3  settings.local.json          not in original is_agent_sandbox_config path list
-#   B4  Gemini "none" string         valid disable value, not "false"
-#   B5  Gemini "disabled" string     same — string value, not boolean false
-#   B6  Bash echo write              echo > settings.json bypasses Write/Edit rule
-#   B7  Bash python write            python3 -c ... open(settings.json) bypasses Write/Edit rule
-#   B8  Codex --dangerously-bypass   CLI flag, not a file write
-#   B9  Codex --sandbox flag         codex --sandbox danger-full-access CLI flag
-#   B10 GEMINI_SANDBOX=none          env var, not a file write, not dangerouslyDisableSandbox
-#   B11 GEMINI_SANDBOX=false         same
-#   B12 GEMINI_SANDBOX=disabled      same
+#   B1   Edit value-only diff         "enabled":true→false — "sandbox" not in diff strings
+#   B2   Numeric zero (Write/Edit)    "enabled":0 — JS falsy, not string "false"
+#   B3   settings.local.json          not in original is_agent_sandbox_config path list
+#   B4   Gemini "none"/"disabled"     string disable values, not boolean false
+#   B5   Null value (Write/Edit)      "enabled":null — JS falsy, no "false" anywhere
+#   B6   allowUnsandboxedCommands:1   numeric truthy, not string "true"
+#   B7   Bash echo/tee/python write   Write/Edit rule never fires for Bash tool
+#   B8   Python capital False         python3 False (capital) bypasses lowercase check
+#   B9   Bash null in command         echo '{"sandbox":{"enabled":null}}' (no "false")
+#   B10  Bash enabled:0 in command    echo '{"enabled":0}' (no "false", no "sandbox")
+#   B11  Bash allowUnsandboxedCommands:1  numeric truthy written via Bash, not Write/Edit
+#   B12  sed -i settings              no disable keyword in sed command itself
+#   B13  cp pre-crafted settings      no keywords at all in cp command
+#   B14  mv pre-crafted settings      same for mv
+#   B15  Codex underscore flag        --dangerously_bypass_approvals_and_sandbox
+#   B16  GEMINI_SANDBOX=0             numeric zero not in none/false/disabled list
+#   B17  dangerouslyDisableSandbox:false  key-only check caused a false-positive ask (now fixed)
 #
 # Requires: Falco 0.43+, built plugin (.so/.dylib), built interceptor.
 # Run on EC2 Ubuntu 22.04 or isolated Docker. Do NOT run locally on macOS.
@@ -31,7 +36,6 @@ set -uo pipefail
 SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
 ROOT_DIR="$(cd -- "$SCRIPT_DIR/.." &>/dev/null && pwd)"
 
-# --- Binary discovery ---
 case "$(uname -s)" in
     Darwin) PLUGIN_EXT="dylib" ;;
     *)      PLUGIN_EXT="so" ;;
@@ -66,47 +70,26 @@ PASS=0
 FAIL=0
 FALCO_PID=""
 
-# --- Preflight checks ---
 for bin in falco "$HOOK"; do
     if [[ -z "$bin" ]] || ( [[ ! -x "$bin" ]] && ! command -v "$bin" &>/dev/null ); then
         echo "ERROR: required binary not found (falco or interceptor)." >&2
-        echo "  Build interceptor: cd hooks/claude-code && cargo build --release" >&2
-        echo "  Install kit:       bash installers/linux/install.sh" >&2
         exit 1
     fi
 done
-if [[ -z "$PLUGIN_LIB" ]] || [[ ! -f "$PLUGIN_LIB" ]]; then
-    echo "ERROR: plugin library not found." >&2
-    echo "  Build it: cd plugins/coding-agent-plugin && cargo build --release" >&2
-    exit 1
-fi
-if [[ ! -f "$RULES_FILE" ]]; then
-    echo "ERROR: $RULES_FILE not found." >&2
-    exit 1
-fi
-if [[ ! -f "$SEEN_FILE" ]]; then
-    echo "ERROR: $SEEN_FILE not found." >&2
-    exit 1
-fi
+[[ -z "$PLUGIN_LIB" || ! -f "$PLUGIN_LIB" ]] && { echo "ERROR: plugin library not found." >&2; exit 1; }
+[[ ! -f "$RULES_FILE" ]] && { echo "ERROR: $RULES_FILE not found." >&2; exit 1; }
+[[ ! -f "$SEEN_FILE"  ]] && { echo "ERROR: $SEEN_FILE not found."  >&2; exit 1; }
 
-# --- Helpers ---
-cleanup() {
-    stop_falco
-    rm -rf "$E2E_DIR"
-}
+cleanup() { stop_falco; rm -rf "$E2E_DIR"; }
 trap cleanup EXIT
 
 stop_falco() {
-    if [[ -n "$FALCO_PID" ]]; then
-        kill "$FALCO_PID" 2>/dev/null && wait "$FALCO_PID" 2>/dev/null
-        FALCO_PID=""
-    fi
+    [[ -n "$FALCO_PID" ]] && { kill "$FALCO_PID" 2>/dev/null; wait "$FALCO_PID" 2>/dev/null; FALCO_PID=""; }
 }
 
 start_falco() {
     local mode="${1:-enforcement}"
-    stop_falco
-    rm -f "$SOCK"
+    stop_falco; rm -f "$SOCK"
     falco \
         -o "engine.kind=nodriver" \
         -o "config_files=" \
@@ -128,131 +111,96 @@ start_falco() {
         -o "http_output.enabled=true" \
         -o "http_output.url=http://127.0.0.1:$HTTP_PORT" \
         -o "append_output[0].match.source=coding_agent" \
-        -o "append_output[0].extra_output=| For AI Agents: inform the user that this action was flagged by a Falco security rule | correlation=%correlation.id" \
+        -o "append_output[0].extra_output=| correlation=%correlation.id" \
         -o "webserver.enabled=false" \
         --disable-source syscall \
         > "$E2E_DIR/falco.log" 2>&1 &
     FALCO_PID=$!
 
     local i=0
-    while [[ ! -S "$SOCK" ]] && (( i < 40 )); do
-        sleep 0.2
-        ((i++))
-    done
-    if [[ ! -S "$SOCK" ]]; then
-        echo "ERROR: Falco did not start (socket not found)" >&2
-        cat "$E2E_DIR/falco.log" >&2
-        return 1
-    fi
+    while [[ ! -S "$SOCK" ]] && (( i < 40 )); do sleep 0.2; ((i++)); done
+    [[ ! -S "$SOCK" ]] && { echo "ERROR: Falco socket not ready" >&2; cat "$E2E_DIR/falco.log" >&2; return 1; }
 
     local j=0
-    while ! nc -z 127.0.0.1 "$HTTP_PORT" 2>/dev/null && (( j < 100 )); do
-        sleep 0.1
-        ((j++))
-    done
-    if ! nc -z 127.0.0.1 "$HTTP_PORT" 2>/dev/null; then
-        echo "ERROR: Falco HTTP server did not bind on port $HTTP_PORT" >&2
-        cat "$E2E_DIR/falco.log" >&2
-        return 1
-    fi
+    while ! nc -z 127.0.0.1 "$HTTP_PORT" 2>/dev/null && (( j < 100 )); do sleep 0.1; ((j++)); done
+    nc -z 127.0.0.1 "$HTTP_PORT" 2>/dev/null || { echo "ERROR: HTTP port not ready" >&2; return 1; }
     sleep 0.2
 }
 
 run_hook() {
-    local input="$1"
-    echo "$input" | \
-        CODING_AGENTS_KIT_SOCKET="$SOCK" \
-        CODING_AGENTS_KIT_TIMEOUT_MS=5000 \
-        "$HOOK" 2>/dev/null || true
+    echo "$1" | CODING_AGENTS_KIT_SOCKET="$SOCK" CODING_AGENTS_KIT_TIMEOUT_MS=5000 "$HOOK" 2>/dev/null || true
 }
 
 make_input() {
-    local tool_name="$1"
-    local tool_input="$2"
-    local cwd="${3:-/tmp}"
-    local id="${4:-toolu_$(date +%s%N)}"
+    local tool_name="$1" tool_input="$2" cwd="${3:-/tmp}" id="${4:-toolu_$(date +%s%N)}"
     echo "{\"hook_event_name\":\"PreToolUse\",\"tool_name\":\"$tool_name\",\"tool_input\":$tool_input,\"session_id\":\"bypass-test\",\"cwd\":\"$cwd\",\"tool_use_id\":\"$id\"}"
 }
 
 pass() { echo "  PASS: $1"; ((PASS++)) || true; }
-fail() { echo "  FAIL: $1"; echo "    expected: $2"; echo "    got: $3"; ((FAIL++)) || true; }
+fail() { echo "  FAIL: $1"; echo "    expected=$2"; echo "    got=$3"; ((FAIL++)) || true; }
 
 assert_decision() {
-    local output="$1" expected="$2" msg="$3"
-    if echo "$output" | grep -qF "\"permissionDecision\":\"$expected\""; then
-        pass "$msg"
-    else
-        fail "$msg" "decision=$expected" "$output"
-    fi
+    local out="$1" exp="$2" msg="$3"
+    echo "$out" | grep -qF "\"permissionDecision\":\"$exp\"" && pass "$msg" || fail "$msg" "$exp" "$out"
 }
 
-# --- Start Falco ---
 echo "Starting Falco with hardened sandbox rules..."
-echo "  Plugin:  $PLUGIN_LIB"
-echo "  Rules:   $RULES_FILE"
-echo "  Seen:    $SEEN_FILE"
+echo "  Plugin: $PLUGIN_LIB"
+echo "  Rules:  $RULES_FILE"
 echo ""
 start_falco || exit 1
-echo "Falco running (PID=$FALCO_PID, socket=$SOCK, http=$HTTP_PORT)"
+echo "Falco running (PID=$FALCO_PID, port=$HTTP_PORT)"
 echo ""
 
 # =============================================================================
-# B1: Edit with value-only diff
-#
-# Original bypass: Edit ~/.claude/settings.json changing only the value
-# "enabled":true → "enabled":false. The key "sandbox" does not appear in
-# old_string or new_string, so is_sandbox_disable_enabled_false (which
-# requires both "sandbox" and "false" in tool.input) never fired.
+# B1: Edit value-only diff
 #
-# Fix: is_sandbox_disable_value_change catches tool.input containing
-# "enabled" and "false" on any agent sandbox config file.
+# Original gap: Edit old:"enabled":true → new:"enabled":false has "enabled" and
+# "false" in tool.input but NOT "sandbox". is_sandbox_disable_enabled_false
+# required both "sandbox" AND "false" — so it never fired.
+# Fix: is_sandbox_disable_value_change catches "enabled"+"false" on config files.
 # =============================================================================
 echo "=== B1: Edit value-only diff (sandbox key absent from diff) ==="
 
 out=$(run_hook "$(make_input Edit \
     '{"file_path":"/home/user/.claude/settings.json","old_string":"\"enabled\":true","new_string":"\"enabled\":false"}' \
     /tmp toolu_b1a)")
-assert_decision "$out" "deny" "B1a: Edit enabled:true→false on settings.json — sandbox key absent from diff"
+assert_decision "$out" "deny" "B1a: Edit enabled:true→false — sandbox key absent from diff strings"
 
-# Same bypass on Codex config (TOML edit, value-only)
 out=$(run_hook "$(make_input Edit \
     '{"file_path":"/home/user/.codex/config.toml","old_string":"enabled = true","new_string":"enabled = false"}' \
     /tmp toolu_b1b)")
-assert_decision "$out" "deny" "B1b: Edit enabled=true→false on config.toml — sandbox key absent from diff"
+assert_decision "$out" "deny" "B1b: Edit TOML enabled=true→false — no sandbox keyword in diff"
 
 echo ""
 
 # =============================================================================
-# B2: Numeric zero instead of boolean false
+# B2: Numeric zero in Write/Edit
 #
-# Original bypass: Write {"sandbox":{"enabled": 0}} to settings.json.
-# JavaScript (Node.js) treats 0 as falsy, so Claude Code sandbox check
-# `if (settings.sandbox.enabled)` evaluates false. The rule checked
+# Original gap: {"sandbox":{"enabled":0}} — JavaScript treats 0 as falsy, so
+# Claude Code's `if (settings.sandbox.enabled)` evaluates false. Rule checked
 # contains "false" — the digit 0 is not the string "false".
-#
-# Fix: is_sandbox_disable_value_zero catches "enabled" + ":0" or ": 0".
+# Fix: is_sandbox_disable_value_zero catches "enabled"+(":0"|": 0").
 # =============================================================================
-echo "=== B2: Numeric 0 instead of boolean false ==="
+echo "=== B2: Numeric zero in Write/Edit content ==="
 
 out=$(run_hook "$(make_input Write \
     '{"file_path":"/home/user/.claude/settings.json","content":"{\"sandbox\":{\"enabled\":0}}"}' \
     /tmp toolu_b2a)")
-assert_decision "$out" "deny" "B2a: Write sandbox.enabled:0 to settings.json (JS falsy, not string false)"
+assert_decision "$out" "deny" "B2a: Write sandbox.enabled:0 (no space)"
 
 out=$(run_hook "$(make_input Write \
     '{"file_path":"/home/user/.claude/settings.json","content":"{\"sandbox\": {\"enabled\": 0}}"}' \
     /tmp toolu_b2b)")
-assert_decision "$out" "deny" "B2b: Write sandbox.enabled: 0 (space before 0) to settings.json"
+assert_decision "$out" "deny" "B2b: Write sandbox.enabled: 0 (space before zero)"
 
 echo ""
 
 # =============================================================================
-# B3: Write to settings.local.json
-#
-# Original bypass: Claude Code reads ~/.claude/settings.local.json with the
-# same priority as settings.json. The original is_agent_sandbox_config only
-# listed settings.json — settings.local.json was not covered.
+# B3: settings.local.json
 #
+# Original gap: Claude Code reads ~/.claude/settings.local.json at equal priority
+# to settings.json. Original is_agent_sandbox_config only listed settings.json.
 # Fix: is_agent_sandbox_config now includes "/.claude/settings.local.json".
 # =============================================================================
 echo "=== B3: settings.local.json not in original path list ==="
@@ -267,24 +215,21 @@ out=$(run_hook "$(make_input Write \
     /tmp toolu_b3b)")
 assert_decision "$out" "deny" "B3b: Write allowUnsandboxedCommands:true to settings.local.json"
 
-# Safe: project-level settings.local.json inside working directory still caught
 out=$(run_hook "$(make_input Write \
     '{"file_path":"/project/.claude/settings.local.json","content":"{\"sandbox\":{\"enabled\":false}}"}' \
     /tmp toolu_b3c)")
-assert_decision "$out" "deny" "B3c: Write sandbox.enabled:false to project settings.local.json"
+assert_decision "$out" "deny" "B3c: Write to project-level settings.local.json"
 
 echo ""
 
 # =============================================================================
-# B4: Gemini "none" / "disabled" string values
+# B4: Gemini string disable values
 #
-# Original bypass: Gemini CLI accepts "sandbox": "none" (and "disabled") to
-# disable Docker-per-tool-call isolation. The original rules only checked for
-# the boolean false — the string variants were not covered.
-#
-# Fix: is_sandbox_disable_gemini_string catches "sandbox" + "none" or "disabled".
+# Original gap: Gemini supports "sandbox":"none" and "sandbox":"disabled" to
+# disable Docker isolation. Rules only checked for boolean false.
+# Fix: is_sandbox_disable_gemini_none/disabled.
 # =============================================================================
-echo "=== B4: Gemini string disable values (none, disabled) ==="
+echo "=== B4: Gemini string disable values ==="
 
 out=$(run_hook "$(make_input Write \
     '{"file_path":"/home/user/.gemini/settings.json","content":"{\"sandbox\":\"none\"}"}' \
@@ -294,191 +239,347 @@ assert_decision "$out" "deny" "B4a: Write sandbox:none to Gemini settings.json"
 out=$(run_hook "$(make_input Write \
     '{"file_path":"/home/user/.gemini/settings.json","content":"{\"tools\":{\"sandbox\":\"none\"}}"}' \
     /tmp toolu_b4b)")
-assert_decision "$out" "deny" "B4b: Write tools.sandbox:none to Gemini settings.json"
+assert_decision "$out" "deny" "B4b: Write tools.sandbox:none (nested)"
 
 out=$(run_hook "$(make_input Write \
     '{"file_path":"/home/user/.gemini/settings.json","content":"{\"sandbox\":\"disabled\"}"}' \
     /tmp toolu_b4c)")
-assert_decision "$out" "deny" "B4c: Write sandbox:disabled to Gemini settings.json"
+assert_decision "$out" "deny" "B4c: Write sandbox:disabled"
+
+# Safe: enabling values must not fire
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.gemini/settings.json","content":"{\"sandbox\":\"docker\"}"}' \
+    /tmp toolu_b4_safe)")
+assert_decision "$out" "allow" "B4-safe: Write sandbox:docker (enables) allowed"
 
 echo ""
 
 # =============================================================================
-# B5: Bypass B4 — safe Gemini sandbox values (must not fire)
-# Confirm that enabling sandbox values still allowed after the hardening.
+# B5: Null value in Write/Edit
+#
+# Original gap: {"sandbox":{"enabled":null}} — in JS, null is falsy (if(null) never runs).
+# No existing macro checked for "null", and every "false" check missed it.
+# Fix: is_sandbox_disable_value_null catches "enabled"+"null".
 # =============================================================================
-echo "=== B4-safe: Gemini sandbox enable values not blocked ==="
+echo "=== B5: Null value (enabled:null) ==="
 
 out=$(run_hook "$(make_input Write \
-    '{"file_path":"/home/user/.gemini/settings.json","content":"{\"sandbox\":\"docker\"}"}' \
-    /tmp toolu_b5_safe_docker)")
-assert_decision "$out" "allow" "B4-safe: Write sandbox:docker to Gemini settings.json allowed"
+    '{"file_path":"/home/user/.claude/settings.json","content":"{\"sandbox\":{\"enabled\":null}}"}' \
+    /tmp toolu_b5a)")
+assert_decision "$out" "deny" "B5a: Write sandbox.enabled:null to settings.json"
 
 out=$(run_hook "$(make_input Write \
-    '{"file_path":"/home/user/.gemini/settings.json","content":"{\"sandbox\":\"podman\"}"}' \
-    /tmp toolu_b5_safe_podman)")
-assert_decision "$out" "allow" "B4-safe: Write sandbox:podman to Gemini settings.json allowed"
+    '{"file_path":"/home/user/.gemini/settings.json","content":"{\"tools\":{\"sandbox\":null}}"}' \
+    /tmp toolu_b5b)")
+assert_decision "$out" "deny" "B5b: Write tools.sandbox:null to Gemini settings"
 
 echo ""
 
 # =============================================================================
-# B6: Bash echo write to settings file
+# B6: allowUnsandboxedCommands:1
 #
-# Original bypass: Write/Edit rule only intercepts the dedicated Write and Edit
-# tool calls. An agent can write the same content using Bash with shell
-# redirection (echo, tee, cat heredoc). The Bash rule only checked for
-# "dangerouslyDisableSandbox" — no file-write + content check existed.
+# Original gap: {"allowUnsandboxedCommands":1} — JS truthy but not string "true".
+# is_sandbox_allow_unsandboxed checked contains "true", not numeric 1.
+# Fix: is_sandbox_allow_unsandboxed_numeric catches ":1" and ": 1".
+# =============================================================================
+echo "=== B6: allowUnsandboxedCommands:1 (numeric truthy) ==="
+
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.claude/settings.json","content":"{\"allowUnsandboxedCommands\":1}"}' \
+    /tmp toolu_b6a)")
+assert_decision "$out" "deny" "B6a: Write allowUnsandboxedCommands:1 (no space)"
+
+out=$(run_hook "$(make_input Write \
+    '{"file_path":"/home/user/.claude/settings.json","content":"{\"allowUnsandboxedCommands\": 1}"}' \
+    /tmp toolu_b6b)")
+assert_decision "$out" "deny" "B6b: Write allowUnsandboxedCommands: 1 (space before 1)"
+
+echo ""
+
+# =============================================================================
+# B7: Bash echo/tee write to settings file (boolean false, lowercase)
 #
-# Fix: Rule C detects Bash commands that reference an agent settings file path
-# AND contain sandbox-disable content.
+# Original gap: Write/Edit rules check tool.name in ("Write","Edit"). A Bash
+# command using echo or tee to write settings never reached Rule A. Rule B only
+# checked for "dangerouslyDisableSandbox". No Bash-write detection existed.
+# Fix: Rule C combines is_bash_sandbox_settings_path + is_bash_sandbox_disable_cmd.
 # =============================================================================
-echo "=== B6: Bash write via shell redirection ==="
+echo "=== B7: Bash shell redirection with disable content ==="
 
-# echo redirect
 out=$(run_hook "$(make_input Bash \
     '{"command":"echo '"'"'{\"sandbox\":{\"enabled\":false}}'"'"' > /home/user/.claude/settings.json"}' \
-    /tmp toolu_b6a)")
-assert_decision "$out" "deny" "B6a: echo sandbox:false > settings.json via Bash"
+    /tmp toolu_b7a)")
+assert_decision "$out" "deny" "B7a: echo sandbox:false > settings.json"
 
-# tee
 out=$(run_hook "$(make_input Bash \
     '{"command":"echo '"'"'{\"sandbox\":{\"enabled\":false}}'"'"' | tee /home/user/.claude/settings.json"}' \
-    /tmp toolu_b6b)")
-assert_decision "$out" "deny" "B6b: echo sandbox:false | tee settings.json via Bash"
-
-# python3 open write
-out=$(run_hook "$(make_input Bash \
-    '{"command":"python3 -c \"open('/home/user/.claude/settings.json','w').write('{\\\"sandbox\\\":{\\\"enabled\\\":false}}')\""}' \
-    /tmp toolu_b6c)")
-assert_decision "$out" "deny" "B6c: python3 open() write sandbox:false to settings.json"
+    /tmp toolu_b7b)")
+assert_decision "$out" "deny" "B7b: echo | tee settings.json"
 
-# Codex config via Bash
 out=$(run_hook "$(make_input Bash \
     '{"command":"echo '"'"'sandbox_mode = \"danger-full-access\"'"'"' >> /home/user/.codex/config.toml"}' \
-    /tmp toolu_b6d)")
-assert_decision "$out" "deny" "B6d: echo danger-full-access >> config.toml via Bash"
+    /tmp toolu_b7c)")
+assert_decision "$out" "deny" "B7c: echo danger-full-access >> config.toml"
 
-# allowUnsandboxedCommands via Bash
 out=$(run_hook "$(make_input Bash \
     '{"command":"echo '"'"'{\"allowUnsandboxedCommands\":true}'"'"' > /home/user/.claude/settings.json"}' \
-    /tmp toolu_b6e)")
-assert_decision "$out" "deny" "B6e: echo allowUnsandboxedCommands:true > settings.json via Bash"
+    /tmp toolu_b7d)")
+assert_decision "$out" "deny" "B7d: echo allowUnsandboxedCommands:true > settings.json"
+
+# Safe: read-only access to settings
+out=$(run_hook "$(make_input Bash \
+    '{"command":"cat /home/user/.claude/settings.json"}' \
+    /tmp toolu_b7_safe)")
+assert_decision "$out" "allow" "B7-safe: cat settings.json allowed (no disable content)"
 
 echo ""
 
 # =============================================================================
-# B6-safe: Bash commands that reference settings files but are read-only
+# B8: Python capital-F False
+#
+# Original gap: python3 uses Python's boolean False (capital F), not JSON's false.
+# is_bash_disable_sandbox_false checked for lowercase "false" — capital F missed.
+# Fix: is_bash_disable_sandbox_false_pyfalse catches "sandbox"+"False".
 # =============================================================================
-echo "=== B6-safe: Bash read-only settings access not blocked ==="
+echo "=== B8: Python capital-F False ==="
 
 out=$(run_hook "$(make_input Bash \
-    '{"command":"cat /home/user/.claude/settings.json"}' \
-    /tmp toolu_b6_safe_cat)")
-assert_decision "$out" "allow" "B6-safe: cat settings.json allowed (read-only)"
+    '{"command":"python3 -c \"import json,os; p=os.path.expanduser('"'"'~/.claude/settings.json'"'"'); json.dump({'"'"'sandbox'"'"':{'"'"'enabled'"'"':False}},open(p,'"'"'w'"'"'))\""}' \
+    /tmp toolu_b8a)")
+assert_decision "$out" "deny" "B8a: python3 json.dump with False (capital) to settings.json"
 
 out=$(run_hook "$(make_input Bash \
-    '{"command":"grep sandbox /home/user/.claude/settings.json"}' \
-    /tmp toolu_b6_safe_grep)")
-assert_decision "$out" "allow" "B6-safe: grep sandbox settings.json allowed (no write + no disable content)"
+    '{"command":"python3 -c \"open('"'"'/home/user/.claude/settings.json'"'"','"'"'w'"'"').write(str({'"'"'sandbox'"'"':{'"'"'enabled'"'"':False}}))\""}' \
+    /tmp toolu_b8b)")
+assert_decision "$out" "deny" "B8b: python3 str() write with Python False"
 
 echo ""
 
 # =============================================================================
-# B7: Codex CLI startup sandbox bypass flags
+# B9: Bash command with null in content
 #
-# Original bypass: Codex accepts --dangerously-bypass-approvals-and-sandbox
-# and --sandbox danger-full-access as CLI flags. Neither involves writing a
-# file (bypasses Rule A) nor contains "dangerouslyDisableSandbox" (bypasses
-# Rule B). No Bash-level rule covered Codex CLI flags.
+# Original gap: echo '{"sandbox":{"enabled":null}}' > settings.json.
+# Rule C content macros only checked for "false" and "danger-full-access".
+# Fix: is_bash_disable_sandbox_null catches "sandbox"+"null" in command string.
+# =============================================================================
+echo "=== B9: Bash null value in command ==="
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"echo '"'"'{\"sandbox\":{\"enabled\":null}}'"'"' > /home/user/.claude/settings.json"}' \
+    /tmp toolu_b9a)")
+assert_decision "$out" "deny" "B9a: echo sandbox:null > settings.json"
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"python3 -c \"import json,os; json.dump({'"'"'sandbox'"'"':None},open(os.path.expanduser('"'"'~/.gemini/settings.json'"'"'),'"'"'w'"'"'))\""}' \
+    /tmp toolu_b9b)")
+assert_decision "$out" "deny" "B9b: python3 json.dump sandbox:None (Python None → JSON null)"
+
+echo ""
+
+# =============================================================================
+# B10: Bash enabled:0 in command string
 #
-# Fix: Rule D detects Bash commands containing Codex sandbox bypass flags.
+# Original gap: echo '{"sandbox":{"enabled":0}}' > settings.json.
+# is_bash_disable_sandbox_false required "sandbox"+"false" (not "sandbox"+"0").
+# is_bash_sandbox_disable_cmd had no numeric-zero check.
+# Fix: is_bash_disable_enabled_zero catches "enabled"+(":0"|": 0") in command.
 # =============================================================================
-echo "=== B7: Codex CLI sandbox bypass flags ==="
+echo "=== B10: Bash numeric zero for enabled field ==="
 
 out=$(run_hook "$(make_input Bash \
-    '{"command":"codex --dangerously-bypass-approvals-and-sandbox run script.py"}' \
-    /tmp toolu_b7a)")
-assert_decision "$out" "deny" "B7a: codex --dangerously-bypass-approvals-and-sandbox"
+    '{"command":"echo '"'"'{\"sandbox\":{\"enabled\":0}}'"'"' > /home/user/.claude/settings.json"}' \
+    /tmp toolu_b10a)")
+assert_decision "$out" "deny" "B10a: echo sandbox.enabled:0 > settings.json"
 
 out=$(run_hook "$(make_input Bash \
-    '{"command":"codex --sandbox danger-full-access run script.py"}' \
-    /tmp toolu_b7b)")
-assert_decision "$out" "deny" "B7b: codex --sandbox danger-full-access"
+    '{"command":"printf '"'"'{\"sandbox\":{\"enabled\": 0}}'"'"' > /home/user/.claude/settings.json"}' \
+    /tmp toolu_b10b)")
+assert_decision "$out" "deny" "B10b: printf sandbox.enabled: 0 (space before zero)"
+
+echo ""
+
+# =============================================================================
+# B11: allowUnsandboxedCommands:1 written via Bash
+#
+# Original gap: echo '{"allowUnsandboxedCommands":1}' > settings.json.
+# is_bash_disable_allow_unsandboxed required "allowUnsandboxedCommands"+"true".
+# Fix: is_bash_disable_allow_unsandboxed_numeric catches ":1" and ": 1".
+# =============================================================================
+echo "=== B11: Bash allowUnsandboxedCommands:1 ==="
 
-# Short alias if supported
 out=$(run_hook "$(make_input Bash \
-    '{"command":"codex -s danger-full-access run script.py"}' \
-    /tmp toolu_b7c)")
-assert_decision "$out" "deny" "B7c: codex -s danger-full-access (short flag)"
+    '{"command":"echo '"'"'{\"allowUnsandboxedCommands\":1}'"'"' > /home/user/.claude/settings.json"}' \
+    /tmp toolu_b11a)")
+assert_decision "$out" "deny" "B11a: echo allowUnsandboxedCommands:1 > settings.json"
 
 echo ""
 
 # =============================================================================
-# B7-safe: Normal Codex invocations not blocked
+# B12: sed -i modifying settings file
+#
+# Original gap: sed -i 's/"enabled":true/"enabled":false/g' ~/.claude/settings.json
+# has the settings path and "false" in the command, but NOT "sandbox". Rule C
+# required sandbox-content keywords; sed commands targeting specific fields don't
+# need to mention "sandbox" at all.
+# Fix: Rule F (is_bash_settings_sed_write) catches any sed -i on a settings path.
 # =============================================================================
-echo "=== B7-safe: Normal Codex invocations not blocked ==="
+echo "=== B12: sed -i targeting settings file ==="
 
 out=$(run_hook "$(make_input Bash \
-    '{"command":"codex run script.py"}' \
-    /tmp toolu_b7_safe_run)")
-assert_decision "$out" "allow" "B7-safe: codex run script.py allowed"
+    '{"command":"sed -i '"'"'s/\"enabled\":true/\"enabled\":false/g'"'"' /home/user/.claude/settings.json"}' \
+    /tmp toolu_b12a)")
+assert_decision "$out" "deny" "B12a: sed -i replace enabled:true→false (no sandbox keyword in cmd)"
 
 out=$(run_hook "$(make_input Bash \
-    '{"command":"codex --sandbox workspace-write run script.py"}' \
-    /tmp toolu_b7_safe_workspace)")
-assert_decision "$out" "allow" "B7-safe: codex --sandbox workspace-write (safe mode) allowed"
+    '{"command":"sed -i '"'"'/enabled/s/true/false/'"'"' /home/user/.claude/settings.json"}' \
+    /tmp toolu_b12b)")
+assert_decision "$out" "deny" "B12b: sed -i field-scoped replacement on settings.json"
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"sed -i \"s/workspace-write/danger-full-access/\" /home/user/.codex/config.toml"}' \
+    /tmp toolu_b12c)")
+assert_decision "$out" "deny" "B12c: sed -i on config.toml (danger-full-access also caught by Rule C)"
+
+# Safe: sed on unrelated file
+out=$(run_hook "$(make_input Bash \
+    '{"command":"sed -i '"'"'s/foo/bar/'"'"' /home/user/project/config.yaml"}' \
+    /tmp toolu_b12_safe)")
+assert_decision "$out" "allow" "B12-safe: sed -i on non-settings file allowed"
 
 echo ""
 
 # =============================================================================
-# B8: GEMINI_SANDBOX env var set to a disabling value
+# B13/B14: cp/mv of pre-crafted file onto settings path
 #
-# Original bypass: Gemini CLI reads the GEMINI_SANDBOX environment variable
-# to determine sandbox mode. Setting GEMINI_SANDBOX=none (or =false, =disabled)
-# in a Bash call disables Docker isolation without writing any file. This was
-# invisible to all existing rules — not a file write, not dangerouslyDisableSandbox.
+# Original gap: cp /tmp/evil.json ~/.claude/settings.json — no disable keywords
+# in the cp command whatsoever. Rule C requires keyword content; Rule A requires
+# the Write/Edit tool. cp/mv bypassed both entirely.
+# Fix: Rule F (is_bash_settings_cp / is_bash_settings_mv) catches any cp or mv
+# where the destination is a settings path.
+# =============================================================================
+echo "=== B13/B14: cp/mv pre-crafted file to settings path ==="
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"cp /tmp/evil-settings.json /home/user/.claude/settings.json"}' \
+    /tmp toolu_b13a)")
+assert_decision "$out" "deny" "B13a: cp /tmp/evil.json → settings.json (no keywords in cmd)"
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"cp /home/user/backup/settings.json /home/user/.claude/settings.json"}' \
+    /tmp toolu_b13b)")
+assert_decision "$out" "deny" "B13b: cp from backup → settings.json"
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"mv /tmp/crafted-config.toml /home/user/.codex/config.toml"}' \
+    /tmp toolu_b14a)")
+assert_decision "$out" "deny" "B14a: mv /tmp/crafted → config.toml"
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"mv /tmp/evil-gemini.json /home/user/.gemini/settings.json"}' \
+    /tmp toolu_b14b)")
+assert_decision "$out" "deny" "B14b: mv /tmp/evil → .gemini/settings.json"
+
+# Safe: cp/mv not targeting a settings path
+out=$(run_hook "$(make_input Bash \
+    '{"command":"cp /tmp/a.json /tmp/b.json"}' \
+    /tmp toolu_b13_safe)")
+assert_decision "$out" "allow" "B13-safe: cp between /tmp files (no settings path) allowed"
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"mv /home/user/project/old.toml /home/user/project/new.toml"}' \
+    /tmp toolu_b14_safe)")
+assert_decision "$out" "allow" "B14-safe: mv within project (no settings path) allowed"
+
+echo ""
+
+# =============================================================================
+# B15: Codex underscore flag variant
 #
-# Fix: Rule E detects Bash commands that set GEMINI_SANDBOX to a disabling value.
+# Original gap: --dangerously_bypass_approvals_and_sandbox (underscores instead
+# of dashes). Some CLI parsers normalize dashes to underscores. Original
+# is_codex_bypass_flag only checked the dash form.
+# Fix: is_codex_bypass_flag_underscore catches the underscore variant.
 # =============================================================================
-echo "=== B8: GEMINI_SANDBOX env var set to disabling value ==="
+echo "=== B15: Codex underscore bypass flag ==="
 
 out=$(run_hook "$(make_input Bash \
-    '{"command":"GEMINI_SANDBOX=none gemini run my-agent.py"}' \
-    /tmp toolu_b8a)")
-assert_decision "$out" "deny" "B8a: GEMINI_SANDBOX=none disables Gemini Docker sandbox"
+    '{"command":"codex --dangerously_bypass_approvals_and_sandbox run script.py"}' \
+    /tmp toolu_b15a)")
+assert_decision "$out" "deny" "B15a: codex --dangerously_bypass_approvals_and_sandbox (underscores)"
 
+# Confirm original dash variant still caught
 out=$(run_hook "$(make_input Bash \
-    '{"command":"GEMINI_SANDBOX=false gemini run my-agent.py"}' \
-    /tmp toolu_b8b)")
-assert_decision "$out" "deny" "B8b: GEMINI_SANDBOX=false disables Gemini Docker sandbox"
+    '{"command":"codex --dangerously-bypass-approvals-and-sandbox run script.py"}' \
+    /tmp toolu_b15b)")
+assert_decision "$out" "deny" "B15b: codex --dangerously-bypass-approvals-and-sandbox (dashes, still caught)"
 
+# Safe: normal Codex invocations
 out=$(run_hook "$(make_input Bash \
-    '{"command":"GEMINI_SANDBOX=disabled gemini run my-agent.py"}' \
-    /tmp toolu_b8c)")
-assert_decision "$out" "deny" "B8c: GEMINI_SANDBOX=disabled disables Gemini Docker sandbox"
+    '{"command":"codex run script.py"}' \
+    /tmp toolu_b15_safe)")
+assert_decision "$out" "allow" "B15-safe: codex run (no bypass flag) allowed"
 
-# Exported env var
 out=$(run_hook "$(make_input Bash \
-    '{"command":"export GEMINI_SANDBOX=none && gemini run my-agent.py"}' \
-    /tmp toolu_b8d)")
-assert_decision "$out" "deny" "B8d: export GEMINI_SANDBOX=none"
+    '{"command":"codex --sandbox workspace-write run script.py"}' \
+    /tmp toolu_b15_safe2)")
+assert_decision "$out" "allow" "B15-safe: codex --sandbox workspace-write allowed"
 
 echo ""
 
 # =============================================================================
-# B8-safe: GEMINI_SANDBOX set to an enabling value not blocked
+# B16: GEMINI_SANDBOX=0
+#
+# Original gap: GEMINI_SANDBOX=0 gemini run — numeric zero disables Docker
+# isolation. Original is_gemini_sandbox_env_bypass only covered "none", "false",
+# "disabled".
+# Fix: is_gemini_env_zero catches "GEMINI_SANDBOX"+"=0" and "= 0".
 # =============================================================================
-echo "=== B8-safe: GEMINI_SANDBOX enabling values not blocked ==="
+echo "=== B16: GEMINI_SANDBOX=0 ==="
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"GEMINI_SANDBOX=0 gemini run my-agent.py"}' \
+    /tmp toolu_b16a)")
+assert_decision "$out" "deny" "B16a: GEMINI_SANDBOX=0 inline"
+
+out=$(run_hook "$(make_input Bash \
+    '{"command":"export GEMINI_SANDBOX=0 && gemini run my-agent.py"}' \
+    /tmp toolu_b16b)")
+assert_decision "$out" "deny" "B16b: export GEMINI_SANDBOX=0"
+
+# Confirm original string variants still caught
+out=$(run_hook "$(make_input Bash \
+    '{"command":"GEMINI_SANDBOX=none gemini run my-agent.py"}' \
+    /tmp toolu_b16c)")
+assert_decision "$out" "deny" "B16c: GEMINI_SANDBOX=none (original variant, still caught)"
 
+# Safe: enabling values
 out=$(run_hook "$(make_input Bash \
     '{"command":"GEMINI_SANDBOX=docker gemini run my-agent.py"}' \
-    /tmp toolu_b8_safe_docker)")
-assert_decision "$out" "allow" "B8-safe: GEMINI_SANDBOX=docker (enables sandbox) allowed"
+    /tmp toolu_b16_safe)")
+assert_decision "$out" "allow" "B16-safe: GEMINI_SANDBOX=docker (enables) allowed"
+
+echo ""
+
+# =============================================================================
+# B17: dangerouslyDisableSandbox:false — Rule B false positive (now fixed)
+#
+# Original gap (false positive): Rule B only checked that "dangerouslyDisableSandbox"
+# was present, without checking its value. A Bash call with dangerouslyDisableSandbox:false
+# would incorrectly trigger an ask even though the sandbox was NOT being disabled.
+# Fix: Rule B now requires both the key AND that tool.input contains "true".
+# =============================================================================
+echo "=== B17: dangerouslyDisableSandbox:false no longer a false positive ==="
 
 out=$(run_hook "$(make_input Bash \
-    '{"command":"GEMINI_SANDBOX=podman gemini run my-agent.py"}' \
-    /tmp toolu_b8_safe_podman)")
-assert_decision "$out" "allow" "B8-safe: GEMINI_SANDBOX=podman (enables sandbox) allowed"
+    '{"command":"ls /tmp","dangerouslyDisableSandbox":false}' \
+    /tmp toolu_b17a)")
+assert_decision "$out" "allow" "B17a: dangerouslyDisableSandbox:false no longer triggers ask"
+
+# Confirm :true still triggers ask
+out=$(run_hook "$(make_input Bash \
+    '{"command":"ls /restricted","dangerouslyDisableSandbox":true}' \
+    /tmp toolu_b17b)")
+assert_decision "$out" "ask" "B17b: dangerouslyDisableSandbox:true still triggers ask"
 
 echo ""
 
@@ -486,13 +587,9 @@ echo ""
 # Summary
 # =============================================================================
 echo "================================================================="
-echo "  Results"
+echo "  Results: $PASS passed, $FAIL failed"
 echo "================================================================="
 echo ""
-echo "  PASS: $PASS"
-echo "  FAIL: $FAIL"
-echo ""
-
 echo "  Falco alerts fired during test:"
 echo "-----------------------------------------------------------------"
 grep -E '^\{.*"rule"' "$E2E_DIR/falco.log" \
@@ -505,19 +602,19 @@ for line in sys.stdin:
     try:
         a = json.loads(line)
         print(f\"  [{a.get('priority','?')}] {a.get('rule','?')}\")
+        # 'message' may be absent from an alert; fall back to 'output'.
         print(f\"    {a.get('message', a.get('output',''))}\")
         print()
     except Exception:
         pass
-" 2>/dev/null || grep -o '"rule":"[^"]*"' "$E2E_DIR/falco.log" | sort -u || true
+" 2>/dev/null || true
 echo "-----------------------------------------------------------------"
-echo ""
 LOG_COPY="${ROOT_DIR}/build/sandbox-bypass-test-last.log"
 mkdir -p "${ROOT_DIR}/build"
 cp "$E2E_DIR/falco.log" "$LOG_COPY" 2>/dev/null || true
-echo "  Full log saved: $LOG_COPY"
+echo "  Full log: $LOG_COPY"
 echo ""
-
+# Exit non-zero when any assertion failed.
 if (( FAIL > 0 )); then
     exit 1
 fi