From 70ae7df6a3af987c7949a6436952246da287da25 Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Thu, 26 Mar 2026 08:29:06 -0700 Subject: [PATCH 01/20] feat(sandbox): integrate OCSF structured logging for all sandbox events WIP: Replace ad-hoc tracing calls with OCSF event builders across all sandbox subsystems (network, SSH, process, filesystem, config, lifecycle). - Register ocsf_logging_enabled setting (defaults false) - Replace stdout/file fmt layers with OcsfShorthandLayer - Add conditional OcsfJsonlLayer for /var/log/openshell-ocsf.log - Update LogPushLayer to extract OCSF shorthand for gRPC push - Migrate ~106 log sites to OCSF builders (NetworkActivity, HttpActivity, SshActivity, ProcessActivity, DetectionFinding, ConfigStateChange, AppLifecycle) - Add openshell-ocsf to all Docker build contexts --- Cargo.lock | 1 + crates/openshell-core/src/settings.rs | 9 +- crates/openshell-ocsf/src/lib.rs | 4 +- .../src/tracing_layers/jsonl_layer.rs | 27 + .../openshell-ocsf/src/tracing_layers/mod.rs | 2 +- crates/openshell-sandbox/Cargo.toml | 1 + .../openshell-sandbox/src/bypass_monitor.rs | 99 ++- crates/openshell-sandbox/src/l7/mod.rs | 26 +- crates/openshell-sandbox/src/l7/relay.rs | 101 ++- crates/openshell-sandbox/src/lib.rs | 595 ++++++++++++--- crates/openshell-sandbox/src/log_push.rs | 21 +- crates/openshell-sandbox/src/main.rs | 56 +- .../src/mechanistic_mapper.rs | 18 +- crates/openshell-sandbox/src/opa.rs | 20 +- crates/openshell-sandbox/src/process.rs | 11 +- crates/openshell-sandbox/src/proxy.rs | 676 +++++++++++++----- .../src/sandbox/linux/landlock.rs | 54 +- .../src/sandbox/linux/netns.rs | 107 ++- crates/openshell-sandbox/src/sandbox/mod.rs | 14 +- crates/openshell-sandbox/src/ssh.rs | 147 +++- deploy/docker/Dockerfile.images | 4 + deploy/docker/Dockerfile.python-wheels | 4 +- deploy/docker/Dockerfile.python-wheels-macos | 4 +- 23 files changed, 1553 insertions(+), 448 deletions(-) diff --git a/Cargo.lock 
b/Cargo.lock index 852d97a0..ac8d1d83 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2958,6 +2958,7 @@ dependencies = [ "miette", "nix", "openshell-core", + "openshell-ocsf", "openshell-policy", "openshell-router", "rand_core 0.6.4", diff --git a/crates/openshell-core/src/settings.rs b/crates/openshell-core/src/settings.rs index b94c08fc..e5d58d27 100644 --- a/crates/openshell-core/src/settings.rs +++ b/crates/openshell-core/src/settings.rs @@ -49,8 +49,13 @@ pub struct RegisteredSetting { /// keys are accepted. /// 5. Add a unit test in this module's `tests` section to cover the new key. pub const REGISTERED_SETTINGS: &[RegisteredSetting] = &[ - // Production settings go here. Add entries following the steps above. - // + // When true the sandbox writes OCSF v1.7.0 JSONL records to + // `/var/log/openshell-ocsf.log` in addition to the human-readable + // shorthand log. Defaults to false (JSONL file is not created). + RegisteredSetting { + key: "ocsf_logging_enabled", + kind: SettingValueKind::Bool, + }, // Test-only keys live behind the `dev-settings` feature flag so they // don't appear in production builds. #[cfg(feature = "dev-settings")] diff --git a/crates/openshell-ocsf/src/lib.rs b/crates/openshell-ocsf/src/lib.rs index a70a344b..b9000afc 100644 --- a/crates/openshell-ocsf/src/lib.rs +++ b/crates/openshell-ocsf/src/lib.rs @@ -62,4 +62,6 @@ pub use builders::{ }; // --- Tracing layers --- -pub use tracing_layers::{OcsfJsonlLayer, OcsfShorthandLayer, emit_ocsf_event}; +pub use tracing_layers::{ + OCSF_TARGET, OcsfJsonlLayer, OcsfShorthandLayer, clone_current_event, emit_ocsf_event, +}; diff --git a/crates/openshell-ocsf/src/tracing_layers/jsonl_layer.rs b/crates/openshell-ocsf/src/tracing_layers/jsonl_layer.rs index 4466c0ab..e8592b7d 100644 --- a/crates/openshell-ocsf/src/tracing_layers/jsonl_layer.rs +++ b/crates/openshell-ocsf/src/tracing_layers/jsonl_layer.rs @@ -4,7 +4,9 @@ //! Tracing layer that writes OCSF JSONL to a writer. 
use std::io::Write; +use std::sync::Arc; use std::sync::Mutex; +use std::sync::atomic::{AtomicBool, Ordering}; use tracing::Subscriber; use tracing_subscriber::Layer; @@ -15,8 +17,15 @@ use crate::tracing_layers::event_bridge::{OCSF_TARGET, clone_current_event}; /// A tracing `Layer` that intercepts OCSF events and writes JSONL output. /// /// Only events with `target: "ocsf"` are processed; non-OCSF events are ignored. +/// +/// An optional enabled flag (`Arc`) can be set via +/// [`with_enabled_flag`](Self::with_enabled_flag). When the flag is present and +/// `false`, the layer short-circuits without writing. This allows the sandbox +/// to hot-toggle OCSF JSONL output at runtime via the `ocsf_logging_enabled` +/// setting without rebuilding the subscriber. pub struct OcsfJsonlLayer { writer: Mutex, + enabled: Option>, } impl OcsfJsonlLayer { @@ -25,8 +34,19 @@ impl OcsfJsonlLayer { pub fn new(writer: W) -> Self { Self { writer: Mutex::new(writer), + enabled: None, } } + + /// Attach a shared boolean flag that controls whether the layer writes. + /// + /// When the flag is `false`, the layer receives events but discards them. + /// When the flag is absent (the default), the layer always writes. + #[must_use] + pub fn with_enabled_flag(mut self, flag: Arc) -> Self { + self.enabled = Some(flag); + self + } } impl Layer for OcsfJsonlLayer @@ -39,6 +59,13 @@ where return; } + // If an enabled flag is set and it reads `false`, skip writing. 
+ if let Some(ref flag) = self.enabled { + if !flag.load(Ordering::Relaxed) { + return; + } + } + if let Some(ocsf_event) = clone_current_event() && let Ok(line) = ocsf_event.to_json_line() && let Ok(mut w) = self.writer.lock() diff --git a/crates/openshell-ocsf/src/tracing_layers/mod.rs b/crates/openshell-ocsf/src/tracing_layers/mod.rs index a8213a29..c8e5d9f2 100644 --- a/crates/openshell-ocsf/src/tracing_layers/mod.rs +++ b/crates/openshell-ocsf/src/tracing_layers/mod.rs @@ -11,6 +11,6 @@ pub(crate) mod event_bridge; mod jsonl_layer; mod shorthand_layer; -pub use event_bridge::emit_ocsf_event; +pub use event_bridge::{OCSF_TARGET, clone_current_event, emit_ocsf_event}; pub use jsonl_layer::OcsfJsonlLayer; pub use shorthand_layer::OcsfShorthandLayer; diff --git a/crates/openshell-sandbox/Cargo.toml b/crates/openshell-sandbox/Cargo.toml index e8e7e2c9..541784ee 100644 --- a/crates/openshell-sandbox/Cargo.toml +++ b/crates/openshell-sandbox/Cargo.toml @@ -16,6 +16,7 @@ path = "src/main.rs" [dependencies] openshell-core = { path = "../openshell-core" } +openshell-ocsf = { path = "../openshell-ocsf" } openshell-policy = { path = "../openshell-policy" } openshell-router = { path = "../openshell-router" } diff --git a/crates/openshell-sandbox/src/bypass_monitor.rs b/crates/openshell-sandbox/src/bypass_monitor.rs index f99d7493..d0e49c42 100644 --- a/crates/openshell-sandbox/src/bypass_monitor.rs +++ b/crates/openshell-sandbox/src/bypass_monitor.rs @@ -17,10 +17,14 @@ //! still provide fast-fail UX — the monitor only adds diagnostic visibility. use crate::denial_aggregator::DenialEvent; +use openshell_ocsf::{ + ActionId, ActivityId, ConfidenceId, DetectionFindingBuilder, DispositionId, Endpoint, + FindingInfo, NetworkActivityBuilder, Process, SeverityId, ocsf_emit, +}; use std::sync::Arc; use std::sync::atomic::{AtomicU32, Ordering}; use tokio::sync::mpsc; -use tracing::{debug, warn}; +use tracing::debug; /// A parsed iptables LOG entry from `/dev/kmsg`. 
#[derive(Debug, Clone, PartialEq, Eq)] @@ -126,10 +130,15 @@ pub fn spawn( .status(); if !dmesg_check.is_ok_and(|s| s.success()) { - warn!( - "dmesg not available; bypass detection monitor will not run. \ - Bypass REJECT rules still provide fast-fail behavior." - ); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .severity(SeverityId::Low) + .message( + "dmesg not available; bypass detection monitor will not run. \ + Bypass REJECT rules still provide fast-fail behavior.", + ) + .build(); + ocsf_emit!(event); return None; } @@ -149,7 +158,14 @@ pub fn spawn( { Ok(c) => c, Err(e) => { - warn!(error = %e, "Failed to start dmesg --follow; bypass monitor will not run"); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .severity(SeverityId::Low) + .message(format!( + "Failed to start dmesg --follow; bypass monitor will not run: {e}" + )) + .build(); + ocsf_emit!(event); return; } }; @@ -157,7 +173,12 @@ pub fn spawn( let stdout = match child.stdout.take() { Some(s) => s, None => { - warn!("dmesg --follow produced no stdout; bypass monitor will not run"); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .severity(SeverityId::Low) + .message("dmesg --follow produced no stdout; bypass monitor will not run") + .build(); + ocsf_emit!(event); return; } }; @@ -186,19 +207,59 @@ pub fn spawn( }; let hint = hint_for_event(&event); + let reason = "direct connection bypassed HTTP CONNECT proxy"; - warn!( - dst_addr = %event.dst_addr, - dst_port = event.dst_port, - proto = %event.proto, - binary = %binary, - binary_pid = %binary_pid, - ancestors = %ancestors, - action = "reject", - reason = "direct connection bypassed HTTP CONNECT proxy", - hint = hint, - "BYPASS_DETECT", - ); + // Dual-emit: Network Activity [4001] + Detection Finding [2004] + { + let dst_ep = if let Ok(ip) = event.dst_addr.parse::() { + Endpoint::from_ip(ip, event.dst_port) + } 
else { + Endpoint::from_domain(&event.dst_addr, event.dst_port) + }; + + let net_event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Refuse) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .dst_endpoint(dst_ep.clone()) + .actor_process(Process::from_bypass(&binary, &binary_pid, &ancestors)) + .firewall_rule("bypass-detect", "iptables") + .observation_point(3) + .message(format!( + "BYPASS_DETECT {}:{} proto={} binary={binary} action=reject reason={reason}", + event.dst_addr, event.dst_port, event.proto, + )) + .build(); + ocsf_emit!(net_event); + + let finding_event = DetectionFindingBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Open) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .is_alert(true) + .confidence(ConfidenceId::High) + .finding_info( + FindingInfo::new("bypass-detect", "Proxy Bypass Detected") + .with_desc(reason), + ) + .remediation(hint) + .evidence_pairs(&[ + ("dst_addr", &event.dst_addr), + ("dst_port", &event.dst_port.to_string()), + ("proto", &event.proto), + ("binary", &binary), + ("binary_pid", &binary_pid), + ("ancestors", &ancestors), + ]) + .message(format!( + "BYPASS_DETECT {}:{} proto={} binary={binary} hint={hint}", + event.dst_addr, event.dst_port, event.proto, + )) + .build(); + ocsf_emit!(finding_event); + } // Send to denial aggregator if available. if let Some(ref tx) = denial_tx { diff --git a/crates/openshell-sandbox/src/l7/mod.rs b/crates/openshell-sandbox/src/l7/mod.rs index 880b6fd9..ca76dc47 100644 --- a/crates/openshell-sandbox/src/l7/mod.rs +++ b/crates/openshell-sandbox/src/l7/mod.rs @@ -91,17 +91,27 @@ pub fn parse_l7_config(val: ®orus::Value) -> Option { let tls = match get_object_str(val, "tls").as_deref() { Some("skip") => TlsMode::Skip, Some("terminate") => { - tracing::warn!( - "'tls: terminate' is deprecated; TLS termination is now automatic. 
\ - Use 'tls: skip' to explicitly disable. This field will be removed in a future version." - ); + let event = openshell_ocsf::NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(openshell_ocsf::ActivityId::Other) + .severity(openshell_ocsf::SeverityId::Medium) + .message( + "'tls: terminate' is deprecated; TLS termination is now automatic. \ + Use 'tls: skip' to explicitly disable. This field will be removed in a future version.", + ) + .build(); + openshell_ocsf::ocsf_emit!(event); TlsMode::Auto } Some("passthrough") => { - tracing::warn!( - "'tls: passthrough' is deprecated; TLS termination is now automatic. \ - Use 'tls: skip' to explicitly disable. This field will be removed in a future version." - ); + let event = openshell_ocsf::NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(openshell_ocsf::ActivityId::Other) + .severity(openshell_ocsf::SeverityId::Medium) + .message( + "'tls: passthrough' is deprecated; TLS termination is now automatic. \ + Use 'tls: skip' to explicitly disable. 
This field will be removed in a future version.", + ) + .build(); + openshell_ocsf::ocsf_emit!(event); TlsMode::Auto } _ => TlsMode::Auto, diff --git a/crates/openshell-sandbox/src/l7/relay.rs b/crates/openshell-sandbox/src/l7/relay.rs index b2fb34b6..280c1dae 100644 --- a/crates/openshell-sandbox/src/l7/relay.rs +++ b/crates/openshell-sandbox/src/l7/relay.rs @@ -11,6 +11,10 @@ use crate::l7::provider::{L7Provider, RelayOutcome}; use crate::l7::{EnforcementMode, L7EndpointConfig, L7Protocol, L7RequestInfo}; use crate::secrets::{self, SecretResolver}; use miette::{IntoDiagnostic, Result, miette}; +use openshell_ocsf::{ + ActionId, ActivityId, DispositionId, Endpoint, HttpActivityBuilder, HttpRequest, + NetworkActivityBuilder, SeverityId, Url as OcsfUrl, ocsf_emit, +}; use std::sync::{Arc, Mutex}; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; use tracing::{debug, info, warn}; @@ -55,11 +59,15 @@ where L7Protocol::Rest => relay_rest(config, &engine, client, upstream, ctx).await, L7Protocol::Sql => { // SQL provider is Phase 3 — fall through to passthrough with warning - warn!( - host = %ctx.host, - port = ctx.port, - "SQL L7 provider not yet implemented, falling back to passthrough" - ); + { + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .severity(SeverityId::Low) + .dst_endpoint(Endpoint::from_domain(&ctx.host, ctx.port)) + .message("SQL L7 provider not yet implemented, falling back to passthrough") + .build(); + ocsf_emit!(event); + } tokio::io::copy_bidirectional(client, upstream) .await .into_diagnostic()?; @@ -128,12 +136,13 @@ where "L7 connection closed" ); } else { - warn!( - host = %ctx.host, - port = ctx.port, - error = %e, - "HTTP parse error in L7 relay" - ); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .dst_endpoint(Endpoint::from_domain(&ctx.host, ctx.port)) + .message(format!("HTTP parse error in L7 relay: {e}")) + 
.build(); + ocsf_emit!(event); } return Ok(()); // Close connection on parse error } @@ -191,19 +200,33 @@ where (false, EnforcementMode::Enforce, _) => "deny", }; - // Log every L7 decision (using redacted target — never log real secrets) - info!( - dst_host = %ctx.host, - dst_port = ctx.port, - policy = %ctx.policy_name, - l7_protocol = "rest", - l7_action = %request_info.action, - l7_target = %redacted_target, - l7_query_params = ?request_info.query_params, - l7_decision = decision_str, - l7_deny_reason = %reason, - "L7_REQUEST", - ); + // Log every L7 decision as an OCSF HTTP Activity event. + // Uses redacted_target (path only, no query params) to avoid logging secrets. + { + let (action_id, disposition_id) = match decision_str { + "allow" => (ActionId::Allowed, DispositionId::Allowed), + "deny" => (ActionId::Denied, DispositionId::Blocked), + "audit" => (ActionId::Allowed, DispositionId::Allowed), + _ => (ActionId::Other, DispositionId::Other), + }; + let event = HttpActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .action(action_id) + .disposition(disposition_id) + .severity(SeverityId::Informational) + .http_request(HttpRequest::new( + &request_info.action, + OcsfUrl::new("http", &ctx.host, &redacted_target, ctx.port), + )) + .dst_endpoint(Endpoint::from_domain(&ctx.host, ctx.port)) + .firewall_rule(&ctx.policy_name, "l7") + .message(format!( + "L7_REQUEST {decision_str} {} {}:{}{} reason={}", + request_info.action, ctx.host, ctx.port, redacted_target, reason, + )) + .build(); + ocsf_emit!(event); + } // Store the resolved target for the deny response redaction let _ = &eval_target; @@ -373,17 +396,27 @@ where req.target.clone() }; - // Log for observability (using redacted target — never log real secrets). + // Log for observability via OCSF HTTP Activity event. + // Uses redacted_target (path only, no query params) to avoid logging secrets. 
let has_creds = resolver.is_some(); - info!( - host = %ctx.host, - port = ctx.port, - method = %req.action, - path = %redacted_target, - credentials_injected = has_creds, - request_num = request_count, - "HTTP_REQUEST", - ); + { + let event = HttpActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .action(ActionId::Allowed) + .disposition(DispositionId::Allowed) + .severity(SeverityId::Informational) + .http_request(HttpRequest::new( + &req.action, + OcsfUrl::new("http", &ctx.host, &redacted_target, ctx.port), + )) + .dst_endpoint(Endpoint::from_domain(&ctx.host, ctx.port)) + .message(format!( + "HTTP_REQUEST {} {}:{}{} credentials_injected={has_creds} request_num={request_count}", + req.action, ctx.host, ctx.port, redacted_target, + )) + .build(); + ocsf_emit!(event); + } // Forward request with credential rewriting and relay the response. // relay_http_request_with_resolver handles both directions: it sends diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index 2384a217..8a75841b 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -27,12 +27,44 @@ use miette::{IntoDiagnostic, Result}; use std::collections::HashSet; use std::net::SocketAddr; use std::sync::Arc; +use std::sync::OnceLock; use std::sync::atomic::{AtomicU32, Ordering}; #[cfg(target_os = "linux")] use std::sync::{LazyLock, Mutex}; use std::time::Duration; use tokio::time::timeout; -use tracing::{debug, error, info, trace, warn}; +use tracing::{debug, info, trace, warn}; + +use openshell_ocsf::{ + ActionId, ActivityId, AppLifecycleBuilder, ConfigStateChangeBuilder, DetectionFindingBuilder, + DispositionId, FindingInfo, LaunchTypeId, Process as OcsfProcess, ProcessActivityBuilder, + SandboxContext, SeverityId, StateId, StatusId, ocsf_emit, +}; + +/// Process-wide OCSF sandbox context. Initialized once during `run_sandbox()` +/// startup and accessible from any module in the crate via [`ocsf_ctx()`]. 
+static OCSF_CTX: OnceLock = OnceLock::new(); + +/// Fallback context used when `OCSF_CTX` has not been initialized (e.g. in +/// unit tests that exercise individual functions without calling `run_sandbox`). +static OCSF_CTX_FALLBACK: std::sync::LazyLock = + std::sync::LazyLock::new(|| SandboxContext { + sandbox_id: String::new(), + sandbox_name: String::new(), + container_image: String::new(), + hostname: "test".to_string(), + product_version: openshell_core::VERSION.to_string(), + proxy_ip: std::net::IpAddr::from([127, 0, 0, 1]), + proxy_port: 3128, + }); + +/// Return a reference to the process-wide [`SandboxContext`]. +/// +/// Falls back to a default context if `run_sandbox()` has not yet been called +/// (e.g. during unit tests). +pub(crate) fn ocsf_ctx() -> &'static SandboxContext { + OCSF_CTX.get().unwrap_or(&OCSF_CTX_FALLBACK) +} use crate::identity::BinaryIdentityCache; use crate::l7::tls::{ @@ -162,11 +194,32 @@ pub async fn run_sandbox( _health_check: bool, _health_port: u16, inference_routes: Option, + ocsf_enabled: Arc, ) -> Result { let (program, args) = command .split_first() .ok_or_else(|| miette::miette!("No command specified"))?; + // Initialize the process-wide OCSF context early so that events emitted + // during policy loading (filesystem config, validation) have a context. + // Proxy IP/port use defaults here; they are only significant for network + // events which happen after the netns is created. 
+ { + let hostname = std::fs::read_to_string("/etc/hostname") + .map(|s| s.trim().to_string()) + .unwrap_or_else(|_| "openshell-sandbox".to_string()); + + let _ = OCSF_CTX.set(SandboxContext { + sandbox_id: sandbox_id.clone().unwrap_or_default(), + sandbox_name: sandbox.as_deref().unwrap_or_default().to_string(), + container_image: std::env::var("OPENSHELL_CONTAINER_IMAGE").unwrap_or_default(), + hostname, + product_version: openshell_core::VERSION.to_string(), + proxy_ip: std::net::IpAddr::from([127, 0, 0, 1]), + proxy_port: 3128, + }); + } + // Load policy and initialize OPA engine let openshell_endpoint_for_proxy = openshell_endpoint.clone(); let sandbox_name_for_agg = sandbox.clone(); @@ -190,11 +243,30 @@ pub async fn run_sandbox( let provider_env = if let (Some(id), Some(endpoint)) = (&sandbox_id, &openshell_endpoint) { match grpc_client::fetch_provider_environment(endpoint, id).await { Ok(env) => { - info!(env_count = env.len(), "Fetched provider environment"); + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "loaded") + .message(format!( + "Fetched provider environment [env_count:{}]", + env.len() + )) + .build() + ); env } Err(e) => { - warn!(error = %e, "Failed to fetch provider environment, continuing without"); + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .state(StateId::Other, "degraded") + .message(format!( + "Failed to fetch provider environment, continuing without: {e}" + )) + .build() + ); std::collections::HashMap::new() } } @@ -228,22 +300,41 @@ pub async fn run_sandbox( let upstream_config = build_upstream_client_config(); let cert_cache = CertCache::new(ca); let state = Arc::new(ProxyTlsState::new(cert_cache, upstream_config)); - info!("TLS termination enabled: ephemeral CA generated"); + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + 
.severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "enabled") + .message("TLS termination enabled: ephemeral CA generated") + .build() + ); (Some(state), Some(paths)) } Err(e) => { - tracing::warn!( - error = %e, - "Failed to write CA files, TLS termination disabled" + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .state(StateId::Disabled, "disabled") + .message(format!( + "Failed to write CA files, TLS termination disabled: {e}" + )) + .build() ); (None, None) } } } Err(e) => { - tracing::warn!( - error = %e, - "Failed to generate ephemeral CA, TLS termination disabled" + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .state(StateId::Disabled, "disabled") + .message(format!( + "Failed to generate ephemeral CA, TLS termination disabled: {e}" + )) + .build() ); (None, None) } @@ -269,9 +360,15 @@ pub async fn run_sandbox( .and_then(|p| p.http_addr) .map_or(3128, |addr| addr.port()); if let Err(e) = ns.install_bypass_rules(proxy_port) { - warn!( - error = %e, - "Failed to install bypass detection rules (non-fatal)" + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .state(StateId::Disabled, "degraded") + .message(format!( + "Failed to install bypass detection rules (non-fatal): {e}" + )) + .build() ); } Some(ns) @@ -514,7 +611,14 @@ pub async fn run_sandbox( ) .await { - tracing::error!(error = %err, "SSH server failed"); + ocsf_emit!( + AppLifecycleBuilder::new(ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Critical) + .status(StatusId::Failure) + .message(format!("SSH server failed: {err}")) + .build() + ); } }); @@ -523,7 +627,14 @@ pub async fn run_sandbox( // SSH server startup when Kubernetes marks the pod Ready. 
match timeout(Duration::from_secs(10), ssh_ready_rx).await { Ok(Ok(Ok(()))) => { - info!("SSH server is ready to accept connections"); + ocsf_emit!( + AppLifecycleBuilder::new(ocsf_ctx()) + .activity(ActivityId::Open) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .message("SSH server is ready to accept connections") + .build() + ); } Ok(Ok(Err(err))) => { return Err(err.context("SSH server failed during startup")); @@ -566,7 +677,18 @@ pub async fn run_sandbox( // Store the entrypoint PID so the proxy can resolve TCP peer identity entrypoint_pid.store(handle.pid(), Ordering::Release); - info!(pid = handle.pid(), "Process started"); + ocsf_emit!( + ProcessActivityBuilder::new(ocsf_ctx()) + .activity(ActivityId::Open) + .action(ActionId::Allowed) + .disposition(DispositionId::Allowed) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .launch_type(LaunchTypeId::Spawn) + .process(OcsfProcess::new(program, i64::from(handle.pid()))) + .message(format!("Process started: pid={}", handle.pid())) + .build() + ); // Spawn background policy poll task (gRPC mode only). 
if let (Some(id), Some(endpoint), Some(engine)) = @@ -575,17 +697,30 @@ pub async fn run_sandbox( let poll_id = id.clone(); let poll_endpoint = endpoint.clone(); let poll_engine = engine.clone(); + let poll_ocsf_enabled = ocsf_enabled.clone(); let poll_interval_secs: u64 = std::env::var("OPENSHELL_POLICY_POLL_INTERVAL_SECS") .ok() .and_then(|v| v.parse().ok()) .unwrap_or(10); tokio::spawn(async move { - if let Err(e) = - run_policy_poll_loop(&poll_endpoint, &poll_id, &poll_engine, poll_interval_secs) - .await + if let Err(e) = run_policy_poll_loop( + &poll_endpoint, + &poll_id, + &poll_engine, + poll_interval_secs, + &poll_ocsf_enabled, + ) + .await { - warn!(error = %e, "Policy poll loop exited with error"); + ocsf_emit!( + AppLifecycleBuilder::new(ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .message(format!("Policy poll loop exited with error: {e}")) + .build() + ); } }); @@ -625,7 +760,16 @@ pub async fn run_sandbox( if let Ok(result) = timeout(Duration::from_secs(timeout_secs), handle.wait()).await { result } else { - error!("Process timed out, killing"); + ocsf_emit!( + ProcessActivityBuilder::new(ocsf_ctx()) + .activity(ActivityId::Close) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Critical) + .status(StatusId::Failure) + .message("Process timed out, killing") + .build() + ); handle.kill()?; return Ok(124); // Standard timeout exit code } @@ -635,7 +779,17 @@ pub async fn run_sandbox( let status = result.into_diagnostic()?; - info!(exit_code = status.code(), "Process exited"); + ocsf_emit!( + ProcessActivityBuilder::new(ocsf_ctx()) + .activity(ActivityId::Close) + .action(ActionId::Allowed) + .disposition(DispositionId::Allowed) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .exit_code(status.code()) + .message(format!("Process exited with code {}", status.code())) + .build() + ); Ok(status.code()) } @@ -672,12 +826,25 @@ async fn 
build_inference_context( // Standalone mode: load routes from file (fail-fast on errors) if sandbox_id.is_some() { - info!( - inference_routes = %path, - "Inference routes file takes precedence over cluster bundle" - ); + ocsf_emit!(ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "loaded") + .unmapped("inference_routes", serde_json::json!(path)) + .message(format!( + "Inference routes file takes precedence over cluster bundle [path:{path}]" + )) + .build()); } - info!(inference_routes = %path, "Loading inference routes from file"); + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Other, "loading") + .unmapped("inference_routes", serde_json::json!(path)) + .message(format!("Loading inference routes from file [path:{path}]")) + .build() + ); let config = RouterConfig::load_from_file(std::path::Path::new(path)) .map_err(|e| miette::miette!("failed to load inference routes {path}: {e}"))?; config @@ -694,10 +861,19 @@ async fn build_inference_context( match grpc_client::fetch_inference_bundle(endpoint).await { Ok(bundle) => { initial_revision = Some(bundle.revision.clone()); - info!( - route_count = bundle.routes.len(), - revision = %bundle.revision, - "Loaded inference route bundle" + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "loaded") + .unmapped("route_count", serde_json::json!(bundle.routes.len())) + .unmapped("revision", serde_json::json!(&bundle.revision)) + .message(format!( + "Loaded inference route bundle [route_count:{} revision:{}]", + bundle.routes.len(), + bundle.revision + )) + .build() ); bundle_to_resolved_routes(&bundle) } @@ -707,10 +883,28 @@ async fn build_inference_context( // for this sandbox — skip gracefully. Other errors are unexpected. 
let msg = e.to_string(); if msg.contains("permission denied") || msg.contains("not found") { - info!(error = %e, "Inference bundle unavailable, routing disabled"); + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Disabled, "disabled") + .unmapped("error", serde_json::json!(e.to_string())) + .message(format!( + "Inference bundle unavailable, routing disabled [error:{e}]" + )) + .build() + ); return Ok(None); } - warn!(error = %e, "Failed to fetch inference bundle, inference routing disabled"); + ocsf_emit!(ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .state(StateId::Disabled, "disabled") + .unmapped("error", serde_json::json!(e.to_string())) + .message(format!( + "Failed to fetch inference bundle, inference routing disabled [error:{e}]" + )) + .build()); return Ok(None); } } @@ -722,17 +916,37 @@ async fn build_inference_context( }; if routes.is_empty() && disable_inference_on_empty_routes(source) { - info!("No usable inference routes, inference routing disabled"); + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Disabled, "disabled") + .message("No usable inference routes, inference routing disabled") + .build() + ); return Ok(None); } if routes.is_empty() { - info!("Inference route bundle is empty; keeping routing enabled and waiting for refresh"); + ocsf_emit!(ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Other, "waiting") + .message("Inference route bundle is empty; keeping routing enabled and waiting for refresh") + .build()); } - info!( - route_count = routes.len(), - "Inference routing enabled with local execution" + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + 
.status(StatusId::Success) + .state(StateId::Enabled, "enabled") + .unmapped("route_count", serde_json::json!(routes.len())) + .message(format!( + "Inference routing enabled with local execution [route_count:{}]", + routes.len() + )) + .build() ); // Partition routes by name into user-facing and system caches. @@ -853,18 +1067,34 @@ pub(crate) fn spawn_route_refresh( let routes = bundle_to_resolved_routes(&bundle); let (user_routes, system_routes) = partition_routes(routes); - info!( - user_route_count = user_routes.len(), - system_route_count = system_routes.len(), - revision = %bundle.revision, - "Inference routes updated" - ); + ocsf_emit!(ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "updated") + .unmapped("user_route_count", serde_json::json!(user_routes.len())) + .unmapped("system_route_count", serde_json::json!(system_routes.len())) + .unmapped("revision", serde_json::json!(&bundle.revision)) + .message(format!( + "Inference routes updated [user_route_count:{} system_route_count:{} revision:{}]", + user_routes.len(), + system_routes.len(), + bundle.revision + )) + .build()); current_revision = Some(bundle.revision); *user_cache.write().await = user_routes; *system_cache.write().await = system_routes; } Err(e) => { - warn!(error = %e, "Failed to refresh inference route cache, keeping stale routes"); + ocsf_emit!(ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .state(StateId::Other, "stale") + .unmapped("error", serde_json::json!(e.to_string())) + .message(format!( + "Failed to refresh inference route cache, keeping stale routes [error:{e}]" + )) + .build()); } } } @@ -1031,7 +1261,14 @@ fn enrich_proto_baseline_paths(proto: &mut openshell_core::proto::SandboxPolicy) } if modified { - info!("Enriched policy with baseline filesystem paths for proxy mode"); + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + 
.severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "enriched") + .message("Enriched policy with baseline filesystem paths for proxy mode") + .build() + ); } modified @@ -1078,7 +1315,14 @@ fn enrich_sandbox_baseline_paths(policy: &mut SandboxPolicy) { } if modified { - info!("Enriched policy with baseline filesystem paths for proxy mode"); + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "enriched") + .message("Enriched policy with baseline filesystem paths for proxy mode") + .build() + ); } } @@ -1167,11 +1411,16 @@ async fn load_policy( ) -> Result<(SandboxPolicy, Option>)> { // File mode: load OPA engine from rego rules + YAML data (dev override) if let (Some(policy_file), Some(data_file)) = (&policy_rules, &policy_data) { - info!( - policy_rules = %policy_file, - policy_data = %data_file, - "Loading OPA policy engine from local files" - ); + ocsf_emit!(ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Other, "loading") + .unmapped("policy_rules", serde_json::json!(policy_file)) + .unmapped("policy_data", serde_json::json!(data_file)) + .message(format!( + "Loading OPA policy engine from local files [rules:{policy_file} data:{data_file}]" + )) + .build()); let engine = OpaEngine::from_files( std::path::Path::new(policy_file), std::path::Path::new(data_file), @@ -1206,7 +1455,14 @@ async fn load_policy( // No policy configured on the server. Discover from disk or // fall back to the restrictive default, then sync to the // gateway so it becomes the authoritative baseline. 
- info!("Server returned no policy; attempting local discovery"); + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Other, "discovery") + .message("Server returned no policy; attempting local discovery") + .build() + ); let mut discovered = discover_policy_from_disk_or_default(); // Enrich before syncing so the gateway baseline includes // baseline paths from the start. @@ -1268,10 +1524,22 @@ fn discover_policy_from_disk_or_default() -> openshell_core::proto::SandboxPolic } let legacy = std::path::Path::new(openshell_policy::LEGACY_CONTAINER_POLICY_PATH); if legacy.exists() { - info!( - legacy_path = %legacy.display(), - new_path = %primary.display(), - "Policy found at legacy path; consider moving to the new path" + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "loaded") + .unmapped( + "legacy_path", + serde_json::json!(legacy.display().to_string()) + ) + .unmapped("new_path", serde_json::json!(primary.display().to_string())) + .message(format!( + "Policy found at legacy path; consider moving [legacy_path:{} new_path:{}]", + legacy.display(), + primary.display() + )) + .build() ); return discover_policy_from_path(legacy); } @@ -1287,9 +1555,16 @@ fn discover_policy_from_path(path: &std::path::Path) -> openshell_core::proto::S match std::fs::read_to_string(path) { Ok(yaml) => { - info!( - path = %path.display(), - "Loaded sandbox policy from container disk" + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "loaded") + .message(format!( + "Loaded sandbox policy from container disk [path:{}]", + path.display() + )) + .build() ); match parse_sandbox_policy(&yaml) { Ok(policy) => { @@ -1297,29 +1572,56 @@ fn discover_policy_from_path(path: &std::path::Path) -> 
openshell_core::proto::S if let Err(violations) = validate_sandbox_policy(&policy) { let messages: Vec = violations.iter().map(ToString::to_string).collect(); - warn!( - path = %path.display(), - violations = %messages.join("; "), - "Disk policy contains unsafe content, using restrictive default" - ); + ocsf_emit!(DetectionFindingBuilder::new(ocsf_ctx()) + .activity(ActivityId::Open) + .severity(SeverityId::Medium) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .finding_info( + FindingInfo::new( + "unsafe-disk-policy", + "Unsafe Disk Policy Content", + ) + .with_desc(&format!( + "Disk policy at {} contains unsafe content: {}", + path.display(), + messages.join("; "), + )), + ) + .message(format!( + "Disk policy contains unsafe content, using restrictive default [path:{}]", + path.display() + )) + .build()); return restrictive_default_policy(); } policy } Err(e) => { - warn!( - path = %path.display(), - error = %e, - "Failed to parse disk policy, using restrictive default" - ); + ocsf_emit!(ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .state(StateId::Other, "fallback") + .message(format!( + "Failed to parse disk policy, using restrictive default [path:{} error:{e}]", + path.display() + )) + .build()); restrictive_default_policy() } } } Err(_) => { - info!( - path = %path.display(), - "No policy file on disk, using restrictive default" + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "default") + .message(format!( + "No policy file on disk, using restrictive default [path:{}]", + path.display() + )) + .build() ); restrictive_default_policy() } @@ -1341,7 +1643,14 @@ fn validate_sandbox_user(policy: &SandboxPolicy) -> Result<()> { if user_name.is_empty() || user_name == "sandbox" { match User::from_name("sandbox") { Ok(Some(_)) => { - info!("Validated 'sandbox' user exists in image"); 
+ ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "validated") + .message("Validated 'sandbox' user exists in image") + .build() + ); } Ok(None) => { return Err(miette::miette!( @@ -1512,9 +1821,11 @@ async fn run_policy_poll_loop( sandbox_id: &str, opa_engine: &Arc, interval_secs: u64, + ocsf_enabled: &std::sync::atomic::AtomicBool, ) -> Result<()> { use crate::grpc_client::CachedOpenShellClient; use openshell_core::proto::PolicySource; + use std::sync::atomic::Ordering; let client = CachedOpenShellClient::connect(endpoint).await?; let mut current_config_revision: u64 = 0; @@ -1561,19 +1872,28 @@ async fn run_policy_poll_loop( // Log which settings changed. log_setting_changes(¤t_settings, &result.settings); - info!( - old_config_revision = current_config_revision, - new_config_revision = result.config_revision, - policy_changed, - "Settings poll: config change detected" - ); + ocsf_emit!(ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Other, "detected") + .unmapped("old_config_revision", serde_json::json!(current_config_revision)) + .unmapped("new_config_revision", serde_json::json!(result.config_revision)) + .unmapped("policy_changed", serde_json::json!(policy_changed)) + .message(format!( + "Settings poll: config change detected [old_revision:{current_config_revision} new_revision:{} policy_changed:{policy_changed}]", + result.config_revision + )) + .build()); // Only reload OPA when the policy payload actually changed. 
if policy_changed { let Some(policy) = result.policy.as_ref() else { - warn!( - "Settings poll: policy hash changed but no policy payload present; skipping reload" - ); + ocsf_emit!(ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .state(StateId::Other, "skipped") + .message("Settings poll: policy hash changed but no policy payload present; skipping reload") + .build()); current_config_revision = result.config_revision; current_policy_hash = result.policy_hash; current_settings = result.settings; @@ -1583,15 +1903,30 @@ async fn run_policy_poll_loop( match opa_engine.reload_from_proto(policy) { Ok(()) => { if result.global_policy_version > 0 { - info!( - policy_hash = %result.policy_hash, - global_version = result.global_policy_version, - "Policy reloaded successfully (global)" - ); + ocsf_emit!(ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "loaded") + .unmapped("policy_hash", serde_json::json!(&result.policy_hash)) + .unmapped("global_version", serde_json::json!(result.global_policy_version)) + .message(format!( + "Policy reloaded successfully (global) [policy_hash:{} global_version:{}]", + result.policy_hash, + result.global_policy_version + )) + .build()); } else { - info!( - policy_hash = %result.policy_hash, - "Policy reloaded successfully" + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "loaded") + .unmapped("policy_hash", serde_json::json!(&result.policy_hash)) + .message(format!( + "Policy reloaded successfully [policy_hash:{}]", + result.policy_hash + )) + .build() ); } if result.version > 0 && result.policy_source == PolicySource::Sandbox { @@ -1604,11 +1939,17 @@ async fn run_policy_poll_loop( } } Err(e) => { - warn!( - version = result.version, - error = %e, - "Policy reload failed, keeping last-known-good 
policy" - ); + ocsf_emit!(ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .state(StateId::Other, "failed") + .unmapped("version", serde_json::json!(result.version)) + .unmapped("error", serde_json::json!(e.to_string())) + .message(format!( + "Policy reload failed, keeping last-known-good policy [version:{} error:{e}]", + result.version + )) + .build()); if result.version > 0 && result.policy_source == PolicySource::Sandbox { if let Err(report_err) = client .report_policy_status(sandbox_id, result.version, false, &e.to_string()) @@ -1621,12 +1962,39 @@ async fn run_policy_poll_loop( } } + // Apply OCSF logging toggle from the `ocsf_logging_enabled` setting. + let new_ocsf = + extract_bool_setting(&result.settings, "ocsf_logging_enabled").unwrap_or(false); + let prev_ocsf = ocsf_enabled.swap(new_ocsf, Ordering::Relaxed); + if new_ocsf != prev_ocsf { + info!( + ocsf_logging_enabled = new_ocsf, + "OCSF JSONL logging toggled" + ); + } + current_config_revision = result.config_revision; current_policy_hash = result.policy_hash; current_settings = result.settings; } } +/// Extract a bool value from an effective setting, if present. +fn extract_bool_setting( + settings: &std::collections::HashMap, + key: &str, +) -> Option { + use openshell_core::proto::setting_value; + settings + .get(key) + .and_then(|es| es.value.as_ref()) + .and_then(|sv| sv.value.as_ref()) + .and_then(|v| match v { + setting_value::Value::BoolValue(b) => Some(*b), + _ => None, + }) +} + /// Log individual setting changes between two snapshots. 
fn log_setting_changes( old: &std::collections::HashMap, @@ -1638,17 +2006,46 @@ fn log_setting_changes( Some(old_es) => { let old_val = format_setting_value(old_es); if old_val != new_val { - info!(key, old = %old_val, new = %new_val, "Setting changed"); + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "updated") + .unmapped("key", serde_json::json!(key)) + .unmapped("old", serde_json::json!(old_val.to_string())) + .unmapped("new", serde_json::json!(new_val.to_string())) + .message(format!( + "Setting changed [key:{key} old:{old_val} new:{new_val}]" + )) + .build() + ); } } None => { - info!(key, value = %new_val, "Setting added"); + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "enabled") + .unmapped("key", serde_json::json!(key)) + .unmapped("value", serde_json::json!(new_val.to_string())) + .message(format!("Setting added [key:{key} value:{new_val}]")) + .build() + ); } } } for key in old.keys() { if !new.contains_key(key) { - info!(key, "Setting removed"); + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Disabled, "disabled") + .unmapped("key", serde_json::json!(key)) + .message(format!("Setting removed [key:{key}]")) + .build() + ); } } } diff --git a/crates/openshell-sandbox/src/log_push.rs b/crates/openshell-sandbox/src/log_push.rs index 21c272a2..22b02e83 100644 --- a/crates/openshell-sandbox/src/log_push.rs +++ b/crates/openshell-sandbox/src/log_push.rs @@ -48,10 +48,25 @@ impl Layer for LogPushLayer { if *meta.level() > self.max_level { return; } - let mut visitor = LogVisitor::default(); - event.record(&mut visitor); - let (msg, fields) = visitor.into_parts(meta.name()); + // OCSF events carry their payload in a thread-local; extract the + // shorthand 
representation for the push message. Non-OCSF events + // use the original visitor-based extraction. + let (msg, fields) = if meta.target() == openshell_ocsf::OCSF_TARGET { + if let Some(ocsf_event) = openshell_ocsf::clone_current_event() { + ( + ocsf_event.format_shorthand(), + std::collections::HashMap::new(), + ) + } else { + return; + } + } else { + let mut visitor = LogVisitor::default(); + event.record(&mut visitor); + visitor.into_parts(meta.name()) + }; + let ts = current_time_ms().unwrap_or(0); let log = SandboxLogLine { diff --git a/crates/openshell-sandbox/src/main.rs b/crates/openshell-sandbox/src/main.rs index cdf5f6ff..0b373f27 100644 --- a/crates/openshell-sandbox/src/main.rs +++ b/crates/openshell-sandbox/src/main.rs @@ -3,10 +3,15 @@ //! OpenShell Sandbox - process sandbox and monitor. +use std::sync::Arc; +use std::sync::atomic::AtomicBool; + use clap::Parser; use miette::Result; +use openshell_ocsf::{OcsfJsonlLayer, OcsfShorthandLayer}; use tracing::{info, warn}; use tracing_subscriber::EnvFilter; +use tracing_subscriber::filter::LevelFilter; use tracing_subscriber::{Layer, layer::SubscriberExt, util::SubscriberInitExt}; use openshell_sandbox::run_sandbox; @@ -130,37 +135,60 @@ async fn main() -> Result<()> { let push_layer = log_push_state.as_ref().map(|(layer, _)| layer.clone()); let _log_push_handle = log_push_state.map(|(_, handle)| handle); - // Keep the file guard alive for the entire process. When the guard is - // dropped the non-blocking writer flushes remaining logs. - let _file_guard = if let Some((file_writer, file_guard)) = file_logging { + // Shared flag: the sandbox poll loop toggles this when the + // `ocsf_logging_enabled` setting changes. The JSONL layer checks it + // on each event and short-circuits when false. + let ocsf_enabled = Arc::new(AtomicBool::new(false)); + + // Keep guards alive for the entire process. When a guard is dropped the + // non-blocking writer flushes remaining logs. 
+ let (_file_guard, _jsonl_guard) = if let Some((file_writer, file_guard)) = file_logging { let file_filter = EnvFilter::new("info"); + + // OCSF JSONL file: append-only, created eagerly but gated by the + // enabled flag. The file exists on disk even when OCSF is off (0 bytes). + let jsonl_logging = std::fs::OpenOptions::new() + .create(true) + .append(true) + .open("/var/log/openshell-ocsf.log") + .ok() + .map(|f| { + let (writer, guard) = tracing_appender::non_blocking(f); + let layer = OcsfJsonlLayer::new(writer).with_enabled_flag(ocsf_enabled.clone()); + (layer, guard) + }); + let (jsonl_layer, jsonl_guard) = match jsonl_logging { + Some((layer, guard)) => (Some(layer), Some(guard)), + None => (None, None), + }; + tracing_subscriber::registry() .with( - tracing_subscriber::fmt::layer() - .with_writer(std::io::stdout) + OcsfShorthandLayer::new(std::io::stdout()) + .with_non_ocsf(true) .with_filter(stdout_filter), ) .with( - tracing_subscriber::fmt::layer() - .with_writer(file_writer) - .with_ansi(false) + OcsfShorthandLayer::new(file_writer) + .with_non_ocsf(true) .with_filter(file_filter), ) + .with(jsonl_layer.with_filter(LevelFilter::INFO)) .with(push_layer.clone()) .init(); - Some(file_guard) + (Some(file_guard), jsonl_guard) } else { tracing_subscriber::registry() .with( - tracing_subscriber::fmt::layer() - .with_writer(std::io::stdout) + OcsfShorthandLayer::new(std::io::stdout()) + .with_non_ocsf(true) .with_filter(stdout_filter), ) .with(push_layer) .init(); // Log the warning after the subscriber is initialized warn!("Could not open /var/log for log rotation; using stdout-only logging"); - None + (None, None) }; // Get command - either from CLI args, environment variable, or default to /bin/bash @@ -174,6 +202,9 @@ async fn main() -> Result<()> { }; info!(command = ?command, "Starting sandbox"); + // Note: "Starting sandbox" stays as plain info!() since the OCSF context + // is not yet initialized at this point (run_sandbox hasn't been called). 
+ // The shorthand layer will render it in fallback format. let exit_code = run_sandbox( command, @@ -191,6 +222,7 @@ async fn main() -> Result<()> { args.health_check, args.health_port, args.inference_routes, + ocsf_enabled, ) .await?; diff --git a/crates/openshell-sandbox/src/mechanistic_mapper.rs b/crates/openshell-sandbox/src/mechanistic_mapper.rs index 4fe90d08..95800854 100644 --- a/crates/openshell-sandbox/src/mechanistic_mapper.rs +++ b/crates/openshell-sandbox/src/mechanistic_mapper.rs @@ -449,13 +449,27 @@ async fn resolve_allowed_ips_if_private(host: &str, port: u32) -> Vec { let addrs = match tokio::net::lookup_host(&addr).await { Ok(addrs) => addrs.collect::>(), Err(e) => { - tracing::warn!(host, port, error = %e, "DNS resolution failed for allowed_ips check"); + let event = openshell_ocsf::NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(openshell_ocsf::ActivityId::Fail) + .severity(openshell_ocsf::SeverityId::Low) + .dst_endpoint(openshell_ocsf::Endpoint::from_domain(host, port as u16)) + .message(format!("DNS resolution failed for allowed_ips check: {e}")) + .build(); + openshell_ocsf::ocsf_emit!(event); return Vec::new(); } }; if addrs.is_empty() { - tracing::warn!(host, port, "DNS resolution returned no addresses"); + let event = openshell_ocsf::NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(openshell_ocsf::ActivityId::Fail) + .severity(openshell_ocsf::SeverityId::Low) + .dst_endpoint(openshell_ocsf::Endpoint::from_domain(host, port as u16)) + .message(format!( + "DNS resolution returned no addresses for {host}:{port}" + )) + .build(); + openshell_ocsf::ocsf_emit!(event); return Vec::new(); } diff --git a/crates/openshell-sandbox/src/opa.rs b/crates/openshell-sandbox/src/opa.rs index f1c0ad29..970c9226 100644 --- a/crates/openshell-sandbox/src/opa.rs +++ b/crates/openshell-sandbox/src/opa.rs @@ -121,7 +121,15 @@ impl OpaEngine { // Validate BEFORE expanding presets let (errors, warnings) = 
crate::l7::validate_l7_policies(&data); for w in &warnings { - tracing::warn!(warning = %w, "L7 policy validation warning"); + openshell_ocsf::ocsf_emit!( + openshell_ocsf::ConfigStateChangeBuilder::new(crate::ocsf_ctx()) + .severity(openshell_ocsf::SeverityId::Medium) + .status(openshell_ocsf::StatusId::Success) + .state(openshell_ocsf::StateId::Enabled, "validated") + .unmapped("warning", serde_json::json!(w.to_string())) + .message(format!("L7 policy validation warning: {w}")) + .build() + ); } if !errors.is_empty() { return Err(miette::miette!( @@ -520,7 +528,15 @@ fn preprocess_yaml_data(yaml_str: &str) -> Result { // Validate BEFORE expanding presets (catches user errors like rules+access) let (errors, warnings) = crate::l7::validate_l7_policies(&data); for w in &warnings { - tracing::warn!(warning = %w, "L7 policy validation warning"); + openshell_ocsf::ocsf_emit!( + openshell_ocsf::ConfigStateChangeBuilder::new(crate::ocsf_ctx()) + .severity(openshell_ocsf::SeverityId::Medium) + .status(openshell_ocsf::StatusId::Success) + .state(openshell_ocsf::StateId::Enabled, "validated") + .unmapped("warning", serde_json::json!(w.to_string())) + .message(format!("L7 policy validation warning: {w}")) + .build() + ); } if !errors.is_empty() { return Err(miette::miette!( diff --git a/crates/openshell-sandbox/src/process.rs b/crates/openshell-sandbox/src/process.rs index b93d125a..b29682cf 100644 --- a/crates/openshell-sandbox/src/process.rs +++ b/crates/openshell-sandbox/src/process.rs @@ -20,7 +20,7 @@ use std::os::unix::io::RawFd; use std::path::PathBuf; use std::process::Stdio; use tokio::process::{Child, Command}; -use tracing::{debug, warn}; +use tracing::debug; const SSH_HANDSHAKE_SECRET_ENV: &str = "OPENSHELL_SSH_HANDSHAKE_SECRET"; @@ -325,7 +325,14 @@ impl ProcessHandle { pub fn kill(&mut self) -> Result<()> { // First try SIGTERM if let Err(e) = self.signal(Signal::SIGTERM) { - warn!(error = %e, "Failed to send SIGTERM"); + openshell_ocsf::ocsf_emit!( + 
openshell_ocsf::ProcessActivityBuilder::new(crate::ocsf_ctx()) + .activity(openshell_ocsf::ActivityId::Close) + .severity(openshell_ocsf::SeverityId::Medium) + .status(openshell_ocsf::StatusId::Failure) + .message(format!("Failed to send SIGTERM: {e}")) + .build() + ); } // Give the process a moment to terminate gracefully diff --git a/crates/openshell-sandbox/src/proxy.rs b/crates/openshell-sandbox/src/proxy.rs index 9e87450d..fbba63a0 100644 --- a/crates/openshell-sandbox/src/proxy.rs +++ b/crates/openshell-sandbox/src/proxy.rs @@ -10,6 +10,10 @@ use crate::opa::{NetworkAction, OpaEngine}; use crate::policy::ProxyPolicy; use crate::secrets::{SecretResolver, rewrite_header_line}; use miette::{IntoDiagnostic, Result}; +use openshell_ocsf::{ + ActionId, ActivityId, DispositionId, Endpoint, HttpActivityBuilder, HttpRequest, + NetworkActivityBuilder, Process, SeverityId, StatusId, Url as OcsfUrl, ocsf_emit, +}; use std::net::{IpAddr, SocketAddr}; use std::path::PathBuf; use std::sync::Arc; @@ -18,7 +22,7 @@ use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::net::{TcpListener, TcpStream}; use tokio::sync::mpsc; use tokio::task::JoinHandle; -use tracing::{debug, info, warn}; +use tracing::{debug, warn}; const MAX_HEADER_BYTES: usize = 8192; const INFERENCE_LOCAL_HOST: &str = "inference.local"; @@ -156,7 +160,16 @@ impl ProxyHandle { let listener = TcpListener::bind(http_addr).await.into_diagnostic()?; let local_addr = listener.local_addr().into_diagnostic()?; - info!(addr = %local_addr, "Proxy listening (tcp)"); + { + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Listen) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .dst_endpoint(Endpoint::from_ip(local_addr.ip(), local_addr.port())) + .message("Proxy listening") + .build(); + ocsf_emit!(event); + } let join = tokio::spawn(async move { loop { @@ -175,12 +188,24 @@ impl ProxyHandle { ) .await { - warn!(error = %err, "Proxy connection error"); + let event 
= NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .message(format!("Proxy connection error: {err}")) + .build(); + ocsf_emit!(event); } }); } Err(err) => { - warn!(error = %err, "Proxy accept error"); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .message(format!("Proxy accept error: {err}")) + .build(); + ocsf_emit!(event); break; } } @@ -334,13 +359,23 @@ async fn handle_tcp_connection( ) .await?; if let InferenceOutcome::Denied { reason } = outcome { - info!(action = "deny", reason = %reason, host = INFERENCE_LOCAL_HOST, "Inference interception denied"); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Open) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(INFERENCE_LOCAL_HOST, port)) + .message(format!("Inference interception denied: {reason}")) + .status_detail(&reason) + .build(); + ocsf_emit!(event); } return Ok(()); } let peer_addr = client.peer_addr().into_diagnostic()?; - let local_addr = client.local_addr().into_diagnostic()?; + let _local_addr = client.local_addr().into_diagnostic()?; // Evaluate OPA policy with process-identity binding. // Wrapped in spawn_blocking because identity resolution does heavy sync I/O: @@ -402,22 +437,23 @@ async fn handle_tcp_connection( // Allowed connections are logged after the L7 config check (below) // so we can distinguish CONNECT (L4-only) from CONNECT_L7 (L7 follows). if matches!(decision.action, NetworkAction::Deny { .. 
}) { - info!( - src_addr = %peer_addr.ip(), - src_port = peer_addr.port(), - proxy_addr = %local_addr, - dst_host = %host_lc, - dst_port = port, - binary = %binary_str, - binary_pid = %pid_str, - ancestors = %ancestors_str, - cmdline = %cmdline_str, - action = "deny", - engine = "opa", - policy = "-", - reason = %deny_reason, - "CONNECT", - ); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Open) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .src_endpoint_addr(peer_addr.ip(), peer_addr.port()) + .actor_process( + Process::from_bypass(&binary_str, &pid_str, &ancestors_str) + .with_cmd_line(&cmdline_str), + ) + .firewall_rule("-", "opa") + .message(format!("CONNECT denied {host_lc}:{port}")) + .status_detail(&deny_reason) + .build(); + ocsf_emit!(event); emit_denial( &denial_tx, &host_lc, @@ -452,12 +488,27 @@ async fn handle_tcp_connection( .await .into_diagnostic()?, Err(reason) => { - warn!( - dst_host = %host_lc, - dst_port = port, - reason = %reason, - "CONNECT blocked: allowed_ips check failed" - ); + { + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Open) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .src_endpoint_addr(peer_addr.ip(), peer_addr.port()) + .actor_process( + Process::from_bypass(&binary_str, &pid_str, &ancestors_str) + .with_cmd_line(&cmdline_str), + ) + .firewall_rule("-", "ssrf") + .message(format!( + "CONNECT blocked: allowed_ips check failed for {host_lc}:{port}" + )) + .status_detail(&reason) + .build(); + ocsf_emit!(event); + } emit_denial( &denial_tx, &host_lc, @@ -472,12 +523,27 @@ async fn handle_tcp_connection( } }, Err(reason) => { - warn!( - dst_host = %host_lc, - dst_port = port, - reason 
= %reason, - "CONNECT blocked: invalid allowed_ips in policy" - ); + { + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Open) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .src_endpoint_addr(peer_addr.ip(), peer_addr.port()) + .actor_process( + Process::from_bypass(&binary_str, &pid_str, &ancestors_str) + .with_cmd_line(&cmdline_str), + ) + .firewall_rule("-", "ssrf") + .message(format!( + "CONNECT blocked: invalid allowed_ips in policy for {host_lc}:{port}" + )) + .status_detail(&reason) + .build(); + ocsf_emit!(event); + } emit_denial( &denial_tx, &host_lc, @@ -498,12 +564,27 @@ async fn handle_tcp_connection( .await .into_diagnostic()?, Err(reason) => { - warn!( - dst_host = %host_lc, - dst_port = port, - reason = %reason, - "CONNECT blocked: internal address" - ); + { + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Open) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .src_endpoint_addr(peer_addr.ip(), peer_addr.port()) + .actor_process( + Process::from_bypass(&binary_str, &pid_str, &ancestors_str) + .with_cmd_line(&cmdline_str), + ) + .firewall_rule("-", "ssrf") + .message(format!( + "CONNECT blocked: internal address {host_lc}:{port}" + )) + .status_detail(&reason) + .build(); + ocsf_emit!(event); + } emit_denial( &denial_tx, &host_lc, @@ -536,22 +617,24 @@ async fn handle_tcp_connection( } else { "CONNECT" }; - info!( - src_addr = %peer_addr.ip(), - src_port = peer_addr.port(), - proxy_addr = %local_addr, - dst_host = %host_lc, - dst_port = port, - binary = %binary_str, - binary_pid = %pid_str, - ancestors = %ancestors_str, - cmdline = %cmdline_str, - action = "allow", - engine = "opa", - policy = %policy_str, - 
reason = "", - "{connect_msg}", - ); + { + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Open) + .action(ActionId::Allowed) + .disposition(DispositionId::Allowed) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .src_endpoint_addr(peer_addr.ip(), peer_addr.port()) + .actor_process( + Process::from_bypass(&binary_str, &pid_str, &ancestors_str) + .with_cmd_line(&cmdline_str), + ) + .firewall_rule(policy_str, "opa") + .message(format!("{connect_msg} allowed {host_lc}:{port}")) + .build(); + ocsf_emit!(event); + } // Determine effective TLS mode. Check the raw endpoint config for // `tls: skip` independently of L7 config (which requires `protocol`). @@ -616,11 +699,19 @@ async fn handle_tcp_connection( if let Some(ref l7_config) = l7_config { // L7 inspection on terminated TLS traffic. - let tunnel_engine = - opa_engine.clone_engine_for_tunnel().unwrap_or_else(|e| { - warn!(error = %e, "Failed to clone OPA engine for L7, falling back to relay-only"); - regorus::Engine::new() - }); + let tunnel_engine = opa_engine.clone_engine_for_tunnel().unwrap_or_else(|e| { + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .message(format!( + "Failed to clone OPA engine for L7, falling back to relay-only: {e}" + )) + .build(); + ocsf_emit!(event); + regorus::Engine::new() + }); crate::l7::relay::relay_with_inspection( l7_config, std::sync::Mutex::new(tunnel_engine), @@ -648,20 +739,29 @@ async fn handle_tcp_connection( "TLS connection closed" ); } else { - warn!( - host = %host_lc, - port = port, - error = %e, - "TLS relay error" - ); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + 
.dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .message(format!("TLS relay error: {e}")) + .build(); + ocsf_emit!(event); } } } else { - warn!( - host = %host_lc, - port = port, - "TLS detected but TLS state not configured, falling back to raw tunnel" - ); + { + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .message(format!( + "TLS detected but TLS state not configured for {host_lc}:{port}, falling back to raw tunnel" + )) + .build(); + ocsf_emit!(event); + } let _ = tokio::io::copy_bidirectional(&mut client, &mut upstream) .await .into_diagnostic()?; @@ -670,7 +770,16 @@ async fn handle_tcp_connection( // Plaintext HTTP detected. if let Some(ref l7_config) = l7_config { let tunnel_engine = opa_engine.clone_engine_for_tunnel().unwrap_or_else(|e| { - warn!(error = %e, "Failed to clone OPA engine for L7, falling back to relay-only"); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .message(format!( + "Failed to clone OPA engine for L7, falling back to relay-only: {e}" + )) + .build(); + ocsf_emit!(event); regorus::Engine::new() }); if let Err(e) = crate::l7::relay::relay_with_inspection( @@ -685,7 +794,14 @@ async fn handle_tcp_connection( if is_benign_relay_error(&e) { debug!(host = %host_lc, port = port, error = %e, "L7 connection closed"); } else { - warn!(host = %host_lc, port = port, error = %e, "L7 relay error"); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .message(format!("L7 relay error: {e}")) + .build(); + ocsf_emit!(event); } } } else { @@ -700,7 +816,14 @@ async fn 
handle_tcp_connection( if is_benign_relay_error(&e) { debug!(host = %host_lc, port = port, error = %e, "HTTP relay closed"); } else { - warn!(host = %host_lc, port = port, error = %e, "HTTP relay error"); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .message(format!("HTTP relay error: {e}")) + .build(); + ocsf_emit!(event); } } } @@ -998,13 +1121,21 @@ async fn route_inference_request( if let Some(pattern) = detect_inference_pattern(&request.method, &normalized_path, &ctx.patterns) { - info!( - method = %request.method, - path = %normalized_path, - protocol = %pattern.protocol, - kind = %pattern.kind, - "Intercepted inference request, routing locally" - ); + { + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Open) + .action(ActionId::Allowed) + .disposition(DispositionId::Detected) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .dst_endpoint(Endpoint::from_domain(INFERENCE_LOCAL_HOST, 443)) + .message(format!( + "Intercepted inference request, routing locally: {} {} (protocol={}, kind={})", + request.method, normalized_path, pattern.protocol, pattern.kind + )) + .build(); + ocsf_emit!(event); + } // Strip credential + framing/hop-by-hop headers. 
let filtered_headers = sanitize_inference_request_headers(&request.headers); @@ -1070,14 +1201,25 @@ async fn route_inference_request( } Ok(Ok(None)) => break, Ok(Err(e)) => { - warn!(error = %e, "error reading upstream response chunk"); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(INFERENCE_LOCAL_HOST, 443)) + .message(format!("error reading upstream response chunk: {e}")) + .build(); + ocsf_emit!(event); break; } Err(_) => { - warn!( - idle_timeout_secs = CHUNK_IDLE_TIMEOUT.as_secs(), - "streaming response chunk idle timeout, closing" - ); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(INFERENCE_LOCAL_HOST, 443)) + .message("streaming response chunk idle timeout, closing") + .build(); + ocsf_emit!(event); break; } } @@ -1087,7 +1229,18 @@ async fn route_inference_request( write_all(tls_client, format_chunk_terminator()).await?; } Err(e) => { - warn!(error = %e, "inference endpoint detected but upstream service failed"); + { + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(INFERENCE_LOCAL_HOST, 443)) + .message(format!( + "inference endpoint detected but upstream service failed: {e}" + )) + .build(); + ocsf_emit!(event); + } let (status, msg) = router_error_to_http(&e); let body = serde_json::json!({"error": msg}); let body_bytes = body.to_string(); @@ -1102,11 +1255,21 @@ async fn route_inference_request( Ok(true) } else { // Not an inference request — deny - info!( - method = %request.method, - path = %normalized_path, - "connection not allowed by policy" - ); + { + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + 
.activity(ActivityId::Open) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(INFERENCE_LOCAL_HOST, 443)) + .message(format!( + "connection not allowed by policy: {} {}", + request.method, normalized_path + )) + .build(); + ocsf_emit!(event); + } let body = serde_json::json!({"error": "connection not allowed by policy"}); let body_bytes = body.to_string(); let response = format_http_response( @@ -1222,7 +1385,14 @@ fn query_l7_config( Ok(Some(val)) => crate::l7::parse_l7_config(&val), Ok(None) => None, Err(e) => { - warn!(error = %e, "Failed to query L7 endpoint config"); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(host, port)) + .message(format!("Failed to query L7 endpoint config: {e}")) + .build(); + ocsf_emit!(event); None } } @@ -1457,12 +1627,16 @@ fn parse_allowed_ips(raw: &[String]) -> std::result::Result, S match parsed { Ok(n) => { if n.prefix_len() < MIN_SAFE_PREFIX_LEN { - warn!( - cidr = %n, - prefix_len = n.prefix_len(), - "allowed_ips entry has a very broad CIDR (< /{MIN_SAFE_PREFIX_LEN}); \ - this may expose control-plane services on the same network" - ); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .severity(SeverityId::Medium) + .message(format!( + "allowed_ips entry has a very broad CIDR {n} (/{}) < /{MIN_SAFE_PREFIX_LEN}; \ + this may expose control-plane services on the same network", + n.prefix_len() + )) + .build(); + ocsf_emit!(event); } nets.push(n); } @@ -1505,7 +1679,16 @@ fn query_allowed_ips( match engine.query_allowed_ips(&input) { Ok(ips) => ips, Err(e) => { - warn!(error = %e, "Failed to query allowed_ips from endpoint config"); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + 
.severity(SeverityId::Low) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(host, port)) + .message(format!( + "Failed to query allowed_ips from endpoint config: {e}" + )) + .build(); + ocsf_emit!(event); vec![] } } @@ -1754,7 +1937,13 @@ async fn handle_forward_proxy( let (scheme, host, port, path) = match parse_proxy_uri(target_uri) { Ok(parsed) => parsed, Err(e) => { - warn!(target_uri = %target_uri, error = %e, "FORWARD parse error"); + let event = HttpActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .message(format!("FORWARD parse error for {target_uri}: {e}")) + .build(); + ocsf_emit!(event); respond(client, b"HTTP/1.1 400 Bad Request\r\n\r\n").await?; return Ok(()); } @@ -1763,11 +1952,20 @@ async fn handle_forward_proxy( // 2. Reject HTTPS — must use CONNECT for TLS if scheme == "https" { - info!( - dst_host = %host_lc, - dst_port = port, - "FORWARD rejected: HTTPS requires CONNECT" - ); + { + let event = HttpActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Refuse) + .action(ActionId::Denied) + .disposition(DispositionId::Rejected) + .severity(SeverityId::Informational) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .message(format!( + "FORWARD rejected: HTTPS requires CONNECT for {host_lc}:{port}" + )) + .build(); + ocsf_emit!(event); + } respond( client, b"HTTP/1.1 400 Bad Request\r\nContent-Length: 27\r\n\r\nUse CONNECT for HTTPS URLs", @@ -1778,7 +1976,7 @@ async fn handle_forward_proxy( // 3. 
Evaluate OPA policy (same identity binding as CONNECT) let peer_addr = client.peer_addr().into_diagnostic()?; - let local_addr = client.local_addr().into_diagnostic()?; + let _local_addr = client.local_addr().into_diagnostic()?; let opa_clone = opa_engine.clone(); let cache_clone = identity_cache.clone(); @@ -1830,24 +2028,28 @@ async fn handle_forward_proxy( let matched_policy = match &decision.action { NetworkAction::Allow { matched_policy } => matched_policy.clone(), NetworkAction::Deny { reason } => { - info!( - src_addr = %peer_addr.ip(), - src_port = peer_addr.port(), - proxy_addr = %local_addr, - dst_host = %host_lc, - dst_port = port, - method = %method, - path = %path, - binary = %binary_str, - binary_pid = %pid_str, - ancestors = %ancestors_str, - cmdline = %cmdline_str, - action = "deny", - engine = "opa", - policy = "-", - reason = %reason, - "FORWARD", - ); + { + let event = HttpActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .http_request(HttpRequest::new( + method, + OcsfUrl::new("http", &host_lc, &path, port), + )) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .src_endpoint(Endpoint::from_ip(peer_addr.ip(), peer_addr.port())) + .actor_process( + Process::from_bypass(&binary_str, &pid_str, &ancestors_str) + .with_cmd_line(&cmdline_str), + ) + .firewall_rule("-", "opa") + .message(format!("FORWARD denied {method} {host_lc}:{port}{path}")) + .build(); + ocsf_emit!(event); + } emit_denial_simple( denial_tx, &host_lc, @@ -1868,10 +2070,16 @@ async fn handle_forward_proxy( // connection (Connection: close), so a single evaluation suffices. 
if let Some(l7_config) = query_l7_config(&opa_engine, &decision, &host_lc, port) { let tunnel_engine = opa_engine.clone_engine_for_tunnel().unwrap_or_else(|e| { - warn!( - error = %e, - "Failed to clone OPA engine for forward L7" - ); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .message(format!( + "Failed to clone OPA engine for forward L7: {e}" + )) + .build(); + ocsf_emit!(event); regorus::Engine::new() }); let engine_mutex = std::sync::Mutex::new(tunnel_engine); @@ -1909,10 +2117,14 @@ async fn handle_forward_proxy( let (allowed, reason) = crate::l7::relay::evaluate_l7_request(&engine_mutex, &l7_ctx, &request_info) .unwrap_or_else(|e| { - warn!( - error = %e, - "L7 eval failed, denying request" - ); + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .message(format!("L7 eval failed, denying request: {e}")) + .build(); + ocsf_emit!(event); (false, format!("L7 evaluation error: {e}")) }); @@ -1922,18 +2134,35 @@ async fn handle_forward_proxy( (false, crate::l7::EnforcementMode::Enforce) => "deny", }; - info!( - dst_host = %host_lc, - dst_port = port, - method = %method, - path = %path, - binary = %binary_str, - policy = %policy_str, - l7_protocol = "rest", - l7_decision = decision_str, - l7_deny_reason = %reason, - "FORWARD_L7", - ); + { + let (action_id, disposition_id) = match decision_str { + "allow" => (ActionId::Allowed, DispositionId::Allowed), + "deny" => (ActionId::Denied, DispositionId::Blocked), + "audit" => (ActionId::Allowed, DispositionId::Allowed), + _ => (ActionId::Other, DispositionId::Other), + }; + let event = HttpActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .action(action_id) + .disposition(disposition_id) 
+ .severity(SeverityId::Informational) + .http_request(HttpRequest::new( + method, + OcsfUrl::new("http", &host_lc, &path, port), + )) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .src_endpoint(Endpoint::from_ip(peer_addr.ip(), peer_addr.port())) + .actor_process( + Process::from_bypass(&binary_str, &pid_str, &ancestors_str) + .with_cmd_line(&cmdline_str), + ) + .firewall_rule(policy_str, "l7") + .message(format!( + "FORWARD_L7 {decision_str} {method} {host_lc}:{port}{path} reason={reason}" + )) + .build(); + ocsf_emit!(event); + } let effectively_denied = !allowed && l7_config.enforcement == crate::l7::EnforcementMode::Enforce; @@ -1970,12 +2199,30 @@ async fn handle_forward_proxy( Ok(nets) => match resolve_and_check_allowed_ips(&host, port, &nets).await { Ok(addrs) => addrs, Err(reason) => { - warn!( - dst_host = %host_lc, - dst_port = port, - reason = %reason, - "FORWARD blocked: allowed_ips check failed" - ); + { + let event = HttpActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .http_request(HttpRequest::new( + method, + OcsfUrl::new("http", &host_lc, &path, port), + )) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .src_endpoint(Endpoint::from_ip(peer_addr.ip(), peer_addr.port())) + .actor_process( + Process::from_bypass(&binary_str, &pid_str, &ancestors_str) + .with_cmd_line(&cmdline_str), + ) + .firewall_rule(policy_str, "ssrf") + .message(format!( + "FORWARD blocked: allowed_ips check failed for {host_lc}:{port}: {reason}" + )) + .build(); + ocsf_emit!(event); + } emit_denial_simple( denial_tx, &host_lc, @@ -1990,12 +2237,30 @@ async fn handle_forward_proxy( } }, Err(reason) => { - warn!( - dst_host = %host_lc, - dst_port = port, - reason = %reason, - "FORWARD blocked: invalid allowed_ips in policy" - ); + { + let event = HttpActivityBuilder::new(crate::ocsf_ctx()) + 
.activity(ActivityId::Other) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .http_request(HttpRequest::new( + method, + OcsfUrl::new("http", &host_lc, &path, port), + )) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .src_endpoint(Endpoint::from_ip(peer_addr.ip(), peer_addr.port())) + .actor_process( + Process::from_bypass(&binary_str, &pid_str, &ancestors_str) + .with_cmd_line(&cmdline_str), + ) + .firewall_rule(policy_str, "ssrf") + .message(format!( + "FORWARD blocked: invalid allowed_ips in policy for {host_lc}:{port}: {reason}" + )) + .build(); + ocsf_emit!(event); + } emit_denial_simple( denial_tx, &host_lc, @@ -2014,12 +2279,30 @@ async fn handle_forward_proxy( match resolve_and_reject_internal(&host, port).await { Ok(addrs) => addrs, Err(reason) => { - warn!( - dst_host = %host_lc, - dst_port = port, - reason = %reason, - "FORWARD blocked: internal IP without allowed_ips" - ); + { + let event = HttpActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .http_request(HttpRequest::new( + method, + OcsfUrl::new("http", &host_lc, &path, port), + )) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .src_endpoint(Endpoint::from_ip(peer_addr.ip(), peer_addr.port())) + .actor_process( + Process::from_bypass(&binary_str, &pid_str, &ancestors_str) + .with_cmd_line(&cmdline_str), + ) + .firewall_rule(policy_str, "ssrf") + .message(format!( + "FORWARD blocked: internal IP without allowed_ips for {host_lc}:{port}: {reason}" + )) + .build(); + ocsf_emit!(event); + } emit_denial_simple( denial_tx, &host_lc, @@ -2039,36 +2322,53 @@ async fn handle_forward_proxy( let mut upstream = match TcpStream::connect(addrs.as_slice()).await { Ok(s) => s, Err(e) => { - warn!( - dst_host = %host_lc, - dst_port = port, - error = %e, - "FORWARD 
upstream connect failed" - ); + let event = HttpActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .http_request(HttpRequest::new( + method, + OcsfUrl::new("http", &host_lc, &path, port), + )) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .src_endpoint(Endpoint::from_ip(peer_addr.ip(), peer_addr.port())) + .actor_process( + Process::from_bypass(&binary_str, &pid_str, &ancestors_str) + .with_cmd_line(&cmdline_str), + ) + .message(format!( + "FORWARD upstream connect failed for {host_lc}:{port}: {e}" + )) + .build(); + ocsf_emit!(event); respond(client, b"HTTP/1.1 502 Bad Gateway\r\n\r\n").await?; return Ok(()); } }; // Log success - info!( - src_addr = %peer_addr.ip(), - src_port = peer_addr.port(), - proxy_addr = %local_addr, - dst_host = %host_lc, - dst_port = port, - method = %method, - path = %path, - binary = %binary_str, - binary_pid = %pid_str, - ancestors = %ancestors_str, - cmdline = %cmdline_str, - action = "allow", - engine = "opa", - policy = %policy_str, - reason = "", - "FORWARD", - ); + { + let event = HttpActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .action(ActionId::Allowed) + .disposition(DispositionId::Allowed) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .http_request(HttpRequest::new( + method, + OcsfUrl::new("http", &host_lc, &path, port), + )) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .src_endpoint(Endpoint::from_ip(peer_addr.ip(), peer_addr.port())) + .actor_process( + Process::from_bypass(&binary_str, &pid_str, &ancestors_str) + .with_cmd_line(&cmdline_str), + ) + .firewall_rule(policy_str, "opa") + .message(format!("FORWARD allowed {method} {host_lc}:{port}{path}")) + .build(); + ocsf_emit!(event); + } // 9. 
Rewrite request and forward to upstream let rewritten = match rewrite_forward_request(buf, used, &path, secret_resolver.as_deref()) { diff --git a/crates/openshell-sandbox/src/sandbox/linux/landlock.rs b/crates/openshell-sandbox/src/sandbox/linux/landlock.rs index abb91fd4..1f168cc2 100644 --- a/crates/openshell-sandbox/src/sandbox/linux/landlock.rs +++ b/crates/openshell-sandbox/src/sandbox/linux/landlock.rs @@ -10,7 +10,7 @@ use landlock::{ }; use miette::{IntoDiagnostic, Result}; use std::path::{Path, PathBuf}; -use tracing::{debug, info, warn}; +use tracing::debug; pub fn apply(policy: &SandboxPolicy, workdir: Option<&str>) -> Result<()> { let read_only = policy.filesystem.read_only.clone(); @@ -31,12 +31,18 @@ pub fn apply(policy: &SandboxPolicy, workdir: Option<&str>) -> Result<()> { let total_paths = read_only.len() + read_write.len(); let abi = ABI::V2; - info!( - abi = ?abi, - compatibility = ?policy.landlock.compatibility, - read_only_paths = read_only.len(), - read_write_paths = read_write.len(), - "Applying Landlock filesystem sandbox" + openshell_ocsf::ocsf_emit!( + openshell_ocsf::ConfigStateChangeBuilder::new(crate::ocsf_ctx()) + .severity(openshell_ocsf::SeverityId::Informational) + .status(openshell_ocsf::StatusId::Success) + .state(openshell_ocsf::StateId::Enabled, "applying") + .message(format!( + "Applying Landlock filesystem sandbox [abi:{abi:?} compat:{:?} ro:{} rw:{}]", + policy.landlock.compatibility, + read_only.len(), + read_write.len(), + )) + .build() ); let compatibility = &policy.landlock.compatibility; @@ -83,9 +89,15 @@ pub fn apply(policy: &SandboxPolicy, workdir: Option<&str>) -> Result<()> { } let skipped = total_paths - rules_applied; - info!( - rules_applied, - skipped, "Landlock ruleset built successfully" + openshell_ocsf::ocsf_emit!( + openshell_ocsf::ConfigStateChangeBuilder::new(crate::ocsf_ctx()) + .severity(openshell_ocsf::SeverityId::Informational) + .status(openshell_ocsf::StatusId::Success) + 
.state(openshell_ocsf::StateId::Enabled, "built") + .message(format!( + "Landlock ruleset built [rules_applied:{rules_applied} skipped:{skipped}]" + )) + .build() ); ruleset.restrict_self().into_diagnostic()?; @@ -94,10 +106,24 @@ pub fn apply(policy: &SandboxPolicy, workdir: Option<&str>) -> Result<()> { if let Err(err) = result { if matches!(compatibility, LandlockCompatibility::BestEffort) { - warn!( - error = %err, - "Landlock filesystem sandbox is UNAVAILABLE — running WITHOUT filesystem restrictions. \ - Set landlock.compatibility to 'hard_requirement' to make this a fatal error." + openshell_ocsf::ocsf_emit!( + openshell_ocsf::DetectionFindingBuilder::new(crate::ocsf_ctx()) + .activity(openshell_ocsf::ActivityId::Open) + .severity(openshell_ocsf::SeverityId::High) + .confidence(openshell_ocsf::ConfidenceId::High) + .is_alert(true) + .finding_info( + openshell_ocsf::FindingInfo::new( + "landlock-unavailable", + "Landlock Filesystem Sandbox Unavailable", + ) + .with_desc(&format!( + "Running WITHOUT filesystem restrictions: {err}. \ + Set landlock.compatibility to 'hard_requirement' to make this fatal." 
+ )), + ) + .message(format!("Landlock filesystem sandbox unavailable: {err}")) + .build() ); return Ok(()); } diff --git a/crates/openshell-sandbox/src/sandbox/linux/netns.rs b/crates/openshell-sandbox/src/sandbox/linux/netns.rs index 095ed86c..b1f4aeed 100644 --- a/crates/openshell-sandbox/src/sandbox/linux/netns.rs +++ b/crates/openshell-sandbox/src/sandbox/linux/netns.rs @@ -62,11 +62,15 @@ impl NetworkNamespace { .parse() .unwrap(); - info!( - namespace = %name, - host_veth = %veth_host, - sandbox_veth = %veth_sandbox, - "Creating network namespace" + openshell_ocsf::ocsf_emit!( + openshell_ocsf::ConfigStateChangeBuilder::new(crate::ocsf_ctx()) + .severity(openshell_ocsf::SeverityId::Informational) + .status(openshell_ocsf::StatusId::Success) + .state(openshell_ocsf::StateId::Enabled, "creating") + .message(format!( + "Creating network namespace [ns:{name} host_veth:{veth_host} sandbox_veth:{veth_sandbox}]" + )) + .build() ); // Create the namespace @@ -152,11 +156,15 @@ impl NetworkNamespace { } }; - info!( - namespace = %name, - host_ip = %host_ip, - sandbox_ip = %sandbox_ip, - "Network namespace created" + openshell_ocsf::ocsf_emit!( + openshell_ocsf::ConfigStateChangeBuilder::new(crate::ocsf_ctx()) + .severity(openshell_ocsf::SeverityId::Informational) + .status(openshell_ocsf::StatusId::Success) + .state(openshell_ocsf::StateId::Enabled, "created") + .message(format!( + "Network namespace created [ns:{name} host_ip:{host_ip} sandbox_ip:{sandbox_ip}]" + )) + .build() ); Ok(Self { @@ -246,12 +254,17 @@ impl NetworkNamespace { let iptables_path = match find_iptables() { Some(path) => path, None => { - warn!( - namespace = %self.name, - search_paths = ?IPTABLES_SEARCH_PATHS, - "iptables not found; bypass detection rules will not be installed. \ - Install the iptables package for proxy bypass diagnostics." 
- ); + openshell_ocsf::ocsf_emit!(openshell_ocsf::ConfigStateChangeBuilder::new( + crate::ocsf_ctx() + ) + .severity(openshell_ocsf::SeverityId::Medium) + .status(openshell_ocsf::StatusId::Failure) + .state(openshell_ocsf::StateId::Disabled, "degraded") + .message(format!( + "iptables not found; bypass detection rules will not be installed [ns:{}]", + self.name + )) + .build()); return Ok(()); } }; @@ -260,12 +273,8 @@ impl NetworkNamespace { let proxy_port_str = proxy_port.to_string(); let log_prefix = format!("openshell:bypass:{}:", &self.name); - info!( - namespace = %self.name, - iptables = %iptables_path, - proxy_addr = %format!("{}:{}", host_ip_str, proxy_port), - "Installing bypass detection rules" - ); + // "Installing bypass detection rules" is a transient step — skip OCSF. + // The completion event below covers the outcome. // Install IPv4 rules if let Err(e) = self.install_bypass_rules_for( @@ -274,11 +283,17 @@ impl NetworkNamespace { &proxy_port_str, &log_prefix, ) { - warn!( - namespace = %self.name, - error = %e, - "Failed to install IPv4 bypass detection rules" - ); + openshell_ocsf::ocsf_emit!(openshell_ocsf::ConfigStateChangeBuilder::new( + crate::ocsf_ctx() + ) + .severity(openshell_ocsf::SeverityId::Medium) + .status(openshell_ocsf::StatusId::Failure) + .state(openshell_ocsf::StateId::Disabled, "failed") + .message(format!( + "Failed to install IPv4 bypass detection rules [ns:{}]: {e}", + self.name + )) + .build()); return Err(e); } @@ -286,17 +301,30 @@ impl NetworkNamespace { // Skip the proxy ACCEPT rule for IPv6 since the proxy address is IPv4. 
if let Some(ip6_path) = find_ip6tables(&iptables_path) { if let Err(e) = self.install_bypass_rules_for_v6(&ip6_path, &log_prefix) { - warn!( - namespace = %self.name, - error = %e, - "Failed to install IPv6 bypass detection rules (non-fatal)" - ); + openshell_ocsf::ocsf_emit!(openshell_ocsf::ConfigStateChangeBuilder::new( + crate::ocsf_ctx() + ) + .severity(openshell_ocsf::SeverityId::Low) + .status(openshell_ocsf::StatusId::Failure) + .state(openshell_ocsf::StateId::Other, "degraded") + .message(format!( + "Failed to install IPv6 bypass detection rules (non-fatal) [ns:{}]: {e}", + self.name + )) + .build()); } } - info!( - namespace = %self.name, - "Bypass detection rules installed" + openshell_ocsf::ocsf_emit!( + openshell_ocsf::ConfigStateChangeBuilder::new(crate::ocsf_ctx()) + .severity(openshell_ocsf::SeverityId::Informational) + .status(openshell_ocsf::StatusId::Success) + .state(openshell_ocsf::StateId::Enabled, "installed") + .message(format!( + "Bypass detection rules installed [ns:{}]", + self.name + )) + .build() ); Ok(()) @@ -588,7 +616,14 @@ impl Drop for NetworkNamespace { ); } - info!(namespace = %self.name, "Network namespace cleaned up"); + openshell_ocsf::ocsf_emit!( + openshell_ocsf::ConfigStateChangeBuilder::new(crate::ocsf_ctx()) + .severity(openshell_ocsf::SeverityId::Informational) + .status(openshell_ocsf::StatusId::Success) + .state(openshell_ocsf::StateId::Disabled, "cleaned_up") + .message(format!("Network namespace cleaned up [ns:{}]", self.name)) + .build() + ); } } diff --git a/crates/openshell-sandbox/src/sandbox/mod.rs b/crates/openshell-sandbox/src/sandbox/mod.rs index f512a8e3..f7b03733 100644 --- a/crates/openshell-sandbox/src/sandbox/mod.rs +++ b/crates/openshell-sandbox/src/sandbox/mod.rs @@ -5,8 +5,6 @@ use crate::policy::SandboxPolicy; use miette::Result; -#[cfg(not(target_os = "linux"))] -use tracing::warn; #[cfg(target_os = "linux")] pub mod linux; @@ -26,7 +24,17 @@ pub fn apply(policy: &SandboxPolicy, workdir: 
Option<&str>) -> Result<()> { #[cfg(not(target_os = "linux"))] { let _ = (policy, workdir); - warn!("Sandbox policy provided but platform sandboxing is not yet implemented"); + openshell_ocsf::ocsf_emit!( + openshell_ocsf::DetectionFindingBuilder::new(crate::ocsf_ctx()) + .activity(openshell_ocsf::ActivityId::Open) + .severity(openshell_ocsf::SeverityId::Medium) + .finding_info(openshell_ocsf::FindingInfo::new( + "platform-sandbox-unavailable", + "Platform Sandboxing Not Implemented", + ).with_desc("Sandbox policy provided but platform sandboxing is not yet implemented on this OS")) + .message("Platform sandboxing not yet implemented") + .build() + ); Ok(()) } } diff --git a/crates/openshell-sandbox/src/ssh.rs b/crates/openshell-sandbox/src/ssh.rs index e3add887..a8fdb299 100644 --- a/crates/openshell-sandbox/src/ssh.rs +++ b/crates/openshell-sandbox/src/ssh.rs @@ -12,7 +12,12 @@ use crate::{register_managed_child, unregister_managed_child}; use miette::{IntoDiagnostic, Result}; use nix::pty::{Winsize, openpty}; use nix::unistd::setsid; +use openshell_ocsf::{ + ActionId, ActivityId, AuthTypeId, ConfidenceId, DetectionFindingBuilder, DispositionId, + FindingInfo, SeverityId, SshActivityBuilder, StatusId, ocsf_emit, +}; use rand_core::OsRng; +use tracing::warn; use russh::keys::{Algorithm, PrivateKey}; use russh::server::{Auth, Handle, Session}; use russh::{ChannelId, CryptoVec}; @@ -26,7 +31,6 @@ use std::sync::{Arc, Mutex, mpsc}; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::net::TcpListener; -use tracing::{info, warn}; const PREFACE_MAGIC: &str = "NSSH1"; #[cfg(test)] @@ -60,7 +64,14 @@ async fn ssh_server_init( let config = Arc::new(config); let ca_paths = ca_file_paths.as_ref().map(|p| Arc::new(p.clone())); let listener = TcpListener::bind(listen_addr).await.into_diagnostic()?; - info!(addr = %listen_addr, "SSH server listening"); + ocsf_emit!( + SshActivityBuilder::new(crate::ocsf_ctx()) + 
.activity(ActivityId::Listen) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .message(format!("SSH server listening on {listen_addr}")) + .build() + ); Ok((listener, config, ca_paths)) } @@ -139,7 +150,14 @@ pub async fn run_ssh_server( ) .await { - warn!(error = %err, "SSH connection failed"); + ocsf_emit!( + SshActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .message(format!("SSH connection failed: {err}")) + .build() + ); } }); } @@ -160,17 +178,59 @@ async fn handle_connection( provider_env: HashMap, nonce_cache: &NonceCache, ) -> Result<()> { - info!(peer = %peer, "SSH connection: reading handshake preface"); + ocsf_emit!( + SshActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Open) + .severity(SeverityId::Informational) + .src_endpoint_addr(peer.ip(), peer.port()) + .message(format!( + "SSH connection: reading handshake preface from {peer}" + )) + .build() + ); let mut line = String::new(); read_line(&mut stream, &mut line).await?; - info!(peer = %peer, preface_len = line.len(), "SSH connection: preface received, verifying"); + ocsf_emit!( + SshActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Open) + .severity(SeverityId::Informational) + .src_endpoint_addr(peer.ip(), peer.port()) + .message(format!( + "SSH connection: preface received from {peer}, verifying (len={})", + line.len() + )) + .build() + ); if !verify_preface(&line, secret, handshake_skew_secs, nonce_cache)? 
{ - warn!(peer = %peer, "SSH connection: handshake verification failed"); + ocsf_emit!( + SshActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Open) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .src_endpoint_addr(peer.ip(), peer.port()) + .message(format!( + "SSH connection: handshake verification failed from {peer}" + )) + .build() + ); let _ = stream.write_all(b"ERR\n").await; return Ok(()); } stream.write_all(b"OK\n").await.into_diagnostic()?; - info!(peer = %peer, "SSH handshake accepted"); + ocsf_emit!( + SshActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Open) + .action(ActionId::Allowed) + .disposition(DispositionId::Allowed) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .src_endpoint_addr(peer.ip(), peer.port()) + .auth_type(AuthTypeId::Other, "NSSH1") + .message(format!("SSH handshake accepted from {peer}")) + .build() + ); let handler = SshHandler::new( policy, @@ -245,7 +305,31 @@ fn verify_preface( .lock() .map_err(|_| miette::miette!("nonce cache lock poisoned"))?; if cache.contains_key(nonce) { - warn!(nonce = nonce, "NSSH1 nonce replay detected"); + ocsf_emit!( + SshActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::High) + .auth_type(AuthTypeId::Other, "NSSH1") + .message(format!("NSSH1 nonce replay detected: {nonce}")) + .build() + ); + ocsf_emit!( + DetectionFindingBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Open) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::High) + .is_alert(true) + .confidence(ConfidenceId::High) + .finding_info(FindingInfo::new( + "nssh1-nonce-replay", + "NSSH1 Nonce Replay Attack" + )) + .evidence("nonce", nonce) + .build() + ); return Ok(false); } cache.insert(nonce.to_string(), Instant::now()); @@ -358,22 +442,30 @@ impl 
russh::server::Handler for SshHandler { // uses u32 for ports, but valid TCP ports are 0-65535. Without this // check, port 65537 truncates to port 1 (privileged). if port_to_connect > u32::from(u16::MAX) { - warn!( - host = host_to_connect, - port = port_to_connect, - "direct-tcpip rejected: port exceeds valid TCP range (0-65535)" - ); + ocsf_emit!(SshActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Refuse) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .message(format!( + "direct-tcpip rejected: port {port_to_connect} exceeds valid TCP range for host {host_to_connect}" + )) + .build()); return Ok(false); } // Only allow forwarding to loopback destinations to prevent the // sandbox SSH server from being used as a generic proxy. if !is_loopback_host(host_to_connect) { - warn!( - host = host_to_connect, - port = port_to_connect, - "direct-tcpip rejected: non-loopback destination" - ); + ocsf_emit!(SshActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Refuse) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .message(format!( + "direct-tcpip rejected: non-loopback destination {host_to_connect}:{port_to_connect}" + )) + .build()); return Ok(false); } @@ -386,7 +478,14 @@ impl russh::server::Handler for SshHandler { let tcp = match connect_in_netns(&addr, netns_fd).await { Ok(stream) => stream, Err(err) => { - warn!(addr = %addr, error = %err, "direct-tcpip: failed to connect"); + ocsf_emit!( + SshActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Low) + .status(StatusId::Failure) + .message(format!("direct-tcpip: failed to connect to {addr}: {err}")) + .build() + ); let _ = channel.close().await; return; } @@ -513,7 +612,15 @@ impl russh::server::Handler for SshHandler { })?; state.input_sender = Some(input_sender); } else { - warn!(subsystem = name, "unsupported subsystem requested"); + 
ocsf_emit!( + SshActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Refuse) + .action(ActionId::Denied) + .disposition(DispositionId::Rejected) + .severity(SeverityId::Medium) + .message(format!("unsupported subsystem requested: {name}")) + .build() + ); session.channel_failure(channel)?; } Ok(()) diff --git a/deploy/docker/Dockerfile.images b/deploy/docker/Dockerfile.images index d078429d..e7669c26 100644 --- a/deploy/docker/Dockerfile.images +++ b/deploy/docker/Dockerfile.images @@ -47,6 +47,7 @@ COPY Cargo.toml Cargo.lock ./ COPY crates/openshell-bootstrap/Cargo.toml crates/openshell-bootstrap/Cargo.toml COPY crates/openshell-cli/Cargo.toml crates/openshell-cli/Cargo.toml COPY crates/openshell-core/Cargo.toml crates/openshell-core/Cargo.toml +COPY crates/openshell-ocsf/Cargo.toml crates/openshell-ocsf/Cargo.toml COPY crates/openshell-policy/Cargo.toml crates/openshell-policy/Cargo.toml COPY crates/openshell-providers/Cargo.toml crates/openshell-providers/Cargo.toml COPY crates/openshell-router/Cargo.toml crates/openshell-router/Cargo.toml @@ -60,6 +61,7 @@ RUN mkdir -p \ crates/openshell-bootstrap/src \ crates/openshell-cli/src \ crates/openshell-core/src \ + crates/openshell-ocsf/src \ crates/openshell-policy/src \ crates/openshell-providers/src \ crates/openshell-router/src \ @@ -69,6 +71,7 @@ RUN mkdir -p \ touch crates/openshell-bootstrap/src/lib.rs && \ printf 'fn main() {}\n' > crates/openshell-cli/src/main.rs && \ touch crates/openshell-core/src/lib.rs && \ + touch crates/openshell-ocsf/src/lib.rs && \ touch crates/openshell-policy/src/lib.rs && \ touch crates/openshell-providers/src/lib.rs && \ touch crates/openshell-router/src/lib.rs && \ @@ -129,6 +132,7 @@ FROM rust-deps AS supervisor-workspace ARG OPENSHELL_CARGO_VERSION COPY crates/openshell-core/ crates/openshell-core/ +COPY crates/openshell-ocsf/ crates/openshell-ocsf/ COPY crates/openshell-policy/ crates/openshell-policy/ COPY crates/openshell-router/ crates/openshell-router/ COPY 
crates/openshell-sandbox/ crates/openshell-sandbox/ diff --git a/deploy/docker/Dockerfile.python-wheels b/deploy/docker/Dockerfile.python-wheels index 000150dd..78d4dc82 100644 --- a/deploy/docker/Dockerfile.python-wheels +++ b/deploy/docker/Dockerfile.python-wheels @@ -40,6 +40,7 @@ RUN . cross-build.sh && install_cross_toolchain && install_sccache && add_rust_t COPY Cargo.toml Cargo.lock ./ COPY crates/openshell-cli/Cargo.toml crates/openshell-cli/Cargo.toml COPY crates/openshell-core/Cargo.toml crates/openshell-core/Cargo.toml +COPY crates/openshell-ocsf/Cargo.toml crates/openshell-ocsf/Cargo.toml COPY crates/openshell-providers/Cargo.toml crates/openshell-providers/Cargo.toml COPY crates/openshell-router/Cargo.toml crates/openshell-router/Cargo.toml COPY crates/openshell-sandbox/Cargo.toml crates/openshell-sandbox/Cargo.toml @@ -49,11 +50,12 @@ COPY crates/openshell-core/build.rs crates/openshell-core/build.rs COPY proto/ proto/ # Create dummy source files to build dependencies. -RUN mkdir -p crates/openshell-cli/src crates/openshell-core/src crates/openshell-providers/src crates/openshell-router/src crates/openshell-sandbox/src crates/openshell-server/src crates/openshell-bootstrap/src && \ +RUN mkdir -p crates/openshell-cli/src crates/openshell-core/src crates/openshell-ocsf/src crates/openshell-providers/src crates/openshell-router/src crates/openshell-sandbox/src crates/openshell-server/src crates/openshell-bootstrap/src && \ echo "fn main() {}" > crates/openshell-cli/src/main.rs && \ echo "fn main() {}" > crates/openshell-sandbox/src/main.rs && \ echo "fn main() {}" > crates/openshell-server/src/main.rs && \ touch crates/openshell-core/src/lib.rs && \ + touch crates/openshell-ocsf/src/lib.rs && \ touch crates/openshell-providers/src/lib.rs && \ touch crates/openshell-router/src/lib.rs && \ touch crates/openshell-bootstrap/src/lib.rs diff --git a/deploy/docker/Dockerfile.python-wheels-macos b/deploy/docker/Dockerfile.python-wheels-macos index 
0bf02a49..9b16ff23 100644 --- a/deploy/docker/Dockerfile.python-wheels-macos +++ b/deploy/docker/Dockerfile.python-wheels-macos @@ -48,6 +48,7 @@ ENV CARGO_TARGET_AARCH64_APPLE_DARWIN_AR=aarch64-apple-darwin25.1-ar COPY Cargo.toml Cargo.lock ./ COPY crates/openshell-cli/Cargo.toml crates/openshell-cli/Cargo.toml COPY crates/openshell-core/Cargo.toml crates/openshell-core/Cargo.toml +COPY crates/openshell-ocsf/Cargo.toml crates/openshell-ocsf/Cargo.toml COPY crates/openshell-providers/Cargo.toml crates/openshell-providers/Cargo.toml COPY crates/openshell-router/Cargo.toml crates/openshell-router/Cargo.toml COPY crates/openshell-sandbox/Cargo.toml crates/openshell-sandbox/Cargo.toml @@ -57,11 +58,12 @@ COPY crates/openshell-core/build.rs crates/openshell-core/build.rs COPY proto/ proto/ # Create dummy source files to build dependencies. -RUN mkdir -p crates/openshell-cli/src crates/openshell-core/src crates/openshell-providers/src crates/openshell-router/src crates/openshell-sandbox/src crates/openshell-server/src crates/openshell-bootstrap/src && \ +RUN mkdir -p crates/openshell-cli/src crates/openshell-core/src crates/openshell-ocsf/src crates/openshell-providers/src crates/openshell-router/src crates/openshell-sandbox/src crates/openshell-server/src crates/openshell-bootstrap/src && \ echo "fn main() {}" > crates/openshell-cli/src/main.rs && \ echo "fn main() {}" > crates/openshell-sandbox/src/main.rs && \ echo "fn main() {}" > crates/openshell-server/src/main.rs && \ touch crates/openshell-core/src/lib.rs && \ + touch crates/openshell-ocsf/src/lib.rs && \ touch crates/openshell-providers/src/lib.rs && \ touch crates/openshell-router/src/lib.rs && \ touch crates/openshell-bootstrap/src/lib.rs From 22fc37632b63eaa691b731a5de68cfdf40c936b5 Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Thu, 26 Mar 2026 08:52:56 -0700 Subject: [PATCH 02/20] fix(scripts): attach provider to all smoke test phases to avoid rate limits 
GitHub's unauthenticated API rate limit (60/hour) causes flaky 403s for Phases 1, 2, and 4. Fix by attaching the provider to all sandboxes and upgrading the Phase 1 policy to L7 so credential injection works. Phase 4 (tls:skip) cannot inject credentials by design, so relax the assertion to accept either 200 or 403 from upstream -- both prove the proxy forwarded the request. --- scripts/smoke-test-network-policy.sh | 55 +++++++++++++++++----------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/scripts/smoke-test-network-policy.sh b/scripts/smoke-test-network-policy.sh index 383bde0f..ee5dbfdc 100755 --- a/scripts/smoke-test-network-policy.sh +++ b/scripts/smoke-test-network-policy.sh @@ -19,14 +19,15 @@ # # What it tests: # -# Phase 1 — L4 allow/deny (no L7 rules): -# Creates a sandbox with L4-only policy for api.github.com. -# - curl api.github.com/zen -> should succeed (TLS auto-terminated) +# Phase 1 — L4 allow/deny (credential injection, TLS auto-terminated): +# Creates a sandbox with L4+L7 policy for api.github.com (provider +# attached for authenticated requests). +# - curl api.github.com/zen -> should succeed (authenticated, 200) # - curl httpbin.org -> should be blocked (implicit deny) # # Phase 2 — L7 enforcement (method + path rules): -# Creates a sandbox with read-only L7 enforcement. -# - GET /zen -> should succeed +# Creates a sandbox with read-only L7 enforcement (provider attached). +# - GET /zen -> should succeed (200) # - POST /user/repos -> should be blocked (403) # # Phase 3 — Credential injection: @@ -35,8 +36,8 @@ # (proxy auto-injects GITHUB_TOKEN via TLS MITM) # # Phase 4 — tls: skip escape hatch: -# Creates a sandbox with tls: skip. -# - curl /zen -> should succeed (raw tunnel, no auth needed) +# Creates a sandbox with tls: skip (provider attached but no MITM). 
+# - curl /zen -> should get response from upstream (raw tunnel) # - curl /user -> should get 401 (no credential injection) # # After all tests, sandboxes are kept alive for log inspection. @@ -47,10 +48,10 @@ # Embedded Policy YAMLs # ============================================================================= # -# POLICY_L4_ONLY (L4 allow api.github.com:443, deny everything else): +# POLICY_L4 (allow api.github.com:443 with credential injection, deny everything else): # network_policies: # github_api: -# endpoints: [{ host: api.github.com, port: 443 }] +# endpoints: [{ host: api.github.com, port: 443, protocol: rest, access: full }] # binaries: [{ path: /usr/bin/curl }] # # POLICY_L7_READONLY (L7 read-only enforcement): @@ -149,6 +150,9 @@ create_sandbox() { # Kill the blocking create process (sandbox stays alive with --keep) kill "$pid" 2>/dev/null || true wait "$pid" 2>/dev/null || true + # Brief settle time — SSH server inside the sandbox may still be + # binding its port even though the status flipped to Ready. + sleep 3 return 0 fi sleep 2 @@ -185,7 +189,7 @@ sandbox_exec() { # Write policies # --------------------------------------------------------------------------- -POLICY_L4=$(write_policy l4-only <<'YAML' +POLICY_L4=$(write_policy l4-allow-deny <<'YAML' version: 1 filesystem_policy: include_workdir: true @@ -202,6 +206,9 @@ network_policies: endpoints: - host: api.github.com port: 443 + protocol: rest + enforcement: enforce + access: full binaries: - { path: /usr/bin/curl } YAML @@ -302,18 +309,18 @@ fi # Phase 1: L4 allow/deny # --------------------------------------------------------------------------- -header "Phase 1: L4 Allow/Deny (no L7 rules, TLS auto-terminated)" +header "Phase 1: L4 Allow/Deny (TLS auto-terminated, credential injection)" SB1="smoke-l4" -if create_sandbox "$SB1"; then +if create_sandbox "$SB1" --provider "$PROVIDER_NAME"; then echo " Setting L4-only policy..." 
openshell policy set "$SB1" --policy "$POLICY_L4" >/dev/null 2>&1 echo " Waiting for policy propagation (15s)..." sleep 15 - # Test 1: L4 allow + # Test 1: L4 allow (authenticated via credential injection) echo " Running: curl api.github.com/zen" - output=$(sandbox_exec "$SB1" "curl -s -o /dev/null -w '%{http_code}' --max-time 10 https://api.github.com/zen") + output=$(sandbox_exec "$SB1" 'curl -s -o /dev/null -w "%{http_code}" --max-time 10 -H "Authorization: token $GITHUB_TOKEN" https://api.github.com/zen') if [[ "$output" == *"200"* ]]; then pass "L4 allow: curl to api.github.com succeeded (HTTP 200)" else @@ -340,15 +347,15 @@ fi header "Phase 2: L7 Enforcement (read-only, TLS auto-terminated)" SB2="smoke-l7" -if create_sandbox "$SB2"; then +if create_sandbox "$SB2" --provider "$PROVIDER_NAME"; then echo " Setting L7 read-only policy..." openshell policy set "$SB2" --policy "$POLICY_L7_RO" >/dev/null 2>&1 echo " Waiting for policy propagation (15s)..." sleep 15 - # Test 3: L7 allow (GET) + # Test 3: L7 allow (GET, authenticated via credential injection) echo " Running: GET /zen" - output=$(sandbox_exec "$SB2" "curl -s -o /dev/null -w '%{http_code}' --max-time 10 https://api.github.com/zen") + output=$(sandbox_exec "$SB2" 'curl -s -o /dev/null -w "%{http_code}" --max-time 10 -H "Authorization: token $GITHUB_TOKEN" https://api.github.com/zen') if [[ "$output" == *"200"* ]]; then pass "L7 allow: GET /zen succeeded (read-only allows GET)" else @@ -411,13 +418,17 @@ if create_sandbox "$SB4" --provider "$PROVIDER_NAME"; then echo " Waiting for policy propagation (15s)..." sleep 15 - # Test 6: L4 connection succeeds (raw tunnel, /zen needs no auth) - echo " Running: curl /zen (should succeed via raw tunnel)" + # Test 6: L4 connection reaches upstream (raw tunnel, no MITM). + # Without credential injection the request is unauthenticated, so + # GitHub may return 200 or 403 (rate-limited). 
Either proves the + # proxy forwarded the request — a proxy block would return "000" + # or the sandbox-policy 403 body. + echo " Running: curl /zen (should reach upstream via raw tunnel)" output=$(sandbox_exec "$SB4" "curl -s -o /dev/null -w '%{http_code}' --max-time 10 https://api.github.com/zen" || true) - if [[ "$output" == *"200"* ]]; then - pass "tls: skip: L4 connection succeeded (raw tunnel)" + if [[ "$output" == *"200"* || "$output" == *"403"* ]]; then + pass "tls: skip: request reached upstream (raw tunnel, HTTP $output)" else - fail "tls: skip: expected 200 for /zen" "got: $output" + fail "tls: skip: expected upstream response (200 or 403)" "got: $output" fi # Test 7: Credential injection does NOT work with tls: skip. From 6eb4f6500f763290d14f4d9fb531ffa4a6599ada Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Thu, 26 Mar 2026 09:05:11 -0700 Subject: [PATCH 03/20] fix(ocsf): remove timestamp from shorthand format to avoid double-timestamp The display layer (gateway logs, TUI, sandbox logs CLI) already prepends a timestamp. Having one in the shorthand output too produces redundant double-timestamps like: 15:49:11 sandbox INFO 15:49:11.649 I NET:OPEN ALLOWED ... Now the shorthand is just the severity + structured content: 15:49:11 sandbox INFO I NET:OPEN ALLOWED ... 
--- ...-preview-pr.yml => docs-preview-build.yml} | 33 +++-- .github/workflows/docs-preview-deploy.yml | 117 ++++++++++++++++++ .github/workflows/slack-new-issue.yml | 66 ++++++++++ crates/openshell-ocsf/src/format/shorthand.rs | 46 +++---- 4 files changed, 223 insertions(+), 39 deletions(-) rename .github/workflows/{docs-preview-pr.yml => docs-preview-build.yml} (58%) create mode 100644 .github/workflows/docs-preview-deploy.yml create mode 100644 .github/workflows/slack-new-issue.yml diff --git a/.github/workflows/docs-preview-pr.yml b/.github/workflows/docs-preview-build.yml similarity index 58% rename from .github/workflows/docs-preview-pr.yml rename to .github/workflows/docs-preview-build.yml index 6c0672ba..be360bd2 100644 --- a/.github/workflows/docs-preview-pr.yml +++ b/.github/workflows/docs-preview-build.yml @@ -1,4 +1,4 @@ -name: Docs PR Preview +name: Docs PR Preview Build on: pull_request: @@ -12,8 +12,6 @@ concurrency: cancel-in-progress: true permissions: - contents: write - pull-requests: write packages: read defaults: @@ -24,7 +22,7 @@ env: MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} jobs: - preview: + build: runs-on: build-arm64 container: image: ghcr.io/nvidia/openshell/ci:latest @@ -36,6 +34,7 @@ jobs: uses: actions/checkout@v4 - name: Install tools + if: github.event.action != 'closed' run: mise install - name: Build documentation @@ -48,11 +47,23 @@ jobs: find _build -name .doctrees -prune -exec rm -rf {} \; find _build -name .buildinfo -exec rm {} \; - - name: Deploy preview - if: github.event.pull_request.head.repo.full_name == github.repository - uses: rossjrw/pr-preview-action@v1 + - name: Upload docs artifact + if: github.event.action != 'closed' + uses: actions/upload-artifact@v4 + with: + name: docs-preview + path: _build/docs/ + retention-days: 1 + + - name: Save PR metadata + run: | + mkdir -p pr-metadata + echo "${{ github.event.number }}" > pr-metadata/pr-number + echo "${{ github.event.action }}" > pr-metadata/event-action + + - 
name: Upload PR metadata + uses: actions/upload-artifact@v4 with: - source-dir: ./_build/docs/ - preview-branch: gh-pages - umbrella-dir: pr-preview - action: auto + name: pr-metadata + path: pr-metadata/ + retention-days: 1 diff --git a/.github/workflows/docs-preview-deploy.yml b/.github/workflows/docs-preview-deploy.yml new file mode 100644 index 00000000..d7b4d7af --- /dev/null +++ b/.github/workflows/docs-preview-deploy.yml @@ -0,0 +1,117 @@ +name: Docs PR Preview Deploy + +# Runs after the build workflow completes. Uses workflow_run so the +# GITHUB_TOKEN has write access to the base repo — this is the standard +# pattern for deploying from fork PRs where the pull_request token is +# read-only. +on: + workflow_run: + workflows: ["Docs PR Preview Build"] + types: [completed] + +concurrency: + group: preview-deploy-${{ github.event.workflow_run.head_branch }} + cancel-in-progress: true + +permissions: + contents: write + pull-requests: write + actions: read + +jobs: + deploy: + runs-on: ubuntu-latest + if: github.event.workflow_run.conclusion == 'success' + steps: + - name: Download PR metadata + uses: actions/download-artifact@v4 + with: + name: pr-metadata + path: pr-metadata/ + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Read PR metadata + id: metadata + run: | + echo "pr-number=$(cat pr-metadata/pr-number)" >> "$GITHUB_OUTPUT" + event_action="$(cat pr-metadata/event-action)" + echo "event-action=$event_action" >> "$GITHUB_OUTPUT" + if [[ "$event_action" == "closed" ]]; then + echo "action=remove" >> "$GITHUB_OUTPUT" + else + echo "action=deploy" >> "$GITHUB_OUTPUT" + fi + + - name: Download docs artifact + if: steps.metadata.outputs.action == 'deploy' + uses: actions/download-artifact@v4 + with: + name: docs-preview + path: docs-preview/ + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Deploy preview + if: steps.metadata.outputs.action == 'deploy' + 
uses: JamesIves/github-pages-deploy-action@v4 + with: + branch: gh-pages + folder: docs-preview/ + target-folder: pr-preview/pr-${{ steps.metadata.outputs.pr-number }} + commit-message: "Deploy preview for PR ${{ steps.metadata.outputs.pr-number }}" + clean: true + + - name: Remove preview + if: steps.metadata.outputs.action == 'remove' + run: | + git fetch origin gh-pages + git worktree add gh-pages-branch origin/gh-pages + cd gh-pages-branch + PREVIEW_DIR="pr-preview/pr-${{ steps.metadata.outputs.pr-number }}" + if [ -d "$PREVIEW_DIR" ]; then + git rm -rf "$PREVIEW_DIR" + git commit -m "Remove preview for PR ${{ steps.metadata.outputs.pr-number }}" + git push origin gh-pages + else + echo "Preview directory $PREVIEW_DIR does not exist, nothing to remove." + fi + + - name: Comment on PR + uses: actions/github-script@v7 + with: + script: | + const prNumber = parseInt('${{ steps.metadata.outputs.pr-number }}'); + const action = '${{ steps.metadata.outputs.action }}'; + const marker = ''; + const repo = context.repo; + + // Find existing preview comment + const comments = await github.rest.issues.listComments({ + ...repo, + issue_number: prNumber, + per_page: 100, + }); + const existing = comments.data.find(c => c.body.includes(marker)); + + let body; + if (action === 'deploy') { + const url = `https://${repo.owner}.github.io/${repo.repo}/pr-preview/pr-${prNumber}/`; + body = `${marker}\n### Docs Preview\n\nPreview deployed to ${url}`; + } else { + body = `${marker}\n### Docs Preview\n\nPreview removed.`; + } + + if (existing) { + await github.rest.issues.updateComment({ + ...repo, + comment_id: existing.id, + body, + }); + } else { + await github.rest.issues.createComment({ + ...repo, + issue_number: prNumber, + body, + }); + } diff --git a/.github/workflows/slack-new-issue.yml b/.github/workflows/slack-new-issue.yml new file mode 100644 index 00000000..0fba9258 --- /dev/null +++ b/.github/workflows/slack-new-issue.yml @@ -0,0 +1,66 @@ +name: "Slack: New Issue 
Alert" + +on: + issues: + types: [opened] + workflow_dispatch: + +permissions: + issues: read + +jobs: + notify: + runs-on: ubuntu-latest + if: github.repository_owner == 'NVIDIA' + steps: + - name: Send Slack notification + env: + SLACK_WEBHOOK: ${{ secrets.SLACK_COMMUNITY_WH }} + EVENT_NAME: ${{ github.event_name }} + REPO: ${{ github.repository }} + RUN_ID: ${{ github.run_id }} + run: | + if [ "$EVENT_NAME" = "workflow_dispatch" ]; then + TITLE="[Smoke Test] Example issue title" + AUTHOR="${{ github.actor }}" + URL="https://github.com/$REPO/actions/runs/$RUN_ID" + NUMBER="0" + LABELS="test" + else + TITLE=$(echo "${{ github.event.issue.title }}" | head -c 150) + AUTHOR="${{ github.event.issue.user.login }}" + URL="${{ github.event.issue.html_url }}" + NUMBER="${{ github.event.issue.number }}" + LABELS=$(echo '${{ toJSON(github.event.issue.labels.*.name) }}' \ + | jq -r 'if length > 0 then join(", ") else "none" end') + fi + + curl -sf "$SLACK_WEBHOOK" \ + -H "Content-Type: application/json" \ + -d "$(jq -n \ + --arg title "$TITLE" \ + --arg author "$AUTHOR" \ + --arg url "$URL" \ + --arg number "$NUMBER" \ + --arg labels "$LABELS" \ + '{ + blocks: [ + { + type: "section", + text: { + type: "mrkdwn", + text: ("*New Issue #" + $number + "*\n<" + $url + "|" + $title + ">") + } + }, + { + type: "context", + elements: [ + { + type: "mrkdwn", + text: ("*Author:* " + $author + " | *Labels:* " + $labels) + } + ] + } + ] + }' + )" diff --git a/crates/openshell-ocsf/src/format/shorthand.rs b/crates/openshell-ocsf/src/format/shorthand.rs index e9c99ab5..f8506b6d 100644 --- a/crates/openshell-ocsf/src/format/shorthand.rs +++ b/crates/openshell-ocsf/src/format/shorthand.rs @@ -43,7 +43,6 @@ impl OcsfEvent { #[must_use] pub fn format_shorthand(&self) -> String { let base = self.base(); - let ts = format_ts(base.time); let sev = severity_char(base.severity.as_u8()); match self { @@ -85,7 +84,7 @@ impl OcsfEvent { format!(" {actor_str} -> {dst}") }; - format!("{ts} {sev} 
NET:{activity} {action}{arrow}{rule_ctx}") + format!("{sev} NET:{activity} {action}{arrow}{rule_ctx}") } Self::HttpActivity(e) => { @@ -116,7 +115,7 @@ impl OcsfEvent { format!(" {actor_str} -> {method} {url_str}") }; - format!("{ts} {sev} HTTP:{method} {action}{arrow}{rule_ctx}") + format!("{sev} HTTP:{method} {action}{arrow}{rule_ctx}") } Self::SshActivity(e) => { @@ -143,7 +142,7 @@ impl OcsfEvent { }) .unwrap_or_default(); - format!("{ts} {sev} SSH:{activity} {action} {peer}{auth_ctx}") + format!("{sev} SSH:{activity} {action} {peer}{auth_ctx}") } Self::ProcessActivity(e) => { @@ -160,7 +159,7 @@ impl OcsfEvent { .map(|c| format!(" [cmd:{c}]")) .unwrap_or_default(); - format!("{ts} {sev} PROC:{activity} {proc_str}{exit_ctx}{cmd_ctx}") + format!("{sev} PROC:{activity} {proc_str}{exit_ctx}{cmd_ctx}") } Self::DetectionFinding(e) => { @@ -173,7 +172,7 @@ impl OcsfEvent { .map(|c| format!(" [confidence:{}]", c.label().to_lowercase())) .unwrap_or_default(); - format!("{ts} {sev} FINDING:{disposition} \"{title}\"{confidence_ctx}") + format!("{sev} FINDING:{disposition} \"{title}\"{confidence_ctx}") } Self::ApplicationLifecycle(e) => { @@ -185,7 +184,7 @@ impl OcsfEvent { .map(|s| s.label().to_lowercase()) .unwrap_or_default(); - format!("{ts} {sev} LIFECYCLE:{activity} {app} {status}") + format!("{sev} LIFECYCLE:{activity} {app} {status}") } Self::DeviceConfigStateChange(e) => { @@ -214,7 +213,7 @@ impl OcsfEvent { }) .unwrap_or_default(); - format!("{ts} {sev} CONFIG:{state} {what}{version_ctx}") + format!("{sev} CONFIG:{state} {what}{version_ctx}") } Self::Base(e) => { @@ -240,7 +239,7 @@ impl OcsfEvent { }) .unwrap_or_default(); - format!("{ts} {sev} EVENT {message}{unmapped_ctx}") + format!("{sev} EVENT {message}{unmapped_ctx}") } } } @@ -337,7 +336,7 @@ mod tests { let shorthand = event.format_shorthand(); assert_eq!( shorthand, - "14:00:00.000 I NET:OPEN ALLOWED python3(42) -> api.example.com:443 [policy:default-egress engine:mechanistic]" + "I NET:OPEN ALLOWED 
python3(42) -> api.example.com:443 [policy:default-egress engine:mechanistic]" ); } @@ -366,7 +365,7 @@ mod tests { let shorthand = event.format_shorthand(); assert_eq!( shorthand, - "14:00:00.000 M NET:REFUSE DENIED node(1234) -> 93.184.216.34:443/tcp [policy:bypass-detect engine:iptables]" + "M NET:REFUSE DENIED node(1234) -> 93.184.216.34:443/tcp [policy:bypass-detect engine:iptables]" ); } @@ -395,7 +394,7 @@ mod tests { let shorthand = event.format_shorthand(); assert_eq!( shorthand, - "14:00:00.000 I HTTP:GET ALLOWED curl(88) -> GET https://api.example.com/v1/data [policy:default-egress]" + "I HTTP:GET ALLOWED curl(88) -> GET https://api.example.com/v1/data [policy:default-egress]" ); } @@ -414,10 +413,7 @@ mod tests { }); let shorthand = event.format_shorthand(); - assert_eq!( - shorthand, - "14:00:00.000 I SSH:OPEN ALLOWED 10.42.0.1:48201 [auth:NSSH1]" - ); + assert_eq!(shorthand, "I SSH:OPEN ALLOWED 10.42.0.1:48201 [auth:NSSH1]"); } #[test] @@ -435,7 +431,7 @@ mod tests { let shorthand = event.format_shorthand(); assert_eq!( shorthand, - "14:00:00.000 I PROC:LAUNCH python3(42) [cmd:python3 /app/main.py]" + "I PROC:LAUNCH python3(42) [cmd:python3 /app/main.py]" ); } @@ -459,10 +455,7 @@ mod tests { }); let shorthand = event.format_shorthand(); - assert_eq!( - shorthand, - "14:00:00.000 I PROC:TERMINATE python3(42) [exit:0]" - ); + assert_eq!(shorthand, "I PROC:TERMINATE python3(42) [exit:0]"); } #[test] @@ -487,7 +480,7 @@ mod tests { let shorthand = event.format_shorthand(); assert_eq!( shorthand, - "14:00:00.000 H FINDING:BLOCKED \"NSSH1 Nonce Replay Attack\" [confidence:high]" + "H FINDING:BLOCKED \"NSSH1 Nonce Replay Attack\" [confidence:high]" ); } @@ -512,10 +505,7 @@ mod tests { }); let shorthand = event.format_shorthand(); - assert_eq!( - shorthand, - "14:00:00.000 I LIFECYCLE:START openshell-sandbox success" - ); + assert_eq!(shorthand, "I LIFECYCLE:START openshell-sandbox success"); } #[test] @@ -536,7 +526,7 @@ mod tests { let shorthand = 
event.format_shorthand(); assert_eq!( shorthand, - "14:00:00.000 I CONFIG:LOADED policy reloaded [version:v3 hash:sha256:abc123def456]" + "I CONFIG:LOADED policy reloaded [version:v3 hash:sha256:abc123def456]" ); } @@ -551,7 +541,7 @@ mod tests { let shorthand = event.format_shorthand(); assert_eq!( shorthand, - "14:00:00.000 I EVENT Network namespace created [ns:openshell-sandbox-abc123]" + "I EVENT Network namespace created [ns:openshell-sandbox-abc123]" ); } } From c99041b16c3a22ef8cb5780b7e0071b27f2b1229 Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Thu, 26 Mar 2026 09:09:34 -0700 Subject: [PATCH 04/20] refactor(ocsf): replace single-char severity with bracketed labels Replace cryptic single-character severity codes (I/L/M/H/C/F) with readable bracketed labels: [LOW], [MED], [HIGH], [CRIT], [FATAL]. Informational severity (the happy-path default) is omitted entirely to keep normal log output clean and avoid redundancy with the tracing-level INFO that the display layer already provides. Before: sandbox INFO I NET:OPEN ALLOWED ... After: sandbox INFO NET:OPEN ALLOWED ... Before: sandbox INFO M NET:OPEN DENIED ... After: sandbox INFO [MED] NET:OPEN DENIED ... --- crates/openshell-ocsf/src/format/shorthand.rs | 67 +++++++++++++------ 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/crates/openshell-ocsf/src/format/shorthand.rs b/crates/openshell-ocsf/src/format/shorthand.rs index f8506b6d..de260ba5 100644 --- a/crates/openshell-ocsf/src/format/shorthand.rs +++ b/crates/openshell-ocsf/src/format/shorthand.rs @@ -36,6 +36,29 @@ pub fn severity_char(severity_id: u8) -> char { } } +/// Format the severity as a bracketed tag placed after the `CLASS:ACTIVITY`. 
+/// +/// Placed as a suffix so the class name always starts at column 0, keeping +/// logs vertically scannable: +/// +/// ```text +/// NET:OPEN [INFO] ALLOWED python3(42) -> api.example.com:443 +/// NET:OPEN [MED] DENIED python3(42) -> blocked.com:443 +/// FINDING:BLOCKED [HIGH] "NSSH1 Nonce Replay Attack" +/// ``` +#[must_use] +pub fn severity_tag(severity_id: u8) -> &'static str { + match severity_id { + 1 => "[INFO]", + 2 => "[LOW]", + 3 => "[MED]", + 4 => "[HIGH]", + 5 => "[CRIT]", + 6 => "[FATAL]", + _ => "[INFO]", + } +} + impl OcsfEvent { /// Produce the single-line shorthand for `openshell.log` and gRPC log push. /// @@ -43,7 +66,7 @@ impl OcsfEvent { #[must_use] pub fn format_shorthand(&self) -> String { let base = self.base(); - let sev = severity_char(base.severity.as_u8()); + let sev = severity_tag(base.severity.as_u8()); match self { Self::NetworkActivity(e) => { @@ -84,7 +107,7 @@ impl OcsfEvent { format!(" {actor_str} -> {dst}") }; - format!("{sev} NET:{activity} {action}{arrow}{rule_ctx}") + format!("NET:{activity} {sev} {action}{arrow}{rule_ctx}") } Self::HttpActivity(e) => { @@ -115,7 +138,7 @@ impl OcsfEvent { format!(" {actor_str} -> {method} {url_str}") }; - format!("{sev} HTTP:{method} {action}{arrow}{rule_ctx}") + format!("HTTP:{method} {sev} {action}{arrow}{rule_ctx}") } Self::SshActivity(e) => { @@ -142,7 +165,7 @@ impl OcsfEvent { }) .unwrap_or_default(); - format!("{sev} SSH:{activity} {action} {peer}{auth_ctx}") + format!("SSH:{activity} {sev} {action} {peer}{auth_ctx}") } Self::ProcessActivity(e) => { @@ -159,7 +182,7 @@ impl OcsfEvent { .map(|c| format!(" [cmd:{c}]")) .unwrap_or_default(); - format!("{sev} PROC:{activity} {proc_str}{exit_ctx}{cmd_ctx}") + format!("PROC:{activity} {sev} {proc_str}{exit_ctx}{cmd_ctx}") } Self::DetectionFinding(e) => { @@ -172,7 +195,7 @@ impl OcsfEvent { .map(|c| format!(" [confidence:{}]", c.label().to_lowercase())) .unwrap_or_default(); - format!("{sev} FINDING:{disposition} 
\"{title}\"{confidence_ctx}") + format!("FINDING:{disposition} {sev} \"{title}\"{confidence_ctx}") } Self::ApplicationLifecycle(e) => { @@ -184,7 +207,7 @@ impl OcsfEvent { .map(|s| s.label().to_lowercase()) .unwrap_or_default(); - format!("{sev} LIFECYCLE:{activity} {app} {status}") + format!("LIFECYCLE:{activity} {sev} {app} {status}") } Self::DeviceConfigStateChange(e) => { @@ -213,7 +236,7 @@ impl OcsfEvent { }) .unwrap_or_default(); - format!("{sev} CONFIG:{state} {what}{version_ctx}") + format!("CONFIG:{state} {sev} {what}{version_ctx}") } Self::Base(e) => { @@ -239,7 +262,7 @@ impl OcsfEvent { }) .unwrap_or_default(); - format!("{sev} EVENT {message}{unmapped_ctx}") + format!("EVENT {sev} {message}{unmapped_ctx}") } } } @@ -336,7 +359,7 @@ mod tests { let shorthand = event.format_shorthand(); assert_eq!( shorthand, - "I NET:OPEN ALLOWED python3(42) -> api.example.com:443 [policy:default-egress engine:mechanistic]" + "NET:OPEN [INFO] ALLOWED python3(42) -> api.example.com:443 [policy:default-egress engine:mechanistic]" ); } @@ -365,7 +388,7 @@ mod tests { let shorthand = event.format_shorthand(); assert_eq!( shorthand, - "M NET:REFUSE DENIED node(1234) -> 93.184.216.34:443/tcp [policy:bypass-detect engine:iptables]" + "NET:REFUSE [MED] DENIED node(1234) -> 93.184.216.34:443/tcp [policy:bypass-detect engine:iptables]" ); } @@ -394,7 +417,7 @@ mod tests { let shorthand = event.format_shorthand(); assert_eq!( shorthand, - "I HTTP:GET ALLOWED curl(88) -> GET https://api.example.com/v1/data [policy:default-egress]" + "HTTP:GET [INFO] ALLOWED curl(88) -> GET https://api.example.com/v1/data [policy:default-egress]" ); } @@ -413,7 +436,10 @@ mod tests { }); let shorthand = event.format_shorthand(); - assert_eq!(shorthand, "I SSH:OPEN ALLOWED 10.42.0.1:48201 [auth:NSSH1]"); + assert_eq!( + shorthand, + "SSH:OPEN [INFO] ALLOWED 10.42.0.1:48201 [auth:NSSH1]" + ); } #[test] @@ -431,7 +457,7 @@ mod tests { let shorthand = event.format_shorthand(); assert_eq!( shorthand, - 
"I PROC:LAUNCH python3(42) [cmd:python3 /app/main.py]" + "PROC:LAUNCH [INFO] python3(42) [cmd:python3 /app/main.py]" ); } @@ -455,7 +481,7 @@ mod tests { }); let shorthand = event.format_shorthand(); - assert_eq!(shorthand, "I PROC:TERMINATE python3(42) [exit:0]"); + assert_eq!(shorthand, "PROC:TERMINATE [INFO] python3(42) [exit:0]"); } #[test] @@ -480,7 +506,7 @@ mod tests { let shorthand = event.format_shorthand(); assert_eq!( shorthand, - "H FINDING:BLOCKED \"NSSH1 Nonce Replay Attack\" [confidence:high]" + "FINDING:BLOCKED [HIGH] \"NSSH1 Nonce Replay Attack\" [confidence:high]" ); } @@ -505,7 +531,10 @@ mod tests { }); let shorthand = event.format_shorthand(); - assert_eq!(shorthand, "I LIFECYCLE:START openshell-sandbox success"); + assert_eq!( + shorthand, + "LIFECYCLE:START [INFO] openshell-sandbox success" + ); } #[test] @@ -526,7 +555,7 @@ mod tests { let shorthand = event.format_shorthand(); assert_eq!( shorthand, - "I CONFIG:LOADED policy reloaded [version:v3 hash:sha256:abc123def456]" + "CONFIG:LOADED [INFO] policy reloaded [version:v3 hash:sha256:abc123def456]" ); } @@ -541,7 +570,7 @@ mod tests { let shorthand = event.format_shorthand(); assert_eq!( shorthand, - "I EVENT Network namespace created [ns:openshell-sandbox-abc123]" + "EVENT [INFO] Network namespace created [ns:openshell-sandbox-abc123]" ); } } From 445bf21f711c98f22e26c41215ad84f5923c9dd0 Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Thu, 26 Mar 2026 10:44:58 -0700 Subject: [PATCH 05/20] feat(sandbox): use OCSF level label for structured events in log push Set the level field to 'OCSF' instead of 'INFO' for OCSF events in the gRPC log push. 
This visually distinguishes structured OCSF events from plain tracing output in the TUI and CLI sandbox logs: sandbox OCSF NET:OPEN [INFO] ALLOWED python3(42) -> api.example.com:443 sandbox OCSF NET:OPEN [MED] DENIED python3(42) -> blocked.com:443 sandbox INFO Fetching sandbox policy via gRPC --- .../openshell-ocsf/src/tracing_layers/shorthand_layer.rs | 2 +- crates/openshell-sandbox/src/log_push.rs | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs b/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs index f8a39f6a..0c254070 100644 --- a/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs +++ b/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs @@ -53,7 +53,7 @@ where if let Some(ocsf_event) = clone_current_event() { let line = ocsf_event.format_shorthand(); if let Ok(mut w) = self.writer.lock() { - let _ = writeln!(w, "{line}"); + let _ = writeln!(w, "OCSF {line}"); } } } else if self.include_non_ocsf { diff --git a/crates/openshell-sandbox/src/log_push.rs b/crates/openshell-sandbox/src/log_push.rs index 22b02e83..17f9bcc3 100644 --- a/crates/openshell-sandbox/src/log_push.rs +++ b/crates/openshell-sandbox/src/log_push.rs @@ -69,10 +69,16 @@ impl Layer for LogPushLayer { let ts = current_time_ms().unwrap_or(0); + let is_ocsf = meta.target() == openshell_ocsf::OCSF_TARGET; + let log = SandboxLogLine { sandbox_id: self.sandbox_id.clone(), timestamp_ms: ts, - level: meta.level().to_string(), + level: if is_ocsf { + "OCSF".to_string() + } else { + meta.level().to_string() + }, target: meta.target().to_string(), message: msg, source: "sandbox".to_string(), From 6525bab272aca07e82bd59b10cb08c0120343015 Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Tue, 31 Mar 2026 17:29:59 -0700 Subject: [PATCH 06/20] fix(sandbox): convert new Landlock path-skip warning to OCSF PR #677 added a warn!() for inaccessible Landlock 
paths in best-effort mode. Convert to ConfigStateChangeBuilder with degraded state so it flows through the OCSF shorthand format consistently. --- crates/openshell-sandbox/src/proxy.rs | 4 +--- .../src/sandbox/linux/landlock.rs | 15 ++++++++----- .../src/sandbox/linux/netns.rs | 22 +++++++++---------- crates/openshell-sandbox/src/ssh.rs | 2 +- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/crates/openshell-sandbox/src/proxy.rs b/crates/openshell-sandbox/src/proxy.rs index fbba63a0..f77e156e 100644 --- a/crates/openshell-sandbox/src/proxy.rs +++ b/crates/openshell-sandbox/src/proxy.rs @@ -2075,9 +2075,7 @@ async fn handle_forward_proxy( .severity(SeverityId::Low) .status(StatusId::Failure) .dst_endpoint(Endpoint::from_domain(&host_lc, port)) - .message(format!( - "Failed to clone OPA engine for forward L7: {e}" - )) + .message(format!("Failed to clone OPA engine for forward L7: {e}")) .build(); ocsf_emit!(event); regorus::Engine::new() diff --git a/crates/openshell-sandbox/src/sandbox/linux/landlock.rs b/crates/openshell-sandbox/src/sandbox/linux/landlock.rs index 1f168cc2..4dcc5544 100644 --- a/crates/openshell-sandbox/src/sandbox/linux/landlock.rs +++ b/crates/openshell-sandbox/src/sandbox/linux/landlock.rs @@ -169,11 +169,16 @@ fn try_open_path(path: &Path, compatibility: &LandlockCompatibility) -> Result Date: Tue, 31 Mar 2026 17:33:11 -0700 Subject: [PATCH 07/20] fix(sandbox): use rolling appender for OCSF JSONL file Match the main openshell.log rotation mechanics (daily, 3 files max) instead of a single unbounded append-only file. Prevents disk exhaustion when ocsf_logging_enabled is left on in long-running sandboxes. 
--- crates/openshell-sandbox/src/main.rs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/crates/openshell-sandbox/src/main.rs b/crates/openshell-sandbox/src/main.rs index 0b373f27..4b707a5b 100644 --- a/crates/openshell-sandbox/src/main.rs +++ b/crates/openshell-sandbox/src/main.rs @@ -145,15 +145,18 @@ async fn main() -> Result<()> { let (_file_guard, _jsonl_guard) = if let Some((file_writer, file_guard)) = file_logging { let file_filter = EnvFilter::new("info"); - // OCSF JSONL file: append-only, created eagerly but gated by the - // enabled flag. The file exists on disk even when OCSF is off (0 bytes). - let jsonl_logging = std::fs::OpenOptions::new() - .create(true) - .append(true) - .open("/var/log/openshell-ocsf.log") + // OCSF JSONL file: rolling appender matching the main log file + // (daily rotation, 3 files max). Created eagerly but gated by the + // enabled flag — no JSONL is written until ocsf_logging_enabled is set. + let jsonl_logging = tracing_appender::rolling::RollingFileAppender::builder() + .rotation(tracing_appender::rolling::Rotation::DAILY) + .filename_prefix("openshell-ocsf") + .filename_suffix("log") + .max_log_files(3) + .build("/var/log") .ok() - .map(|f| { - let (writer, guard) = tracing_appender::non_blocking(f); + .map(|roller| { + let (writer, guard) = tracing_appender::non_blocking(roller); let layer = OcsfJsonlLayer::new(writer).with_enabled_flag(ocsf_enabled.clone()); (layer, guard) }); From dd69f8f575bec8bf5b4f52de60e9b9df0af35d24 Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Tue, 31 Mar 2026 18:26:34 -0700 Subject: [PATCH 08/20] fix(sandbox): address reviewer warnings for OCSF integration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit W1: Remove redundant 'OCSF' prefix from shorthand file layer — the class name (NET:OPEN, HTTP:GET) already identifies structured events and the LogPushLayer separately 
sets the level field. W2: Log a debug message when OCSF_CTX.set() is called a second time instead of silently discarding via let _. W3: Document the boundary between OCSF-migrated events and intentionally plain tracing calls (DEBUG/TRACE, transient, internal plumbing). W4: Migrate remaining iptables LOG rule failure warnings in netns.rs (IPv4 TCP/UDP, IPv6 TCP/UDP) to ConfigStateChangeBuilder for consistency with the IPv4 bypass rule failure already migrated. W5: Migrate malformed inference request warn to NetworkActivity with ActivityId::Refuse and SeverityId::Medium. W6: Use Medium severity for L7 deny decisions (both CONNECT tunnel and FORWARD proxy paths) to match the CONNECT deny severity pattern. Allows and audits remain Informational. --- .../src/tracing_layers/shorthand_layer.rs | 2 +- crates/openshell-sandbox/src/l7/relay.rs | 24 ++++++--- crates/openshell-sandbox/src/lib.rs | 42 +++++++++++---- crates/openshell-sandbox/src/proxy.rs | 40 +++++++++++--- .../src/sandbox/linux/netns.rs | 53 +++++++++++++++---- 5 files changed, 127 insertions(+), 34 deletions(-) diff --git a/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs b/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs index 0c254070..f8a39f6a 100644 --- a/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs +++ b/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs @@ -53,7 +53,7 @@ where if let Some(ocsf_event) = clone_current_event() { let line = ocsf_event.format_shorthand(); if let Ok(mut w) = self.writer.lock() { - let _ = writeln!(w, "OCSF {line}"); + let _ = writeln!(w, "{line}"); } } } else if self.include_non_ocsf { diff --git a/crates/openshell-sandbox/src/l7/relay.rs b/crates/openshell-sandbox/src/l7/relay.rs index 280c1dae..5feea2ca 100644 --- a/crates/openshell-sandbox/src/l7/relay.rs +++ b/crates/openshell-sandbox/src/l7/relay.rs @@ -203,17 +203,29 @@ where // Log every L7 decision as an OCSF HTTP Activity event. 
// Uses redacted_target (path only, no query params) to avoid logging secrets. { - let (action_id, disposition_id) = match decision_str { - "allow" => (ActionId::Allowed, DispositionId::Allowed), - "deny" => (ActionId::Denied, DispositionId::Blocked), - "audit" => (ActionId::Allowed, DispositionId::Allowed), - _ => (ActionId::Other, DispositionId::Other), + let (action_id, disposition_id, severity) = match decision_str { + "allow" => ( + ActionId::Allowed, + DispositionId::Allowed, + SeverityId::Informational, + ), + "deny" => (ActionId::Denied, DispositionId::Blocked, SeverityId::Medium), + "audit" => ( + ActionId::Allowed, + DispositionId::Allowed, + SeverityId::Informational, + ), + _ => ( + ActionId::Other, + DispositionId::Other, + SeverityId::Informational, + ), }; let event = HttpActivityBuilder::new(crate::ocsf_ctx()) .activity(ActivityId::Other) .action(action_id) .disposition(disposition_id) - .severity(SeverityId::Informational) + .severity(severity) .http_request(HttpRequest::new( &request_info.action, OcsfUrl::new("http", &ctx.host, &redacted_target, ctx.port), diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index 8a75841b..1370fdf5 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -41,6 +41,25 @@ use openshell_ocsf::{ SandboxContext, SeverityId, StateId, StatusId, ocsf_emit, }; +// --------------------------------------------------------------------------- +// OCSF Context +// --------------------------------------------------------------------------- +// +// The following log sites intentionally remain as plain `tracing` macros +// and are NOT migrated to OCSF builders: +// +// - DEBUG/TRACE events (zombie reaping, ip commands, gRPC connects, PTY state) +// - Transient "about to do X" events where the result is logged separately +// (e.g., "Fetching sandbox policy via gRPC", "Creating OPA engine from proto") +// - Internal SSH channel warnings (unknown channel, PTY 
resize failures) +// - Denial flush telemetry (the individual denials are already OCSF events) +// - Status reporting failures (sync to gateway, non-actionable) +// - Route refresh interval validation warnings +// +// These are operational plumbing that don't represent security decisions, +// policy changes, or observable sandbox behavior worth structuring. +// --------------------------------------------------------------------------- + /// Process-wide OCSF sandbox context. Initialized once during `run_sandbox()` /// startup and accessible from any module in the crate via [`ocsf_ctx()`]. static OCSF_CTX: OnceLock = OnceLock::new(); @@ -209,15 +228,20 @@ pub async fn run_sandbox( .map(|s| s.trim().to_string()) .unwrap_or_else(|_| "openshell-sandbox".to_string()); - let _ = OCSF_CTX.set(SandboxContext { - sandbox_id: sandbox_id.clone().unwrap_or_default(), - sandbox_name: sandbox.as_deref().unwrap_or_default().to_string(), - container_image: std::env::var("OPENSHELL_CONTAINER_IMAGE").unwrap_or_default(), - hostname, - product_version: openshell_core::VERSION.to_string(), - proxy_ip: std::net::IpAddr::from([127, 0, 0, 1]), - proxy_port: 3128, - }); + if OCSF_CTX + .set(SandboxContext { + sandbox_id: sandbox_id.clone().unwrap_or_default(), + sandbox_name: sandbox.as_deref().unwrap_or_default().to_string(), + container_image: std::env::var("OPENSHELL_CONTAINER_IMAGE").unwrap_or_default(), + hostname, + product_version: openshell_core::VERSION.to_string(), + proxy_ip: std::net::IpAddr::from([127, 0, 0, 1]), + proxy_port: 3128, + }) + .is_err() + { + debug!("OCSF context already initialized, keeping existing"); + } } // Load policy and initialize OPA engine diff --git a/crates/openshell-sandbox/src/proxy.rs b/crates/openshell-sandbox/src/proxy.rs index f77e156e..2674df94 100644 --- a/crates/openshell-sandbox/src/proxy.rs +++ b/crates/openshell-sandbox/src/proxy.rs @@ -1004,7 +1004,7 @@ const INITIAL_INFERENCE_BUF: usize = 65536; async fn handle_inference_interception( 
client: TcpStream, host: &str, - _port: u16, + port: u16, tls_state: Option<&Arc>, inference_ctx: Option<&Arc>, ) -> Result { @@ -1094,7 +1094,19 @@ async fn handle_inference_interception( } } ParseResult::Invalid(reason) => { - warn!(reason = %reason, "rejecting malformed inference request"); + { + let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Refuse) + .action(ActionId::Denied) + .disposition(DispositionId::Rejected) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(INFERENCE_LOCAL_HOST, port)) + .message(format!("Rejecting malformed inference request: {reason}")) + .status_detail(&reason) + .build(); + ocsf_emit!(event); + } let response = format_http_response(400, &[], b"Bad Request"); write_all(&mut tls_client, &response).await?; return Ok(InferenceOutcome::Denied { reason }); @@ -2133,17 +2145,29 @@ async fn handle_forward_proxy( }; { - let (action_id, disposition_id) = match decision_str { - "allow" => (ActionId::Allowed, DispositionId::Allowed), - "deny" => (ActionId::Denied, DispositionId::Blocked), - "audit" => (ActionId::Allowed, DispositionId::Allowed), - _ => (ActionId::Other, DispositionId::Other), + let (action_id, disposition_id, severity) = match decision_str { + "allow" => ( + ActionId::Allowed, + DispositionId::Allowed, + SeverityId::Informational, + ), + "deny" => (ActionId::Denied, DispositionId::Blocked, SeverityId::Medium), + "audit" => ( + ActionId::Allowed, + DispositionId::Allowed, + SeverityId::Informational, + ), + _ => ( + ActionId::Other, + DispositionId::Other, + SeverityId::Informational, + ), }; let event = HttpActivityBuilder::new(crate::ocsf_ctx()) .activity(ActivityId::Other) .action(action_id) .disposition(disposition_id) - .severity(SeverityId::Informational) + .severity(severity) .http_request(HttpRequest::new( method, OcsfUrl::new("http", &host_lc, &path, port), diff --git a/crates/openshell-sandbox/src/sandbox/linux/netns.rs 
b/crates/openshell-sandbox/src/sandbox/linux/netns.rs index b2cb3501..37d11f0c 100644 --- a/crates/openshell-sandbox/src/sandbox/linux/netns.rs +++ b/crates/openshell-sandbox/src/sandbox/linux/netns.rs @@ -403,11 +403,17 @@ impl NetworkNamespace { "--log-uid", ], ) { - warn!( - error = %e, - "Failed to install LOG rule for TCP (xt_LOG module may not be loaded); \ - bypass REJECT rules will still be installed" - ); + openshell_ocsf::ocsf_emit!(openshell_ocsf::ConfigStateChangeBuilder::new( + crate::ocsf_ctx() + ) + .severity(openshell_ocsf::SeverityId::Low) + .status(openshell_ocsf::StatusId::Failure) + .state(openshell_ocsf::StateId::Other, "degraded") + .message(format!( + "Failed to install LOG rule for TCP (xt_LOG module may not be loaded) [ns:{}]: {e}", + self.name + )) + .build()); } // Rule 5: REJECT TCP bypass attempts (fast-fail) @@ -448,9 +454,16 @@ impl NetworkNamespace { "--log-uid", ], ) { - warn!( - error = %e, - "Failed to install LOG rule for UDP; bypass REJECT rules will still be installed" + openshell_ocsf::ocsf_emit!( + openshell_ocsf::ConfigStateChangeBuilder::new(crate::ocsf_ctx()) + .severity(openshell_ocsf::SeverityId::Low) + .status(openshell_ocsf::StatusId::Failure) + .state(openshell_ocsf::StateId::Other, "degraded") + .message(format!( + "Failed to install LOG rule for UDP [ns:{}]: {e}", + self.name + )) + .build() ); } @@ -525,7 +538,17 @@ impl NetworkNamespace { "--log-uid", ], ) { - warn!(error = %e, "Failed to install IPv6 LOG rule for TCP"); + openshell_ocsf::ocsf_emit!( + openshell_ocsf::ConfigStateChangeBuilder::new(crate::ocsf_ctx()) + .severity(openshell_ocsf::SeverityId::Low) + .status(openshell_ocsf::StatusId::Failure) + .state(openshell_ocsf::StateId::Other, "degraded") + .message(format!( + "Failed to install IPv6 LOG rule for TCP [ns:{}]: {e}", + self.name + )) + .build() + ); } // REJECT TCP bypass attempts @@ -566,7 +589,17 @@ impl NetworkNamespace { "--log-uid", ], ) { - warn!(error = %e, "Failed to install IPv6 LOG rule 
for UDP"); + openshell_ocsf::ocsf_emit!( + openshell_ocsf::ConfigStateChangeBuilder::new(crate::ocsf_ctx()) + .severity(openshell_ocsf::SeverityId::Low) + .status(openshell_ocsf::StatusId::Failure) + .state(openshell_ocsf::StateId::Other, "degraded") + .message(format!( + "Failed to install IPv6 LOG rule for UDP [ns:{}]: {e}", + self.name + )) + .build() + ); } // REJECT UDP bypass attempts From fcfd69e3563ac00879cb8f77e2d06c4be84187e7 Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Tue, 31 Mar 2026 18:37:23 -0700 Subject: [PATCH 09/20] refactor(sandbox): rename ocsf_logging_enabled to ocsf_json_enabled The shorthand logs are already OCSF-structured events. The setting specifically controls the JSONL file export, so the name should reflect that: ocsf_json_enabled. --- crates/openshell-core/src/settings.rs | 6 +++--- .../openshell-ocsf/src/tracing_layers/jsonl_layer.rs | 2 +- crates/openshell-sandbox/src/lib.rs | 10 +++------- crates/openshell-sandbox/src/main.rs | 4 ++-- 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/crates/openshell-core/src/settings.rs b/crates/openshell-core/src/settings.rs index e5d58d27..995fe6e2 100644 --- a/crates/openshell-core/src/settings.rs +++ b/crates/openshell-core/src/settings.rs @@ -50,10 +50,10 @@ pub struct RegisteredSetting { /// 5. Add a unit test in this module's `tests` section to cover the new key. pub const REGISTERED_SETTINGS: &[RegisteredSetting] = &[ // When true the sandbox writes OCSF v1.7.0 JSONL records to - // `/var/log/openshell-ocsf.log` in addition to the human-readable - // shorthand log. Defaults to false (JSONL file is not created). + // `/var/log/openshell-ocsf*.log` (daily rotation, 3 files) in addition + // to the human-readable shorthand log. Defaults to false (no JSONL written). 
RegisteredSetting { - key: "ocsf_logging_enabled", + key: "ocsf_json_enabled", kind: SettingValueKind::Bool, }, // Test-only keys live behind the `dev-settings` feature flag so they diff --git a/crates/openshell-ocsf/src/tracing_layers/jsonl_layer.rs b/crates/openshell-ocsf/src/tracing_layers/jsonl_layer.rs index e8592b7d..1f7022ef 100644 --- a/crates/openshell-ocsf/src/tracing_layers/jsonl_layer.rs +++ b/crates/openshell-ocsf/src/tracing_layers/jsonl_layer.rs @@ -21,7 +21,7 @@ use crate::tracing_layers::event_bridge::{OCSF_TARGET, clone_current_event}; /// An optional enabled flag (`Arc`) can be set via /// [`with_enabled_flag`](Self::with_enabled_flag). When the flag is present and /// `false`, the layer short-circuits without writing. This allows the sandbox -/// to hot-toggle OCSF JSONL output at runtime via the `ocsf_logging_enabled` +/// to hot-toggle OCSF JSONL output at runtime via the `ocsf_json_enabled` /// setting without rebuilding the subscriber. pub struct OcsfJsonlLayer { writer: Mutex, diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index 1370fdf5..f9e8fb4c 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -1986,15 +1986,11 @@ async fn run_policy_poll_loop( } } - // Apply OCSF logging toggle from the `ocsf_logging_enabled` setting. - let new_ocsf = - extract_bool_setting(&result.settings, "ocsf_logging_enabled").unwrap_or(false); + // Apply OCSF JSON toggle from the `ocsf_json_enabled` setting. 
+ let new_ocsf = extract_bool_setting(&result.settings, "ocsf_json_enabled").unwrap_or(false); let prev_ocsf = ocsf_enabled.swap(new_ocsf, Ordering::Relaxed); if new_ocsf != prev_ocsf { - info!( - ocsf_logging_enabled = new_ocsf, - "OCSF JSONL logging toggled" - ); + info!(ocsf_json_enabled = new_ocsf, "OCSF JSONL logging toggled"); } current_config_revision = result.config_revision; diff --git a/crates/openshell-sandbox/src/main.rs b/crates/openshell-sandbox/src/main.rs index 4b707a5b..a37dce0e 100644 --- a/crates/openshell-sandbox/src/main.rs +++ b/crates/openshell-sandbox/src/main.rs @@ -136,7 +136,7 @@ async fn main() -> Result<()> { let _log_push_handle = log_push_state.map(|(_, handle)| handle); // Shared flag: the sandbox poll loop toggles this when the - // `ocsf_logging_enabled` setting changes. The JSONL layer checks it + // `ocsf_json_enabled` setting changes. The JSONL layer checks it // on each event and short-circuits when false. let ocsf_enabled = Arc::new(AtomicBool::new(false)); @@ -147,7 +147,7 @@ async fn main() -> Result<()> { // OCSF JSONL file: rolling appender matching the main log file // (daily rotation, 3 files max). Created eagerly but gated by the - // enabled flag — no JSONL is written until ocsf_logging_enabled is set. + // enabled flag — no JSONL is written until ocsf_json_enabled is set. let jsonl_logging = tracing_appender::rolling::RollingFileAppender::builder() .rotation(tracing_appender::rolling::Rotation::DAILY) .filename_prefix("openshell-ocsf") From f530cc6a126fe1690547e8e3bc652b20f0e0e1fb Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Tue, 31 Mar 2026 20:28:00 -0700 Subject: [PATCH 10/20] fix(ocsf): add timestamps to shorthand file layer output The OcsfShorthandLayer writes directly to the log file with no outer display layer to supply timestamps. Add a UTC timestamp prefix to every line so the file output matches what tracing::fmt used to provide. 
Before: CONFIG:VALIDATED [INFO] Validated 'sandbox' user exists in image After: 2026-04-01T15:49:11.649Z CONFIG:VALIDATED [INFO] Validated ... --- .../src/tracing_layers/shorthand_layer.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs b/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs index f8a39f6a..b4be3d47 100644 --- a/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs +++ b/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs @@ -6,6 +6,7 @@ use std::io::Write; use std::sync::Mutex; +use chrono::Utc; use tracing::Subscriber; use tracing_subscriber::Layer; use tracing_subscriber::layer::Context; @@ -16,6 +17,10 @@ use crate::tracing_layers::event_bridge::{OCSF_TARGET, clone_current_event}; /// /// Events with `target: "ocsf"` are formatted via `format_shorthand()`. /// Non-OCSF events are formatted with a simple fallback. +/// +/// Each line is prefixed with a UTC timestamp (`YYYY-MM-DDTHH:MM:SS.mmmZ`) +/// since this layer writes directly to a file with no outer display layer +/// to supply timestamps. pub struct OcsfShorthandLayer { writer: Mutex, /// Whether to include non-OCSF events in the output. 
@@ -48,12 +53,14 @@ where fn on_event(&self, event: &tracing::Event<'_>, _ctx: Context<'_, S>) { let meta = event.metadata(); + let ts = Utc::now().format("%Y-%m-%dT%H:%M:%S%.3fZ"); + if meta.target() == OCSF_TARGET { // This is an OCSF event — clone from thread-local (non-consuming) if let Some(ocsf_event) = clone_current_event() { let line = ocsf_event.format_shorthand(); if let Ok(mut w) = self.writer.lock() { - let _ = writeln!(w, "{line}"); + let _ = writeln!(w, "{ts} {line}"); } } } else if self.include_non_ocsf { @@ -64,7 +71,7 @@ where let mut message = String::new(); event.record(&mut MessageVisitor(&mut message)); if let Ok(mut w) = self.writer.lock() { - let _ = writeln!(w, "{level} {target}: {message}"); + let _ = writeln!(w, "{ts} {level} {target}: {message}"); } } } From c57b1352625e1f723a63d216acd9dfd81aa655c3 Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Tue, 31 Mar 2026 20:54:38 -0700 Subject: [PATCH 11/20] fix(docker): touch openshell-ocsf source to invalidate cargo cache The supervisor-workspace stage touches sandbox and core sources to force recompilation over the rust-deps dummy stubs, but openshell-ocsf was missing. This caused the Docker cargo cache to use stale ocsf objects from the deps stage, preventing changes to the ocsf crate (like the timestamp fix) from appearing in the final binary. Also adds a shorthand layer test verifying timestamp output, and drafts the observability docs section. 
--- .../src/tracing_layers/shorthand_layer.rs | 42 ++++ deploy/docker/Dockerfile.images | 1 + docs/index.md | 20 ++ docs/observability/accessing-logs.md | 104 ++++++++++ docs/observability/index.md | 43 ++++ docs/observability/logging.md | 161 +++++++++++++++ docs/observability/ocsf-json-export.md | 187 ++++++++++++++++++ 7 files changed, 558 insertions(+) create mode 100644 docs/observability/accessing-logs.md create mode 100644 docs/observability/index.md create mode 100644 docs/observability/logging.md create mode 100644 docs/observability/ocsf-json-export.md diff --git a/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs b/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs index b4be3d47..b28922d7 100644 --- a/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs +++ b/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs @@ -110,4 +110,46 @@ mod tests { let layer = OcsfShorthandLayer::new(buffer).with_non_ocsf(false); assert!(!layer.include_non_ocsf); } + + #[test] + fn test_non_ocsf_fallback_includes_timestamp() { + use std::sync::Arc; + use tracing_subscriber::layer::SubscriberExt; + use tracing_subscriber::util::SubscriberInitExt; + + let buffer = Arc::new(Mutex::new(Vec::::new())); + let writer = SyncWriter(buffer.clone()); + let layer = OcsfShorthandLayer::new(writer).with_non_ocsf(true); + + let subscriber = tracing_subscriber::registry().with(layer); + let _guard = subscriber.set_default(); + + tracing::info!("test message"); + + let output = buffer.lock().unwrap(); + let line = String::from_utf8_lossy(&output); + // Should start with a timestamp like 2026-04-01T... + assert!( + line.contains('T') && line.contains('Z'), + "Expected timestamp in output, got: {line}" + ); + assert!( + line.contains("test message"), + "Expected message, got: {line}" + ); + } +} + +/// Test helper: wraps `Arc>>` so it implements `Write + Send`. 
+#[cfg(test)] +struct SyncWriter(std::sync::Arc>>); + +#[cfg(test)] +impl Write for SyncWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.0.lock().unwrap().write(buf) + } + fn flush(&mut self) -> std::io::Result<()> { + self.0.lock().unwrap().flush() + } } diff --git a/deploy/docker/Dockerfile.images b/deploy/docker/Dockerfile.images index e7669c26..3fe56570 100644 --- a/deploy/docker/Dockerfile.images +++ b/deploy/docker/Dockerfile.images @@ -139,6 +139,7 @@ COPY crates/openshell-sandbox/ crates/openshell-sandbox/ RUN touch \ crates/openshell-core/build.rs \ + crates/openshell-ocsf/src/lib.rs \ crates/openshell-sandbox/src/main.rs \ proto/*.proto && \ if [ -n "${OPENSHELL_CARGO_VERSION:-}" ]; then \ diff --git a/docs/index.md b/docs/index.md index 89979a76..0945e523 100644 --- a/docs/index.md +++ b/docs/index.md @@ -164,6 +164,16 @@ Keep inference traffic private by routing API calls to local or self-hosted back {bdg-secondary}`Concept` ::: +:::{grid-item-card} Observability +:link: observability/index +:link-type: doc + +Understand sandbox logs, access them via CLI and TUI, and export OCSF JSON records. + ++++ +{bdg-secondary}`How-To` +::: + :::{grid-item-card} Reference :link: reference/default-policy :link-type: doc @@ -248,6 +258,16 @@ reference/policy-schema reference/support-matrix ``` +```{toctree} +:caption: Observability +:hidden: + +observability/index +observability/logging +observability/accessing-logs +observability/ocsf-json-export +``` + ```{toctree} :caption: Security :hidden: diff --git a/docs/observability/accessing-logs.md b/docs/observability/accessing-logs.md new file mode 100644 index 00000000..31993f1b --- /dev/null +++ b/docs/observability/accessing-logs.md @@ -0,0 +1,104 @@ +--- +title: + page: Accessing Logs + nav: Accessing Logs +description: How to view sandbox logs through the CLI, TUI, and directly on the sandbox filesystem. 
+topics: +- Generative AI +- Cybersecurity +tags: +- Logging +- CLI +- TUI +- Observability +content: + type: how_to + difficulty: technical_beginner + audience: + - engineer + - data_scientist +--- + + + +# Accessing Logs + +OpenShell provides three ways to access sandbox logs: the CLI, the TUI, and direct filesystem access inside the sandbox. + +## CLI + +Use `openshell logs` to stream logs from a running sandbox: + +```console +$ openshell logs smoke-l4 --source sandbox +``` + +The CLI receives logs from the gateway over gRPC. Each line includes a timestamp, source, level, and message: + +``` +[1775014138.811] [sandbox] [OCSF ] [ocsf] NET:OPEN [INFO] ALLOWED /usr/bin/curl(57) -> api.github.com:443 [policy:github_api engine:opa] +[1775014138.886] [sandbox] [OCSF ] [ocsf] HTTP:GET [INFO] ALLOWED GET http://api.github.com/zen [policy:github_api] +[1775014139.212] [sandbox] [OCSF ] [ocsf] NET:OPEN [MED] DENIED /usr/bin/curl(63) -> httpbin.org:443 [policy:- engine:opa] +[1775014119.160] [sandbox] [INFO ] [openshell_sandbox] Fetching sandbox policy via gRPC +``` + +OCSF structured events show `OCSF` as the level. Standard tracing events show `INFO`, `WARN`, or `ERROR`. + +## TUI + +The TUI dashboard displays sandbox logs in real time. Logs appear in the log panel with the same format as the CLI. + +## Gateway Log Storage + +The sandbox pushes logs to the gateway over gRPC in real time. The gateway stores a bounded buffer of recent log lines per sandbox. This buffer is not persisted to disk and is lost when the gateway restarts. + +For durable log storage, use the log files inside the sandbox or enable [OCSF JSON export](ocsf-json-export.md) and ship the JSONL files to an external log aggregator. 
+ +## Direct Filesystem Access + +If you have SSH access to the sandbox, you can read the log files directly: + +```console +$ ssh sandbox@ cat /var/log/openshell.2026-04-01.log +``` + +Or through the OpenShell SSH config: + +```console +$ ssh -F <(openshell sandbox ssh-config smoke-l4) openshell-smoke-l4 \ + "cat /var/log/openshell.2026-04-01.log" +``` + +The log files inside the sandbox contain the complete record, including events that may have been dropped from the gRPC push channel under load (the push channel is bounded and drops events rather than blocking). + +## Filtering by Event Type + +The shorthand format is designed for `grep`. Some useful patterns: + +```console +# All denied connections +$ grep "DENIED\|BLOCKED" /var/log/openshell.*.log + +# All network events +$ grep "^NET:" /var/log/openshell.*.log + +# All L7 enforcement decisions +$ grep "^HTTP:" /var/log/openshell.*.log + +# Security findings only +$ grep "^FINDING:" /var/log/openshell.*.log + +# Policy changes +$ grep "^CONFIG:" /var/log/openshell.*.log + +# Events at medium severity or above +$ grep "\[MED\]\|\[HIGH\]\|\[CRIT\]\|\[FATAL\]" /var/log/openshell.*.log +``` + +## Next Steps + +- Learn how the [log formats](logging.md) work and how to read the shorthand. +- [Enable OCSF JSON export](ocsf-json-export.md) for machine-readable structured output. diff --git a/docs/observability/index.md b/docs/observability/index.md new file mode 100644 index 00000000..a9607457 --- /dev/null +++ b/docs/observability/index.md @@ -0,0 +1,43 @@ +--- +title: + page: Observability + nav: Observability +description: Understand how OpenShell logs sandbox activity, how to access logs, and how to export structured OCSF records. 
+topics: +- Generative AI +- Cybersecurity +tags: +- Logging +- OCSF +- Observability +- Monitoring +content: + type: concept + difficulty: technical_beginner + audience: + - engineer + - data_scientist +--- + + + +# Observability + +OpenShell provides structured logging for every sandbox. Every network connection, process lifecycle event, filesystem policy decision, and configuration change is recorded so you can understand exactly what happened inside a sandbox. + +This section covers: + +- **[Sandbox Logging](logging.md)** -- How the two log formats work (standard tracing and OCSF structured events), where logs are stored, and how to read them. +- **[Accessing Logs](accessing-logs.md)** -- How to view logs through the CLI, TUI, and directly on the sandbox filesystem. +- **[OCSF JSON Export](ocsf-json-export.md)** -- How to enable full OCSF JSON output for integration with SIEMs, log aggregators, and compliance tools. + +```{toctree} +:hidden: + +logging +accessing-logs +ocsf-json-export +``` diff --git a/docs/observability/logging.md b/docs/observability/logging.md new file mode 100644 index 00000000..bc7e040d --- /dev/null +++ b/docs/observability/logging.md @@ -0,0 +1,161 @@ +--- +title: + page: Sandbox Logging + nav: Logging +description: How OpenShell logs sandbox activity using standard tracing and OCSF structured events. +topics: +- Generative AI +- Cybersecurity +tags: +- Logging +- OCSF +- Observability +content: + type: concept + difficulty: technical_beginner + audience: + - engineer + - data_scientist +--- + + + +# Sandbox Logging + +Every OpenShell sandbox produces a log that records network connections, process lifecycle events, filesystem policy decisions, and configuration changes. The log uses two formats depending on the type of event. 
+ +## Log Formats + +### Standard tracing + +Internal operational events use Rust's `tracing` framework with a conventional format: + +``` +2026-04-01T03:28:39.160Z INFO openshell_sandbox: Fetching sandbox policy via gRPC +2026-04-01T03:28:39.175Z INFO openshell_sandbox: Creating OPA engine from proto policy data +``` + +These events cover startup plumbing, gRPC communication, and internal state transitions that are useful for debugging but don't represent security-relevant decisions. + +### OCSF structured events + +Network, process, filesystem, and configuration events use the [Open Cybersecurity Schema Framework (OCSF)](https://ocsf.io) format. OCSF is an open standard for normalizing security telemetry across tools and platforms. OpenShell maps sandbox events to OCSF v1.7.0 event classes. + +In the log file, OCSF events appear in a shorthand format designed for quick human and agent scanning: + +``` +2026-04-01T03:28:39.811Z NET:OPEN [INFO] ALLOWED /usr/bin/curl(57) -> api.github.com:443 [policy:github_api engine:opa] +2026-04-01T03:28:39.886Z HTTP:GET [INFO] ALLOWED GET http://api.github.com/zen [policy:github_api] +2026-04-01T03:28:40.212Z NET:OPEN [MED] DENIED /usr/bin/curl(63) -> httpbin.org:443 [policy:- engine:opa] +``` + +When viewed through the CLI or TUI (which receive logs via gRPC), OCSF events are labeled with `OCSF` instead of `INFO` to distinguish them from standard tracing: + +``` +[1775014138.811] [sandbox] [OCSF ] [ocsf] NET:OPEN [INFO] ALLOWED /usr/bin/curl(57) -> api.github.com:443 [policy:github_api engine:opa] +[1775014139.212] [sandbox] [OCSF ] [ocsf] NET:OPEN [MED] DENIED /usr/bin/curl(63) -> httpbin.org:443 [policy:- engine:opa] +[1775014119.160] [sandbox] [INFO ] [openshell_sandbox] Fetching sandbox policy via gRPC +``` + +## OCSF Event Classes + +OpenShell maps sandbox events to these OCSF classes: + +| Shorthand prefix | OCSF class | Class UID | What it covers | +|---|---|---|---| +| `NET:` | Network Activity | 4001 | TCP proxy 
CONNECT tunnels, bypass detection, DNS failures | +| `HTTP:` | HTTP Activity | 4002 | HTTP FORWARD requests, L7 enforcement decisions | +| `SSH:` | SSH Activity | 4007 | SSH handshakes, authentication, channel operations | +| `PROC:` | Process Activity | 1007 | Process start, exit, timeout, signal failures | +| `FINDING:` | Detection Finding | 2004 | Security findings (nonce replay, proxy bypass, unsafe policy) | +| `CONFIG:` | Device Config State Change | 5019 | Policy load/reload, Landlock, TLS setup, inference routes | +| `LIFECYCLE:` | Application Lifecycle | 6002 | Sandbox supervisor start, SSH server ready | + +## Reading the Shorthand Format + +The shorthand format follows this pattern: + +``` +CLASS:ACTIVITY [SEVERITY] ACTION DETAILS [CONTEXT] +``` + +### Components + +**Class and activity** (`NET:OPEN`, `HTTP:GET`, `PROC:LAUNCH`) identify the OCSF event class and what happened. The class name always starts at the same column position for vertical scanning. + +**Severity** indicates the OCSF severity of the event: + +| Tag | Meaning | When used | +|---|---|---| +| `[INFO]` | Informational | Allowed connections, successful operations | +| `[LOW]` | Low | DNS failures, operational warnings | +| `[MED]` | Medium | Denied connections, policy violations | +| `[HIGH]` | High | Security findings (nonce replay, bypass detection) | +| `[CRIT]` | Critical | Process timeout kills | +| `[FATAL]` | Fatal | Unrecoverable failures | + +**Action** (`ALLOWED`, `DENIED`, `BLOCKED`) is the security control disposition. Not all events have an action (informational config events, for example). 
+ +**Details** vary by event class: + +- Network: `process(pid) -> host:port` with the process identity and destination +- HTTP: `METHOD url` with the HTTP method and target +- SSH: peer address and authentication type +- Process: `name(pid)` with exit code or command line +- Config: description of what changed +- Finding: quoted title with confidence level + +**Context** (in brackets at the end) provides the policy rule and enforcement engine that produced the decision. + +### Examples + +A allowed HTTPS connection: +``` +NET:OPEN [INFO] ALLOWED /usr/bin/curl(57) -> api.github.com:443 [policy:github_api engine:opa] +``` + +An L7 read-only policy denying a POST: +``` +HTTP:POST [MED] DENIED POST http://api.github.com/user/repos [policy:github_api] +``` + +A connection denied because no policy matched: +``` +NET:OPEN [MED] DENIED /usr/bin/curl(63) -> httpbin.org:443 [policy:- engine:opa] +``` + +An SSH handshake accepted: +``` +SSH:OPEN [INFO] ALLOWED 10.42.0.31:37494 [auth:NSSH1] +``` + +A process launched inside the sandbox: +``` +PROC:LAUNCH [INFO] sleep(49) +``` + +A policy reload after a settings change: +``` +CONFIG:DETECTED [INFO] Settings poll: config change detected [old_revision:2915564174587774909 new_revision:11008534403127604466 policy_changed:true] +CONFIG:LOADED [INFO] Policy reloaded successfully [policy_hash:0cc0c2b525573c07] +``` + +## Log File Location + +Inside the sandbox, logs are written to `/var/log/`: + +| File | Format | Rotation | +|---|---|---| +| `openshell.YYYY-MM-DD.log` | Shorthand + standard tracing | Daily, 3 files max | +| `openshell-ocsf.YYYY-MM-DD.log` | OCSF JSONL (when enabled) | Daily, 3 files max | + +Both files rotate daily and retain the 3 most recent files to bound disk usage. + +## Next Steps + +- [Access logs](accessing-logs.md) through the CLI, TUI, or sandbox filesystem. +- [Enable OCSF JSON export](ocsf-json-export.md) for SIEM integration and compliance. 
+- Learn about [network policies](../sandboxes/policies.md) that generate these events. diff --git a/docs/observability/ocsf-json-export.md b/docs/observability/ocsf-json-export.md new file mode 100644 index 00000000..b55419d6 --- /dev/null +++ b/docs/observability/ocsf-json-export.md @@ -0,0 +1,187 @@ +--- +title: + page: OCSF JSON Export + nav: OCSF JSON Export +description: How to enable full OCSF JSON logging for SIEM integration, compliance, and structured analysis. +topics: +- Generative AI +- Cybersecurity +tags: +- OCSF +- JSON +- SIEM +- Compliance +- Observability +content: + type: how_to + difficulty: technical_intermediate + audience: + - engineer +--- + + + +# OCSF JSON Export + +The [shorthand log format](logging.md) is optimized for humans and agents reading logs in real time. For machine consumption, compliance archival, or SIEM integration, you can enable full OCSF JSON export. This writes every OCSF event as a complete JSON record in JSONL format (one JSON object per line). + +## Enable JSON Export + +Use the `ocsf_logging_enabled` setting to toggle JSON export. The setting can be applied globally (all sandboxes) or per-sandbox. + +Global: + +```console
$ openshell settings set --global ocsf_logging_enabled true +``` + +Per-sandbox: + +```console
$ openshell settings set --sandbox smoke-l4 ocsf_logging_enabled true +``` + +The setting takes effect on the next poll cycle (default: 10 seconds). No sandbox restart is required. + +To disable: + +```console
$ openshell settings set --global ocsf_logging_enabled false +``` + +## Output Location + +When enabled, OCSF JSON records are written to `/var/log/openshell-ocsf.YYYY-MM-DD.log` inside the sandbox. The file rotates daily and retains the 3 most recent files, matching the main log file rotation. + +## JSON Record Structure + +Each line is a complete OCSF v1.7.0 JSON object. 
Here is an example of a network connection event: + +```json +{ + "class_uid": 4001, + "class_name": "Network Activity", + "category_uid": 4, + "category_name": "Network Activity", + "activity_id": 1, + "activity_name": "Open", + "severity_id": 1, + "severity": "Informational", + "status_id": 1, + "status": "Success", + "time": 1775014138811, + "message": "CONNECT allowed api.github.com:443", + "metadata": { + "product": { + "name": "OpenShell Sandbox Supervisor", + "vendor_name": "NVIDIA", + "version": "0.3.0" + }, + "version": "1.7.0" + }, + "action_id": 1, + "action": "Allowed", + "disposition_id": 1, + "disposition": "Allowed", + "dst_endpoint": { + "domain": "api.github.com", + "port": 443 + }, + "src_endpoint": { + "ip": "10.42.0.31", + "port": 37494 + }, + "actor": { + "process": { + "name": "/usr/bin/curl", + "pid": 57 + } + }, + "firewall_rule": { + "name": "github_api", + "type": "opa" + } +} +``` + +And a denied connection: + +```json +{ + "class_uid": 4001, + "class_name": "Network Activity", + "activity_id": 1, + "activity_name": "Open", + "severity_id": 3, + "severity": "Medium", + "status_id": 2, + "status": "Failure", + "action_id": 2, + "action": "Denied", + "disposition_id": 2, + "disposition": "Blocked", + "message": "CONNECT denied httpbin.org:443", + "dst_endpoint": { + "domain": "httpbin.org", + "port": 443 + }, + "actor": { + "process": { + "name": "/usr/bin/curl", + "pid": 63 + } + }, + "firewall_rule": { + "name": "-", + "type": "opa" + } +} +``` + +:::{note} +The JSON examples above are formatted for readability. The actual JSONL file contains one JSON object per line with no whitespace formatting. 
+::: + +## OCSF Event Classes in JSON + +The `class_uid` field identifies the event type: + +| `class_uid` | Class | Shorthand prefix | +|---|---|---| +| 4001 | Network Activity | `NET:` | +| 4002 | HTTP Activity | `HTTP:` | +| 4007 | SSH Activity | `SSH:` | +| 1007 | Process Activity | `PROC:` | +| 2004 | Detection Finding | `FINDING:` | +| 5019 | Device Config State Change | `CONFIG:` | +| 6002 | Application Lifecycle | `LIFECYCLE:` | + +## Integration with External Tools + +The JSONL file can be shipped to any tool that accepts OCSF-formatted data: + +- **Splunk**: Use the [Splunk OCSF Add-on](https://splunkbase.splunk.com/app/6943) to ingest OCSF JSONL files. +- **Amazon Security Lake**: OCSF is the native schema for Security Lake. +- **Elastic**: Use Filebeat to ship JSONL files with the OCSF field mappings. +- **Custom pipelines**: Parse the JSONL file with `jq`, Python, or any JSON-capable tool. + +Example with `jq` to extract all denied connections: + +```console +$ cat /var/log/openshell-ocsf.2026-04-01.log | \ + jq -c 'select(.action == "Denied")' +``` + +## Relationship to Shorthand Logs + +The shorthand format in `openshell.YYYY-MM-DD.log` and the JSON format in `openshell-ocsf.YYYY-MM-DD.log` are derived from the same OCSF events. The shorthand is a human-readable projection; the JSON is the complete record. Both are generated at the same time from the same event data. + +The shorthand log is always active. The JSON export is opt-in via `ocsf_logging_enabled`. + +## Next Steps + +- Learn how to [read the shorthand format](logging.md) for real-time monitoring. +- See the [OCSF specification](https://schema.ocsf.io/) for the full schema reference. 
From 3d2b63242424226e62421d5c3c89db612d31eace Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Tue, 31 Mar 2026 21:14:29 -0700 Subject: [PATCH 12/20] fix(ocsf): add OCSF level prefix to file layer shorthand output Without a level prefix, OCSF events in the log file have no visual anchor at the position where standard tracing lines show INFO/WARN. This makes scanning the file harder since the eye has nothing consistent to lock onto after the timestamp. Before: 2026-04-01T04:04:13.065Z CONFIG:DISCOVERY [INFO] ... After: 2026-04-01T04:04:13.065Z OCSF CONFIG:DISCOVERY [INFO] ... --- crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs b/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs index b28922d7..ea75cf0d 100644 --- a/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs +++ b/crates/openshell-ocsf/src/tracing_layers/shorthand_layer.rs @@ -60,7 +60,7 @@ where if let Some(ocsf_event) = clone_current_event() { let line = ocsf_event.format_shorthand(); if let Ok(mut w) = self.writer.lock() { - let _ = writeln!(w, "{ts} {line}"); + let _ = writeln!(w, "{ts} OCSF {line}"); } } } else if self.include_non_ocsf { From ce4a130af58fd6586ae2f9c43493a7e90a6f7dea Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Tue, 31 Mar 2026 21:18:52 -0700 Subject: [PATCH 13/20] fix(ocsf): clean up shorthand formatting for listen and SSH events - Fix double space in NET:LISTEN, SSH:LISTEN, and other events where action is empty (e.g., 'NET:LISTEN [INFO] 10.200.0.1' -> 'NET:LISTEN [INFO] 10.200.0.1') - Add listen address to SSH:LISTEN event (was empty) - Downgrade SSH handshake intermediate steps (reading preface, verifying) from OCSF events to debug!() traces. 
Only the final verdict (accepted/denied) is an OCSF event now, reducing noise from 3 events to 1 per SSH connection. - Apply same spacing fix to HTTP shorthand for consistency. --- crates/openshell-ocsf/src/format/shorthand.rs | 32 +++++++++++++++++-- crates/openshell-sandbox/src/proxy.rs | 2 +- crates/openshell-sandbox/src/ssh.rs | 24 ++------------ 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/crates/openshell-ocsf/src/format/shorthand.rs b/crates/openshell-ocsf/src/format/shorthand.rs index de260ba5..3b245e10 100644 --- a/crates/openshell-ocsf/src/format/shorthand.rs +++ b/crates/openshell-ocsf/src/format/shorthand.rs @@ -107,7 +107,13 @@ impl OcsfEvent { format!(" {actor_str} -> {dst}") }; - format!("NET:{activity} {sev} {action}{arrow}{rule_ctx}") + let detail = match (action.is_empty(), arrow.is_empty()) { + (true, true) => String::new(), + (true, false) => arrow, + (false, true) => format!(" {action}"), + (false, false) => format!(" {action}{arrow}"), + }; + format!("NET:{activity} {sev}{detail}{rule_ctx}") } Self::HttpActivity(e) => { @@ -138,7 +144,13 @@ impl OcsfEvent { format!(" {actor_str} -> {method} {url_str}") }; - format!("HTTP:{method} {sev} {action}{arrow}{rule_ctx}") + let detail = match (action.is_empty(), arrow.is_empty()) { + (true, true) => String::new(), + (true, false) => arrow, + (false, true) => format!(" {action}"), + (false, false) => format!(" {action}{arrow}"), + }; + format!("HTTP:{method} {sev}{detail}{rule_ctx}") } Self::SshActivity(e) => { @@ -165,7 +177,21 @@ impl OcsfEvent { }) .unwrap_or_default(); - format!("SSH:{activity} {sev} {action} {peer}{auth_ctx}") + let detail = [ + if action.is_empty() { "" } else { &action }, + if peer.is_empty() { "" } else { &peer }, + ] + .iter() + .filter(|s| !s.is_empty()) + .copied() + .collect::>() + .join(" "); + let detail = if detail.is_empty() { + String::new() + } else { + format!(" {detail}") + }; + format!("SSH:{activity} {sev}{detail}{auth_ctx}") } 
Self::ProcessActivity(e) => { diff --git a/crates/openshell-sandbox/src/proxy.rs b/crates/openshell-sandbox/src/proxy.rs index 2674df94..b52cc60b 100644 --- a/crates/openshell-sandbox/src/proxy.rs +++ b/crates/openshell-sandbox/src/proxy.rs @@ -166,7 +166,7 @@ impl ProxyHandle { .severity(SeverityId::Informational) .status(StatusId::Success) .dst_endpoint(Endpoint::from_ip(local_addr.ip(), local_addr.port())) - .message("Proxy listening") + .message(format!("Proxy listening on {local_addr}")) .build(); ocsf_emit!(event); } diff --git a/crates/openshell-sandbox/src/ssh.rs b/crates/openshell-sandbox/src/ssh.rs index 15cbd901..b9f94739 100644 --- a/crates/openshell-sandbox/src/ssh.rs +++ b/crates/openshell-sandbox/src/ssh.rs @@ -69,6 +69,7 @@ async fn ssh_server_init( .activity(ActivityId::Listen) .severity(SeverityId::Informational) .status(StatusId::Success) + .src_endpoint_addr(listen_addr.ip(), listen_addr.port()) .message(format!("SSH server listening on {listen_addr}")) .build() ); @@ -178,29 +179,10 @@ async fn handle_connection( provider_env: HashMap, nonce_cache: &NonceCache, ) -> Result<()> { - ocsf_emit!( - SshActivityBuilder::new(crate::ocsf_ctx()) - .activity(ActivityId::Open) - .severity(SeverityId::Informational) - .src_endpoint_addr(peer.ip(), peer.port()) - .message(format!( - "SSH connection: reading handshake preface from {peer}" - )) - .build() - ); + tracing::debug!(peer = %peer, "SSH connection: reading handshake preface"); let mut line = String::new(); read_line(&mut stream, &mut line).await?; - ocsf_emit!( - SshActivityBuilder::new(crate::ocsf_ctx()) - .activity(ActivityId::Open) - .severity(SeverityId::Informational) - .src_endpoint_addr(peer.ip(), peer.port()) - .message(format!( - "SSH connection: preface received from {peer}, verifying (len={})", - line.len() - )) - .build() - ); + tracing::debug!(peer = %peer, preface_len = line.len(), "SSH connection: preface received, verifying"); if !verify_preface(&line, secret, handshake_skew_secs, 
nonce_cache)? { ocsf_emit!( SshActivityBuilder::new(crate::ocsf_ctx()) From 657aa34a496481309b002d893e6febb5188b3895 Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Tue, 31 Mar 2026 21:23:48 -0700 Subject: [PATCH 14/20] docs(observability): update examples with OCSF prefix and formatting fixes Align doc examples with the deployed output: - Add OCSF level prefix to all shorthand examples in the log file - Show mixed OCSF + standard tracing in the file format section - Update listen events (no double space, SSH includes address) - Show one SSH:OPEN per connection instead of three - Update grep patterns to use 'OCSF NET:' etc. --- docs/observability/accessing-logs.md | 19 +++++++----- docs/observability/logging.md | 46 ++++++++++++++++++---------- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/docs/observability/accessing-logs.md b/docs/observability/accessing-logs.md index 31993f1b..ca5c8883 100644 --- a/docs/observability/accessing-logs.md +++ b/docs/observability/accessing-logs.md @@ -39,10 +39,10 @@ $ openshell logs smoke-l4 --source sandbox The CLI receives logs from the gateway over gRPC. 
Each line includes a timestamp, source, level, and message: ``` -[1775014138.811] [sandbox] [OCSF ] [ocsf] NET:OPEN [INFO] ALLOWED /usr/bin/curl(57) -> api.github.com:443 [policy:github_api engine:opa] -[1775014138.886] [sandbox] [OCSF ] [ocsf] HTTP:GET [INFO] ALLOWED GET http://api.github.com/zen [policy:github_api] -[1775014139.212] [sandbox] [OCSF ] [ocsf] NET:OPEN [MED] DENIED /usr/bin/curl(63) -> httpbin.org:443 [policy:- engine:opa] -[1775014119.160] [sandbox] [INFO ] [openshell_sandbox] Fetching sandbox policy via gRPC +[1775014132.118] [sandbox] [OCSF ] [ocsf] NET:OPEN [INFO] ALLOWED /usr/bin/curl(58) -> api.github.com:443 [policy:github_api engine:opa] +[1775014132.190] [sandbox] [OCSF ] [ocsf] HTTP:GET [INFO] ALLOWED GET http://api.github.com/zen [policy:github_api] +[1775014132.690] [sandbox] [OCSF ] [ocsf] NET:OPEN [MED] DENIED /usr/bin/curl(64) -> httpbin.org:443 [policy:- engine:opa] +[1775014113.058] [sandbox] [INFO ] [openshell_sandbox] Starting sandbox ``` OCSF structured events show `OCSF` as the level. Standard tracing events show `INFO`, `WARN`, or `ERROR`. @@ -83,16 +83,19 @@ The shorthand format is designed for `grep`. 
Some useful patterns: $ grep "DENIED\|BLOCKED" /var/log/openshell.*.log # All network events -$ grep "^NET:" /var/log/openshell.*.log +$ grep "OCSF NET:" /var/log/openshell.*.log # All L7 enforcement decisions -$ grep "^HTTP:" /var/log/openshell.*.log +$ grep "OCSF HTTP:" /var/log/openshell.*.log # Security findings only -$ grep "^FINDING:" /var/log/openshell.*.log +$ grep "OCSF FINDING:" /var/log/openshell.*.log # Policy changes -$ grep "^CONFIG:" /var/log/openshell.*.log +$ grep "OCSF CONFIG:" /var/log/openshell.*.log + +# All OCSF events (exclude standard tracing) +$ grep "^.* OCSF " /var/log/openshell.*.log # Events at medium severity or above $ grep "\[MED\]\|\[HIGH\]\|\[CRIT\]\|\[FATAL\]" /var/log/openshell.*.log diff --git a/docs/observability/logging.md b/docs/observability/logging.md index bc7e040d..680f7e16 100644 --- a/docs/observability/logging.md +++ b/docs/observability/logging.md @@ -44,20 +44,26 @@ These events cover startup plumbing, gRPC communication, and internal state tran Network, process, filesystem, and configuration events use the [Open Cybersecurity Schema Framework (OCSF)](https://ocsf.io) format. OCSF is an open standard for normalizing security telemetry across tools and platforms. OpenShell maps sandbox events to OCSF v1.7.0 event classes. 
-In the log file, OCSF events appear in a shorthand format designed for quick human and agent scanning: +In the log file, OCSF events appear in a shorthand format with an `OCSF` level label, designed for quick human and agent scanning: ``` -2026-04-01T03:28:39.811Z NET:OPEN [INFO] ALLOWED /usr/bin/curl(57) -> api.github.com:443 [policy:github_api engine:opa] -2026-04-01T03:28:39.886Z HTTP:GET [INFO] ALLOWED GET http://api.github.com/zen [policy:github_api] -2026-04-01T03:28:40.212Z NET:OPEN [MED] DENIED /usr/bin/curl(63) -> httpbin.org:443 [policy:- engine:opa] +2026-04-01T04:04:13.058Z INFO openshell_sandbox: Starting sandbox +2026-04-01T04:04:13.065Z OCSF CONFIG:DISCOVERY [INFO] Server returned no policy; attempting local discovery +2026-04-01T04:04:13.074Z INFO openshell_sandbox: Creating OPA engine from proto policy data +2026-04-01T04:04:13.078Z OCSF CONFIG:VALIDATED [INFO] Validated 'sandbox' user exists in image +2026-04-01T04:04:32.118Z OCSF NET:OPEN [INFO] ALLOWED /usr/bin/curl(58) -> api.github.com:443 [policy:github_api engine:opa] +2026-04-01T04:04:32.190Z OCSF HTTP:GET [INFO] ALLOWED GET http://api.github.com/zen [policy:github_api] +2026-04-01T04:04:32.690Z OCSF NET:OPEN [MED] DENIED /usr/bin/curl(64) -> httpbin.org:443 [policy:- engine:opa] ``` -When viewed through the CLI or TUI (which receive logs via gRPC), OCSF events are labeled with `OCSF` instead of `INFO` to distinguish them from standard tracing: +The `OCSF` label at column 25 distinguishes structured events from standard `INFO` tracing at the same position. Both formats appear in the same file. 
+ +When viewed through the CLI or TUI (which receive logs via gRPC), the same distinction applies: ``` -[1775014138.811] [sandbox] [OCSF ] [ocsf] NET:OPEN [INFO] ALLOWED /usr/bin/curl(57) -> api.github.com:443 [policy:github_api engine:opa] -[1775014139.212] [sandbox] [OCSF ] [ocsf] NET:OPEN [MED] DENIED /usr/bin/curl(63) -> httpbin.org:443 [policy:- engine:opa] -[1775014119.160] [sandbox] [INFO ] [openshell_sandbox] Fetching sandbox policy via gRPC +[1775014132.118] [sandbox] [OCSF ] [ocsf] NET:OPEN [INFO] ALLOWED /usr/bin/curl(58) -> api.github.com:443 [policy:github_api engine:opa] +[1775014132.690] [sandbox] [OCSF ] [ocsf] NET:OPEN [MED] DENIED /usr/bin/curl(64) -> httpbin.org:443 [policy:- engine:opa] +[1775014113.058] [sandbox] [INFO ] [openshell_sandbox] Starting sandbox ``` ## OCSF Event Classes @@ -112,35 +118,41 @@ CLASS:ACTIVITY [SEVERITY] ACTION DETAILS [CONTEXT] ### Examples -A allowed HTTPS connection: +An allowed HTTPS connection: ``` -NET:OPEN [INFO] ALLOWED /usr/bin/curl(57) -> api.github.com:443 [policy:github_api engine:opa] +OCSF NET:OPEN [INFO] ALLOWED /usr/bin/curl(58) -> api.github.com:443 [policy:github_api engine:opa] ``` An L7 read-only policy denying a POST: ``` -HTTP:POST [MED] DENIED POST http://api.github.com/user/repos [policy:github_api] +OCSF HTTP:POST [MED] DENIED POST http://api.github.com/user/repos [policy:github_api] ``` A connection denied because no policy matched: ``` -NET:OPEN [MED] DENIED /usr/bin/curl(63) -> httpbin.org:443 [policy:- engine:opa] +OCSF NET:OPEN [MED] DENIED /usr/bin/curl(64) -> httpbin.org:443 [policy:- engine:opa] +``` + +Proxy and SSH servers ready: +``` +OCSF NET:LISTEN [INFO] 10.200.0.1:3128 +OCSF SSH:LISTEN [INFO] 0.0.0.0:2222 ``` -An SSH handshake accepted: +An SSH handshake accepted (one event per connection): ``` -SSH:OPEN [INFO] ALLOWED 10.42.0.31:37494 [auth:NSSH1] +OCSF SSH:OPEN [INFO] ALLOWED 10.42.0.52:42706 [auth:NSSH1] ``` A process launched inside the sandbox: ``` -PROC:LAUNCH [INFO] 
sleep(49) +OCSF PROC:LAUNCH [INFO] sleep(49) ``` A policy reload after a settings change: ``` -CONFIG:DETECTED [INFO] Settings poll: config change detected [old_revision:2915564174587774909 new_revision:11008534403127604466 policy_changed:true] -CONFIG:LOADED [INFO] Policy reloaded successfully [policy_hash:0cc0c2b525573c07] +OCSF CONFIG:DETECTED [INFO] Settings poll: config change detected [old_revision:2915564174587774909 new_revision:11008534403127604466 policy_changed:true] +OCSF CONFIG:LOADED [INFO] Policy reloaded successfully [policy_hash:0cc0c2b525573c07] ``` ## Log File Location From aad9177db225916febac8bbc8027545432c6b23d Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Tue, 31 Mar 2026 21:30:35 -0700 Subject: [PATCH 15/20] docs(agents): add OCSF logging guidance to AGENTS.md Add a Sandbox Logging (OCSF) section to AGENTS.md so agents have in-context guidance for deciding whether new log emissions should use OCSF structured logging or plain tracing. Covers event class selection, severity guidelines, builder API usage, dual-emit pattern for security findings, and the no-secrets rule. Also adds openshell-ocsf to the Architecture Overview table. --- AGENTS.md | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index 79dc29d1..97996594 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -35,6 +35,7 @@ These pipelines connect skills into end-to-end workflows. 
Individual skill files | `crates/openshell-policy/` | Policy engine | Filesystem, network, process, and inference constraints | | `crates/openshell-router/` | Privacy router | Privacy-aware LLM routing | | `crates/openshell-bootstrap/` | Cluster bootstrap | K3s cluster setup, image loading, mTLS PKI | +| `crates/openshell-ocsf/` | OCSF logging | OCSF v1.7.0 event types, builders, shorthand/JSONL formatters, tracing layers | | `crates/openshell-core/` | Shared core | Common types, configuration, error handling | | `crates/openshell-providers/` | Provider management | Credential provider backends | | `crates/openshell-tui/` | Terminal UI | Ratatui-based dashboard for monitoring | @@ -66,6 +67,85 @@ These pipelines connect skills into end-to-end workflows. Individual skill files - Store plan documents in `architecture/plans`. This is git ignored so its for easier access for humans. When asked to create Spikes or issues, you can skip to GitHub issues. Only use the plans dir when you aren't writing data somewhere else specific. - When asked to write a plan, write it there without asking for the location. +## Sandbox Logging (OCSF) + +When adding or modifying log emissions in `openshell-sandbox`, determine whether the event should use OCSF structured logging or plain `tracing`. 
+ +### When to use OCSF + +Use an OCSF builder + `ocsf_emit!()` for events that represent **observable sandbox behavior** visible to operators, security teams, or agents monitoring the sandbox: + +- Network decisions (allow, deny, bypass detection) +- HTTP/L7 enforcement decisions +- SSH authentication (accepted, denied, nonce replay) +- Process lifecycle (start, exit, timeout, signal failure) +- Security findings (unsafe policy, unavailable controls, replay attacks) +- Configuration changes (policy load/reload, TLS setup, inference routes, settings) +- Application lifecycle (supervisor start, SSH server ready) + +### When to use plain tracing + +Use `info!()`, `debug!()`, `warn!()` for **internal operational plumbing** that doesn't represent a security decision or observable state change: + +- gRPC connection attempts and retries +- "About to do X" events where the result is logged separately +- Internal SSH channel state (unknown channel, PTY resize) +- Zombie process reaping, denial flush telemetry +- DEBUG/TRACE level diagnostics + +### Choosing the OCSF event class + +| Event type | Builder | When to use | +|---|---|---| +| TCP connections, proxy tunnels, bypass | `NetworkActivityBuilder` | L4 network decisions, proxy operational events | +| HTTP requests, L7 enforcement | `HttpActivityBuilder` | Per-request method/path decisions | +| SSH sessions | `SshActivityBuilder` | Authentication, channel operations | +| Process start/stop | `ProcessActivityBuilder` | Entrypoint lifecycle, signal failures | +| Security alerts | `DetectionFindingBuilder` | Nonce replay, bypass detection, unsafe policy. Dual-emit with the domain event. 
| +| Policy/config changes | `ConfigStateChangeBuilder` | Policy load, Landlock apply, TLS setup, inference routes, settings | +| Supervisor lifecycle | `AppLifecycleBuilder` | Sandbox start, SSH server ready/failed | + +### Severity guidelines + +| Severity | When | +|---|---| +| `Informational` | Allowed connections, successful operations, config loaded | +| `Low` | DNS failures, non-fatal operational warnings, LOG rule failures | +| `Medium` | Denied connections, policy violations, deprecated config | +| `High` | Security findings (nonce replay, Landlock unavailable) | +| `Critical` | Process timeout kills | + +### Example: adding a new network event + +```rust +use openshell_ocsf::{ + ocsf_emit, NetworkActivityBuilder, ActivityId, ActionId, + DispositionId, Endpoint, Process, SeverityId, StatusId, +}; + +let event = NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Open) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(&host, port)) + .actor_process(Process::new(&binary, pid)) + .firewall_rule(&policy_name, &engine_type) + .message(format!("CONNECT denied {host}:{port}")) + .build(); +ocsf_emit!(event); +``` + +### Key points + +- `crate::ocsf_ctx()` returns the process-wide `SandboxContext`. It is always available (falls back to defaults in tests). +- `ocsf_emit!()` is non-blocking and cannot panic. It stores the event in a thread-local and emits via `tracing::info!()`. +- The shorthand layer and JSONL layer extract the event from the thread-local. The shorthand format is derived automatically from the builder fields. +- For security findings, **dual-emit**: one domain event (e.g., `SshActivityBuilder`) AND one `DetectionFindingBuilder` for the same incident. +- Never log secrets, credentials, or query parameters in OCSF messages. The OCSF JSONL file may be shipped to external systems. 
+- The `message` field should be a concise, grep-friendly summary. Details go in builder fields (dst_endpoint, firewall_rule, etc.). + ## Sandbox Infra Changes - If you change sandbox infrastructure, ensure `mise run sandbox` succeeds. From 9015d8e783fdc25c5f750920ce8e5b4d7feba23c Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Tue, 31 Mar 2026 21:33:10 -0700 Subject: [PATCH 16/20] fix: remove workflow files accidentally included during rebase These files were already merged to main in separate PRs. They got pulled into our branch during rebase conflict resolution for the deleted docs-preview-pr.yml file. --- .github/workflows/docs-preview-deploy.yml | 117 ------------------ ...-preview-build.yml => docs-preview-pr.yml} | 33 ++--- .github/workflows/slack-new-issue.yml | 66 ---------- 3 files changed, 11 insertions(+), 205 deletions(-) delete mode 100644 .github/workflows/docs-preview-deploy.yml rename .github/workflows/{docs-preview-build.yml => docs-preview-pr.yml} (58%) delete mode 100644 .github/workflows/slack-new-issue.yml diff --git a/.github/workflows/docs-preview-deploy.yml b/.github/workflows/docs-preview-deploy.yml deleted file mode 100644 index d7b4d7af..00000000 --- a/.github/workflows/docs-preview-deploy.yml +++ /dev/null @@ -1,117 +0,0 @@ -name: Docs PR Preview Deploy - -# Runs after the build workflow completes. Uses workflow_run so the -# GITHUB_TOKEN has write access to the base repo — this is the standard -# pattern for deploying from fork PRs where the pull_request token is -# read-only. 
-on: - workflow_run: - workflows: ["Docs PR Preview Build"] - types: [completed] - -concurrency: - group: preview-deploy-${{ github.event.workflow_run.head_branch }} - cancel-in-progress: true - -permissions: - contents: write - pull-requests: write - actions: read - -jobs: - deploy: - runs-on: ubuntu-latest - if: github.event.workflow_run.conclusion == 'success' - steps: - - name: Download PR metadata - uses: actions/download-artifact@v4 - with: - name: pr-metadata - path: pr-metadata/ - run-id: ${{ github.event.workflow_run.id }} - github-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Read PR metadata - id: metadata - run: | - echo "pr-number=$(cat pr-metadata/pr-number)" >> "$GITHUB_OUTPUT" - event_action="$(cat pr-metadata/event-action)" - echo "event-action=$event_action" >> "$GITHUB_OUTPUT" - if [[ "$event_action" == "closed" ]]; then - echo "action=remove" >> "$GITHUB_OUTPUT" - else - echo "action=deploy" >> "$GITHUB_OUTPUT" - fi - - - name: Download docs artifact - if: steps.metadata.outputs.action == 'deploy' - uses: actions/download-artifact@v4 - with: - name: docs-preview - path: docs-preview/ - run-id: ${{ github.event.workflow_run.id }} - github-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Deploy preview - if: steps.metadata.outputs.action == 'deploy' - uses: JamesIves/github-pages-deploy-action@v4 - with: - branch: gh-pages - folder: docs-preview/ - target-folder: pr-preview/pr-${{ steps.metadata.outputs.pr-number }} - commit-message: "Deploy preview for PR ${{ steps.metadata.outputs.pr-number }}" - clean: true - - - name: Remove preview - if: steps.metadata.outputs.action == 'remove' - run: | - git fetch origin gh-pages - git worktree add gh-pages-branch origin/gh-pages - cd gh-pages-branch - PREVIEW_DIR="pr-preview/pr-${{ steps.metadata.outputs.pr-number }}" - if [ -d "$PREVIEW_DIR" ]; then - git rm -rf "$PREVIEW_DIR" - git commit -m "Remove preview for PR ${{ steps.metadata.outputs.pr-number }}" - git push origin gh-pages - else - echo "Preview 
directory $PREVIEW_DIR does not exist, nothing to remove." - fi - - - name: Comment on PR - uses: actions/github-script@v7 - with: - script: | - const prNumber = parseInt('${{ steps.metadata.outputs.pr-number }}'); - const action = '${{ steps.metadata.outputs.action }}'; - const marker = ''; - const repo = context.repo; - - // Find existing preview comment - const comments = await github.rest.issues.listComments({ - ...repo, - issue_number: prNumber, - per_page: 100, - }); - const existing = comments.data.find(c => c.body.includes(marker)); - - let body; - if (action === 'deploy') { - const url = `https://${repo.owner}.github.io/${repo.repo}/pr-preview/pr-${prNumber}/`; - body = `${marker}\n### Docs Preview\n\nPreview deployed to ${url}`; - } else { - body = `${marker}\n### Docs Preview\n\nPreview removed.`; - } - - if (existing) { - await github.rest.issues.updateComment({ - ...repo, - comment_id: existing.id, - body, - }); - } else { - await github.rest.issues.createComment({ - ...repo, - issue_number: prNumber, - body, - }); - } diff --git a/.github/workflows/docs-preview-build.yml b/.github/workflows/docs-preview-pr.yml similarity index 58% rename from .github/workflows/docs-preview-build.yml rename to .github/workflows/docs-preview-pr.yml index be360bd2..6c0672ba 100644 --- a/.github/workflows/docs-preview-build.yml +++ b/.github/workflows/docs-preview-pr.yml @@ -1,4 +1,4 @@ -name: Docs PR Preview Build +name: Docs PR Preview on: pull_request: @@ -12,6 +12,8 @@ concurrency: cancel-in-progress: true permissions: + contents: write + pull-requests: write packages: read defaults: @@ -22,7 +24,7 @@ env: MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} jobs: - build: + preview: runs-on: build-arm64 container: image: ghcr.io/nvidia/openshell/ci:latest @@ -34,7 +36,6 @@ jobs: uses: actions/checkout@v4 - name: Install tools - if: github.event.action != 'closed' run: mise install - name: Build documentation @@ -47,23 +48,11 @@ jobs: find _build -name .doctrees -prune 
-exec rm -rf {} \; find _build -name .buildinfo -exec rm {} \; - - name: Upload docs artifact - if: github.event.action != 'closed' - uses: actions/upload-artifact@v4 - with: - name: docs-preview - path: _build/docs/ - retention-days: 1 - - - name: Save PR metadata - run: | - mkdir -p pr-metadata - echo "${{ github.event.number }}" > pr-metadata/pr-number - echo "${{ github.event.action }}" > pr-metadata/event-action - - - name: Upload PR metadata - uses: actions/upload-artifact@v4 + - name: Deploy preview + if: github.event.pull_request.head.repo.full_name == github.repository + uses: rossjrw/pr-preview-action@v1 with: - name: pr-metadata - path: pr-metadata/ - retention-days: 1 + source-dir: ./_build/docs/ + preview-branch: gh-pages + umbrella-dir: pr-preview + action: auto diff --git a/.github/workflows/slack-new-issue.yml b/.github/workflows/slack-new-issue.yml deleted file mode 100644 index 0fba9258..00000000 --- a/.github/workflows/slack-new-issue.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: "Slack: New Issue Alert" - -on: - issues: - types: [opened] - workflow_dispatch: - -permissions: - issues: read - -jobs: - notify: - runs-on: ubuntu-latest - if: github.repository_owner == 'NVIDIA' - steps: - - name: Send Slack notification - env: - SLACK_WEBHOOK: ${{ secrets.SLACK_COMMUNITY_WH }} - EVENT_NAME: ${{ github.event_name }} - REPO: ${{ github.repository }} - RUN_ID: ${{ github.run_id }} - run: | - if [ "$EVENT_NAME" = "workflow_dispatch" ]; then - TITLE="[Smoke Test] Example issue title" - AUTHOR="${{ github.actor }}" - URL="https://github.com/$REPO/actions/runs/$RUN_ID" - NUMBER="0" - LABELS="test" - else - TITLE=$(echo "${{ github.event.issue.title }}" | head -c 150) - AUTHOR="${{ github.event.issue.user.login }}" - URL="${{ github.event.issue.html_url }}" - NUMBER="${{ github.event.issue.number }}" - LABELS=$(echo '${{ toJSON(github.event.issue.labels.*.name) }}' \ - | jq -r 'if length > 0 then join(", ") else "none" end') - fi - - curl -sf "$SLACK_WEBHOOK" \ 
- -H "Content-Type: application/json" \ - -d "$(jq -n \ - --arg title "$TITLE" \ - --arg author "$AUTHOR" \ - --arg url "$URL" \ - --arg number "$NUMBER" \ - --arg labels "$LABELS" \ - '{ - blocks: [ - { - type: "section", - text: { - type: "mrkdwn", - text: ("*New Issue #" + $number + "*\n<" + $url + "|" + $title + ">") - } - }, - { - type: "context", - elements: [ - { - type: "mrkdwn", - text: ("*Author:* " + $author + " | *Labels:* " + $labels) - } - ] - } - ] - }' - )" From 721e2fa6207b6bda1ac9045ccaacc95df1713982 Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Tue, 31 Mar 2026 21:58:10 -0700 Subject: [PATCH 17/20] docs(observability): use sandbox connect instead of raw SSH Users access sandboxes via 'openshell sandbox connect', not direct SSH. --- docs/observability/accessing-logs.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/observability/accessing-logs.md b/docs/observability/accessing-logs.md index ca5c8883..1140a9ad 100644 --- a/docs/observability/accessing-logs.md +++ b/docs/observability/accessing-logs.md @@ -59,17 +59,17 @@ For durable log storage, use the log files inside the sandbox or enable [OCSF JS ## Direct Filesystem Access -If you have SSH access to the sandbox, you can read the log files directly: +Use `openshell sandbox connect` to open a shell inside the sandbox and read the log files directly: ```console -$ ssh sandbox@ cat /var/log/openshell.2026-04-01.log +$ openshell sandbox connect my-sandbox +sandbox@my-sandbox:~$ cat /var/log/openshell.2026-04-01.log ``` -Or through the OpenShell SSH config: +You can also run a one-off command without an interactive shell: ```console -$ ssh -F <(openshell sandbox ssh-config smoke-l4) openshell-smoke-l4 \ - "cat /var/log/openshell.2026-04-01.log" +$ openshell sandbox connect my-sandbox -- cat /var/log/openshell.2026-04-01.log ``` The log files inside the sandbox contain the complete record, including events that may 
have been dropped from the gRPC push channel under load (the push channel is bounded and drops events rather than blocking). From 87b4317de8101c3e6aee5d81be4a8a94f684834c Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Tue, 31 Mar 2026 22:04:57 -0700 Subject: [PATCH 18/20] fix(docs): correct settings CLI syntax in OCSF JSON export page The settings CLI requires --key and --value named flags, not positional arguments. Also fix the per-sandbox form: the sandbox name is a positional argument, not a --sandbox flag. --- docs/observability/ocsf-json-export.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/observability/ocsf-json-export.md b/docs/observability/ocsf-json-export.md index b55419d6..696ae264 100644 --- a/docs/observability/ocsf-json-export.md +++ b/docs/observability/ocsf-json-export.md @@ -35,13 +35,13 @@ Use the `ocsf_json_enabled` setting to toggle JSON export. The setting can be ap Global: ```console -$ openshell settings set --global ocsf_json_enabled true +$ openshell settings set --global --key ocsf_json_enabled --value true ``` Per-sandbox: ```console -$ openshell settings set --sandbox smoke-l4 ocsf_json_enabled true +$ openshell settings set my-sandbox --key ocsf_json_enabled --value true ``` The setting takes effect on the next poll cycle (default: 10 seconds). No sandbox restart is required. @@ -49,7 +49,7 @@ The setting takes effect on the next poll cycle (default: 10 seconds). 
No sandbo To disable: ```console -$ openshell settings set --global ocsf_json_enabled false +$ openshell settings set --global --key ocsf_json_enabled --value false ``` ## Output Location From 11ec88c179a8bbb16d3ae500f860418c3e7c6510 Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Tue, 31 Mar 2026 22:34:51 -0700 Subject: [PATCH 19/20] fix(e2e): update log assertions for OCSF shorthand format The E2E tests asserted on the old tracing::fmt key=value format (action=allow, l7_decision=audit, FORWARD, L7_REQUEST, always-blocked). Update to match the new OCSF shorthand (ALLOWED/DENIED, HTTP:, NET:, engine:ssrf, policy:). --- e2e/python/test_sandbox_policy.py | 39 +++++++++++++++---------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/e2e/python/test_sandbox_policy.py b/e2e/python/test_sandbox_policy.py index 092f9978..a56eb599 100644 --- a/e2e/python/test_sandbox_policy.py +++ b/e2e/python/test_sandbox_policy.py @@ -622,13 +622,13 @@ def test_l4_log_fields( assert log_result.exit_code == 0, log_result.stderr log = log_result.stdout - # Verify structured fields in allow line - assert "action=allow" in log or 'action="allow"' in log or "action=allow" in log - assert "dst_host=api.anthropic.com" in log or "dst_host" in log - assert "engine=opa" in log or 'engine="opa"' in log + # Verify OCSF shorthand fields in allow line + assert "ALLOWED" in log, "Expected ALLOWED in OCSF shorthand" + assert "api.anthropic.com" in log, "Expected destination host in log" + assert "engine:opa" in log, "Expected engine:opa in log context" # Verify deny line exists - assert "action=deny" in log or 'action="deny"' in log + assert "DENIED" in log, "Expected DENIED in OCSF shorthand" # ============================================================================= @@ -715,8 +715,9 @@ def test_ssrf_log_shows_blocked_address( log_result = sb.exec_python(_read_openshell_log()) assert log_result.exit_code == 0, log_result.stderr log 
= log_result.stdout - assert "always-blocked" in log.lower(), ( - f"Expected 'always-blocked' in proxy log, got:\n{log}" + # OCSF shorthand uses "engine:ssrf" for SSRF blocks + assert "engine:ssrf" in log.lower() or "ssrf" in log.lower(), ( + f"Expected SSRF block indicator in proxy log, got:\n{log}" ) @@ -1001,7 +1002,9 @@ def test_l7_tls_audit_mode_allows_but_logs( log_result = sb.exec_python(_read_openshell_log()) assert log_result.exit_code == 0, log_result.stderr log = log_result.stdout - assert "l7_decision=audit" in log or 'l7_decision="audit"' in log + # OCSF shorthand: audit decisions show as ALLOWED (audit mode allows through) + assert "HTTP:" in log, "Expected OCSF HTTP activity event in log" + assert "ALLOWED" in log, "Expected ALLOWED for audit-mode decision" def test_l7_tls_explicit_path_rules( @@ -1179,11 +1182,10 @@ def test_l7_tls_log_fields( assert log_result.exit_code == 0, log_result.stderr log = log_result.stdout - assert "L7_REQUEST" in log - assert "l7_protocol" in log - assert "l7_action" in log - assert "l7_target" in log - assert "l7_decision" in log + # OCSF shorthand: L7 requests show as HTTP:method events + assert "HTTP:" in log, "Expected OCSF HTTP activity event in log" + assert "ALLOWED" in log or "DENIED" in log, "Expected L7 decision in log" + assert "policy:" in log, "Expected policy context in log" def test_l7_query_matchers_enforced( @@ -1581,13 +1583,10 @@ def test_forward_proxy_log_fields( assert result.exit_code == 0, result.stderr log = result.stdout - assert "FORWARD" in log, "Expected FORWARD log lines" - # tracing key-value pairs quote string values: action="allow" - assert 'action="allow"' in log, "Expected allowed FORWARD in logs" - assert f"dst_host={_SANDBOX_IP}" in log, "Expected dst_host in FORWARD log" - assert f"dst_port={_FORWARD_PROXY_PORT}" in log, ( - "Expected dst_port in FORWARD log" - ) + # OCSF shorthand: FORWARD requests show as HTTP:method events + assert "HTTP:" in log, "Expected OCSF HTTP activity 
event for FORWARD request" + assert "ALLOWED" in log, "Expected ALLOWED for forward proxy allow" + assert f"{_SANDBOX_IP}" in log, "Expected destination IP in FORWARD log" # ============================================================================= From 4039c2785cda431e0b81650d2738a0b6e7d56c4c Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Wed, 1 Apr 2026 21:44:41 -0700 Subject: [PATCH 20/20] feat(sandbox): convert WebSocket upgrade log calls to OCSF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #718 added two log calls for WebSocket upgrade handling: - 101 Switching Protocols info → NetworkActivity with Upgrade activity. This is a significant state change (L7 enforcement drops to raw relay). - Unsolicited 101 without client Upgrade header → DetectionFinding with High severity. A non-compliant upstream sending 101 without a client Upgrade request could be attempting to bypass L7 inspection. --- crates/openshell-sandbox/src/l7/relay.rs | 18 ++++++++++----- crates/openshell-sandbox/src/l7/rest.rs | 28 ++++++++++++++++++++---- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/crates/openshell-sandbox/src/l7/relay.rs b/crates/openshell-sandbox/src/l7/relay.rs index 5feea2ca..110f777e 100644 --- a/crates/openshell-sandbox/src/l7/relay.rs +++ b/crates/openshell-sandbox/src/l7/relay.rs @@ -93,12 +93,18 @@ where C: AsyncRead + AsyncWrite + Unpin + Send, U: AsyncRead + AsyncWrite + Unpin + Send, { - info!( - host = %host, - port = port, - overflow_bytes = overflow.len(), - "101 Switching Protocols — switching to raw bidirectional relay \ - (L7 enforcement no longer active)" + ocsf_emit!( + NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .activity_name("Upgrade") + .severity(SeverityId::Informational) + .dst_endpoint(Endpoint::from_domain(host, port)) + .message(format!( + "101 Switching Protocols — raw bidirectional relay (L7 
enforcement no longer active) \ + [host:{host} port:{port} overflow_bytes:{}]", + overflow.len() + )) + .build() ); if !overflow.is_empty() { client.write_all(&overflow).await.into_diagnostic()?; diff --git a/crates/openshell-sandbox/src/l7/rest.rs b/crates/openshell-sandbox/src/l7/rest.rs index 0c136be7..6bbf7be4 100644 --- a/crates/openshell-sandbox/src/l7/rest.rs +++ b/crates/openshell-sandbox/src/l7/rest.rs @@ -309,10 +309,30 @@ where if matches!(outcome, RelayOutcome::Upgraded { .. }) { let header_str = String::from_utf8_lossy(&req.raw_header[..header_end]); if !client_requested_upgrade(&header_str) { - warn!( - method = %req.action, - target = %req.target, - "upstream sent unsolicited 101 without client Upgrade request — closing connection" + openshell_ocsf::ocsf_emit!( + openshell_ocsf::DetectionFindingBuilder::new(crate::ocsf_ctx()) + .activity(openshell_ocsf::ActivityId::Open) + .action(openshell_ocsf::ActionId::Denied) + .disposition(openshell_ocsf::DispositionId::Blocked) + .severity(openshell_ocsf::SeverityId::High) + .confidence(openshell_ocsf::ConfidenceId::High) + .is_alert(true) + .finding_info( + openshell_ocsf::FindingInfo::new( + "unsolicited-101-upgrade", + "Unsolicited 101 Switching Protocols", + ) + .with_desc(&format!( + "Upstream sent 101 without client Upgrade request for {} {} — \ + possible L7 inspection bypass. Connection closed.", + req.action, req.target, + )), + ) + .message(format!( + "Unsolicited 101 upgrade blocked: {} {}", + req.action, req.target, + )) + .build() ); return Ok(RelayOutcome::Consumed); }