diff --git a/Cargo.lock b/Cargo.lock index 7d20c9bd8..852d97a0c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,7 +125,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -136,7 +136,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -1283,7 +1283,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -2497,6 +2497,16 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "libyml" +version = "0.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3302702afa434ffa30847a83305f0a69d6abd74293b6554c18ec85c7ef30c980" +dependencies = [ + "anyhow", + "version_check", +] + [[package]] name = "linux-raw-sys" version = "0.4.15" @@ -2691,7 +2701,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2902,7 +2912,7 @@ dependencies = [ "miette", "openshell-core", "serde", - "serde_yaml", + "serde_yml", ] [[package]] @@ -2922,7 +2932,7 @@ dependencies = [ "reqwest", "serde", "serde_json", - "serde_yaml", + "serde_yml", "tempfile", "thiserror 2.0.18", "tokio", @@ -2958,7 +2968,7 @@ dependencies = [ "rustls-pemfile", "seccompiler", "serde_json", - "serde_yaml", + "serde_yml", "sha2 0.10.9", "temp-env", "tempfile", @@ -4044,7 +4054,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.12.1", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4348,6 +4358,21 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "serde_yml" +version = "0.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59e2dd588bf1597a252c3b920e0143eb99b0f76e4e082f4c92ce34fbc9e71ddd" +dependencies = [ + "indexmap 2.13.0", + "itoa", + "libyml", + "memchr", + "ryu", + "serde", + "version_check", +] + [[package]] name = "serdect" version = "0.4.2" @@ -4519,7 +4544,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -4930,7 +4955,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix 1.1.4", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 4fecf1940..08b699d47 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,7 +64,7 @@ nix = { version = "0.29", features = ["signal", "process", "user", "fs", "term"] # Serialization serde = { version = "1", features = ["derive"] } serde_json = "1" -serde_yaml = "0.9" +serde_yml = "0.0.12" # HTTP client reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } diff --git a/crates/openshell-policy/Cargo.toml b/crates/openshell-policy/Cargo.toml index 311bb4e86..f26136c6b 100644 --- a/crates/openshell-policy/Cargo.toml +++ b/crates/openshell-policy/Cargo.toml @@ -13,7 +13,7 @@ repository.workspace = true [dependencies] openshell-core = { path = "../openshell-core" } serde = { workspace = true } -serde_yaml = { workspace = true } +serde_yml = { workspace = true } miette = { workspace = true } [lints] diff --git a/crates/openshell-policy/src/lib.rs b/crates/openshell-policy/src/lib.rs index 7adb4dfda..9cf543bdf 100644 --- a/crates/openshell-policy/src/lib.rs +++ b/crates/openshell-policy/src/lib.rs @@ -82,11 +82,12 @@ struct NetworkEndpointDef { #[serde(default, skip_serializing_if = "String::is_empty")] host: String, /// Single port (backwards compat). Mutually exclusive with `ports`. + /// Uses `u16` to reject invalid values >65535 at parse time. #[serde(default, skip_serializing_if = "is_zero")] - port: u32, + port: u16, /// Multiple ports. When non-empty, this endpoint covers all listed ports. #[serde(default, skip_serializing_if = "Vec::is_empty")] - ports: Vec, + ports: Vec, #[serde(default, skip_serializing_if = "String::is_empty")] protocol: String, #[serde(default, skip_serializing_if = "String::is_empty")] @@ -101,7 +102,7 @@ struct NetworkEndpointDef { allowed_ips: Vec, } -fn is_zero(v: &u32) -> bool { +fn is_zero(v: &u16) -> bool { *v == 0 } @@ -169,10 +170,10 @@ fn to_proto(raw: PolicyFile) -> SandboxPolicy { .map(|e| { // Normalize port/ports: ports takes precedence, else // single port is promoted to ports array. - let normalized_ports = if !e.ports.is_empty() { - e.ports + let normalized_ports: Vec = if !e.ports.is_empty() { + e.ports.into_iter().map(u32::from).collect() } else if e.port > 0 { - vec![e.port] + vec![u32::from(e.port)] } else { vec![] }; @@ -285,10 +286,12 @@ fn from_proto(policy: &SandboxPolicy) -> PolicyFile { .map(|e| { // Use compact form: if ports has exactly 1 element, // emit port (scalar). If >1, emit ports (array). + // Proto uses u32; YAML uses u16. Clamp at boundary. + let clamp = |v: u32| -> u16 { v.min(65535) as u16 }; let (port, ports) = if e.ports.len() > 1 { - (0, e.ports.clone()) + (0, e.ports.iter().map(|&p| clamp(p)).collect()) } else { - (e.ports.first().copied().unwrap_or(e.port), vec![]) + (clamp(e.ports.first().copied().unwrap_or(e.port)), vec![]) }; NetworkEndpointDef { host: e.host.clone(), @@ -358,7 +361,7 @@ fn from_proto(policy: &SandboxPolicy) -> PolicyFile { /// Parse a sandbox policy from a YAML string. pub fn parse_sandbox_policy(yaml: &str) -> Result { - let raw: PolicyFile = serde_yaml::from_str(yaml) + let raw: PolicyFile = serde_yml::from_str(yaml) .into_diagnostic() .wrap_err("failed to parse sandbox policy YAML")?; Ok(to_proto(raw)) @@ -371,7 +374,7 @@ pub fn parse_sandbox_policy(yaml: &str) -> Result { /// and is round-trippable through `parse_sandbox_policy`. pub fn serialize_sandbox_policy(policy: &SandboxPolicy) -> Result { let yaml_repr = from_proto(policy); - serde_yaml::to_string(&yaml_repr) + serde_yml::to_string(&yaml_repr) .into_diagnostic() .wrap_err("failed to serialize policy to YAML") } @@ -1207,4 +1210,20 @@ network_policies: proto2.network_policies["test"].endpoints[0].host ); } + + #[test] + fn rejects_port_above_65535() { + let yaml = r#" +version: 1 +network_policies: + test: + endpoints: + - host: example.com + port: 70000 +"#; + assert!( + parse_sandbox_policy(yaml).is_err(), + "port >65535 should fail to parse" + ); + } } diff --git a/crates/openshell-router/Cargo.toml b/crates/openshell-router/Cargo.toml index dc8e9c924..e4c3d5ea7 100644 --- a/crates/openshell-router/Cargo.toml +++ b/crates/openshell-router/Cargo.toml @@ -19,7 +19,7 @@ serde_json = { workspace = true } thiserror = { workspace = true } tracing = { workspace = true } tokio = { workspace = true } -serde_yaml = { workspace = true } +serde_yml = { workspace = true } uuid = { workspace = true } [dev-dependencies] diff --git a/crates/openshell-router/src/config.rs b/crates/openshell-router/src/config.rs index 52c22da9f..b531e091d 100644 --- a/crates/openshell-router/src/config.rs +++ b/crates/openshell-router/src/config.rs @@ -75,7 +75,7 @@ impl RouterConfig { path.display() )) })?; - let config: Self = serde_yaml::from_str(&content).map_err(|e| { + let config: Self = serde_yml::from_str(&content).map_err(|e| { RouterError::Internal(format!( "failed to parse router config {}: {e}", path.display() diff --git a/crates/openshell-sandbox/Cargo.toml b/crates/openshell-sandbox/Cargo.toml index 68e696e95..e8e7e2c97 100644 --- a/crates/openshell-sandbox/Cargo.toml +++ b/crates/openshell-sandbox/Cargo.toml @@ -60,7 +60,7 @@ ipnet = "2" # Serialization serde_json = { workspace = true } -serde_yaml = { workspace = true } +serde_yml = { workspace = true } # Logging tracing = { workspace = true } diff --git a/crates/openshell-sandbox/src/opa.rs b/crates/openshell-sandbox/src/opa.rs index f1df12ff4..f1c0ad293 100644 --- a/crates/openshell-sandbox/src/opa.rs +++ b/crates/openshell-sandbox/src/opa.rs @@ -511,7 +511,7 @@ fn parse_process_policy(val: ®orus::Value) -> ProcessPolicy { /// Preprocess YAML policy data: parse, normalize, validate, expand access presets, return JSON. fn preprocess_yaml_data(yaml_str: &str) -> Result { - let mut data: serde_json::Value = serde_yaml::from_str(yaml_str) + let mut data: serde_json::Value = serde_yml::from_str(yaml_str) .map_err(|e| miette::miette!("failed to parse YAML data: {e}"))?; // Normalize port → ports for all endpoints so Rego always sees "ports" array. diff --git a/crates/openshell-sandbox/src/proxy.rs b/crates/openshell-sandbox/src/proxy.rs index a7df76e2f..9e87450d4 100644 --- a/crates/openshell-sandbox/src/proxy.rs +++ b/crates/openshell-sandbox/src/proxy.rs @@ -23,6 +23,12 @@ use tracing::{debug, info, warn}; const MAX_HEADER_BYTES: usize = 8192; const INFERENCE_LOCAL_HOST: &str = "inference.local"; +/// Maximum total bytes for a streaming inference response body (32 MiB). +const MAX_STREAMING_BODY: usize = 32 * 1024 * 1024; + +/// Idle timeout per chunk when relaying streaming inference responses. +const CHUNK_IDLE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30); + /// Result of a proxy CONNECT policy decision. struct ConnectDecision { action: NetworkAction, @@ -1045,18 +1051,35 @@ async fn route_inference_request( let header_bytes = format_http_response_header(resp.status, &resp_headers); write_all(tls_client, &header_bytes).await?; - // Stream body chunks as they arrive from the upstream. + // Stream body chunks with byte cap and idle timeout. + let mut total_bytes: usize = 0; loop { - match resp.next_chunk().await { - Ok(Some(chunk)) => { + match tokio::time::timeout(CHUNK_IDLE_TIMEOUT, resp.next_chunk()).await { + Ok(Ok(Some(chunk))) => { + total_bytes += chunk.len(); + if total_bytes > MAX_STREAMING_BODY { + warn!( + total_bytes = total_bytes, + limit = MAX_STREAMING_BODY, + "streaming response exceeded byte limit, truncating" + ); + break; + } let encoded = format_chunk(&chunk); write_all(tls_client, &encoded).await?; } - Ok(None) => break, - Err(e) => { + Ok(Ok(None)) => break, + Ok(Err(e)) => { warn!(error = %e, "error reading upstream response chunk"); break; } + Err(_) => { + warn!( + idle_timeout_secs = CHUNK_IDLE_TIMEOUT.as_secs(), + "streaming response chunk idle timeout, closing" + ); + break; + } } } diff --git a/crates/openshell-server/src/auth.rs b/crates/openshell-server/src/auth.rs index 5a3229ffa..b896d062c 100644 --- a/crates/openshell-server/src/auth.rs +++ b/crates/openshell-server/src/auth.rs @@ -22,11 +22,28 @@ use axum::{ response::{Html, IntoResponse}, routing::get, }; +use http::header; use serde::Deserialize; use std::sync::Arc; use crate::ServerState; +/// Validate that a confirmation code matches the CLI-generated format. +/// +/// Codes are 3 alphanumeric characters, a dash, then 4 alphanumeric characters +/// (e.g., "AB7-X9KM"). The CLI generates these from the charset `[A-Z2-9]`. +fn is_valid_code(code: &str) -> bool { + let bytes = code.as_bytes(); + bytes.len() == 8 + && bytes[3] == b'-' + && bytes[..3] + .iter() + .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit()) + && bytes[4..] + .iter() + .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit()) +} + #[derive(Deserialize)] struct ConnectParams { callback_port: u16, @@ -54,6 +71,15 @@ async fn auth_connect( Query(params): Query, headers: HeaderMap, ) -> impl IntoResponse { + // Reject codes that don't match the CLI-generated format to prevent + // reflected XSS via crafted URLs. + if !is_valid_code(¶ms.code) { + return Html( + "

Invalid confirmation code format.

".to_string(), + ) + .into_response(); + } + let cf_token = headers .get("cookie") .and_then(|v| v.to_str().ok()) @@ -68,14 +94,34 @@ async fn auth_connect( .and_then(|v| v.to_str().ok()) .map_or_else(|| state.config.bind_address.to_string(), String::from); + let safe_gateway = html_escape(&gateway_display); + match cf_token { - Some(token) => Html(render_connect_page( - &gateway_display, - params.callback_port, - &token, - ¶ms.code, - )), - None => Html(render_waiting_page(params.callback_port, ¶ms.code)), + Some(token) => { + let nonce = uuid::Uuid::new_v4().to_string(); + let csp = format!( + "default-src 'none'; script-src 'nonce-{nonce}'; style-src 'unsafe-inline'; connect-src http://127.0.0.1:*" + ); + ( + [(header::CONTENT_SECURITY_POLICY, csp)], + Html(render_connect_page( + &safe_gateway, + params.callback_port, + &token, + ¶ms.code, + &nonce, + )), + ) + .into_response() + } + None => { + let csp = "default-src 'none'; style-src 'unsafe-inline'".to_string(); + ( + [(header::CONTENT_SECURITY_POLICY, csp)], + Html(render_waiting_page(params.callback_port, ¶ms.code)), + ) + .into_response() + } } } @@ -104,22 +150,27 @@ fn render_connect_page( callback_port: u16, cf_token: &str, code: &str, + nonce: &str, ) -> String { - // Escape the token for safe embedding in a JS string literal. - let escaped_token = cf_token - .replace('\\', "\\\\") - .replace('\'', "\\'") - .replace('"', "\\\"") - .replace('<', "\\x3c") - .replace('>', "\\x3e"); + // Use JSON serialization for JS-safe string embedding — handles all + // edge cases including \n, \r, U+2028, U+2029 that break JS string + // literals. serde_json::to_string produces a quoted JSON string + // (e.g., "value") which is a valid JS string literal. + // + // We additionally escape < and > to \u003c / \u003e because while + // they're valid in JSON, they're dangerous inside an HTML before the JS parser runs). + let json_token = serde_json::to_string(cf_token) + .unwrap_or_else(|_| "\"\"".to_string()) + .replace('<', "\\u003c") + .replace('>', "\\u003e"); + let json_code = serde_json::to_string(code) + .unwrap_or_else(|_| "\"\"".to_string()) + .replace('<', "\\u003c") + .replace('>', "\\u003e"); - // Escape the code the same way (it's alphanumeric + dash, but be safe). - let escaped_code = code - .replace('\\', "\\\\") - .replace('\'', "\\'") - .replace('"', "\\\"") - .replace('<', "\\x3c") - .replace('>', "\\x3e"); + // HTML-safe version of the code for display in the page body. + let html_code = html_escape(code); let version = openshell_core::VERSION; @@ -250,7 +301,7 @@ fn render_connect_page(
Connect to Gateway
Confirmation Code
-
{escaped_code}
+
{html_code}
Verify this matches the code shown in your terminal
@@ -271,9 +322,9 @@ fn render_connect_page(
- ", "ABC-1234"); - // < and > should be escaped + let html = render_connect_page( + "gw", + 1234, + "token", + "ABC-1234", + "nonce", + ); + // < and > should be escaped via JSON encoding (\u003c) assert!(!html.contains("