From 1d4f0ab7fe40fa57ada5193b4e763d0e4a3e1d96 Mon Sep 17 00:00:00 2001 From: Kalvin Chau Date: Thu, 4 Jun 2026 13:25:15 -0700 Subject: [PATCH 1/4] fix(doctor): bound subprocess probes add a shared timeout runner for local doctor probes with 10s defaults and 15s freshness subprocess limits. kill timed-out process groups on unix and return specific doctor timeout rows or freshness details instead of hanging the report. --- Cargo.lock | 2 + crates/doctor/Cargo.toml | 4 + crates/doctor/src/agents.rs | 35 ++++- crates/doctor/src/checks.rs | 143 +++++++++++++++-- crates/doctor/src/command.rs | 272 +++++++++++++++++++++++++++++++++ crates/doctor/src/freshness.rs | 128 ++++++++++++---- crates/doctor/src/lib.rs | 158 ++++++++++++++++--- crates/doctor/src/resolve.rs | 107 +++++++++---- 8 files changed, 754 insertions(+), 95 deletions(-) create mode 100644 crates/doctor/src/command.rs diff --git a/Cargo.lock b/Cargo.lock index 73663321..ecf0474c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -610,10 +610,12 @@ version = "0.1.0" dependencies = [ "dirs", "futures", + "nix", "reqwest", "serde", "serde_json", "tokio", + "wait-timeout", ] [[package]] diff --git a/crates/doctor/Cargo.toml b/crates/doctor/Cargo.toml index a4b2600f..537f8595 100644 --- a/crates/doctor/Cargo.toml +++ b/crates/doctor/Cargo.toml @@ -11,3 +11,7 @@ tokio = { version = "1.0", features = ["rt", "macros"] } futures = "0.3" reqwest = { version = "0.13", features = ["blocking", "json"] } dirs = "6" +wait-timeout = "0.2" + +[target.'cfg(unix)'.dependencies] +nix = { version = "0.31", features = ["signal"] } diff --git a/crates/doctor/src/agents.rs b/crates/doctor/src/agents.rs index d7d85ce9..08420002 100644 --- a/crates/doctor/src/agents.rs +++ b/crates/doctor/src/agents.rs @@ -1,9 +1,11 @@ //! AI agent checks and fix command lookup. use std::path::{Path, PathBuf}; -use std::process::Command; use crate::checks::CLONEFILE_FIX_COMMAND; +use crate::command::{ + run_command_with_timeout, CommandError, CommandTimeout, DEFAULT_PROBE_TIMEOUT, +}; use crate::resolve::format_command_output; use crate::types::{ AgentVersionInfo, AuthStatus, CheckStatus, DoctorCheck, FixType, InstallSource, ResolvedBinary, @@ -303,7 +305,9 @@ pub fn check_single_ai_agent( if let Some(ref path_str) = resolved_path { if info.id == "ai-agent-goose" { - match Command::new(path_str).arg("acp").arg("--help").output() { + let mut command = std::process::Command::new(path_str); + command.arg("acp").arg("--help"); + match run_command_with_timeout(command, "goose acp --help", DEFAULT_PROBE_TIMEOUT) { Ok(output) if output.status.success() => { let raw = format!( "{header}\n{}\n{}", @@ -363,6 +367,29 @@ pub fn check_single_ai_agent( bridge: None, } } + Err(CommandError::Timeout { command, timeout }) => { + let timeout = CommandTimeout::new(info.label, command, timeout); + DoctorCheck { + id: info.id.to_string(), + label: info.label.to_string(), + status: CheckStatus::Fail, + message: timeout.message(), + fix_url: None, + fix_command: None, + fix_type: None, + path: resolved_path, + bridge_path: None, + raw_output: Some(format!("{header}\n{}\n{search}", timeout.raw_output())), + auth_status: None, + installed_version: None, + latest_version: None, + update_available: None, + install_source: bridge_install_source.clone(), + self_updating: None, + main: version_readout(bridge_install_source.clone()), + bridge: None, + } + } Err(e) => DoctorCheck { id: info.id.to_string(), label: info.label.to_string(), @@ -431,6 +458,10 @@ pub fn check_single_ai_agent( Some(AuthStatus::Unknown), Some(format!("$ {cmd}\n(spawn failure: {e})")), ), + ExecOutcome::Timeout { command, timeout } => { + let timeout = CommandTimeout::new(info.label, command, timeout); + (Some(AuthStatus::Unknown), Some(timeout.raw_output())) + } // Exit 127 means the shell couldn't find the command — // PATH-shadowed or uninstalled. Not the same as "signed // out"; report Unknown so the UI doesn't offer a useless diff --git a/crates/doctor/src/checks.rs b/crates/doctor/src/checks.rs index 897f942a..cafc9cd3 100644 --- a/crates/doctor/src/checks.rs +++ b/crates/doctor/src/checks.rs @@ -2,12 +2,68 @@ use std::process::Command; +use crate::command::{ + run_command_with_timeout, CommandError, CommandTimeout, DEFAULT_PROBE_TIMEOUT, +}; use crate::resolve::format_command_output; -use crate::types::{CheckStatus, DoctorCheck, FixType, ResolvedBinary}; +use crate::types::{CheckStatus, DoctorCheck, FixType, InstallSource, ResolvedBinary}; /// Fix command for enabling copy-on-write git clones. pub(crate) const CLONEFILE_FIX_COMMAND: &str = "git config --global core.clonefile true"; +struct TimeoutCheck<'a> { + id: String, + label: String, + status: CheckStatus, + header: &'a str, + command: String, + timeout: std::time::Duration, + path: Option, + install_source: Option, + raw_suffix: Option<&'a str>, +} + +fn command_timeout_check(input: TimeoutCheck<'_>) -> DoctorCheck { + let TimeoutCheck { + id, + label, + status, + header, + command, + timeout, + path, + install_source, + raw_suffix, + } = input; + let timeout = CommandTimeout::new(label.clone(), command, timeout); + let mut raw = format!("{header}\n{}", timeout.raw_output()); + if let Some(suffix) = raw_suffix { + raw.push('\n'); + raw.push_str(suffix); + } + + DoctorCheck { + id, + label, + status, + message: timeout.message(), + fix_url: None, + fix_command: None, + fix_type: None, + path, + bridge_path: None, + raw_output: Some(raw), + auth_status: None, + installed_version: None, + latest_version: None, + update_available: None, + install_source, + self_updating: None, + main: None, + bridge: None, + } +} + /// Check that `git` is installed and reachable. pub fn check_git(resolved: &ResolvedBinary) -> DoctorCheck { let label = "Git".to_string(); @@ -42,7 +98,9 @@ pub fn check_git(resolved: &ResolvedBinary) -> DoctorCheck { }; let path_str = git_path.to_string_lossy().to_string(); - match Command::new(git_path).arg("--version").output() { + let mut command = Command::new(git_path); + command.arg("--version"); + match run_command_with_timeout(command, "git --version", DEFAULT_PROBE_TIMEOUT) { Ok(output) if output.status.success() => { let version = String::from_utf8_lossy(&output.stdout).trim().to_string(); let raw = format!( @@ -98,6 +156,17 @@ pub fn check_git(resolved: &ResolvedBinary) -> DoctorCheck { bridge: None, } } + Err(CommandError::Timeout { command, timeout }) => command_timeout_check(TimeoutCheck { + id, + label, + status: CheckStatus::Fail, + header, + command, + timeout, + path: Some(path_str), + install_source: resolved.install_source.clone(), + raw_suffix: Some(search), + }), Err(e) => DoctorCheck { id, label, @@ -155,7 +224,9 @@ pub fn check_gh(resolved: &ResolvedBinary) -> DoctorCheck { }; let path_str = gh_path.to_string_lossy().to_string(); - match Command::new(gh_path).arg("--version").output() { + let mut command = Command::new(gh_path); + command.arg("--version"); + match run_command_with_timeout(command, "gh --version", DEFAULT_PROBE_TIMEOUT) { Ok(output) if output.status.success() => { let version = String::from_utf8_lossy(&output.stdout); let first_line = version.lines().next().unwrap_or("gh").trim().to_string(); @@ -212,6 +283,17 @@ pub fn check_gh(resolved: &ResolvedBinary) -> DoctorCheck { bridge: None, } } + Err(CommandError::Timeout { command, timeout }) => command_timeout_check(TimeoutCheck { + id, + label, + status: CheckStatus::Fail, + header, + command, + timeout, + path: Some(path_str), + install_source: resolved.install_source.clone(), + raw_suffix: Some(search), + }), Err(e) => DoctorCheck { id, label, @@ -267,7 +349,9 @@ pub fn check_gh_auth(gh: &ResolvedBinary) -> DoctorCheck { } }; - match Command::new(gh_path).args(["auth", "status"]).output() { + let mut command = Command::new(gh_path); + command.args(["auth", "status"]); + match run_command_with_timeout(command, "gh auth status", DEFAULT_PROBE_TIMEOUT) { Ok(output) => { let raw = format!( "{header}\n{}", @@ -324,6 +408,17 @@ pub fn check_gh_auth(gh: &ResolvedBinary) -> DoctorCheck { } } } + Err(CommandError::Timeout { command, timeout }) => command_timeout_check(TimeoutCheck { + id, + label, + status: CheckStatus::Fail, + header, + command, + timeout, + path: None, + install_source: None, + raw_suffix: None, + }), Err(e) => DoctorCheck { id, label, @@ -383,7 +478,9 @@ pub fn check_git_lfs(git: &ResolvedBinary, git_lfs: &ResolvedBinary) -> DoctorCh } }; - match Command::new(git_path).args(["lfs", "version"]).output() { + let mut command = Command::new(git_path); + command.args(["lfs", "version"]); + match run_command_with_timeout(command, "git lfs version", DEFAULT_PROBE_TIMEOUT) { Ok(output) if output.status.success() => { let version = String::from_utf8_lossy(&output.stdout).trim().to_string(); let path = git_lfs @@ -443,6 +540,20 @@ pub fn check_git_lfs(git: &ResolvedBinary, git_lfs: &ResolvedBinary) -> DoctorCh bridge: None, } } + Err(CommandError::Timeout { command, timeout }) => command_timeout_check(TimeoutCheck { + id, + label, + status: CheckStatus::Warn, + header, + command, + timeout, + path: git_lfs + .path + .as_ref() + .map(|p| p.to_string_lossy().to_string()), + install_source: git_lfs.install_source.clone(), + raw_suffix: Some(search), + }), Err(e) => DoctorCheck { id, label, @@ -499,10 +610,13 @@ pub fn check_clonefile(git: &ResolvedBinary) -> DoctorCheck { } }; - match Command::new(git_path) - .args(["config", "--global", "core.clonefile"]) - .output() - { + let mut command = Command::new(git_path); + command.args(["config", "--global", "core.clonefile"]); + match run_command_with_timeout( + command, + "git config --global core.clonefile", + DEFAULT_PROBE_TIMEOUT, + ) { Ok(output) if output.status.success() => { let raw = format!( "{header}\n{}", @@ -583,6 +697,17 @@ pub fn check_clonefile(git: &ResolvedBinary) -> DoctorCheck { bridge: None, } } + Err(CommandError::Timeout { command, timeout }) => command_timeout_check(TimeoutCheck { + id, + label, + status: CheckStatus::Warn, + header, + command, + timeout, + path: None, + install_source: git.install_source.clone(), + raw_suffix: None, + }), Err(e) => DoctorCheck { id, label, diff --git a/crates/doctor/src/command.rs b/crates/doctor/src/command.rs new file mode 100644 index 00000000..07456521 --- /dev/null +++ b/crates/doctor/src/command.rs @@ -0,0 +1,272 @@ +//! Bounded subprocess helpers for doctor probes. +//! +//! The doctor crate runs many small local probes. None of them should be able +//! to pin a blocking worker forever, so probe call sites route through this +//! module instead of `Command::output()`. + +use std::fmt; +use std::io::Read; +use std::process::{Command, Output, Stdio}; +use std::thread::{self, JoinHandle}; +use std::time::Duration; + +use wait_timeout::ChildExt; + +pub(crate) const DEFAULT_PROBE_TIMEOUT: Duration = Duration::from_secs(10); +pub(crate) const FRESHNESS_PROBE_TIMEOUT: Duration = Duration::from_secs(15); + +#[derive(Debug)] +pub(crate) enum CommandError { + Spawn { + command: String, + source: std::io::Error, + }, + Wait { + command: String, + source: std::io::Error, + }, + Timeout { + command: String, + timeout: Duration, + }, +} + +impl fmt::Display for CommandError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CommandError::Spawn { command, source } => { + write!(f, "failed to spawn `{command}`: {source}") + } + CommandError::Wait { command, source } => { + write!(f, "failed to wait for `{command}`: {source}") + } + CommandError::Timeout { command, timeout } => { + write!( + f, + "`{command}` timed out after {}", + format_duration(*timeout) + ) + } + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct CommandTimeout { + pub label: String, + pub command: String, + pub timeout: Duration, +} + +impl CommandTimeout { + pub(crate) fn new( + label: impl Into, + command: impl Into, + timeout: Duration, + ) -> Self { + Self { + label: label.into(), + command: command.into(), + timeout, + } + } + + pub(crate) fn message(&self) -> String { + format!( + "{} timed out after {} running {}", + self.label, + format_duration(self.timeout), + self.command + ) + } + + pub(crate) fn raw_output(&self) -> String { + format!( + "$ {}\ntimed out after {}", + self.command, + format_duration(self.timeout) + ) + } +} + +pub(crate) fn format_duration(duration: Duration) -> String { + if duration.subsec_nanos() == 0 { + format!("{}s", duration.as_secs()) + } else { + format!("{duration:?}") + } +} + +pub(crate) fn run_command_with_timeout( + mut command: Command, + display_command: impl Into, + timeout: Duration, +) -> Result { + let display_command = display_command.into(); + command + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + #[cfg(unix)] + { + use std::os::unix::process::CommandExt; + command.process_group(0); + } + + let mut child = command.spawn().map_err(|source| CommandError::Spawn { + command: display_command.clone(), + source, + })?; + + let stdout = child.stdout.take(); + let stderr = child.stderr.take(); + let stdout_thread = stdout.map(|mut stdout| { + thread::spawn(move || { + let mut bytes = Vec::new(); + let _ = stdout.read_to_end(&mut bytes); + bytes + }) + }); + let stderr_thread = stderr.map(|mut stderr| { + thread::spawn(move || { + let mut bytes = Vec::new(); + let _ = stderr.read_to_end(&mut bytes); + bytes + }) + }); + + let status = match child.wait_timeout(timeout) { + Ok(Some(status)) => status, + Ok(None) => { + kill_child_process_group(&mut child); + let _ = child.wait(); + if let Some(handle) = stdout_thread { + let _ = join_reader(handle); + } + if let Some(handle) = stderr_thread { + let _ = join_reader(handle); + } + return Err(CommandError::Timeout { + command: display_command, + timeout, + }); + } + Err(source) => { + kill_child_process_group(&mut child); + let _ = child.wait(); + if let Some(handle) = stdout_thread { + let _ = join_reader(handle); + } + if let Some(handle) = stderr_thread { + let _ = join_reader(handle); + } + return Err(CommandError::Wait { + command: display_command, + source, + }); + } + }; + + let stdout = stdout_thread.map(join_reader).unwrap_or_default(); + let stderr = stderr_thread.map(join_reader).unwrap_or_default(); + + Ok(Output { + status, + stdout, + stderr, + }) +} + +fn join_reader(handle: JoinHandle>) -> Vec { + handle.join().unwrap_or_default() +} + +fn kill_child_process_group(child: &mut std::process::Child) { + #[cfg(unix)] + if let Ok(pid) = i32::try_from(child.id()) { + let _ = nix::sys::signal::kill( + nix::unistd::Pid::from_raw(-pid), + nix::sys::signal::Signal::SIGKILL, + ); + } + + let _ = child.kill(); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn command_runner_captures_success_output() { + let mut command = Command::new("sh"); + command.arg("-c").arg("printf stdout; printf stderr >&2"); + + let output = + run_command_with_timeout(command, "sh -c output", Duration::from_secs(2)).unwrap(); + + assert!(output.status.success()); + assert_eq!(String::from_utf8_lossy(&output.stdout), "stdout"); + assert_eq!(String::from_utf8_lossy(&output.stderr), "stderr"); + } + + #[test] + fn command_runner_times_out() { + let mut command = Command::new("sh"); + command.arg("-c").arg("sleep 5"); + + let err = run_command_with_timeout(command, "sh -c sleep", Duration::from_millis(100)) + .unwrap_err(); + + match err { + CommandError::Timeout { command, timeout } => { + assert_eq!(command, "sh -c sleep"); + assert_eq!(timeout, Duration::from_millis(100)); + } + other => panic!("expected timeout, got {other:?}"), + } + } + + #[cfg(unix)] + #[test] + fn command_runner_kills_process_group_descendant() { + use nix::sys::signal; + use nix::unistd::Pid; + + let dir = std::env::temp_dir().join(format!( + "doctor-command-timeout-{}-{}", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos(), + )); + std::fs::create_dir_all(&dir).unwrap(); + let pid_file = dir.join("child.pid"); + let script = format!("sleep 30 & echo $! > {}; wait", pid_file.display()); + + let mut command = Command::new("sh"); + command.arg("-c").arg(script); + let err = run_command_with_timeout(command, "sh -c descendant", Duration::from_millis(250)) + .unwrap_err(); + assert!(matches!(err, CommandError::Timeout { .. })); + + let pid = std::fs::read_to_string(&pid_file) + .expect("descendant pid should be written before timeout") + .trim() + .parse::() + .unwrap(); + + let exited = (0..20).any(|_| { + let gone = signal::kill(Pid::from_raw(pid), None).is_err(); + if !gone { + std::thread::sleep(Duration::from_millis(50)); + } + gone + }); + + let _ = std::fs::remove_dir_all(&dir); + assert!(exited, "descendant process {pid} survived timeout cleanup"); + } +} diff --git a/crates/doctor/src/freshness.rs b/crates/doctor/src/freshness.rs index 5c548f9d..f2480837 100644 --- a/crates/doctor/src/freshness.rs +++ b/crates/doctor/src/freshness.rs @@ -13,6 +13,9 @@ use std::time::{SystemTime, UNIX_EPOCH}; use serde::{Deserialize, Serialize}; use serde_json::Value; +use crate::command::{ + run_command_with_timeout, CommandError, CommandTimeout, FRESHNESS_PROBE_TIMEOUT, +}; use crate::package_ids::LatestSource; use crate::types::InstallSource; @@ -27,6 +30,13 @@ pub(crate) struct VersionInfo { pub installed: Option, pub latest: Option, pub update_available: Option, + pub command_timeouts: Vec, +} + +#[derive(Debug, Default)] +struct ProbeResult { + value: Option, + timeouts: Vec, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -296,14 +306,31 @@ fn installed_version_from_package_json( /// Run ` ` and parse the first semver-shaped token out of the /// combined stdout/stderr. Errors and missing tokens both yield `None`. -fn installed_version(binary_path: &Path, version_args: &[&str]) -> Option { - let output = Command::new(binary_path).args(version_args).output().ok()?; +fn installed_version(binary_path: &Path, version_args: &[&str]) -> ProbeResult { + let mut command = Command::new(binary_path); + command.args(version_args); + let display_command = format!("{} {}", binary_path.display(), version_args.join(" ")) + .trim() + .to_string(); + let output = match run_command_with_timeout(command, display_command, FRESHNESS_PROBE_TIMEOUT) { + Ok(output) => output, + Err(CommandError::Timeout { command, timeout }) => { + return ProbeResult { + value: None, + timeouts: vec![CommandTimeout::new("installed version", command, timeout)], + }; + } + Err(_) => return ProbeResult::default(), + }; let combined = format!( "{}\n{}", String::from_utf8_lossy(&output.stdout), String::from_utf8_lossy(&output.stderr), ); - extract_version(&combined) + ProbeResult { + value: extract_version(&combined), + timeouts: Vec::new(), + } } /// Whether a tool keeps itself up to date and therefore should never raise an @@ -321,24 +348,42 @@ fn latest_version( source: LatestSource, package_id: &str, npm_registry: Option<&str>, -) -> Option { +) -> ProbeResult { match source { LatestSource::Brew => latest_brew(package_id), LatestSource::Npm => latest_npm(package_id, npm_registry), - LatestSource::CratesIo => latest_crates_io(package_id), - LatestSource::GitHubReleases => latest_github_releases(package_id), + LatestSource::CratesIo => ProbeResult { + value: latest_crates_io(package_id), + timeouts: Vec::new(), + }, + LatestSource::GitHubReleases => ProbeResult { + value: latest_github_releases(package_id), + timeouts: Vec::new(), + }, } } -fn latest_brew(package_id: &str) -> Option { - let output = Command::new("brew") - .args(["info", "--json=v2", package_id]) - .output() - .ok()?; +fn latest_brew(package_id: &str) -> ProbeResult { + let mut command = Command::new("brew"); + command.args(["info", "--json=v2", package_id]); + let display_command = format!("brew info --json=v2 {package_id}"); + let output = match run_command_with_timeout(command, display_command, FRESHNESS_PROBE_TIMEOUT) { + Ok(output) => output, + Err(CommandError::Timeout { command, timeout }) => { + return ProbeResult { + value: None, + timeouts: vec![CommandTimeout::new("brew latest version", command, timeout)], + }; + } + Err(_) => return ProbeResult::default(), + }; if !output.status.success() { - return None; + return ProbeResult::default(); + } + ProbeResult { + value: parse_brew_info_v2(&output.stdout, package_id), + timeouts: Vec::new(), } - parse_brew_info_v2(&output.stdout, package_id) } /// Pull `formulae[0].versions.stable` or `casks[0].version` out of a @@ -367,21 +412,38 @@ pub(crate) fn parse_brew_info_v2(bytes: &[u8], _package_id: &str) -> Option) -> Option { +fn latest_npm(package_id: &str, npm_registry: Option<&str>) -> ProbeResult { let mut cmd = Command::new("npm"); cmd.args(["view", package_id, "version"]); if let Some(registry) = npm_registry { cmd.args(["--registry", registry]); } - let output = cmd.output().ok()?; + let display_command = if let Some(registry) = npm_registry { + format!("npm view {package_id} version --registry {registry}") + } else { + format!("npm view {package_id} version") + }; + let output = match run_command_with_timeout(cmd, display_command, FRESHNESS_PROBE_TIMEOUT) { + Ok(output) => output, + Err(CommandError::Timeout { command, timeout }) => { + return ProbeResult { + value: None, + timeouts: vec![CommandTimeout::new("npm latest version", command, timeout)], + }; + } + Err(_) => return ProbeResult::default(), + }; if !output.status.success() { - return None; + return ProbeResult::default(); } let raw = String::from_utf8_lossy(&output.stdout).trim().to_string(); if raw.is_empty() { - None + ProbeResult::default() } else { - Some(raw) + ProbeResult { + value: Some(raw), + timeouts: Vec::new(), + } } } @@ -470,14 +532,19 @@ pub(crate) async fn fetch_version_info( let npm_registry = npm_registry.map(|s| s.to_string()); let result = tokio::task::spawn_blocking(move || { + let mut command_timeouts = Vec::new(); let installed = if path.as_os_str().is_empty() { None } else { match &probe { - OwnedProbe::Cli(args) => installed_version( - &path, - &args.iter().map(|s| s.as_str()).collect::>(), - ), + OwnedProbe::Cli(args) => { + let result = installed_version( + &path, + &args.iter().map(|s| s.as_str()).collect::>(), + ); + command_timeouts.extend(result.timeouts); + result.value + } OwnedProbe::NpmPackageJson { package_id } => { installed_version_from_package_json(&path, package_id.as_deref()) } @@ -495,13 +562,17 @@ pub(crate) async fn fetch_version_info( }; if let Some(v) = cached { Some(v) - } else if let Some(v) = latest_version(source, &pkg, npm_registry.as_deref()) { - if let Ok(mut guard) = cache.lock() { - guard.insert(source, &pkg, v.clone(), now); - } - Some(v) } else { - None + let result = latest_version(source, &pkg, npm_registry.as_deref()); + command_timeouts.extend(result.timeouts); + if let Some(v) = result.value { + if let Ok(mut guard) = cache.lock() { + guard.insert(source, &pkg, v.clone(), now); + } + Some(v) + } else { + None + } } } else { None @@ -512,6 +583,7 @@ pub(crate) async fn fetch_version_info( installed, latest, update_available, + command_timeouts, } }) .await; diff --git a/crates/doctor/src/lib.rs b/crates/doctor/src/lib.rs index 30f9b519..ad814c85 100644 --- a/crates/doctor/src/lib.rs +++ b/crates/doctor/src/lib.rs @@ -6,6 +6,7 @@ pub mod agents; pub mod checks; +mod command; pub(crate) mod freshness; pub(crate) mod package_ids; pub mod resolve; @@ -19,11 +20,14 @@ use std::sync::{Arc, Mutex}; use agents::{check_single_ai_agent, derive_update_command, lookup_fix_command, AI_AGENT_CHECKS}; use checks::{check_clonefile, check_gh, check_gh_auth, check_git, check_git_lfs}; +use command::{ + format_duration, run_command_with_timeout, CommandError, CommandTimeout, DEFAULT_PROBE_TIMEOUT, +}; use freshness::{ fetch_version_info, is_self_updating, load_cache, save_cache, select_installed_probe, }; use package_ids::{lookup_package_id, LatestSource, Role}; -use resolve::resolve_binary; +use resolve::resolve_binary_with_diagnostics; use types::{InstallSource, ResolvedBinary}; /// Fallback check returned when a spawn_blocking task panics. @@ -105,13 +109,20 @@ async fn collect_base_report(npm_registry: Option<&str>) -> DoctorReport { let handles: Vec<_> = binary_names .iter() - .map(|&name| tokio::task::spawn_blocking(move || (name, resolve_binary(name)))) + .map(|&name| { + tokio::task::spawn_blocking(move || { + let (resolved, timeouts) = resolve_binary_with_diagnostics(name); + (name, resolved, timeouts) + }) + }) .collect(); let mut resolved: HashMap<&str, ResolvedBinary> = HashMap::new(); + let mut resolution_timeouts = Vec::new(); for handle in handles { - if let Ok((name, rb)) = handle.await { + if let Ok((name, rb, timeouts)) = handle.await { resolved.insert(name, rb); + resolution_timeouts.extend(timeouts); } } @@ -195,9 +206,63 @@ async fn collect_base_report(npm_registry: Option<&str>) -> DoctorReport { ); } + checks.extend(timeout_diagnostic_checks(resolution_timeouts)); + DoctorReport { checks } } +fn timeout_diagnostic_checks(timeouts: Vec) -> Vec { + let mut seen = std::collections::HashSet::new(); + let mut checks = Vec::new(); + for timeout in timeouts { + if !seen.insert((timeout.label.clone(), timeout.command.clone())) { + continue; + } + checks.push(timeout_diagnostic_check(timeout)); + } + checks +} + +fn timeout_diagnostic_check(timeout: CommandTimeout) -> DoctorCheck { + let slug_source = format!("{} {}", timeout.label, timeout.command); + DoctorCheck { + id: format!("subprocess-timeout-{}", slug(&slug_source)), + label: timeout.label.clone(), + status: CheckStatus::Warn, + message: timeout.message(), + fix_url: None, + fix_command: None, + fix_type: None, + path: None, + bridge_path: None, + raw_output: Some(format!( + "# Check: {}\n{}", + timeout.label, + timeout.raw_output() + )), + auth_status: None, + installed_version: None, + latest_version: None, + update_available: None, + install_source: None, + self_updating: None, + main: None, + bridge: None, + } +} + +fn slug(value: &str) -> String { + let mut out = String::new(); + for ch in value.chars() { + if ch.is_ascii_alphanumeric() { + out.push(ch.to_ascii_lowercase()); + } else if !out.ends_with('-') { + out.push('-'); + } + } + out.trim_matches('-').to_string() +} + /// Which binary behind a check a freshness probe targets. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] enum ReadoutSlot { @@ -265,6 +330,44 @@ fn apply_freshness( } } +fn apply_freshness_timeouts(check: &mut DoctorCheck, info: &freshness::VersionInfo) { + if info.command_timeouts.is_empty() { + return; + } + + if check.status == CheckStatus::Pass { + check.status = CheckStatus::Warn; + } + + let first = &info.command_timeouts[0]; + let summary = if info.command_timeouts.len() == 1 { + format!( + "freshness timed out after {} running {}", + format_duration(first.timeout), + first.command + ) + } else { + format!( + "{} freshness probes timed out, first after {} running {}", + info.command_timeouts.len(), + format_duration(first.timeout), + first.command + ) + }; + check.message = format!("{}; {summary}", check.message); + + let mut raw = check.raw_output.take().unwrap_or_default(); + if !raw.is_empty() { + raw.push('\n'); + } + raw.push_str("# Freshness subprocess timeouts:"); + for timeout in &info.command_timeouts { + raw.push('\n'); + raw.push_str(&timeout.raw_output()); + } + check.raw_output = Some(raw); +} + /// Post-hoc pass: for every check that has a usable binary path and a known /// package id, run the installed/latest version probes in parallel and update /// the corresponding fields on the report. The on-disk cache is read once at @@ -361,17 +464,17 @@ async fn populate_freshness( for check in &mut report.checks { let is_agent = check.main.is_some() || check.bridge.is_some(); if is_agent { - if let (Some(readout), Some((info, pkg))) = ( - check.main.as_mut(), - by_target.remove(&(check.id.clone(), ReadoutSlot::Main)), - ) { - apply_freshness(readout, &info, ReadoutSlot::Main, pkg.as_deref()); + if let Some((info, pkg)) = by_target.remove(&(check.id.clone(), ReadoutSlot::Main)) { + if let Some(readout) = check.main.as_mut() { + apply_freshness(readout, &info, ReadoutSlot::Main, pkg.as_deref()); + } + apply_freshness_timeouts(check, &info); } - if let (Some(readout), Some((info, pkg))) = ( - check.bridge.as_mut(), - by_target.remove(&(check.id.clone(), ReadoutSlot::Bridge)), - ) { - apply_freshness(readout, &info, ReadoutSlot::Bridge, pkg.as_deref()); + if let Some((info, pkg)) = by_target.remove(&(check.id.clone(), ReadoutSlot::Bridge)) { + if let Some(readout) = check.bridge.as_mut() { + apply_freshness(readout, &info, ReadoutSlot::Bridge, pkg.as_deref()); + } + apply_freshness_timeouts(check, &info); } // Mirror the headline readout (bridge if present, else main) into the // flat fields for backward-compatible consumers. @@ -391,6 +494,7 @@ async fn populate_freshness( } } else if let Some((info, _pkg)) = by_target.remove(&(check.id.clone(), ReadoutSlot::Flat)) { + apply_freshness_timeouts(check, &info); check.installed_version = info.installed; check.latest_version = info.latest; let self_updating = is_self_updating(check.install_source.as_ref()); @@ -571,6 +675,11 @@ pub(crate) enum ExecOutcome { /// The shell itself couldn't be spawned (or `wait()` failed). Rare; means /// we have no signal at all from the inner command. Spawn(std::io::Error), + /// The command ran longer than the bounded doctor probe timeout. + Timeout { + command: String, + timeout: std::time::Duration, + }, /// The shell ran, the inner command exited non-zero. Code is `Some(127)` /// when the shell reports "command not found". Exit { @@ -586,16 +695,15 @@ pub(crate) fn execute_command_with_path_prefix( command: &str, path_prefix: &[PathBuf], ) -> ExecOutcome { - let mut cmd = build_shell_command(command, path_prefix); - cmd.stdout(std::process::Stdio::piped()) - .stderr(std::process::Stdio::piped()); - let child = match cmd.spawn() { - Ok(c) => c, - Err(e) => return ExecOutcome::Spawn(e), - }; - let output = match child.wait_with_output() { - Ok(o) => o, - Err(e) => return ExecOutcome::Spawn(e), + let cmd = build_shell_command(command, path_prefix); + let output = match run_command_with_timeout(cmd, command, DEFAULT_PROBE_TIMEOUT) { + Ok(output) => output, + Err(CommandError::Spawn { source, .. } | CommandError::Wait { source, .. }) => { + return ExecOutcome::Spawn(source) + } + Err(CommandError::Timeout { command, timeout }) => { + return ExecOutcome::Timeout { command, timeout } + } }; if output.status.success() { ExecOutcome::Ok @@ -803,6 +911,7 @@ mod tests { installed: Some("0.1.0".into()), latest: Some("0.2.0".into()), update_available: Some(true), + command_timeouts: Vec::new(), }; apply_freshness( &mut readout, @@ -829,6 +938,7 @@ mod tests { installed: Some("0.1.0".into()), latest: Some("0.2.0".into()), update_available: Some(true), + command_timeouts: Vec::new(), }; apply_freshness(&mut readout, &info, ReadoutSlot::Bridge, Some("amp")); assert_eq!(readout.update_command.as_deref(), Some("brew upgrade amp"),); @@ -848,6 +958,7 @@ mod tests { installed: Some("1.0.0".into()), latest: Some("2.0.0".into()), update_available: Some(true), + command_timeouts: Vec::new(), }; apply_freshness( &mut readout, @@ -875,6 +986,7 @@ mod tests { installed: Some("0.2.0".into()), latest: Some("0.2.0".into()), update_available: Some(false), + command_timeouts: Vec::new(), }; apply_freshness(&mut readout, &info, ReadoutSlot::Main, Some("amp-acp")); assert!(readout.update_command.is_none()); diff --git a/crates/doctor/src/resolve.rs b/crates/doctor/src/resolve.rs index d0e1d368..f162969a 100644 --- a/crates/doctor/src/resolve.rs +++ b/crates/doctor/src/resolve.rs @@ -3,17 +3,29 @@ use std::path::{Path, PathBuf}; use std::process::Command; +use crate::command::{ + format_duration, run_command_with_timeout, CommandError, CommandTimeout, DEFAULT_PROBE_TIMEOUT, +}; + use super::types::{InstallSource, ResolvedBinary}; /// Resolve a binary by trying login shell path lookup, common install paths, /// then npm global install dirs. pub fn resolve_binary(cmd: &str) -> ResolvedBinary { + resolve_binary_with_diagnostics(cmd).0 +} + +pub(crate) fn resolve_binary_with_diagnostics(cmd: &str) -> (ResolvedBinary, Vec) { let mut lines = vec![format!("resolve '{cmd}':")]; + let mut timeouts = Vec::new(); // Strategy 1: Login shell path lookup (primary) lines.push(" strategy 1 — login shell path lookup:".to_string()); for (shell, lookup_cmd) in shell_lookup_commands(cmd) { - match Command::new(shell).args(["-l", "-c", &lookup_cmd]).output() { + let display_command = format!("{shell} -l -c '{lookup_cmd}'"); + let mut command = Command::new(shell); + command.args(["-l", "-c", &lookup_cmd]); + match run_command_with_timeout(command, &display_command, DEFAULT_PROBE_TIMEOUT) { Ok(output) => { let stdout = String::from_utf8_lossy(&output.stdout); if !output.status.success() { @@ -31,12 +43,7 @@ pub fn resolve_binary(cmd: &str) -> ResolvedBinary { " {shell} -l -c '{lookup_cmd}' => {} (resolved)", path.display() )); - let install_source = Some(detect_install_source(path)); - return ResolvedBinary { - path: Some(path.clone()), - search_output: lines.join("\n"), - install_source, - }; + return resolved_binary(path.clone(), &lines, timeouts); } if let Some(path) = candidate_paths.first() { @@ -53,6 +60,17 @@ pub fn resolve_binary(cmd: &str) -> ResolvedBinary { )); } } + Err(CommandError::Timeout { command, timeout }) => { + lines.push(format!( + " {display_command} => timed out after {}", + format_duration(timeout), + )); + timeouts.push(CommandTimeout::new( + format!("{cmd} binary resolution"), + command, + timeout, + )); + } Err(e) => { lines.push(format!(" {shell} -l -c '{lookup_cmd}' => error: {e}")); } @@ -70,12 +88,7 @@ pub fn resolve_binary(cmd: &str) -> ResolvedBinary { let path = PathBuf::from(dir).join(cmd); if is_executable_file(&path) { lines.push(format!(" {} => found (resolved)", path.display())); - let install_source = Some(detect_install_source(&path)); - return ResolvedBinary { - path: Some(path), - search_output: lines.join("\n"), - install_source, - }; + return resolved_binary(path, &lines, timeouts); } lines.push(format!(" {} => not found", path.display())); } @@ -94,12 +107,7 @@ pub fn resolve_binary(cmd: &str) -> ResolvedBinary { let path = dir.join(cmd); if path.exists() { lines.push(format!(" {} => found (resolved)", path.display())); - let install_source = Some(detect_install_source(&path)); - return ResolvedBinary { - path: Some(path), - search_output: lines.join("\n"), - install_source, - }; + return resolved_binary(path, &lines, timeouts); } lines.push(format!(" {} => not found", path.display())); } @@ -111,26 +119,40 @@ pub fn resolve_binary(cmd: &str) -> ResolvedBinary { // but costs one subprocess — only invoked when the static probes above // didn't find the binary). `npm prefix -g` is the version-stable // equivalent of the older `npm bin -g`; the bin dir is `/bin`. - if let Some(npm_bin_dir) = npm_global_bin_dir(&mut lines) { + if let Some(npm_bin_dir) = npm_global_bin_dir(&mut lines, &mut timeouts) { let path = npm_bin_dir.join(cmd); if path.exists() { lines.push(format!(" {} => found (resolved)", path.display())); - let install_source = Some(detect_install_source(&path)); - return ResolvedBinary { - path: Some(path), - search_output: lines.join("\n"), - install_source, - }; + return resolved_binary(path, &lines, timeouts); } lines.push(format!(" {} => not found", path.display())); } lines.push(" not found in any location".to_string()); - ResolvedBinary { - path: None, - search_output: lines.join("\n"), - install_source: None, - } + ( + ResolvedBinary { + path: None, + search_output: lines.join("\n"), + install_source: None, + }, + timeouts, + ) +} + +fn resolved_binary( + path: PathBuf, + lines: &[String], + timeouts: Vec, +) -> (ResolvedBinary, Vec) { + let install_source = Some(detect_install_source(&path)); + ( + ResolvedBinary { + path: Some(path), + search_output: lines.join("\n"), + install_source, + }, + timeouts, + ) } fn shell_lookup_commands(cmd: &str) -> [(&'static str, String); 2] { @@ -216,8 +238,27 @@ fn read_subdirs(parent: &Path) -> Vec { } } -fn npm_global_bin_dir(lines: &mut Vec) -> Option { - let output = Command::new("npm").args(["prefix", "-g"]).output().ok()?; +fn npm_global_bin_dir( + lines: &mut Vec, + timeouts: &mut Vec, +) -> Option { + let mut command = Command::new("npm"); + command.args(["prefix", "-g"]); + let output = match run_command_with_timeout(command, "npm prefix -g", DEFAULT_PROBE_TIMEOUT) { + Ok(output) => output, + Err(CommandError::Timeout { command, timeout }) => { + lines.push(format!( + " npm prefix -g => timed out after {}", + format_duration(timeout), + )); + timeouts.push(CommandTimeout::new("npm global prefix", command, timeout)); + return None; + } + Err(e) => { + lines.push(format!(" npm prefix -g => error: {e}")); + return None; + } + }; if !output.status.success() { lines.push(" npm prefix -g => failed".to_string()); return None; From 2b4d6d01901e7dcf904a7ffbea31a7c6de2d1e89 Mon Sep 17 00:00:00 2001 From: Kalvin Chau Date: Thu, 4 Jun 2026 13:45:26 -0700 Subject: [PATCH 2/4] fix(doctor): bound timeout probe cleanup kill and reap timed-out probes without joining reader threads so escaped descendants cannot block doctor callers. share timeout check construction and aggregate freshness timeout output while keeping public report fields unchanged. --- crates/doctor/src/agents.rs | 38 +++--- crates/doctor/src/checks.rs | 148 +++++----------------- crates/doctor/src/command.rs | 197 ++++++++++++++++++++++------- crates/doctor/src/lib.rs | 146 ++++++++++++++++++--- crates/doctor/src/timeout_check.rs | 97 ++++++++++++++ 5 files changed, 427 insertions(+), 199 deletions(-) create mode 100644 crates/doctor/src/timeout_check.rs diff --git a/crates/doctor/src/agents.rs b/crates/doctor/src/agents.rs index 08420002..cac3adc6 100644 --- a/crates/doctor/src/agents.rs +++ b/crates/doctor/src/agents.rs @@ -7,6 +7,7 @@ use crate::command::{ run_command_with_timeout, CommandError, CommandTimeout, DEFAULT_PROBE_TIMEOUT, }; use crate::resolve::format_command_output; +use crate::timeout_check::{command_timeout_check, TimeoutCheck}; use crate::types::{ AgentVersionInfo, AuthStatus, CheckStatus, DoctorCheck, FixType, InstallSource, ResolvedBinary, }; @@ -367,29 +368,20 @@ pub fn check_single_ai_agent( bridge: None, } } - Err(CommandError::Timeout { command, timeout }) => { - let timeout = CommandTimeout::new(info.label, command, timeout); - DoctorCheck { - id: info.id.to_string(), - label: info.label.to_string(), - status: CheckStatus::Fail, - message: timeout.message(), - fix_url: None, - fix_command: None, - fix_type: None, - path: resolved_path, - bridge_path: None, - raw_output: Some(format!("{header}\n{}\n{search}", timeout.raw_output())), - auth_status: None, - installed_version: None, - latest_version: None, - update_available: None, - install_source: bridge_install_source.clone(), - self_updating: None, - main: version_readout(bridge_install_source.clone()), - bridge: None, - } - } + Err(CommandError::Timeout { command, timeout }) => command_timeout_check( + TimeoutCheck::new( + info.id, + info.label, + CheckStatus::Fail, + &header, + command, + timeout, + ) + .path(resolved_path) + .install_source(bridge_install_source.clone()) + .main(version_readout(bridge_install_source.clone())) + .raw_suffix(Some(&search)), + ), Err(e) => DoctorCheck { id: info.id.to_string(), label: info.label.to_string(), diff --git a/crates/doctor/src/checks.rs b/crates/doctor/src/checks.rs index cafc9cd3..01157450 100644 --- a/crates/doctor/src/checks.rs +++ b/crates/doctor/src/checks.rs @@ -2,68 +2,14 @@ use std::process::Command; -use crate::command::{ - run_command_with_timeout, CommandError, CommandTimeout, DEFAULT_PROBE_TIMEOUT, -}; +use crate::command::{run_command_with_timeout, CommandError, DEFAULT_PROBE_TIMEOUT}; use crate::resolve::format_command_output; -use crate::types::{CheckStatus, DoctorCheck, FixType, InstallSource, ResolvedBinary}; +use crate::timeout_check::{command_timeout_check, TimeoutCheck}; +use crate::types::{CheckStatus, DoctorCheck, FixType, ResolvedBinary}; /// Fix command for enabling copy-on-write git clones. pub(crate) const CLONEFILE_FIX_COMMAND: &str = "git config --global core.clonefile true"; -struct TimeoutCheck<'a> { - id: String, - label: String, - status: CheckStatus, - header: &'a str, - command: String, - timeout: std::time::Duration, - path: Option, - install_source: Option, - raw_suffix: Option<&'a str>, -} - -fn command_timeout_check(input: TimeoutCheck<'_>) -> DoctorCheck { - let TimeoutCheck { - id, - label, - status, - header, - command, - timeout, - path, - install_source, - raw_suffix, - } = input; - let timeout = CommandTimeout::new(label.clone(), command, timeout); - let mut raw = format!("{header}\n{}", timeout.raw_output()); - if let Some(suffix) = raw_suffix { - raw.push('\n'); - raw.push_str(suffix); - } - - DoctorCheck { - id, - label, - status, - message: timeout.message(), - fix_url: None, - fix_command: None, - fix_type: None, - path, - bridge_path: None, - raw_output: Some(raw), - auth_status: None, - installed_version: None, - latest_version: None, - update_available: None, - install_source, - self_updating: None, - main: None, - bridge: None, - } -} - /// Check that `git` is installed and reachable. pub fn check_git(resolved: &ResolvedBinary) -> DoctorCheck { let label = "Git".to_string(); @@ -156,17 +102,12 @@ pub fn check_git(resolved: &ResolvedBinary) -> DoctorCheck { bridge: None, } } - Err(CommandError::Timeout { command, timeout }) => command_timeout_check(TimeoutCheck { - id, - label, - status: CheckStatus::Fail, - header, - command, - timeout, - path: Some(path_str), - install_source: resolved.install_source.clone(), - raw_suffix: Some(search), - }), + Err(CommandError::Timeout { command, timeout }) => command_timeout_check( + TimeoutCheck::new(id, label, CheckStatus::Fail, header, command, timeout) + .path(Some(path_str)) + .install_source(resolved.install_source.clone()) + .raw_suffix(Some(search)), + ), Err(e) => DoctorCheck { id, label, @@ -283,17 +224,12 @@ pub fn check_gh(resolved: &ResolvedBinary) -> DoctorCheck { bridge: None, } } - Err(CommandError::Timeout { command, timeout }) => command_timeout_check(TimeoutCheck { - id, - label, - status: CheckStatus::Fail, - header, - command, - timeout, - path: Some(path_str), - install_source: resolved.install_source.clone(), - raw_suffix: Some(search), - }), + Err(CommandError::Timeout { command, timeout }) => command_timeout_check( + TimeoutCheck::new(id, label, CheckStatus::Fail, header, command, timeout) + .path(Some(path_str)) + .install_source(resolved.install_source.clone()) + .raw_suffix(Some(search)), + ), Err(e) => DoctorCheck { id, label, @@ -408,17 +344,9 @@ pub fn check_gh_auth(gh: &ResolvedBinary) -> DoctorCheck { } } } - Err(CommandError::Timeout { command, timeout }) => command_timeout_check(TimeoutCheck { - id, - label, - status: CheckStatus::Fail, - header, - command, - timeout, - path: None, - install_source: None, - raw_suffix: None, - }), + Err(CommandError::Timeout { command, timeout }) => command_timeout_check( + TimeoutCheck::new(id, label, CheckStatus::Fail, header, command, timeout), + ), Err(e) => DoctorCheck { id, label, @@ -540,20 +468,17 @@ pub fn check_git_lfs(git: &ResolvedBinary, git_lfs: &ResolvedBinary) -> DoctorCh bridge: None, } } - Err(CommandError::Timeout { command, timeout }) => command_timeout_check(TimeoutCheck { - id, - label, - status: CheckStatus::Warn, - header, - command, - timeout, - path: git_lfs - .path - .as_ref() - .map(|p| p.to_string_lossy().to_string()), - install_source: git_lfs.install_source.clone(), - raw_suffix: Some(search), - }), + Err(CommandError::Timeout { command, timeout }) => command_timeout_check( + TimeoutCheck::new(id, label, CheckStatus::Warn, header, command, timeout) + .path( + git_lfs + .path + .as_ref() + .map(|p| p.to_string_lossy().to_string()), + ) + .install_source(git_lfs.install_source.clone()) + .raw_suffix(Some(search)), + ), Err(e) => DoctorCheck { id, label, @@ -697,17 +622,10 @@ pub fn check_clonefile(git: &ResolvedBinary) -> DoctorCheck { bridge: None, } } - Err(CommandError::Timeout { command, timeout }) => command_timeout_check(TimeoutCheck { - id, - label, - status: CheckStatus::Warn, - header, - command, - timeout, - path: None, - install_source: git.install_source.clone(), - raw_suffix: None, - }), + Err(CommandError::Timeout { command, timeout }) => command_timeout_check( + TimeoutCheck::new(id, label, CheckStatus::Warn, header, command, timeout) + .install_source(git.install_source.clone()), + ), Err(e) => DoctorCheck { id, label, diff --git a/crates/doctor/src/command.rs b/crates/doctor/src/command.rs index 07456521..d9adc7e1 100644 --- a/crates/doctor/src/command.rs +++ b/crates/doctor/src/command.rs @@ -2,11 +2,15 @@ //! //! The doctor crate runs many small local probes. None of them should be able //! to pin a blocking worker forever, so probe call sites route through this -//! module instead of `Command::output()`. +//! module instead of `Command::output()`. On timeout or wait failure, the +//! runner kills and reaps the direct child; on Unix it first targets the +//! child's process group. Descendants that deliberately detach from that group +//! can survive best-effort cleanup and keep pipe reader threads alive until +//! their inherited stdout/stderr handles close, but the caller still returns. use std::fmt; use std::io::Read; -use std::process::{Command, Output, Stdio}; +use std::process::{Child, Command, Output, Stdio}; use std::thread::{self, JoinHandle}; use std::time::Duration; @@ -139,28 +143,14 @@ pub(crate) fn run_command_with_timeout( let status = match child.wait_timeout(timeout) { Ok(Some(status)) => status, Ok(None) => { - kill_child_process_group(&mut child); - let _ = child.wait(); - if let Some(handle) = stdout_thread { - let _ = join_reader(handle); - } - if let Some(handle) = stderr_thread { - let _ = join_reader(handle); - } + clean_up_after_incomplete_wait(&mut child); return Err(CommandError::Timeout { command: display_command, timeout, }); } Err(source) => { - kill_child_process_group(&mut child); - let _ = child.wait(); - if let Some(handle) = stdout_thread { - let _ = join_reader(handle); - } - if let Some(handle) = stderr_thread { - let _ = join_reader(handle); - } + clean_up_after_incomplete_wait(&mut child); return Err(CommandError::Wait { command: display_command, source, @@ -182,22 +172,79 @@ fn join_reader(handle: JoinHandle>) -> Vec { handle.join().unwrap_or_default() } -fn kill_child_process_group(child: &mut std::process::Child) { - #[cfg(unix)] - if let Ok(pid) = i32::try_from(child.id()) { - let _ = nix::sys::signal::kill( - nix::unistd::Pid::from_raw(-pid), - nix::sys::signal::Signal::SIGKILL, - ); +fn clean_up_after_incomplete_wait(child: &mut Child) { + kill_child_process_group_or_child(child); + let _ = child.wait(); +} + +fn kill_child_process_group_or_child(child: &mut Child) { + if kill_child_process_group(child) { + return; } let _ = child.kill(); } +#[cfg(unix)] +fn kill_child_process_group(child: &Child) -> bool { + let Ok(pid) = i32::try_from(child.id()) else { + return false; + }; + + nix::sys::signal::kill( + nix::unistd::Pid::from_raw(-pid), + nix::sys::signal::Signal::SIGKILL, + ) + .is_ok() +} + +#[cfg(not(unix))] +fn kill_child_process_group(_child: &Child) -> bool { + false +} + +#[cfg(test)] +fn process_exists(pid: i32) -> bool { + #[cfg(unix)] + { + nix::sys::signal::kill(nix::unistd::Pid::from_raw(pid), None).is_ok() + } + + #[cfg(not(unix))] + { + let _ = pid; + false + } +} + #[cfg(test)] mod tests { use super::*; + #[cfg(unix)] + fn read_pid_file_with_retries(path: &std::path::Path) -> i32 { + for _ in 0..20 { + if let Ok(raw) = std::fs::read_to_string(path) { + if let Ok(pid) = raw.trim().parse::() { + return pid; + } + } + std::thread::sleep(Duration::from_millis(50)); + } + panic!("pid file was not written: {}", path.display()); + } + + #[cfg(unix)] + fn wait_until_process_exits(pid: i32) -> bool { + (0..20).any(|_| { + let gone = !process_exists(pid); + if !gone { + std::thread::sleep(Duration::from_millis(50)); + } + gone + }) + } + #[test] fn command_runner_captures_success_output() { let mut command = Command::new("sh"); @@ -216,24 +263,34 @@ mod tests { let mut command = Command::new("sh"); command.arg("-c").arg("sleep 5"); - let err = run_command_with_timeout(command, "sh -c sleep", Duration::from_millis(100)) + let err = run_command_with_timeout(command, "sh -c sleep", Duration::from_millis(250)) .unwrap_err(); match err { CommandError::Timeout { command, timeout } => { assert_eq!(command, "sh -c sleep"); - assert_eq!(timeout, Duration::from_millis(100)); + assert_eq!(timeout, Duration::from_millis(250)); } other => panic!("expected timeout, got {other:?}"), } } + #[test] + fn command_runner_captures_large_output() { + let mut command = Command::new("sh"); + command.arg("-c").arg("yes | head -c 200000"); + + let output = + run_command_with_timeout(command, "sh -c large output", Duration::from_secs(2)) + .unwrap(); + + assert!(output.status.success()); + assert_eq!(output.stdout.len(), 200_000); + } + #[cfg(unix)] #[test] fn command_runner_kills_process_group_descendant() { - use nix::sys::signal; - use nix::unistd::Pid; - let dir = std::env::temp_dir().join(format!( "doctor-command-timeout-{}-{}", std::process::id(), @@ -243,8 +300,13 @@ mod tests { .as_nanos(), )); std::fs::create_dir_all(&dir).unwrap(); + let shell_pid_file = dir.join("shell.pid"); let pid_file = dir.join("child.pid"); - let script = format!("sleep 30 & echo $! > {}; wait", pid_file.display()); + let script = format!( + "echo $$ > {}; sleep 30 & echo $! > {}; wait", + shell_pid_file.display(), + pid_file.display() + ); let mut command = Command::new("sh"); command.arg("-c").arg(script); @@ -252,21 +314,70 @@ mod tests { .unwrap_err(); assert!(matches!(err, CommandError::Timeout { .. })); - let pid = std::fs::read_to_string(&pid_file) - .expect("descendant pid should be written before timeout") - .trim() - .parse::() - .unwrap(); + let shell_pid = read_pid_file_with_retries(&shell_pid_file); + let pid = read_pid_file_with_retries(&pid_file); + let shell_exited = wait_until_process_exits(shell_pid); + let exited = wait_until_process_exits(pid); - let exited = (0..20).any(|_| { - let gone = signal::kill(Pid::from_raw(pid), None).is_err(); - if !gone { - std::thread::sleep(Duration::from_millis(50)); + let _ = std::fs::remove_dir_all(&dir); + assert!( + shell_exited, + "direct shell process {shell_pid} survived timeout cleanup" + ); + assert!(exited, "descendant process {pid} survived timeout cleanup"); + } + + #[cfg(unix)] + #[test] + fn command_runner_returns_when_escaped_descendant_keeps_pipes_open() { + use nix::sys::signal; + use nix::unistd::Pid; + + let perl = match std::process::Command::new("sh") + .arg("-c") + .arg("command -v perl") + .output() + { + Ok(output) if output.status.success() => { + String::from_utf8_lossy(&output.stdout).trim().to_string() } - gone - }); + _ => return, + }; + let dir = std::env::temp_dir().join(format!( + "doctor-command-escaped-timeout-{}-{}", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos(), + )); + std::fs::create_dir_all(&dir).unwrap(); + let pid_file = dir.join("escaped.pid"); + let script = format!( + "\"{perl}\" -MPOSIX=setsid -e 'setsid(); open(my $fh, q(>), $ARGV[0]) or die $!; print $fh qq($$\\n); close($fh); sleep 5' {} & wait", + pid_file.display() + ); + + let mut command = Command::new("sh"); + command.arg("-c").arg(script); + let started = std::time::Instant::now(); + let err = run_command_with_timeout( + command, + "sh -c escaped descendant", + Duration::from_millis(250), + ) + .unwrap_err(); + let elapsed = started.elapsed(); + + let pid = read_pid_file_with_retries(&pid_file); + let _ = signal::kill(Pid::from_raw(pid), signal::Signal::SIGKILL); let _ = std::fs::remove_dir_all(&dir); - assert!(exited, "descendant process {pid} survived timeout cleanup"); + + assert!(matches!(err, CommandError::Timeout { .. })); + assert!( + elapsed < Duration::from_secs(2), + "timeout path waited {elapsed:?} for escaped descendant pipe EOF" + ); } } diff --git a/crates/doctor/src/lib.rs b/crates/doctor/src/lib.rs index ad814c85..8cbfd753 100644 --- a/crates/doctor/src/lib.rs +++ b/crates/doctor/src/lib.rs @@ -10,11 +10,12 @@ mod command; pub(crate) mod freshness; pub(crate) mod package_ids; pub mod resolve; +mod timeout_check; pub mod types; pub use types::{AgentVersionInfo, CheckStatus, DoctorCheck, DoctorReport, FixType}; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::path::PathBuf; use std::sync::{Arc, Mutex}; @@ -212,21 +213,73 @@ async fn collect_base_report(npm_registry: Option<&str>) -> DoctorReport { } fn timeout_diagnostic_checks(timeouts: Vec) -> Vec { - let mut seen = std::collections::HashSet::new(); + let mut seen_timeouts = HashSet::new(); + let mut used_ids = HashSet::new(); let mut checks = Vec::new(); for timeout in timeouts { - if !seen.insert((timeout.label.clone(), timeout.command.clone())) { + let fingerprint = ( + timeout.label.clone(), + timeout.command.clone(), + timeout.timeout, + ); + if !seen_timeouts.insert(fingerprint) { continue; } - checks.push(timeout_diagnostic_check(timeout)); + let id = unique_timeout_diagnostic_id(&timeout, &mut used_ids); + checks.push(timeout_diagnostic_check(timeout, id)); } checks } -fn timeout_diagnostic_check(timeout: CommandTimeout) -> DoctorCheck { +fn unique_timeout_diagnostic_id( + timeout: &CommandTimeout, + used_ids: &mut HashSet, +) -> String { let slug_source = format!("{} {}", timeout.label, timeout.command); + let base = format!("subprocess-timeout-{}", slug(&slug_source)); + if used_ids.insert(base.clone()) { + return base; + } + + let suffixed = format!("{base}-{}", stable_timeout_suffix(timeout)); + if used_ids.insert(suffixed.clone()) { + return suffixed; + } + + let mut index = 2; + loop { + let candidate = format!("{suffixed}-{index}"); + if used_ids.insert(candidate.clone()) { + return candidate; + } + index += 1; + } +} + +fn stable_timeout_suffix(timeout: &CommandTimeout) -> String { + const FNV_OFFSET: u64 = 0xcbf29ce484222325; + const FNV_PRIME: u64 = 0x00000100000001b3; + + fn update(mut hash: u64, bytes: &[u8]) -> u64 { + for byte in bytes { + hash ^= u64::from(*byte); + hash = hash.wrapping_mul(FNV_PRIME); + } + hash + } + + let mut hash = FNV_OFFSET; + hash = update(hash, timeout.label.as_bytes()); + hash = update(hash, &[0]); + hash = update(hash, timeout.command.as_bytes()); + hash = update(hash, &[0]); + hash = update(hash, timeout.timeout.as_nanos().to_string().as_bytes()); + format!("{hash:016x}") +} + +fn timeout_diagnostic_check(timeout: CommandTimeout, id: String) -> DoctorCheck { DoctorCheck { - id: format!("subprocess-timeout-{}", slug(&slug_source)), + id, label: timeout.label.clone(), status: CheckStatus::Warn, message: timeout.message(), @@ -330,8 +383,8 @@ fn apply_freshness( } } -fn apply_freshness_timeouts(check: &mut DoctorCheck, info: &freshness::VersionInfo) { - if info.command_timeouts.is_empty() { +fn apply_freshness_timeouts(check: &mut DoctorCheck, timeouts: &[CommandTimeout]) { + if timeouts.is_empty() { return; } @@ -339,8 +392,8 @@ fn apply_freshness_timeouts(check: &mut DoctorCheck, info: &freshness::VersionIn check.status = CheckStatus::Warn; } - let first = &info.command_timeouts[0]; - let summary = if info.command_timeouts.len() == 1 { + let first = &timeouts[0]; + let summary = if timeouts.len() == 1 { format!( "freshness timed out after {} running {}", format_duration(first.timeout), @@ -349,7 +402,7 @@ fn apply_freshness_timeouts(check: &mut DoctorCheck, info: &freshness::VersionIn } else { format!( "{} freshness probes timed out, first after {} running {}", - info.command_timeouts.len(), + timeouts.len(), format_duration(first.timeout), first.command ) @@ -361,7 +414,7 @@ fn apply_freshness_timeouts(check: &mut DoctorCheck, info: &freshness::VersionIn raw.push('\n'); } raw.push_str("# Freshness subprocess timeouts:"); - for timeout in &info.command_timeouts { + for timeout in timeouts { raw.push('\n'); raw.push_str(&timeout.raw_output()); } @@ -464,18 +517,20 @@ async fn populate_freshness( for check in &mut report.checks { let is_agent = check.main.is_some() || check.bridge.is_some(); if is_agent { + let mut freshness_timeouts = Vec::new(); if let Some((info, pkg)) = by_target.remove(&(check.id.clone(), ReadoutSlot::Main)) { if let Some(readout) = check.main.as_mut() { apply_freshness(readout, &info, ReadoutSlot::Main, pkg.as_deref()); } - apply_freshness_timeouts(check, &info); + freshness_timeouts.extend(info.command_timeouts); } if let Some((info, pkg)) = by_target.remove(&(check.id.clone(), ReadoutSlot::Bridge)) { if let Some(readout) = check.bridge.as_mut() { apply_freshness(readout, &info, ReadoutSlot::Bridge, pkg.as_deref()); } - apply_freshness_timeouts(check, &info); + freshness_timeouts.extend(info.command_timeouts); } + apply_freshness_timeouts(check, &freshness_timeouts); // Mirror the headline readout (bridge if present, else main) into the // flat fields for backward-compatible consumers. let headline = check.bridge.as_ref().or(check.main.as_ref()).map(|r| { @@ -494,7 +549,7 @@ async fn populate_freshness( } } else if let Some((info, _pkg)) = by_target.remove(&(check.id.clone(), ReadoutSlot::Flat)) { - apply_freshness_timeouts(check, &info); + apply_freshness_timeouts(check, &info.command_timeouts); check.installed_version = info.installed; check.latest_version = info.latest; let self_updating = is_self_updating(check.install_source.as_ref()); @@ -601,6 +656,9 @@ where // writes into `raw_output`. on_line(&format!("$ {command}")); + // Fixes are intentionally not routed through the bounded probe runner: + // these are user-triggered install/auth/update actions and can reasonably + // be interactive or long-running. run_command_streaming(command, on_line).await } @@ -716,9 +774,11 @@ pub(crate) fn execute_command_with_path_prefix( } /// Spawn `command` through a login shell, stream stdout/stderr lines to -/// `on_line`, and return based on the process exit status. Stderr lines are -/// also accumulated so a non-zero exit can surface a useful error message -/// (matching the non-streaming behavior of the previous `execute_command`). +/// `on_line`, and return based on the process exit status. This path is +/// deliberately unbounded: fix commands are user-triggered install/auth/update +/// actions and may prompt or run package managers. Stderr lines are also +/// accumulated so a non-zero exit can surface a useful error message (matching +/// the non-streaming behavior of the previous `execute_command`). fn run_command_streaming_blocking(command: &str, mut on_line: F) -> Result<(), String> where F: FnMut(&str), @@ -794,6 +854,56 @@ mod tests { use super::*; use std::sync::{Arc, Mutex}; + use std::time::Duration; + + #[test] + fn timeout_diagnostic_ids_are_collision_safe() { + let timeout = Duration::from_secs(10); + let checks = timeout_diagnostic_checks(vec![ + CommandTimeout::new("A B", "c", timeout), + CommandTimeout::new("A", "B C", timeout), + CommandTimeout::new("A B", "c", timeout), + ]); + + assert_eq!(checks.len(), 2, "exact duplicate timeout should be deduped"); + assert_eq!(checks[0].id, "subprocess-timeout-a-b-c"); + assert_ne!(checks[0].id, checks[1].id); + assert!( + checks[1].id.starts_with("subprocess-timeout-a-b-c-"), + "slug collision should get deterministic suffix: {:?}", + checks.iter().map(|c| &c.id).collect::>(), + ); + } + + #[test] + fn apply_freshness_timeouts_appends_one_combined_block() { + let mut check = empty_check("ai-agent-test", "Test Agent"); + check.status = CheckStatus::Pass; + check.message = "Installed".to_string(); + check.raw_output = Some("base raw".to_string()); + + let timeouts = vec![ + CommandTimeout::new( + "installed version", + "agent --version", + Duration::from_secs(15), + ), + CommandTimeout::new( + "npm latest version", + "npm view agent-acp version", + Duration::from_secs(15), + ), + ]; + + apply_freshness_timeouts(&mut check, &timeouts); + + assert_eq!(check.status, CheckStatus::Warn); + assert!(check.message.contains("2 freshness probes timed out")); + let raw = check.raw_output.unwrap(); + assert_eq!(raw.matches("# Freshness subprocess timeouts:").count(), 1); + assert!(raw.contains("$ agent --version")); + assert!(raw.contains("$ npm view agent-acp version")); + } /// Streaming helper must invoke `on_line` for each output line of a /// successful command. Lines from `.zshrc` etc. may also appear; we only diff --git a/crates/doctor/src/timeout_check.rs b/crates/doctor/src/timeout_check.rs new file mode 100644 index 00000000..e4bfa607 --- /dev/null +++ b/crates/doctor/src/timeout_check.rs @@ -0,0 +1,97 @@ +use std::time::Duration; + +use crate::command::CommandTimeout; +use crate::types::{AgentVersionInfo, AuthStatus, CheckStatus, DoctorCheck, InstallSource}; + +pub(crate) struct TimeoutCheck<'a> { + id: String, + label: String, + status: CheckStatus, + header: &'a str, + command: String, + timeout: Duration, + path: Option, + bridge_path: Option, + install_source: Option, + auth_status: Option, + main: Option, + bridge: Option, + raw_suffix: Option<&'a str>, +} + +impl<'a> TimeoutCheck<'a> { + pub(crate) fn new( + id: impl Into, + label: impl Into, + status: CheckStatus, + header: &'a str, + command: impl Into, + timeout: Duration, + ) -> Self { + Self { + id: id.into(), + label: label.into(), + status, + header, + command: command.into(), + timeout, + path: None, + bridge_path: None, + install_source: None, + auth_status: None, + main: None, + bridge: None, + raw_suffix: None, + } + } + + pub(crate) fn path(mut self, path: Option) -> Self { + self.path = path; + self + } + + pub(crate) fn install_source(mut self, install_source: Option) -> Self { + self.install_source = install_source; + self + } + + pub(crate) fn main(mut self, main: Option) -> Self { + self.main = main; + self + } + + pub(crate) fn raw_suffix(mut self, raw_suffix: Option<&'a str>) -> Self { + self.raw_suffix = raw_suffix; + self + } +} + +pub(crate) fn command_timeout_check(input: TimeoutCheck<'_>) -> DoctorCheck { + let timeout = CommandTimeout::new(input.label.clone(), input.command, input.timeout); + let mut raw = format!("{}\n{}", input.header, timeout.raw_output()); + if let Some(suffix) = input.raw_suffix { + raw.push('\n'); + raw.push_str(suffix); + } + + DoctorCheck { + id: input.id, + label: input.label, + status: input.status, + message: timeout.message(), + fix_url: None, + fix_command: None, + fix_type: None, + path: input.path, + bridge_path: input.bridge_path, + raw_output: Some(raw), + auth_status: input.auth_status, + installed_version: None, + latest_version: None, + update_available: None, + install_source: input.install_source, + self_updating: None, + main: input.main, + bridge: input.bridge, + } +} From f92fb6c4aaf56021902ed030f0ca4014cdeed426 Mon Sep 17 00:00:00 2001 From: Kalvin Chau Date: Thu, 4 Jun 2026 13:47:52 -0700 Subject: [PATCH 3/4] test(doctor): trim timeout runner tests remove duplicate descendant cleanup coverage and keep the timeout runner tests focused on caller bounds, large output capture, and basic timeout behavior. --- crates/doctor/src/command.rs | 64 ------------------------------------ 1 file changed, 64 deletions(-) diff --git a/crates/doctor/src/command.rs b/crates/doctor/src/command.rs index d9adc7e1..bc3a3a7e 100644 --- a/crates/doctor/src/command.rs +++ b/crates/doctor/src/command.rs @@ -203,20 +203,6 @@ fn kill_child_process_group(_child: &Child) -> bool { false } -#[cfg(test)] -fn process_exists(pid: i32) -> bool { - #[cfg(unix)] - { - nix::sys::signal::kill(nix::unistd::Pid::from_raw(pid), None).is_ok() - } - - #[cfg(not(unix))] - { - let _ = pid; - false - } -} - #[cfg(test)] mod tests { use super::*; @@ -234,17 +220,6 @@ mod tests { panic!("pid file was not written: {}", path.display()); } - #[cfg(unix)] - fn wait_until_process_exits(pid: i32) -> bool { - (0..20).any(|_| { - let gone = !process_exists(pid); - if !gone { - std::thread::sleep(Duration::from_millis(50)); - } - gone - }) - } - #[test] fn command_runner_captures_success_output() { let mut command = Command::new("sh"); @@ -288,45 +263,6 @@ mod tests { assert_eq!(output.stdout.len(), 200_000); } - #[cfg(unix)] - #[test] - fn command_runner_kills_process_group_descendant() { - let dir = std::env::temp_dir().join(format!( - "doctor-command-timeout-{}-{}", - std::process::id(), - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_nanos(), - )); - std::fs::create_dir_all(&dir).unwrap(); - let shell_pid_file = dir.join("shell.pid"); - let pid_file = dir.join("child.pid"); - let script = format!( - "echo $$ > {}; sleep 30 & echo $! > {}; wait", - shell_pid_file.display(), - pid_file.display() - ); - - let mut command = Command::new("sh"); - command.arg("-c").arg(script); - let err = run_command_with_timeout(command, "sh -c descendant", Duration::from_millis(250)) - .unwrap_err(); - assert!(matches!(err, CommandError::Timeout { .. })); - - let shell_pid = read_pid_file_with_retries(&shell_pid_file); - let pid = read_pid_file_with_retries(&pid_file); - let shell_exited = wait_until_process_exits(shell_pid); - let exited = wait_until_process_exits(pid); - - let _ = std::fs::remove_dir_all(&dir); - assert!( - shell_exited, - "direct shell process {shell_pid} survived timeout cleanup" - ); - assert!(exited, "descendant process {pid} survived timeout cleanup"); - } - #[cfg(unix)] #[test] fn command_runner_returns_when_escaped_descendant_keeps_pipes_open() { From 35494ab6ef52e97a6c4199c57655c8e6d0e8ef27 Mon Sep 17 00:00:00 2001 From: Kalvin Chau Date: Thu, 4 Jun 2026 13:55:30 -0700 Subject: [PATCH 4/4] test(doctor): reduce timeout test coverage collapse redundant runner assertions into the remaining regression tests and shorten timeout diagnostic setup while preserving deadlock and aggregation coverage. --- crates/doctor/src/command.rs | 110 +++++++---------------------------- crates/doctor/src/lib.rs | 37 +++++------- 2 files changed, 38 insertions(+), 109 deletions(-) diff --git a/crates/doctor/src/command.rs b/crates/doctor/src/command.rs index bc3a3a7e..e7a9e5fe 100644 --- a/crates/doctor/src/command.rs +++ b/crates/doctor/src/command.rs @@ -207,53 +207,12 @@ fn kill_child_process_group(_child: &Child) -> bool { mod tests { use super::*; - #[cfg(unix)] - fn read_pid_file_with_retries(path: &std::path::Path) -> i32 { - for _ in 0..20 { - if let Ok(raw) = std::fs::read_to_string(path) { - if let Ok(pid) = raw.trim().parse::() { - return pid; - } - } - std::thread::sleep(Duration::from_millis(50)); - } - panic!("pid file was not written: {}", path.display()); - } - - #[test] - fn command_runner_captures_success_output() { - let mut command = Command::new("sh"); - command.arg("-c").arg("printf stdout; printf stderr >&2"); - - let output = - run_command_with_timeout(command, "sh -c output", Duration::from_secs(2)).unwrap(); - - assert!(output.status.success()); - assert_eq!(String::from_utf8_lossy(&output.stdout), "stdout"); - assert_eq!(String::from_utf8_lossy(&output.stderr), "stderr"); - } - - #[test] - fn command_runner_times_out() { - let mut command = Command::new("sh"); - command.arg("-c").arg("sleep 5"); - - let err = run_command_with_timeout(command, "sh -c sleep", Duration::from_millis(250)) - .unwrap_err(); - - match err { - CommandError::Timeout { command, timeout } => { - assert_eq!(command, "sh -c sleep"); - assert_eq!(timeout, Duration::from_millis(250)); - } - other => panic!("expected timeout, got {other:?}"), - } - } - #[test] fn command_runner_captures_large_output() { let mut command = Command::new("sh"); - command.arg("-c").arg("yes | head -c 200000"); + command + .arg("-c") + .arg("yes | head -c 200000; printf stderr >&2"); let output = run_command_with_timeout(command, "sh -c large output", Duration::from_secs(2)) @@ -261,59 +220,34 @@ mod tests { assert!(output.status.success()); assert_eq!(output.stdout.len(), 200_000); + assert_eq!(String::from_utf8_lossy(&output.stderr), "stderr"); } #[cfg(unix)] #[test] fn command_runner_returns_when_escaped_descendant_keeps_pipes_open() { - use nix::sys::signal; - use nix::unistd::Pid; - - let perl = match std::process::Command::new("sh") - .arg("-c") - .arg("command -v perl") - .output() - { - Ok(output) if output.status.success() => { - String::from_utf8_lossy(&output.stdout).trim().to_string() - } - _ => return, - }; - - let dir = std::env::temp_dir().join(format!( - "doctor-command-escaped-timeout-{}-{}", - std::process::id(), - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_nanos(), - )); - std::fs::create_dir_all(&dir).unwrap(); - let pid_file = dir.join("escaped.pid"); - let script = format!( - "\"{perl}\" -MPOSIX=setsid -e 'setsid(); open(my $fh, q(>), $ARGV[0]) or die $!; print $fh qq($$\\n); close($fh); sleep 5' {} & wait", - pid_file.display() - ); - let mut command = Command::new("sh"); - command.arg("-c").arg(script); + command + .arg("-c") + .arg("perl -MPOSIX=setsid -e 'setsid(); sleep 2' & wait"); + let timeout = Duration::from_millis(250); let started = std::time::Instant::now(); - let err = run_command_with_timeout( - command, - "sh -c escaped descendant", - Duration::from_millis(250), - ) - .unwrap_err(); - let elapsed = started.elapsed(); + let err = + run_command_with_timeout(command, "sh -c escaped descendant", timeout).unwrap_err(); - let pid = read_pid_file_with_retries(&pid_file); - let _ = signal::kill(Pid::from_raw(pid), signal::Signal::SIGKILL); - let _ = std::fs::remove_dir_all(&dir); - - assert!(matches!(err, CommandError::Timeout { .. })); + match err { + CommandError::Timeout { + command, + timeout: actual, + } => { + assert_eq!(command, "sh -c escaped descendant"); + assert_eq!(actual, timeout); + } + other => panic!("expected timeout, got {other:?}"), + } assert!( - elapsed < Duration::from_secs(2), - "timeout path waited {elapsed:?} for escaped descendant pipe EOF" + started.elapsed() < Duration::from_secs(1), + "timeout path waited for escaped descendant pipe EOF" ); } } diff --git a/crates/doctor/src/lib.rs b/crates/doctor/src/lib.rs index 8cbfd753..917495c6 100644 --- a/crates/doctor/src/lib.rs +++ b/crates/doctor/src/lib.rs @@ -856,18 +856,20 @@ mod tests { use std::sync::{Arc, Mutex}; use std::time::Duration; + fn timeout(label: &str, command: &str) -> CommandTimeout { + CommandTimeout::new(label, command, Duration::from_secs(15)) + } + #[test] fn timeout_diagnostic_ids_are_collision_safe() { - let timeout = Duration::from_secs(10); let checks = timeout_diagnostic_checks(vec![ - CommandTimeout::new("A B", "c", timeout), - CommandTimeout::new("A", "B C", timeout), - CommandTimeout::new("A B", "c", timeout), + timeout("A B", "c"), + timeout("A", "B C"), + timeout("A B", "c"), ]); assert_eq!(checks.len(), 2, "exact duplicate timeout should be deduped"); assert_eq!(checks[0].id, "subprocess-timeout-a-b-c"); - assert_ne!(checks[0].id, checks[1].id); assert!( checks[1].id.starts_with("subprocess-timeout-a-b-c-"), "slug collision should get deterministic suffix: {:?}", @@ -877,22 +879,15 @@ mod tests { #[test] fn apply_freshness_timeouts_appends_one_combined_block() { - let mut check = empty_check("ai-agent-test", "Test Agent"); - check.status = CheckStatus::Pass; - check.message = "Installed".to_string(); - check.raw_output = Some("base raw".to_string()); - - let timeouts = vec![ - CommandTimeout::new( - "installed version", - "agent --version", - Duration::from_secs(15), - ), - CommandTimeout::new( - "npm latest version", - "npm view agent-acp version", - Duration::from_secs(15), - ), + let mut check = DoctorCheck { + status: CheckStatus::Pass, + message: "Installed".into(), + raw_output: Some("base raw".into()), + ..empty_check("ai-agent-test", "Test Agent") + }; + let timeouts = [ + timeout("installed version", "agent --version"), + timeout("npm latest version", "npm view agent-acp version"), ]; apply_freshness_timeouts(&mut check, &timeouts);