Skip to content

Commit e8e7983

Browse files
committed
Enhance libvirt ssh timeouts and feedback
Reduce default SSH timeout from 30s to 5s for faster failure detection. When SSH is not ready, retry up to 2 times while scraping the VM console via to show boot progress. This provides users with feedback during the boot process instead of silent waiting. Changes: - Reduce default SSH connection timeout from 30s to 5s - Implement retry logic with console scraping between attempts - Show systemd boot messages, service starts, and SSH readiness - Properly clean up virsh console child processes to avoid zombies - Thread-based console reading to prevent indefinite blocking Addresses the idea to speed up timeouts and ensure feedback by scraping hvc0 console output when vsock is unavailable. Assisted-by: Claude Code (Sonnet 4.5) Signed-off-by: gursewak1997 <gursmangat@gmail.com>
1 parent 8f8d301 commit e8e7983

File tree

2 files changed

+178
-65
lines changed

2 files changed

+178
-65
lines changed

crates/kit/src/libvirt/ssh.rs

Lines changed: 176 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@ use color_eyre::{
1111
Result,
1212
};
1313
use std::fs::Permissions;
14-
use std::io::Write;
14+
use std::io::{BufRead, BufReader, Write};
1515
use std::os::unix::fs::PermissionsExt as _;
1616
use std::os::unix::process::CommandExt;
17-
use std::process::Command;
17+
use std::process::{Command, Stdio};
18+
use std::time::{Duration, Instant};
1819
use tempfile;
1920
use tracing::debug;
2021

@@ -36,7 +37,7 @@ pub struct LibvirtSshOpts {
3637
pub strict_host_keys: bool,
3738

3839
/// SSH connection timeout in seconds
39-
#[clap(long, default_value = "30")]
40+
#[clap(long, default_value = "5")]
4041
pub timeout: u32,
4142

4243
/// SSH log level
@@ -236,8 +237,129 @@ impl LibvirtSshOpts {
236237
Ok(temp_key)
237238
}
238239

239-
/// Execute SSH connection to domain
240-
fn connect_ssh(&self, ssh_config: &DomainSshConfig) -> Result<()> {
240+
/// Build SSH command with configured options
241+
fn build_ssh_command(
242+
&self,
243+
ssh_config: &DomainSshConfig,
244+
temp_key: &tempfile::NamedTempFile,
245+
parsed_extra_options: Vec<(String, String)>,
246+
) -> Command {
247+
let mut ssh_cmd = Command::new("ssh");
248+
ssh_cmd
249+
.arg("-i")
250+
.arg(temp_key.path())
251+
.arg("-p")
252+
.arg(ssh_config.ssh_port.to_string());
253+
254+
let common_opts = crate::ssh::CommonSshOptions {
255+
strict_host_keys: self.strict_host_keys,
256+
connect_timeout: self.timeout,
257+
server_alive_interval: 60,
258+
log_level: self.log_level.clone(),
259+
extra_options: parsed_extra_options,
260+
};
261+
common_opts.apply_to_command(&mut ssh_cmd);
262+
ssh_cmd.arg(format!("{}@127.0.0.1", self.user));
263+
264+
ssh_cmd
265+
}
266+
267+
/// Show recent console output from domain
268+
fn show_console_feedback(&self, global_opts: &crate::libvirt::LibvirtOptions) -> Result<()> {
269+
debug!("Fetching console output for feedback");
270+
271+
let mut cmd = global_opts.virsh_command();
272+
cmd.args(&["console", "--force", &self.domain_name]);
273+
274+
let mut child = cmd.stdout(Stdio::piped()).stderr(Stdio::null()).spawn()?;
275+
276+
let mut lines_shown = 0;
277+
const MAX_LINES: usize = 5;
278+
279+
if let Some(stdout) = child.stdout.take() {
280+
// Spawn a thread to read console output
281+
let suppress_output = self.suppress_output;
282+
let handle = std::thread::spawn(move || {
283+
let reader = BufReader::new(stdout);
284+
let mut lines = Vec::new();
285+
286+
for line in reader.lines() {
287+
if let Ok(line) = line {
288+
// Only collect interesting lines
289+
if line.contains("Reached target")
290+
|| line.contains("Started")
291+
|| line.contains("ssh")
292+
|| line.contains("sshd")
293+
|| line.contains("login:")
294+
{
295+
lines.push(line);
296+
if lines.len() >= MAX_LINES {
297+
break;
298+
}
299+
}
300+
}
301+
}
302+
(lines, suppress_output)
303+
});
304+
305+
// Give the thread a moment to read available output
306+
std::thread::sleep(Duration::from_millis(500));
307+
308+
// Kill the virsh console process to close the pipe and allow thread to exit
309+
if let Err(e) = child.kill() {
310+
debug!("Failed to kill virsh console: {}", e);
311+
}
312+
313+
// Now join the thread - it should exit quickly since the pipe is closed
314+
match handle.join() {
315+
Ok((lines, suppress)) => {
316+
if !lines.is_empty() {
317+
for line in lines {
318+
if !suppress {
319+
eprintln!(" Console: {}", line.trim());
320+
}
321+
lines_shown += 1;
322+
}
323+
} else if !suppress {
324+
eprintln!(" Console: (no recent output)");
325+
}
326+
}
327+
Err(_) => {
328+
debug!("Console reader thread panicked");
329+
}
330+
}
331+
} else {
332+
// No stdout, just kill the process
333+
if let Err(e) = child.kill() {
334+
debug!("Failed to kill virsh console: {}", e);
335+
}
336+
}
337+
338+
// Wait for child to terminate to avoid zombie processes
339+
let wait_start = Instant::now();
340+
while wait_start.elapsed() < Duration::from_millis(500) {
341+
match child.try_wait() {
342+
Ok(Some(_)) => break,
343+
Ok(None) => std::thread::sleep(Duration::from_millis(50)),
344+
Err(e) => {
345+
debug!("Error waiting for virsh console: {}", e);
346+
break;
347+
}
348+
}
349+
}
350+
// Final wait to reap zombie
351+
let _ = child.wait();
352+
353+
debug!("Showed {} console lines", lines_shown);
354+
Ok(())
355+
}
356+
357+
/// Execute SSH connection to domain with retries and feedback
358+
fn connect_ssh(
359+
&self,
360+
global_opts: &crate::libvirt::LibvirtOptions,
361+
ssh_config: &DomainSshConfig,
362+
) -> Result<()> {
241363
debug!(
242364
"Connecting to domain '{}' via SSH on port {} (user: {})",
243365
self.domain_name, ssh_config.ssh_port, self.user
@@ -250,17 +372,7 @@ impl LibvirtSshOpts {
250372
// Create temporary SSH key file
251373
let temp_key = self.create_temp_ssh_key(ssh_config)?;
252374

253-
// Build SSH command
254-
let mut ssh_cmd = Command::new("ssh");
255-
256-
// Add SSH key and port
257-
ssh_cmd
258-
.arg("-i")
259-
.arg(temp_key.path())
260-
.arg("-p")
261-
.arg(ssh_config.ssh_port.to_string());
262-
263-
// Parse extra options from key=value format
375+
// Parse extra options
264376
let mut parsed_extra_options = Vec::new();
265377
for option in &self.extra_options {
266378
if let Some((key, value)) = option.split_once('=') {
@@ -273,76 +385,77 @@ impl LibvirtSshOpts {
273385
}
274386
}
275387

276-
// Apply common SSH options
277-
let common_opts = crate::ssh::CommonSshOptions {
278-
strict_host_keys: self.strict_host_keys,
279-
connect_timeout: self.timeout,
280-
server_alive_interval: 60,
281-
log_level: self.log_level.clone(),
282-
extra_options: parsed_extra_options,
283-
};
284-
common_opts.apply_to_command(&mut ssh_cmd);
388+
// For interactive SSH, just exec directly
389+
if self.command.is_empty() {
390+
debug!("Executing interactive SSH session via exec");
391+
let mut ssh_cmd = self.build_ssh_command(ssh_config, &temp_key, parsed_extra_options);
392+
let error = ssh_cmd.exec();
393+
return Err(eyre!("Failed to exec SSH command: {}", error));
394+
}
285395

286-
// Target host
287-
ssh_cmd.arg(format!("{}@127.0.0.1", self.user));
396+
// For command execution: retry with console feedback (2 attempts)
397+
let start_time = Instant::now();
398+
399+
for attempt in 1..=2 {
400+
debug!("SSH connection attempt {}/2", attempt);
401+
402+
// Build SSH command
403+
let mut ssh_cmd = self.build_ssh_command(ssh_config, &temp_key, parsed_extra_options.clone());
288404

289-
// Add command if specified - use the same argument escaping logic as container SSH
290-
if !self.command.is_empty() {
405+
// Add command
291406
ssh_cmd.arg("--");
292407
if self.command.len() > 1 {
293-
// Multiple arguments need proper shell escaping
294408
let combined_command = crate::ssh::shell_escape_command(&self.command)
295409
.map_err(|e| eyre!("Failed to escape shell command: {}", e))?;
296-
debug!("Combined escaped command: {}", combined_command);
297410
ssh_cmd.arg(combined_command);
298411
} else {
299-
// Single argument can be passed directly
300412
ssh_cmd.args(&self.command);
301413
}
302-
}
303-
304-
debug!("Executing SSH command: {:?}", ssh_cmd);
305414

306-
// For commands (non-interactive SSH), capture output
307-
// For interactive SSH (no command), exec to replace current process
308-
if self.command.is_empty() {
309-
// Interactive SSH - exec to replace the current process
310-
// This provides the cleanest terminal experience
311-
debug!("Executing interactive SSH session via exec");
312-
313-
let error = ssh_cmd.exec();
314-
// exec() only returns on error
315-
return Err(eyre!("Failed to exec SSH command: {}", error));
316-
} else {
317-
// Command execution - capture and forward output
415+
// Try SSH
318416
let output = ssh_cmd
319417
.output()
320418
.map_err(|e| eyre!("Failed to execute SSH command: {}", e))?;
321419

322-
if !output.stdout.is_empty() {
323-
if !self.suppress_output {
324-
// Forward stdout to parent process
420+
if output.status.success() {
421+
if !output.stdout.is_empty() && !self.suppress_output {
325422
print!("{}", String::from_utf8_lossy(&output.stdout));
326423
}
327-
debug!("SSH stdout: {}", String::from_utf8_lossy(&output.stdout));
424+
debug!(
425+
"SSH connected after {:.1}s",
426+
start_time.elapsed().as_secs_f64()
427+
);
428+
return Ok(());
328429
}
329-
if !output.stderr.is_empty() {
430+
431+
// Check if retryable (common errors only)
432+
let stderr_str = String::from_utf8_lossy(&output.stderr);
433+
let is_retryable = stderr_str.contains("Connection refused")
434+
|| stderr_str.contains("Connection timed out")
435+
|| stderr_str.contains("banner exchange");
436+
437+
if !is_retryable || attempt == 2 {
438+
// Non-retryable or last attempt - fail
330439
if !self.suppress_output {
331-
// Forward stderr to parent process
332-
eprint!("{}", String::from_utf8_lossy(&output.stderr));
440+
eprint!("{}", stderr_str);
333441
}
334-
debug!("SSH stderr: {}", String::from_utf8_lossy(&output.stderr));
335-
}
336-
337-
if !output.status.success() {
338442
return Err(eyre!(
339-
"SSH connection failed with exit code: {}",
340-
output.status.code().unwrap_or(-1)
443+
"SSH connection failed after {:.1}s",
444+
start_time.elapsed().as_secs_f64()
341445
));
342446
}
447+
448+
// Retryable error - show console feedback
449+
if !self.suppress_output {
450+
eprintln!("SSH not ready yet, checking console output...");
451+
}
452+
if let Err(e) = self.show_console_feedback(global_opts) {
453+
debug!("Failed to fetch console output: {}", e);
454+
}
455+
std::thread::sleep(Duration::from_secs(2));
343456
}
344457

345-
Ok(())
458+
unreachable!()
346459
}
347460
}
348461

@@ -377,8 +490,8 @@ pub fn run_ssh_impl(
377490
// Extract SSH configuration from domain metadata
378491
let ssh_config = opts.extract_ssh_config(global_opts)?;
379492

380-
// Connect via SSH
381-
opts.connect_ssh(&ssh_config)?;
493+
// Connect via SSH with retries and console feedback
494+
opts.connect_ssh(global_opts, &ssh_config)?;
382495

383496
Ok(())
384497
}

crates/kit/src/ssh.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ impl Default for CommonSshOptions {
220220
fn default() -> Self {
221221
Self {
222222
strict_host_keys: false,
223-
connect_timeout: 30,
223+
connect_timeout: 5,
224224
server_alive_interval: 60,
225225
log_level: "ERROR".to_string(),
226226
extra_options: vec![],
@@ -363,7 +363,7 @@ mod tests {
363363
fn test_ssh_connection_options() {
364364
// Test default options
365365
let default_opts = SshConnectionOptions::default();
366-
assert_eq!(default_opts.common.connect_timeout, 30);
366+
assert_eq!(default_opts.common.connect_timeout, 5);
367367
assert!(default_opts.allocate_tty);
368368
assert_eq!(default_opts.common.log_level, "ERROR");
369369
assert!(default_opts.common.extra_options.is_empty());

0 commit comments

Comments
 (0)