Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions dev-tools/omdb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ base64.workspace = true
bootstrap-agent-lockstep-client.workspace = true
bytes.workspace = true
camino.workspace = true
camino-tempfile.workspace = true
chrono.workspace = true
clap.workspace = true
clickhouse-admin-single-client.workspace = true
Expand Down Expand Up @@ -54,6 +55,7 @@ nexus-db-queries.workspace = true
nexus-db-schema.workspace = true
nexus-inventory.workspace = true
nexus-lockstep-client.workspace = true
nexus-networking.workspace = true
nexus-reconfigurator-preparation.workspace = true
nexus-saga-recovery.workspace = true
nexus-types.workspace = true
Expand Down Expand Up @@ -83,6 +85,7 @@ slog.workspace = true
slog-error-chain.workspace = true
steno.workspace = true
strum.workspace = true
support-bundle-collection.workspace = true
support-bundle-viewer.workspace = true
supports-color.workspace = true
tabled.workspace = true
Expand All @@ -104,6 +107,7 @@ nexus-test-utils-macros.workspace = true
omicron-nexus.workspace = true
omicron-test-utils.workspace = true
subprocess.workspace = true
zip.workspace = true

# Disable doc builds by default for our binaries to work around issue
# rust-lang/cargo#8373. These docs would not be very useful anyway.
Expand Down
4 changes: 4 additions & 0 deletions dev-tools/omdb/src/bin/omdb/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ mod oxql;
mod reconfigurator;
mod sled_agent;
mod support_bundle;
mod support_bundle_collect;

fn main() -> Result<(), anyhow::Error> {
sigpipe::reset();
Expand All @@ -83,6 +84,7 @@ async fn main_impl() -> Result<(), anyhow::Error> {
reconfig.run_cmd(&args, &log).await
}
OmdbCommands::SledAgent(sled) => sled.run_cmd(&args, &log).await,
OmdbCommands::SupportBundle(sb) => sb.run_cmd(&args, &log).await,
OmdbCommands::CrucibleAgent(crucible) => crucible.run_cmd(&args).await,
OmdbCommands::CruciblePantry(crucible) => crucible.run_cmd(&args).await,
OmdbCommands::ClickhouseAdmin(ch) => ch.run_cmd(&args, &log).await,
Expand Down Expand Up @@ -297,6 +299,8 @@ enum OmdbCommands {
Reconfigurator(reconfigurator::ReconfiguratorArgs),
/// Debug a specific Sled
SledAgent(sled_agent::SledAgentArgs),
/// Collect or inspect a support bundle
SupportBundle(support_bundle_collect::SupportBundleArgs),
}

fn parse_dropshot_log_level(
Expand Down
221 changes: 221 additions & 0 deletions dev-tools/omdb/src/bin/omdb/support_bundle_collect.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! `omdb support-bundle collect` — collect a support bundle locally,
//! without going through Nexus.
//!
//! Unlike the Nexus background task, this path:
//!
//! - Does not register a row in the `support_bundle` table.
//! - Does not transfer the resulting bundle to a sled-agent for durable
//! storage. The zip is written to a local file path.
//! - Does not require Nexus to be up. It only needs CRDB, internal
//! DNS, MGS, and the rack's sled-agents reachable on the underlay.
//!
//! This is intended for incident response, where the operator may need
//! to collect a bundle precisely because Nexus is unhealthy.

use crate::Omdb;
use crate::db::DbUrlOptions;
use anyhow::Context;
use camino::Utf8PathBuf;
use camino_tempfile::tempdir_in;
use clap::Args;
use clap::Subcommand;
use clap::ValueEnum;
use nexus_db_queries::context::OpContext;
use nexus_db_queries::db::DataStore;
use nexus_types::fm::ereport::EreportFilters;
use nexus_types::support_bundle::BundleDataSelection;
use omicron_uuid_kinds::SupportBundleUuid;
use std::io::Seek;
use std::io::SeekFrom;
use std::sync::Arc;
use support_bundle_collection::BundleCollection;
use support_bundle_collection::BundleInfo;
use support_bundle_collection::zip::bundle_to_zipfile;

/// Categories of data the bundle collector knows how to gather.
///
/// Mirrors `nexus_types::support_bundle::BundleDataCategory`, but is
/// declared here so it can derive `clap::ValueEnum` without making
/// `nexus-types` depend on clap.
//
// Each variant is translated into one `BundleDataSelection` builder call by
// `CollectArgs::data_selection`. When `--include` is not supplied, that
// function applies every variant returned by
// `BundleCategory::value_variants()`.
//
// NOTE(review): the notes below are plain `//` comments on purpose. `///`
// comments on clap-derived items may surface in generated help output;
// confirm before converting them.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, ValueEnum)]
enum BundleCategory {
// Applied via `BundleDataSelection::with_reconfigurator`.
Reconfigurator,
// Applied via `BundleDataSelection::with_all_sleds`.
HostInfo,
// Applied via `BundleDataSelection::with_sled_cubby_info`.
SledCubbyInfo,
// Applied via `BundleDataSelection::with_sp_dumps`.
SpDumps,
// Applied via `BundleDataSelection::with_ereports`, using a filter whose
// start time is seven days before the moment of collection.
Ereports,
}

/// Arguments to the "omdb support-bundle" subcommand
//
// Thin wrapper around the chosen subcommand; `SupportBundleArgs::run_cmd`
// matches on `command` to dispatch it.
#[derive(Debug, Args)]
pub struct SupportBundleArgs {
// The specific `support-bundle` operation requested on the command line.
#[command(subcommand)]
command: SupportBundleCommands,
}

// Subcommands available under `omdb support-bundle`. `Collect` is the only
// one defined here; `SupportBundleArgs::run_cmd` dispatches it to
// `CollectArgs::run`.
//
// NOTE(review): the `///` text on the variant presumably becomes the
// subcommand's help output, so it is left untouched — confirm before
// rewording it.
#[derive(Debug, Subcommand)]
enum SupportBundleCommands {
/// Collect a support bundle without involving Nexus.
///
/// Connects directly to CockroachDB, internal DNS, MGS, and the
/// rack's sled-agents — none of which depend on Nexus being up.
/// The bundle is written to a local zip file. No row is created
/// in the `support_bundle` table.
Collect(CollectArgs),
}

// Command-line arguments for `omdb support-bundle collect`.
//
// NOTE(review): the `///` comments on the fields presumably feed the
// generated `--help` text, so they are left untouched; maintainer notes are
// written as plain `//` comments instead.
#[derive(Debug, Args)]
struct CollectArgs {
// Database connection options; `run` calls `with_datastore` on this to
// obtain the `OpContext` and `DataStore` used for collection.
#[command(flatten)]
db_url_opts: DbUrlOptions,

/// Path where the resulting bundle zip will be written.
// Created (or truncated) with `std::fs::File::create` once the zip has been
// built in the staging directory.
#[clap(long, short = 'o')]
output: Utf8PathBuf,

/// Reason recorded inside the bundle's metadata.
// Copied into `BundleInfo::reason_for_creation`.
#[clap(long, default_value = "collected via omdb")]
reason: String,

/// Directory to use for staging the bundle contents before zipping.
// Used twice: as the parent of the temporary collection directory, and as
// the second argument to `bundle_to_zipfile`.
#[clap(long, default_value = "/var/tmp")]
tempdir: Utf8PathBuf,

/// Categories of data to collect. May be supplied multiple times.
/// Defaults to all categories.
// An empty vector is what signals "all categories" to `data_selection`.
#[clap(long, value_enum)]
include: Vec<BundleCategory>,
}

impl CollectArgs {
    /// Translates the `--include` flags into the [`BundleDataSelection`]
    /// handed to the bundle collector.
    ///
    /// An empty `include` list means "everything": every variant reported by
    /// `BundleCategory::value_variants()` is applied. Otherwise only the
    /// categories the user listed are applied, in the order given.
    ///
    /// # Panics
    ///
    /// Panics if building the ereport filter fails, which the original
    /// author documents as impossible because no end time is ever set.
    fn data_selection(&self) -> BundleDataSelection {
        let requested = match self.include.as_slice() {
            [] => BundleCategory::value_variants(),
            explicit => explicit,
        };

        requested.iter().fold(
            BundleDataSelection::new(),
            |selection, category| match *category {
                BundleCategory::Reconfigurator => {
                    selection.with_reconfigurator()
                }
                BundleCategory::HostInfo => selection.with_all_sleds(),
                BundleCategory::SledCubbyInfo => {
                    selection.with_sled_cubby_info()
                }
                BundleCategory::SpDumps => selection.with_sp_dumps(),
                BundleCategory::Ereports => {
                    // Only look back one week from the time of collection.
                    let week_ago = omicron_common::now_db_precision()
                        - chrono::Days::new(7);
                    let filters = EreportFilters::new()
                        .with_start_time(week_ago)
                        .expect("no end time set, cannot fail");
                    selection.with_ereports(filters)
                }
            },
        )
    }
}

impl SupportBundleArgs {
    /// Runs whichever `support-bundle` subcommand was parsed from the
    /// command line, returning that subcommand's result unchanged.
    pub async fn run_cmd(
        &self,
        omdb: &Omdb,
        log: &slog::Logger,
    ) -> anyhow::Result<()> {
        match self.command {
            SupportBundleCommands::Collect(ref collect_args) => {
                collect_args.run(omdb, log).await
            }
        }
    }
}

impl CollectArgs {
async fn run(&self, omdb: &Omdb, log: &slog::Logger) -> anyhow::Result<()> {
self.db_url_opts
.with_datastore(omdb, log, async |opctx, datastore| {
self.collect(omdb, log, opctx, datastore).await
})
.await
}

async fn collect(
&self,
omdb: &Omdb,
log: &slog::Logger,
opctx: OpContext,
datastore: Arc<DataStore>,
) -> anyhow::Result<()> {
let resolver = omdb.dns_resolver(log.clone()).await?;

let bundle = BundleInfo {
id: SupportBundleUuid::new_v4(),
reason_for_creation: self.reason.clone(),
};
let bundle_log = log.new(slog::o!("bundle" => bundle.id.to_string()));
eprintln!("Collecting support bundle {}", bundle.id);

let collection = Arc::new(BundleCollection::new(
datastore,
resolver,
bundle_log,
opctx,
self.data_selection(),
bundle,
));

// Wire Ctrl-C to cancel the in-flight collection.
let cancel_handle = tokio::spawn({
let token = collection.cancellation_token().clone();
async move {
let _ = tokio::signal::ctrl_c().await;
eprintln!("\nCtrl-C received — cancelling bundle collection.");
token.cancel();
}
});

let dir = tempdir_in(&self.tempdir).with_context(|| {
format!("creating temp dir under {}", self.tempdir)
})?;
let collect_result = collection.collect_bundle_locally(&dir).await;
cancel_handle.abort();
let _ = cancel_handle.await;
let report = collect_result?;

let zip_tempdir = self.tempdir.clone();
let output = self.output.clone();
tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
let mut tempfile = bundle_to_zipfile(&dir, &zip_tempdir)?;
tempfile.seek(SeekFrom::Start(0))?;
let mut out = std::fs::File::create(&output)
.with_context(|| format!("creating {output}"))?;
std::io::copy(&mut tempfile, &mut out)?;
Ok(())
})
.await
.context("zip task panicked")??;

eprintln!("Wrote bundle to {}", self.output);
eprintln!("{} steps executed:", report.steps.len());
for step in &report.steps {
let dur = step.end - step.start;
eprintln!(
" {:>9}ms {:?} {}",
dur.num_milliseconds(),
step.status,
step.name,
);
}
if let Some(ereports) = &report.ereports {
eprintln!(
"ereports: {} found, {} collected, {} errors",
ereports.n_found,
ereports.n_collected,
ereports.errors.len(),
);
}
Ok(())
}
}
45 changes: 45 additions & 0 deletions dev-tools/omdb/tests/test_all_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,51 @@ async fn test_omdb_success_cases() {
);
assert!(!parsed.collections.is_empty());

// Exercise `omdb support-bundle collect` end-to-end. We don't add this
// to the `successes.out` snapshot because the output includes a
// randomly-generated bundle UUID, timing-dependent step durations,
// and per-sled step names that would all need redaction. Instead we
// run the command and verify the resulting zip is well-formed and
// contains the expected metadata files.
let bundle_path = tmpdir.path().join("bundle.zip");
let bundle_args: &[&str] = &[
"support-bundle",
"collect",
"--output",
bundle_path.as_str(),
"--tempdir",
tmpdir.path().as_str(),
"--reason",
"integration test",
];
let mut bundle_output = String::new();
let p = postgres_url.clone();
let dns = cptestctx.internal_dns.dns_server.local_address().to_string();
do_run_no_redactions(
&mut bundle_output,
move |exec| exec.env("OMDB_DB_URL", &p).env("OMDB_DNS_SERVER", &dns),
&cmd_path,
bundle_args,
)
.await;
let zip_file = std::fs::File::open(&bundle_path).unwrap_or_else(|err| {
panic!(
"bundle zip not produced at {bundle_path}: {}\n\
omdb output was:\n{bundle_output}",
InlineErrorChain::new(&err),
)
});
let mut archive =
zip::ZipArchive::new(zip_file).expect("bundle is a valid zip archive");
for required in
["bundle_id.txt", "meta/reason_for_creation.txt", "meta/trace.json"]
{
assert!(
archive.by_name(required).is_ok(),
"bundle zip is missing expected entry {required}",
);
}

let ox_invocation = &["oximeter", "list-producers"];
let mut ox_output = String::new();
let ox = ox_url.clone();
Expand Down
2 changes: 2 additions & 0 deletions dev-tools/omdb/tests/usage_errors.out
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Commands:
oxql Enter the Oximeter Query Language shell for interactive querying
reconfigurator Interact with the Reconfigurator system
sled-agent Debug a specific Sled
support-bundle Collect or inspect a support bundle
help Print this message or the help of the given subcommand(s)

Options:
Expand Down Expand Up @@ -54,6 +55,7 @@ Commands:
oxql Enter the Oximeter Query Language shell for interactive querying
reconfigurator Interact with the Reconfigurator system
sled-agent Debug a specific Sled
support-bundle Collect or inspect a support bundle
help Print this message or the help of the given subcommand(s)

Options:
Expand Down
Loading