Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 24 additions & 15 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3333,17 +3333,22 @@ fn print_task_sp_ereport_ingester(details: &serde_json::Value) {
use nexus_types::internal_api::background::SpEreportIngesterStatus;
use nexus_types::internal_api::background::SpEreporterStatus;

let SpEreportIngesterStatus { sps, errors, disabled, sps_not_present } =
match serde_json::from_value(details.clone()) {
Err(error) => {
eprintln!(
"warning: failed to interpret task details: {:?}: {:?}",
error, details
);
return;
}
Ok(status) => status,
};
let SpEreportIngesterStatus {
sps,
errors,
disabled,
sps_found,
sps_not_present,
} = match serde_json::from_value(details.clone()) {
Err(error) => {
eprintln!(
"warning: failed to interpret task details: {:?}: {:?}",
error, details
);
return;
}
Ok(status) => status,
};

if !errors.is_empty() {
println!(" errors listing reporters:");
Expand All @@ -3355,13 +3360,15 @@ fn print_task_sp_ereport_ingester(details: &serde_json::Value) {
if disabled {
println!(" SP ereport ingestion explicitly disabled by config!");
} else {
println!(" {SPS_FOUND:<WIDTH$}{sps_found:>NUM_WIDTH$}");
if sps_not_present > 0 {
println!(
"(i) {SPS_NOT_PRESENT:<WIDTH$}{sps_not_present:>NUM_WIDTH$}"
);
}
print_ereporter_status_totals(sps.iter().map(|sp| &sp.status));
}

if sps_not_present > 0 {
println!("(i) {SPS_NOT_PRESENT:<WIDTH$}{sps_not_present:>NUM_WIDTH$}");
}

if !sps.is_empty() {
if disabled {
println!(
Expand Down Expand Up @@ -3473,6 +3480,7 @@ mod ereporter_status_fields {
pub const REPORTERS_WITH_EREPORTS: &str = " with ereports:";
pub const REPORTERS_WITHOUT_EREPORTS: &str = " without ereports:";
pub const REPORTERS_WITH_ERRORS: &str = " with collection errors:";
pub const SPS_FOUND: &str = "SPs found via ignition:";
pub const SPS_NOT_PRESENT: &str = "SPs not present:";
pub const WIDTH: usize = super::const_max_len(&[
TOTAL_NEW_EREPORTS,
Expand All @@ -3485,6 +3493,7 @@ mod ereporter_status_fields {
REPORTERS_WITH_EREPORTS,
REPORTERS_WITHOUT_EREPORTS,
REPORTERS_WITH_ERRORS,
SPS_FOUND,
SPS_NOT_PRESENT,
]) + 1;
pub const NUM_WIDTH: usize = 4;
Expand Down
2 changes: 2 additions & 0 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -967,6 +967,7 @@ task: "sp_ereport_ingester"
configured period: every <REDACTED_DURATION>s
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
SPs found via ignition: 0
total ereports received: <TOTAL_EREPORTS_RECEIVED_REDACTED>
new ereports ingested: <NEW_EREPORTS_INGESTED_REDACTED>
total HTTP requests sent: <TOTAL_HTTP_REQUESTS_SENT_REDACTED>
Expand Down Expand Up @@ -1648,6 +1649,7 @@ task: "sp_ereport_ingester"
configured period: every <REDACTED_DURATION>s
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
SPs found via ignition: 0
total ereports received: <TOTAL_EREPORTS_RECEIVED_REDACTED>
new ereports ingested: <NEW_EREPORTS_INGESTED_REDACTED>
total HTTP requests sent: <TOTAL_HTTP_REQUESTS_SENT_REDACTED>
Expand Down
90 changes: 58 additions & 32 deletions nexus/src/app/background/tasks/ereport_ingester.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ impl SpEreportIngester {
continue;
}
};

status.sps_found += 1;

let SpIdentifier { type_, slot } = id;
let sp_result = tasks
.spawn({
Expand All @@ -166,30 +169,28 @@ impl SpEreportIngester {
async move {
let status = ingester
.ingest_sp_ereports(opctx, &clients, type_, slot)
.await?;
Some(SpEreporterStatus {
.await;
SpEreporterStatus {
sp_type: type_,
slot,
ignition_type,
status,
})
}
}
})
.await;
if let Some(Some(sp_status)) = sp_result {
if let Some(sp_status) = sp_result {
total_ereports += sp_status.status.ereports_received;
total_new_ereports += sp_status.status.new_ereports;
status.sps.push(sp_status);
}
}

// Wait for remaining ingestion tasks to come back.
while let Some(sp_result) = tasks.join_next().await {
if let Some(sp_status) = sp_result {
total_ereports += sp_status.status.ereports_received;
total_new_ereports += sp_status.status.new_ereports;
status.sps.push(sp_status);
}
while let Some(sp_status) = tasks.join_next().await {
total_ereports += sp_status.status.ereports_received;
total_new_ereports += sp_status.status.new_ereports;
status.sps.push(sp_status);
}

// If any ereports were ingested that were not already in the database,
Expand Down Expand Up @@ -236,55 +237,47 @@ impl Ingester {
clients: &[GatewayClient],
sp_type: nexus_types::inventory::SpType,
slot: u16,
) -> Option<EreporterStatus> {
) -> EreporterStatus {
// Fetch the latest ereport from this SP.
let reporter = nexus_types::fm::ereport::Reporter::Sp { sp_type, slot };
let latest =
match self.datastore.latest_ereport_id(&opctx, reporter).await {
Ok(latest) => latest,
Err(error) => {
return Some(EreporterStatus {
return EreporterStatus {
errors: vec![format!(
"failed to query for latest ereport: {error:#}"
)],
..Default::default()
});
};
}
};

let mut params = EreportQueryParams::from_latest(latest);
let mut status = None;
let mut status = EreporterStatus::default();

// Continue requesting ereports from this SP in a loop until we have
// received all its ereports.
while let Some(ereport_types::Ereports { restart_id, reports }) = self
.mgs_requests(&opctx, clients, &params, sp_type, slot, &mut status)
.await
{
status.requests += 1;
if reports.items.is_empty() {
if let Some(ref mut status) = status {
status.requests += 1;
}
slog::trace!(
&opctx.log,
"no ereports returned by SP";
"committed_ena" => ?params.committed_ena,
"start_ena" => ?params.start_ena,
"restart_id" => ?params.restart_id,
"total_ereports_received" => status
.as_ref()
.map(|s| s.ereports_received),
"total_new_ereports" => status
.as_ref()
.map(|s| s.new_ereports),
"total_ereports_received" => status.ereports_received,
"total_new_ereports" => status.new_ereports,
);
break;
} else {
status.get_or_insert_default().requests += 1;
}

let time_collected = Utc::now();
let received = reports.items.len();
let status = status.get_or_insert_default();
status.ereports_received += received;

let db_ereports = reports.items.into_iter().map(|ereport| {
Expand Down Expand Up @@ -347,7 +340,7 @@ impl Ingester {
EreportQueryParams { committed_ena,start_ena, restart_id }: &EreportQueryParams,
sp_type: nexus_types::inventory::SpType,
slot: u16,
status: &mut Option<EreporterStatus>,
status: &mut EreporterStatus,
) -> Option<ereport_types::Ereports> {
// If an attempt to collect ereports from one gateway fails, we will try
// any other discovered gateways.
Expand Down Expand Up @@ -384,9 +377,8 @@ impl Ingester {
"gateway_addr" => %addr,
"error" => ?e,
);
let stats = status.get_or_insert_default();
stats.requests += 1;
stats.errors.push(format!("MGS {addr}: {e:#}"));
status.requests += 1;
status.errors.push(format!("MGS {addr}: {e:#}"));
}
}
}
Expand Down Expand Up @@ -466,6 +458,7 @@ mod tests {
"ereports from 4 SPs should be observed: {:?}",
activation1.sps,
);
assert_eq!(activation1.sps_found, 4);
tokio_test::assert_ready!(
analysis_activated.poll(),
"fm analysis task should be activated"
Expand Down Expand Up @@ -671,8 +664,41 @@ mod tests {
"fm analysis task should not be activated when no new ereports \
have been ingested"
);

assert_eq!(activation2.sps, &[], "no new ereports should be observed");
assert_eq!(
activation2.sps_found, 4,
"4 present SPs should have been found via ignition",
);
assert_eq!(
activation2.sps.len(),
4,
"all 4 SPs should be reported in the status, even when no new \
ereports were observed: {:?}",
activation2.sps,
);
for SpEreporterStatus { sp_type, slot, status, ignition_type: _ } in
&activation2.sps
{
assert_eq!(
status.ereports_received, 0,
"no ereports should have been received from SP \
{sp_type:?} {slot}",
);
assert_eq!(
status.new_ereports, 0,
"no new ereports should have been ingested from SP \
{sp_type:?} {slot}",
);
assert_eq!(
status.requests, 1,
"one HTTP request should have been sent for SP \
{sp_type:?} {slot} (empty response)",
);
assert_eq!(
&status.errors,
&Vec::<String>::new(),
"there should be no errors from SP {sp_type:?} {slot}",
);
}

check_sp_ereports_exist(
datastore,
Expand Down
2 changes: 2 additions & 0 deletions nexus/types/src/internal_api/background.rs
Original file line number Diff line number Diff line change
Expand Up @@ -852,6 +852,8 @@ pub struct SpEreportIngesterStatus {
/// the config file.
pub disabled: bool,
pub sps: Vec<SpEreporterStatus>,
/// Total number of present SPs discovered via ignition.
pub sps_found: usize,
pub sps_not_present: usize,
pub errors: Vec<String>,
}
Expand Down
Loading