From 66b6b9ad48d410c0c50803e89baab678843dc929 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 5 May 2026 20:04:52 -0700 Subject: [PATCH 1/3] fm: make `sp_ereport_ingester` status output less gaslighty Originally, when implementing the `sp_ereport_ingester` background task, I had the foolish and bad idea to make the task's status output omit entries for SPs which we communicated with successfully but which had no new ereports, thinking that it was "not interesting" to list them. Unfortunately, this also means that the total count of SPs that the task talked to, and the count of total HTTP requests sent, will also not count any SPs which had no ereports. This behavior is *incredibly misleading* to anyone who isn't aware that they are being intentionally excluded, which, as it turns out, includes me (since I had forgotten that I did this). This commit fixes this by not going out of our way to exclude these SPs from the status output. It turns out that this actually makes the code somewhat simpler, which is another argument in favor of the idea that I never should have done this. Sigh. Fixes #10380 --- dev-tools/omdb/src/bin/omdb/nexus.rs | 39 +++++++++------ dev-tools/omdb/tests/successes.out | 2 + .../app/background/tasks/ereport_ingester.rs | 48 ++++++++----------- nexus/types/src/internal_api/background.rs | 2 + 4 files changed, 47 insertions(+), 44 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 2bec4896d33..fe5801c7181 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -3333,17 +3333,22 @@ fn print_task_sp_ereport_ingester(details: &serde_json::Value) { use nexus_types::internal_api::background::SpEreportIngesterStatus; use nexus_types::internal_api::background::SpEreporterStatus; - let SpEreportIngesterStatus { sps, errors, disabled, sps_not_present } = - match serde_json::from_value(details.clone()) { - Err(error) => { - eprintln!( - "warning: failed to interpret task details: {:?}: {:?}", - error, details - ); - return; - } - Ok(status) => status, - }; + let SpEreportIngesterStatus { + sps, + errors, + disabled, + sps_found, + sps_not_present, + } = match serde_json::from_value(details.clone()) { + Err(error) => { + eprintln!( + "warning: failed to interpret task details: {:?}: {:?}", + error, details + ); + return; + } + Ok(status) => status, + }; if !errors.is_empty() { println!(" errors listing reporters:"); @@ -3355,13 +3360,15 @@ fn print_task_sp_ereport_ingester(details: &serde_json::Value) { if disabled { println!(" SP ereport ingestion explicitly disabled by config!"); } else { + println!(" {SPS_FOUND:NUM_WIDTH$}"); + if sps_not_present > 0 { + println!( + "(i) {SPS_NOT_PRESENT:NUM_WIDTH$}" + ); + } print_ereporter_status_totals(sps.iter().map(|sp| &sp.status)); } - if sps_not_present > 0 { - println!("(i) {SPS_NOT_PRESENT:NUM_WIDTH$}"); - } - if !sps.is_empty() { if disabled { println!( @@ -3473,6 +3480,7 @@ mod ereporter_status_fields { pub const REPORTERS_WITH_EREPORTS: &str = " with ereports:"; pub const REPORTERS_WITHOUT_EREPORTS: &str = " without ereports:"; pub const REPORTERS_WITH_ERRORS: &str = " with collection errors:"; + pub const SPS_FOUND: &str = "SPs found via ignition:"; pub const SPS_NOT_PRESENT: &str = "SPs not present:"; pub const WIDTH: usize = super::const_max_len(&[ TOTAL_NEW_EREPORTS, @@ -3485,6 +3493,7 @@ mod ereporter_status_fields { REPORTERS_WITH_EREPORTS, REPORTERS_WITHOUT_EREPORTS, REPORTERS_WITH_ERRORS, + SPS_FOUND, SPS_NOT_PRESENT, ]) + 1; pub const NUM_WIDTH: usize = 4; diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index ba66911f6e4..ec2f47fba08 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -967,6 +967,7 @@ task: "sp_ereport_ingester" configured period: every s last completed activation: , triggered by started at (s ago) and ran for ms + SPs found via ignition: 0 total ereports received: new ereports ingested: total HTTP requests sent: @@ -1648,6 +1649,7 @@ task: "sp_ereport_ingester" configured period: every s last completed activation: , triggered by started at (s ago) and ran for ms + SPs found via ignition: 0 total ereports received: new ereports ingested: total HTTP requests sent: diff --git a/nexus/src/app/background/tasks/ereport_ingester.rs b/nexus/src/app/background/tasks/ereport_ingester.rs index 2475e931de2..63f169a1019 100644 --- a/nexus/src/app/background/tasks/ereport_ingester.rs +++ b/nexus/src/app/background/tasks/ereport_ingester.rs @@ -166,17 +166,17 @@ impl SpEreportIngester { async move { let status = ingester .ingest_sp_ereports(opctx, &clients, type_, slot) - .await?; - Some(SpEreporterStatus { + .await; + SpEreporterStatus { sp_type: type_, slot, ignition_type, status, - }) + } } }) .await; - if let Some(Some(sp_status)) = sp_result { + if let Some(sp_status) = sp_result { total_ereports += sp_status.status.ereports_received; total_new_ereports += sp_status.status.new_ereports; status.sps.push(sp_status); @@ -184,12 +184,10 @@ impl SpEreportIngester { } // Wait for remaining ingestion tasks to come back. - while let Some(sp_result) = tasks.join_next().await { - if let Some(sp_status) = sp_result { - total_ereports += sp_status.status.ereports_received; - total_new_ereports += sp_status.status.new_ereports; - status.sps.push(sp_status); - } + while let Some(sp_status) = tasks.join_next().await { + total_ereports += sp_status.status.ereports_received; + total_new_ereports += sp_status.status.new_ereports; + status.sps.push(sp_status); } // If any ereports were ingested that were not already in the database, @@ -236,24 +234,24 @@ impl Ingester { clients: &[GatewayClient], sp_type: nexus_types::inventory::SpType, slot: u16, - ) -> Option { + ) -> EreporterStatus { // Fetch the latest ereport from this SP. let reporter = nexus_types::fm::ereport::Reporter::Sp { sp_type, slot }; let latest = match self.datastore.latest_ereport_id(&opctx, reporter).await { Ok(latest) => latest, Err(error) => { - return Some(EreporterStatus { + return EreporterStatus { errors: vec![format!( "failed to query for latest ereport: {error:#}" )], ..Default::default() - }); + }; } }; let mut params = EreportQueryParams::from_latest(latest); - let mut status = None; + let mut status = EreporterStatus::default(); // Continue requesting ereports from this SP in a loop until we have // received all its ereports. @@ -262,29 +260,22 @@ impl Ingester { .await { if reports.items.is_empty() { - if let Some(ref mut status) = status { - status.requests += 1; - } + status.requests += 1; slog::trace!( &opctx.log, "no ereports returned by SP"; "committed_ena" => ?params.committed_ena, "start_ena" => ?params.start_ena, "restart_id" => ?params.restart_id, - "total_ereports_received" => status - .as_ref() - .map(|s| s.ereports_received), - "total_new_ereports" => status - .as_ref() - .map(|s| s.new_ereports), + "total_ereports_received" => status.ereports_received, + "total_new_ereports" => status.new_ereports, ); break; } else { - status.get_or_insert_default().requests += 1; + status.requests += 1; } let time_collected = Utc::now(); let received = reports.items.len(); - let status = status.get_or_insert_default(); status.ereports_received += received; let db_ereports = reports.items.into_iter().map(|ereport| { @@ -347,7 +338,7 @@ impl Ingester { EreportQueryParams { committed_ena,start_ena, restart_id }: &EreportQueryParams, sp_type: nexus_types::inventory::SpType, slot: u16, - status: &mut Option, + status: &mut EreporterStatus, ) -> Option { // If an attempt to collect ereports from one gateway fails, we will try // any other discovered gateways. @@ -384,9 +375,8 @@ impl Ingester { "gateway_addr" => %addr, "error" => ?e, ); - let stats = status.get_or_insert_default(); - stats.requests += 1; - stats.errors.push(format!("MGS {addr}: {e:#}")); + status.requests += 1; + status.errors.push(format!("MGS {addr}: {e:#}")); } } } diff --git a/nexus/types/src/internal_api/background.rs b/nexus/types/src/internal_api/background.rs index cbf09135bde..f8536163f29 100644 --- a/nexus/types/src/internal_api/background.rs +++ b/nexus/types/src/internal_api/background.rs @@ -852,6 +852,8 @@ pub struct SpEreportIngesterStatus { /// the config file. pub disabled: bool, pub sps: Vec, + /// Total number of present SPs discovered via ignition. + pub sps_found: usize, pub sps_not_present: usize, pub errors: Vec, } From eb42eebaeaea3b309968030987fb188d397a9330 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 5 May 2026 20:08:38 -0700 Subject: [PATCH 2/3] oh also fix the tests ahaha --- .../app/background/tasks/ereport_ingester.rs | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/nexus/src/app/background/tasks/ereport_ingester.rs b/nexus/src/app/background/tasks/ereport_ingester.rs index 63f169a1019..71df73ce9db 100644 --- a/nexus/src/app/background/tasks/ereport_ingester.rs +++ b/nexus/src/app/background/tasks/ereport_ingester.rs @@ -456,6 +456,7 @@ mod tests { "ereports from 4 SPs should be observed: {:?}", activation1.sps, ); + assert_eq!(activation1.sps_found, 4); tokio_test::assert_ready!( analysis_activated.poll(), "fm analysis task should be activated" @@ -661,8 +662,41 @@ mod tests { "fm analysis task should not be activated when no new ereports \ have been ingested" ); - - assert_eq!(activation2.sps, &[], "no new ereports should be observed"); + assert_eq!( + activation2.sps_found, 4, + "4 present SPs should have been found via ignition", + ); + assert_eq!( + activation2.sps.len(), + 4, + "all 4 SPs should be reported in the status, even when no new \ + ereports were observed: {:?}", + activation2.sps, + ); + for SpEreporterStatus { sp_type, slot, status, ignition_type: _ } in + &activation2.sps + { + assert_eq!( + status.ereports_received, 0, + "no ereports should have been received from SP \ + {sp_type:?} {slot}", + ); + assert_eq!( + status.new_ereports, 0, + "no new ereports should have been ingested from SP \ + {sp_type:?} {slot}", + ); + assert_eq!( + status.requests, 1, + "one HTTP request should have been sent for SP \ + {sp_type:?} {slot} (empty response)", + ); + assert_eq!( + &status.errors, + &Vec::::new(), + "there should be no errors from SP {sp_type:?} {slot}", + ); + } check_sp_ereports_exist( datastore, From 30ff563e9f2934744df9de4b7ce980a0a19b5a23 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 5 May 2026 20:58:04 -0700 Subject: [PATCH 3/3] you have to actually increment the things --- nexus/src/app/background/tasks/ereport_ingester.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/nexus/src/app/background/tasks/ereport_ingester.rs b/nexus/src/app/background/tasks/ereport_ingester.rs index 71df73ce9db..a853d58d9b1 100644 --- a/nexus/src/app/background/tasks/ereport_ingester.rs +++ b/nexus/src/app/background/tasks/ereport_ingester.rs @@ -149,6 +149,9 @@ impl SpEreportIngester { continue; } }; + + status.sps_found += 1; + let SpIdentifier { type_, slot } = id; let sp_result = tasks .spawn({ @@ -259,8 +262,8 @@ impl Ingester { .mgs_requests(&opctx, clients, ¶ms, sp_type, slot, &mut status) .await { + status.requests += 1; if reports.items.is_empty() { - status.requests += 1; slog::trace!( &opctx.log, "no ereports returned by SP"; @@ -271,9 +274,8 @@ impl Ingester { "total_new_ereports" => status.new_ereports, ); break; - } else { - status.requests += 1; } + let time_collected = Utc::now(); let received = reports.items.len(); status.ereports_received += received;