Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3514,6 +3514,13 @@ fn print_task_fm_analysis(details: &serde_json::Value) {
println!(" FAULT MANAGEMENT ANALYSIS SUMMARY");
println!(" =================================");
let (prep_status, analysis_status) = match outcome {
Outcome::Disabled => {
println!(
" fault management analysis explicitly disabled \
by config!"
);
return;
}
Outcome::WaitingForInventory => {
println!(
" analysis was not performed, as the inventory has\n \
Expand Down
11 changes: 11 additions & 0 deletions nexus-config/src/nexus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -968,9 +968,18 @@ impl Default for MulticastGroupReconcilerConfig {
}
}

/// Supplies the fallback value for [`FmTasksConfig::analysis_enabled`].
///
/// Currently yields `false`.
fn default_fm_analysis_enabled() -> bool {
    // TODO(#10349): Flip to true
    const ENABLED_BY_DEFAULT: bool = false;
    ENABLED_BY_DEFAULT
}

#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct FmTasksConfig {
/// whether the fault management analysis background task runs.
#[serde(default = "default_fm_analysis_enabled")]
pub analysis_enabled: bool,
/// period (in seconds) for periodic activations of the background task that
/// drives fault management analysis.
#[serde_as(as = "DurationSeconds<u64>")]
Expand All @@ -993,6 +1002,7 @@ pub struct FmTasksConfig {
impl Default for FmTasksConfig {
fn default() -> Self {
Self {
analysis_enabled: default_fm_analysis_enabled(),
// Analysis is generally triggered by changes in the current sitrep,
// inventory, or by the ereport ingester(s), so it need not be
// periodically activated all that frequently.
Expand Down Expand Up @@ -1575,6 +1585,7 @@ mod test {
disable: false,
},
fm: FmTasksConfig {
analysis_enabled: default_fm_analysis_enabled(),
analysis_period_secs: Duration::from_secs(52),
sitrep_load_period_secs: Duration::from_secs(48),
sitrep_gc_period_secs: Duration::from_secs(49),
Expand Down
1 change: 1 addition & 0 deletions nexus/examples/config-second.toml
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ sp_ereport_ingester.period_secs = 30
# Nexus).
# This is cheap, so we should check frequently.
fm.sitrep_load_period_secs = 15
fm.analysis_enabled = true
# How frequently to run analysis from the current sitrep.
fm.analysis_period_secs = 120
# Sitrep GC, on the other hand, does not need to be activated very frequently,
Expand Down
1 change: 1 addition & 0 deletions nexus/examples/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ sp_ereport_ingester.period_secs = 30
# Nexus).
# This is cheap, so we should check frequently.
fm.sitrep_load_period_secs = 15
fm.analysis_enabled = true
# How frequently to run analysis from the current sitrep.
fm.analysis_period_secs = 120
# Sitrep GC, on the other hand, does not need to be activated very frequently,
Expand Down
1 change: 1 addition & 0 deletions nexus/src/app/background/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,7 @@ impl BackgroundTasksInitializer {
sitrep_gc: task_fm_sitrep_gc.clone(),
},
nexus_id,
config.fm.analysis_enabled,
);
driver.register(TaskDefinition {
name: "fm_analysis",
Expand Down
32 changes: 30 additions & 2 deletions nexus/src/app/background/tasks/fm_analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pub struct FmAnalysis {
inv_rx: watch::Receiver<Option<Arc<inventory::Collection>>>,
activators: Activators,
nexus_id: OmicronZoneUuid,
analysis_enabled: bool,
}

/// This is just because I don't like it when a constructor takes multiple
Expand All @@ -48,7 +49,19 @@ impl BackgroundTask for FmAnalysis {
opctx: &'a OpContext,
) -> BoxFuture<'a, serde_json::Value> {
Box::pin(async {
let status = self.actually_activate(opctx).await;
let status = if self.analysis_enabled {
self.actually_activate(opctx).await
} else {
slog::info!(
opctx.log,
"fault management analysis explicitly disabled by config",
);
FmAnalysisStatus {
parent_sitrep_id: None,
inv_collection_id: None,
outcome: status::Outcome::Disabled,
}
};
match serde_json::to_value(status) {
Ok(val) => val,
Err(err) => {
Expand All @@ -70,8 +83,16 @@ impl FmAnalysis {
inv_rx: watch::Receiver<Option<Arc<inventory::Collection>>>,
activators: Activators,
nexus_id: OmicronZoneUuid,
analysis_enabled: bool,
) -> Self {
Self { datastore, sitrep_rx, inv_rx, activators, nexus_id }
Self {
datastore,
sitrep_rx,
inv_rx,
activators,
nexus_id,
analysis_enabled,
}
}

async fn actually_activate(
Expand Down Expand Up @@ -349,6 +370,8 @@ mod tests {
use omicron_test_utils::dev;
use omicron_uuid_kinds::SitrepUuid;

// Value the tests in this module supply as the trailing `analysis_enabled`
// argument when constructing the task under test.
const ANALYSIS_ENABLED: bool = true;

fn activators() -> Activators {
let a = Activators {
inventory_loader: Activator::new(),
Expand Down Expand Up @@ -432,6 +455,7 @@ mod tests {
inv_rx,
activators(),
OmicronZoneUuid::new_v4(),
ANALYSIS_ENABLED,
);

let result = task.actually_activate(opctx).await;
Expand Down Expand Up @@ -464,6 +488,7 @@ mod tests {
inv_rx,
activators(),
OmicronZoneUuid::new_v4(),
ANALYSIS_ENABLED,
);

let result = task.actually_activate(opctx).await;
Expand All @@ -489,6 +514,7 @@ mod tests {
inv_rx,
activators(),
OmicronZoneUuid::new_v4(),
ANALYSIS_ENABLED,
);

let result = task.actually_activate(opctx).await;
Expand Down Expand Up @@ -518,6 +544,7 @@ mod tests {
inv_rx,
activators(),
OmicronZoneUuid::new_v4(),
ANALYSIS_ENABLED,
);

let result = task.actually_activate(opctx).await;
Expand Down Expand Up @@ -547,6 +574,7 @@ mod tests {
inv_rx,
activators(),
OmicronZoneUuid::new_v4(),
ANALYSIS_ENABLED,
);

let result = task.actually_activate(opctx).await;
Expand Down
1 change: 1 addition & 0 deletions nexus/tests/config.test.toml
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ sp_ereport_ingester.disable = true
# How frequently to check for a new fault management sitrep (made by any Nexus).
# This is cheap, so we should check frequently.
fm.sitrep_load_period_secs = 15
fm.analysis_enabled = true
# How frequently to run analysis from the current sitrep.
fm.analysis_period_secs = 120
# Sitrep GC, on the other hand, does not need to be activated very frequently,
Expand Down
3 changes: 3 additions & 0 deletions nexus/types/src/internal_api/background.rs
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,9 @@ pub mod fm_analysis {
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
#[allow(clippy::large_enum_variant)]
pub enum Outcome {
/// The task is disabled by config.
Disabled,

/// Fault management analysis was not performed, as no inventory
/// collection has been loaded.
WaitingForInventory,
Expand Down
Loading