Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions dev-tools/omdb/src/bin/omdb/db/alert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1031,10 +1031,12 @@ async fn cmd_db_alert_info(
payload,
num_dispatched,
case_id,
time_deleted,
} = alert;

const CLASS: &str = "class";
const TIME_DISPATCHED: &str = "fully dispatched at";
const TIME_DELETED: &str = "tombstoned at";
const NUM_DISPATCHED: &str = "deliveries dispatched";
const CASE_ID: &str = "requested by FM case";

Expand All @@ -1043,6 +1045,7 @@ async fn cmd_db_alert_info(
TIME_CREATED,
TIME_MODIFIED,
TIME_DISPATCHED,
TIME_DELETED,
NUM_DISPATCHED,
CLASS,
CASE_ID,
Expand All @@ -1058,6 +1061,9 @@ async fn cmd_db_alert_info(
if let Some(t) = time_dispatched {
println!(" {TIME_DISPATCHED:>WIDTH$}: {t}")
}
if let Some(t) = time_deleted {
println!(" {TIME_DELETED:>WIDTH$}: {t}")
}
if let Some(case_id) = case_id {
println!(" {CASE_ID:>WIDTH$}: {case_id:?}");
}
Expand Down
52 changes: 47 additions & 5 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3727,6 +3727,7 @@ fn print_task_fm_rendezvous(details: &serde_json::Value) {
alerts,
support_bundles,
ereport_marking: marking,
latest_processed_sitrep_version,
} = match serde_json::from_value::<FmRendezvousStatus>(details.clone()) {
Err(error) => {
eprintln!(
Expand All @@ -3738,7 +3739,36 @@ fn print_task_fm_rendezvous(details: &serde_json::Value) {
Ok(status) => status,
};
match sitrep_id {
Some(id) => println!(" current sitrep: {id}"),
Some(id) => {
println!(" current sitrep: {id}");
match latest_processed_sitrep_version {
Some(v) => {
// We show this line when `fm_rendezvous_progress_advance`
// succeeded, meaning that either the sitrep successfully
// executed, or it was stale and the work was skipped.
println!(" latest fully-processed sitrep version: {v}",)
Comment on lines +3742 to +3749
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

small note here: i notice that here we are printing the current sitrep's ID, and the version of the last fully executed sitrep...this is a bit unhelpful, as the reader cannot tell whether or not those refer to the same sitrep without having to run more commands to figure out which sitrep the latest executed version is. what do you think about maybe changing the FmRendezvousStatus type to include both the current sitrep's ID and version, so that we can print something like:

    current sitrep: f5391283-aa51-4abc-bee3-b4181a056ebc
      version: 69
      latest fully-processed sitrep version: 42

}
None => {
// Disambiguate: a `None` here means either the advance
// was skipped because at least one subtask recorded
// per-request errors, or the advance call itself failed.
// Both cases are also logged separately by Nexus.
let any_subtask_errors = !alerts.details.errors.is_empty()
|| !support_bundles.details.errors.is_empty()
|| !marking.details.errors.is_empty();
if any_subtask_errors {
println!(
"(i) tracker advance skipped: subtasks recorded \
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mentioned this below, but I find the term "tracker" vague, and hard to parse here without knowing the precise context of this PR: Is it tracking the most recent sitrep? The sitrep we tried to execute? The sitrep which fully executed? That nuance matters, and "tracker" omits it

per-request errors"
);
} else {
println!(
"(i) tracker advance failed at this activation"
);
}
}
}
}
None => println!(
"(i) no FM situation report loaded, so rendezvous was not \
performed",
Expand All @@ -3751,20 +3781,25 @@ fn print_task_fm_rendezvous(details: &serde_json::Value) {
total_alerts_requested,
current_sitrep_alerts_requested,
alerts_created,
stale_sitrep,
errors,
}| {
let already_created =
total_alerts_requested - alerts_created - errors.len();
let already_created = total_alerts_requested
- alerts_created
- stale_sitrep
- errors.len();
const REQUESTED: &str = "alerts requested:";
const REQUESTED_THIS_SITREP: &str = " requested in this sitrep:";
const CREATED: &str = " created in this activation:";
const ALREADY_CREATED: &str = " already created:";
const STALE_SITREP: &str = " skipped (stale sitrep):";
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'm not totally sure what "skipped (stale sitrep)" means --- is this referring to the count of requested entities which were not created because they were already deleted in a future sitrep? it would be nice if this was a bit clearer about its meaning...

const ERRORS: &str = " errors:";
const WIDTH: usize = const_max_len(&[
REQUESTED,
REQUESTED_THIS_SITREP,
CREATED,
ALREADY_CREATED,
STALE_SITREP,
ERRORS,
]) + 1;
pub const NUM_WIDTH: usize = 4;
Expand All @@ -3779,6 +3814,7 @@ fn print_task_fm_rendezvous(details: &serde_json::Value) {
println!(
" {ALREADY_CREATED:<WIDTH$}{already_created:>NUM_WIDTH$}"
);
println!(" {STALE_SITREP:<WIDTH$}{stale_sitrep:>NUM_WIDTH$}");
println!(
"{} {ERRORS:<WIDTH$}{:>NUM_WIDTH$}",
warn_if_nonzero(errors.len()),
Expand All @@ -3796,20 +3832,25 @@ fn print_task_fm_rendezvous(details: &serde_json::Value) {
total_bundles_requested,
current_sitrep_bundles_requested,
bundles_created,
stale_sitrep,
errors,
}| {
let already_created =
total_bundles_requested - bundles_created - errors.len();
let already_created = total_bundles_requested
- bundles_created
- stale_sitrep
- errors.len();
const REQUESTED: &str = "support bundles requested:";
const REQUESTED_THIS_SITREP: &str = " requested in this sitrep:";
const CREATED: &str = " created in this activation:";
const ALREADY_CREATED: &str = " already created:";
const STALE_SITREP: &str = " skipped (stale sitrep):";
const ERRORS: &str = " errors:";
const WIDTH: usize = const_max_len(&[
REQUESTED,
REQUESTED_THIS_SITREP,
CREATED,
ALREADY_CREATED,
STALE_SITREP,
ERRORS,
]) + 1;
pub const NUM_WIDTH: usize = 4;
Expand All @@ -3824,6 +3865,7 @@ fn print_task_fm_rendezvous(details: &serde_json::Value) {
println!(
" {ALREADY_CREATED:<WIDTH$}{already_created:>NUM_WIDTH$}"
);
println!(" {STALE_SITREP:<WIDTH$}{stale_sitrep:>NUM_WIDTH$}");
println!(
"{} {ERRORS:<WIDTH$}{:>NUM_WIDTH$}",
warn_if_nonzero(errors.len()),
Expand Down
4 changes: 4 additions & 0 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -725,13 +725,15 @@ task: "fm_rendezvous"
requested in this sitrep: 0
created in this activation: 0
already created: 0
skipped (stale sitrep): 0
errors: 0
creating requested support bundles:
(i) note: this operation was not executed
support bundles requested: 0
requested in this sitrep: 0
created in this activation: 0
already created: 0
skipped (stale sitrep): 0
errors: 0
marking ereports as seen:
(i) note: this operation was not executed
Expand Down Expand Up @@ -1406,13 +1408,15 @@ task: "fm_rendezvous"
requested in this sitrep: 0
created in this activation: 0
already created: 0
skipped (stale sitrep): 0
errors: 0
creating requested support bundles:
(i) note: this operation was not executed
support bundles requested: 0
requested in this sitrep: 0
created in this activation: 0
already created: 0
skipped (stale sitrep): 0
errors: 0
marking ereports as seen:
(i) note: this operation was not executed
Expand Down
7 changes: 7 additions & 0 deletions nexus/db-model/src/alert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ pub struct Alert {

/// The ID of the fault management case that created this alert, if any.
pub case_id: Option<DbTypedUuid<CaseKind>>,

/// Tombstone timestamp: set when an FM-created alert is soft-deleted.
///
/// Only meaningful when `case_id` is `Some` (FM-created alerts).
/// Non-FM alerts continue to hard-delete and never have this set.
pub time_deleted: Option<DateTime<Utc>>,
}
Comment on lines +54 to 57
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not convinced that this is true (or necessary?), since we don't really have any other code paths that delete alerts at this time. We might end up also having those code paths soft-delete alert records as well. I don't think it's necessarily worth designing for both soft- and hard-deletes depending on where the alert came from at this point?


impl Alert {
Expand All @@ -69,6 +75,7 @@ impl Alert {
payload: payload.into(),
num_dispatched: 0,
case_id: None,
time_deleted: None,
}
}

Expand Down
3 changes: 2 additions & 1 deletion nexus/db-model/src/schema_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock};
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
pub const SCHEMA_VERSION: Version = Version::new(256, 0, 0);
pub const SCHEMA_VERSION: Version = Version::new(257, 0, 0);

/// List of all past database schema versions, in *reverse* order
///
Expand All @@ -28,6 +28,7 @@ pub static KNOWN_VERSIONS: LazyLock<Vec<KnownVersion>> = LazyLock::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
KnownVersion::new(257, "fm-resource-deletion"),
KnownVersion::new(256, "bgp-unnumbered-peer-cleanup"),
KnownVersion::new(255, "blueprint-add-external-networking-generation"),
KnownVersion::new(
Expand Down
2 changes: 2 additions & 0 deletions nexus/db-model/src/support_bundle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ pub struct SupportBundle {
pub assigned_nexus: Option<DbTypedUuid<OmicronZoneKind>>,
pub user_comment: Option<String>,
pub fm_case_id: Option<DbTypedUuid<CaseKind>>,
pub time_deleted: Option<DateTime<Utc>>,
}

impl SupportBundle {
Expand All @@ -132,6 +133,7 @@ impl SupportBundle {
assigned_nexus: Some(nexus_id.into()),
user_comment,
fm_case_id: fm_case_id.map(Into::into),
time_deleted: None,
}
}

Expand Down
Loading
Loading