diff --git a/dev-tools/reconfigurator-cli/tests/input/cmds-mupdate-with-hidden-sled.txt b/dev-tools/reconfigurator-cli/tests/input/cmds-mupdate-with-hidden-sled.txt new file mode 100644 index 00000000000..8d55d615652 --- /dev/null +++ b/dev-tools/reconfigurator-cli/tests/input/cmds-mupdate-with-hidden-sled.txt @@ -0,0 +1,30 @@ +# Test the scenario where every sled is MUPdated, and then one sled +# is missing from inventory. +# +# This produces a blippy BACKCOMPAT note about Nexus generations: +# +# blueprint: at Nexus generation 1, found zones with different +# image sources (install dataset vs artifact) + +load-example --seed test-basic --nsleds 3 + +# Hide one sled from inventory. +sled-set 89d02b1b-478c-401a-8e28-7a26f74fa41b inventory-hidden + +# Assemble the fake TUF repo and set it as the target release. +# (Setting the TUF repo as the target release is important +# because it causes the mupdate/update machinery to start working.) +tuf-assemble ../../update-common/manifests/fake-0.0.1.toml +set target-release repo-0.0.1.zip + +# MUPdate all the sleds. +sled-update-install-dataset serial0 --to-target-release +sled-update-install-dataset serial1 --to-target-release +sled-update-install-dataset serial2 --to-target-release + +# Refresh inventory and run the planner. +inventory-generate +blueprint-plan latest latest + +# Run blippy, which will produce a warning. 
+blueprint-blippy latest diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-with-hidden-sled-stderr b/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-with-hidden-sled-stderr new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-with-hidden-sled-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-with-hidden-sled-stdout new file mode 100644 index 00000000000..6360f1471f0 --- /dev/null +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-mupdate-with-hidden-sled-stdout @@ -0,0 +1,121 @@ +using provided RNG seed: reconfigurator-cli-test +> # Test the scenario where every sled is MUPdated, and then one sled +> # is missing from inventory. +> # +> # This produces a blippy BACKCOMPAT note about Nexus generations: +> # +> # blueprint: at Nexus generation 1, found zones with different +> # image sources (install dataset vs artifact) + +> load-example --seed test-basic --nsleds 3 +loaded example system with: +- collection: 9e187896-7809-46d0-9210-d75be1b3c4d4 +- blueprint: ade5749d-bdf3-4fab-a8ae-00bea01b3a5a + + +> # Hide one sled from inventory. +> sled-set 89d02b1b-478c-401a-8e28-7a26f74fa41b inventory-hidden +set sled 89d02b1b-478c-401a-8e28-7a26f74fa41b inventory visibility: visible -> hidden + + +> # Assemble the fake TUF repo and set it as the target release. +> # (Setting the TUF repo as the target release is important +> # because it causes the mupdate/update machinery to start working.) 
+> tuf-assemble ../../update-common/manifests/fake-0.0.1.toml +INFO assembling repository in +INFO artifacts assembled and archived to `repo-0.0.1.zip`, component: OmicronRepoAssembler +created repo-0.0.1.zip for system version 0.0.1 + +> set target-release repo-0.0.1.zip +INFO extracting uploaded archive to +INFO created directory to store extracted artifacts, path: +INFO added artifact, name: fake-gimlet-sp, kind: gimlet_sp, version: 0.0.1, hash: 716e29860eade5de4cf28d2c81f1c3fcaf3a3c07af52961c0e231e3dd0ba4db8, length: 734 +INFO added artifact, name: fake-rot, kind: gimlet_rot_image_a, version: 0.0.1, hash: 244d553f832cf74043bbcc8a747c8d05384a0f89f7809dcab28c3f707b11f985, length: 787 +INFO added artifact, name: fake-rot, kind: gimlet_rot_image_b, version: 0.0.1, hash: 244d553f832cf74043bbcc8a747c8d05384a0f89f7809dcab28c3f707b11f985, length: 787 +INFO added artifact, name: fake-rot-bootloader, kind: gimlet_rot_bootloader, version: 0.0.1, hash: 5bfb2fef5a25100e7813636699bd365bbcd623980ae00e876ad705ef591feded, length: 794 +INFO added artifact, name: fake-host, kind: gimlet_host_phase_1, version: 0.0.1, hash: 143aa9751a0bb16ab3d2c8b56d2874eeab14e1ac3413aa0edf1dbf56900f3fcc, length: 524288 +INFO added artifact, name: fake-host, kind: cosmo_host_phase_1, version: 0.0.1, hash: 0c43c53453b1113d8ec83d0e3cb8139094b6f4594c304645a33248863141bac6, length: 524288 +INFO added artifact, name: fake-host, kind: host_phase_2, version: 0.0.1, hash: 7cd830e1682d50620de0f5c24b8cca15937eb10d2a415ade6ad28c0d314408eb, length: 1048576 +INFO added artifact, name: fake-trampoline, kind: gimlet_trampoline_phase_1, version: 0.0.1, hash: 040e9ffbd212b790da4fc3a6376c9ff102c852c4ac1f1a1bc84c7d8edc64029f, length: 524288 +INFO added artifact, name: fake-trampoline, kind: cosmo_trampoline_phase_1, version: 0.0.1, hash: bfc7d2bf0d2e5dde41b8ea85beca7f01262688297f438727fdeb8543c1ceb25e, length: 524288 +INFO added artifact, name: fake-trampoline, kind: trampoline_phase_2, version: 0.0.1, hash: 
a05417d8d03400b9d556b63563c9958da983a0cdcc3259669966ad45e395c277, length: 1048576 +INFO added artifact, name: clickhouse, kind: zone, version: 0.0.1, hash: 0cc283162daad1dd9d63cd20a484f4e0157b6895c179defa8a99fd220323a6c5, length: 1687 +INFO added artifact, name: clickhouse_keeper, kind: zone, version: 0.0.1, hash: f27ef7d2ce10696c4583ea194cdf61c3907f2143f666af964b8ed3bee1346be0, length: 1691 +INFO added artifact, name: clickhouse_server, kind: zone, version: 0.0.1, hash: bc35f79e04956e284c230f324fe7475ad5cb2ede08e6b4a77addcd9e6f50d33b, length: 1691 +INFO added artifact, name: cockroachdb, kind: zone, version: 0.0.1, hash: a1dc64b896b4bb5d0d295f63b5edeb82b3f945e1f830b06c32f96f9de30b93d1, length: 1690 +INFO added artifact, name: crucible-zone, kind: zone, version: 0.0.1, hash: f3694b20fa1de79fb1f7c3a9f89f9f9eb5ebaaefc3caba7e1991e7e2b3191ed4, length: 1691 +INFO added artifact, name: crucible-pantry-zone, kind: zone, version: 0.0.1, hash: 6055871bfa626d582162302bf027102d90a03a42866867df2582f8eba231fc6d, length: 1696 +INFO added artifact, name: external-dns, kind: zone, version: 0.0.1, hash: 584217eae459e4c2bd00621cf1910d06edb8258948a4832ab0329cf42067c0c7, length: 1690 +INFO added artifact, name: internal-dns, kind: zone, version: 0.0.1, hash: c29c262c79d8f3fa4e0bbec221a286ca6e02b64719b6d35f32cc5e92e36b9173, length: 1690 +INFO added artifact, name: ntp, kind: zone, version: 0.0.1, hash: b661b5d1370f5ac593b4c15b5fcd22c904991cf33b6db32f886374bc022a3531, length: 1682 +INFO added artifact, name: nexus, kind: zone, version: 0.0.1, hash: 5f0b97b090966bb754485c3d397d0918d54bf4ffdc6fa691b77f61686f2ac8cc, length: 1683 +INFO added artifact, name: oximeter, kind: zone, version: 0.0.1, hash: 7ea25be50cd4e98e2ba20916cb98fe8ea457372f5973eb6ac691b5bc90dbddc0, length: 1683 +INFO added artifact, name: fake-corpus, kind: measurement_corpus, version: 1.0.0, hash: 8a0e23157bae655fceec7376926c9758efee6511c7b7ff8355bbb49545a2257f, length: 1048576 +INFO added artifact, name: fake-psc-sp, 
kind: psc_sp, version: 0.0.1, hash: 3a63db2465b433f7b2f2816f833dcce90e0aa7e7472b1735c63faf93a48bb2ab, length: 726 +INFO added artifact, name: fake-psc-rot, kind: psc_rot_image_a, version: 0.0.1, hash: 9bdc198ad072c74cfc1e145355eef307028067776b19f9e2a7830934176fe406, length: 770 +INFO added artifact, name: fake-psc-rot, kind: psc_rot_image_b, version: 0.0.1, hash: 9bdc198ad072c74cfc1e145355eef307028067776b19f9e2a7830934176fe406, length: 770 +INFO added artifact, name: fake-psc-rot-bootloader, kind: psc_rot_bootloader, version: 0.0.1, hash: a58c577f5c33e0a8176f078183a0c94b84ab1e1e7118c441f6b82551fba58f46, length: 794 +INFO added artifact, name: fake-switch-sp, kind: switch_sp, version: 0.0.1, hash: 9a559c6734981ec74fee73a56826f8a91beec39a59dea497f67d55c91ab74328, length: 736 +INFO added artifact, name: fake-switch-rot, kind: switch_rot_image_a, version: 0.0.1, hash: 7776db817d1f1b1a2f578050742e33bd4e805a4c76f36bce84dcb509b900249c, length: 776 +INFO added artifact, name: fake-switch-rot, kind: switch_rot_image_b, version: 0.0.1, hash: 7776db817d1f1b1a2f578050742e33bd4e805a4c76f36bce84dcb509b900249c, length: 776 +INFO added artifact, name: fake-switch-rot-bootloader, kind: switch_rot_bootloader, version: 0.0.1, hash: 0686443d50db2247077dc70b6543cea9a90a9792de00e06c06cff4c91fa5a4a8, length: 792 +INFO added artifact, name: installinator_document, kind: installinator_document, version: 0.0.1, hash: 6f0eebe1001e3d00b02b2dcac5b3d883cb88222fde16a028f5431e87a5feee72, length: 526 +set target release based on repo-0.0.1.zip + + +> # MUPdate all the sleds. 
+> sled-update-install-dataset serial0 --to-target-release +sled 89d02b1b-478c-401a-8e28-7a26f74fa41b: install dataset updated: to target release (system version 0.0.1) + +> sled-update-install-dataset serial1 --to-target-release +sled 2eb69596-f081-4e2d-9425-9994926e0832: install dataset updated: to target release (system version 0.0.1) + +> sled-update-install-dataset serial2 --to-target-release +sled 32d8d836-4d8a-4e54-8fa9-f31d79c42646: install dataset updated: to target release (system version 0.0.1) + + +> # Refresh inventory and run the planner. +> inventory-generate +generated inventory collection 972ca69a-384c-4a9c-a87d-c2cf21e114e0 from configured sleds + +> blueprint-plan latest latest +WARN skipping zones eligible for cleanup check (sled not present in latest inventory collection), sled_id: 89d02b1b-478c-401a-8e28-7a26f74fa41b +WARN no inventory found for in-service sled, phase: do_plan_mupdate_override, sled_id: 89d02b1b-478c-401a-8e28-7a26f74fa41b +INFO performed noop zone image source checks on sled, sled_id: 2eb69596-f081-4e2d-9425-9994926e0832, num_total: 16, num_already_artifact: 0, num_eligible: 16, num_ineligible: 0 +INFO BootPartitionDetails inventory hash not found in TUF repo, ignoring for noop checks, sled_id: 2eb69596-f081-4e2d-9425-9994926e0832, slot: a, expected_hash: 0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a +INFO BootPartitionDetails inventory hash not found in TUF repo, ignoring for noop checks, sled_id: 2eb69596-f081-4e2d-9425-9994926e0832, slot: b, expected_hash: 0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b +INFO current sled measurements are in an unknown state, sled_id: 2eb69596-f081-4e2d-9425-9994926e0832 +INFO performed noop zone image source checks on sled, sled_id: 32d8d836-4d8a-4e54-8fa9-f31d79c42646, num_total: 15, num_already_artifact: 0, num_eligible: 15, num_ineligible: 0 +INFO BootPartitionDetails inventory hash not found in TUF repo, ignoring for noop checks, sled_id: 
32d8d836-4d8a-4e54-8fa9-f31d79c42646, slot: a, expected_hash: 0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a0a +INFO BootPartitionDetails inventory hash not found in TUF repo, ignoring for noop checks, sled_id: 32d8d836-4d8a-4e54-8fa9-f31d79c42646, slot: b, expected_hash: 0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b +INFO current sled measurements are in an unknown state, sled_id: 32d8d836-4d8a-4e54-8fa9-f31d79c42646 +INFO skipped noop image source check on sled, sled_id: 89d02b1b-478c-401a-8e28-7a26f74fa41b, reason: sled not found in inventory +generated blueprint 86db3308-f817-4626-8838-4085949a6a41 based on parent blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a +blueprint source: planner with report: +planning report: +* noop converting 16/16 install-dataset zones to artifact store on sled 2eb69596-f081-4e2d-9425-9994926e0832 +* noop converting 15/15 install-dataset zones to artifact store on sled 32d8d836-4d8a-4e54-8fa9-f31d79c42646 +* zone adds waiting on blockers +* zone adds and updates are blocked: + - sleds have deployment units with image sources not set to Artifact: + - sled 89d02b1b-478c-401a-8e28-7a26f74fa41b: 15 zones + +* zone updates waiting on zone add blockers +* waiting to update top-level nexus_generation: some non-Nexus zone are not yet updated +Measurement updates: +Waiting on zone add/update blockers + + + + +> # Run blippy, which will produce a warning. 
+> blueprint-blippy latest +blippy report for blueprint 86db3308-f817-4626-8838-4085949a6a41: 1 note + BACKCOMPAT note: blueprint: at Nexus generation 1, found zones with different image sources: + - install dataset: + - zone a67ac9b3-427b-4ea6-a891-1c76a22720f5 on sled 89d02b1b-478c-401a-8e28-7a26f74fa41b + - artifact: version 0.0.1 (hash: 5f0b97b090966bb754485c3d397d0918d54bf4ffdc6fa691b77f61686f2ac8cc): + - zone e246f5e3-0650-4afc-860f-ee7114d309c5 on sled 2eb69596-f081-4e2d-9425-9994926e0832 + - zone 6c2a57b0-2de0-4409-a6b9-c9aa5614eefa on sled 32d8d836-4d8a-4e54-8fa9-f31d79c42646 + + diff --git a/nexus/reconfigurator/blippy/src/blippy.rs b/nexus/reconfigurator/blippy/src/blippy.rs index 0aa39014fb4..abff71cdb47 100644 --- a/nexus/reconfigurator/blippy/src/blippy.rs +++ b/nexus/reconfigurator/blippy/src/blippy.rs @@ -10,6 +10,7 @@ use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintArtifactVersion; use nexus_types::deployment::BlueprintDatasetConfig; use nexus_types::deployment::BlueprintZoneConfig; +use nexus_types::deployment::BlueprintZoneImageSource; use nexus_types::deployment::OmicronZoneExternalIp; use nexus_types::deployment::OmicronZoneNicEntry; use nexus_types::deployment::PlanningInput; @@ -25,6 +26,7 @@ use omicron_uuid_kinds::MupdateOverrideUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; +use std::collections::BTreeMap; use std::collections::BTreeSet; use std::net::IpAddr; use std::net::Ipv6Addr; @@ -37,7 +39,7 @@ pub struct Note { pub kind: Kind, } -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub enum Severity { /// Indicates an issue with a blueprint that should be corrected by a future /// planning run. 
@@ -114,8 +116,20 @@ impl Kind { #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] pub enum BlueprintKind { - /// No zones exist in the blueprint using the active Nexus generation + /// No zones exist in the blueprint using the active Nexus generation. NoZonesWithActiveNexusGeneration(Generation), + + /// Nexus zones at the same generation have two or more distinct image + /// sources. + /// + /// The map groups the zones at this generation by their image source. + NexusZoneGenerationImageSourceMismatch { + generation: Generation, + zones_by_source: BTreeMap< + BlueprintZoneImageSource, + BTreeSet<(SledUuid, OmicronZoneUuid)>, + >, + }, } impl fmt::Display for BlueprintKind { @@ -124,6 +138,28 @@ impl fmt::Display for BlueprintKind { BlueprintKind::NoZonesWithActiveNexusGeneration(r#gen) => { write!(f, "No zones with active nexus generation @ {gen}",) } + BlueprintKind::NexusZoneGenerationImageSourceMismatch { + generation, + zones_by_source, + } => { + write!( + f, + "at Nexus generation {generation}, found zones with \ + different image sources:", + )?; + for (source, zones) in zones_by_source { + // Display the source with the alternate format ({:#}) to + // show the artifact hash as well. + write!(f, "\n - {source:#}:")?; + for (sled_id, zone_id) in zones { + write!( + f, + "\n - zone {zone_id} on sled {sled_id}" + )?; + } + } + Ok(()) + } } } } @@ -246,12 +282,6 @@ pub enum SledKind { zone_generation: Generation, id: OmicronZoneUuid, }, - /// Nexus zones with the same generation have different image sources. 
- NexusZoneGenerationImageSourceMismatch { - zone1: BlueprintZoneConfig, - zone2: BlueprintZoneConfig, - generation: Generation, - }, } impl fmt::Display for SledKind { @@ -511,18 +541,6 @@ impl fmt::Display for SledKind { is too new relative to the active generation {active_generation}" ) } - SledKind::NexusZoneGenerationImageSourceMismatch { - zone1, - zone2, - generation, - } => { - write!( - f, - "Nexus zones {} and {} both have generation {generation} but \ - different image sources ({:?} vs {:?})", - zone1.id, zone2.id, zone1.image_source, zone2.image_source, - ) - } } } } diff --git a/nexus/reconfigurator/blippy/src/checks.rs b/nexus/reconfigurator/blippy/src/checks.rs index aad1f9b3c49..08aaba9ee8c 100644 --- a/nexus/reconfigurator/blippy/src/checks.rs +++ b/nexus/reconfigurator/blippy/src/checks.rs @@ -27,6 +27,7 @@ use omicron_common::api::external::Generation; use omicron_common::disk::DatasetKind; use omicron_common::disk::M2Slot; use omicron_uuid_kinds::MupdateOverrideUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use sled_agent_types::inventory::ZoneKind; @@ -693,72 +694,103 @@ fn check_mupdate_override_host_phase_2_contents( } fn check_nexus_generation_consistency(blippy: &mut Blippy<'_>) { - use std::collections::HashMap; - - // Map from generation -> (sled_id, image_source, zone) - let mut generation_info: HashMap< + let mut by_generation: BTreeMap< Generation, - Vec<(SledUuid, BlueprintZoneImageSource, &BlueprintZoneConfig)>, - > = HashMap::new(); - - // Collect all Nexus zones and their generations - for (sled_id, zone) in blippy.blueprint().in_service_zones() { - if let BlueprintZoneType::Nexus(nexus) = &zone.zone_type { - generation_info.entry(nexus.nexus_generation).or_default().push(( - sled_id, - zone.image_source.clone(), - zone, - )); - } + BTreeMap< + BlueprintZoneImageSource, + BTreeSet<(SledUuid, OmicronZoneUuid)>, + >, + > = BTreeMap::new(); + for (sled_id, zone, 
nexus) in blippy.blueprint().in_service_nexus_zones() { + by_generation + .entry(nexus.nexus_generation) + .or_default() + .entry(zone.image_source.clone()) + .or_default() + .insert((sled_id, zone.id)); } - // Check that the top-level Nexus generation is consistent with the images let active_gen = blippy.blueprint().nexus_generation; - if !generation_info.contains_key(&active_gen) { + if !by_generation.contains_key(&active_gen) { blippy.push_blueprint_note( Severity::Fatal, BlueprintKind::NoZonesWithActiveNexusGeneration(active_gen), ); return; - }; - - // Check each generation for image source consistency - for (generation, zones_with_gen) in &generation_info { - // Take the first zone as the reference - let (ref_sled_id, ref_image_source, ref_zone) = &zones_with_gen[0]; + } - if *generation > active_gen.next() { - blippy.push_sled_note( - *ref_sled_id, - Severity::Fatal, - SledKind::NexusZoneGenerationTooNew { - active_generation: active_gen, - zone_generation: *generation, - id: ref_zone.id, - }, - ); - } + for (generation, zones_by_source) in by_generation { + check_one_nexus_generation( + blippy, + generation, + active_gen, + zones_by_source, + ); + } +} - if zones_with_gen.len() < 2 { - // Only one zone with this generation, no consistency issue - continue; - } +fn check_one_nexus_generation( + blippy: &mut Blippy<'_>, + generation: Generation, + active_gen: Generation, + zones_by_source: BTreeMap< + BlueprintZoneImageSource, + BTreeSet<(SledUuid, OmicronZoneUuid)>, + >, +) { + if generation > active_gen.next() { + // Pick a stable representative zone for the note. zones_by_source + // is an ordered map so this is deterministic. 
+ let (sled_id, zone_id) = *zones_by_source + .values() + .next() + .and_then(|zones| zones.iter().next()) + .expect("every source has at least one zone associated with it"); + blippy.push_sled_note( + sled_id, + Severity::Fatal, + SledKind::NexusZoneGenerationTooNew { + active_generation: active_gen, + zone_generation: generation, + id: zone_id, + }, + ); + } - // Compare all other zones to the reference - for (_sled_id, image_source, zone) in &zones_with_gen[1..] { - if image_source != ref_image_source { - blippy.push_sled_note( - *ref_sled_id, - Severity::Fatal, - SledKind::NexusZoneGenerationImageSourceMismatch { - zone1: (*ref_zone).clone(), - zone2: (*zone).clone(), - generation: *generation, - }, - ); - } - } + // One distinct image source means all zones agree. For the + // all-InstallDataset case, we can't tell from a blueprint whether the + // zones are running the same version, and rely on the operator to + // MUPdate the rack consistently. + if zones_by_source.len() < 2 { + return; } + + // Two or more Artifact entries means we know that multiple Nexus versions + // exist for the same generation. This is FATAL. + // + // The other mismatch case (exactly one Artifact entry, plus InstallDataset) + // is a transient state which can be seen immediately after a full-rack + // MUPdate, in case some sleds are missing from inventory. We treat this as + // BACKCOMPAT, not FATAL. + // + // XXX do we need another severity level for this? + let artifact_versions = zones_by_source + .keys() + .filter(|s| matches!(s, BlueprintZoneImageSource::Artifact { .. 
})) + .count(); + let severity = if artifact_versions >= 2 { + Severity::Fatal + } else { + Severity::BackwardsCompatibility + }; + + blippy.push_blueprint_note( + severity, + BlueprintKind::NexusZoneGenerationImageSourceMismatch { + generation, + zones_by_source, + }, + ); } #[cfg(test)] @@ -770,6 +802,7 @@ mod tests { use ipnet::IpAdd; use nexus_reconfigurator_planning::example::ExampleSystemBuilder; use nexus_reconfigurator_planning::example::example; + use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintArtifactVersion; use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::blueprint_zone_type; @@ -2082,88 +2115,253 @@ mod tests { .nexus_count(3) .build(); - // Find the Nexus zones - let ((sled1, zone1_id), (sled2, zone2_id)) = { - let nexus_zones: Vec<_> = blueprint - .in_service_zones() - .filter_map(|(sled_id, zone)| { - if matches!(zone.zone_type, BlueprintZoneType::Nexus(_)) { - Some((sled_id, zone)) - } else { - None - } - }) - .collect(); + let [(sled1, zone1_id), (sled2, zone2_id), (sled3, zone3_id)] = + nexus_zone_ids(&blueprint); - // Should have exactly 3 Nexus zones - assert_eq!(nexus_zones.len(), 3); + let generation = Generation::new(); + let install_dataset = BlueprintZoneImageSource::InstallDataset; + let artifact_1_0_0 = BlueprintZoneImageSource::Artifact { + version: BlueprintArtifactVersion::Available { + version: "1.0.0".parse().unwrap(), + }, + hash: ArtifactHash([0; 32]), + }; - // Modify two zones to have the same generation but different image sources - let (sled1, zone1) = nexus_zones[0]; - let (sled2, zone2) = nexus_zones[1]; + set_nexus_image_source( + &mut blueprint, + sled1, + zone1_id, + generation, + install_dataset.clone(), + ); + set_nexus_image_source( + &mut blueprint, + sled2, + zone2_id, + generation, + artifact_1_0_0.clone(), + ); + set_nexus_image_source( + &mut blueprint, + sled3, + zone3_id, + generation, + install_dataset.clone(), + ); - ((sled1, zone1.id), (sled2, 
zone2.id)) - }; + let expected_notes = [Note { + // One artifact version + install dataset is a non-fatal note. + severity: Severity::BackwardsCompatibility, + kind: Kind::Blueprint( + BlueprintKind::NexusZoneGenerationImageSourceMismatch { + generation, + zones_by_source: BTreeMap::from([ + ( + install_dataset, + BTreeSet::from([ + (sled1, zone1_id), + (sled3, zone3_id), + ]), + ), + (artifact_1_0_0, BTreeSet::from([(sled2, zone2_id)])), + ]), + }, + ), + }]; - let generation = Generation::new(); + let report = Blippy::new_blueprint_only(&blueprint) + .into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + assert_eq!(report.notes(), &expected_notes); - let zone1 = { - // Find the zones in the blueprint and modify them - let mut zone1_config = blueprint - .sleds - .get_mut(&sled1) - .unwrap() - .zones - .get_mut(&zone1_id) - .unwrap(); + logctx.cleanup_successful(); + } - match &mut zone1_config.zone_type { - BlueprintZoneType::Nexus(nexus) => { - nexus.nexus_generation = generation; - } - _ => unreachable!("this is a Nexus zone"), - } - zone1_config.image_source = - BlueprintZoneImageSource::InstallDataset; - zone1_config.clone() + #[test] + fn test_nexus_generation_image_consistency_multiple_artifact_versions() { + static TEST_NAME: &str = "test_nexus_generation_image_consistency_multiple_artifact_versions"; + let logctx = test_setup_log(TEST_NAME); + let (_, mut blueprint) = + ExampleSystemBuilder::new(&logctx.log, TEST_NAME) + .nsleds(3) + .nexus_count(3) + .build(); + + let [ + (sled_install, zone_install_id), + (sled_v1, zone_v1_id), + (sled_v2, zone_v2_id), + ] = nexus_zone_ids(&blueprint); + + let generation = Generation::new(); + + let install_dataset = BlueprintZoneImageSource::InstallDataset; + let artifact_v1 = BlueprintZoneImageSource::Artifact { + version: BlueprintArtifactVersion::Available { + version: "1.0.0".parse().unwrap(), + }, + hash: ArtifactHash([0x11; 32]), + }; + let artifact_v2 = BlueprintZoneImageSource::Artifact 
{ + version: BlueprintArtifactVersion::Available { + version: "2.0.0".parse().unwrap(), + }, + hash: ArtifactHash([0x22; 32]), }; - let zone2 = { - let mut zone2_config = blueprint - .sleds - .get_mut(&sled2) - .unwrap() - .zones - .get_mut(&zone2_id) - .unwrap(); + set_nexus_image_source( + &mut blueprint, + sled_install, + zone_install_id, + generation, + install_dataset.clone(), + ); + set_nexus_image_source( + &mut blueprint, + sled_v1, + zone_v1_id, + generation, + artifact_v1.clone(), + ); + set_nexus_image_source( + &mut blueprint, + sled_v2, + zone_v2_id, + generation, + artifact_v2.clone(), + ); - match &mut zone2_config.zone_type { - BlueprintZoneType::Nexus(nexus) => { - nexus.nexus_generation = generation; - } - _ => unreachable!("this is a Nexus zone"), - } - zone2_config.image_source = BlueprintZoneImageSource::Artifact { - version: BlueprintArtifactVersion::Available { - version: "1.0.0".parse().unwrap(), + let expected_notes = [Note { + // More than one artifact version is a fatal note. + severity: Severity::Fatal, + kind: Kind::Blueprint( + BlueprintKind::NexusZoneGenerationImageSourceMismatch { + generation, + zones_by_source: BTreeMap::from([ + ( + install_dataset, + BTreeSet::from([(sled_install, zone_install_id)]), + ), + (artifact_v1, BTreeSet::from([(sled_v1, zone_v1_id)])), + (artifact_v2, BTreeSet::from([(sled_v2, zone_v2_id)])), + ]), }, - hash: ArtifactHash([0; 32]), - }; - zone2_config.clone() - }; + ), + }]; + + let report = Blippy::new_blueprint_only(&blueprint) + .into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + assert_eq!(report.notes(), &expected_notes); + + logctx.cleanup_successful(); + } + + // A zone whose Nexus generation equals `active_gen.next()` is on the + // boundary of the `generation > active_gen.next()` check in + // `check_one_nexus_generation`, and must NOT trigger + // `NexusZoneGenerationTooNew`. 
+ #[test] + fn test_nexus_generation_at_active_next_is_not_too_new() { + static TEST_NAME: &str = + "test_nexus_generation_at_active_next_is_not_too_new"; + let logctx = test_setup_log(TEST_NAME); + let (_, mut blueprint) = + ExampleSystemBuilder::new(&logctx.log, TEST_NAME) + .nsleds(3) + .nexus_count(3) + .build(); + + let [(sled1, zone1_id), (sled2, zone2_id), (sled3, zone3_id)] = + nexus_zone_ids(&blueprint); + + let active_gen = blueprint.nexus_generation; + let install_dataset = BlueprintZoneImageSource::InstallDataset; + + // Two zones at the active generation, one at active_gen.next(). + set_nexus_image_source( + &mut blueprint, + sled1, + zone1_id, + active_gen, + install_dataset.clone(), + ); + set_nexus_image_source( + &mut blueprint, + sled2, + zone2_id, + active_gen, + install_dataset.clone(), + ); + set_nexus_image_source( + &mut blueprint, + sled3, + zone3_id, + active_gen.next(), + install_dataset, + ); + + let report = Blippy::new_blueprint_only(&blueprint) + .into_report(BlippyReportSortKey::Kind); + eprintln!("{}", report.display()); + assert_eq!(report.notes(), &[]); + + logctx.cleanup_successful(); + } + + // A zone whose Nexus generation is strictly greater than + // `active_gen.next()` triggers a Fatal `NexusZoneGenerationTooNew` note. + #[test] + fn test_nexus_generation_above_active_next_is_too_new() { + static TEST_NAME: &str = + "test_nexus_generation_above_active_next_is_too_new"; + let logctx = test_setup_log(TEST_NAME); + let (_, mut blueprint) = + ExampleSystemBuilder::new(&logctx.log, TEST_NAME) + .nsleds(3) + .nexus_count(3) + .build(); + + let [(sled1, zone1_id), (sled2, zone2_id), (sled3, zone3_id)] = + nexus_zone_ids(&blueprint); + + let active_gen = blueprint.nexus_generation; + let too_new_gen = active_gen.next().next(); + let install_dataset = BlueprintZoneImageSource::InstallDataset; + + // Two zones at the active generation, one at active_gen.next().next() + // (i.e., strictly greater than active_gen.next()). 
+ set_nexus_image_source( + &mut blueprint, + sled1, + zone1_id, + active_gen, + install_dataset.clone(), + ); + set_nexus_image_source( + &mut blueprint, + sled2, + zone2_id, + active_gen, + install_dataset.clone(), + ); + set_nexus_image_source( + &mut blueprint, + sled3, + zone3_id, + too_new_gen, + install_dataset, + ); - // Run blippy checks let expected_notes = [Note { severity: Severity::Fatal, kind: Kind::Sled { - sled_id: sled1, - kind: Box::new( - SledKind::NexusZoneGenerationImageSourceMismatch { - zone1, - zone2, - generation, - }, - ), + sled_id: sled3, + kind: Box::new(SledKind::NexusZoneGenerationTooNew { + active_generation: active_gen, + zone_generation: too_new_gen, + id: zone3_id, + }), }, }]; @@ -2174,6 +2372,39 @@ mod tests { logctx.cleanup_successful(); } + + fn nexus_zone_ids<const N: usize>( + blueprint: &Blueprint, + ) -> [(SledUuid, OmicronZoneUuid); N] { + let zones: Vec<_> = blueprint + .in_service_nexus_zones() + .map(|(sled_id, zone, _nexus)| (sled_id, zone.id)) + .collect(); + zones.try_into().unwrap_or_else(|v: Vec<_>| { + panic!("expected {N} Nexus zones, found {}", v.len()) + }) + } + + fn set_nexus_image_source( + blueprint: &mut Blueprint, + sled_id: SledUuid, + zone_id: OmicronZoneUuid, + generation: Generation, + image_source: BlueprintZoneImageSource, + ) { + let cfg = blueprint + .sleds + .get_mut(&sled_id) + .expect("sled is in the blueprint") + .zones + .get_mut(&zone_id) + .expect("zone is on the sled"); + let BlueprintZoneType::Nexus(nexus) = &mut cfg.zone_type else { + panic!("expected zone {zone_id} to be a Nexus zone"); + }; + nexus.nexus_generation = generation; + cfg.image_source = image_source; + } } // For a given `PlanningInput` / `Blueprint` pair that could be passed to the diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index a20702af8ea..969e9029707 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -2073,8 +2073,15 @@ impl fmt::Display for BlueprintZoneImageSource {
BlueprintZoneImageSource::InstallDataset => { write!(f, "install dataset") } - BlueprintZoneImageSource::Artifact { version, hash: _ } => { - write!(f, "artifact: {version}") + BlueprintZoneImageSource::Artifact { version, hash } => { + // Most callers don't care about the hash, so don't show it by + // default. But the hash is sometimes useful. Callers can opt + // into it by using the alternate display mode (`{:#}`). + write!(f, "artifact: {version}")?; + if f.alternate() { + write!(f, " (hash: {hash})")?; + } + Ok(()) } } }