diff --git a/dev-tools/omdb/src/bin/omdb/nexus/reconfigurator_config.rs b/dev-tools/omdb/src/bin/omdb/nexus/reconfigurator_config.rs index b293250baf8..095c72d2986 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus/reconfigurator_config.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus/reconfigurator_config.rs @@ -9,12 +9,14 @@ use crate::check_allow_destructive::DestructiveOperationToken; use clap::ArgAction; use clap::Args; use clap::Subcommand; +use clap::ValueEnum; use daft::Diffable; use http::StatusCode; use indent_write::io::IndentWriter; use nexus_types::deployment::PlannerConfig; use nexus_types::deployment::ReconfiguratorConfig; use nexus_types::deployment::ReconfiguratorConfigParam; +use nexus_types::deployment::ReconfiguratorDisruptionPolicy; use std::io; use std::io::Write; use std::num::ParseIntError; @@ -55,6 +57,9 @@ pub struct ReconfiguratorConfigOpts { #[clap(long, action = ArgAction::Set)] tuf_repo_pruner_enabled: Option, + + #[clap(long)] + disruption_policy: Option, } impl ReconfiguratorConfigOpts { @@ -75,6 +80,10 @@ impl ReconfiguratorConfigOpts { tuf_repo_pruner_enabled: self .tuf_repo_pruner_enabled .unwrap_or(current.tuf_repo_pruner_enabled), + disruption_policy: self + .disruption_policy + .map(|p| p.into()) + .unwrap_or(current.disruption_policy), } } @@ -93,6 +102,27 @@ impl ReconfiguratorConfigOpts { } } +#[derive(Debug, Clone, Copy, ValueEnum)] +pub enum ReconfiguratorDisruptionPolicyOpt { + Terminate, + MigrateOrTerminate, + MigrateOnly, +} + +impl From + for ReconfiguratorDisruptionPolicy +{ + fn from(value: ReconfiguratorDisruptionPolicyOpt) -> Self { + match value { + ReconfiguratorDisruptionPolicyOpt::Terminate => Self::Terminate, + ReconfiguratorDisruptionPolicyOpt::MigrateOrTerminate => { + Self::MigrateOrTerminate + } + ReconfiguratorDisruptionPolicyOpt::MigrateOnly => Self::MigrateOnly, + } + } +} + #[derive(Debug, Clone, Copy, Args)] pub struct ReconfiguratorConfigShowArgs { version: ReconfiguratorConfigVersionOrCurrent, diff --git 
a/dev-tools/omdb/src/bin/omdb/reconfigurator.rs b/dev-tools/omdb/src/bin/omdb/reconfigurator.rs index cc2b267d1ad..7aaf1ba7f94 100644 --- a/dev-tools/omdb/src/bin/omdb/reconfigurator.rs +++ b/dev-tools/omdb/src/bin/omdb/reconfigurator.rs @@ -422,6 +422,7 @@ async fn cmd_reconfigurator_config_history( planner_enabled: String, add_zones_with_mupdate_override: String, tuf_repo_pruner_enabled: String, + disruption_policy: String, time_modified: String, } @@ -436,6 +437,7 @@ async fn cmd_reconfigurator_config_history( planner_config: PlannerConfig { add_zones_with_mupdate_override }, tuf_repo_pruner_enabled, + disruption_policy, }, time_modified, } = s; @@ -445,6 +447,7 @@ async fn cmd_reconfigurator_config_history( add_zones_with_mupdate_override: add_zones_with_mupdate_override.to_string(), tuf_repo_pruner_enabled: tuf_repo_pruner_enabled.to_string(), + disruption_policy: disruption_policy.to_string(), time_modified: time_modified.to_string(), } }) diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index c877645a239..4fe59b68929 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -1116,6 +1116,7 @@ mod test { use super::*; use nexus_types::deployment::PlannerConfig; + use nexus_types::deployment::ReconfiguratorDisruptionPolicy; use omicron_common::address::{ CLICKHOUSE_TCP_PORT, Ipv6Subnet, RACK_PREFIX, }; @@ -1257,6 +1258,7 @@ mod test { planner_enabled = true planner_config.add_zones_with_mupdate_override = true tuf_repo_pruner_enabled = false + disruption_policy = "terminate" [background_tasks] dns_internal.period_secs_config = 1 dns_internal.period_secs_servers = 2 @@ -1431,6 +1433,7 @@ mod test { add_zones_with_mupdate_override: true, }, tuf_repo_pruner_enabled: false, + disruption_policy: ReconfiguratorDisruptionPolicy::Terminate, }), background_tasks: BackgroundTaskConfig { dns_internal: DnsTasksConfig { diff --git a/nexus/db-model/src/reconfigurator_config.rs 
b/nexus/db-model/src/reconfigurator_config.rs index 801bdb41369..31ebb1cbd94 100644 --- a/nexus/db-model/src/reconfigurator_config.rs +++ b/nexus/db-model/src/reconfigurator_config.rs @@ -4,7 +4,7 @@ //! Types representing runtime configuration for reconfigurator -use crate::SqlU32; +use crate::{SqlU32, impl_enum_type}; use chrono::{DateTime, Utc}; use nexus_db_schema::schema::reconfigurator_config; use nexus_types::deployment; @@ -17,6 +17,7 @@ pub struct ReconfiguratorConfig { pub time_modified: DateTime, pub add_zones_with_mupdate_override: bool, pub tuf_repo_pruner_enabled: bool, + pub disruption_policy: DbReconfiguratorDisruptionPolicy, } impl From for ReconfiguratorConfig { @@ -30,6 +31,7 @@ impl From for ReconfiguratorConfig { .planner_config .add_zones_with_mupdate_override, tuf_repo_pruner_enabled: value.config.tuf_repo_pruner_enabled, + disruption_policy: value.config.disruption_policy.into(), } } } @@ -45,8 +47,63 @@ impl From for deployment::ReconfiguratorConfigView { .add_zones_with_mupdate_override, }, tuf_repo_pruner_enabled: value.tuf_repo_pruner_enabled, + disruption_policy: value.disruption_policy.into(), }, time_modified: value.time_modified, } } } + +impl_enum_type!( + ReconfiguratorDisruptionPolicyEnum: + + #[derive( + Copy, + Clone, + Debug, + PartialEq, + AsExpression, + FromSqlRow, + )] + pub enum DbReconfiguratorDisruptionPolicy; + + Terminate => b"terminate" + MigrateOrTerminate => b"migrate_or_terminate" + MigrateOnly => b"migrate_only" +); + +impl From + for deployment::ReconfiguratorDisruptionPolicy +{ + fn from(value: DbReconfiguratorDisruptionPolicy) -> Self { + match value { + DbReconfiguratorDisruptionPolicy::Terminate => { + deployment::ReconfiguratorDisruptionPolicy::Terminate + } + DbReconfiguratorDisruptionPolicy::MigrateOrTerminate => { + deployment::ReconfiguratorDisruptionPolicy::MigrateOrTerminate + } + DbReconfiguratorDisruptionPolicy::MigrateOnly => { + deployment::ReconfiguratorDisruptionPolicy::MigrateOnly + } + } + } +} 
+ +impl From + for DbReconfiguratorDisruptionPolicy +{ + fn from(value: deployment::ReconfiguratorDisruptionPolicy) -> Self { + match value { + deployment::ReconfiguratorDisruptionPolicy::Terminate => { + DbReconfiguratorDisruptionPolicy::Terminate + } + deployment::ReconfiguratorDisruptionPolicy::MigrateOrTerminate => { + DbReconfiguratorDisruptionPolicy::MigrateOrTerminate + } + deployment::ReconfiguratorDisruptionPolicy::MigrateOnly => { + DbReconfiguratorDisruptionPolicy::MigrateOnly + } + } + } +} diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index c927e8cfc64..f871fd2df02 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(253, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(254, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ pub static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(254, "add-disruption-policy"), KnownVersion::new(253, "delete-nexus-default-allow-firewall-rule"), KnownVersion::new(252, "fm-support-bundle-and-alert-request-comments"), KnownVersion::new(251, "fm-sitrep-next-inv-min-time-started"), diff --git a/nexus/db-queries/src/db/datastore/reconfigurator_config.rs b/nexus/db-queries/src/db/datastore/reconfigurator_config.rs index df34d2ce883..3b7a4814112 100644 --- a/nexus/db-queries/src/db/datastore/reconfigurator_config.rs +++ b/nexus/db-queries/src/db/datastore/reconfigurator_config.rs @@ -18,8 +18,10 @@ use diesel::sql_types; use nexus_db_errors::ErrorHandler; use nexus_db_errors::public_error_from_diesel; use nexus_db_lookup::DbConnection; +use nexus_db_model::DbReconfiguratorDisruptionPolicy; use nexus_db_model::ReconfiguratorConfig as DbReconfiguratorConfig; use nexus_db_model::SqlU32; +use nexus_db_schema::enums::ReconfiguratorDisruptionPolicyEnum; use nexus_types::deployment::PlannerConfig; use nexus_types::deployment::ReconfiguratorConfig; use nexus_types::deployment::ReconfiguratorConfigParam; @@ -157,6 +159,7 @@ impl DataStore { planner_config: PlannerConfig { add_zones_with_mupdate_override }, tuf_repo_pruner_enabled, + disruption_policy, }, time_modified, } = *switches; @@ -164,8 +167,9 @@ impl DataStore { sql_query( r"INSERT INTO reconfigurator_config (version, planner_enabled, time_modified, - add_zones_with_mupdate_override, tuf_repo_pruner_enabled) - SELECT $1, $2, $3, $4, $5 + add_zones_with_mupdate_override, tuf_repo_pruner_enabled, + disruption_policy) + SELECT $1, $2, $3, $4, $5, $6 WHERE $1 - 1 IN ( SELECT COALESCE(MAX(version), 0) FROM reconfigurator_config @@ -176,6 +180,9 @@ impl DataStore { .bind::(time_modified) .bind::(add_zones_with_mupdate_override) .bind::(tuf_repo_pruner_enabled) + .bind::( + DbReconfiguratorDisruptionPolicy::from(disruption_policy), + ) .execute_async(conn) .await .map_err(|e| 
public_error_from_diesel(e, ErrorHandler::Server)) @@ -185,7 +192,9 @@ impl DataStore { mod tests { use super::*; use crate::db::pub_test_utils::TestDatabase; - use nexus_types::deployment::{PlannerConfig, ReconfiguratorConfig}; + use nexus_types::deployment::{ + PlannerConfig, ReconfiguratorConfig, ReconfiguratorDisruptionPolicy, + }; use omicron_test_utils::dev; #[tokio::test] @@ -211,6 +220,7 @@ mod tests { planner_enabled: false, planner_config: PlannerConfig::default(), tuf_repo_pruner_enabled: true, + disruption_policy: ReconfiguratorDisruptionPolicy::default(), }, }; diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index d2288443874..5f178309f87 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -88,6 +88,7 @@ define_enums! { PhysicalDiskStateEnum => "physical_disk_state", ProducerKindEnum => "producer_kind", ReadOnlyTargetReplacementTypeEnum => "read_only_target_replacement_type", + ReconfiguratorDisruptionPolicyEnum => "reconfigurator_disruption_policy", RegionReplacementStateEnum => "region_replacement_state", RegionReplacementStepTypeEnum => "region_replacement_step_type", RegionReservationPercentEnum => "region_reservation_percent", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 0a66531be52..3fbc9f01c12 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -2150,6 +2150,7 @@ table! 
{ time_modified -> Timestamptz, add_zones_with_mupdate_override -> Bool, tuf_repo_pruner_enabled -> Bool, + disruption_policy -> crate::enums::ReconfiguratorDisruptionPolicyEnum, } } diff --git a/nexus/src/app/background/tasks/reconfigurator_config.rs b/nexus/src/app/background/tasks/reconfigurator_config.rs index d6a4ee8c40f..902785775e1 100644 --- a/nexus/src/app/background/tasks/reconfigurator_config.rs +++ b/nexus/src/app/background/tasks/reconfigurator_config.rs @@ -91,6 +91,7 @@ mod test { use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::{ PlannerConfig, ReconfiguratorConfig, ReconfiguratorConfigParam, + ReconfiguratorDisruptionPolicy, }; use nexus_types::internal_api::background::BlueprintPlannerStatus; use nexus_types::internal_api::background::TufRepoPrunerStatus; @@ -150,6 +151,7 @@ mod test { planner_enabled: !default_switches.config.planner_enabled, planner_config: PlannerConfig::default(), tuf_repo_pruner_enabled: true, + disruption_policy: ReconfiguratorDisruptionPolicy::default(), }; let switches = ReconfiguratorConfigParam { version: 1, config: expected_switches }; @@ -181,6 +183,7 @@ mod test { planner_enabled: !expected_switches.planner_enabled, planner_config: PlannerConfig::default(), tuf_repo_pruner_enabled: true, + disruption_policy: ReconfiguratorDisruptionPolicy::default(), }; let switches = ReconfiguratorConfigParam { version: 2, config: expected_switches }; @@ -236,6 +239,7 @@ mod test { planner_enabled: false, planner_config: PlannerConfig::default(), tuf_repo_pruner_enabled: false, + disruption_policy: ReconfiguratorDisruptionPolicy::default(), }; let switches = ReconfiguratorConfigParam { version: initial_config_version + 1, @@ -270,6 +274,7 @@ mod test { planner_enabled: true, planner_config: PlannerConfig::default(), tuf_repo_pruner_enabled: true, + disruption_policy: ReconfiguratorDisruptionPolicy::default(), }; let switches = ReconfiguratorConfigParam { version: initial_config_version + 2, diff --git 
a/nexus/test-utils/src/starter.rs b/nexus/test-utils/src/starter.rs index d05e24f10a2..1d29a0e28a5 100644 --- a/nexus/test-utils/src/starter.rs +++ b/nexus/test-utils/src/starter.rs @@ -56,6 +56,7 @@ use nexus_types::deployment::OximeterReadMode; use nexus_types::deployment::PendingMgsUpdates; use nexus_types::deployment::PlannerConfig; use nexus_types::deployment::ReconfiguratorConfig; +use nexus_types::deployment::ReconfiguratorDisruptionPolicy; use nexus_types::deployment::blueprint_zone_type; use nexus_types::external_api::sled::SledState; use nexus_types::internal_api::params::DnsConfigParams; @@ -541,6 +542,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { planner_enabled: false, planner_config: PlannerConfig::default(), tuf_repo_pruner_enabled: true, + disruption_policy: ReconfiguratorDisruptionPolicy::default(), }); self.config.deployment.internal_dns = InternalDns::FromAddress { address: self diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index a20702af8ea..6423b746104 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -162,6 +162,7 @@ pub use reconfigurator_config::ReconfiguratorConfigDisplay; pub use reconfigurator_config::ReconfiguratorConfigParam; pub use reconfigurator_config::ReconfiguratorConfigView; pub use reconfigurator_config::ReconfiguratorConfigViewDisplay; +pub use reconfigurator_config::ReconfiguratorDisruptionPolicy; use sled_hardware_types::BaseboardId; pub use zone_type::BlueprintZoneType; pub use zone_type::DurableDataset; diff --git a/nexus/types/src/deployment/reconfigurator_config.rs b/nexus/types/src/deployment/reconfigurator_config.rs index 41f461bc2e8..52c4fca30da 100644 --- a/nexus/types/src/deployment/reconfigurator_config.rs +++ b/nexus/types/src/deployment/reconfigurator_config.rs @@ -118,6 +118,7 @@ pub struct ReconfiguratorConfig { pub planner_enabled: bool, pub planner_config: PlannerConfig, pub tuf_repo_pruner_enabled: bool, + pub disruption_policy: 
ReconfiguratorDisruptionPolicy, } impl ReconfiguratorConfig { @@ -132,6 +133,49 @@ impl Default for ReconfiguratorConfig { planner_enabled: true, planner_config: PlannerConfig::default(), tuf_repo_pruner_enabled: true, + disruption_policy: ReconfiguratorDisruptionPolicy::default(), + } + } +} + +/// Controls how instances are disrupted during updates. +#[derive( + Clone, + Copy, + Debug, + Default, + Diffable, + PartialEq, + Eq, + Serialize, + Deserialize, + JsonSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum ReconfiguratorDisruptionPolicy { + /// Terminate instances during updates -- do not attempt to migrate + /// instances. This is currently the default. + #[default] + Terminate, + + /// Attempt to live-migrate instances, and terminate instances if migration + /// is not possible (TODO: document when migration is not possible). + MigrateOrTerminate, + + /// Live-migrate instances and never terminate them (TODO: non-migratable?). + MigrateOnly, +} + +impl fmt::Display for ReconfiguratorDisruptionPolicy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ReconfiguratorDisruptionPolicy::Terminate => write!(f, "terminate"), + ReconfiguratorDisruptionPolicy::MigrateOrTerminate => { + write!(f, "live-migrate or terminate") + } + ReconfiguratorDisruptionPolicy::MigrateOnly => { + write!(f, "live-migrate only") + } } } } @@ -149,9 +193,11 @@ impl fmt::Display for ReconfiguratorConfigDisplay<'_> { planner_enabled, planner_config, tuf_repo_pruner_enabled, + disruption_policy, }, } = self; writeln!(f, "tuf repo pruner enabled: {}", tuf_repo_pruner_enabled)?; + writeln!(f, "disruption policy: {}", disruption_policy)?; writeln!(f, "planner enabled: {}", planner_enabled)?; writeln!(f, "planner config:")?; // planner_config does its own indentation, so it's not necessary to @@ -179,6 +225,7 @@ impl fmt::Display for ReconfiguratorConfigDiffDisplay<'_, '_> { planner_enabled, planner_config, tuf_repo_pruner_enabled, + disruption_policy, } = self.diff; let list = 
KvList::new( @@ -186,6 +233,7 @@ impl fmt::Display for ReconfiguratorConfigDiffDisplay<'_, '_> { vec![ diff_row!(tuf_repo_pruner_enabled, "tuf repo pruner enabled"), diff_row!(planner_enabled, "planner enabled"), + diff_row!(disruption_policy, "disruption policy"), ], ); // No need for writeln! here because KvList adds its own newlines. diff --git a/schema/crdb/add-disruption-policy/up1.sql b/schema/crdb/add-disruption-policy/up1.sql new file mode 100644 index 00000000000..08076cefabb --- /dev/null +++ b/schema/crdb/add-disruption-policy/up1.sql @@ -0,0 +1,6 @@ +CREATE TYPE IF NOT EXISTS +omicron.public.reconfigurator_disruption_policy AS ENUM ( + 'terminate', + 'migrate_or_terminate', + 'migrate_only' +); diff --git a/schema/crdb/add-disruption-policy/up2.sql b/schema/crdb/add-disruption-policy/up2.sql new file mode 100644 index 00000000000..f3b5dd14165 --- /dev/null +++ b/schema/crdb/add-disruption-policy/up2.sql @@ -0,0 +1,4 @@ +ALTER TABLE omicron.public.reconfigurator_config + ADD COLUMN IF NOT EXISTS disruption_policy + omicron.public.reconfigurator_disruption_policy + NOT NULL DEFAULT 'terminate'; diff --git a/schema/crdb/add-disruption-policy/up3.sql b/schema/crdb/add-disruption-policy/up3.sql new file mode 100644 index 00000000000..3b2b85db0ad --- /dev/null +++ b/schema/crdb/add-disruption-policy/up3.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.reconfigurator_config + ALTER COLUMN disruption_policy DROP DEFAULT; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 42bad504270..10616aa0798 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -5140,6 +5140,12 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_svc_enabled_not_online_parse_error * * See https://github.com/oxidecomputer/omicron/issues/8253 for more details. 
*/ +CREATE TYPE IF NOT EXISTS omicron.public.reconfigurator_disruption_policy AS ENUM ( + 'terminate', + 'migrate_or_terminate', + 'migrate_only' +); + CREATE TABLE IF NOT EXISTS omicron.public.reconfigurator_config ( -- Monotonically increasing version for all bp_targets version INT8 PRIMARY KEY, @@ -5154,7 +5160,10 @@ CREATE TABLE IF NOT EXISTS omicron.public.reconfigurator_config ( add_zones_with_mupdate_override BOOL NOT NULL, -- Enable the TUF repo pruner background task - tuf_repo_pruner_enabled BOOL NOT NULL + tuf_repo_pruner_enabled BOOL NOT NULL, + + -- How to disrupt instances during updates. + disruption_policy omicron.public.reconfigurator_disruption_policy NOT NULL ); /* @@ -8475,7 +8484,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '253.0.0', NULL) + (TRUE, NOW(), NOW(), '254.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT;