diff --git a/clients/ddm-admin-client/src/lib.rs b/clients/ddm-admin-client/src/lib.rs index 7a8b56d499d..466a8883918 100644 --- a/clients/ddm-admin-client/src/lib.rs +++ b/clients/ddm-admin-client/src/lib.rs @@ -13,6 +13,7 @@ pub use ddm_admin_client::types; use ddm_admin_client::Client as InnerClient; use either::Either; +use omicron_common::address::DDMD_PORT; use oxnet::Ipv6Net; use sled_hardware_types::underlay::BOOTSTRAP_MASK; use sled_hardware_types::underlay::BOOTSTRAP_PREFIX; @@ -26,9 +27,6 @@ use thiserror::Error; use crate::types::EnableStatsRequest; -// TODO-cleanup Is it okay to hardcode this port number here? -const DDMD_PORT: u16 = 8000; - #[derive(Debug, Error, SlogInlineError)] pub enum DdmError { #[error("Failed to construct an HTTP client:")] diff --git a/internal-dns/types/src/config.rs b/internal-dns/types/src/config.rs index d5bef144343..f6b04753a77 100644 --- a/internal-dns/types/src/config.rs +++ b/internal-dns/types/src/config.rs @@ -399,6 +399,7 @@ impl DnsConfigBuilder { dendrite_port: u16, mgs_port: u16, mgd_port: u16, + ddm_port: u16, ) -> anyhow::Result<()> { let zone = self.host_dendrite(sled_id, switch_zone_ip)?; self.service_backend_zone(ServiceName::Dendrite, &zone, dendrite_port)?; @@ -407,7 +408,8 @@ impl DnsConfigBuilder { &zone, mgs_port, )?; - self.service_backend_zone(ServiceName::Mgd, &zone, mgd_port) + self.service_backend_zone(ServiceName::Mgd, &zone, mgd_port)?; + self.service_backend_zone(ServiceName::Ddm, &zone, ddm_port) } /// Higher-level shorthand for adding a Nexus zone with both its internal @@ -731,7 +733,7 @@ impl DnsConfigBuilder { #[cfg(test)] mod test { - use super::{DnsConfigBuilder, Host, ServiceName}; + use super::{DnsConfigBuilder, DnsRecord, Host, ServiceName}; use crate::{config::Zone, names::DNS_ZONE}; use omicron_common::api::external::Generation; use omicron_uuid_kinds::{OmicronZoneUuid, SledUuid}; @@ -779,6 +781,8 @@ mod test { "_oximeter-reader._tcp", ); assert_eq!(ServiceName::Dendrite.dns_name(), "_dendrite._tcp",); + assert_eq!(ServiceName::Mgd.dns_name(), "_mgd._tcp",); + assert_eq!(ServiceName::Ddm.dns_name(), "_ddm._tcp",); assert_eq!( ServiceName::CruciblePantry.dns_name(), "_crucible-pantry._tcp", @@ -796,6 +800,69 @@ mod test { ); } + #[test] + fn host_zone_switch_publishes_all_services() { + let sled_uuid: SledUuid = + "001de000-51ed-4000-8000-000000000001".parse().unwrap(); + let switch_zone_ip = Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1); + + // Use distinct port numbers so an arg-order swap in `host_zone_switch` + // surfaces as a port mismatch on the affected service. + let dendrite_port = 11; + let mgs_port = 13; + let mgd_port = 17; + let ddm_port = 19; + + let mut builder = DnsConfigBuilder::new(); + builder + .host_zone_switch( + sled_uuid, + switch_zone_ip, + dendrite_port, + mgs_port, + mgd_port, + ddm_port, + ) + .unwrap(); + + let config = builder.build_full_config_for_initial_generation(); + + let mut by_name: BTreeMap<&str, &[DnsRecord]> = BTreeMap::new(); + for zone in &config.zones { + for (name, records) in &zone.records { + by_name.insert(name.as_str(), records.as_slice()); + } + } + + for (expected_name, expected_port) in [ + ("_dendrite._tcp", dendrite_port), + ("_mgs._tcp", mgs_port), + ("_mgd._tcp", mgd_port), + ("_ddm._tcp", ddm_port), + ] { + let records = by_name.get(expected_name).unwrap_or_else(|| { + panic!( + "expected {expected_name} in published switch-zone \ + services; got {by_name:?}" + ) + }); + let srv_port = records + .iter() + .find_map(|r| match r { + DnsRecord::Srv(s) => Some(s.port), + _ => None, + }) + .unwrap_or_else(|| { + panic!("no SRV record for {expected_name}: {records:?}") + }); + + assert_eq!( + srv_port, expected_port, + "wrong SRV port for {expected_name}" + ); + } + } + #[test] fn display_hosts() { let sled_uuid = SledUuid::nil(); diff --git a/internal-dns/types/src/names.rs b/internal-dns/types/src/names.rs index 73b2439e48e..105d0222f3c 100644 --- a/internal-dns/types/src/names.rs +++ b/internal-dns/types/src/names.rs @@ -75,6 +75,7 @@ pub enum ServiceName { BoundaryNtp, InternalNtp, Mgd, + Ddm, } impl ServiceName { @@ -116,6 +117,7 @@ impl ServiceName { ServiceName::BoundaryNtp => "boundary-ntp", ServiceName::InternalNtp => "internal-ntp", ServiceName::Mgd => "mgd", + ServiceName::Ddm => "ddm", } } @@ -144,7 +146,8 @@ impl ServiceName { | ServiceName::CruciblePantry | ServiceName::BoundaryNtp | ServiceName::InternalNtp - | ServiceName::Mgd => { + | ServiceName::Mgd + | ServiceName::Ddm => { format!("_{}._tcp", self.service_kind()) } ServiceName::SledAgent(id) => { diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 685c7c85e6f..0a85c4dd114 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -988,9 +988,8 @@ mod test { // the previous pass (i.e., that corresponds to an Omicron zone). // // There are some ServiceNames missing here because they are not part of - // our representative config (e.g., ClickhouseKeeper) or they don't - // currently have DNS record at all (e.g., SledAgent, Maghemite, Mgd, - // Tfport). + // our representative config (e.g., ClickhouseKeeper) or because they + // do not currently have a DNS record at all (e.g., SledAgent). let mut srv_kinds_expected = BTreeSet::from([ ServiceName::Clickhouse, ServiceName::ClickhouseNative, @@ -1001,6 +1000,8 @@ mod test { ServiceName::NexusLockstep, ServiceName::Oximeter, ServiceName::Dendrite, + ServiceName::Mgd, + ServiceName::Ddm, ServiceName::CruciblePantry, ServiceName::BoundaryNtp, ServiceName::InternalNtp, diff --git a/nexus/reconfigurator/execution/src/test_utils.rs b/nexus/reconfigurator/execution/src/test_utils.rs index cd46adacd0b..fdb17289225 100644 --- a/nexus/reconfigurator/execution/src/test_utils.rs +++ b/nexus/reconfigurator/execution/src/test_utils.rs @@ -113,10 +113,12 @@ pub fn overridables_for_test( let dendrite_port = cptestctx.dendrite.read().unwrap().get(&switch_slot).unwrap().port; let mgd_port = cptestctx.mgd.get(&switch_slot).unwrap().port; + let ddm_port = cptestctx.ddm.get(&switch_slot).unwrap().port; overrides.override_switch_zone_ip(sled_id, ip); overrides.override_dendrite_port(sled_id, dendrite_port); overrides.override_mgs_port(sled_id, mgs_port); overrides.override_mgd_port(sled_id, mgd_port); + overrides.override_ddm_port(sled_id, ddm_port); } overrides } diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index a1f865e2934..7dbbf3640dc 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -1854,7 +1854,8 @@ mod tests { | ServiceName::RepoDepot | ServiceName::ManagementGatewayService | ServiceName::Dendrite - | ServiceName::Mgd => { + | ServiceName::Mgd + | ServiceName::Ddm => { out.insert(service, Ok(())); } // InternalNtp is too large to fit in a single DNS packet and diff --git a/nexus/test-utils/src/nexus_test.rs b/nexus/test-utils/src/nexus_test.rs index 693aea88732..329f6f37d29 100644 --- a/nexus/test-utils/src/nexus_test.rs +++ b/nexus/test-utils/src/nexus_test.rs @@ -117,6 +117,7 @@ pub struct ControlPlaneTestContext { /// Ports of stopped dendrite instances (for use by start_dendrite) pub stopped_dendrite_ports: RwLock>, pub mgd: HashMap, + pub ddm: HashMap, pub external_dns_zone_name: String, pub external_dns: TransientDnsServer, pub internal_dns: TransientDnsServer, @@ -320,6 +321,9 @@ impl ControlPlaneTestContext { for (_, mut mgd) in self.mgd { mgd.cleanup().await.unwrap(); } + for (_, mut ddm) in self.ddm { + ddm.cleanup().await; + } self.logctx.cleanup_successful(); } } diff --git a/nexus/test-utils/src/starter.rs b/nexus/test-utils/src/starter.rs index aa9c5cbd268..8a646afea12 100644 --- a/nexus/test-utils/src/starter.rs +++ b/nexus/test-utils/src/starter.rs @@ -146,6 +146,7 @@ pub struct ControlPlaneStarter<'a, N: NexusServer> { pub gateway: BTreeMap, pub dendrite: RwLock>, pub mgd: HashMap, + pub ddm: HashMap, // NOTE: Only exists after starting Nexus, until external Nexus is // initialized. @@ -203,6 +204,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { gateway: BTreeMap::new(), dendrite: RwLock::new(HashMap::new()), mgd: HashMap::new(), + ddm: HashMap::new(), nexus_internal: None, nexus_internal_addr: None, external_dns_zone_name: None, @@ -461,6 +463,17 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { self.config.pkg.mgd.insert(switch_slot, config); } + pub async fn start_ddm(&mut self, switch_slot: SwitchSlot) { + let log = &self.logctx.log; + debug!(log, "Starting DDM sim"; "switch_slot" => ?switch_slot); + + let ddm = dev::maghemite::DdmInstance::start().await.unwrap(); + let port = ddm.port; + self.ddm.insert(switch_slot, ddm); + + debug!(log, "DDM sim started"; "port" => port); + } + pub async fn record_switch_dns( &mut self, sled_id: SledUuid, @@ -482,6 +495,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { self.dendrite.read().unwrap().get(&switch_slot).unwrap().port, self.gateway.get(&switch_slot).unwrap().port, self.mgd.get(&switch_slot).unwrap().port, + self.ddm.get(&switch_slot).unwrap().port, ) .unwrap() } @@ -1250,6 +1264,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { dendrite: RwLock::new(self.dendrite.into_inner().unwrap()), stopped_dendrite_ports: RwLock::new(HashMap::new()), mgd: self.mgd, + ddm: self.ddm, external_dns_zone_name: self.external_dns_zone_name.unwrap(), external_dns: self.external_dns.unwrap(), internal_dns: self.internal_dns.unwrap(), @@ -1291,6 +1306,9 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> { for (_, mut mgd) in self.mgd { mgd.cleanup().await.unwrap(); } + for (_, mut ddm) in self.ddm { + ddm.cleanup().await; + } self.logctx.cleanup_successful(); } @@ -1631,6 +1649,12 @@ pub(crate) async fn setup_with_config_impl( builder.start_mgd(SwitchSlot::Switch0).boxed() }), ), + ( + "start_ddm_switch0", + Box::new(|builder| { + builder.start_ddm(SwitchSlot::Switch0).boxed() + }), + ), ( "record_switch_dns", Box::new(|builder| { @@ -1675,6 +1699,12 @@ pub(crate) async fn setup_with_config_impl( builder.start_mgd(SwitchSlot::Switch1).boxed() }), ), + ( + "start_ddm_switch1", + Box::new(|builder| { + builder.start_ddm(SwitchSlot::Switch1).boxed() + }), + ), ( "record_switch_dns", Box::new(|builder| { diff --git a/nexus/tests/integration_tests/initialization.rs b/nexus/tests/integration_tests/initialization.rs index 350757cf1de..714880feb37 100644 --- a/nexus/tests/integration_tests/initialization.rs +++ b/nexus/tests/integration_tests/initialization.rs @@ -158,6 +158,11 @@ async fn test_nexus_boots_before_dendrite() { starter.start_mgd(SwitchSlot::Switch1).await; info!(log, "Started mgd"); + info!(log, "Starting ddm"); + starter.start_ddm(SwitchSlot::Switch0).await; + starter.start_ddm(SwitchSlot::Switch1).await; + info!(log, "Started ddm"); + info!(log, "Populating internal DNS records"); starter .record_switch_dns( @@ -197,6 +202,8 @@ async fn nexus_schema_test_setup( starter.start_dendrite(SwitchSlot::Switch1).await; starter.start_mgd(SwitchSlot::Switch0).await; starter.start_mgd(SwitchSlot::Switch1).await; + starter.start_ddm(SwitchSlot::Switch0).await; + starter.start_ddm(SwitchSlot::Switch1).await; starter.populate_internal_dns().await; } diff --git a/nexus/types/src/deployment/execution/dns.rs b/nexus/types/src/deployment/execution/dns.rs index 009377fd8d9..3730576eda2 100644 --- a/nexus/types/src/deployment/execution/dns.rs +++ b/nexus/types/src/deployment/execution/dns.rs @@ -158,6 +158,7 @@ pub fn blueprint_internal_dns_config( overrides.dendrite_port(scrimlet.id()), overrides.mgs_port(scrimlet.id()), overrides.mgd_port(scrimlet.id()), + overrides.ddm_port(scrimlet.id()), )?; } diff --git a/nexus/types/src/deployment/execution/overridables.rs b/nexus/types/src/deployment/execution/overridables.rs index 881a7c49bdd..7dc3ae0bf4d 100644 --- a/nexus/types/src/deployment/execution/overridables.rs +++ b/nexus/types/src/deployment/execution/overridables.rs @@ -2,6 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use omicron_common::address::DDMD_PORT; use omicron_common::address::DENDRITE_PORT; use omicron_common::address::Ipv6Subnet; use omicron_common::address::MGD_PORT; @@ -29,6 +30,8 @@ pub struct Overridables { pub mgs_ports: BTreeMap, /// map: sled id -> TCP port on which that sled's MGD is listening pub mgd_ports: BTreeMap, + /// map: sled id -> TCP port on which that sled's DDM is listening + pub ddm_ports: BTreeMap, /// map: sled id -> IP address of the sled's switch zone pub switch_zone_ips: BTreeMap, } @@ -67,6 +70,16 @@ impl Overridables { self.mgd_ports.get(&sled_id).copied().unwrap_or(MGD_PORT) } + /// Specify the TCP port on which this sled's DDM is listening + pub fn override_ddm_port(&mut self, sled_id: SledUuid, port: u16) { + self.ddm_ports.insert(sled_id, port); + } + + /// Returns the TCP port on which this sled's DDM is listening + pub fn ddm_port(&self, sled_id: SledUuid) -> u16 { + self.ddm_ports.get(&sled_id).copied().unwrap_or(DDMD_PORT) + } + /// Specify the IP address of this switch zone pub fn override_switch_zone_ip( &mut self, diff --git a/sled-agent/rack-setup/src/plan/service.rs b/sled-agent/rack-setup/src/plan/service.rs index 2ef6d79489a..59ffc7eb64e 100644 --- a/sled-agent/rack-setup/src/plan/service.rs +++ b/sled-agent/rack-setup/src/plan/service.rs @@ -29,10 +29,10 @@ use nexus_types::deployment::{ }; use nexus_types::external_api::sled::SledState; use omicron_common::address::{ - CP_SERVICES_RESERVED_ADDRESSES, DENDRITE_PORT, DNS_HTTP_PORT, DNS_PORT, - Ipv6Subnet, MGD_PORT, MGS_PORT, NEXUS_INTERNAL_PORT, NEXUS_LOCKSTEP_PORT, - NTP_PORT, NUM_SOURCE_NAT_PORTS, REPO_DEPOT_PORT, ReservedRackSubnet, - SLED_PREFIX, SLED_RESERVED_ADDRESSES, get_sled_address, + CP_SERVICES_RESERVED_ADDRESSES, DDMD_PORT, DENDRITE_PORT, DNS_HTTP_PORT, + DNS_PORT, Ipv6Subnet, MGD_PORT, MGS_PORT, NEXUS_INTERNAL_PORT, + NEXUS_LOCKSTEP_PORT, NTP_PORT, NUM_SOURCE_NAT_PORTS, REPO_DEPOT_PORT, + ReservedRackSubnet, SLED_PREFIX, SLED_RESERVED_ADDRESSES, get_sled_address, get_switch_zone_address, }; use omicron_common::api::external::{Generation, MacAddr, Vni}; @@ -341,6 +341,7 @@ impl ServicePlan { DENDRITE_PORT, MGS_PORT, MGD_PORT, + DDMD_PORT, ) .unwrap(); } diff --git a/test-utils/src/dev/maghemite.rs b/test-utils/src/dev/maghemite.rs index 4c2d85df3ee..d3cf6524f1a 100644 --- a/test-utils/src/dev/maghemite.rs +++ b/test-utils/src/dev/maghemite.rs @@ -4,11 +4,13 @@ //! Tools for managing Maghemite during development +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::path::{Path, PathBuf}; use std::process::Stdio; use std::time::Duration; use anyhow::Context; +use slog::{Discard, Logger, o}; use tempfile::TempDir; use tokio::{ fs::File, @@ -163,13 +165,91 @@ async fn find_mgd_port_in_log(logfile: String) -> Result { } } +/// In-process stand-in for the `ddmd` (Delay Driven Multipath daemon) +/// admin API. +/// +/// `ddmd` runs in sled global zones and switch zones in real deployments, +/// and depends on illumos networking facilities not available in a generic +/// dev test toolchain the way `mgd` is. This binds a dropshot server on an +/// auto-assigned port so the test suite has a real socket to publish in +/// internal DNS as `ServiceName::Ddm`. +/// +/// This currently has no registered routes. Any integration needing +/// concrete endpoints (e.g., peer lists) must extend the `ApiDescription`. +pub struct DdmInstance { + pub port: u16, + server: Option>, +} + +impl DdmInstance { + /// Start a DDM sim server bound to a random localhost port. + pub async fn start() -> Result { + let dropshot_config = dropshot::ConfigDropshot { + bind_address: SocketAddr::V6(SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + )), + ..Default::default() + }; + + let api: dropshot::ApiDescription<()> = dropshot::ApiDescription::new(); + let log = Logger::root(Discard, o!()); + + let server = dropshot::ServerBuilder::new(api, (), log) + .config(dropshot_config) + .start() + .context("failed to start DDM sim server")?; + + let port = server.local_addr().port(); + Ok(Self { port, server: Some(server) }) + } + + pub async fn cleanup(&mut self) { + if let Some(server) = self.server.take() { + server.close().await.expect("failed to close DDM sim server"); + } + } +} + +impl Drop for DdmInstance { + fn drop(&mut self) { + if self.server.is_some() { + eprintln!( + "WARN: dropped DdmInstance without cleaning it up first \ + (the dropshot server's tokio task may still be running)" + ); + } + } +} + #[cfg(test)] mod tests { + use super::DdmInstance; use super::find_mgd_port_in_log; use std::io::Write; use std::process::Stdio; use tempfile::NamedTempFile; + /// Smoke-test `DdmInstance`. We bind and serve a 404 for an unregistered + /// route, then shut down cleanly. + #[tokio::test] + async fn test_ddm_sim_binds_and_serves_404() { + let mut sim = DdmInstance::start().await.expect("DDM sim starts"); + assert!(sim.port > 0, "DDM sim should auto-assign a port"); + + let url = format!("http://[::1]:{}/peers", sim.port); + let resp = reqwest::get(&url).await.expect("server reachable"); + assert_eq!( + resp.status(), + reqwest::StatusCode::NOT_FOUND, + "no routes registered yet: expected 404" + ); + + sim.cleanup().await; + } + const EXPECTED_PORT: u16 = 4676; #[tokio::test]