Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion clients/nexus-lockstep-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ progenitor::generate_api!(
DnsConfigParams = nexus_types::internal_api::params::DnsConfigParams,
DnsConfigZone = nexus_types::internal_api::params::DnsConfigZone,
DnsRecord = nexus_types::internal_api::params::DnsRecord,
ExternalPortDiscovery = nexus_types::internal_api::params::ExternalPortDiscovery,
Generation = omicron_common::api::external::Generation,
ImportExportPolicy = sled_agent_types::early_networking::ImportExportPolicy,
MacAddr = omicron_common::api::external::MacAddr,
Expand Down
39 changes: 39 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ use nexus_types::internal_api::background::SupportBundleCleanupReport;
use nexus_types::internal_api::background::SupportBundleCollectionReport;
use nexus_types::internal_api::background::SupportBundleCollectionStepStatus;
use nexus_types::internal_api::background::SupportBundleEreportStatus;
use nexus_types::internal_api::background::SwitchPortPopulatorStatus;
use nexus_types::internal_api::background::SwitchPortPopulatorStatusKind;
use nexus_types::internal_api::background::TrustQuorumManagerStatus;
use nexus_types::internal_api::background::TufArtifactReplicationCounters;
use nexus_types::internal_api::background::TufArtifactReplicationRequest;
Expand Down Expand Up @@ -1369,6 +1371,9 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
"trust_quorum_manager" => {
print_task_trust_quorum_manager(details);
}
"populate_switch_ports" => {
print_task_populate_switch_ports(details);
}
_ => {
println!(
"warning: unknown background task: {:?} \
Expand Down Expand Up @@ -3940,6 +3945,40 @@ fn print_task_trust_quorum_manager(details: &serde_json::Value) {
}
}

/// Prints the details reported by the `populate_switch_ports` background
/// task: one line per switch (`switch0`, then `switch1`).
///
/// If `details` cannot be deserialized as a `SwitchPortPopulatorStatus`, a
/// warning is written to stderr and nothing else is printed.
fn print_task_populate_switch_ports(details: &serde_json::Value) {
    /// Prints a one-line summary of the outcome for the switch called `name`.
    fn print_one(
        name: &str,
        result: Result<SwitchPortPopulatorStatusKind, String>,
    ) {
        // Deal with the failure case first so the success variants can be
        // matched without the `Ok(..)` wrapper.
        let kind = match result {
            Ok(kind) => kind,
            Err(err) => {
                println!("{name} failed: {err}");
                return;
            }
        };

        match kind {
            SwitchPortPopulatorStatusKind::Populated { num_ports } => {
                println!("{name}: populated {num_ports} ports");
            }
            SwitchPortPopulatorStatusKind::PreviouslyPopulated => {
                println!("{name} skipped: previously populated ports");
            }
        }
    }

    // `from_value` consumes its argument, so parse a copy of the details.
    let parsed: Result<SwitchPortPopulatorStatus, _> =
        serde_json::from_value(details.clone());

    let SwitchPortPopulatorStatus { switch0, switch1 } = match parsed {
        Ok(status) => status,
        Err(error) => {
            eprintln!(
                "warning: failed to interpret task details: {:?}: {:#?}",
                error, details
            );
            return;
        }
    };

    // Report the switches in a fixed order: switch0 first, then switch1.
    for (name, result) in [("switch0", switch0), ("switch1", switch1)] {
        print_one(name, result);
    }
}

const ERRICON: &str = "/!\\";

fn warn_if_nonzero(n: usize) -> &'static str {
Expand Down
15 changes: 15 additions & 0 deletions dev-tools/omdb/tests/env.out
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,11 @@ task: "physical_disk_adoption"
ensure new physical disks are automatically marked in-service


task: "populate_switch_ports"
one-time population of the `switch_port` table containing all QSFP ports
managed by dendrite


task: "probe_distributor"
distributes networking probe zones to sleds

Expand Down Expand Up @@ -439,6 +444,11 @@ task: "physical_disk_adoption"
ensure new physical disks are automatically marked in-service


task: "populate_switch_ports"
one-time population of the `switch_port` table containing all QSFP ports
managed by dendrite


task: "probe_distributor"
distributes networking probe zones to sleds

Expand Down Expand Up @@ -689,6 +699,11 @@ task: "physical_disk_adoption"
ensure new physical disks are automatically marked in-service


task: "populate_switch_ports"
one-time population of the `switch_port` table containing all QSFP ports
managed by dendrite


task: "probe_distributor"
distributes networking probe zones to sleds

Expand Down
19 changes: 19 additions & 0 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,11 @@ task: "physical_disk_adoption"
ensure new physical disks are automatically marked in-service


task: "populate_switch_ports"
one-time population of the `switch_port` table containing all QSFP ports
managed by dendrite


task: "probe_distributor"
distributes networking probe zones to sleds

Expand Down Expand Up @@ -854,6 +859,13 @@ task: "physical_disk_adoption"
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last completion reported error: task disabled

task: "populate_switch_ports"
configured period: every <REDACTED_DURATION>s
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
switch0 failed: failed to look up dendrite clients: proto error: no records found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN }
switch1 failed: failed to look up dendrite clients: proto error: no records found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN }

task: "probe_distributor"
configured period: every <REDACTED_DURATION>m
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
Expand Down Expand Up @@ -1535,6 +1547,13 @@ task: "physical_disk_adoption"
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last completion reported error: task disabled

task: "populate_switch_ports"
configured period: every <REDACTED_DURATION>s
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
switch0 failed: failed to look up dendrite clients: proto error: no records found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN }
switch1 failed: failed to look up dendrite clients: proto error: no records found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN }

task: "probe_distributor"
configured period: every <REDACTED_DURATION>m
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
Expand Down
16 changes: 16 additions & 0 deletions nexus-config/src/nexus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,8 @@ pub struct BackgroundTaskConfig {
pub audit_log_timeout_incomplete: AuditLogTimeoutIncompleteConfig,
/// configuration for audit log cleanup (retention) task
pub audit_log_cleanup: AuditLogCleanupConfig,
/// configuration for populate switch ports task
pub populate_switch_ports: PopulateSwitchPortsConfig,
}

#[serde_as]
Expand Down Expand Up @@ -488,6 +490,15 @@ pub struct AuditLogCleanupConfig {
pub max_deleted_per_activation: u32,
}

#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
/// Configuration for the `populate_switch_ports` background task, which
/// attempts to populate the `switch_port` table.
pub struct PopulateSwitchPortsConfig {
/// period (in seconds) for periodic activations of the background task that
/// attempts to populate the `switch_port` table.
///
/// Serialized as a whole number of seconds (`DurationSeconds<u64>`), e.g.
/// `populate_switch_ports.period_secs = 30` in the config file.
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs: Duration,
}

#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct DnsTasksConfig {
Expand Down Expand Up @@ -1339,6 +1350,7 @@ mod test {
audit_log_cleanup.period_secs = 600
audit_log_cleanup.retention_days = 90
audit_log_cleanup.max_deleted_per_activation = 10000
populate_switch_ports.period_secs = 31
[default_region_allocation_strategy]
type = "random"
seed = 0
Expand Down Expand Up @@ -1620,6 +1632,9 @@ mod test {
retention_days: NonZeroU32::new(90).unwrap(),
max_deleted_per_activation: 10_000,
},
populate_switch_ports: PopulateSwitchPortsConfig {
period_secs: Duration::from_secs(31),
},
},
multicast: MulticastConfig { enabled: false },
default_region_allocation_strategy:
Expand Down Expand Up @@ -1735,6 +1750,7 @@ mod test {
audit_log_cleanup.period_secs = 600
audit_log_cleanup.retention_days = 90
audit_log_cleanup.max_deleted_per_activation = 10000
populate_switch_ports.period_secs = 31

[default_region_allocation_strategy]
type = "random"
Expand Down
1 change: 1 addition & 0 deletions nexus/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ slog.workspace = true
slog-async.workspace = true
slog-dtrace.workspace = true
slog-error-chain.workspace = true
strum.workspace = true
swrite.workspace = true
display-error-chain.workspace = true
slog-term.workspace = true
Expand Down
1 change: 1 addition & 0 deletions nexus/background-task-interface/src/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ pub struct BackgroundTasks {
pub task_trust_quorum_manager: Activator,
pub task_attached_subnet_manager: Activator,
pub task_session_cleanup: Activator,
pub task_populate_switch_ports: Activator,

// Handles to activate background tasks that do not get used by Nexus
// at-large. These background tasks are implementation details as far as
Expand Down
1 change: 1 addition & 0 deletions nexus/examples/config-second.toml
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ audit_log_timeout_incomplete.max_timed_out_per_activation = 1000
audit_log_cleanup.period_secs = 600
audit_log_cleanup.retention_days = 90
audit_log_cleanup.max_deleted_per_activation = 10000
populate_switch_ports.period_secs = 30

[default_region_allocation_strategy]
# allocate region on 3 random distinct zpools, on 3 random distinct sleds.
Expand Down
1 change: 1 addition & 0 deletions nexus/examples/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ audit_log_timeout_incomplete.max_timed_out_per_activation = 1000
audit_log_cleanup.period_secs = 600
audit_log_cleanup.retention_days = 90
audit_log_cleanup.max_deleted_per_activation = 10000
populate_switch_ports.period_secs = 30

[default_region_allocation_strategy]
# allocate region on 3 random distinct zpools, on 3 random distinct sleds.
Expand Down
24 changes: 22 additions & 2 deletions nexus/src/app/background/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ use super::tasks::v2p_mappings::V2PManager;
use super::tasks::vpc_routes;
use super::tasks::webhook_deliverator;
use crate::Nexus;
use crate::app::background::tasks::populate_switch_ports;
use crate::app::oximeter::PRODUCER_LEASE_DURATION;
use crate::app::quiesce::NexusQuiesceHandle;
use crate::app::saga::StartSaga;
Expand Down Expand Up @@ -277,6 +278,7 @@ impl BackgroundTasksInitializer {
task_trust_quorum_manager: Activator::new(),
task_attached_subnet_manager: Activator::new(),
task_session_cleanup: Activator::new(),
task_populate_switch_ports: Activator::new(),

// Handles to activate background tasks that do not get used by Nexus
// at-large. These background tasks are implementation details as far as
Expand Down Expand Up @@ -372,6 +374,7 @@ impl BackgroundTasksInitializer {
task_session_cleanup,
task_audit_log_timeout_incomplete,
task_audit_log_cleanup,
task_populate_switch_ports,
// Add new background tasks here. Be sure to use this binding in a
// call to `Driver::register()` below. That's what actually wires
// up the Activator to the corresponding background task.
Expand Down Expand Up @@ -1224,7 +1227,7 @@ impl BackgroundTasksInitializer {
description: "distributes attached subnets to sleds and switch",
period: config.attached_subnet_manager.period_secs,
task_impl: Box::new(attached_subnets::Manager::new(
resolver,
resolver.clone(),
datastore.clone(),
)),
opctx: opctx.child(BTreeMap::new()),
Expand Down Expand Up @@ -1272,7 +1275,7 @@ impl BackgroundTasksInitializer {
than the retention period",
period: config.audit_log_cleanup.period_secs,
task_impl: Box::new(audit_log_cleanup::AuditLogCleanup::new(
datastore,
datastore.clone(),
config.audit_log_cleanup.retention_days,
config.audit_log_cleanup.max_deleted_per_activation,
)),
Expand All @@ -1281,6 +1284,23 @@ impl BackgroundTasksInitializer {
activator: task_audit_log_cleanup,
});

driver.register(TaskDefinition {
name: "populate_switch_ports",
description: "one-time population of the `switch_port` table \
containing all QSFP ports managed by dendrite",
period: config.populate_switch_ports.period_secs,
task_impl: Box::new(
populate_switch_ports::SwitchPortPopulator::new(
rack_id,
datastore.clone(),
resolver.clone(),
),
),
opctx: opctx.child(BTreeMap::new()),
watchers: vec![],
activator: task_populate_switch_ports,
});

driver
}
}
Expand Down
1 change: 1 addition & 0 deletions nexus/src/app/background/tasks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ pub mod nat_cleanup;
pub mod networking;
pub mod phantom_disks;
pub mod physical_disk_adoption;
pub mod populate_switch_ports;
pub mod probe_distributor;
pub mod read_only_region_replacement_start;
pub mod reconfigurator_config;
Expand Down
Loading
Loading