52 changes: 52 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
@@ -86,6 +86,7 @@ use nexus_types::internal_api::background::TufArtifactReplicationCounters;
use nexus_types::internal_api::background::TufArtifactReplicationRequest;
use nexus_types::internal_api::background::TufArtifactReplicationStatus;
use nexus_types::internal_api::background::TufRepoPrunerStatus;
use nexus_types::internal_api::background::UserDataExportCoordinatorStatus;
use nexus_types::internal_api::background::fm_rendezvous;
use omicron_uuid_kinds::BlueprintUuid;
use omicron_uuid_kinds::CollectionUuid;
@@ -1369,6 +1370,9 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
"trust_quorum_manager" => {
print_task_trust_quorum_manager(details);
}
"user_data_export_coordinator" => {
print_task_user_data_export_coordinator(details);
}
_ => {
println!(
"warning: unknown background task: {:?} \
@@ -3174,6 +3178,7 @@ fn print_task_alert_dispatcher(details: &serde_json::Value) {
);
}
}

fn print_task_webhook_deliverator(details: &serde_json::Value) {
use nexus_types::external_api::alert::WebhookDeliveryAttemptResult;
use nexus_types::internal_api::background::WebhookDeliveratorStatus;
@@ -3933,6 +3938,53 @@ fn print_task_trust_quorum_manager(details: &serde_json::Value) {
}
}

fn print_task_user_data_export_coordinator(details: &serde_json::Value) {
match serde_json::from_value::<UserDataExportCoordinatorStatus>(
details.clone(),
) {
Err(error) => eprintln!(
"warning: failed to interpret task details: {:?}: {:?}",
error, details
),

Ok(status) => {
Contributor:

How hard would it be to make this cumulative instead of just showing the values from the last run?

Contributor Author:

not much work :) 9ac0abc only sets deleted for non-deleted records, meaning the background task will now only show the latest number of affected records

println!(
" total create steps invoked ok: {}",
status.create_invoked_ok.len(),
);
for line in &status.create_invoked_ok {
println!(" > {line}");
}

println!(
" total delete steps invoked ok: {}",
status.delete_invoked_ok.len(),
);
for line in &status.delete_invoked_ok {
println!(" > {line}");
}

println!(
" total records affected by expunge: {}",
status.records_marked_for_deletion,
);

println!(
" total records fast-deleted (resource was deleted): {}",
status.records_bypassed_ok.len(),
);
for line in &status.records_bypassed_ok {
println!(" > {line}");
}

println!(" errors: {}", status.errors.len());
for line in &status.errors {
println!(" > {line}");
}
}
}
}

const ERRICON: &str = "/!\\";

fn warn_if_nonzero(n: usize) -> &'static str {
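The printer above shows which fields the new UserDataExportCoordinatorStatus carries but not how they are declared. A minimal sketch of a compatible shape, inferred purely from how print_task_user_data_export_coordinator uses each field; the real definition in nexus_types::internal_api::background may well differ, so the types below are assumptions, not the PR's actual code:

// Hypothetical reconstruction for illustration only, not the real
// nexus_types definition. Field names come from the printer above; the
// types are guesses: four fields are iterated as printable lines and
// counted with .len(), and records_marked_for_deletion is printed as a
// plain number.
#[derive(Debug, serde::Deserialize, serde::Serialize)]
pub struct UserDataExportCoordinatorStatus {
    pub create_invoked_ok: Vec<String>,
    pub delete_invoked_ok: Vec<String>,
    pub records_marked_for_deletion: usize,
    pub records_bypassed_ok: Vec<String>,
    pub errors: Vec<String>,
}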
12 changes: 12 additions & 0 deletions dev-tools/omdb/tests/env.out
@@ -254,6 +254,10 @@ task: "tuf_repo_pruner"
determine which TUF repos' artifacts can be pruned


task: "user_data_export_coordinator"
make the user resources available for export


task: "v2p_manager"
manages opte v2p mappings for vpc networking

@@ -517,6 +521,10 @@ task: "tuf_repo_pruner"
determine which TUF repos' artifacts can be pruned


task: "user_data_export_coordinator"
make the user resources available for export


task: "v2p_manager"
manages opte v2p mappings for vpc networking

@@ -767,6 +775,10 @@ task: "tuf_repo_pruner"
determine which TUF repos' artifacts can be pruned


task: "user_data_export_coordinator"
make the user resources available for export


task: "v2p_manager"
manages opte v2p mappings for vpc networking

26 changes: 26 additions & 0 deletions dev-tools/omdb/tests/successes.out
@@ -489,6 +489,10 @@ task: "tuf_repo_pruner"
determine which TUF repos' artifacts can be pruned


task: "user_data_export_coordinator"
make the user resources available for export


task: "v2p_manager"
manages opte v2p mappings for vpc networking

@@ -1039,6 +1043,17 @@ task: "tuf_repo_pruner"
repos kept because they're recent uploads: none
other repos eligible for pruning: none

task: "user_data_export_coordinator"
configured period: every <REDACTED_DURATION>m
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
total create steps invoked ok: 0
total delete steps invoked ok: 0
total records affected by expunge: 0
total records fast-deleted (resource was deleted): 0
errors: 1
> error looking up in-service Pantries: proto error: no records found for Query { name: Name("_crucible-pantry._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN }

task: "v2p_manager"
configured period: every <REDACTED_DURATION>s
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
@@ -1720,6 +1735,17 @@ task: "tuf_repo_pruner"
repos kept because they're recent uploads: none
other repos eligible for pruning: none

task: "user_data_export_coordinator"
configured period: every <REDACTED_DURATION>m
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
total create steps invoked ok: 0
total delete steps invoked ok: 0
total records affected by expunge: 0
total records fast-deleted (resource was deleted): 0
errors: 1
> error looking up in-service Pantries: proto error: no records found for Query { name: Name("_crucible-pantry._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN }

task: "v2p_manager"
configured period: every <REDACTED_DURATION>s
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
17 changes: 17 additions & 0 deletions nexus-config/src/nexus_config.rs
@@ -442,6 +442,8 @@ pub struct BackgroundTaskConfig {
pub audit_log_timeout_incomplete: AuditLogTimeoutIncompleteConfig,
/// configuration for audit log cleanup (retention) task
pub audit_log_cleanup: AuditLogCleanupConfig,
/// configuration for user data export coordinator task
pub user_data_export_coordinator: UserDataExportCoordinatorConfig,
}

#[serde_as]
@@ -1042,6 +1044,15 @@ pub struct TrustQuorumConfig {
pub period_secs: Duration,
}

#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct UserDataExportCoordinatorConfig {
/// period (in seconds) for periodic activations of this background task
/// that manages user data export objects
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs: Duration,
}

/// Configuration for a nexus server
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
pub struct PackageConfig {
@@ -1329,6 +1340,7 @@ mod test {
audit_log_cleanup.period_secs = 600
audit_log_cleanup.retention_days = 90
audit_log_cleanup.max_deleted_per_activation = 10000
user_data_export_coordinator.period_secs = 60
[default_region_allocation_strategy]
type = "random"
seed = 0
@@ -1609,6 +1621,10 @@ mod test {
retention_days: NonZeroU32::new(90).unwrap(),
max_deleted_per_activation: 10_000,
},
user_data_export_coordinator:
UserDataExportCoordinatorConfig {
period_secs: Duration::from_secs(60),
},
},
multicast: MulticastConfig { enabled: false },
default_region_allocation_strategy:
@@ -1724,6 +1740,7 @@ mod test {
audit_log_cleanup.period_secs = 600
audit_log_cleanup.retention_days = 90
audit_log_cleanup.max_deleted_per_activation = 10000
user_data_export_coordinator.period_secs = 60

[default_region_allocation_strategy]
type = "random"
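For readers unfamiliar with the serde_with plumbing used here, the DurationSeconds<u64> adapter is what turns the integer period_secs key from the TOML above into a std::time::Duration. A small standalone sketch of that mapping; CoordinatorConfig is a stand-in name, and serde, serde_with, and toml are assumed as dependencies:

use serde::Deserialize;
use serde_with::{DurationSeconds, serde_as};
use std::time::Duration;

// Stand-in for UserDataExportCoordinatorConfig, showing only the
// seconds-to-Duration conversion.
#[serde_as]
#[derive(Debug, Deserialize, PartialEq)]
struct CoordinatorConfig {
    #[serde_as(as = "DurationSeconds<u64>")]
    period_secs: Duration,
}

fn main() {
    // Mirrors the `user_data_export_coordinator.period_secs = 60` line
    // added to the example configs.
    let cfg: CoordinatorConfig = toml::from_str("period_secs = 60").unwrap();
    assert_eq!(cfg.period_secs, Duration::from_secs(60));
}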
1 change: 1 addition & 0 deletions nexus/background-task-interface/src/init.rs
@@ -62,6 +62,7 @@ pub struct BackgroundTasks {
pub task_trust_quorum_manager: Activator,
pub task_attached_subnet_manager: Activator,
pub task_session_cleanup: Activator,
pub task_user_data_export_coordinator: Activator,

// Handles to activate background tasks that do not get used by Nexus
// at-large. These background tasks are implementation details as far as
119 changes: 113 additions & 6 deletions nexus/db-queries/src/db/datastore/user_data_export.rs
@@ -310,7 +310,7 @@ impl DataStore {
/// associated delete saga run
///
/// This function also marks user data export records as deleted if the
/// associated resource was itself delete.
/// associated resource was itself deleted.
pub async fn compute_user_data_export_changeset(
&self,
opctx: &OpContext,
@@ -421,8 +421,12 @@
.await
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;

// Filter on state != Deleted to avoid returning a constantly
// accumulating list of records to delete.

let records: Vec<UserDataExportRecord> = dsl::user_data_export
.filter(dsl::resource_deleted.eq(true))
.filter(dsl::state.ne(UserDataExportState::Deleted))
.select(UserDataExportRecord::as_select())
.load_async(&*conn)
.await
@@ -468,6 +472,7 @@
.filter(diesel::dsl::not(
dsl::pantry_ip.eq_any(in_service_pantries),
))
.filter(dsl::resource_deleted.eq(false))
.set(dsl::resource_deleted.eq(true))
.execute_async(&*conn)
.await
@@ -1815,9 +1820,8 @@
.await
.unwrap();

// The changeset should now show that a record for this iamge needs to
// be created, along with the old record still needing the associated
// delete saga.
// The changeset should now show that a record for this image needs to
// be created.

let changeset =
datastore.compute_user_data_export_changeset(&opctx).await.unwrap();
@@ -1826,8 +1830,7 @@
&[UserDataExportResource::Image { id: image.id() }],
);
assert!(changeset.create_required.is_empty());
assert_eq!(changeset.delete_required.len(), 1);
assert_eq!(changeset.delete_required[0].id(), record.id());
assert!(changeset.delete_required.is_empty());

// Now it should work.

@@ -1906,4 +1909,108 @@
db.terminate().await;
logctx.cleanup_successful();
}

/// Assert that deleted records are not part of the changeset anymore.
#[tokio::test]
async fn test_deleted_not_part_of_changeset() {
let logctx = dev::test_setup_log("test_deleted_not_part_of_changeset");
let log = logctx.log.new(o!());
let db = TestDatabase::new_with_datastore(&log).await;
let (opctx, datastore) = (db.opctx(), db.datastore());

let (authz_project, _) =
create_project(&opctx, &datastore, PROJECT_NAME).await;

let snapshot = create_project_snapshot(
&opctx,
&datastore,
&authz_project,
Uuid::new_v4(),
"snap",
)
.await;

// Create a regular record and move it to Live

let record = datastore
.user_data_export_create_for_snapshot(
&opctx,
UserDataExportUuid::new_v4(),
snapshot.id(),
)
.await
.unwrap();

let operating_saga_id = Uuid::new_v4();

datastore
.set_user_data_export_requested_to_assigning(
&opctx,
record.id(),
operating_saga_id,
1,
)
.await
.unwrap();

datastore
.set_user_data_export_assigning_to_live(
&opctx,
record.id(),
operating_saga_id,
1,
SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0),
VolumeUuid::new_v4(),
)
.await
.unwrap();

// The changeset should be empty now

let changeset =
datastore.compute_user_data_export_changeset(&opctx).await.unwrap();
assert!(changeset.request_required.is_empty());
assert!(changeset.create_required.is_empty());
assert!(changeset.delete_required.is_empty());

// Marking the record's resource as deleted means it will show up in the
// delete_required list.

datastore.user_data_export_mark_deleted(record.id()).await.unwrap();

let changeset =
datastore.compute_user_data_export_changeset(&opctx).await.unwrap();
assert!(!changeset.delete_required.is_empty());

// Move the record to deleting.

datastore
.set_user_data_export_live_to_deleting(
&opctx,
record.id(),
operating_saga_id,
2,
)
.await
.unwrap();

datastore
.set_user_data_export_deleting_to_deleted(
&opctx,
record.id(),
operating_saga_id,
2,
)
.await
.unwrap();

// The record should _not_ show up in the delete_required list anymore.

let changeset =
datastore.compute_user_data_export_changeset(&opctx).await.unwrap();
assert!(changeset.delete_required.is_empty());

db.terminate().await;
logctx.cleanup_successful();
}
}
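The review thread earlier in this diff asked whether the task's counts could be cumulative; commit 9ac0abc instead makes them per-activation by having the expunge UPDATE skip rows whose resource_deleted flag is already set, which is the new .filter(dsl::resource_deleted.eq(false)) clause above. A rough, self-contained sketch of why that filter changes the reported number; Row and mark_expunged are invented for illustration and stand in for the real table and diesel query:

// Illustrative model only: shows why filtering on `resource_deleted = false`
// makes "total records affected by expunge" reflect just the latest
// activation instead of accumulating across runs.
#[derive(Clone, Copy)]
struct Row {
    on_expunged_pantry: bool,
    resource_deleted: bool,
}

/// Marks rows whose Pantry was expunged and returns how many rows were
/// actually updated, i.e. the count the background task would report.
fn mark_expunged(rows: &mut [Row]) -> usize {
    let mut affected = 0;
    for row in rows.iter_mut() {
        // Mirrors the new `.filter(dsl::resource_deleted.eq(false))` clause:
        // rows already marked by an earlier activation are skipped and not
        // re-counted.
        if row.on_expunged_pantry && !row.resource_deleted {
            row.resource_deleted = true;
            affected += 1;
        }
    }
    affected
}

fn main() {
    let mut rows = vec![
        Row { on_expunged_pantry: true, resource_deleted: false },
        Row { on_expunged_pantry: false, resource_deleted: false },
    ];
    // First activation marks the one affected row and reports 1.
    assert_eq!(mark_expunged(&mut rows), 1);
    // A later activation skips the already-marked row and reports 0,
    // matching the "only show the latest number of affected records"
    // behaviour described in the review reply.
    assert_eq!(mark_expunged(&mut rows), 0);
}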
1 change: 1 addition & 0 deletions nexus/examples/config-second.toml
@@ -203,6 +203,7 @@ audit_log_timeout_incomplete.max_timed_out_per_activation = 1000
audit_log_cleanup.period_secs = 600
audit_log_cleanup.retention_days = 90
audit_log_cleanup.max_deleted_per_activation = 10000
user_data_export_coordinator.period_secs = 240
Contributor:

Where did the 240 come from? :)


[default_region_allocation_strategy]
# allocate region on 3 random distinct zpools, on 3 random distinct sleds.
1 change: 1 addition & 0 deletions nexus/examples/config.toml
@@ -187,6 +187,7 @@ audit_log_timeout_incomplete.max_timed_out_per_activation = 1000
audit_log_cleanup.period_secs = 600
audit_log_cleanup.retention_days = 90
audit_log_cleanup.max_deleted_per_activation = 10000
user_data_export_coordinator.period_secs = 240

[default_region_allocation_strategy]
# allocate region on 3 random distinct zpools, on 3 random distinct sleds.