Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ use nexus_types::internal_api::background::SupportBundleCleanupReport;
use nexus_types::internal_api::background::SupportBundleCollectionReport;
use nexus_types::internal_api::background::SupportBundleCollectionStepStatus;
use nexus_types::internal_api::background::SupportBundleEreportStatus;
use nexus_types::internal_api::background::TokenCleanupStatus;
use nexus_types::internal_api::background::TrustQuorumManagerStatus;
use nexus_types::internal_api::background::TufArtifactReplicationCounters;
use nexus_types::internal_api::background::TufArtifactReplicationRequest;
Expand Down Expand Up @@ -1340,6 +1341,9 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
"session_cleanup" => {
print_task_session_cleanup(details);
}
"token_cleanup" => {
print_task_token_cleanup(details);
}
"sp_ereport_ingester" => {
print_task_sp_ereport_ingester(details);
}
Expand Down Expand Up @@ -2813,6 +2817,33 @@ fn print_task_session_cleanup(details: &serde_json::Value) {
};
}

fn print_task_token_cleanup(details: &serde_json::Value) {
match serde_json::from_value::<TokenCleanupStatus>(details.clone()) {
Err(error) => eprintln!(
"warning: failed to interpret task details: {:?}: {:?}",
error, details
),
Ok(status) => {
const DELETED: &str = "deleted:";
const CUTOFF: &str = "cutoff:";
const LIMIT: &str = "limit:";
const ERROR: &str = "error:";
const WIDTH: usize =
const_max_len(&[DELETED, CUTOFF, LIMIT, ERROR]) + 1;

println!(" {DELETED:<WIDTH$}{}", status.deleted);
println!(
" {CUTOFF:<WIDTH$}{}",
status.cutoff.to_rfc3339_opts(SecondsFormat::AutoSi, true),
);
println!(" {LIMIT:<WIDTH$}{}", status.limit);
if let Some(error) = &status.error {
println!(" {ERROR:<WIDTH$}{error}");
}
}
};
}

fn print_task_service_firewall_rule_propagation(details: &serde_json::Value) {
match serde_json::from_value::<ServiceFirewallRuleStatus>(details.clone()) {
Err(error) => eprintln!(
Expand Down
12 changes: 12 additions & 0 deletions dev-tools/omdb/tests/env.out
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,10 @@ task: "switch_port_config_manager"
manages switch port settings for rack switches


task: "token_cleanup"
hard-deletes expired device access tokens


task: "trust_quorum_manager"
Drive trust quorum reconfigurations to completion

Expand Down Expand Up @@ -505,6 +509,10 @@ task: "switch_port_config_manager"
manages switch port settings for rack switches


task: "token_cleanup"
hard-deletes expired device access tokens


task: "trust_quorum_manager"
Drive trust quorum reconfigurations to completion

Expand Down Expand Up @@ -755,6 +763,10 @@ task: "switch_port_config_manager"
manages switch port settings for rack switches


task: "token_cleanup"
hard-deletes expired device access tokens


task: "trust_quorum_manager"
Drive trust quorum reconfigurations to completion

Expand Down
20 changes: 20 additions & 0 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,10 @@ task: "switch_port_config_manager"
manages switch port settings for rack switches


task: "token_cleanup"
hard-deletes expired device access tokens


task: "trust_quorum_manager"
Drive trust quorum reconfigurations to completion

Expand Down Expand Up @@ -996,6 +1000,14 @@ task: "switch_port_config_manager"
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {})

task: "token_cleanup"
configured period: every <REDACTED_DURATION>m
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
deleted: 0
cutoff: <REDACTED_TIMESTAMP>
limit: 10000

task: "trust_quorum_manager"
configured period: every <REDACTED_DURATION>m
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
Expand Down Expand Up @@ -1679,6 +1691,14 @@ task: "switch_port_config_manager"
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {})

task: "token_cleanup"
configured period: every <REDACTED_DURATION>m
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
deleted: 0
cutoff: <REDACTED_TIMESTAMP>
limit: 10000

task: "trust_quorum_manager"
configured period: every <REDACTED_DURATION>m
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
Expand Down
21 changes: 21 additions & 0 deletions nexus-config/src/nexus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,8 @@ pub struct BackgroundTaskConfig {
pub attached_subnet_manager: AttachedSubnetManagerConfig,
/// configuration for console session cleanup task
pub session_cleanup: SessionCleanupConfig,
/// configuration for device access token cleanup task
pub token_cleanup: TokenCleanupConfig,
/// configuration for audit log incomplete timeout task
pub audit_log_timeout_incomplete: AuditLogTimeoutIncompleteConfig,
/// configuration for audit log cleanup (retention) task
Expand All @@ -455,6 +457,17 @@ pub struct SessionCleanupConfig {
pub max_delete_per_activation: u32,
}

/// Configuration for the `token_cleanup` background task, which hard-deletes
/// expired device access tokens.
#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct TokenCleanupConfig {
    /// period (in seconds) for periodic activations of the token cleanup task
    #[serde_as(as = "DurationSeconds<u64>")]
    pub period_secs: Duration,

    /// maximum rows hard-deleted per activation
    pub max_delete_per_activation: u32,
}

#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct AuditLogTimeoutIncompleteConfig {
Expand Down Expand Up @@ -1333,6 +1346,8 @@ mod test {
attached_subnet_manager.period_secs = 60
session_cleanup.period_secs = 300
session_cleanup.max_delete_per_activation = 10000
token_cleanup.period_secs = 300
token_cleanup.max_delete_per_activation = 10000
audit_log_timeout_incomplete.period_secs = 600
audit_log_timeout_incomplete.timeout_secs = 14400
audit_log_timeout_incomplete.max_timed_out_per_activation = 1000
Expand Down Expand Up @@ -1609,6 +1624,10 @@ mod test {
period_secs: Duration::from_secs(300),
max_delete_per_activation: 10_000,
},
token_cleanup: TokenCleanupConfig {
period_secs: Duration::from_secs(300),
max_delete_per_activation: 10_000,
},
audit_log_timeout_incomplete:
AuditLogTimeoutIncompleteConfig {
period_secs: Duration::from_secs(600),
Expand Down Expand Up @@ -1729,6 +1748,8 @@ mod test {
attached_subnet_manager.period_secs = 60
session_cleanup.period_secs = 300
session_cleanup.max_delete_per_activation = 10000
token_cleanup.period_secs = 300
token_cleanup.max_delete_per_activation = 10000
audit_log_timeout_incomplete.period_secs = 600
audit_log_timeout_incomplete.timeout_secs = 14400
audit_log_timeout_incomplete.max_timed_out_per_activation = 1000
Expand Down
1 change: 1 addition & 0 deletions nexus/background-task-interface/src/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ pub struct BackgroundTasks {
pub task_trust_quorum_manager: Activator,
pub task_attached_subnet_manager: Activator,
pub task_session_cleanup: Activator,
pub task_token_cleanup: Activator,

// Handles to activate background tasks that do not get used by Nexus
// at-large. These background tasks are implementation details as far as
Expand Down
3 changes: 2 additions & 1 deletion nexus/db-model/src/schema_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock};
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
pub const SCHEMA_VERSION: Version = Version::new(257, 0, 0);
pub const SCHEMA_VERSION: Version = Version::new(258, 0, 0);

/// List of all past database schema versions, in *reverse* order
///
Expand All @@ -28,6 +28,7 @@ pub static KNOWN_VERSIONS: LazyLock<Vec<KnownVersion>> = LazyLock::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
KnownVersion::new(258, "device-access-token-time-expires-index"),
KnownVersion::new(257, "add-disk-adoption-requests"),
KnownVersion::new(256, "bgp-unnumbered-peer-cleanup"),
KnownVersion::new(255, "blueprint-add-external-networking-generation"),
Expand Down
27 changes: 27 additions & 0 deletions nexus/db-queries/src/db/datastore/device_auth.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@ use crate::db::model::DeviceAuthRequest;
use crate::db::model::to_db_typed_uuid;
use crate::db::pagination::paginated;
use async_bb8_diesel::AsyncRunQueryDsl;
use chrono::DateTime;
use chrono::Utc;
use diesel::dsl::sql_query;
use diesel::prelude::*;
use diesel::sql_types;
use nexus_db_errors::ErrorHandler;
use nexus_db_errors::public_error_from_diesel;
use nexus_db_schema::schema::device_access_token;
Expand Down Expand Up @@ -287,6 +290,30 @@ impl DataStore {
Ok(())
}

/// Hard-delete up to `limit` device access tokens whose `time_expires` is
/// non-NULL and older than `cutoff`, returning the number deleted. Tokens
/// with NULL `time_expires` never expire and are not eligible.
pub async fn token_cleanup_batch(
&self,
opctx: &OpContext,
cutoff: DateTime<Utc>,
limit: u32,
) -> Result<usize, Error> {
opctx.authorize(authz::Action::Modify, &authz::FLEET).await?;

sql_query(
"DELETE FROM omicron.public.device_access_token \
WHERE time_expires IS NOT NULL AND time_expires < $1 \
ORDER BY time_expires \
LIMIT $2",
)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as https://github.com/oxidecomputer/omicron/pull/10009/changes#r2908069585: the query DSL can't do order by and limit on a delete.

.bind::<sql_types::Timestamptz, _>(cutoff)
.bind::<sql_types::BigInt, _>(i64::from(limit))
.execute_async(&*self.pool_connection_authorized(opctx).await?)
.await
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
}

/// Delete all tokens for the user
pub async fn silo_user_tokens_delete(
&self,
Expand Down
2 changes: 2 additions & 0 deletions nexus/examples/config-second.toml
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ trust_quorum.period_secs = 60
attached_subnet_manager.period_secs = 60
session_cleanup.period_secs = 300
session_cleanup.max_delete_per_activation = 10000
token_cleanup.period_secs = 300
token_cleanup.max_delete_per_activation = 10000
audit_log_timeout_incomplete.period_secs = 600
audit_log_timeout_incomplete.timeout_secs = 14400
audit_log_timeout_incomplete.max_timed_out_per_activation = 1000
Expand Down
2 changes: 2 additions & 0 deletions nexus/examples/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ trust_quorum.period_secs = 60
attached_subnet_manager.period_secs = 60
session_cleanup.period_secs = 300
session_cleanup.max_delete_per_activation = 10000
token_cleanup.period_secs = 300
token_cleanup.max_delete_per_activation = 10000
audit_log_timeout_incomplete.period_secs = 600
audit_log_timeout_incomplete.timeout_secs = 14400
audit_log_timeout_incomplete.max_timed_out_per_activation = 1000
Expand Down
16 changes: 16 additions & 0 deletions nexus/src/app/background/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ use super::tasks::session_cleanup;
use super::tasks::support_bundle_collector;
use super::tasks::sync_service_zone_nat::ServiceZoneNatTracker;
use super::tasks::sync_switch_configuration::SwitchPortSettingsManager;
use super::tasks::token_cleanup;
use super::tasks::trust_quorum;
use super::tasks::tuf_artifact_replication;
use super::tasks::tuf_repo_pruner;
Expand Down Expand Up @@ -277,6 +278,7 @@ impl BackgroundTasksInitializer {
task_trust_quorum_manager: Activator::new(),
task_attached_subnet_manager: Activator::new(),
task_session_cleanup: Activator::new(),
task_token_cleanup: Activator::new(),

// Handles to activate background tasks that do not get used by Nexus
// at-large. These background tasks are implementation details as far as
Expand Down Expand Up @@ -370,6 +372,7 @@ impl BackgroundTasksInitializer {
task_trust_quorum_manager,
task_attached_subnet_manager,
task_session_cleanup,
task_token_cleanup,
task_audit_log_timeout_incomplete,
task_audit_log_cleanup,
// Add new background tasks here. Be sure to use this binding in a
Expand Down Expand Up @@ -1247,6 +1250,19 @@ impl BackgroundTasksInitializer {
activator: task_session_cleanup,
});

driver.register(TaskDefinition {
name: "token_cleanup",
description: "hard-deletes expired device access tokens",
period: config.token_cleanup.period_secs,
task_impl: Box::new(token_cleanup::TokenCleanup::new(
datastore.clone(),
config.token_cleanup.max_delete_per_activation,
)),
opctx: opctx.child(BTreeMap::new()),
watchers: vec![],
activator: task_token_cleanup,
});

driver.register(TaskDefinition {
name: "audit_log_timeout_incomplete",
description: "transitions stale incomplete audit log entries to \
Expand Down
1 change: 1 addition & 0 deletions nexus/src/app/background/tasks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ pub mod support_bundle;
pub mod support_bundle_collector;
pub mod sync_service_zone_nat;
pub mod sync_switch_configuration;
pub mod token_cleanup;
pub mod trust_quorum;
pub mod tuf_artifact_replication;
pub mod tuf_repo_pruner;
Expand Down
Loading
Loading