diff --git a/Cargo.lock b/Cargo.lock index dd7a93fcd2b..5c5caf29dfc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7922,6 +7922,7 @@ name = "ntp-admin-client" version = "0.1.0" dependencies = [ "chrono", + "git-stub-vcs", "omicron-workspace-hack", "progenitor 0.13.0", "reqwest 0.13.2", diff --git a/clients/ntp-admin-client/Cargo.toml b/clients/ntp-admin-client/Cargo.toml index a624d69a59e..9e66b45fbe9 100644 --- a/clients/ntp-admin-client/Cargo.toml +++ b/clients/ntp-admin-client/Cargo.toml @@ -15,3 +15,6 @@ schemars.workspace = true serde.workspace = true slog.workspace = true omicron-workspace-hack.workspace = true + +[build-dependencies] +git-stub-vcs.workspace = true diff --git a/clients/ntp-admin-client/build.rs b/clients/ntp-admin-client/build.rs new file mode 100644 index 00000000000..dada47b36d3 --- /dev/null +++ b/clients/ntp-admin-client/build.rs @@ -0,0 +1,13 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use git_stub_vcs::Materializer; + +fn main() { + let materializer = Materializer::for_build_script("../../") + .expect("detected VCS at repo root"); + materializer + .materialize("openapi/ntp-admin/ntp-admin-1.0.0-aeffc2.json.gitstub") + .expect("materialized ntp-admint-client v1 git stub"); +} diff --git a/clients/ntp-admin-client/src/lib.rs b/clients/ntp-admin-client/src/lib.rs index 4cdc70d6d99..7719e98e3be 100644 --- a/clients/ntp-admin-client/src/lib.rs +++ b/clients/ntp-admin-client/src/lib.rs @@ -20,3 +20,25 @@ progenitor::generate_api!( }), derives = [schemars::JsonSchema], ); + +pub mod v1 { + progenitor::generate_api!( + spec = { + path = "git-stub-vcs/openapi/ntp-admin/ntp-admin-1.0.0-aeffc2.json", + relative_to = OutDir, + }, + interface = Positional, + inner_type = slog::Logger, + pre_hook = (|log: &slog::Logger, request: &reqwest::Request| { + slog::debug!(log, "client request"; + "method" => %request.method(), + "uri" => %request.url(), + "body" => ?&request.body(), + ); + }), + post_hook = (|log: &slog::Logger, result: &Result<_, _>| { + slog::debug!(log, "client response"; "result" => ?result); + }), + derives = [schemars::JsonSchema], + ); +} diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index 51bfd7231e8..083d5f7d173 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -511,7 +511,7 @@ impl<'a> Collector<'a> { let url = format!("http://{addr}"); let log = self.log.new(o!("ntp_admin_url" => url.clone())); - (cfg.id, ntp_admin_client::Client::new(&url, log)) + (cfg.id, ntp_admin_client::v1::Client::new(&url, log)) }) .collect(); @@ -537,7 +537,7 @@ impl<'a> Collector<'a> { async fn collect_one_timesync( log: &slog::Logger, zone_id: OmicronZoneUuid, - client: &ntp_admin_client::Client, + client: &ntp_admin_client::v1::Client, in_progress: &mut CollectionBuilder, ) -> Result<(), anyhow::Error> { let sled_agent_url = client.baseurl(); diff --git a/ntp-admin/api/src/lib.rs b/ntp-admin/api/src/lib.rs index 03db9240e9d..eab376184c2 100644 --- a/ntp-admin/api/src/lib.rs +++ b/ntp-admin/api/src/lib.rs @@ -4,35 +4,21 @@ use dropshot::{HttpError, HttpResponseOk, RequestContext}; use dropshot_api_manager_types::api_versions; -use ntp_admin_types_versions::latest; +use ntp_admin_types_versions::{latest, v1}; api_versions!([ - // WHEN CHANGING THE API (part 1 of 2): + // Do not create new versions of this client-side versioned API. + // https://github.com/oxidecomputer/omicron/issues/9290 // - // +- Pick a new semver and define it in the list below. The list MUST - // | remain sorted, which generally means that your version should go at - // | the very top. - // | - // | Duplicate this line, uncomment the *second* copy, update that copy for - // | your new API version, and leave the first copy commented out as an - // | example for the next person. - // v - // (next_int, IDENT), + // NOTE: Version 2 of this API was added to address + // //github.com/oxidecomputer/omicron/issues/7668, a bad failure mode in + // which CRDB panics early during control plane startup because the clocks + // are not synchronized well-enough. We're adding this as part of a + // two-phase rollout to get around #9290 for now. + (2, ADD_MAX_ERROR_AND_OFFSET), (1, INITIAL), ]); -// WHEN CHANGING THE API (part 2 of 2): -// -// The call to `api_versions!` above defines constants of type -// `semver::Version` that you can use in your Dropshot API definition to specify -// the version when a particular endpoint was added or removed. For example, if -// you used: -// -// (2, ADD_FOOBAR) -// -// Then you could use `VERSION_ADD_FOOBAR` as the version in which endpoints -// were added or removed. - #[dropshot::api_description] pub trait NtpAdminApi { type Context; @@ -41,8 +27,22 @@ pub trait NtpAdminApi { #[endpoint { method = GET, path = "/timesync", + versions = VERSION_ADD_MAX_ERROR_AND_OFFSET.. }] async fn timesync( rqctx: RequestContext, ) -> Result, HttpError>; + + /// Query for the state of time synchronization + #[endpoint { + method = GET, + path = "/timesync", + operation_id = "timesync", + versions = VERSION_INITIAL..VERSION_ADD_MAX_ERROR_AND_OFFSET, + }] + async fn timesync_v1( + rqctx: RequestContext, + ) -> Result, HttpError> { + Ok(Self::timesync(rqctx).await?.map(|ts| ts.into())) + } } diff --git a/ntp-admin/src/http_entrypoints.rs b/ntp-admin/src/http_entrypoints.rs index 03e2e08f210..9ebdbd0358c 100644 --- a/ntp-admin/src/http_entrypoints.rs +++ b/ntp-admin/src/http_entrypoints.rs @@ -42,12 +42,22 @@ impl From for HttpError { const TIMESYNC_STRATUM_MAX: u8 = 9; const TIMESYNC_REFTIME_MIN: f64 = 1234567890.0; const TIMESYNC_CORRECTION_MAX: f64 = 0.05; +// CockroachDB panics if any node's clock is more than 400ms from at least half +// of its peers. See +// https://www.cockroachlabs.com/docs/stable/recommended-production-settings#clock-synchronization +// for more details. +// +// If all nodes have max_error <= 175ms, the worst-case +// inter-node difference is 350ms, keeping us well within CRDB's threshold. +// +// This value is in seconds. +const TIMESYNC_MAX_ERROR_MAX: f64 = 0.175; const TIMESYNC_REF_IDS_NO_PEER_SYNC: [u32; 2] = [0, 0x7f7f0101]; fn parse_timesync_result(stdout: &str) -> Result { let v: Vec<&str> = stdout.split(',').collect(); - if v.len() < 10 { + if v.len() < 12 { return Err(TimeSyncError::FailedToParse { reason: "too few fields", stdout: stdout.to_string(), @@ -80,6 +90,32 @@ fn parse_timesync_result(stdout: &str) -> Result { stdout: stdout.to_string(), }); }; + let Ok(last_offset) = f64::from_str(v[5]) else { + return Err(TimeSyncError::FailedToParse { + reason: "bad last_offset", + stdout: stdout.to_string(), + }); + }; + let Ok(rms_offset) = f64::from_str(v[6]) else { + return Err(TimeSyncError::FailedToParse { + reason: "bad rms_offset", + stdout: stdout.to_string(), + }); + }; + let Ok(root_delay) = f64::from_str(v[10]) else { + return Err(TimeSyncError::FailedToParse { + reason: "bad root_delay", + stdout: stdout.to_string(), + }); + }; + let Ok(root_dispersion) = f64::from_str(v[11]) else { + return Err(TimeSyncError::FailedToParse { + reason: "bad root_dispersion", + stdout: stdout.to_string(), + }); + }; + + let max_error = compute_max_error(correction, root_delay, root_dispersion); // Per `chronyc waitsync`'s implementation, if either the // reference IP address is not unspecified or the reference @@ -90,9 +126,48 @@ fn parse_timesync_result(stdout: &str) -> Result { let sync = stratum <= TIMESYNC_STRATUM_MAX && TIMESYNC_REFTIME_MIN < ref_time && peer_sync - && correction.abs() <= TIMESYNC_CORRECTION_MAX; + && correction.abs() <= TIMESYNC_CORRECTION_MAX + && max_error <= TIMESYNC_MAX_ERROR_MAX; + + Ok(TimeSync { + sync, + ref_id, + ip_addr, + stratum, + ref_time, + correction, + last_offset, + rms_offset, + root_delay, + root_dispersion, + max_error, + }) +} - Ok(TimeSync { sync, ref_id, ip_addr, stratum, ref_time, correction }) +/// Compute the maximum clock error from the current offset, root delay, and +/// dispersion. +/// +/// The returned value is defined by: +/// +/// ```ignore +/// max_error <= |correction| + root_delay / 2 + root_dispersion. +/// ``` +/// +/// See for a reference. +/// +/// The term `root_delay / 2` comes from the assumption that the RTT delay from +/// us to the root stratum is symmetric, i.e., on average the same going to the +/// server as coming back from it. To that, we add the accumulated error at each +/// layer from all sources (dispersion). Adding the correction gets us an +/// estimate of how far off the _system clock_ is from the estimate of "true +/// time", which is the maximum error that programs using `gettimeofday(3)` or +/// similar would see. +const fn compute_max_error( + correction: f64, + root_delay: f64, + root_dispersion: f64, +) -> f64 { + correction.abs() + root_delay / 2.0 + root_dispersion } enum NtpAdminImpl {} @@ -133,45 +208,61 @@ mod tests { use super::*; use std::net::Ipv4Addr; + // Standard field values used across tests. chronyc -c tracking fields are: + // ref_id, ip_addr, stratum, ref_time, correction (v[4]), + // last_offset (v[5]), rms_offset (v[6]), freq_ppm (v[7]), + // resid_freq_ppm (v[8]), skew_ppm (v[9]), + // root_delay (v[10]), root_dispersion (v[11]), ... + const GOOD_FIELDS_5_TO_11: &str = "0.001,0.001,x,x,x,0.01,0.001"; + #[test] fn test_parse_timesync_result_success() { - let input = "C0A80001,192.168.0.1,2,1234567891.123456,0.001,x,x,x,x,x"; + let input = format!( + "C0A80001,192.168.0.1,2,1234567891.123456,0.001,{GOOD_FIELDS_5_TO_11}" + ); - let result = parse_timesync_result(input).unwrap(); + let result = parse_timesync_result(&input).unwrap(); assert_eq!(result.ref_id, 0xC0A80001); assert_eq!(result.ip_addr, IpAddr::V4(Ipv4Addr::new(192, 168, 0, 1))); assert_eq!(result.stratum, 2); assert_eq!(result.ref_time, 1234567891.123456); assert_eq!(result.correction, 0.001); + assert_eq!(result.last_offset, 0.001); + assert_eq!(result.rms_offset, 0.001); + assert_eq!(result.root_delay, 0.01); + assert_eq!(result.root_dispersion, 0.001); + assert_eq!(result.max_error, result.correction + 0.01 / 2.0 + 0.001); assert!(result.sync); } #[test] fn test_parse_timesync_result_not_synced_high_stratum() { let stratum = TIMESYNC_STRATUM_MAX + 1; - let input = &format!( - "C0A80001,192.168.0.1,{stratum},1234567891.123456,0.001,x,x,x,x,x" + let input = format!( + "C0A80001,192.168.0.1,{stratum},1234567891.123456,0.001,{GOOD_FIELDS_5_TO_11}" ); - let result = parse_timesync_result(input).unwrap(); + let result = parse_timesync_result(&input).unwrap(); assert!(!result.sync); } #[test] fn test_parse_timesync_result_not_synced_old_ref_time() { let ref_time = TIMESYNC_REFTIME_MIN - 0.01; - let input = &format!( - "C0A80001,192.168.0.1,2,{ref_time},0.001,0.000001,x,x,x,x,x" + let input = format!( + "C0A80001,192.168.0.1,2,{ref_time},0.001,{GOOD_FIELDS_5_TO_11}" ); - let result = parse_timesync_result(input).unwrap(); + let result = parse_timesync_result(&input).unwrap(); assert!(!result.sync); } #[test] fn test_parse_timesync_result_boundary_correction() { - let input = "C0A80001,192.168.0.1,2,1234567891.123456,0.05,x,x,x,x,x"; + let input = format!( + "C0A80001,192.168.0.1,2,1234567891.123456,0.05,{GOOD_FIELDS_5_TO_11}" + ); - let result = parse_timesync_result(input).unwrap(); + let result = parse_timesync_result(&input).unwrap(); assert_eq!(result.correction, 0.05); assert!(result.sync); } @@ -179,68 +270,90 @@ mod tests { #[test] fn test_parse_timesync_result_not_synced_high_correction() { let correction = TIMESYNC_CORRECTION_MAX + 0.01; - let input = &format!( - "C0A80001,192.168.0.1,2,1234567891.123456,{correction},x,x,x,x,x" + let input = format!( + "C0A80001,192.168.0.1,2,1234567891.123456,{correction},{GOOD_FIELDS_5_TO_11}" ); - let result = parse_timesync_result(input).unwrap(); + let result = parse_timesync_result(&input).unwrap(); assert!(!result.sync); } #[test] fn test_parse_timesync_result_negative_correction() { - let input = "C0A80001,192.168.0.1,2,1234567891.123456,-0.01,x,x,x,x,x"; + let input = format!( + "C0A80001,192.168.0.1,2,1234567891.123456,-0.01,{GOOD_FIELDS_5_TO_11}" + ); - let result = parse_timesync_result(input).unwrap(); + let result = parse_timesync_result(&input).unwrap(); assert_eq!(result.correction, -0.01); assert!(result.sync); } #[test] fn test_parse_timesync_result_not_synced_no_peer() { - let input = "0,::/0,2,1234567891.123456,0.001,x,x,x,x,x"; + let input = + format!("0,::/0,2,1234567891.123456,0.001,{GOOD_FIELDS_5_TO_11}"); - let result = parse_timesync_result(input).unwrap(); + let result = parse_timesync_result(&input).unwrap(); assert!(!result.sync); } #[test] fn test_parse_timesync_result_special_ref_id() { - let input = "0,::/0,2,1234567891.123456,0.001,x,x,x,x,x"; - let result = parse_timesync_result(input).unwrap(); + let input = + format!("0,::/0,2,1234567891.123456,0.001,{GOOD_FIELDS_5_TO_11}"); + let result = parse_timesync_result(&input).unwrap(); assert_eq!(result.ref_id, TIMESYNC_REF_IDS_NO_PEER_SYNC[0]); assert!(!result.sync); - let input = "7f7f0101,::/0,2,1234567891.123456,0.001,x,x,x,x,x"; - let result = parse_timesync_result(input).unwrap(); + let input = format!( + "7f7f0101,::/0,2,1234567891.123456,0.001,{GOOD_FIELDS_5_TO_11}" + ); + let result = parse_timesync_result(&input).unwrap(); assert_eq!(result.ref_id, TIMESYNC_REF_IDS_NO_PEER_SYNC[1]); assert!(!result.sync); - let input = "7f7f0102,192.168.0.1,2,1234567891.123456,0.001,x,x,x,x,x"; - let result = parse_timesync_result(input).unwrap(); + let input = format!( + "7f7f0102,192.168.0.1,2,1234567891.123456,0.001,{GOOD_FIELDS_5_TO_11}" + ); + let result = parse_timesync_result(&input).unwrap(); assert_eq!(result.ref_id, 0x7f7f0102); assert!(result.sync); } #[test] fn test_parse_timesync_result_ipv6_address() { - let input = "C0A80001,2001:db8::1,2,1234567891.123456,0.001,x,x,x,x,x"; + let input = format!( + "C0A80001,2001:db8::1,2,1234567891.123456,0.001,{GOOD_FIELDS_5_TO_11}" + ); - let result = parse_timesync_result(input).unwrap(); + let result = parse_timesync_result(&input).unwrap(); assert_eq!(result.ip_addr, "2001:db8::1".parse::().unwrap()); assert!(result.sync); } #[test] fn test_parse_timesync_result_invalid_ip_fallback() { - let input = "C0A80001,invalid_ip,2,1234567891.123456,0.001,x,x,x,x,x"; + let input = format!( + "C0A80001,invalid_ip,2,1234567891.123456,0.001,{GOOD_FIELDS_5_TO_11}" + ); - let result = parse_timesync_result(input).unwrap(); + let result = parse_timesync_result(&input).unwrap(); assert_eq!(result.ip_addr, IpAddr::V6(Ipv6Addr::UNSPECIFIED)); // Still syncs if ref_id is not in TIMESYNC_REF_IDS_NO_PEER_SYNC assert!(result.sync); } + #[test] + fn test_parse_timesync_result_not_synced_high_max_error() { + let input = "C0A80001,192.168.0.1,2,1234567891.123456,0.001,0.001,0.001,x,x,x,0.3,0.1"; + let result = parse_timesync_result(input).unwrap(); + assert_eq!(result.root_delay, 0.3); + assert_eq!(result.root_dispersion, 0.1); + assert_eq!(result.max_error, 0.001 + 0.3 / 2.0 + 0.1); + assert!(!result.sync); + } + #[test] fn test_parse_timesync_result_too_few_fields() { let input = "C0A80001,192.168.0.1,2,1234567891.123456"; @@ -257,9 +370,11 @@ mod tests { #[test] fn test_parse_timesync_result_invalid_ref_id() { - let input = "INVALID,192.168.0.1,2,1234567891.123456,0.001,x,x,x,x,x"; + let input = format!( + "INVALID,192.168.0.1,2,1234567891.123456,0.001,{GOOD_FIELDS_5_TO_11}" + ); - let result = parse_timesync_result(input); + let result = parse_timesync_result(&input); assert!(result.is_err()); match result.unwrap_err() { TimeSyncError::FailedToParse { reason, .. } => { @@ -271,10 +386,11 @@ mod tests { #[test] fn test_parse_timesync_result_invalid_stratum() { - let input = - "C0A80001,192.168.0.1,invalid,1234567891.123456,0.001,x,x,x,x,x"; + let input = format!( + "C0A80001,192.168.0.1,invalid,1234567891.123456,0.001,{GOOD_FIELDS_5_TO_11}" + ); - let result = parse_timesync_result(input); + let result = parse_timesync_result(&input); assert!(result.is_err()); match result.unwrap_err() { TimeSyncError::FailedToParse { reason, .. } => { @@ -286,9 +402,11 @@ mod tests { #[test] fn test_parse_timesync_result_invalid_ref_time() { - let input = "C0A80001,192.168.0.1,2,invalid,0.001,x,x,x,x,x"; + let input = format!( + "C0A80001,192.168.0.1,2,invalid,0.001,{GOOD_FIELDS_5_TO_11}" + ); - let result = parse_timesync_result(input); + let result = parse_timesync_result(&input); assert!(result.is_err()); match result.unwrap_err() { TimeSyncError::FailedToParse { reason, .. } => { @@ -300,10 +418,11 @@ mod tests { #[test] fn test_parse_timesync_result_invalid_correction() { - let input = - "C0A80001,192.168.0.1,2,1234567891.123456,invalid,x,x,x,x,x"; + let input = format!( + "C0A80001,192.168.0.1,2,1234567891.123456,invalid,{GOOD_FIELDS_5_TO_11}" + ); - let result = parse_timesync_result(input); + let result = parse_timesync_result(&input); assert!(result.is_err()); match result.unwrap_err() { TimeSyncError::FailedToParse { reason, .. } => { @@ -312,4 +431,60 @@ mod tests { _ => panic!("Expected FailedToParse error"), } } + + #[test] + fn test_parse_timesync_result_invalid_last_offset() { + let input = "C0A80001,192.168.0.1,2,1234567891.123456,0.001,invalid,0.001,x,x,x,0.01,0.001"; + + let result = parse_timesync_result(input); + assert!(result.is_err()); + match result.unwrap_err() { + TimeSyncError::FailedToParse { reason, .. } => { + assert_eq!(reason, "bad last_offset"); + } + _ => panic!("Expected FailedToParse error"), + } + } + + #[test] + fn test_parse_timesync_result_invalid_rms_offset() { + let input = "C0A80001,192.168.0.1,2,1234567891.123456,0.001,0.001,invalid,x,x,x,0.01,0.001"; + + let result = parse_timesync_result(input); + assert!(result.is_err()); + match result.unwrap_err() { + TimeSyncError::FailedToParse { reason, .. } => { + assert_eq!(reason, "bad rms_offset"); + } + _ => panic!("Expected FailedToParse error"), + } + } + + #[test] + fn test_parse_timesync_result_invalid_root_delay() { + let input = "C0A80001,192.168.0.1,2,1234567891.123456,0.001,0.001,0.001,x,x,x,invalid,0.001"; + + let result = parse_timesync_result(input); + assert!(result.is_err()); + match result.unwrap_err() { + TimeSyncError::FailedToParse { reason, .. } => { + assert_eq!(reason, "bad root_delay"); + } + _ => panic!("Expected FailedToParse error"), + } + } + + #[test] + fn test_parse_timesync_result_invalid_root_dispersion() { + let input = "C0A80001,192.168.0.1,2,1234567891.123456,0.001,0.001,0.001,x,x,x,0.01,invalid"; + + let result = parse_timesync_result(input); + assert!(result.is_err()); + match result.unwrap_err() { + TimeSyncError::FailedToParse { reason, .. } => { + assert_eq!(reason, "bad root_dispersion"); + } + _ => panic!("Expected FailedToParse error"), + } + } } diff --git a/ntp-admin/types/versions/src/add_max_error_and_offset/mod.rs b/ntp-admin/types/versions/src/add_max_error_and_offset/mod.rs new file mode 100644 index 00000000000..2f439cf84c4 --- /dev/null +++ b/ntp-admin/types/versions/src/add_max_error_and_offset/mod.rs @@ -0,0 +1,11 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Version `ADD_MAX_ERROR_AND_OFFSET` of the NTP Admin API. +//! +//! This version extends [`timesync::TimeSync`] with additional fields for +//! clock error estimation: `last_offset`, `rms_offset`, `root_dispersion`, +//! and `max_error` (= root_delay/2 + root_dispersion). + +pub mod timesync; diff --git a/ntp-admin/types/versions/src/add_max_error_and_offset/timesync.rs b/ntp-admin/types/versions/src/add_max_error_and_offset/timesync.rs new file mode 100644 index 00000000000..df13819734a --- /dev/null +++ b/ntp-admin/types/versions/src/add_max_error_and_offset/timesync.rs @@ -0,0 +1,57 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::v1; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::net::IpAddr; + +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct TimeSync { + /// The synchronization state of the sled, true when the system clock + /// and the NTP clock are in sync (to within a small window). + pub sync: bool, + /// The NTP reference ID. + pub ref_id: u32, + /// The NTP reference IP address. + pub ip_addr: IpAddr, + /// The NTP stratum (our upstream's stratum plus one). + pub stratum: u8, + /// The NTP reference time (i.e. what chrony thinks the current time is, + /// not necessarily the current system time). + pub ref_time: f64, + // This could be f32, but there is a problem with progenitor/typify + // where, although the f32 correctly becomes "float" (and not "double") in + // the API spec, that "float" gets converted back to f64 when generating + // the client. + /// The current offset between the NTP clock and system clock, in seconds. + pub correction: f64, + /// The offset of the NTP clock from the reference clock in the last + /// measurement, in seconds. + pub last_offset: f64, + /// The root-mean-square (RMS) of recent NTP clock offsets from the + /// reference clock, in seconds. + pub rms_offset: f64, + /// The total round-trip delay to the reference clock, in seconds. + pub root_delay: f64, + /// The root dispersion: uncertainty in the NTP clock due to accumulated + /// frequency errors since the last reference update, in seconds. + pub root_dispersion: f64, + /// The maximum estimated clock error: root_delay / 2 + root_dispersion, + /// in seconds. + pub max_error: f64, +} + +impl From for v1::timesync::TimeSync { + fn from(s: TimeSync) -> Self { + v1::timesync::TimeSync { + sync: s.sync, + ref_id: s.ref_id, + ip_addr: s.ip_addr, + stratum: s.stratum, + ref_time: s.ref_time, + correction: s.correction, + } + } +} diff --git a/ntp-admin/types/versions/src/latest.rs b/ntp-admin/types/versions/src/latest.rs index fd3284dfc4a..c4d3590c7c9 100644 --- a/ntp-admin/types/versions/src/latest.rs +++ b/ntp-admin/types/versions/src/latest.rs @@ -5,5 +5,5 @@ //! Re-exports of the latest versions of all published types. pub mod timesync { - pub use crate::v1::timesync::TimeSync; + pub use crate::v2::timesync::TimeSync; } diff --git a/ntp-admin/types/versions/src/lib.rs b/ntp-admin/types/versions/src/lib.rs index 5a98caa1fbe..12837015aec 100644 --- a/ntp-admin/types/versions/src/lib.rs +++ b/ntp-admin/types/versions/src/lib.rs @@ -32,3 +32,5 @@ pub mod latest; #[path = "initial/mod.rs"] pub mod v1; +#[path = "add_max_error_and_offset/mod.rs"] +pub mod v2; diff --git a/openapi/ntp-admin/ntp-admin-1.0.0-aeffc2.json.gitstub b/openapi/ntp-admin/ntp-admin-1.0.0-aeffc2.json.gitstub new file mode 100644 index 00000000000..15b03e90124 --- /dev/null +++ b/openapi/ntp-admin/ntp-admin-1.0.0-aeffc2.json.gitstub @@ -0,0 +1 @@ +a582b520e5926304437d844dd68dcb03dd1d04cf:openapi/ntp-admin/ntp-admin-1.0.0-aeffc2.json diff --git a/openapi/ntp-admin/ntp-admin-1.0.0-aeffc2.json b/openapi/ntp-admin/ntp-admin-2.0.0-f8d00a.json similarity index 69% rename from openapi/ntp-admin/ntp-admin-1.0.0-aeffc2.json rename to openapi/ntp-admin/ntp-admin-2.0.0-f8d00a.json index 97fc34581cc..d4e8e437cc0 100644 --- a/openapi/ntp-admin/ntp-admin-1.0.0-aeffc2.json +++ b/openapi/ntp-admin/ntp-admin-2.0.0-f8d00a.json @@ -7,7 +7,7 @@ "url": "https://oxide.computer", "email": "api@oxide.computer" }, - "version": "1.0.0" + "version": "2.0.0" }, "paths": { "/timesync": { @@ -60,7 +60,7 @@ "type": "object", "properties": { "correction": { - "description": "The current offset between the NTP clock and system clock.", + "description": "The current offset between the NTP clock and system clock, in seconds.", "type": "number", "format": "double" }, @@ -69,6 +69,16 @@ "type": "string", "format": "ip" }, + "last_offset": { + "description": "The offset of the NTP clock from the reference clock in the last measurement, in seconds.", + "type": "number", + "format": "double" + }, + "max_error": { + "description": "The maximum estimated clock error: root_delay / 2 + root_dispersion, in seconds.", + "type": "number", + "format": "double" + }, "ref_id": { "description": "The NTP reference ID.", "type": "integer", @@ -80,6 +90,21 @@ "type": "number", "format": "double" }, + "rms_offset": { + "description": "The root-mean-square (RMS) of recent NTP clock offsets from the reference clock, in seconds.", + "type": "number", + "format": "double" + }, + "root_delay": { + "description": "The total round-trip delay to the reference clock, in seconds.", + "type": "number", + "format": "double" + }, + "root_dispersion": { + "description": "The root dispersion: uncertainty in the NTP clock due to accumulated frequency errors since the last reference update, in seconds.", + "type": "number", + "format": "double" + }, "stratum": { "description": "The NTP stratum (our upstream's stratum plus one).", "type": "integer", @@ -94,8 +119,13 @@ "required": [ "correction", "ip_addr", + "last_offset", + "max_error", "ref_id", "ref_time", + "rms_offset", + "root_delay", + "root_dispersion", "stratum", "sync" ] diff --git a/openapi/ntp-admin/ntp-admin-latest.json b/openapi/ntp-admin/ntp-admin-latest.json index 30138bae0ea..a9496948dfe 120000 --- a/openapi/ntp-admin/ntp-admin-latest.json +++ b/openapi/ntp-admin/ntp-admin-latest.json @@ -1 +1 @@ -ntp-admin-1.0.0-aeffc2.json \ No newline at end of file +ntp-admin-2.0.0-f8d00a.json \ No newline at end of file diff --git a/sled-agent/config-reconciler/src/reconciler_task/zones.rs b/sled-agent/config-reconciler/src/reconciler_task/zones.rs index c97387362ec..97b72f539de 100644 --- a/sled-agent/config-reconciler/src/reconciler_task/zones.rs +++ b/sled-agent/config-reconciler/src/reconciler_task/zones.rs @@ -26,7 +26,7 @@ use illumos_utils::zone::Api as _; use illumos_utils::zone::DeleteAddressError; use illumos_utils::zone::OmicronZoneConfigExt; use illumos_utils::zone::Zones; -use ntp_admin_client::types::TimeSync; +use ntp_admin_client::v1::types::TimeSync; use omicron_common::address::Ipv6Subnet; use omicron_uuid_kinds::OmicronZoneUuid; use sled_agent_types::inventory::ConfigReconcilerInventoryResult; @@ -77,7 +77,9 @@ pub enum TimeSyncError { #[error("multiple running NTP zones - this should never happen!")] MultipleRunningNtpZones, #[error("failed to communicate with NTP admin server")] - NtpAdmin(#[from] ntp_admin_client::Error), + NtpAdmin( + #[from] ntp_admin_client::v1::Error, + ), #[error("failed to execute chronyc within NTP zone")] ExecuteChronyc(#[source] RunCommandError), #[error( @@ -532,7 +534,19 @@ impl OmicronZones { match &self.timesync_config { TimeSyncConfig::Normal => { match self.timesync_status_from_ntp_zone(log).await { - Ok(timesync) => TimeSyncStatus::TimeSync(timesync), + Ok(timesync) => { + if !timesync.sync { + // TODO-completeness: Include maximum error and + // dispersion after we update to the new client. + warn!( + log, + "time is not yet synchronized"; + "correction_s" => timesync.correction, + "stratum" => timesync.stratum, + ); + } + TimeSyncStatus::TimeSync(timesync) + } Err(err) => { TimeSyncStatus::FailedToGetSyncStatus(Arc::new(err)) } @@ -575,7 +589,7 @@ impl OmicronZones { return Err(TimeSyncError::MultipleRunningNtpZones); } - let client = ntp_admin_client::Client::new( + let client = ntp_admin_client::v1::Client::new( &format!("http://{ntp_admin_address}"), log.clone(), ); diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index fff8ed7247c..e839c980270 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -91,8 +91,8 @@ use nexus_types::deployment::{ Blueprint, BlueprintZoneType, blueprint_zone_type, }; use nexus_types::internal_api::params::ExternalPortDiscovery; -use ntp_admin_client::ClientInfo as _; -use ntp_admin_client::{ +use ntp_admin_client::v1::ClientInfo as _; +use ntp_admin_client::v1::{ Client as NtpAdminClient, Error as NtpAdminError, types::TimeSync, }; use omicron_common::address::BOOTSTRAP_AGENT_HTTP_PORT; @@ -210,7 +210,7 @@ pub enum SetupServiceError { NexusApi(#[from] NexusError), #[error("Error making HTTP request to NTP Admin Server")] - NtpAdminApi(#[from] NtpAdminError), + NtpAdminApi(#[from] NtpAdminError), #[error("Error contacting ddmd")] DdmError(#[from] DdmError), @@ -723,15 +723,7 @@ impl ServiceInner { ) -> Result { info!(client.inner(), "Checking time synchronization"); - let ts = client.timesync().await?.into_inner(); - Ok(TimeSync { - sync: ts.sync, - ref_id: ts.ref_id, - ip_addr: ts.ip_addr, - stratum: ts.stratum, - ref_time: ts.ref_time, - correction: ts.correction, - }) + Ok(client.timesync().await?.into_inner()) } async fn wait_for_timesync(