diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml index 13f737e40c0..3c518765503 100644 --- a/oximeter/db/Cargo.toml +++ b/oximeter/db/Cargo.toml @@ -153,5 +153,9 @@ name = "protocol" harness = false [[bench]] -name = "oxql" +name = "oxql_field" +harness = false + +[[bench]] +name = "oxql_measurement" harness = false diff --git a/oximeter/db/benches/README.md b/oximeter/db/benches/README.md index 30a28f5e9aa..93531297880 100644 --- a/oximeter/db/benches/README.md +++ b/oximeter/db/benches/README.md @@ -17,16 +17,16 @@ To restore into a test database. Note: take care not to restore into a real Oxid $ oximeter/db/benches/load_field_tables.sh /tmp/oximeter-field-bench [port] ``` -Then run the benchmark: +Then run the benchmark. `BENCH_METRIC` selects between server-side wall-clock (`latency`) and CPU time (`cpu_time`): ```bash -$ cargo bench --package oximeter-db --bench oxql -- --save-baseline main +$ BENCH_METRIC=latency cargo bench --package oximeter-db --bench oxql_field -- --save-baseline main ``` To evaluate performance changes, run the benchmark using a new baseline: ```bash -$ cargo bench --package oximeter-db --bench oxql -- --save-baseline my-branch +$ BENCH_METRIC=latency cargo bench --package oximeter-db --bench oxql_field -- --save-baseline my-branch ``` Then compare with `critcmp`: @@ -34,3 +34,31 @@ Then compare with `critcmp`: ```bash $ critcmp main my-branch ``` + +## Measurement query + +We have a separate benchmark that measures the performance of combined field and measurement lookup, fetching a set of representative series using `| last 1` to simulate the use case of fetching recent metrics to ship to Prometheus or similar. This benchmark requires backing up and restoring measurement tables. Use a limited time window, since these tables grow to tens of gigabytes or more on real racks. 
+ +To fetch measurement data: + +```bash +$ mkdir -p /tmp/oximeter-measurement-bench +$ START=2026-05-01T00:00:00 +$ END=2026-05-01T01:00:00 +$ oximeter/db/benches/backup_measurement_tables.sh /tmp/oximeter-measurement-bench measurements_cumulativeu64 $START $END [port] +$ oximeter/db/benches/backup_measurement_tables.sh /tmp/oximeter-measurement-bench measurements_f32 $START $END [port] +``` + +To restore into a test database: + +```bash +$ oximeter/db/benches/load_measurement_tables.sh /tmp/oximeter-measurement-bench measurements_cumulativeu64 [port] +$ oximeter/db/benches/load_measurement_tables.sh /tmp/oximeter-measurement-bench measurements_f32 [port] +``` + +Run the benchmark. `OXQL_BENCH_START_TIME` and `OXQL_BENCH_END_TIME` should fall within the window you backed, in `YYYY-MM-DDTHH:MM:SS` format: + +```bash +$ OXQL_BENCH_START_TIME=$START OXQL_BENCH_END_TIME=$END BENCH_METRIC=latency \ + cargo bench --package oximeter-db --bench oxql_measurement -- --save-baseline main +``` diff --git a/oximeter/db/benches/backup_field_tables.sh b/oximeter/db/benches/backup_field_tables.sh index d842e362a87..1e59624de36 100755 --- a/oximeter/db/benches/backup_field_tables.sh +++ b/oximeter/db/benches/backup_field_tables.sh @@ -32,7 +32,7 @@ for table in timeseries_schema fields_{bool,i8,i16,i32,i64,ipaddr,string,u8,u16, fi output="$OUTPUT_DIR/${table}.native.gz" echo "Backing up $DATABASE.$table ($count rows) to $output" - clickhouse client --port "$PORT" \ + clickhouse client --port "$PORT" --compression=1 \ --query "SELECT * FROM $DATABASE.$table FORMAT Native" \ | gzip > "$output" done diff --git a/oximeter/db/benches/backup_measurement_tables.sh b/oximeter/db/benches/backup_measurement_tables.sh new file mode 100755 index 00000000000..b1f303fb77a --- /dev/null +++ b/oximeter/db/benches/backup_measurement_tables.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# +# Dump a partial ClickHouse measurement table (time-windowed slice) to disk in +# native format. 
+# Run against a test rack with realistic oximeter data. Used to
+# capture test data for benchmarking.
+#
+# Usage: ./backup_measurement_tables.sh <output_dir> <table> <window_start> <window_end> [port]
+
+set -euo pipefail
+
+if [[ $# -lt 4 ]]; then
+    echo "Usage: $0 <output_dir> <table> <window_start> <window_end> [port]" >&2
+    exit 1
+fi
+
+OUTPUT_DIR="$1"
+TABLE="$2"
+WINDOW_START="$3"
+WINDOW_END="$4"
+PORT="${5:-9000}"
+DATABASE="oximeter"
+
+mkdir -p "$OUTPUT_DIR"
+
+# Back up a single measurement table. These tables can be very large, so we
+# limit to the specified time range, and only operate on one measurement
+# table at a time.
+
+# Note: Use SELECT rather than RESTORE because we may not have access to the
+# remote ClickHouse's local disk, or have backups enabled at all.
+count=$(clickhouse client --port "$PORT" \
+    --query "SELECT count() FROM $DATABASE.$TABLE WHERE timestamp >= '$WINDOW_START' AND timestamp < '$WINDOW_END'")
+if [[ "$count" -eq 0 ]]; then
+    echo "No rows in $DATABASE.$TABLE for window; nothing to back up"
+    exit 0
+fi
+output="$OUTPUT_DIR/${TABLE}.native.gz"
+echo "Backing up $DATABASE.$TABLE ($count rows) to $output"
+clickhouse client --port "$PORT" --compression=1 \
+    --query "SELECT * FROM $DATABASE.$TABLE WHERE timestamp >= '$WINDOW_START' AND timestamp < '$WINDOW_END' FORMAT Native" \
+    | gzip > "$output"
diff --git a/oximeter/db/benches/common/mod.rs b/oximeter/db/benches/common/mod.rs
new file mode 100644
index 00000000000..5537a6df3e3
--- /dev/null
+++ b/oximeter/db/benches/common/mod.rs
@@ -0,0 +1,148 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Shared helpers for OxQL benchmarks.
+
+// Copyright 2026 Oxide Computer Company
+
+use criterion::measurement::WallTime;
+use criterion::{BenchmarkGroup, BenchmarkId};
+use oximeter_db::Client;
+use oximeter_db::oxql::query::QueryAuthzScope;
+use std::net::IpAddr;
+use std::net::SocketAddr;
+use std::sync::Arc;
+use std::time::Duration;
+
+pub const DEFAULT_CLICKHOUSE_PORT: u16 = 9000;
+
+/// The metric to benchmark.
+///
+/// Set via BENCH_METRIC env var.
+pub enum BenchMetric {
+    /// Server-side query latency.
+ Latency, + /// Total cpu time (user and system). + CpuTime, +} + +pub fn bench_metric() -> BenchMetric { + match std::env::var("BENCH_METRIC").as_deref() { + Ok("cpu_time") => BenchMetric::CpuTime, + Ok("latency") => BenchMetric::Latency, + _ => panic!("BENCH_METRIC must be 'cpu_time' or 'latency'"), + } +} + +pub fn get_clickhouse_addr() -> IpAddr { + std::env::var("CLICKHOUSE_ADDRESS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or_else(|| IpAddr::from([127, 0, 0, 1])) +} + +pub fn get_clickhouse_port() -> u16 { + std::env::var("CLICKHOUSE_PORT") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(DEFAULT_CLICKHOUSE_PORT) +} + +pub fn get_socket_addr() -> SocketAddr { + SocketAddr::new(get_clickhouse_addr(), get_clickhouse_port()) +} + +pub fn get_client(rt: &tokio::runtime::Runtime) -> Arc { + let addr = get_socket_addr(); + let log = slog::Logger::root(slog::Discard, slog::o!()); + + rt.block_on(async { + let client = Arc::new(Client::new(addr, &log)); + client.ping().await.unwrap(); + client + }) +} + +/// Benchmark a single OxQL query using criterion, measuring either server-side +/// latency or cpu time per [`BenchMetric`]. +pub fn bench_oxql_query( + group: &mut BenchmarkGroup<'_, WallTime>, + rt: &tokio::runtime::Runtime, + client: Arc, + bench_name: &str, + bench_id: String, + query: String, + metric: &BenchMetric, +) { + // Run the query once without recording performance to warm caches. 
+ rt.block_on(client.oxql_query(&query, QueryAuthzScope::Fleet)).unwrap(); + + group.bench_function(BenchmarkId::new(bench_name, &bench_id), |bench| { + match metric { + BenchMetric::CpuTime => { + bench.to_async(rt).iter_custom(|iters| { + let client = client.clone(); + let query = query.clone(); + async move { + let mut total = Duration::ZERO; + for _ in 0..iters { + let result = client + .oxql_query(&query, QueryAuthzScope::Fleet) + .await + .unwrap(); + let cpu_us: i64 = result + .query_summaries + .iter() + .map(|s| { + // Profile events are occasionally and + // inexplicably empty; default to 0 + // for rare missing events. + s.profile_summary + .get("UserTimeMicroseconds") + .copied() + .unwrap_or(0) + + s.profile_summary + .get("SystemTimeMicroseconds") + .copied() + .unwrap_or(0) + }) + .sum(); + total += + Duration::from_micros(cpu_us.max(0) as u64); + } + total + } + }); + } + BenchMetric::Latency => { + bench.to_async(rt).iter_custom(|iters| { + let client = client.clone(); + let query = query.clone(); + async move { + let mut total = Duration::ZERO; + for _ in 0..iters { + let result = client + .oxql_query(&query, QueryAuthzScope::Fleet) + .await + .unwrap(); + let real_us: i64 = result + .query_summaries + .iter() + .map(|s| { + s.profile_summary + .get("RealTimeMicroseconds") + .copied() + .unwrap_or(0) + }) + .sum(); + total += + Duration::from_micros(real_us.max(0) as u64); + } + total + } + }); + } + } + }); +} diff --git a/oximeter/db/benches/load_measurement_tables.sh b/oximeter/db/benches/load_measurement_tables.sh new file mode 100755 index 00000000000..547a536fb4c --- /dev/null +++ b/oximeter/db/benches/load_measurement_tables.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# +# Load a measurement table backup into a ClickHouse for benchmarking. +# Crashes if the destination table already contains data. +# +# Usage: ./load_measurement_tables.sh
+#     <input_dir> <table> [port]
+
+set -euo pipefail
+
+if [[ $# -lt 2 ]]; then
+    echo "Usage: $0 <input_dir> <table> [port]" >&2
+    exit 1
+fi
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+SCHEMA_DIR="$SCRIPT_DIR/../schema/single-node"
+
+INPUT_DIR="$1"
+TABLE="$2"
+PORT="${3:-9000}"
+
+DATABASE="oximeter"
+
+# Initialize schema. db-init.sql is fully IF NOT EXISTS, so this is a no-op
+# if the schema is already in place.
+echo "Initializing database schema..."
+clickhouse client --port "$PORT" --multiquery < "$SCHEMA_DIR/db-init.sql"
+
+# Error if destination table already has data.
+count=$(clickhouse client --port "$PORT" \
+    --query "SELECT count() FROM $DATABASE.$TABLE")
+if [[ "$count" -gt 0 ]]; then
+    echo "Error: $DATABASE.$TABLE already contains data ($count rows)"
+    echo "Refusing to load into a non-empty table."
+    exit 1
+fi
+
+input="$INPUT_DIR/${TABLE}.native.gz"
+if [[ ! -f "$input" ]]; then
+    echo "No backup for table $TABLE in $INPUT_DIR"
+    exit 1
+fi
+
+# Note: Use INSERT rather than RESTORE because we may not have access to the
+# local ClickHouse's disk, or have backups enabled at all.
+echo "Loading $DATABASE.$TABLE from $input" +gunzip -c "$input" | clickhouse client --port "$PORT" \ + --query "INSERT INTO $DATABASE.$TABLE FORMAT Native" diff --git a/oximeter/db/benches/oxql.rs b/oximeter/db/benches/oxql_field.rs similarity index 52% rename from oximeter/db/benches/oxql.rs rename to oximeter/db/benches/oxql_field.rs index faba4864247..778c31f8029 100644 --- a/oximeter/db/benches/oxql.rs +++ b/oximeter/db/benches/oxql_field.rs @@ -8,39 +8,15 @@ // Copyright 2026 Oxide Computer Company -use criterion::BenchmarkId; +mod common; + +use common::{bench_metric, bench_oxql_query, get_client, get_socket_addr}; use criterion::Criterion; use criterion::{criterion_group, criterion_main}; -use oximeter_db::Client; use oximeter_db::native::Connection; -use oximeter_db::oxql::query::QueryAuthzScope; use rand::seq::SliceRandom; -use std::net::IpAddr; -use std::net::SocketAddr; -use std::sync::Arc; -use std::time::Duration; use uuid::Uuid; -const DEFAULT_CLICKHOUSE_PORT: u16 = 9000; - -/// The metric to benchmark. -/// -/// Set via BENCH_METRIC env var. -enum BenchMetric { - /// Wall clock latency. - Latency, - /// Total cpu time (user and system). - CpuTime, -} - -fn bench_metric() -> BenchMetric { - match std::env::var("BENCH_METRIC").as_deref() { - Ok("cpu_time") => BenchMetric::CpuTime, - Ok("latency") => BenchMetric::Latency, - _ => panic!("BENCH_METRIC must be 'cpu_time' or 'latency'"), - } -} - /// Timeseries to benchmark, spanning a range of field table counts. 
const TIMESERIES_NAMES: &[&str] = &[ "crucible_upstairs:flush", @@ -57,35 +33,6 @@ struct TimeseriesInfo { cardinality: u64, } -fn get_clickhouse_addr() -> IpAddr { - std::env::var("CLICKHOUSE_ADDRESS") - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or_else(|| IpAddr::from([127, 0, 0, 1])) -} - -fn get_clickhouse_port() -> u16 { - std::env::var("CLICKHOUSE_PORT") - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(DEFAULT_CLICKHOUSE_PORT) -} - -fn get_socket_addr() -> SocketAddr { - SocketAddr::new(get_clickhouse_addr(), get_clickhouse_port()) -} - -fn get_client(rt: &tokio::runtime::Runtime) -> Arc { - let addr = get_socket_addr(); - let log = slog::Logger::root(slog::Discard, slog::o!()); - - rt.block_on(async { - let client = Arc::new(Client::new(addr, &log)); - client.ping().await.unwrap(); - client - }) -} - /// Fetch field table count and cardinality for each timeseries. fn get_timeseries_info(rt: &tokio::runtime::Runtime) -> Vec { let names_list = TIMESERIES_NAMES @@ -170,8 +117,6 @@ fn oxql_field_lookup(c: &mut Criterion) { let query = format!("get {} | filter timestamp > @2200-01-01", info.name); - rt.block_on(client.oxql_query(&query, QueryAuthzScope::Fleet)).unwrap(); - let bench_id = format!( "{} tables/{:0width$} keys: {}", info.field_tables, @@ -180,57 +125,14 @@ fn oxql_field_lookup(c: &mut Criterion) { width = cardinality_width ); - group.bench_function( - BenchmarkId::new("field_lookup", &bench_id), - |bench| match metric { - BenchMetric::CpuTime => { - bench.to_async(&rt).iter_custom(|iters| { - let client = client.clone(); - let query = query.clone(); - async move { - let mut total = Duration::ZERO; - for _ in 0..iters { - let result = client - .oxql_query(&query, QueryAuthzScope::Fleet) - .await - .unwrap(); - let cpu_us: i64 = result - .query_summaries - .iter() - .map(|s| { - // Profile events are occasionally and - // inexplicably empty; default to 0 - // for rare missing events. 
- s.profile_summary - .get("UserTimeMicroseconds") - .copied() - .unwrap_or(0) - + s.profile_summary - .get("SystemTimeMicroseconds") - .copied() - .unwrap_or(0) - }) - .sum(); - total += - Duration::from_micros(cpu_us.max(0) as u64); - } - total - } - }); - } - BenchMetric::Latency => { - bench.to_async(&rt).iter(|| { - let client = client.clone(); - let query = query.clone(); - async move { - client - .oxql_query(&query, QueryAuthzScope::Fleet) - .await - .unwrap() - } - }); - } - }, + bench_oxql_query( + &mut group, + &rt, + client.clone(), + "field_lookup", + bench_id, + query, + &metric, ); } diff --git a/oximeter/db/benches/oxql_measurement.rs b/oximeter/db/benches/oxql_measurement.rs new file mode 100644 index 00000000000..4d8566aef03 --- /dev/null +++ b/oximeter/db/benches/oxql_measurement.rs @@ -0,0 +1,97 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Benchmark for OxQL measurement query performance. +//! +//! Tests queries that retrieve measurement data within specified time windows. + +// Copyright 2026 Oxide Computer Company + +mod common; + +use common::{bench_metric, bench_oxql_query, get_client}; +use criterion::Criterion; +use criterion::{criterion_group, criterion_main}; + +// Benchmark measurement queries. Unlike the field benchmark, which elides +// measurement query performance by filtering on a far-future time, this +// benchmark uses a user-provided time range that contains measurements. We +// query a range of timeseries of different types, and pass them through +// `last 1` to simulate the common workload of fetching recent metrics for +// export to Prometheus or similar. 
+fn oxql_measurement_query(c: &mut Criterion) { + let metric = bench_metric(); + + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap(); + + let client = get_client(&rt); + let mut group = c.benchmark_group("oxql_measurement"); + + // Get the measurement time range from environment variables + let start_time = std::env::var("OXQL_BENCH_START_TIME") + .expect("OXQL_BENCH_START_TIME must be set as YYYY-MM-DDTHH:MM:SS"); + let end_time = std::env::var("OXQL_BENCH_END_TIME") + .expect("OXQL_BENCH_END_TIME must be set as YYYY-MM-DDTHH:MM:SS"); + + // Choose timeseries of different types and observed cardinality. As of + // this writing, virtual_machine:vcpu_usage and hardware_component:temperature + // are relatively high-cardinality, and zone:cpu_nsec and + // hardware_component:fan_speed are lower cardinality. + const MEASUREMENT_TIMESERIES: &[(&str, &str)] = &[ + // CumulativeU64 + ("virtual_machine:vcpu_usage", "cumulativeu64"), + ("zone:cpu_nsec", "cumulativeu64"), + // F32 + ("hardware_component:temperature", "f32"), + ("hardware_component:fan_speed", "f32"), + ]; + + let start_dt = + chrono::NaiveDateTime::parse_from_str(&start_time, "%Y-%m-%dT%H:%M:%S") + .expect("Invalid start time format"); + let end_dt = + chrono::NaiveDateTime::parse_from_str(&end_time, "%Y-%m-%dT%H:%M:%S") + .expect("Invalid end time format"); + let duration = end_dt - start_dt; + let duration_str = if duration.num_minutes() < 60 { + format!("{}m", duration.num_minutes()) + } else { + format!("{}h", duration.num_hours()) + }; + + for (timeseries_name, measurement_type) in MEASUREMENT_TIMESERIES { + let query = format!( + "get {} | filter timestamp >= @{} && timestamp < @{} | last 1", + timeseries_name, start_time, end_time + ); + + let bench_id = format!( + "{}_last_1/{}: {}", + duration_str, measurement_type, timeseries_name + ); + + bench_oxql_query( + &mut group, + &rt, + client.clone(), + "query", + bench_id, + query, + &metric, + ); + } + + 
group.finish(); +} + +criterion_group!( + name = benches; + config = Criterion::default().sample_size(50).noise_threshold(0.05); + targets = oxql_measurement_query +); + +criterion_main!(benches);