From e67554aad73b395e3afc508992eb92b6fdc44b43 Mon Sep 17 00:00:00 2001 From: Metbcy Date: Sun, 17 May 2026 21:24:50 +0000 Subject: [PATCH] feat(bench): expand diff-core benchmark with realistic shapes (#29) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #29. The previous benches/diff.rs covered the axios fixture pair and a 200-component synthetic group with a single workload shape. That's fine as a smoke test but not enough to catch perf regressions in the diff hot path — especially the kind that wouldn't show up until a monorepo-scale SBOM lands in production. This expands the bench to three sizes (500 / 5_000 / opt-in 20_000) crossed with three workload shapes per size: - end_to_end: realistic mix (80% unchanged, 10% version_changed, 5% license_changed, 5% added/removed). Production hot path. - self_diff: identical inputs. Isolates group_by_key BTreeMap construction + per-key traversal with no change-pair work. - all_license_changed: every intersecting pair has a different license set. Isolates the license-comparison branch in diff_one_key. Throughput is reported per group so reviewers can read a regression as 'X% per component' across sizes vs an algorithmic shape change. The 20_000-component group is gated behind a new bench-stress cargo feature so the default 'cargo bench --bench diff' stays under the 30s budget. On my laptop (--quick mode) the full set runs in ~1.4s; the default measurement profile fits comfortably under 30s. docs/src/architecture.md gains a 'Perf reference: diff core benchmark' section linking benches/diff.rs and stating the >5% regression threshold for the production end_to_end / 5000 workload, mirroring the established 'Binary size budget' style. Verified: - cargo bench --bench diff --no-run: builds clean. - cargo bench --bench diff -- --quick: all 7 benches produce stable medians (axios + 3 workloads x 2 sizes). - cargo clippy --bench diff --all-features --release -- -D warnings: clean. 
--- Cargo.toml | 7 ++ benches/diff.rs | 171 +++++++++++++++++++++++++++++---------- docs/src/architecture.md | 26 ++++++ 3 files changed, 162 insertions(+), 42 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index cc14a29..1de2f2c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,6 +80,13 @@ wait-timeout = "0.2" criterion = { version = "0.5", default-features = false, features = ["html_reports"] } proptest = "1" +[features] +# Opt-in stress bench group for `cargo bench --bench diff`. Adds a 20_000- +# component-per-side diff workload that pushes total bench wall time past the +# default <30s budget. CI and weekend perf checks should leave this off; use +# it locally when investigating algorithmic changes (#29). +bench-stress = [] + [[bench]] name = "parse" harness = false diff --git a/benches/diff.rs b/benches/diff.rs index cc52422..47f5749 100644 --- a/benches/diff.rs +++ b/benches/diff.rs @@ -1,17 +1,32 @@ -//! Benchmarks for the diff core. Measures `diff::diff(before, after)` on -//! the bundled axios-incident fixture pair (~3 components per side, the -//! shape of a typical small PR diff) and on a synthetic large pair (200 -//! components per side, simulating a monorepo SBOM). +//! Benchmarks for the diff core. Closes #29. //! -//! The synthetic large pair is generated in-process to avoid committing a -//! 200-component fixture file. The shape is deterministic so the bench -//! numbers are stable across runs. +//! The diff core (`src/diff/`) is on the critical path for every bomdrift +//! run, so we want a perf-regression catcher for any change that touches it. +//! This bench measures `diff::diff(before, after)` across three input sizes +//! (small / large / stress) and three diff workloads per size: +//! +//! - **end_to_end**: realistic mix of added / removed / version_changed / +//! license_changed — the production hot path. +//! - **self_diff**: identical inputs, exercises the BTreeMap construction and +//! 
per-key traversal without producing any change pairs. Isolates the cost +//! of `group_by_key` + iteration. +//! - **all_license_changed**: every key intersects, every intersecting pair +//! has a different license set. Isolates the license-comparison branch in +//! `diff_one_key`. +//! +//! Input sizes mirror real-world bomdrift workloads: +//! +//! - **small**: 500 components per side (typical mid-sized JS app). +//! - **large**: 5000 components per side (typical large monorepo). +//! - **stress**: 20_000 components per side (upper-bound stress, gated behind +//! the `bench-stress` cargo feature so the default run stays under 30s). //! //! Run with `cargo bench --bench diff`. +//! Run with stress group: `cargo bench --bench diff --features bench-stress`. use std::fs; -use criterion::{Criterion, black_box, criterion_group, criterion_main}; +use criterion::{BenchmarkId, Criterion, Throughput, black_box, criterion_group, criterion_main}; use bomdrift::diff; use bomdrift::model::{Component, Ecosystem, Relationship, Sbom, SbomFormat}; @@ -23,19 +38,17 @@ fn load(path: &str) -> Sbom { parse::parse_with_format(v, None).expect("must normalize to Sbom") } -fn synth_component(i: usize, version_offset: usize) -> Component { - let name = format!("pkg-{i:04}"); - let mut version = format!("1.{}.0", i % 50); - if i.is_multiple_of(2) { - version = format!("1.{}.0", (i % 50) + version_offset); - } +/// Build one synthetic component. Deterministic — given `i` and `licenses`, +/// the output is byte-identical across runs so bench medians stay stable. 
+fn synth_component(i: usize, version: &str, licenses: Vec<String>) -> Component { + let name = format!("pkg-{i:06}"); let purl = format!("pkg:npm/{name}@{version}"); Component { - name: name.clone(), - version, + name, + version: version.to_string(), ecosystem: Ecosystem::Npm, purl: Some(purl.clone()), - licenses: vec!["MIT".to_string()], + licenses, supplier: None, hashes: Vec::new(), relationship: Relationship::Unknown, @@ -44,8 +57,11 @@ fn synth_component(i: usize, version_offset: usize) -> Component { } } -fn synth_sbom(n: usize, version_offset: usize) -> Sbom { - let components = (0..n).map(|i| synth_component(i, version_offset)).collect(); +/// Build a baseline SBOM of `n` components, all at version 1.0.0, all MIT. +fn synth_sbom(n: usize) -> Sbom { + let components = (0..n) + .map(|i| synth_component(i, "1.0.0", vec!["MIT".to_string()])) + .collect(); Sbom { format: SbomFormat::CycloneDx, serial: None, @@ -53,39 +69,110 @@ fn synth_sbom(n: usize, version_offset: usize) -> Sbom { } } -fn bench_diff(c: &mut Criterion) { - let mut g = c.benchmark_group("diff"); +/// Build the realistic-mix `after` SBOM for the **end_to_end** workload: +/// +/// - 80% of keys: same version, same license (no change — the common case). +/// - 10%: version bumped (`version_changed`). +/// - 5%: license changed in place (`license_changed`). +/// - 5%: new keys not present in `before` (`added`); each one replaces the +/// `i % 20 == 2` key at its position, so the size stays `n` and those +/// replaced `before` keys are absent from `after` (the `removed` side). 
+fn synth_after_mixed(n: usize) -> Sbom { + let components = (0..n) + .map(|i| match i % 20 { + // 5% version-changed (i % 20 in {0}) + 0 => synth_component(i, "1.0.1", vec!["MIT".to_string()]), + // 5% another version-changed slice (10% total) + 10 => synth_component(i, "2.0.0", vec!["MIT".to_string()]), + // 5% license-changed in place + 1 => synth_component(i, "1.0.0", vec!["Apache-2.0".to_string()]), + // 5% new keys (use a disjoint index range so they don't collide) + 2 => synth_component(n + i, "1.0.0", vec!["MIT".to_string()]), + // 80% unchanged + _ => synth_component(i, "1.0.0", vec!["MIT".to_string()]), + }) + .collect(); + Sbom { + format: SbomFormat::CycloneDx, + serial: None, + components, + } +} - // Real fixture pair (axios incident: 3-4 components per side). - let before = load("tests/fixtures/cdx-minimal.json"); - let after = load("tests/fixtures/cdx-after.json"); - g.bench_function("axios_fixture_pair", |b| { +/// Build an `after` SBOM where every key intersects with `before` at the same +/// version but with a different license — isolates the license-comparison +/// branch in `diff_one_key`. +fn synth_after_all_license_changed(n: usize) -> Sbom { + let components = (0..n) + .map(|i| synth_component(i, "1.0.0", vec!["Apache-2.0".to_string()])) + .collect(); + Sbom { + format: SbomFormat::CycloneDx, + serial: None, + components, + } +} + +fn bench_diff(c: &mut Criterion) { + // Real fixture pair (axios incident: 3-4 components per side). Kept from + // the original bench as a smoke check that the bench harness still wires + // through the real parse → diff path, not just synthetic data. 
+ let fixture_before = load("tests/fixtures/cdx-minimal.json"); + let fixture_after = load("tests/fixtures/cdx-after.json"); + c.bench_function("diff_axios_fixture_pair", |b| { b.iter(|| { - let cs = diff::diff(black_box(&before), black_box(&after)); + let cs = diff::diff(black_box(&fixture_before), black_box(&fixture_after)); black_box(cs); }); }); - // Synthetic monorepo-scale pair (200 components per side, half - // version-changed). - let synth_before = synth_sbom(200, 0); - let synth_after = synth_sbom(200, 1); - g.bench_function("synth_monorepo_200", |b| { - b.iter(|| { - let cs = diff::diff(black_box(&synth_before), black_box(&synth_after)); - black_box(cs); + // Synthetic sizes. `bench-stress` adds the 20_000-component group; the + // default set targets the "under 30s total" acceptance criterion. + let mut sizes: Vec<usize> = vec![500, 5_000]; + if cfg!(feature = "bench-stress") { + sizes.push(20_000); + } + + let mut g = c.benchmark_group("diff_synth"); + for &n in &sizes { + // Throughput is reported in components/sec, summed across both sides + // of the diff. Lets reviewers see whether a change is a per-component + // hit or a structural one when the numbers cross sizes. + g.throughput(Throughput::Elements((n as u64) * 2)); + + let before = synth_sbom(n); + let after_mixed = synth_after_mixed(n); + let after_all_lic = synth_after_all_license_changed(n); + + // end_to_end: realistic mix of all four change kinds. + g.bench_with_input(BenchmarkId::new("end_to_end", n), &n, |b, _| { + b.iter(|| { + let cs = diff::diff(black_box(&before), black_box(&after_mixed)); + black_box(cs); + }); }); - }); - // Self-diff (no changes): exercises every key through the BTreeMap - // intersection without producing any add/remove/version_changed work. - g.bench_function("synth_self_diff_200", |b| { - b.iter(|| { - let cs = diff::diff(black_box(&synth_before), black_box(&synth_before)); - black_box(cs); + // self_diff: identical inputs. 
Isolates the BTreeMap construction + // (`group_by_key`) + per-key traversal cost with no change pairs + // produced. This is the lower bound on diff cost for a given size. + g.bench_with_input(BenchmarkId::new("self_diff", n), &n, |b, _| { + b.iter(|| { + let cs = diff::diff(black_box(&before), black_box(&before)); + black_box(cs); + }); }); - }); + // all_license_changed: every intersecting pair has a different + // license set. Isolates the license-comparison branch in + // `diff_one_key` (the version-intersection scan that routes pairs + // to `license_changed`). + g.bench_with_input(BenchmarkId::new("all_license_changed", n), &n, |b, _| { + b.iter(|| { + let cs = diff::diff(black_box(&before), black_box(&after_all_lic)); + black_box(cs); + }); + }); + } g.finish(); } diff --git a/docs/src/architecture.md b/docs/src/architecture.md index fae6596..1d232ad 100644 --- a/docs/src/architecture.md +++ b/docs/src/architecture.md @@ -279,3 +279,29 @@ beyond what `ureq` already brings. the release until split or explicitly waived in the changelog. - Tests-only files (`tests/**`) are exempt — large integration tests are easier to read as one file than as a maze of helpers. + +## Perf reference: diff core benchmark + +The diff core (`src/diff/`) runs on every bomdrift invocation, so it +has its own criterion bench under [`benches/diff.rs`][diff-bench]. +That bench is the perf reference for any change that touches diff +internals (`group_by_key`, `diff_one_key`, the multi-version +pair-by-version logic). + +Three workloads per size (500, 5_000, opt-in 20_000 with +`--features bench-stress`): + +- **end_to_end** — realistic mix (80% unchanged, 10% version_changed, + 5% license_changed, 5% added/removed). Production hot path. +- **self_diff** — identical inputs. Lower bound on diff cost; isolates + `group_by_key` BTreeMap construction + per-key iteration with no + change-pair work. +- **all_license_changed** — every intersecting pair has a different + license set. 
Isolates the license-comparison branch. + +Run with `cargo bench --bench diff` (under 30s on a laptop). Record +medians in the PR description for any change that touches +`src/diff/`; a >5% regression on the production `end_to_end / 5000` +workload warrants either a fix or an explicit explanation. + +[diff-bench]: https://github.com/Metbcy/bomdrift/blob/main/benches/diff.rs