From e67554aad73b395e3afc508992eb92b6fdc44b43 Mon Sep 17 00:00:00 2001
From: Metbcy <Amirbredy1@gmail.com>
Date: Sun, 17 May 2026 21:24:50 +0000
Subject: [PATCH] feat(bench): expand diff-core benchmark with realistic shapes
 (#29)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #29.

The previous benches/diff.rs covered the axios fixture pair and a
200-component synthetic group with a single workload shape. That's
fine as a smoke test but not enough to catch perf regressions in the
diff hot path — especially the kind that wouldn't show up until a
monorepo-scale SBOM lands in production.

This expands the bench to three sizes (500 / 5_000 / opt-in 20_000)
crossed with three workload shapes per size:

- end_to_end: realistic mix (80% unchanged, 10% version_changed,
  5% license_changed, 5% added/removed). Production hot path.
- self_diff: identical inputs. Isolates group_by_key BTreeMap
  construction + per-key traversal with no change-pair work.
- all_license_changed: every intersecting pair has a different
  license set. Isolates the license-comparison branch in
  diff_one_key.

Throughput (components/sec, both sides summed) is set per input size so
reviewers can tell a flat 'X% per component' regression from an
algorithmic change that scales with size.

The 20_000-component group is gated behind a new bench-stress cargo
feature so the default 'cargo bench --bench diff' stays under the 30s
budget. On my laptop (--quick mode) the full set runs in ~1.4s; the
default measurement profile fits comfortably under 30s.

docs/src/architecture.md gains a 'Perf reference: diff core
benchmark' section linking benches/diff.rs and stating the >5%
regression threshold for the production end_to_end / 5000 workload,
mirroring the established 'Binary size budget' style.

Verified:
- cargo bench --bench diff --no-run: builds clean.
- cargo bench --bench diff -- --quick: all 7 benches produce stable
  medians (axios + 3 workloads x 2 sizes).
- cargo clippy --bench diff --all-features --release -- -D warnings: clean.
---
 Cargo.toml               |   7 ++
 benches/diff.rs          | 171 +++++++++++++++++++++++++++++----------
 docs/src/architecture.md |  26 ++++++
 3 files changed, 162 insertions(+), 42 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index cc14a29..1de2f2c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -80,6 +80,13 @@ wait-timeout = "0.2"
 criterion = { version = "0.5", default-features = false, features = ["html_reports"] }
 proptest = "1"
 
+[features]
+# Opt-in stress bench group for `cargo bench --bench diff`. Adds a 20_000-
+# component-per-side diff workload that pushes total bench wall time past the
+# default <30s budget. CI and weekend perf checks should leave this off; use
+# it locally when investigating algorithmic changes (#29).
+bench-stress = []
+
 [[bench]]
 name = "parse"
 harness = false
diff --git a/benches/diff.rs b/benches/diff.rs
index cc52422..47f5749 100644
--- a/benches/diff.rs
+++ b/benches/diff.rs
@@ -1,17 +1,32 @@
-//! Benchmarks for the diff core. Measures `diff::diff(before, after)` on
-//! the bundled axios-incident fixture pair (~3 components per side, the
-//! shape of a typical small PR diff) and on a synthetic large pair (200
-//! components per side, simulating a monorepo SBOM).
+//! Benchmarks for the diff core. Closes #29.
 //!
-//! The synthetic large pair is generated in-process to avoid committing a
-//! 200-component fixture file. The shape is deterministic so the bench
-//! numbers are stable across runs.
+//! The diff core (`src/diff/`) is on the critical path for every bomdrift
+//! run, so we want a perf-regression catcher for any change that touches it.
+//! This bench measures `diff::diff(before, after)` across three input sizes
+//! (small / large / stress) and three diff workloads per size:
+//!
+//! - **end_to_end**: realistic mix of added / removed / version_changed /
+//!   license_changed — the production hot path.
+//! - **self_diff**: identical inputs, exercises the BTreeMap construction and
+//!   per-key traversal without producing any change pairs. Isolates the cost
+//!   of `group_by_key` + iteration.
+//! - **all_license_changed**: every key intersects, every intersecting pair
+//!   has a different license set. Isolates the license-comparison branch in
+//!   `diff_one_key`.
+//!
+//! Input sizes mirror real-world bomdrift workloads:
+//!
+//! - **small**: 500 components per side (typical mid-sized JS app).
+//! - **large**: 5000 components per side (typical large monorepo).
+//! - **stress**: 20_000 components per side (upper-bound stress, gated behind
+//!   the `bench-stress` cargo feature so the default run stays under 30s).
 //!
 //! Run with `cargo bench --bench diff`.
+//! Run with stress group: `cargo bench --bench diff --features bench-stress`.
 
 use std::fs;
 
-use criterion::{Criterion, black_box, criterion_group, criterion_main};
+use criterion::{BenchmarkId, Criterion, Throughput, black_box, criterion_group, criterion_main};
 
 use bomdrift::diff;
 use bomdrift::model::{Component, Ecosystem, Relationship, Sbom, SbomFormat};
@@ -23,19 +38,17 @@ fn load(path: &str) -> Sbom {
     parse::parse_with_format(v, None).expect("must normalize to Sbom")
 }
 
-fn synth_component(i: usize, version_offset: usize) -> Component {
-    let name = format!("pkg-{i:04}");
-    let mut version = format!("1.{}.0", i % 50);
-    if i.is_multiple_of(2) {
-        version = format!("1.{}.0", (i % 50) + version_offset);
-    }
+/// Build one synthetic component. Deterministic — given `i`, `version`, and
+/// `licenses`, the output is byte-identical across runs so bench medians stay stable.
+fn synth_component(i: usize, version: &str, licenses: Vec<String>) -> Component {
+    let name = format!("pkg-{i:06}");
     let purl = format!("pkg:npm/{name}@{version}");
     Component {
-        name: name.clone(),
-        version,
+        name,
+        version: version.to_string(),
         ecosystem: Ecosystem::Npm,
         purl: Some(purl.clone()),
-        licenses: vec!["MIT".to_string()],
+        licenses,
         supplier: None,
         hashes: Vec::new(),
         relationship: Relationship::Unknown,
@@ -44,8 +57,11 @@ fn synth_component(i: usize, version_offset: usize) -> Component {
     }
 }
 
-fn synth_sbom(n: usize, version_offset: usize) -> Sbom {
-    let components = (0..n).map(|i| synth_component(i, version_offset)).collect();
+/// Build a baseline SBOM of `n` components, all at version 1.0.0, all MIT.
+fn synth_sbom(n: usize) -> Sbom {
+    let components = (0..n)
+        .map(|i| synth_component(i, "1.0.0", vec!["MIT".to_string()]))
+        .collect();
     Sbom {
         format: SbomFormat::CycloneDx,
         serial: None,
@@ -53,39 +69,110 @@ fn synth_sbom(n: usize, version_offset: usize) -> Sbom {
     }
 }
 
-fn bench_diff(c: &mut Criterion) {
-    let mut g = c.benchmark_group("diff");
+/// Build the realistic-mix `after` SBOM for the **end_to_end** workload:
+///
+/// - 80% of keys: same version, same license (no change — the common case).
+/// - 10%: version bumped (`version_changed`).
+/// - 5%: license changed in place (`license_changed`).
+/// - 5%: new keys not present in `before` (`added`); each one takes the slot
+///   of the `before` key at the same position (`i % 20 == 2`), so the size
+///   stays `n` and those displaced `before` keys form the `removed` side.
+fn synth_after_mixed(n: usize) -> Sbom {
+    let components = (0..n)
+        .map(|i| match i % 20 {
+            // 5% version-changed (i % 20 in {0})
+            0 => synth_component(i, "1.0.1", vec!["MIT".to_string()]),
+            // 5% another version-changed slice (10% total)
+            10 => synth_component(i, "2.0.0", vec!["MIT".to_string()]),
+            // 5% license-changed in place
+            1 => synth_component(i, "1.0.0", vec!["Apache-2.0".to_string()]),
+            // 5% new keys (use a disjoint index range so they don't collide)
+            2 => synth_component(n + i, "1.0.0", vec!["MIT".to_string()]),
+            // 80% unchanged
+            _ => synth_component(i, "1.0.0", vec!["MIT".to_string()]),
+        })
+        .collect();
+    Sbom {
+        format: SbomFormat::CycloneDx,
+        serial: None,
+        components,
+    }
+}
 
-    // Real fixture pair (axios incident: 3-4 components per side).
-    let before = load("tests/fixtures/cdx-minimal.json");
-    let after = load("tests/fixtures/cdx-after.json");
-    g.bench_function("axios_fixture_pair", |b| {
+/// Build an `after` SBOM where every key intersects with `before` at the same
+/// version but with a different license — isolates the license-comparison
+/// branch in `diff_one_key`.
+fn synth_after_all_license_changed(n: usize) -> Sbom {
+    let components = (0..n)
+        .map(|i| synth_component(i, "1.0.0", vec!["Apache-2.0".to_string()]))
+        .collect();
+    Sbom {
+        format: SbomFormat::CycloneDx,
+        serial: None,
+        components,
+    }
+}
+
+fn bench_diff(c: &mut Criterion) {
+    // Real fixture pair (axios incident: 3-4 components per side). Kept from
+    // the original bench as a smoke check that the bench harness still wires
+    // through the real parse → diff path, not just synthetic data.
+    let fixture_before = load("tests/fixtures/cdx-minimal.json");
+    let fixture_after = load("tests/fixtures/cdx-after.json");
+    c.bench_function("diff_axios_fixture_pair", |b| {
         b.iter(|| {
-            let cs = diff::diff(black_box(&before), black_box(&after));
+            let cs = diff::diff(black_box(&fixture_before), black_box(&fixture_after));
             black_box(cs);
         });
     });
 
-    // Synthetic monorepo-scale pair (200 components per side, half
-    // version-changed).
-    let synth_before = synth_sbom(200, 0);
-    let synth_after = synth_sbom(200, 1);
-    g.bench_function("synth_monorepo_200", |b| {
-        b.iter(|| {
-            let cs = diff::diff(black_box(&synth_before), black_box(&synth_after));
-            black_box(cs);
+    // Synthetic sizes. `bench-stress` adds the 20_000-component group; the
+    // default set targets the "under 30s total" acceptance criterion.
+    let mut sizes: Vec<usize> = vec![500, 5_000];
+    if cfg!(feature = "bench-stress") {
+        sizes.push(20_000);
+    }
+
+    let mut g = c.benchmark_group("diff_synth");
+    for &n in &sizes {
+        // Throughput is reported in components/sec, summed across both sides
+        // of the diff. Comparing it across sizes shows whether a change is a
+        // flat per-component hit or a structural (size-dependent) one.
+        g.throughput(Throughput::Elements((n as u64) * 2));
+
+        let before = synth_sbom(n);
+        let after_mixed = synth_after_mixed(n);
+        let after_all_lic = synth_after_all_license_changed(n);
+
+        // end_to_end: realistic mix of all four change kinds.
+        g.bench_with_input(BenchmarkId::new("end_to_end", n), &n, |b, _| {
+            b.iter(|| {
+                let cs = diff::diff(black_box(&before), black_box(&after_mixed));
+                black_box(cs);
+            });
         });
-    });
 
-    // Self-diff (no changes): exercises every key through the BTreeMap
-    // intersection without producing any add/remove/version_changed work.
-    g.bench_function("synth_self_diff_200", |b| {
-        b.iter(|| {
-            let cs = diff::diff(black_box(&synth_before), black_box(&synth_before));
-            black_box(cs);
+        // self_diff: identical inputs. Isolates the BTreeMap construction
+        // (`group_by_key`) + per-key traversal cost with no change pairs
+        // produced. This is the lower bound on diff cost for a given size.
+        g.bench_with_input(BenchmarkId::new("self_diff", n), &n, |b, _| {
+            b.iter(|| {
+                let cs = diff::diff(black_box(&before), black_box(&before));
+                black_box(cs);
+            });
         });
-    });
 
+        // all_license_changed: every intersecting pair has a different
+        // license set. Isolates the license-comparison branch in
+        // `diff_one_key` (the version-intersection scan that routes pairs
+        // to `license_changed`).
+        g.bench_with_input(BenchmarkId::new("all_license_changed", n), &n, |b, _| {
+            b.iter(|| {
+                let cs = diff::diff(black_box(&before), black_box(&after_all_lic));
+                black_box(cs);
+            });
+        });
+    }
     g.finish();
 }
 
diff --git a/docs/src/architecture.md b/docs/src/architecture.md
index fae6596..1d232ad 100644
--- a/docs/src/architecture.md
+++ b/docs/src/architecture.md
@@ -279,3 +279,29 @@ beyond what `ureq` already brings.
   the release until split or explicitly waived in the changelog.
 - Tests-only files (`tests/**`) are exempt — large integration tests
   are easier to read as one file than as a maze of helpers.
+
+## Perf reference: diff core benchmark
+
+The diff core (`src/diff/`) runs on every bomdrift invocation, so it
+has its own criterion bench under [`benches/diff.rs`][diff-bench].
+That bench is the perf reference for any change that touches diff
+internals (`group_by_key`, `diff_one_key`, the multi-version
+pair-by-version logic).
+
+Three workloads per size (500, 5_000, opt-in 20_000 with
+`--features bench-stress`):
+
+- **end_to_end** — realistic mix (80% unchanged, 10% version_changed,
+  5% license_changed, 5% added/removed). Production hot path.
+- **self_diff** — identical inputs. Lower bound on diff cost; isolates
+  `group_by_key` BTreeMap construction + per-key iteration with no
+  change-pair work.
+- **all_license_changed** — every intersecting pair has a different
+  license set. Isolates the license-comparison branch.
+
+Run with `cargo bench --bench diff` (under 30s on a laptop). Record
+medians in the PR description for any change that touches
+`src/diff/`; a >5% regression on the production `end_to_end / 5000`
+workload warrants either a fix or an explicit explanation.
+
+[diff-bench]: https://github.com/Metbcy/bomdrift/blob/main/benches/diff.rs