diff --git a/.cargo/audit.toml b/.cargo/audit.toml
new file mode 100644
index 0000000..f8ef703
--- /dev/null
+++ b/.cargo/audit.toml
@@ -0,0 +1,29 @@
+# cargo-audit configuration.
+#
+# CI runs `cargo audit --deny warnings` so any *new* advisory (vulnerability,
+# unsoundness, or unmaintained crate) fails the build. The entries below are
+# the explicitly-accepted exceptions: each one must have a tracking note and
+# should be revisited on every cargo bump.
+#
+# When adding an entry: include the advisory ID, a one-line reason, and the
+# date it was accepted. When removing: confirm the upstream fix shipped and
+# `cargo audit` returns clean without the ignore.
+
+[advisories]
+ignore = [
+ # RUSTSEC-2024-0436 — `paste` 1.0.15 is unmaintained.
+ # Pulled in transitively via foyer 0.22 -> foyer-memory -> paste. No direct
+ # use; only its proc-macro is invoked at build time. No exposed API surface
+ # at runtime. Re-evaluate when foyer drops paste or migrates to pastey.
+ # Accepted: 2026-05.
+ "RUSTSEC-2024-0436",
+
+ # RUSTSEC-2026-0097 — `rand` is unsound when a custom global logger calls
+ # `rand::rng()` from inside its log handler. This workspace pulls in three
+ # rand versions (0.8.5 via stretto/caches, 0.9.2 via foyer/twox-hash, 0.10.0 direct).
+ # This benchmark does not install a custom `log` handler and does not call
+ # `rand::rng()` from any logger path, so the unsoundness condition cannot
+ # be triggered here. Re-evaluate when transitive deps move to a fixed rand.
+ # Accepted: 2026-05.
+ "RUSTSEC-2026-0097",
+]
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 0000000..40d01a3
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1,27 @@
+# CODEOWNERS for rust-cache-benchmarks
+#
+# GitHub auto-assigns the listed teams as reviewers on every PR (including
+# Dependabot bumps). Order matters: the *last* matching pattern wins.
+#
+# All paths below are owned by @Shopify/rust-cache-benchmarks-maintainers.
+# If that team is renamed, retired, or split, update every line in this file
+# in the same commit — GitHub silently no-ops entries that point at unknown
+# teams, which would leave PRs without a required reviewer.
+
+# Default owner for everything in the repo.
+* @Shopify/rust-cache-benchmarks-maintainers
+
+# Benchmark methodology and statistical reporting are the highest-trust
+# surface in this repo. Any change here affects the public comparison and
+# should get an extra reviewer who has context on the published numbers.
+/src/main.rs @Shopify/rust-cache-benchmarks-maintainers
+/src/caches/mod.rs @Shopify/rust-cache-benchmarks-maintainers
+/README.md @Shopify/rust-cache-benchmarks-maintainers
+/CONTRIBUTING.md @Shopify/rust-cache-benchmarks-maintainers
+
+# Security-sensitive files: anyone changing the audit allowlist or the CI
+# pipeline should get a security-aware review.
+/SECURITY.md @Shopify/rust-cache-benchmarks-maintainers
+/.cargo/audit.toml @Shopify/rust-cache-benchmarks-maintainers
+/.github/workflows/ @Shopify/rust-cache-benchmarks-maintainers
+/.github/CODEOWNERS @Shopify/rust-cache-benchmarks-maintainers
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a58dc50..2098222 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,28 +15,44 @@ env:
CARGO_TERM_COLOR: always
RUST_BACKTRACE: 1
+# All third-party actions are pinned to a 40-char commit SHA. The trailing
+# comment is the human-readable version Dependabot uses to keep the SHA up to
+# date (see .github/dependabot.yml — ecosystem `github-actions`). Never
+# downgrade these to floating tags or branches such as `@v4` or `@master`: a
+# compromise of the upstream action repo would otherwise execute attacker code
+# inside this workflow with the GITHUB_TOKEN scope.
jobs:
check:
- name: Check
- runs-on: ubuntu-latest
+ name: Check (${{ matrix.os }})
+ runs-on: ${{ matrix.os }}
+ strategy:
+ # Don't let a flake on one OS hide a real bug on the other.
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest, macos-latest]
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Install Rust toolchain
- uses: dtolnay/rust-toolchain@master
+ uses: dtolnay/rust-toolchain@3c5f7ea28cd621ae0bf5283f0e981fb97b8a7af9 # master @ 2026-05
with:
toolchain: "1.94"
components: rustfmt, clippy
- name: Cache cargo registry
- uses: actions/cache@v4
+ uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: |
~/.cargo/registry
~/.cargo/git
target
- key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
- restore-keys: ${{ runner.os }}-cargo-
+ # Keys are scoped by OS, toolchain, and Cargo.lock. The fallback
+ # restore-key is also scoped by toolchain so a cache from a
+ # different Rust version is never partially mounted on top of
+ # this build (which has historically caused mysterious link
+ # errors on macOS runners).
+ key: ${{ runner.os }}-rust-1.94-cargo-${{ hashFiles('**/Cargo.lock') }}
+ restore-keys: ${{ runner.os }}-rust-1.94-cargo-
- name: Check formatting
run: cargo fmt --check
@@ -57,18 +73,20 @@ jobs:
name: Audit
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Install Rust toolchain
- uses: dtolnay/rust-toolchain@master
+ uses: dtolnay/rust-toolchain@3c5f7ea28cd621ae0bf5283f0e981fb97b8a7af9 # master @ 2026-05
with:
toolchain: "1.94"
- name: Cache cargo-audit binary
- uses: actions/cache@v4
+ uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: ~/.cargo/bin/cargo-audit
- key: ${{ runner.os }}-cargo-audit-bin-v1
+ # Cache key includes toolchain so audit binaries built against
+ # an older Rust never silently land in a newer pipeline.
+ key: ${{ runner.os }}-rust-1.94-cargo-audit-bin-v1
- name: Install cargo-audit
run: |
@@ -77,6 +95,8 @@ jobs:
fi
- name: Audit dependencies
- # cargo audit fails on vulnerabilities by default. Warnings (unmaintained
- # crates, soundness advisories) are reported but do not fail the job.
- run: cargo audit
+ # `--deny warnings` makes any *new* unmaintained or unsoundness
+ # advisory fail the build. Currently-accepted advisories are listed
+ # explicitly in .cargo/audit.toml with rationale and review dates;
+ # adding a new ignore there is a deliberate, reviewable code change.
+ run: cargo audit --deny warnings
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e306876..ebe9170 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -33,7 +33,8 @@ Thanks for your interest in contributing! `rust-cache-benchmarks` is a benchmark
## Local checks
-CI runs against Rust `1.94`. All of these must pass:
+CI runs against Rust `1.94` (the project's MSRV — see `rust-toolchain.toml`
+and `Cargo.toml`'s `rust-version`). All of these must pass:
```sh
cargo fmt --check
@@ -41,6 +42,12 @@ cargo clippy --all-targets --locked -- -D warnings
cargo build --locked
cargo build --release --locked
cargo test --locked
+
+# Supply-chain audit. CI runs this with `--deny warnings`, so any *new*
+# unmaintained or unsoundness advisory will fail the pipeline. Currently
+# accepted advisories are listed in .cargo/audit.toml with rationale.
+# `cargo install --locked cargo-audit` if you don't have it locally.
+cargo audit --deny warnings
```
## Running benchmarks
diff --git a/Cargo.lock b/Cargo.lock
index acc0243..331f46b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -151,7 +151,6 @@ dependencies = [
"lru-mem",
"mini-moka",
"moka",
- "neocache",
"parking_lot",
"quick_cache",
"rand 0.10.0",
@@ -1070,18 +1069,6 @@ dependencies = [
"uuid",
]
-[[package]]
-name = "neocache"
-version = "0.1.0"
-source = "git+https://github.com/Shopify/neocache?rev=3c87b8fde4fab6eb3e9285c05df5ab8ae0cf498c#3c87b8fde4fab6eb3e9285c05df5ab8ae0cf498c"
-dependencies = [
- "ahash",
- "crossbeam-utils",
- "hashbrown 0.14.5",
- "lock_api",
- "parking_lot_core",
-]
-
[[package]]
name = "num-traits"
version = "0.2.19"
diff --git a/Cargo.toml b/Cargo.toml
index 96e537c..3b3b0b8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,11 +2,22 @@
name = "cache-bench"
version = "0.1.0"
edition = "2021"
+# MSRV. Kept in sync with `rust-toolchain.toml` and the toolchain pinned in
+# `.github/workflows/ci.yml`. Bumping any of the three requires bumping the
+# other two.
+rust-version = "1.94"
description = "Concurrent in-memory cache benchmarks for Rust"
license = "MIT"
+readme = "README.md"
repository = "https://github.com/Shopify/rust-cache-benchmarks"
homepage = "https://github.com/Shopify/rust-cache-benchmarks"
authors = ["Shopify Inc."]
+keywords = ["cache", "benchmark", "lru", "concurrent", "performance"]
+categories = ["caching", "development-tools::profiling"]
+# This is a benchmark harness, not a library. Block accidental `cargo publish`
+# from a maintainer's machine or a future release CI — the `cache-bench` name
+# is currently free on crates.io and we do not intend to claim it.
+publish = false
[dependencies]
schnellru = "0.2.4"
@@ -27,13 +38,17 @@ clru = "0.6.3"
lru-mem = "0.3.0"
sieve-cache = "1.1.6"
caches = "0.3.0"
-# Pinned to a commit SHA so `cargo update` cannot silently roll the
-# benchmark forward to a future neocache release with different
-# performance characteristics. Bump deliberately when re-baselining.
-neocache = { git = "https://github.com/Shopify/neocache", rev = "3c87b8fde4fab6eb3e9285c05df5ab8ae0cf498c" }
rand_distr = "0.6"
parking_lot = "0.12"
+# `neocache` is intentionally not a dependency of this public repository.
+# The upstream crate lives in a private Shopify repo, so any reference to
+# it (even an `optional = true` git dependency) is recorded in `Cargo.lock`
+# and forces `cargo` to fetch the source on every build — which fails with
+# `failed to authenticate when downloading repository` on any clone
+# without Shopify git credentials, including public CI runners. See the
+# `Per-cache configuration deviations` section of `README.md`.
+
[profile.release]
opt-level = 3
lto = "fat"
diff --git a/README.md b/README.md
index a926731..533e2e4 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,6 @@ Concurrent in-memory cache benchmarks for Rust. Compares throughput, hit rate, a
-
### Bachmark machine specs:
| Component | Details |
@@ -25,26 +24,80 @@ Concurrent in-memory cache benchmarks for Rust. Compares throughput, hit rate, a
## Caches compared
-| Crate | Strategy |
-|---|---|
-| [moka](https://crates.io/crates/moka) | TinyLFU / W-TinyLFU |
-| [mini-moka](https://crates.io/crates/mini-moka) | TinyLFU (lighter weight) |
-| [quick_cache](https://crates.io/crates/quick_cache) | LRU / CLOCK-Pro |
-| [schnellru](https://crates.io/crates/schnellru) | LRU |
-| [stretto](https://crates.io/crates/stretto) | Ristretto (TinyLFU) |
-| [lru](https://crates.io/crates/lru) | LRU |
-| [TinyUFO](https://crates.io/crates/TinyUFO) | TinyUFO |
-| [foyer](https://crates.io/crates/foyer) | S3-FIFO |
-| [cached](https://crates.io/crates/cached) | LRU (proc-macro) |
-| [clru](https://crates.io/crates/clru) | Count-Min LRU |
-| [lru-mem](https://crates.io/crates/lru-mem) | Memory-bounded LRU |
-| [sieve-cache](https://crates.io/crates/sieve-cache) | SIEVE |
-| [caches](https://crates.io/crates/caches) | Two-Queue |
-| [neocache](https://github.com/Shopify/neocache) | S3-FIFO + DashMap |
+The `Concurrency` column is a critical fairness disclosure: throughput numbers
+are dominated by lock topology as much as by eviction policy. Caches marked
+*sharded* serialise only within their shard; caches marked *single mutex* are
+wrapped by this benchmark in a `parking_lot::Mutex` because the underlying
+crate is not internally `Sync`-safe for concurrent writers. Compare
+like-for-like before drawing conclusions.
+
+| Crate | Strategy | Concurrency |
+|---|---|---|
+| [moka](https://crates.io/crates/moka) | TinyLFU / W-TinyLFU | sharded (built-in) |
+| [mini-moka](https://crates.io/crates/mini-moka) | TinyLFU (lighter weight) | sharded (built-in) |
+| [quick_cache](https://crates.io/crates/quick_cache) | LRU / CLOCK-Pro | sharded (built-in) |
+| [schnellru](https://crates.io/crates/schnellru) | LRU | single `Mutex` (this harness) |
+| [stretto](https://crates.io/crates/stretto) | Ristretto (TinyLFU) | sharded (built-in) - see footnote † |
+| [lru](https://crates.io/crates/lru) | LRU | single `Mutex` (this harness) |
+| [TinyUFO](https://crates.io/crates/TinyUFO) | TinyUFO | sharded (built-in) |
+| [foyer](https://crates.io/crates/foyer) | S3-FIFO | sharded (built-in) |
+| [cached](https://crates.io/crates/cached) | LRU (proc-macro) | single `Mutex` (this harness) |
+| [clru](https://crates.io/crates/clru) | Count-Min LRU | single `Mutex` (this harness) |
+| [lru-mem](https://crates.io/crates/lru-mem) | Memory-bounded LRU | single `Mutex` (this harness) - see footnote ‡ |
+| [sieve-cache](https://crates.io/crates/sieve-cache) | SIEVE | sharded (`ShardedSieveCache`) |
+| [caches](https://crates.io/crates/caches) | Two-Queue | single `Mutex` (this harness) - see footnote § |
+
+> **Note on `neocache`.** Earlier internal revisions of this harness also
+> benchmarked Shopify's in-house `neocache` crate (S3-FIFO + DashMap, built-in
+> sharding). It is intentionally absent from the public release: `neocache`
+> lives in a private repository, and even an `optional = true` git
+> dependency forces `cargo` to record the source in `Cargo.lock` and fetch
+> it on every build, which fails for any clone without Shopify git
+> credentials (including public CI runners). Reintroducing it requires a
+> private overlay branch that adds the dependency, the
+> `src/caches/neocache.rs` module, and matching entries in `ALL_CACHES` and
+> `dispatch` in `src/main.rs`.
+
+### Per-cache configuration deviations
+
+Every cache is constructed with default parameters *unless* listed here. Anything
+on this list is a deliberate choice we have made on behalf of the cache; the
+rationale is in the matching `src/caches/.rs` file. If you believe one of
+these tunings biases the comparison, please file a methodology issue with a
+before/after diff.
+
+- **† stretto** - constructed with `num_counters = 10× cache_size`,
+ `set_ignore_internal_cost(true)`, and `set_buffer_size(num_tasks × 4 KiB)`.
+ Stretto additionally receives a unique 3-phase warmup (insert → read pass to
+ populate TinyLFU frequency counts → re-insert with TinyLFU-informed admission)
+ because its frequency sketch is updated only from reads, never from inserts;
+ a single-phase warmup leaves admission decisions effectively random and
+ collapses the hit rate to ~45%.
+- **‡ lru-mem** - capacity is byte-budgeted (`cache_size × value_size`). The
+ budget covers value bytes only; `String` headers and key-string storage are
+ not accounted for, so the effective entry count slightly exceeds
+ `cache_size`. Documented for transparency; correcting it would require
+ bespoke `HeapSize` accounting that other caches do not model.
+- **§ caches (Two-Queue)** - the [`caches`](https://crates.io/crates/caches)
+ crate has not seen a release since 2022 and pulls in `rand 0.8.5`. Included
+ for historical comparison; a future PR may retire it once a maintained
+ alternative exists.
+- **mini-moka** - unlike moka, mini-moka 0.10 does not expose a public flush
+ analogous to `run_pending_tasks()` (the equivalent `sync()` is private), so
+ warmup completion is not synchronous on this cache. The first measurement
+ iteration absorbs the residual eviction work; later iterations measure
+ steady state.
+- **lrucache, schnellru, clru, cached, two-queue, lru-mem** - wrapped in a
+ single `parking_lot::Mutex` because none of them is `Sync` for concurrent
+ writers. This makes them strictly worse than sharded caches under the
+ default 8-thread workload; the comparison is still meaningful as a
+ *deployment* signal ("do not use these from multiple writer threads"), not
+ as a pure algorithmic ranking.
## Requirements
-- Rust toolchain (pinned in `rust-toolchain.toml` — installed automatically via `rustup`)
+- Rust toolchain (MSRV 1.94, pinned in `rust-toolchain.toml`, `Cargo.toml`'s
+ `rust-version`, and the CI workflow). Installed automatically via `rustup`.
- For best results, run on a quiet machine (no other heavy processes)
## Build and run
@@ -54,7 +107,7 @@ Concurrent in-memory cache benchmarks for Rust. Compares throughput, hit rate, a
cargo run --release
# Run a subset
-cargo run --release -- --caches moka,quick_cache,neocache
+cargo run --release -- --caches moka,quick_cache,sieve
# Override config parameters at runtime (no recompile needed)
cargo run --release -- --size 10000 --zipf 0.9
@@ -76,19 +129,19 @@ cargo run --release -- --info
| Column | Meaning |
|---|---|
-| `ops/sec` | Median throughput across all measurement iterations — total operations (reads + writes) per wall-clock second |
+| `ops/sec` | Median throughput across all measurement iterations - total operations (reads + writes) per wall-clock second |
| `±ci95` | 95% CI half-width as a percentage of the mean throughput (uses the actual `n` for this cache). Two caches whose CI ranges don't overlap are distinguishable at the 95% level |
| `cv%` | Coefficient of variation (`stddev / mean × 100`). < 3% = stable; > 10% = noisy, flagged with `!` |
| `n` | Actual number of measurement iterations run. Adaptive early stopping may converge before the 30-iteration maximum |
-| `p50µs` | Median of per-iteration p50 latencies in µs — the typical latency of a representative single run |
-| `p99µs` | Median of per-iteration p99 latencies in µs — avoids the pooling bias of aggregating all iterations before computing percentiles |
-| `tailµs` | Tail amplification = `p99µs − p50µs` (both medians) in µs. Lower = more consistent per-op cost |
-| `p99cv%` | CV% of per-iteration p99 latencies — measures tail latency *stability*; high = occasional spikes invisible in the median |
-| `wp99µs` | Median of per-iteration **write-only** p99 latencies in µs. Isolates write cost from the read-dominated `p99µs`; especially diagnostic for mutex-backed caches where writes hold an exclusive lock. Shows `--` if no write samples exist |
-| `hit%` | Cache hit rate — fraction of reads that found the key; purely a function of eviction policy and access pattern |
+| `p50μs` | Median of per-iteration p50 latencies in μs - the typical latency of a representative single run |
+| `p99μs` | Median of per-iteration p99 latencies in μs - avoids the pooling bias of aggregating all iterations before computing percentiles |
+| `tailμs` | Tail amplification = `p99μs - p50μs` (both medians) in μs. Lower = more consistent per-op cost |
+| `p99cv%` | CV% of per-iteration p99 latencies - measures tail latency *stability*; high = occasional spikes invisible in the median |
+| `wp99μs` | Median of per-iteration **write-only** p99 latencies in μs. Isolates write cost from the read-dominated `p99μs`; especially diagnostic for mutex-backed caches where writes hold an exclusive lock. Shows `--` if no write samples exist |
+| `hit%` | Cache hit rate - fraction of reads that found the key; purely a function of eviction policy and access pattern |
| `±hitci` | 95% CI half-width for hit rate in absolute percentage points (e.g. `±0.020` = ±0.020 pp). Three decimal places avoids the misleading `±0.00` display when CI is very tight but nonzero |
-> **Note on latency floor:** Each sampled latency is bracketed by two TSC reads (~10–20 ns each on Linux via vDSO). The combined ~20–40 ns overhead inflates `p50µs`/`p99µs` for the fastest caches. Use latency numbers for relative comparison, not absolute wall-clock cost.
+> **Note on latency floor:** Each sampled latency is bracketed by two TSC reads (~10-20 ns each on Linux via vDSO). The combined ~20-40 ns overhead inflates `p50μs`/`p99μs` for the fastest caches. Use latency numbers for relative comparison, not absolute wall-clock cost.
## Configuration
@@ -103,7 +156,7 @@ All parameters have sensible defaults and can be overridden via CLI flags withou
| `value_size` | `--value-size` | 5,120 B | 5 KB cached values |
| `num_iterations` | `--iters` | 30 (max) | Hard cap; adaptive early stopping may converge sooner |
| `warmup_iterations` | `--warmup` | 1 | One ~3 s pass primes branch predictors and engages Turbo Boost |
-| `zipf_exponent` | `--zipf` | 1.07 | ≈ 80/20 hotspot skew; sweep 0.8–1.5 |
+| `zipf_exponent` | `--zipf` | 1.07 | ≈ 80/20 hotspot skew; sweep 0.8-1.5 |
| `access_pattern` | `--pattern` | zipfian | `zipfian`, `uniform`, or `sequential` |
| `cold_start` | `--cold-start` | false | Start with empty cache |
| `latency_sample_every` | `--sample-every` | 10 | Bernoulli sampling rate (~10% of ops) |
@@ -113,17 +166,22 @@ All parameters have sensible defaults and can be overridden via CLI flags withou
## Benchmark methodology
-- **Task-parallel, OS-thread model** — each task runs in a `tokio::task::spawn_blocking` thread for true parallelism
-- **Barrier-synchronized start** — all tasks start simultaneously; throughput = `total_ops / wall_time_of_slowest_task`
-- **Per-cache ops calibration** — a 200 K-op/task calibration pass before warmup measures each cache's throughput and sets `num_keys_per_task = clamp(3 s × throughput / num_tasks, 500 K, 2 M)`, targeting ~3 s wall time per iteration; prevents slow global-mutex caches from dominating total runtime
-- **Per-iteration randomised ordering** — the remaining active caches are shuffled each iteration to eliminate thermal/position bias
-- **Adaptive early stopping** — after each iteration, any cache with ≥ 15 samples and 95% CI < 0.75% of mean is retired; the `n` column shows actual iterations run
-- **Median throughput** — robust to single-iteration outliers from thermal throttling or OS scheduling
-- **95% confidence intervals** — reported on both throughput (`±ci95`) and hit rate (`±hitci`)
-- **Per-iteration latency percentiles** — p50/p99 computed per iteration then summarised with the median; avoids the pooling bias of aggregating all samples before computing percentiles
-- **Bernoulli latency sampling** — each op is sampled with probability 1/N from the task's existing RNG; avoids phase-locking with periodic cache slow paths that deterministic modulo sampling can miss
-- **Noisy-run flagging** — throughput CV% > 10 is marked with `!` in the output table; a footnote is printed below the table when triggered
+- **Task-parallel, OS-thread model** - each task runs in a `tokio::task::spawn_blocking` thread for true parallelism
+- **Barrier-synchronized start** - all tasks start simultaneously; throughput = `total_ops / wall_time_of_slowest_task`
+- **Per-cache ops calibration** - a 200 K-op/task calibration pass before warmup measures each cache's throughput and sets `num_keys_per_task = clamp(3 s × throughput / num_tasks, 500 K, 2 M)`, targeting ~3 s wall time per iteration; prevents slow global-mutex caches from dominating total runtime
+- **Per-iteration randomised ordering** - the remaining active caches are shuffled each iteration to eliminate thermal/position bias
+- **Adaptive early stopping** - after each iteration, any cache with ≥ 15 samples and 95% CI < 0.75% of mean is retired; the `n` column shows actual iterations run
+- **Median throughput** - robust to single-iteration outliers from thermal throttling or OS scheduling
+- **95% confidence intervals** - reported on both throughput (`±ci95`) and hit rate (`±hitci`)
+- **Per-iteration latency percentiles** - p50/p99 computed per iteration then summarised with the median; avoids the pooling bias of aggregating all samples before computing percentiles
+- **Bernoulli latency sampling** - each op is sampled with probability 1/N from the task's existing RNG; avoids phase-locking with periodic cache slow paths that deterministic modulo sampling can miss
+- **Noisy-run flagging** - throughput CV% > 10 is marked with `!` in the output table; a footnote is printed below the table when triggered
+
+## Reporting security issues
+
+Please do not open public issues for security reports. See [SECURITY.md](SECURITY.md)
+for private disclosure channels.
## License
-MIT
+Licensed under the [MIT License](LICENSE).
diff --git a/docs/assets/README.md b/docs/assets/README.md
new file mode 100644
index 0000000..1c993bd
--- /dev/null
+++ b/docs/assets/README.md
@@ -0,0 +1,26 @@
+# Assets
+
+This directory holds README assets that need to be versioned with the repo
+(screenshots, charts, banners) so they cannot break independently of the code.
+
+## Currently expected files
+
+| Path | Purpose |
+|---|---|
+| `benchmark-output.png` | Hero screenshot of `cargo run --release` output, referenced from the top of `README.md`. |
+
+## How to refresh the screenshot
+
+1. On a quiet machine matching the spec table in `README.md`, run:
+ ```sh
+ cargo run --release > /tmp/bench.txt
+ ```
+2. Take a screenshot of the rendered table (the binary uses ANSI escapes for
+ colour, so a terminal screenshot is more readable than the raw text).
+3. Save as `docs/assets/benchmark-output.png`. Keep it under ~500 KB; resize
+ to a reasonable display width (1300–1600 px is fine).
+4. Commit. The README link is a relative path, so no further changes needed.
+
+Do **not** re-link the README to `github.com/user-attachments/...` URLs:
+those are tied to a single uploader and disappear if the user is removed
+or the upload is purged.
diff --git a/src/caches/minimoka.rs b/src/caches/minimoka.rs
index 41774f8..f48a955 100644
--- a/src/caches/minimoka.rs
+++ b/src/caches/minimoka.rs
@@ -20,8 +20,12 @@ pub async fn run_bench(
let value = value_pool[key_idx % value_pool.len()].clone();
cache.insert(key_pool[key_idx].clone(), value);
}
- // Flush deferred eviction decisions before measurement, matching moka's
- // run_pending_tasks() call and ensuring a consistent steady-state start.
+ // mini-moka 0.10 does NOT expose a public flush analogous to
+ // moka::sync::Cache::run_pending_tasks(); the equivalent `sync()` is
+ // private. Pending eviction work may therefore still be in-flight when
+ // measurement begins. The first warmup iteration absorbs most of it,
+ // but mini-moka's read of `entry_count` shortly after warmup may lag
+ // behind moka's. Documented here so the asymmetry is not silent.
}
let cache = Arc::new(cache);
diff --git a/src/caches/mod.rs b/src/caches/mod.rs
index 10535e0..b9848be 100644
--- a/src/caches/mod.rs
+++ b/src/caches/mod.rs
@@ -11,7 +11,12 @@ pub mod lrucache;
pub mod lrumem;
pub mod minimoka;
pub mod moka;
-pub mod neocache;
+// `neocache` is intentionally not benchmarked from this public repo:
+// the upstream crate lives in a private Shopify repo and so cannot be
+// cloned by external users or CI runners. See the comment in
+// `Cargo.toml` and the README. To benchmark `neocache` internally,
+// keep the dependency, this `pub mod`, and the corresponding entry in
+// `ALL_CACHES`/`dispatch` in a private overlay branch.
pub mod quick_cache;
pub mod schnellru;
pub mod sieve;
@@ -54,7 +59,14 @@ pub struct BenchConfig {
pub access_pattern: AccessPattern,
/// Zipf skew exponent: 1.07 ≈ 80/20 hotspot; higher = more skewed
pub zipf_exponent: f64,
- /// Skip pre-population warmup to measure cold ramp-up; false = steady-state
+ /// Skip pre-population warmup to measure cold ramp-up; false = steady-state.
+ ///
+ /// Also set internally to `true` for the calibration pass so that
+ /// calibration measures pure hot-loop throughput, not
+ /// `(pre-pop cost + hot-loop) / wall_time` — caches with expensive
+ /// pre-pop (e.g. stretto's 3-phase warmup, lru-mem's per-byte tracking)
+ /// would otherwise be under-calibrated and assigned a smaller
+ /// `num_keys_per_task` than is actually warranted.
pub cold_start: bool,
/// Minimum key string length (zero-padded); 0 = natural numeric length
pub key_size: usize,
@@ -79,9 +91,13 @@ pub struct BenchResults {
 pub write_latencies_ns: Vec<u64>, // Sampled write-only latencies in nanoseconds
}
-pub fn generate_fixed_value(size: usize) -> String {
- rand::rng()
- .sample_iter(&rand::distr::Alphanumeric)
+/// Generate a single random alphanumeric string of `size` bytes from the
+/// caller-supplied RNG. Taking the RNG by reference lets the value pool be
+/// generated deterministically from the benchmark's base seed instead of from
+/// an unseeded thread RNG — a precondition of the project's reproducibility
+/// guarantee documented in `--info`.
+pub fn generate_fixed_value(rng: &mut impl rand::Rng, size: usize) -> String {
+ rng.sample_iter(&rand::distr::Alphanumeric)
.take(size)
.map(char::from)
.collect()
@@ -89,9 +105,24 @@ pub fn generate_fixed_value(size: usize) -> String {
/// Pre-allocates a pool of values to eliminate allocation overhead during benchmarking.
/// Uses Arc to avoid expensive clones — only the Arc is cloned (8 bytes), not the data.
-pub fn generate_value_pool(pool_size: usize, value_size: usize) -> Arc<Vec<Arc<String>>> {
+///
+/// `seed` is taken from `BenchConfig::rng_seed`, with a fixed offset so it
+/// cannot collide with the per-task seeds derived as `(rng_seed + task_id + 1)`.
+/// Re-running with the same seed therefore produces bit-identical value bytes,
+/// which is what `--info` claims and what before/after diff comparisons rely on.
+pub fn generate_value_pool(
+ pool_size: usize,
+ value_size: usize,
+ seed: u64,
+) -> Arc<Vec<Arc<String>>> {
+ // Offset chosen to leave the [seed, seed + num_tasks] range free for the
+ // per-task RNGs in every cache implementation. 0xC0FFEE is arbitrary
+ // — the only requirement is that it is large enough to not collide with
+ // realistic task counts.
+ const VALUE_POOL_SEED_OFFSET: u64 = 0x00C0_FFEE;
+ let mut rng = rand::rngs::StdRng::seed_from_u64(seed.wrapping_add(VALUE_POOL_SEED_OFFSET));
 let pool: Vec<Arc<String>> = (0..pool_size)
- .map(|_| Arc::new(generate_fixed_value(value_size)))
+ .map(|_| Arc::new(generate_fixed_value(&mut rng, value_size)))
.collect();
Arc::new(pool)
}
@@ -120,7 +151,27 @@ pub fn generate_key_pool(num_keys: usize, key_size: usize) -> Arc<Vec<String>> {
}
/// Returns the number of primary benchmark keys (the Zipf keyspace).
-/// The full key pool is 2× this value to accommodate write_new_key_fraction.
+/// The full key pool is 2× this value to accommodate `write_new_key_fraction`.
+///
+/// ## Fresh-key window (write_new_key_fraction > 0)
+///
+/// Each cache implementation computes a fresh-key index as:
+///
+/// ```text
+/// idx = total_keys + ((seq_start + fresh_write_count) % total_keys)
+/// ```
+///
+/// so `idx` lives in `[total_keys, 2 × total_keys)`. Because the modulus is
+/// `total_keys` (not `total_keys * num_tasks`), two tasks with identical
+/// `seq_start mod total_keys` will rotate through the **same** fresh-key
+/// indices. With the default 8 tasks × 2 M ops over a 480 K Zipf keyspace,
+/// each task wraps the fresh-key window every ~60 K ops.
+///
+/// This is intentional: the goal of `write_new_key_fraction` is to model
+/// insert-heavy workloads (event ingestion, telemetry firehoses) by injecting
+/// keys outside the Zipf hot-set, not to allocate a fully unique fresh-key
+/// stream per task. Documenting it here so the bound on the fresh-key pool
+/// is auditable and the wraparound is not surprising.
pub fn total_benchmark_keys(cfg: &BenchConfig) -> usize {
cfg.num_distinct_keys
}
@@ -223,7 +274,9 @@ pub fn calculate_statistics(values: &[f64]) -> (f64, f64, f64, f64) {
 let mean = values.iter().sum::<f64>() / values.len() as f64;
let mut sorted = values.to_vec();
- sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
+ // total_cmp instead of partial_cmp so a stray NaN sorts deterministically
+ // to one end rather than panicking the whole benchmark summary.
+ sorted.sort_by(|a, b| a.total_cmp(b));
let n = sorted.len();
let median = if n.is_multiple_of(2) {
(sorted[n / 2 - 1] + sorted[n / 2]) / 2.0
diff --git a/src/caches/neocache.rs b/src/caches/neocache.rs
deleted file mode 100644
index f4c0025..0000000
--- a/src/caches/neocache.rs
+++ /dev/null
@@ -1,157 +0,0 @@
-use crate::caches::{total_benchmark_keys, BenchConfig, BenchResults, KeyGenerator};
-use neocache::NeoCache;
-use rand::{RngExt, SeedableRng};
-use std::sync::Arc;
-use std::time::Instant;
-
-/// NeoCache: concurrent HashMap with built-in S3-FIFO cache eviction.
-/// Unlike the DashMap benchmark, no manual entry_count tracking or random
-/// eviction is required — S3-FIFO eviction runs automatically per-shard on
-/// every insert once the cache reaches capacity.
-pub async fn run_bench(
- cfg: Arc,
- value_pool: Arc>>,
- key_pool: Arc>,
-) -> BenchResults {
- let cache: Arc>> = Arc::new(NeoCache::new(cfg.cache_size));
- let total_keys = total_benchmark_keys(&cfg);
-
- if !cfg.cold_start {
- let warmup_key_gen = KeyGenerator::new(cfg.access_pattern, total_keys, cfg.zipf_exponent);
- let mut rng = rand::rngs::StdRng::seed_from_u64(cfg.rng_seed);
- for i in 0..total_keys {
- let key_idx = warmup_key_gen.next_key(i, &mut rng);
- let value = value_pool[key_idx % value_pool.len()].clone();
- cache.insert(key_pool[key_idx].clone(), value);
- }
- // No eviction trim needed: S3-FIFO enforces cache_capacity automatically.
- }
-
- let key_gen = Arc::new(KeyGenerator::new(
- cfg.access_pattern,
- total_keys,
- cfg.zipf_exponent,
- ));
- let barrier = Arc::new(std::sync::Barrier::new(cfg.num_tasks));
-
- let tasks: Vec<_> = (0..cfg.num_tasks)
- .map(|i| {
- let my_cache = cache.clone();
- let value_pool = value_pool.clone();
- let key_pool = key_pool.clone();
- let key_gen = key_gen.clone();
- let seq_start = i * cfg.num_keys_per_task;
- let seq_end = (i + 1) * cfg.num_keys_per_task;
- let cfg = cfg.clone();
- let barrier = barrier.clone();
-
- tokio::task::spawn_blocking(move || {
- let mut local_latencies =
- Vec::with_capacity(cfg.num_keys_per_task / cfg.latency_sample_every + 64);
- let mut local_write_latencies =
- Vec::with_capacity(cfg.num_keys_per_task / cfg.latency_sample_every / 5 + 64);
- let mut local_reads = 0usize;
- let mut local_hits = 0usize;
- let mut fresh_write_count = 0usize;
- let mut rng = rand::rngs::StdRng::seed_from_u64(cfg.rng_seed + i as u64 + 1);
-
- barrier.wait();
- let task_start = Instant::now();
-
- for seq_key in seq_start..seq_end {
- let key_idx = key_gen.next_key(seq_key, &mut rng);
- let k = &key_pool[key_idx];
-
- let is_read = rng.random_bool(cfg.read_write_ratio);
- let should_measure = rng.random_bool(1.0 / cfg.latency_sample_every as f64);
-
- if is_read {
- local_reads += 1;
- let op_start = if should_measure {
- Some(Instant::now())
- } else {
- None
- };
- let hit = my_cache.get(k);
- if let Some(t) = op_start {
- local_latencies.push(t.elapsed().as_nanos() as u64);
- }
- if let Some(value) = hit {
- local_hits += 1;
- let checksum: u32 = value
- .as_bytes()
- .iter()
- .step_by(256)
- .map(|&b| b as u32)
- .sum();
- std::hint::black_box(checksum);
- }
- } else {
- let write_key_idx = if cfg.write_new_key_fraction > 0.0
- && rng.random_bool(cfg.write_new_key_fraction)
- {
- let idx = total_keys + ((seq_start + fresh_write_count) % total_keys);
- fresh_write_count += 1;
- idx
- } else {
- key_idx
- };
- let k_write = &key_pool[write_key_idx];
- let value = value_pool[write_key_idx % value_pool.len()].clone();
- let op_start = if should_measure {
- Some(Instant::now())
- } else {
- None
- };
- my_cache.insert(k_write.clone(), value);
- if let Some(t) = op_start {
- let ns = t.elapsed().as_nanos() as u64;
- local_latencies.push(ns);
- local_write_latencies.push(ns);
- }
- }
- }
-
- (
- local_latencies,
- local_write_latencies,
- local_reads,
- local_hits,
- task_start.elapsed(),
- )
- })
- })
- .collect();
-
- let mut all_latencies = Vec::new();
- let mut all_write_latencies = Vec::new();
- let mut total_reads = 0usize;
- let mut total_hits = 0usize;
- let mut max_duration = std::time::Duration::ZERO;
- for handle in tasks {
- let (lat, wlat, reads, hits, duration) = handle.await.unwrap();
- all_latencies.extend(lat);
- all_write_latencies.extend(wlat);
- total_reads += reads;
- total_hits += hits;
- max_duration = max_duration.max(duration);
- }
-
- let total_ops = cfg.num_tasks * cfg.num_keys_per_task;
- let throughput = total_ops as f64 / max_duration.as_secs_f64();
- let hit_rate = if total_reads > 0 {
- total_hits as f64 / total_reads as f64
- } else {
- 0.0
- };
-
- BenchResults {
- throughput,
- hit_rate,
- total_ops,
- total_reads,
- total_hits,
- latencies_ns: all_latencies,
- write_latencies_ns: all_write_latencies,
- }
-}
diff --git a/src/main.rs b/src/main.rs
index fc10219..2458a08 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,16 +1,61 @@
use rand::{seq::SliceRandom, SeedableRng};
use std::collections::HashMap;
+use std::io::IsTerminal;
use std::sync::Arc;
+use std::sync::OnceLock;
use crate::caches::*;
-// Raw ANSI sequences — no crate dependency.
-const DIM: &str = "\x1b[2m";
-const RESET: &str = "\x1b[0m";
-const BOLD: &str = "\x1b[1m";
-const GREEN: &str = "\x1b[32m";
-const YELLOW: &str = "\x1b[33m";
-const CYAN: &str = "\x1b[36m";
+// ANSI colour escapes are written through these helpers so they collapse to
+// empty strings when stdout is not a TTY (piped to a file, captured by CI).
+// Writing the escape bytes to a non-terminal sink produces literal `\x1b[2m`
+// in the output, which would be unhelpful for anyone diffing benchmark logs.
+fn ansi_enabled() -> bool {
+ static CACHED: OnceLock<bool> = OnceLock::new();
+ *CACHED.get_or_init(|| std::io::stdout().is_terminal())
+}
+fn dim() -> &'static str {
+ if ansi_enabled() {
+ "\x1b[2m"
+ } else {
+ ""
+ }
+}
+fn reset() -> &'static str {
+ if ansi_enabled() {
+ "\x1b[0m"
+ } else {
+ ""
+ }
+}
+fn bold() -> &'static str {
+ if ansi_enabled() {
+ "\x1b[1m"
+ } else {
+ ""
+ }
+}
+fn green() -> &'static str {
+ if ansi_enabled() {
+ "\x1b[32m"
+ } else {
+ ""
+ }
+}
+fn yellow() -> &'static str {
+ if ansi_enabled() {
+ "\x1b[33m"
+ } else {
+ ""
+ }
+}
+fn cyan() -> &'static str {
+ if ansi_enabled() {
+ "\x1b[36m"
+ } else {
+ ""
+ }
+}
mod caches;
@@ -31,7 +76,10 @@ const ALL_CACHES: &[&str] = &[
"lrumem",
"sieve",
"two_queue",
- "neocache",
+ // `neocache` is deliberately not in this public list — see the
+ // comment in `caches/mod.rs` and `Cargo.toml` for why. Internal
+ // users carrying the overlay should add the entry back here in
+ // its original order-of-introduction position.
];
/// Dispatch a single benchmark run by cache name.
@@ -55,7 +103,6 @@ async fn dispatch(
"lrumem" => lrumem::run_bench(cfg, value_pool, key_pool).await,
"sieve" => sieve::run_bench(cfg, value_pool, key_pool).await,
"two_queue" => two_queue::run_bench(cfg, value_pool, key_pool).await,
- "neocache" => neocache::run_bench(cfg, value_pool, key_pool).await,
// Cache names are validated against `ALL_CACHES` in `parse_args`, so any
// unknown name here indicates a programmer error (a name was added to
// `ALL_CACHES` without a matching arm here).
@@ -73,8 +120,11 @@ async fn run_benchmark_suite(
let spinners = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
let mut spin_idx: usize = 0;
- // Pre-allocate value pool once for all benchmarks
- let value_pool = generate_value_pool(cfg.value_pool_size, cfg.value_size);
+ // Pre-allocate value pool once for all benchmarks. Seeded from the same
+ // base seed as the rest of the benchmark, so re-running with the same
+ // `--seed` produces bit-identical value bytes (matching the
+ // reproducibility guarantee printed by `--info`).
+ let value_pool = generate_value_pool(cfg.value_pool_size, cfg.value_size, cfg.rng_seed);
let key_pool = generate_key_pool(total_keys * 2, cfg.key_size);
let active: Vec<&'static str> = ALL_CACHES
@@ -97,8 +147,15 @@ async fn run_benchmark_suite(
const MIN_OPS: usize = 500_000;
const MAX_OPS: usize = 2_000_000;
+ // Calibration runs with `cold_start = true` so we measure pure hot-loop
+ // throughput, not (pre-pop cost + hot-loop) / wall_time. Caches with
+ // expensive pre-population (stretto's 3-phase warmup, lru-mem's per-byte
+ // tracking) would otherwise be under-calibrated relative to caches with
+ // cheap pre-pop, giving them a smaller `num_keys_per_task` budget than
+ // their steady-state throughput justifies.
let cal_cfg = Arc::new(BenchConfig {
num_keys_per_task: CAL_OPS,
+ cold_start: true,
..(*cfg).clone()
});
 let mut per_cache_cfg: HashMap<&'static str, Arc<BenchConfig>> = HashMap::new();
@@ -114,7 +171,9 @@ async fn run_benchmark_suite(
sp,
cal_idx + 1,
total_active,
- name
+ name,
+ DIM = dim(),
+ RESET = reset(),
)
);
let _ = std::io::Write::flush(&mut std::io::stdout());
@@ -145,7 +204,9 @@ async fn run_benchmark_suite(
sp,
i + 1,
cfg.warmup_iterations,
- name
+ name,
+ YELLOW = yellow(),
+ RESET = reset(),
)
);
let _ = std::io::Write::flush(&mut std::io::stdout());
@@ -183,7 +244,15 @@ async fn run_benchmark_suite(
spin_idx += 1;
print!(
"{:<65}\r",
- format!(" {} {CYAN}{BOLD}run{RESET} {} {}...", sp, n, name)
+ format!(
+ " {} {CYAN}{BOLD}run{RESET} {} {}...",
+ sp,
+ n,
+ name,
+ CYAN = cyan(),
+ BOLD = bold(),
+ RESET = reset(),
+ )
);
let _ = std::io::Write::flush(&mut std::io::stdout());
let result = dispatch(
@@ -216,38 +285,61 @@ fn check_help_flag() -> bool {
std::env::args().any(|a| a == "--help" || a == "-h")
}
+/// Single source of truth for the user-facing flag table.
+///
+/// Both `print_help` (`--help`) and `print_info` (`--info`) render from this
+/// list so they cannot drift apart. When adding or renaming a flag, update
+/// **only** this table plus the matching arm in `parse_args`.
+struct FlagDoc {
+ /// Human-readable invocation form, e.g. `"--size N"` or `"--caches / -c LIST"`.
+ form: &'static str,
+ /// One-line description suitable for both the brief and full references.
+ /// Includes the default value where applicable.
+ desc: &'static str,
+}
+
+#[rustfmt::skip]
+const FLAG_TABLE: &[FlagDoc] = &[
+ FlagDoc { form: "--size N", desc: "cache_size (default: 30000)" },
+ FlagDoc { form: "--keys N", desc: "num_distinct_keys (default: 480000, i.e. 16× cache_size)" },
+ FlagDoc { form: "--tasks N", desc: "num_tasks (default: available_parallelism)" },
+ FlagDoc { form: "--iters N", desc: "num_iterations max (default: 30; adaptive stopping may run fewer)" },
+ FlagDoc { form: "--warmup N", desc: "warmup_iterations (default: 1)" },
+ FlagDoc { form: "--ratio F", desc: "read_write_ratio 0.0–1.0 (default: 0.80)" },
+ FlagDoc { form: "--value-size N", desc: "value size in bytes (default: 5120)" },
+ FlagDoc { form: "--zipf F", desc: "Zipf exponent (default: 1.07) sweep 0.8–1.5" },
+ FlagDoc { form: "--pattern P", desc: "access pattern: zipfian|uniform|sequential (default: zipfian)" },
+ FlagDoc { form: "--cold-start", desc: "start with empty cache instead of pre-populated" },
+ FlagDoc { form: "--key-size N", desc: "minimum key string width, zero-padded (default: 0)" },
+ FlagDoc { form: "--write-new F", desc: "fraction of writes inserting a fresh key (default: 0.0)" },
+ FlagDoc { form: "--seed N", desc: "RNG seed (default: 42)" },
+ FlagDoc { form: "--pool-size N", desc: "value pool size (default: 10000)" },
+ FlagDoc { form: "--sample-every N", desc: "latency sampling rate 1/N (default: 10)" },
+ FlagDoc { form: "--caches / -c LIST", desc: "comma-separated list of caches to run" },
+ FlagDoc { form: "--info", desc: "full reference: config, methodology, column definitions" },
+ FlagDoc { form: "--help / -h", desc: "print brief flag summary" },
+];
+
+fn print_flag_table() {
+ for f in FLAG_TABLE {
+ println!(" {:<19} {}", f.form, f.desc);
+ }
+}
+
fn print_help() {
println!("rust-cache-benchmarks — concurrent in-memory cache benchmark\n");
println!("USAGE:");
println!(" cargo run --release [-- [FLAGS]]\n");
println!("FLAGS:");
- println!(" --size N cache_size (default: 30000)");
- println!(" --keys N num_distinct_keys (default: 480000, i.e. 16× cache_size)");
- println!(" --tasks N num_tasks (default: available_parallelism)");
- println!(
- " --iters N num_iterations max (default: 30; adaptive stopping may run fewer)"
- );
- println!(" --warmup N warmup_iterations (default: 1)");
- println!(" --ratio F read_write_ratio 0.0–1.0 (default: 0.80)");
- println!(" --value-size N value size in bytes (default: 5120)");
- println!(" --zipf F Zipf exponent (default: 1.07) sweep 0.8–1.5");
- println!(" --pattern P access pattern: zipfian|uniform|sequential (default: zipfian)");
- println!(" --cold-start start with empty cache instead of pre-populated");
- println!(" --key-size N minimum key string width, zero-padded (default: 0)");
- println!(" --write-new F fraction of writes inserting a fresh key (default: 0.0)");
- println!(" --seed N RNG seed (default: 42)");
- println!(" --pool-size N value pool size (default: 10000)");
- println!(" --sample-every N latency sampling rate 1/N (default: 10)");
- println!(" --caches / -c LIST comma-separated list of caches to run");
- println!(" --info full reference: config, methodology, column definitions");
- println!(" --help / -h print this help\n");
+ print_flag_table();
+ println!();
println!(" All flags accept both space-separated and = forms: --zipf 1.2 --zipf=1.2\n");
println!("AVAILABLE CACHES:");
println!(" moka, minimoka, quick_cache, schnellru, stretto, lrucache, tinyufo,");
- println!(" foyer_cache, cached_lru, clru_cache, lrumem, sieve, two_queue, neocache\n");
+ println!(" foyer_cache, cached_lru, clru_cache, lrumem, sieve, two_queue\n");
println!("EXAMPLES:");
println!(" cargo run --release # run all caches");
- println!(" cargo run --release -- -c moka,neocache # run a subset");
+ println!(" cargo run --release -- -c moka,quick_cache # run a subset");
println!(" cargo run --release -- --size 10000 --zipf 0.9 # custom config");
println!(" cargo run --release -- --pattern uniform --ratio 0.5 # different workload");
println!(" cargo run --release -- --info # full reference\n");
@@ -472,6 +564,19 @@ fn parse_args() -> Result<(BenchConfig, std::collections::HashSet), Stri
i += 1;
}
+ // Cross-flag validation. A keyspace smaller than the cache means the
+ // cache will admit every key with no eviction pressure at all — the
+ // benchmark stops measuring eviction policy quality and becomes a
+ // hash-table micro-benchmark, which is not what this harness exists for.
+ // Defaults set num_distinct_keys = 16 × cache_size; require at least 2×
+ // for any user override so the comparison stays meaningful.
+ if num_distinct_keys < cache_size * 2 {
+ return Err(format!(
+ "--keys ({num_distinct_keys}) must be at least 2× --size ({cache_size}) so the \
+ cache experiences eviction pressure. Recommended ratio is 16× (the default)."
+ ));
+ }
+
let config = BenchConfig {
cache_size,
num_tasks,
@@ -818,35 +923,19 @@ fn print_info() {
println!("━━━ CLI FLAGS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n");
- println!(" --size N cache_size (default: 30000)");
- println!(" --keys N num_distinct_keys (default: 480000, i.e. 16× cache_size)");
- println!(" --tasks N num_tasks (default: available_parallelism)");
- println!(" --iters N num_iterations / max iterations (default: 30)");
- println!(" --warmup N warmup_iterations (default: 1)");
- println!(" --ratio F read_write_ratio 0.0–1.0 (default: 0.80)");
- println!(" --value-size N value size in bytes (default: 5120)");
- println!(" --zipf F Zipf exponent (default: 1.07) sweep 0.8–1.5");
- println!(" --pattern P access pattern: zipfian|uniform|sequential (default: zipfian)");
- println!(" --cold-start start with empty cache instead of pre-populated");
- println!(" --key-size N minimum key string width, zero-padded (default: 0)");
- println!(" --write-new F fraction of writes inserting a fresh key (default: 0.0)");
- println!(" --seed N RNG seed (default: 42)");
- println!(" --pool-size N value pool size (default: 10000)");
- println!(" --sample-every N latency sampling rate 1/N (default: 10)");
- println!(" --caches / -c LIST comma-separated list of caches to run");
- println!(" --info print this full reference");
- println!(" --help / -h print brief flag summary\n");
+ print_flag_table();
+ println!();
println!(" All flags accept both space-separated and = forms:");
println!(" --zipf 1.2 --zipf=1.2\n");
println!(" Available caches:");
println!(" moka, minimoka, quick_cache, schnellru, stretto, lrucache, tinyufo,");
- println!(" foyer_cache, cached_lru, clru_cache, lrumem, sieve, two_queue, neocache\n");
+ println!(" foyer_cache, cached_lru, clru_cache, lrumem, sieve, two_queue\n");
println!(" Usage:");
println!(" cargo run --release # run all caches");
- println!(" cargo run --release -- -c moka,neocache # run a subset");
+ println!(" cargo run --release -- -c moka,quick_cache # run a subset");
println!(" cargo run --release -- --size 10000 --zipf 0.9 # custom config");
println!(" cargo run --release -- --pattern uniform --ratio 0.5 # different workload");
println!(" cargo run --release -- --info # print this reference\n");
@@ -934,12 +1023,11 @@ async fn main() {
.map(|(name, results)| (*name, AggregatedStats::from_results(results)))
.collect();
- // Sort by median throughput (descending)
- stats_map.sort_by(|a, b| {
- b.1.throughput_median
- .partial_cmp(&a.1.throughput_median)
- .unwrap()
- });
+ // Sort by median throughput (descending). `total_cmp` instead of
+ // `partial_cmp().unwrap()` so a stray NaN (which today cannot occur,
+ // but guarding costs nothing) would sort deterministically to one
+ // end rather than panic the binary.
+ stats_map.sort_by(|a, b| b.1.throughput_median.total_cmp(&a.1.throughput_median));
// Print combined results table
println!(
@@ -955,7 +1043,9 @@ async fn main() {
"p99cv%",
"wp99µs",
"hit%",
- "±hitci"
+ "±hitci",
+ BOLD = bold(),
+ RESET = reset(),
);
println!("{}", "─".repeat(99));
@@ -998,24 +1088,25 @@ async fn main() {
println!(" (!) cv% > 10%: throughput is noisy — consider rerunning on a quieter system");
}
- println!("{DIM} legend: ops/sec=throughput ±ci95=95% CI cv%=variability tailµs=p99−p50 wp99µs=write p99 ±hitci=hit-rate CI{RESET}");
+ println!(
+ "{DIM} legend: ops/sec=throughput ±ci95=95% CI cv%=variability tailµs=p99−p50 wp99µs=write p99 ±hitci=hit-rate CI{RESET}",
+ DIM = dim(),
+ RESET = reset(),
+ );
- // Overall winners
+ // Overall winners. `total_cmp` everywhere so a NaN (which today cannot
+ // occur, but defensive-cmp is cheap) cannot panic the summary line.
let (best_throughput, best_throughput_stats) = &stats_map[0];
let best_latency = stats_map
.iter()
- .min_by(|a, b| {
- a.1.latency_p99_median
- .partial_cmp(&b.1.latency_p99_median)
- .unwrap()
- })
+ .min_by(|a, b| a.1.latency_p99_median.total_cmp(&b.1.latency_p99_median))
.unwrap();
let best_consistency = stats_map
.iter()
.min_by(|a, b| {
let a_amp = a.1.latency_p99_median - a.1.latency_p50_median;
let b_amp = b.1.latency_p99_median - b.1.latency_p50_median;
- a_amp.partial_cmp(&b_amp).unwrap()
+ a_amp.total_cmp(&b_amp)
})
.unwrap();
@@ -1027,7 +1118,7 @@ async fn main() {
let multi = stats_map.len() > 1;
let w = |s: &str| -> String {
if multi {
- format!("{BOLD}{GREEN}{s}{RESET}")
+ format!("{}{}{}{}", bold(), green(), s, reset())
} else {
s.to_string()
}