Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 4 additions & 8 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,13 @@ clru = "0.6.3"
lru-mem = "0.3.0"
sieve-cache = "1.1.6"
caches = "0.3.0"
# Pinned to a commit SHA so `cargo update` cannot silently roll the
# benchmark forward to a future neocache release with different
# performance characteristics. Bump deliberately when re-baselining.
neocache = { git = "https://github.com/Shopify/neocache", rev = "3c87b8fde4fab6eb3e9285c05df5ab8ae0cf498c" }
rand_distr = "0.6"
parking_lot = "0.12"

# `neocache` is intentionally not a dependency of this public repository.
# The upstream crate lives in a private Shopify repo, so any reference to
# it (even an `optional = true` git dependency) is recorded in `Cargo.lock`
# and forces `cargo` to fetch the source on every build — which fails with
# `failed to authenticate when downloading repository` on any clone
# without Shopify git credentials, including public CI runners. See the
# `Per-cache configuration deviations` section of `README.md`.

[profile.release]
opt-level = 3
lto = "fat"
Expand Down
14 changes: 2 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,7 @@ like-for-like before drawing conclusions.
| [lru-mem](https://crates.io/crates/lru-mem) | Memory-bounded LRU | single `Mutex` (this harness) — see footnote ‡ |
| [sieve-cache](https://crates.io/crates/sieve-cache) | SIEVE | sharded (`ShardedSieveCache`) |
| [caches](https://crates.io/crates/caches) | Two-Queue | single `Mutex` (this harness) — see footnote § |

> **Note on `neocache`.** Earlier internal revisions of this harness also
> benchmarked Shopify's in-house `neocache` crate (S3-FIFO + DashMap, built-in
> sharding). It is intentionally absent from the public release: `neocache`
> lives in a private repository, and even an `optional = true` git
> dependency forces `cargo` to record the source in `Cargo.lock` and fetch
> it on every build, which fails for any clone without Shopify git
> credentials (including public CI runners). Reintroducing it requires a
> private overlay branch that adds the dependency, the
> `src/caches/neocache.rs` module, and matching entries in `ALL_CACHES` and
> `dispatch` in `src/main.rs`.
| [neocache](https://github.com/Shopify/neocache) | S3-FIFO + DashMap | sharded (built-in) |

### Per-cache configuration deviations

Expand Down Expand Up @@ -107,7 +97,7 @@ before/after diff.
cargo run --release

# Run a subset
cargo run --release -- --caches moka,quick_cache,sieve
cargo run --release -- --caches moka,quick_cache,neocache

# Override config parameters at runtime (no recompile needed)
cargo run --release -- --size 10000 --zipf 0.9
Expand Down
7 changes: 1 addition & 6 deletions src/caches/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,7 @@ pub mod lrucache;
pub mod lrumem;
pub mod minimoka;
pub mod moka;
// `neocache` is intentionally not benchmarked from this public repo:
// the upstream crate lives in a private Shopify repo and so cannot be
// cloned by external users or CI runners. See the comment in
// `Cargo.toml` and the README. To benchmark `neocache` internally,
// keep the dependency, this `pub mod`, and the corresponding entry in
// `ALL_CACHES`/`dispatch` in a private overlay branch.
pub mod neocache;
pub mod quick_cache;
pub mod schnellru;
pub mod sieve;
Expand Down
157 changes: 157 additions & 0 deletions src/caches/neocache.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
use crate::caches::{total_benchmark_keys, BenchConfig, BenchResults, KeyGenerator};
use neocache::NeoCache;
// `Rng` is the rand 0.9 extension trait that provides `random_bool`;
// there is no `RngExt` trait in the rand crate, so importing it fails
// to compile. `SeedableRng` provides `StdRng::seed_from_u64`.
use rand::{Rng, SeedableRng};
use std::sync::Arc;
use std::time::Instant;

/// NeoCache: concurrent HashMap with built-in S3-FIFO cache eviction.
/// Unlike the DashMap benchmark, no manual entry_count tracking or random
/// eviction is required — S3-FIFO eviction runs automatically per-shard on
/// every insert once the cache reaches capacity.
///
/// Runs the standard harness workload against `NeoCache` and returns the
/// aggregated `BenchResults`.
///
/// * `cfg` — shared benchmark configuration (task count, key counts, RNG
///   seed, read/write ratio, latency sampling rate, …).
/// * `value_pool` — pre-allocated values; `Arc<String>` clones are cheap
///   refcount bumps, so inserts don't copy payload bytes.
/// * `key_pool` — pre-generated key strings indexed by key id.
///   NOTE(review): the fresh-write path below indexes up to
///   `2 * total_keys - 1`, so `key_pool` must contain at least
///   `2 * total_keys` entries — confirm against the pool builder.
pub async fn run_bench(
    cfg: Arc<BenchConfig>,
    value_pool: Arc<Vec<Arc<String>>>,
    key_pool: Arc<Vec<String>>,
) -> BenchResults {
    let cache: Arc<NeoCache<String, Arc<String>>> = Arc::new(NeoCache::new(cfg.cache_size));
    let total_keys = total_benchmark_keys(&cfg);

    // Optional warmup: pre-populate using the same access distribution as
    // the measured phase so the cache starts at steady-state occupancy.
    if !cfg.cold_start {
        let warmup_key_gen = KeyGenerator::new(cfg.access_pattern, total_keys, cfg.zipf_exponent);
        let mut rng = rand::rngs::StdRng::seed_from_u64(cfg.rng_seed);
        for i in 0..total_keys {
            let key_idx = warmup_key_gen.next_key(i, &mut rng);
            let value = value_pool[key_idx % value_pool.len()].clone();
            cache.insert(key_pool[key_idx].clone(), value);
        }
        // No eviction trim needed: S3-FIFO enforces `cfg.cache_size`
        // automatically on insert.
    }

    let key_gen = Arc::new(KeyGenerator::new(
        cfg.access_pattern,
        total_keys,
        cfg.zipf_exponent,
    ));
    // std Barrier (not tokio's) is correct here: the workers run on
    // spawn_blocking threads, so blocking on the barrier cannot stall the
    // async runtime. All tasks start timing simultaneously after the wait.
    let barrier = Arc::new(std::sync::Barrier::new(cfg.num_tasks));

    let tasks: Vec<_> = (0..cfg.num_tasks)
        .map(|i| {
            let my_cache = cache.clone();
            let value_pool = value_pool.clone();
            let key_pool = key_pool.clone();
            let key_gen = key_gen.clone();
            // Each task owns a disjoint slice of the sequential key space.
            let seq_start = i * cfg.num_keys_per_task;
            let seq_end = (i + 1) * cfg.num_keys_per_task;
            let cfg = cfg.clone();
            let barrier = barrier.clone();

            tokio::task::spawn_blocking(move || {
                // Pre-size latency buffers from the expected sample count
                // (+64 slack) so the hot loop never reallocates.
                let mut local_latencies =
                    Vec::with_capacity(cfg.num_keys_per_task / cfg.latency_sample_every + 64);
                let mut local_write_latencies =
                    Vec::with_capacity(cfg.num_keys_per_task / cfg.latency_sample_every / 5 + 64);
                let mut local_reads = 0usize;
                let mut local_hits = 0usize;
                let mut fresh_write_count = 0usize;
                // Per-task seed offset keeps streams deterministic yet distinct.
                let mut rng = rand::rngs::StdRng::seed_from_u64(cfg.rng_seed + i as u64 + 1);

                barrier.wait();
                let task_start = Instant::now();

                for seq_key in seq_start..seq_end {
                    let key_idx = key_gen.next_key(seq_key, &mut rng);
                    let k = &key_pool[key_idx];

                    let is_read = rng.random_bool(cfg.read_write_ratio);
                    // Sample latency probabilistically to keep timer overhead
                    // off most operations.
                    let should_measure = rng.random_bool(1.0 / cfg.latency_sample_every as f64);

                    if is_read {
                        local_reads += 1;
                        let op_start = if should_measure {
                            Some(Instant::now())
                        } else {
                            None
                        };
                        let hit = my_cache.get(k);
                        if let Some(t) = op_start {
                            local_latencies.push(t.elapsed().as_nanos() as u64);
                        }
                        if let Some(value) = hit {
                            local_hits += 1;
                            // Touch the value (sparse checksum) and black_box
                            // it so the read can't be optimized away.
                            let checksum: u32 = value
                                .as_bytes()
                                .iter()
                                .step_by(256)
                                .map(|&b| b as u32)
                                .sum();
                            std::hint::black_box(checksum);
                        }
                    } else {
                        // A fraction of writes target "fresh" keys outside the
                        // warm working set to exercise eviction churn.
                        let write_key_idx = if cfg.write_new_key_fraction > 0.0
                            && rng.random_bool(cfg.write_new_key_fraction)
                        {
                            let idx = total_keys + ((seq_start + fresh_write_count) % total_keys);
                            fresh_write_count += 1;
                            idx
                        } else {
                            key_idx
                        };
                        let k_write = &key_pool[write_key_idx];
                        let value = value_pool[write_key_idx % value_pool.len()].clone();
                        let op_start = if should_measure {
                            Some(Instant::now())
                        } else {
                            None
                        };
                        my_cache.insert(k_write.clone(), value);
                        if let Some(t) = op_start {
                            let ns = t.elapsed().as_nanos() as u64;
                            // Writes feed both the combined and write-only
                            // latency distributions.
                            local_latencies.push(ns);
                            local_write_latencies.push(ns);
                        }
                    }
                }

                (
                    local_latencies,
                    local_write_latencies,
                    local_reads,
                    local_hits,
                    task_start.elapsed(),
                )
            })
        })
        .collect();

    // Merge per-task results. Throughput is computed against the slowest
    // task's wall time (all tasks started together at the barrier).
    let mut all_latencies = Vec::new();
    let mut all_write_latencies = Vec::new();
    let mut total_reads = 0usize;
    let mut total_hits = 0usize;
    let mut max_duration = std::time::Duration::ZERO;
    for handle in tasks {
        let (lat, wlat, reads, hits, duration) = handle.await.unwrap();
        all_latencies.extend(lat);
        all_write_latencies.extend(wlat);
        total_reads += reads;
        total_hits += hits;
        max_duration = max_duration.max(duration);
    }

    let total_ops = cfg.num_tasks * cfg.num_keys_per_task;
    let throughput = total_ops as f64 / max_duration.as_secs_f64();
    // Guard against a pure-write run: no reads means hit rate is undefined;
    // report 0.0 rather than dividing by zero.
    let hit_rate = if total_reads > 0 {
        total_hits as f64 / total_reads as f64
    } else {
        0.0
    };

    BenchResults {
        throughput,
        hit_rate,
        total_ops,
        total_reads,
        total_hits,
        latencies_ns: all_latencies,
        write_latencies_ns: all_write_latencies,
    }
}
14 changes: 6 additions & 8 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,7 @@ const ALL_CACHES: &[&str] = &[
"lrumem",
"sieve",
"two_queue",
// `neocache` is deliberately not in this public list — see the
// comment in `caches/mod.rs` and `Cargo.toml` for why. Internal
// users carrying the overlay should add the entry back here in
// the same alphabetical-by-introduction order.
"neocache",
];

/// Dispatch a single benchmark run by cache name.
Expand All @@ -103,6 +100,7 @@ async fn dispatch(
"lrumem" => lrumem::run_bench(cfg, value_pool, key_pool).await,
"sieve" => sieve::run_bench(cfg, value_pool, key_pool).await,
"two_queue" => two_queue::run_bench(cfg, value_pool, key_pool).await,
"neocache" => neocache::run_bench(cfg, value_pool, key_pool).await,
// Cache names are validated against `ALL_CACHES` in `parse_args`, so any
// unknown name here indicates a programmer error (a name was added to
// `ALL_CACHES` without a matching arm here).
Expand Down Expand Up @@ -336,10 +334,10 @@ fn print_help() {
println!(" All flags accept both space-separated and = forms: --zipf 1.2 --zipf=1.2\n");
println!("AVAILABLE CACHES:");
println!(" moka, minimoka, quick_cache, schnellru, stretto, lrucache, tinyufo,");
println!(" foyer_cache, cached_lru, clru_cache, lrumem, sieve, two_queue\n");
println!(" foyer_cache, cached_lru, clru_cache, lrumem, sieve, two_queue, neocache\n");
println!("EXAMPLES:");
println!(" cargo run --release # run all caches");
println!(" cargo run --release -- -c moka,quick_cache # run a subset");
println!(" cargo run --release -- -c moka,neocache # run a subset");
println!(" cargo run --release -- --size 10000 --zipf 0.9 # custom config");
println!(" cargo run --release -- --pattern uniform --ratio 0.5 # different workload");
println!(" cargo run --release -- --info # full reference\n");
Expand Down Expand Up @@ -931,11 +929,11 @@ fn print_info() {

println!(" Available caches:");
println!(" moka, minimoka, quick_cache, schnellru, stretto, lrucache, tinyufo,");
println!(" foyer_cache, cached_lru, clru_cache, lrumem, sieve, two_queue\n");
println!(" foyer_cache, cached_lru, clru_cache, lrumem, sieve, two_queue, neocache\n");

println!(" Usage:");
println!(" cargo run --release # run all caches");
println!(" cargo run --release -- -c moka,quick_cache # run a subset");
println!(" cargo run --release -- -c moka,neocache # run a subset");
println!(" cargo run --release -- --size 10000 --zipf 0.9 # custom config");
println!(" cargo run --release -- --pattern uniform --ratio 0.5 # different workload");
println!(" cargo run --release -- --info # print this reference\n");
Expand Down
Loading