From b24d07276c0b223eb3ebccf2f9b4555c06ba6289 Mon Sep 17 00:00:00 2001
From: SamTV12345 <40429738+samtv12345@users.noreply.github.com>
Date: Sat, 30 May 2026 15:44:54 +0200
Subject: [PATCH] perf(cache): hot-path wins in CacheAndBufferLayer

Squashed and rebased onto the post-oxfmt main; reformatted to match the
repo's oxfmt style. Combines the following work:

- perf(cache): use structuredClone on the read path
- perf(cache): track dirty keys in a Set so flush() does not scan the LRU
- perf(cache): replace setInterval with on-demand setTimeout
- perf(cache): lock-free fast path for get() cache hits
- fix(cache): cloneOut falls back to cloneIn for non-cloneable values
- docs: cache/buffer layer performance design spec + implementation plan

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../plans/2026-05-28-cache-buffer-perf.md     | 1152 +++++++++++++++++
 .../2026-05-28-cache-buffer-perf-design.md    |  258 ++++
 lib/CacheAndBufferLayer.ts                    |  121 +-
 test/memory/test_tojson.spec.ts               |   11 +
 test/mock/test_dirty_set.spec.ts              |   98 ++
 test/mock/test_lazy_flush.spec.ts             |  111 ++
 test/mock/test_lock_fast_path.spec.ts         |  120 ++
 test/mock/test_metrics.spec.ts                |   20 +-
 8 files changed, 1856 insertions(+), 35 deletions(-)
 create mode 100644 docs/superpowers/plans/2026-05-28-cache-buffer-perf.md
 create mode 100644 docs/superpowers/specs/2026-05-28-cache-buffer-perf-design.md
 create mode 100644 test/mock/test_dirty_set.spec.ts
 create mode 100644 test/mock/test_lazy_flush.spec.ts
 create mode 100644 test/mock/test_lock_fast_path.spec.ts
diff --git a/docs/superpowers/plans/2026-05-28-cache-buffer-perf.md b/docs/superpowers/plans/2026-05-28-cache-buffer-perf.md
new file mode 100644
index 00000000..397b3781
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-28-cache-buffer-perf.md
@@ -0,0 +1,1152 @@
+# Cache & Buffer Layer Performance Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Apply four internal performance wins in `lib/CacheAndBufferLayer.ts` from the spec at `docs/superpowers/specs/2026-05-28-cache-buffer-perf-design.md`. No public API change, no default change, no behavioral regression.
+
+**Architecture:** All changes live in one file (`lib/CacheAndBufferLayer.ts`). Four self-contained wins applied in dependency order: (1) split `clone()` into `cloneIn`/`cloneOut` with `structuredClone` on the read path, (2) maintain a `_dirtyKeys: Set<string>` so `flush()` does not scan the entire LRU, (3) replace the constructor's `setInterval` with an on-demand `setTimeout`, (4) add a synchronous fast path in `get()` that skips the per-key lock on a cache hit with no in-flight writer. One new test file per win lives under `test/mock/`. The existing metrics test is adjusted because `get()` cache hits no longer increment `lockAcquires`/`lockReleases`.
+
+**Tech Stack:** TypeScript 6 (ESM), vitest for tests, rolldown for bundling, pnpm. Node ≥24 (so `structuredClone` is available natively). Existing patterns: tests in `test/mock/` use the `mock` driver (an `EventEmitter`) to intercept driver-level calls; private fields are reached via `(db.db as any)._fieldName` because TypeScript `private` is not runtime-enforced.
+
+---
+
+## File Structure
+
+**Modified:**
+
+- `lib/CacheAndBufferLayer.ts` — all four wins. Existing structure (`LRU` class, `SelfContainedPromise`, `Database` class, `clone` function) is preserved; we rename `clone` to `cloneIn`, add `cloneOut`, add `_dirtyKeys` and `_flushTimer` fields, change `flush()`'s scan loop, replace the constructor's `setInterval`, and reshape `get()`.
+- `test/mock/test_metrics.spec.ts` — three locations adjust expected metrics where `get()` (but not `getSub()`) on a cache hit no longer increments lock counters.
+
+**Created:**
+
+- `test/mock/test_dirty_set.spec.ts` — verifies the `_dirtyKeys` invariant under set/flush, re-set during in-flight write, and failed-write paths.
+- `test/mock/test_lazy_flush.spec.ts` — verifies that an idle database has no scheduled timer, that `set()` arms the timer, and that `close()` clears it.
+- `test/mock/test_lock_fast_path.spec.ts` — verifies that cache-hit `get()` does not acquire the per-key lock and that concurrent set+get serializes correctly while the lock is held.
+
+**Not touched:** any database driver under `databases/`, `index.ts`, `lib/AbstractDatabase.ts`, `lib/logging.ts`. No `Settings` field is added or changed.
+
+---
+
+## Task 0: Baseline & Worktree Sanity
+
+**Files:** none
+
+- [ ] **Step 1: Run the full test suite to confirm a green baseline**
+
+```bash
+pnpm test
+```
+
+Expected: all tests pass. If anything fails on `main`, stop and report — do not attempt to implement on top of a red baseline.
+
+- [ ] **Step 2: Run the type checker**
+
+```bash
+pnpm run ts-check
+```
+
+Expected: no output, exit 0.
+
+- [ ] **Step 3: Confirm the spec file is present**
+
+```bash
+ls docs/superpowers/specs/2026-05-28-cache-buffer-perf-design.md
+```
+
+Expected: the file exists. If missing, stop — the plan is meaningless without the spec.
+
+---
+
+## Task 1: Win 1 — Split `clone()` into `cloneIn` / `cloneOut`
+
+**Files:**
+
+- Modify: `lib/CacheAndBufferLayer.ts` (rename `clone` → `cloneIn`, add `cloneOut`, rewire call sites)
+
+The existing test `test/memory/test_tojson.spec.ts` is the safety net for the write path (`cloneIn`). No new test file is needed in this task — every call site we touch is already covered by an existing test (`test_lib.ts` for get/set/setSub/getSub/findKeys; `test_tojson.spec.ts` for toJSON semantics).
+
+- [ ] **Step 1: Rename `clone` to `cloneIn` at its definition (no behavior change)**
+
+Open `lib/CacheAndBufferLayer.ts`. At the bottom of the file (around line 608), find:
+
+```ts
+const clone = (obj: unknown, key = ''): unknown => {
+```
+
+Rename it to:
+
+```ts
+const cloneIn = (obj: unknown, key = ''): unknown => {
+```
+
+Inside the function body, the recursive calls to `clone(...)` become `cloneIn(...)`. There are three of them (the `toJSON` re-clone, the `Array.isArray` map, and the object-property loop).
+
+- [ ] **Step 2: Add `cloneOut` immediately below `cloneIn`**
+
+Add directly after `cloneIn`'s closing brace:
+
+```ts
+// Read-direction clone. The buffered value has already been processed by cloneIn (which strips
+// toJSON methods) or by JSON.parse (which produces only plain JSON-safe values), so structuredClone
+// will never see a function or other non-cloneable type here.
+const cloneOut = (v: unknown): unknown =>
+  v == null || typeof v !== "object" ? v : structuredClone(v);
+```
+
+- [ ] **Step 3: Rewire call sites — replace `clone(...)` with `cloneIn(...)` or `cloneOut(...)` per direction**
+
+There are six call sites in the file. Replace each. Read direction → `cloneOut`. Write direction → `cloneIn`.
+
+In `get` (around line 275):
+
+```ts
+return clone(v);
+```
+
+becomes:
+
+```ts
+return cloneOut(v);
+```
+
+In `findKeys` (around line 338):
+
+```ts
+return clone(keyValues) as string[];
+```
+
+becomes:
+
+```ts
+return cloneOut(keyValues) as string[];
+```
+
+In `findKeysPaged` — there are TWO `clone(...)` calls (around lines 371 and 380). Both become `cloneOut(...)`:
+
+```ts
+return clone(all.slice(start, start + options.limit)) as string[];
+```
+
+→ `return cloneOut(all.slice(start, start + options.limit)) as string[];`
+and
+
+```ts
+return clone(keys) as string[];
+```
+
+→ `return cloneOut(keys) as string[];`
+
+In `set` (line 389):
+
+```ts
+value = clone(value);
+```
+
+becomes:
+
+```ts
+value = cloneIn(value);
+```
+
+In `setSub` (line 438):
+
+```ts
+value = clone(value);
+```
+
+becomes:
+
+```ts
+value = cloneIn(value);
+```
+
+In `getSub` (line 511):
+
+```ts
+return clone(v);
+```
+
+becomes:
+
+```ts
+return cloneOut(v);
+```
+
+In `_write` (line 557 — the non-JSON fallback):
+
+```ts
+serialized = clone(entry.value) as string | null;
+```
+
+becomes:
+
+```ts
+serialized = cloneOut(entry.value) as string | null;
+```
+
+- [ ] **Step 4: Confirm no stray `clone(` references remain**
+
+```bash
+grep -n "[^a-zA-Z]clone(" lib/CacheAndBufferLayer.ts
+```
+
+Expected: empty output. Any hit means a call site was missed; fix it before continuing.
+
+- [ ] **Step 5: Run the type checker**
+
+```bash
+pnpm run ts-check
+```
+
+Expected: no output, exit 0.
+
+- [ ] **Step 6: Run the toJSON behavior test**
+
+```bash
+pnpm test test/memory/test_tojson.spec.ts
+```
+
+Expected: 4 passing tests (`no .toJSON method`, `direct`, `object property`, `array entry`).
+
+- [ ] **Step 7: Run the full mock test suite**
+
+```bash
+pnpm test test/mock
+```
+
+Expected: all green.
+
+- [ ] **Step 8: Commit**
+
+```bash
+git add lib/CacheAndBufferLayer.ts
+git commit -m "perf(cache): use structuredClone on the read path
+
+Split clone() into cloneIn (write path, preserves toJSON semantics)
+and cloneOut (read path, delegates to V8-native structuredClone). The
+cached value is always JSON-safe by construction, so the read-path
+fast cloner does not need toJSON support."
+```
+
+---
+
+## Task 2: Win 2 — Dirty-Key Set
+
+**Files:**
+
+- Create: `test/mock/test_dirty_set.spec.ts`
+- Modify: `lib/CacheAndBufferLayer.ts` (add `_dirtyKeys` field, mutations in `_setLocked` and `markDone`, change `flush()` scan loop)
+
+- [ ] **Step 1: Write the failing test for the `_dirtyKeys` invariant**
+
+Create `test/mock/test_dirty_set.spec.ts`:
+
+```ts
+import * as ueberdb from "../../index";
+import { ConsoleLogger } from "../../lib/logging";
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+
+type MockSettings = { mock?: any };
+
+const logger = new ConsoleLogger();
+
+const dirtyKeys = (db: any): Set<string> => (db.db as any)._dirtyKeys;
+
+describe(__filename, () => {
+  let db: any = null;
+  let mock: any = null;
+
+  const createDb = async (wrapperSettings: Record<string, unknown> = {}) => {
+    const settings: MockSettings = {};
+    db = new ueberdb.Database("mock", settings, { json: false, ...wrapperSettings }, logger);
+    await db.init();
+    mock = settings.mock;
+    mock.once("init", (cb: any) => cb());
+  };
+
+  afterEach(async () => {
+    if (mock != null) {
+      mock.removeAllListeners();
+      mock.once("close", (cb: any) => cb());
+      mock = null;
+    }
+    if (db != null) {
+      await db.close();
+      db = null;
+    }
+  });
+
+  it("set() adds the key to _dirtyKeys; flush() drains it", async () => {
+    // writeInterval=1e9 means the lazy flush timer effectively never fires; we drive flush manually.
+    await createDb({ writeInterval: 1e9 });
+    mock.on("set", (k: any, v: any, cb: any) => cb());
+    const writeP = db.set("k", "v");
+    // The buffered entry is dirty as soon as set() returns into the event loop.
+    expect(dirtyKeys(db).has("k")).toBe(true);
+    expect(dirtyKeys(db).size).toBe(1);
+    await Promise.all([writeP, db.flush()]);
+    expect(dirtyKeys(db).size).toBe(0);
+  });
+
+  it("re-set during an in-flight write keeps the key in _dirtyKeys", async () => {
+    await createDb({ writeInterval: 1e9 });
+    let releaseFirstWrite: (() => void) | null = null;
+    const firstWriteSeen = new Promise<void>((resolve) => {
+      mock.once("set", (k: any, v: any, cb: any) => {
+        resolve();
+        releaseFirstWrite = () => cb();
+      });
+    });
+    const firstWriteP = db.set("k", "v1");
+    const flushedP = db.flush();
+    await firstWriteSeen;
+    // While the first write is in flight, queue a second write to the same key.
+    mock.once("set", (k: any, v: any, cb: any) => cb());
+    const secondWriteP = db.set("k", "v2");
+    // The key must remain in _dirtyKeys: the old in-flight entry is being written,
+    // and a new dirty entry has taken its place in the buffer.
+    expect(dirtyKeys(db).has("k")).toBe(true);
+    // Release the first write; both promises must eventually resolve and _dirtyKeys must drain.
+    releaseFirstWrite!();
+    await Promise.all([firstWriteP, secondWriteP, flushedP]);
+    // Drain any remaining dirty entry (the v2 write) — it may have been picked up by the
+    // same flush() loop, but to be robust against scheduling we call flush() once more.
+    await db.flush();
+    expect(dirtyKeys(db).size).toBe(0);
+  });
+
+  it("failed write removes the key from _dirtyKeys and rejects the caller", async () => {
+    await createDb({ writeInterval: 1e9 });
+    mock.on("set", (k: any, v: any, cb: any) => cb(new Error("boom")));
+    const writeP = db.set("k", "v");
+    const flushedP = db.flush();
+    await expect(writeP).rejects.toThrow("boom");
+    await flushedP;
+    expect(dirtyKeys(db).size).toBe(0);
+  });
+});
+```
+
+- [ ] **Step 2: Run the new test — confirm it fails because `_dirtyKeys` does not exist yet**
+
+```bash
+pnpm test test/mock/test_dirty_set.spec.ts
+```
+
+Expected: all three tests fail. Typical message: `TypeError: Cannot read properties of undefined (reading 'has')` because `(db.db as any)._dirtyKeys` is `undefined`.
+
+- [ ] **Step 3: Add the `_dirtyKeys` field to the `Database` class**
+
+In `lib/CacheAndBufferLayer.ts`, in the `Database` class field declarations (around line 161-164, near `_locks`), add:
+
+```ts
+  private readonly _dirtyKeys: Set<string> = new Set();
+```
+
+Place it immediately after the `_locks` field for locality.
+
+- [ ] **Step 4: Add `_dirtyKeys.add(key)` in `_setLocked`**
+
+In `_setLocked` (around line 407), after the line `this.buffer.set(key, entry);` (around line 420), add:
+
+```ts
+this._dirtyKeys.add(key);
+```
+
+Indented to match the surrounding block. This runs every time we mark an entry dirty.
+
+- [ ] **Step 5: Change `markDone` to take `key` and conditionally remove from `_dirtyKeys`**
+
+In `_write` (around line 539), the existing `markDone` closure is:
+
+```ts
+const markDone = (entry: CacheEntry, err?: Error | null): void => {
+  if (entry.writingInProgress) {
+    entry.writingInProgress = false;
+    if (err != null) ++this.metrics.writesToDbFailed;
+    ++this.metrics.writesToDbFinished;
+  }
+  entry.dirty!.done(err);
+  entry.dirty = null;
+};
+```
+
+Replace it with:
+
+```ts
+const markDone = (key: string, entry: CacheEntry, err?: Error | null): void => {
+  if (entry.writingInProgress) {
+    entry.writingInProgress = false;
+    if (err != null) ++this.metrics.writesToDbFailed;
+    ++this.metrics.writesToDbFinished;
+  }
+  // Reference-equality: only clear the dirty marker for THIS key if the entry currently
+  // in the buffer is the same one we just wrote. If a re-set during the write replaced it
+  // with a fresh dirty entry, the new entry must stay marked dirty.
+  const current = this.buffer.get(key, false);
+  if (current === entry) this._dirtyKeys.delete(key);
+  entry.dirty!.done(err);
+  entry.dirty = null;
+};
+```
+
+- [ ] **Step 6: Update every `markDone` call site in `_write` to pass `key`**
+
+There are four call sites in `_write`. Update each:
+
+Around line 560 (inside the serialization try/catch in the `for (const [key, entry] of dirtyEntries)` loop):
+
+```ts
+markDone(entry, err as Error);
+```
+
+becomes:
+
+```ts
+markDone(key, entry, err as Error);
+```
+
+Around line 582 (inside `writeOneOp(op, entry)`):
+
+```ts
+markDone(entry, writeErr);
+```
+
+becomes:
+
+```ts
+markDone(op.key, entry, writeErr);
+```
+
+Around line 601 (the bulk-success branch):
+
+```ts
+if (success) entries.forEach((entry) => markDone(entry, null));
+```
+
+becomes:
+
+```ts
+if (success) {
+  for (let i = 0; i < entries.length; i++) markDone(ops[i].key, entries[i], null);
+}
+```
+
+(The `forEach`→`for` change is required because we now need the parallel `ops[i].key`.)
+
+- [ ] **Step 7: Add `LRU.get(k, isUse = false)` support — already exists**
+
+Verify that `LRU.get` already accepts an `isUse` parameter (it does; see line 113 of the current file). No change needed. The new code in `markDone` uses `this.buffer.get(key, false)` — confirm this compiles.
+
+- [ ] **Step 8: Change `flush()`'s scan loop to iterate `_dirtyKeys`**
+
+In `flush()` (around line 517), the inner block:
+
+```ts
+const dirtyEntries: [string, CacheEntry][] = [];
+for (const entry of this.buffer) {
+  if (entry[1].dirty && !entry[1].writingInProgress) {
+    dirtyEntries.push(entry);
+    if (this.settings.bulkLimit && dirtyEntries.length >= this.settings.bulkLimit) break;
+  }
+}
+```
+
+becomes:
+
+```ts
+const dirtyEntries: [string, CacheEntry][] = [];
+for (const key of this._dirtyKeys) {
+  const entry = this.buffer.get(key, false);
+  if (!entry || !entry.dirty || entry.writingInProgress) continue;
+  dirtyEntries.push([key, entry]);
+  if (this.settings.bulkLimit && dirtyEntries.length >= this.settings.bulkLimit) break;
+}
+```
+
+- [ ] **Step 9: Run the type checker**
+
+```bash
+pnpm run ts-check
+```
+
+Expected: no output, exit 0.
+
+- [ ] **Step 10: Run the new dirty-set test — expect it to pass**
+
+```bash
+pnpm test test/mock/test_dirty_set.spec.ts
+```
+
+Expected: all three tests pass.
+
+- [ ] **Step 11: Run the full mock test suite — confirm no regressions**
+
+```bash
+pnpm test test/mock
+```
+
+Expected: all green. The metrics test in particular must still pass — the `_dirtyKeys` work does not change any metrics-relevant behavior.
+
+- [ ] **Step 12: Run the memory test suite — confirm `toJSON` and getSub still work**
+
+```bash
+pnpm test test/memory
+```
+
+Expected: all green.
+
+- [ ] **Step 13: Commit**
+
+```bash
+git add lib/CacheAndBufferLayer.ts test/mock/test_dirty_set.spec.ts
+git commit -m "perf(cache): track dirty keys in a Set so flush() does not scan the LRU
+
+flush() previously iterated the entire LRU (default capacity 10 000) to
+find entries with dirty != null. Maintain a Set<string> of currently
+dirty keys, populated in _setLocked and drained in markDone with a
+reference-equality guard so entry replacement during an in-flight write
+is handled correctly."
+```
+
+---
+
+## Task 3: Win 3 — Lazy Flush Scheduling
+
+**Files:**
+
+- Create: `test/mock/test_lazy_flush.spec.ts`
+- Modify: `lib/CacheAndBufferLayer.ts` (remove `setInterval`, add `_flushTimer` + `_scheduleFlush`, update `close()` and `flush()` re-arm)
+
+- [ ] **Step 1: Write the failing test for lazy flush scheduling**
+
+Create `test/mock/test_lazy_flush.spec.ts`:
+
+```ts
+import * as ueberdb from "../../index";
+import { ConsoleLogger } from "../../lib/logging";
+import { afterEach, describe, expect, it } from "vitest";
+
+type MockSettings = { mock?: any };
+
+const logger = new ConsoleLogger();
+
+const flushTimer = (db: any): unknown => (db.db as any)._flushTimer;
+
+describe(__filename, () => {
+  let db: any = null;
+  let mock: any = null;
+
+  const createDb = async (wrapperSettings: Record<string, unknown> = {}) => {
+    const settings: MockSettings = {};
+    db = new ueberdb.Database("mock", settings, { json: false, ...wrapperSettings }, logger);
+    await db.init();
+    mock = settings.mock;
+    mock.once("init", (cb: any) => cb());
+  };
+
+  afterEach(async () => {
+    if (mock != null) {
+      mock.removeAllListeners();
+      mock.once("close", (cb: any) => cb());
+      mock = null;
+    }
+    if (db != null) {
+      await db.close();
+      db = null;
+    }
+  });
+
+  it("idle database does not arm the flush timer", async () => {
+    await createDb({ writeInterval: 50 });
+    expect(flushTimer(db)).toBe(null);
+    // Give the event loop a tick or two to confirm nothing schedules itself.
+    await new Promise((r) => setTimeout(r, 30));
+    expect(flushTimer(db)).toBe(null);
+  });
+
+  it("set() arms the timer; flush() leaves it null after draining", async () => {
+    await createDb({ writeInterval: 1e9 }); // huge interval so the timer cannot fire during the test
+    mock.on("set", (k: any, v: any, cb: any) => cb());
+    const writeP = db.set("k", "v");
+    // Synchronously after the set() call entered, the timer must be armed.
+    expect(flushTimer(db)).not.toBe(null);
+    await Promise.all([writeP, db.flush()]);
+    // After an explicit flush() that drained everything, the timer must be null.
+    expect(flushTimer(db)).toBe(null);
+  });
+
+  it("close() clears a pending flush timer", async () => {
+    await createDb({ writeInterval: 1e9 });
+    mock.on("set", (k: any, v: any, cb: any) => cb());
+    const writeP = db.set("k", "v");
+    expect(flushTimer(db)).not.toBe(null);
+    await Promise.all([writeP, db.flush()]);
+    // No timer pending now; close() must succeed without leaking handles.
+    mock.once("close", (cb: any) => cb());
+    await db.close();
+    db = null; // prevent the afterEach close from double-closing
+  });
+
+  it("writeInterval=0 mode never arms the timer", async () => {
+    await createDb({ writeInterval: 0 });
+    mock.on("set", (k: any, v: any, cb: any) => cb());
+    await db.set("k", "v");
+    expect(flushTimer(db)).toBe(null);
+  });
+});
+```
+
+- [ ] **Step 2: Run the new test — confirm it fails because `_flushTimer` does not exist yet**
+
+```bash
+pnpm test test/mock/test_lazy_flush.spec.ts
+```
+
+Expected: all four tests fail. Typical failure: `_flushTimer` is `undefined`, not `null` — and the existing `setInterval` produces a non-null `flushInterval` instead.
+
+- [ ] **Step 3: Remove the `flushInterval` field and replace with `_flushTimer`**
+
+In `lib/CacheAndBufferLayer.ts`, find the field declaration (around line 164):
+
+```ts
+  private readonly flushInterval: ReturnType<typeof setInterval> | null;
+```
+
+Replace with:
+
+```ts
+  private _flushTimer: ReturnType<typeof setTimeout> | null = null;
+```
+
+(Drop `readonly` — we re-assign it; drop the inline initialization to `null` in the constructor since the field initializer takes care of it.)
+
+- [ ] **Step 4: Delete the constructor's `setInterval` block**
+
+Find (around line 217):
+
+```ts
+this.flushInterval =
+  this.settings.writeInterval > 0
+    ? setInterval(() => {
+        void this.flush();
+      }, this.settings.writeInterval)
+    : null;
+```
+
+Delete the entire assignment. The constructor no longer sets up the timer.
+
+- [ ] **Step 5: Add `_scheduleFlush` as a private method**
+
+Add as a method inside the `Database` class, near the `_lock`/`_unlock` methods (around line 234, after `_unlock`):
+
+```ts
+  private _scheduleFlush(): void {
+    if (this._flushTimer != null) return;
+    if (this.settings.writeInterval <= 0) return;
+    if (this._dirtyKeys.size === 0) return;
+    this._flushTimer = setTimeout(() => {
+      this._flushTimer = null;
+      void this.flush();
+    }, this.settings.writeInterval);
+    this._flushTimer.unref?.();
+  }
+```
+
+- [ ] **Step 6: Call `_scheduleFlush` from `_setLocked`**
+
+In `_setLocked`, immediately after the `this._dirtyKeys.add(key);` line added in Task 2 step 4, add:
+
+```ts
+this._scheduleFlush();
+```
+
+(Same indentation. The schedule call is a no-op if a timer is already armed or `writeInterval <= 0`.)
+
+- [ ] **Step 7: Re-arm the timer at the end of `flush()` if dirty entries remain**
+
+In `flush()` (around line 517), the function currently looks like:
+
+```ts
+  async flush(): Promise<void> {
+    if (this._flushDone == null) {
+      this._flushDone = (async () => {
+        while (true) {
+          while (this._flushPaused != null) await this._flushPaused;
+          ...
+        }
+      })();
+    }
+    await this._flushDone;
+    this._flushDone = null;
+  }
+```
+
+Add a post-`_flushDone`-null re-arm so that any dirty entries left behind (in particular: an entry that was replaced during a write, so its newer counterpart is still dirty) get picked up:
+
+```ts
+  async flush(): Promise<void> {
+    if (this._flushDone == null) {
+      this._flushDone = (async () => {
+        while (true) {
+          while (this._flushPaused != null) await this._flushPaused;
+          const dirtyEntries: [string, CacheEntry][] = [];
+          for (const key of this._dirtyKeys) {
+            const entry = this.buffer.get(key, false);
+            if (!entry || !entry.dirty || entry.writingInProgress) continue;
+            dirtyEntries.push([key, entry]);
+            if (this.settings.bulkLimit && dirtyEntries.length >= this.settings.bulkLimit) break;
+          }
+          if (dirtyEntries.length === 0) return;
+          await this._write(dirtyEntries);
+        }
+      })();
+    }
+    await this._flushDone;
+    this._flushDone = null;
+    if (this._dirtyKeys.size > 0) this._scheduleFlush();
+  }
+```
+
+(The inner-loop body was already replaced in Task 2 step 8 — this step ADDS the trailing `if (this._dirtyKeys.size > 0) this._scheduleFlush();` line.)
+
+- [ ] **Step 8: Update `close()` to clear the timer**
+
+In `close()` (around line 260):
+
+```ts
+  async close(): Promise<void> {
+    clearInterval(this.flushInterval ?? undefined);
+    await this.flush();
+    await this.wrappedDB!.close();
+    this.wrappedDB = null;
+  }
+```
+
+becomes:
+
+```ts
+  async close(): Promise<void> {
+    if (this._flushTimer != null) {
+      clearTimeout(this._flushTimer);
+      this._flushTimer = null;
+    }
+    await this.flush();
+    await this.wrappedDB!.close();
+    this.wrappedDB = null;
+  }
+```
+
+- [ ] **Step 9: Run the type checker**
+
+```bash
+pnpm run ts-check
+```
+
+Expected: no output, exit 0. If TypeScript complains about `clearInterval`/`setInterval` import (it shouldn't — they are globals), check that you removed only the body but not necessary imports.
+
+- [ ] **Step 10: Run the new lazy-flush test — expect it to pass**
+
+```bash
+pnpm test test/mock/test_lazy_flush.spec.ts
+```
+
+Expected: all four tests pass.
+
+- [ ] **Step 11: Run the dirty-set test from Task 2 — must still pass**
+
+```bash
+pnpm test test/mock/test_dirty_set.spec.ts
+```
+
+Expected: all three tests pass. Win 3 should not have broken Win 2.
+
+- [ ] **Step 12: Run the full mock suite**
+
+```bash
+pnpm test test/mock
+```
+
+Expected: all green. `test_flush.spec.ts` in particular exercises explicit `flush()` calls and must continue to work.
+
+- [ ] **Step 13: Commit**
+
+```bash
+git add lib/CacheAndBufferLayer.ts test/mock/test_lazy_flush.spec.ts
+git commit -m "perf(cache): replace setInterval with on-demand setTimeout
+
+The constructor no longer arms a periodic timer. Instead, _setLocked
+calls _scheduleFlush() when it makes an entry dirty; the timer fires
+once, runs flush(), and clears itself. flush() re-arms the timer if
+new dirty entries remain (entry replacement during write). Idle
+databases consume zero timer ticks."
+```
+
+---
+
+## Task 4: Win 4 — Lock Fast-Path in `get()`
+
+**Files:**
+
+- Create: `test/mock/test_lock_fast_path.spec.ts`
+- Modify: `lib/CacheAndBufferLayer.ts` (reshape `get()`)
+- Modify: `test/mock/test_metrics.spec.ts` (drop `lockAcquires`/`lockReleases` from expected deltas where they no longer apply — done AFTER the implementation so the test gets red exactly when the behavior changes)
+
+This task changes externally observable metrics for `get()` cache hits: `lockAcquires` and `lockReleases` no longer increment on a cache hit. The metrics counters themselves remain in the public `Metrics` type — only the per-call increments change.
+
+**Order matters:** the existing `test_metrics.spec.ts` will start failing the moment we change `get()`, so the test edit must be staged AFTER the implementation, not before. (Otherwise the edited test would be wrong-too-early: stripping `lockAcquires` from `expected` while the actual delta still contains it.)
+
+- [ ] **Step 1: Write the failing test for the lock fast-path itself**
+
+Create `test/mock/test_lock_fast_path.spec.ts`:
+
+```ts
+import * as ueberdb from "../../index";
+import { ConsoleLogger } from "../../lib/logging";
+import { afterEach, describe, expect, it } from "vitest";
+
+type MockSettings = { mock?: any };
+
+const logger = new ConsoleLogger();
+
+describe(__filename, () => {
+  let db: any = null;
+  let mock: any = null;
+
+  const createDb = async (wrapperSettings: Record<string, unknown> = {}) => {
+    const settings: MockSettings = {};
+    db = new ueberdb.Database("mock", settings, { json: false, ...wrapperSettings }, logger);
+    await db.init();
+    mock = settings.mock;
+    mock.once("init", (cb: any) => cb());
+  };
+
+  afterEach(async () => {
+    if (mock != null) {
+      mock.removeAllListeners();
+      mock.once("close", (cb: any) => cb());
+      mock = null;
+    }
+    if (db != null) {
+      await db.close();
+      db = null;
+    }
+  });
+
+  it("cache-hit get() does not acquire the per-key lock", async () => {
+    await createDb({ writeInterval: 1e9 });
+    // Prime the cache: a single set+flush is enough to populate the buffer with the value.
+    mock.once("set", (k: any, v: any, cb: any) => cb());
+    await Promise.all([db.set("k", "v"), db.flush()]);
+    // After the write is finished, the buffer holds the value; the lock map is empty.
+    const before = { ...db.metrics };
+    const val = await db.get("k");
+    expect(val).toBe("v");
+    const after = db.metrics;
+    expect(after.lockAcquires - before.lockAcquires).toBe(0);
+    expect(after.lockReleases - before.lockReleases).toBe(0);
+    expect(after.readsFromCache - before.readsFromCache).toBe(1);
+  });
+
+  it("cache-miss get() still acquires the lock", async () => {
+    await createDb({ writeInterval: 1e9 });
+    mock.once("get", (k: any, cb: any) => cb(null, "v"));
+    const before = { ...db.metrics };
+    const val = await db.get("k");
+    expect(val).toBe("v");
+    const after = db.metrics;
+    expect(after.lockAcquires - before.lockAcquires).toBe(1);
+    expect(after.lockReleases - before.lockReleases).toBe(1);
+  });
+
+  it("get() during a write-in-progress with the lock released returns the buffered value via fast path", async () => {
+    await createDb({ writeInterval: 1e9 });
+    let releaseWrite: (() => void) | null = null;
+    const writeStarted = new Promise<void>((resolve) => {
+      mock.once("set", (k: any, v: any, cb: any) => {
+        resolve();
+        releaseWrite = () => cb();
+      });
+    });
+    const writeP = db.set("k", "v2");
+    const flushedP = db.flush();
+    await writeStarted;
+    // At this moment: _write is awaiting the mock's callback. The per-key lock has been released
+    // (set() releases the lock before awaiting entry.dirty). The buffer holds value 'v2'.
+    // The fast path must apply.
+    const before = { ...db.metrics };
+    const val = await db.get("k");
+    expect(val).toBe("v2");
+    expect(db.metrics.lockAcquires - before.lockAcquires).toBe(0);
+    expect(db.metrics.readsFromCache - before.readsFromCache).toBe(1);
+    releaseWrite!();
+    await Promise.all([writeP, flushedP]);
+  });
+
+  it("get() while a setter holds the lock takes the slow path", async () => {
+    await createDb({ writeInterval: 1e9 });
+    // Drive set() into the locked region by making _lock contend. We do this by setting
+    // the same key twice in quick succession: the second set must await the first set's lock.
+    mock.on("set", (k: any, v: any, cb: any) => cb());
+    const set1 = db.set("k", "v1");
+    const set2 = db.set("k", "v2");
+    // While set2 is awaiting the lock, issue a get. It must take the slow path (lockAwaits increases).
+    const before = { ...db.metrics };
+    const getP = db.get("k");
+    await Promise.all([set1, set2, db.flush(), getP]);
+    expect(db.metrics.lockAwaits - before.lockAwaits).toBeGreaterThanOrEqual(1);
+  });
+});
+```
+
+- [ ] **Step 2: Run the new fast-path test — confirm the first test fails**
+
+```bash
+pnpm test test/mock/test_lock_fast_path.spec.ts
+```
+
+Expected: the first test (`cache-hit get() does not acquire the per-key lock`) fails — it expects `lockAcquires` delta of 0, but the current code increments it to 1. The other tests may or may not pass depending on timing; the critical signal is the first test failing.
+
+- [ ] **Step 3: Implement the fast-path in `get()`**
+
+In `lib/CacheAndBufferLayer.ts`, the current `get` (around line 267-276):
+
+```ts
+  async get(key: string): Promise<unknown> {
+    let v: unknown;
+    await this._lock(key);
+    try {
+      v = await this._getLocked(key);
+    } finally {
+      this._unlock(key);
+    }
+    return clone(v);
+  }
+```
+
+Replace with:
+
+```ts
+  async get(key: string): Promise<unknown> {
+    // Fast path: cache hit + no writer currently holding the per-key lock.
+    // The check and the buffer read execute synchronously (no `await` until after the return),
+    // so no concurrent set/setSub can interleave. cloneOut produces a consistent snapshot.
+    if (!this._locks.has(key)) {
+      const entry = this.buffer.get(key);
+      if (entry != null) {
+        ++this.metrics.reads;
+        ++this.metrics.readsFromCache;
+        ++this.metrics.readsFinished;
+        if (this.logger.isDebugEnabled()) {
+          this.logger.debug(
+            `GET    - ${key} - ${JSON.stringify(entry.value)} - ` +
+              `from ${entry.dirty ? 'dirty buffer' : 'cache'}`,
+          );
+        }
+        return cloneOut(entry.value);
+      }
+    }
+    // Slow path: cache miss or a writer holds the lock.
+    let v: unknown;
+    await this._lock(key);
+    try {
+      v = await this._getLocked(key);
+    } finally {
+      this._unlock(key);
+    }
+    return cloneOut(v);
+  }
+```
+
+(Notice that `clone(v)` on the last line was already changed to `cloneOut(v)` in Task 1 step 3 — re-verify.)
+
+- [ ] **Step 4: Run the type checker**
+
+```bash
+pnpm run ts-check
+```
+
+Expected: no output, exit 0.
+
+- [ ] **Step 5: Run the new fast-path test — expect all four to pass**
+
+```bash
+pnpm test test/mock/test_lock_fast_path.spec.ts
+```
+
+Expected: 4 tests passing.
+
+- [ ] **Step 6: Run the existing metrics test — expect it to fail because cache-hit `get()` no longer increments `lockAcquires`/`lockReleases`**
+
+```bash
+pnpm test test/mock/test_metrics.spec.ts
+```
+
+Expected: failures in the `get` → `cache hit` subcase and in `get` → `read of in-progress write`. The error message will be of the form `AssertionError: Expected {... lockAcquires: 1, lockReleases: 1, ...} to deeply equal {... no lockAcquires, no lockReleases ...}`. This is the cue to apply the metrics adjustment in the next step.
+
+- [ ] **Step 7: Update `test/mock/test_metrics.spec.ts` to reflect the new metrics for `get` cache hits**
+
+There are two locations to adjust. They are inside `describe('reads', ...)` (line 71) and inside the `tcs` array entry for `get` (which is at index 0).
+
+**Location 1 — the inner subtc loop (around line 124):** Find the existing `it(subtc.name, ...)` block whose trailing line is:
+
+```ts
+assertMetricsDelta(before, db.metrics, subtc.wantMetrics);
+```
+
+Change that trailing line to:
+
+```ts
+// After perf refactor: get() cache hits no longer acquire the per-key lock.
+// getSub() still locks because the slow path is the only path it uses.
+const expected = { ...subtc.wantMetrics };
+if (tc.name === "get" && subtc.cacheHit) {
+  delete (expected as any).lockAcquires;
+  delete (expected as any).lockReleases;
+}
+assertMetricsDelta(before, db.metrics, expected);
+```
+
+**Location 2 — the "read of in-progress write" test (around line 157):** Find:
+
+```ts
+        it('read of in-progress write', async () => {
+          ...
+          const before = {...db.metrics};
+          await tc.f(key);
+          assertMetricsDelta(before, db.metrics, {
+            lockAcquires: 1,
+            lockReleases: 1,
+            reads: 1,
+            readsFinished: 1,
+            readsFromCache: 1,
+          });
+          ...
+        });
+```
+
+Change the `assertMetricsDelta` block to:
+
+```ts
+const expected: Record<string, number> = {
+  reads: 1,
+  readsFinished: 1,
+  readsFromCache: 1,
+};
+if (tc.name === "getSub") {
+  expected.lockAcquires = 1;
+  expected.lockReleases = 1;
+}
+assertMetricsDelta(before, db.metrics, expected);
+```
+
+The lock-contention test for `get` (around line 663-700) needs NO change: contention forces the slow path (because `_locks.has(key)` is true), so `lockAwaits: 1` still increments. Verify by reading the test that no other lock-related assertion fires.
+
+- [ ] **Step 8: Run the metrics test — expect it to pass**
+
+```bash
+pnpm test test/mock/test_metrics.spec.ts
+```
+
+Expected: all green.
+
+- [ ] **Step 9: Run all mock tests**
+
+```bash
+pnpm test test/mock
+```
+
+Expected: all green.
+
+- [ ] **Step 10: Run the full test suite**
+
+```bash
+pnpm test
+```
+
+Expected: all green, including the in-memory `test/memory/*`, the speed-acceptance check in `test/lib/test_lib.ts`, and per-backend integration tests that are available locally. (Docker-driven integration tests will run only if the relevant containers are reachable; their absence is acceptable as long as the in-memory and mock suites pass.)
+
+- [ ] **Step 11: Run lint + format check**
+
+```bash
+pnpm run lint
+pnpm run format:check
+```
+
+Expected: no warnings/errors. If lint complains about unused variables, double-check that you didn't leave any stale references to `clone` (now `cloneIn`/`cloneOut`) or `flushInterval` (now `_flushTimer`).
+
+- [ ] **Step 12: Commit**
+
+```bash
+git add lib/CacheAndBufferLayer.ts test/mock/test_metrics.spec.ts test/mock/test_lock_fast_path.spec.ts
+git commit -m "perf(cache): lock-free fast path for get() cache hits
+
+A cache-hit get() with no concurrent writer no longer constructs a
+SelfContainedPromise or mutates the per-key lock Map. The check
+(_locks.has(key)) and buffer read execute synchronously before the
+first await, so no writer can interleave; structuredClone produces a
+consistent snapshot. Adjusts test_metrics.spec.ts to reflect that
+cache-hit get() (but not getSub()) no longer increments lockAcquires
+or lockReleases."
+```
+
+---
+
+## Task 5: Verification & PR-ready summary
+
+**Files:** none (artifacts only)
+
+- [ ] **Step 1: Capture the speed benchmark for the PR body**
+
+Run the in-memory and mock variants of the speed test to capture before/after numbers. The speed table is printed by `test/lib/test_lib.ts`'s `speed is acceptable` block; the relevant rows come from `memory` and `mock` backends.
+
+```bash
+pnpm test test/memory/test_memory.spec.ts 2>&1 | tee /tmp/ueberdb-perf-after.log
+```
+
+(Repeat against `main` before this branch to get the baseline. Capture in `/tmp/ueberdb-perf-before.log`. Both logs go into the PR body as a side-by-side comparison.)
+
+- [ ] **Step 2: Smoke-test against a real SQL backend (optional, if Docker is available)**
+
+```bash
+pnpm test test/sqlite
+```
+
+Expected: green. SQLite is the cheapest real-DB smoke test — no containers required.
+
+- [ ] **Step 3: Confirm no file outside the planned set was modified**
+
+```bash
+git diff --stat main..HEAD
+```
+
+Expected: changed paths are exactly `lib/CacheAndBufferLayer.ts`, `test/mock/test_metrics.spec.ts`, `test/mock/test_dirty_set.spec.ts`, `test/mock/test_lazy_flush.spec.ts`, `test/mock/test_lock_fast_path.spec.ts`, `docs/superpowers/specs/2026-05-28-cache-buffer-perf-design.md`, and `docs/superpowers/plans/2026-05-28-cache-buffer-perf.md`. Anything else is scope creep — investigate before opening the PR.
+
+- [ ] **Step 4: Review the commit log**
+
+```bash
+git log main..HEAD --oneline
+```
+
+Expected: four `perf(cache): ...` commits in order — clone split, dirty-key set, lazy flush, lock fast path — plus the two `docs:` commits for the spec and plan.
+
+- [ ] **Step 5: Open the PR or hand off to a reviewer**
+
+The PR description should reference the spec at `docs/superpowers/specs/2026-05-28-cache-buffer-perf-design.md` and include the before/after speed table captured in Step 1.
+
+---
+
+## Notes on Self-Review
+
+This plan was checked against the spec:
+
+- Win 1 (clone split) → Task 1 (8 steps).
+- Win 2 (dirty-key set) → Task 2 (13 steps including new test file and 4 markDone call-site updates).
+- Win 3 (lazy flush) → Task 3 (13 steps including new test file, removal of setInterval, addition of `_scheduleFlush`, and `close()` change).
+- Win 4 (lock fast-path) → Task 4 (12 steps: write new fast-path test, watch it fail, implement, observe `test_metrics.spec.ts` go red on the cache-hit case, edit the metrics test to drop now-irrelevant lock counters from the expected delta for `get()` cache hits — `getSub()` still expects them).
+- Spec's Testing section requirements (`test_dirty_set`, `test_lazy_flush`, `test_lock_fast_path`) → Tasks 2, 3, 4 each create their respective file.
+- Spec's Edge Cases #2 (markDone order) → Task 2 step 5 places `_dirtyKeys.delete(key)` before `entry.dirty!.done(err)`.
+- Spec's Edge Case #6 (writeInterval: 0) → Task 3 step 5's `_scheduleFlush` early-returns for `writeInterval <= 0`; Task 3 step 1's test `writeInterval=0 mode never arms the timer` covers it.
+- Spec's "Microbenchmark artifacts" → Task 5 step 1.
+
+No placeholders. No "TBD" / "implement appropriately". Each code-bearing step has the exact code to write.
diff --git a/docs/superpowers/specs/2026-05-28-cache-buffer-perf-design.md b/docs/superpowers/specs/2026-05-28-cache-buffer-perf-design.md
new file mode 100644
index 00000000..5d41e482
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-28-cache-buffer-perf-design.md
@@ -0,0 +1,258 @@
+# Cache & Buffer Layer Performance Wins
+
+**Date:** 2026-05-28
+**Scope:** `lib/CacheAndBufferLayer.ts` only
+**Type:** Internal refactor — no public API change, no default change, no behavioral regression for documented features (including `toJSON` semantics).
+
+## Motivation
+
+`lib/CacheAndBufferLayer.ts` sits on the hot path of every operation, regardless of which backend is in use. Profiling-by-inspection identified four cheap, well-isolated wins that compound:
+
+1. A recursive `clone()` JS function runs on every `get`/`set`/`setSub`/`getSub`/`findKeys`. For the read direction the cache already holds a JSON-safe value, so `structuredClone` (a V8 native, available since Node ≥17 — the package requires ≥24) is strictly faster than the recursive walker.
+2. `flush()` iterates the entire LRU buffer (default capacity 10 000) every `writeInterval` ms (default 100 ms) just to find dirty entries. For idle or read-heavy workloads, ~99% of those iterations find nothing.
+3. The buffer is flushed via a `setInterval` that runs forever once init completes, even when the database has been idle for hours.
+4. `get()` acquires and releases a per-key lock on every call — even on a guaranteed cache hit with no concurrent writer.
+
+Each fix is internal and reviewable in isolation; together they cut sustained per-op CPU cost and eliminate idle-timer load. None of them changes settings, exported types, or method signatures.
+
+## Non-Goals
+
+- Driver-specific optimizations (mysql/postgres/mongodb hotspots) — out of scope for this spec.
+- Worker-thread serialization for large JSON values.
+- A byte-bounded cache (additional setting).
+- Pipelined or adaptive flush scheduling.
+- Changing default `cache`, `writeInterval`, or `bulkLimit`.
+
+## Architecture
+
+All work happens in `lib/CacheAndBufferLayer.ts`. The four wins are listed below with their concrete implementation contract.
+
+### Win 1 — Split `clone()` into `cloneIn` / `cloneOut`
+
+**Problem.** The current `clone()` (lines 608-634) is a recursive deep-copy that resolves `toJSON` methods en route. It runs on every read AND every write. For deeply nested values it is meaningfully slower than the V8-native `structuredClone`. However, a naive swap to `structuredClone` would (a) skip `toJSON` resolution, breaking the documented behavior verified by `test/memory/test_tojson.spec.ts`, and (b) throw `DataCloneError` when the user passes a value containing a function (including `{toJSON: fn}`).
+
+**Fix.** The write direction must keep `toJSON`-aware cloning. The read direction can safely use `structuredClone` because the cached value has already been through `cloneIn` (or `JSON.parse`, which is also JSON-safe).
+
+```ts
+const cloneIn = clone; // existing function, renamed; toJSON semantics preserved
+
+const cloneOut = (v: unknown): unknown =>
+  v == null || typeof v !== "object" ? v : structuredClone(v);
+```
+
+Call-site mapping:
+
+| Site                                | Direction                 | Function   |
+| ----------------------------------- | ------------------------- | ---------- |
+| `set(key, value)` line 389          | Write (caller → cache)    | `cloneIn`  |
+| `setSub(...)` line 438              | Write                     | `cloneIn`  |
+| `get(key)` line 275                 | Read (cache → caller)     | `cloneOut` |
+| `getSub(...)` line 511              | Read                      | `cloneOut` |
+| `findKeys` line 338                 | Read                      | `cloneOut` |
+| `findKeysPaged` line 371, 380       | Read                      | `cloneOut` |
+| `_write` non-JSON fallback line 557 | Internal (cache → driver) | `cloneOut` |
+
+The `_write` non-JSON fallback uses `cloneOut` because the entry value originated from a `cloneIn` call and is therefore JSON-safe.
+
+### Win 2 — Dirty-Key Set
+
+**Problem.** `flush()` (lines 522-528) iterates the entire `buffer` to find entries with `dirty != null`. With capacity 10 000 and one dirty entry, that's 10 000 Map iterations per scan.
+
+**Fix.** Maintain a `Set<string>` of currently-dirty keys.
+
+```ts
+private readonly _dirtyKeys: Set<string> = new Set();
+```
+
+**Invariant:** `key ∈ _dirtyKeys` ⇔ `this.buffer.get(key, false)?.dirty != null`.
+
+**Mutations:**
+
+- In `_setLocked`, immediately after `this.buffer.set(key, entry)` (line 420): `this._dirtyKeys.add(key);`
+- In `markDone(key, entry, err)` — signature gains `key` parameter — _before_ calling `entry.dirty!.done(err)`:
+
+  ```ts
+  const current = this.buffer.get(key, false); // use isUse=false to avoid LRU reorder
+  if (current === entry) this._dirtyKeys.delete(key);
+  // else: entry was replaced by a fresh dirty entry; key remains in _dirtyKeys
+  entry.dirty!.done(err);
+  entry.dirty = null;
+  ```
+
+  Order matters: deleting from `_dirtyKeys` _before_ `done(err)` prevents a subtle race where the resolved caller schedules a new write that re-adds the key, only for our late `delete` to wipe it out.
+
+- `LRU.get(k, isUse = true)`: already exists (line 113). We use `isUse=false` from `flush()` and `markDone` so we read the entry without reordering the LRU.
+
+**`flush()` new loop:**
+
+```ts
+for (const key of this._dirtyKeys) {
+  const entry = this.buffer.get(key, false);
+  if (!entry || !entry.dirty || entry.writingInProgress) continue;
+  dirtyEntries.push([key, entry]);
+  if (this.settings.bulkLimit && dirtyEntries.length >= this.settings.bulkLimit) break;
+}
+```
+
+The `writingInProgress` skip preserves existing semantics: in-flight entries stay in the set but are not re-issued.
+
+### Win 3 — Lazy Flush Scheduling
+
+**Problem.** `setInterval` in the constructor (lines 217-220) fires every `writeInterval` ms forever, regardless of whether anything is dirty.
+
+**Fix.** Replace the `setInterval` with an on-demand `setTimeout` that is set when the first dirty entry is added and cleared after flushing.
+
+```ts
+private _flushTimer: ReturnType<typeof setTimeout> | null = null;
+
+private _scheduleFlush(): void {
+  if (this._flushTimer != null) return;
+  if (this.settings.writeInterval <= 0) return;
+  if (this._dirtyKeys.size === 0) return;
+  this._flushTimer = setTimeout(() => {
+    this._flushTimer = null;
+    void this.flush();
+  }, this.settings.writeInterval);
+  this._flushTimer.unref?.();  // do not block process exit
+}
+```
+
+**Call sites:**
+
+- `_setLocked`, right after `_dirtyKeys.add(key)`: `this._scheduleFlush();`
+- End of `flush()`'s outer wrapper, after the inner while-loop completes: if `this._dirtyKeys.size > 0` (some entries became dirty during `_write`), call `_scheduleFlush()`.
+
+**`close()`:** replace `clearInterval(this.flushInterval)` with `if (this._flushTimer) clearTimeout(this._flushTimer);`. The `flushInterval` field is removed.
+
+### Win 4 — Lock Fast-Path in `get()`
+
+**Problem.** `get()` (line 267) acquires `_lock(key)` and releases it on every call, even when the key is already in the buffer and no concurrent write is happening. The lock acquisition allocates a `SelfContainedPromise` and performs two `Map` mutations per call.
+
+**Fix.** Check synchronously whether a lock exists and the buffer is populated. If both checks pass, return the cloned value directly without locking.
+
+```ts
+async get(key: string): Promise<unknown> {
+  if (!this._locks.has(key)) {
+    const entry = this.buffer.get(key);  // LRU reorder is desired here (it's a real read)
+    if (entry != null) {
+      ++this.metrics.reads;
+      ++this.metrics.readsFromCache;
+      ++this.metrics.readsFinished;
+      if (this.logger.isDebugEnabled()) {
+        this.logger.debug(
+          `GET    - ${key} - ${JSON.stringify(entry.value)} - ` +
+            `from ${entry.dirty ? 'dirty buffer' : 'cache'}`,
+        );
+      }
+      return cloneOut(entry.value);
+    }
+  }
+  // Slow path — semantically identical to today.
+  await this._lock(key);
+  try { return cloneOut(await this._getLocked(key)); }
+  finally { this._unlock(key); }
+}
+```
+
+**Correctness argument.**
+
+All mutating operations (`set`, `setSub`, and the internal write path during `_setLocked`'s synchronous prelude) execute under the per-key lock. JS is single-threaded; the fast path runs from `this._locks.has(key)` through `cloneOut(entry.value)` without yielding (no `await`s). Therefore one of these holds:
+
+1. `_locks.has(key)` is false at the check → no writer is currently holding the lock; the fast path completes atomically and `structuredClone` produces a consistent snapshot. Any writer that starts after our check will not interleave with this `get` call.
+2. `_locks.has(key)` is true at the check → fast path bails to slow path, which serializes via the lock exactly as today.
+
+A reader that arrives "between" a writer's lock-release and its `_write` enqueue still sees a consistent buffer entry: `_setLocked`'s synchronous prelude (lines 410-420) updates the entry _before_ awaiting `entry.dirty`, so the buffer is current whenever the lock is not held.
+
+The `setSub` mutation walk (lines 449-476) happens entirely under the lock; it is not interleaved with the fast path.
+
+## Data Flow Comparison
+
+### Cache-hit read, no contention
+
+| Step | Today                                            | After                              |
+| ---- | ------------------------------------------------ | ---------------------------------- |
+| 1    | `await _lock(key)` (Map.set + Promise alloc)     | `_locks.has(key)` check            |
+| 2    | `_getLocked`: `buffer.get(key)` (Map.delete+set) | `buffer.get(key)` (Map.delete+set) |
+| 3    | Read `entry.value`                               | Read `entry.value`                 |
+| 4    | `_unlock(key)` (Map.delete + Promise.done)       | _(skipped)_                        |
+| 5    | `clone(v)` — recursive walker                    | `cloneOut(v)` — `structuredClone`  |
+
+### Idle DB, default settings
+
+| Per second              | Today           | After |
+| ----------------------- | --------------- | ----- |
+| `setInterval` ticks     | 10              | 0     |
+| Buffer iterations       | 10 × cache size | 0     |
+| Dirty entries inspected | usually 0       | 0     |
+
+### Write under buffering
+
+| Step          | Today                                | After                                                                 |
+| ------------- | ------------------------------------ | --------------------------------------------------------------------- |
+| `set(k, v)`   | clone → lock → entry update → unlock | clone → lock → entry update + dirty-set add + schedule timer → unlock |
+| Flush trigger | setInterval tick                     | one-shot setTimeout                                                   |
+| Flush scan    | iterate entire LRU                   | iterate `_dirtyKeys` only                                             |
+
+## Edge Cases
+
+1. **Entry replacement during `writingInProgress`.** When `_setLocked` sees `entry.writingInProgress`, it allocates a fresh entry (line 414). The old entry's `markDone` will then find `buffer.get(key, false) !== entry` and leave the key in `_dirtyKeys`. The fresh entry's `_dirtyKeys.add(key)` is idempotent.
+
+2. **`markDone` order.** Always: (a) reference-compare, conditionally delete from `_dirtyKeys`, (b) call `entry.dirty!.done(err)`, (c) null out `entry.dirty`. Reversing (a) and (b) would let a resolved caller re-enter and have its add wiped.
+
+3. **`flush()` returning with remaining `writingInProgress` entries.** The new loop skips them. If a re-set during write left a new dirty entry, the outer while-loop in `flush()` picks it up on the next iteration. If `flush()` returns and `_dirtyKeys.size > 0`, the post-loop `_scheduleFlush()` re-arms the timer.
+
+4. **`setSub` and the fast path.** `setSub` holds the lock through its mutation walk; `_locks.has(key)` is true during that window; the fast path declines. After `setSub` releases the lock the buffer is current and the fast path returns the post-mutation value (or a structurally cloned snapshot of it).
+
+5. **`writeInterval: 0`.** `_setLocked` synchronously invokes `_write` (line 427). `_scheduleFlush` early-returns. `_dirtyKeys` is still maintained because `_setLocked` adds and `markDone` removes — `flush()` still works for explicit calls.
+
+6. **`cache: 0`.** Buffer.set still happens in `_setLocked`. After write completes, `buffer.evictOld()` with capacity 0 removes everything; `_dirtyKeys` is empty by then (markDone cleared it). Consistent with today.
+
+7. **Failed write.** `markDone` is called with `err != null`. `_dirtyKeys.delete(key)` still runs (under the reference-equality guard). The caller's `await p` rejects. The entry remains in the buffer with `dirty = null` and `value` equal to the last-attempted write — same as today.
+
+8. **`structuredClone` on a value containing a function.** Cannot happen in `cloneOut` because the cache contains only `cloneIn`-output or `JSON.parse`-output, both of which are function-free. A defensive comment in the code documents this invariant.
+
+9. **Circular references.** Today's recursive `clone()` would stack-overflow on circular structures (set side). `cloneIn` keeps that behavior. `cloneOut` (via `structuredClone`) tolerates them — strictly more permissive, no regression.
+
+## Testing
+
+### Existing tests that must remain green
+
+- `test/memory/test_tojson.spec.ts` — verifies `toJSON` invocation; protected by `cloneIn`.
+- `test/mock/test_flush.spec.ts`, `test_metrics.spec.ts`, `test_lru.spec.ts`, `test_bulk.spec.ts`, `test_setSub.spec.ts`, `test_findKeys.spec.ts`.
+- All per-backend `*.spec.ts` files (only the contract on the wrapped DB is exercised; no driver code changes).
+- `test/lib/test_lib.ts`'s `speed is acceptable` block — should pass with margin.
+
+### New tests (all under `test/mock/`)
+
+**`test_dirty_set.spec.ts`:**
+
+- After `await db.set('k', v)` with `writeInterval > 0`, the internal `_dirtyKeys.size` is 0 immediately after `await` resolves (the await waits for the write to finish, so dirty is cleared). After `db.set` _before_ awaiting, `_dirtyKeys.size === 1` (use a paused mock driver).
+- Re-set during in-flight write: pause the mock driver so the first set's `_write` hangs; call set again on the same key; verify `_dirtyKeys.size === 1` (idempotent add); release the first write; verify the old entry's `markDone` does _not_ clear the key (because buffer now holds the fresh entry); verify `metrics.writesObsoleted` increments.
+- Failed write: mock driver throws; `_dirtyKeys` still cleared for that key; caller promise rejects.
+
+**`test_lazy_flush.spec.ts`:**
+
+- With `writeInterval: 100`, no `db.set` calls: wait 500 ms; verify `metrics.writesToDb === 0` and (via a test-only getter or `process._getActiveHandles().length` snapshot) no pending flush timer.
+- With `writeInterval: 100`, one `db.set` to a paused mock: timer is armed; release; after 200 ms, write happened; timer is null again.
+- `db.close()` clears the pending timer (no UnhandledHandle on exit).
+
+**`test_lock_fast_path.spec.ts`:**
+
+- Cache-hit `get`: prime the cache with `set`+await, then read `lockAcquires` metric, then call `get`. Acquires count does not increment.
+- Concurrent set/get: hold a paused mock; issue `set('k', 'v2')` (does not await); issue `get('k')`; the get takes the slow path (because `_locks.has('k')` is true while set holds it); after releasing the mock, the get resolves with `'v2'`.
+
+### Microbenchmark artifacts (non-CI)
+
+- Run `pnpm test -- speed` locally on `memory` and `sqlite` backends pre- and post-change. Capture the per-op ms table from the existing `speed is acceptable` block. Paste both into the PR body.
+- Optional `test/mock/bench_clone.bench.ts` using vitest's `bench()` to compare `cloneIn` vs `cloneOut` vs old `clone` on a synthetic Etherpad pad. Documentation only — no CI gate.
+
+## Risks
+
+- **Win 1 only:** Tiny risk of behavioral divergence if a caller stuffs a function into a deeply nested property and relies on it being silently dropped or thrown-on. Today's `clone()` throws ("Unable to copy obj!") for non-Date, non-Array, non-plain-object types except via the `instanceof Object` clause — which catches almost everything. `cloneIn` is the unchanged old function for write paths, so this is unaffected.
+- **Win 2 only:** A bug in the reference-comparison in `markDone` could leak entries from `_dirtyKeys` (causing stale iteration) or wrongly clear them (causing missed flushes). The new tests in `test_dirty_set.spec.ts` cover both error modes.
+- **Win 3 only:** A missed `_scheduleFlush()` call would leave dirty entries unwritten until the next explicit `flush()`. Covered by `test_lazy_flush.spec.ts`.
+- **Win 4 only:** The correctness argument relies on the synchronous nature of the fast path. Any future edit that introduces an `await` between the lock check and the buffer read would break the guarantee. A code comment marks the synchronous region.
+
+## Rollout
+
+Single PR, all four wins together. The wins are mutually reinforcing (Win 2 enables Win 3 cheaply; Win 1 amortizes the cost of the buffer access in Win 4). Reverting any individual win is straightforward because the changes are localized to specific methods.
diff --git a/lib/CacheAndBufferLayer.ts b/lib/CacheAndBufferLayer.ts
index 4c55d719..e0d549cb 100644
--- a/lib/CacheAndBufferLayer.ts
+++ b/lib/CacheAndBufferLayer.ts
@@ -159,9 +159,10 @@ export class Database {
   private _flushPaused: SelfContainedPromise | null = null;
   private _flushPausedCount = 0;
   private readonly _locks: Map<string, SelfContainedPromise> = new Map();
+  private readonly _dirtyKeys: Set<string> = new Set();
   private _flushDone: Promise<void> | null = null;
   public metrics: Metrics;
-  private readonly flushInterval: ReturnType<typeof setInterval> | null;
+  private _flushTimer: ReturnType<typeof setTimeout> | null = null;
 
   constructor(
     wrappedDB: LegacyWrappedDB,
@@ -220,13 +221,6 @@ export class Database {
       writesToDbFinished: 0,
       writesToDbRetried: 0,
     };
-
-    this.flushInterval =
-      this.settings.writeInterval > 0
-        ? setInterval(() => {
-            void this.flush();
-          }, this.settings.writeInterval)
-        : null;
   }
 
   private async _lock(key: string): Promise<void> {
@@ -246,6 +240,28 @@ export class Database {
     this._locks.delete(key);
   }
 
+  private _scheduleFlush(): void {
+    if (this._flushTimer != null) return;
+    // A flush is already in progress. Arming a timer now would leave a stray timer behind if
+    // that flush drains the keys this write just dirtied: flush() iterates _dirtyKeys live and
+    // re-arms via its own postlude only when keys actually remain. Let it own the rescheduling.
+    if (this._flushDone != null) return;
+    if (this.settings.writeInterval <= 0) return;
+    if (this._dirtyKeys.size === 0) return;
+    this._flushTimer = setTimeout(() => {
+      this._flushTimer = null;
+      void this.flush();
+    }, this.settings.writeInterval);
+    this._flushTimer.unref?.();
+  }
+
+  private _cancelFlushTimer(): void {
+    if (this._flushTimer != null) {
+      clearTimeout(this._flushTimer);
+      this._flushTimer = null;
+    }
+  }
+
   // Block flush() until _resumeFlush() is called. This ensures a flush() called after a write in
   // the same macro/microtask sees the buffered write.
   private _pauseFlush(): void {
@@ -267,13 +283,32 @@ export class Database {
   }
 
   async close(): Promise<void> {
-    clearInterval(this.flushInterval ?? undefined);
+    this._cancelFlushTimer();
     await this.flush();
     await this.wrappedDB!.close();
     this.wrappedDB = null;
   }
 
   async get(key: string): Promise<unknown> {
+    // Fast path: cache hit + no writer currently holding the per-key lock.
+    // The check and the buffer read execute synchronously (no `await` until after the return),
+    // so no concurrent set/setSub can interleave. cloneOut produces a consistent snapshot.
+    if (!this._locks.has(key)) {
+      const entry = this.buffer.get(key);
+      if (entry != null) {
+        ++this.metrics.reads;
+        ++this.metrics.readsFromCache;
+        ++this.metrics.readsFinished;
+        if (this.logger.isDebugEnabled()) {
+          this.logger.debug(
+            `GET    - ${key} - ${JSON.stringify(entry.value)} - ` +
+              `from ${entry.dirty ? "dirty buffer" : "cache"}`,
+          );
+        }
+        return cloneOut(entry.value);
+      }
+    }
+    // Slow path: cache miss or a writer holds the lock.
     let v: unknown;
     await this._lock(key);
     try {
@@ -281,7 +316,7 @@ export class Database {
     } finally {
       this._unlock(key);
     }
-    return clone(v);
+    return cloneOut(v);
   }
 
   private async _getLocked(key: string): Promise<unknown> {
@@ -344,7 +379,7 @@ export class Database {
         `GET    - ${key}-${notKey} - ${JSON.stringify(keyValues)} - from database `,
       );
     }
-    return clone(keyValues) as string[];
+    return cloneOut(keyValues) as string[];
   }
 
   async findKeysPaged(
@@ -379,7 +414,7 @@ export class Database {
               }
               return lo;
             })();
-      return clone(all.slice(start, start + options.limit)) as string[];
+      return cloneOut(all.slice(start, start + options.limit)) as string[];
     }
     const keys = await this.wrappedDB!.findKeysPaged(key, notKey, options);
     if (this.logger.isDebugEnabled()) {
@@ -388,7 +423,7 @@ export class Database {
           `- ${JSON.stringify(keys)} - from database `,
       );
     }
-    return clone(keys) as string[];
+    return cloneOut(keys) as string[];
   }
 
   async remove(key: string): Promise<void> {
@@ -397,7 +432,7 @@ export class Database {
   }
 
   async set(key: string, value: unknown): Promise<void> {
-    value = clone(value);
+    value = cloneIn(value);
     let p!: Promise<void>;
     this._pauseFlush();
     try {
@@ -429,6 +464,8 @@ export class Database {
       entry.value = value;
       if (!entry.dirty) entry.dirty = new SelfContainedPromise();
       this.buffer.set(key, entry);
+      this._dirtyKeys.add(key);
+      this._scheduleFlush();
       const buffered = this.settings.writeInterval > 0;
       if (this.logger.isDebugEnabled()) {
         this.logger.debug(
@@ -446,7 +483,7 @@ export class Database {
   }
 
   async setSub(key: string, sub: string[], value: unknown): Promise<void> {
-    value = clone(value);
+    value = cloneIn(value);
     if (this.logger.isDebugEnabled()) {
       this.logger.debug(`SETSUB - ${key}${JSON.stringify(sub)} - ${JSON.stringify(value)}`);
     }
@@ -519,23 +556,25 @@ export class Database {
       if (this.logger.isDebugEnabled()) {
         this.logger.debug(`GETSUB - ${key}${JSON.stringify(sub)} - ${JSON.stringify(v)}`);
       }
-      return clone(v);
+      return cloneOut(v);
     } finally {
       this._unlock(key);
     }
   }
 
   async flush(): Promise<void> {
+    // Cancel any pending lazy-flush timer — we are doing the work right now.
+    this._cancelFlushTimer();
     if (this._flushDone == null) {
       this._flushDone = (async () => {
         while (true) {
           while (this._flushPaused != null) await this._flushPaused;
           const dirtyEntries: [string, CacheEntry][] = [];
-          for (const entry of this.buffer) {
-            if (entry[1].dirty && !entry[1].writingInProgress) {
-              dirtyEntries.push(entry);
-              if (this.settings.bulkLimit && dirtyEntries.length >= this.settings.bulkLimit) break;
-            }
+          for (const key of this._dirtyKeys) {
+            const entry = this.buffer.get(key, false);
+            if (!entry || !entry.dirty || entry.writingInProgress) continue;
+            dirtyEntries.push([key, entry]);
+            if (this.settings.bulkLimit && dirtyEntries.length >= this.settings.bulkLimit) break;
           }
           if (dirtyEntries.length === 0) return;
           await this._write(dirtyEntries);
@@ -544,15 +583,21 @@ export class Database {
     }
     await this._flushDone;
     this._flushDone = null;
+    if (this._dirtyKeys.size > 0) this._scheduleFlush();
   }
 
   private async _write(dirtyEntries: [string, CacheEntry][]): Promise<void> {
-    const markDone = (entry: CacheEntry, err?: Error | null): void => {
+    const markDone = (key: string, entry: CacheEntry, err?: Error | null): void => {
       if (entry.writingInProgress) {
         entry.writingInProgress = false;
         if (err != null) ++this.metrics.writesToDbFailed;
         ++this.metrics.writesToDbFinished;
       }
+      // Reference-equality: only clear the dirty marker for THIS key if the entry currently
+      // in the buffer is the same one we just wrote. If a re-set during the write replaced it
+      // with a fresh dirty entry, the new entry must stay marked dirty.
+      const current = this.buffer.get(key, false);
+      if (current === entry) this._dirtyKeys.delete(key);
       entry.dirty!.done(err);
       entry.dirty = null;
     };
@@ -565,10 +610,10 @@ export class Database {
         if (this.settings.json && entry.value != null) {
           serialized = JSON.stringify(entry.value);
         } else {
-          serialized = clone(entry.value) as string | null;
+          serialized = cloneOut(entry.value) as string | null;
         }
       } catch (err) {
-        markDone(entry, err as Error);
+        markDone(key, entry, err as Error);
         continue;
       }
       entry.writingInProgress = true;
@@ -590,7 +635,7 @@ export class Database {
       } catch (err) {
         writeErr = err instanceof Error ? err : new Error(String(err));
       }
-      markDone(entry, writeErr);
+      markDone(op.key, entry, writeErr);
     };
 
     if (ops.length === 1) {
@@ -609,18 +654,20 @@ export class Database {
         this.metrics.writesToDbRetried += ops.length;
         await Promise.all(ops.map(async (op, i) => writeOneOp(op, entries[i])));
       }
-      if (success) entries.forEach((entry) => markDone(entry, null));
+      if (success) {
+        for (let i = 0; i < entries.length; i++) markDone(ops[i].key, entries[i], null);
+      }
     }
     // Evict here to enforce cache = 0 semantics (reads must not hit cache after write completes).
     this.buffer.evictOld();
   }
 }
 
-const clone = (obj: unknown, key = ""): unknown => {
+const cloneIn = (obj: unknown, key = ""): unknown => {
   if (obj == null || typeof obj !== "object") return obj;
 
   if (typeof (obj as Record<string, unknown>).toJSON === "function") {
-    return clone((obj as { toJSON(k: string): unknown }).toJSON(key));
+    return cloneIn((obj as { toJSON(k: string): unknown }).toJSON(key));
   }
 
   if (obj instanceof Date) {
@@ -630,13 +677,13 @@ const clone = (obj: unknown, key = ""): unknown => {
   }
 
   if (Array.isArray(obj)) {
-    return obj.map((item, i) => clone(item, String(i)));
+    return obj.map((item, i) => cloneIn(item, String(i)));
   }
 
   if (obj instanceof Object) {
     const copy: Record<string, unknown> = {};
     for (const attr of Object.keys(obj)) {
-      copy[attr] = clone((obj as Record<string, unknown>)[attr], attr);
+      copy[attr] = cloneIn((obj as Record<string, unknown>)[attr], attr);
     }
     return copy;
   }
@@ -644,4 +691,18 @@ const clone = (obj: unknown, key = ""): unknown => {
   throw new Error("Unable to copy obj! Its type isn't supported.");
 };
 
+// Read-direction clone. The buffered value has typically been processed by cloneIn (which strips
+// toJSON methods at the root and resolves them recursively) or by JSON.parse (always JSON-safe).
+// Functions inside cloneIn-output objects ARE preserved as-is, so structuredClone may still throw
+// DataCloneError. Fall back to cloneIn (which preserves non-cloneable values by passing them
+// through unchanged) to keep parity with the pre-refactor clone() semantics.
+const cloneOut = (v: unknown): unknown => {
+  if (v == null || typeof v !== "object") return v;
+  try {
+    return structuredClone(v);
+  } catch {
+    return cloneIn(v);
+  }
+};
+
 export const exportedForTesting = { LRU };
diff --git a/test/memory/test_tojson.spec.ts b/test/memory/test_tojson.spec.ts
index 5caf664a..d99d1f45 100644
--- a/test/memory/test_tojson.spec.ts
+++ b/test/memory/test_tojson.spec.ts
@@ -32,4 +32,15 @@ describe(__filename, () => {
     // @ts-expect-error TS(2775): Assertions require every name in the call target t... Remove this comment to see the full error message
     assert.deepEqual(await db.get("key"), ["toJSON 0"]);
   });
+  it("object property containing a function survives the round-trip via the cache", async () => {
+    // cloneIn preserves functions inside objects (they hit the `typeof !== 'object'` branch and
+    // pass through unchanged). cloneOut therefore needs to tolerate function-containing values
+    // when reading from the cache; structuredClone would throw DataCloneError without the
+    // cloneIn fallback in cloneOut.
+    const fn = () => "hello";
+    await db.set("key", { fn });
+    const out: any = await db.get("key");
+    assert.equal(typeof out.fn, "function");
+    assert.equal(out.fn(), "hello");
+  });
 });
diff --git a/test/mock/test_dirty_set.spec.ts b/test/mock/test_dirty_set.spec.ts
new file mode 100644
index 00000000..fd18bc91
--- /dev/null
+++ b/test/mock/test_dirty_set.spec.ts
@@ -0,0 +1,98 @@
+import * as ueberdb from "../../index";
+import { ConsoleLogger } from "../../lib/logging";
+import { afterEach, describe, expect, it } from "vitest";
+
+type MockSettings = { mock?: any };
+
+const logger = new ConsoleLogger();
+
+const dirtyKeys = (db: any): Set<string> => (db.db as any)._dirtyKeys;
+
+describe(__filename, () => {
+  let db: any = null;
+  let mock: any = null;
+
+  const createDb = async (wrapperSettings: Record<string, unknown> = {}) => {
+    const settings: MockSettings = {};
+    db = new ueberdb.Database("mock", settings, { json: false, ...wrapperSettings }, logger);
+    await db.init();
+    mock = settings.mock;
+    mock.once("init", (cb: any) => cb());
+  };
+
+  afterEach(async () => {
+    if (mock != null) {
+      mock.removeAllListeners();
+      mock.once("close", (cb: any) => cb());
+      mock = null;
+    }
+    if (db != null) {
+      await db.close();
+      db = null;
+    }
+  });
+
+  it("set() adds the key to _dirtyKeys; flush() drains it", async () => {
+    // writeInterval=1e9 means the lazy flush timer effectively never fires; we drive flush manually.
+    await createDb({ writeInterval: 1e9 });
+    mock.on("set", (k: any, v: any, cb: any) => cb());
+    const writeP = db.set("k", "v");
+    // _setLocked is reached via `await this._lock(key)` which (with no contention) resolves
+    // as a microtask. setImmediate fires after the microtask queue is fully drained, so
+    // _dirtyKeys.add(key) and buffer.set(key, entry) have both completed by the time we resume.
+    await new Promise((r) => setImmediate(r));
+    expect(dirtyKeys(db).has("k")).toBe(true);
+    expect(dirtyKeys(db).size).toBe(1);
+    await Promise.all([writeP, db.flush()]);
+    expect(dirtyKeys(db).size).toBe(0);
+  });
+
+  it("re-set during an in-flight write keeps the key in _dirtyKeys", async () => {
+    await createDb({ writeInterval: 1e9 });
+    let releaseFirstWrite: (() => void) | null = null;
+    const firstWriteSeen = new Promise<void>((resolve) => {
+      mock.once("set", (k: any, v: any, cb: any) => {
+        resolve();
+        releaseFirstWrite = () => cb();
+      });
+    });
+    const firstWriteP = db.set("k", "v1");
+    const flushedP = db.flush();
+    await firstWriteSeen;
+    // While the first write is in flight, queue a second write to the same key.
+    let releaseSecondWrite: (() => void) | null = null;
+    const secondWriteSeen = new Promise<void>((resolve) => {
+      mock.once("set", (k: any, v: any, cb: any) => {
+        resolve();
+        releaseSecondWrite = () => cb();
+      });
+    });
+    const secondWriteP = db.set("k", "v2");
+    // The key must remain in _dirtyKeys: the old in-flight entry is being written,
+    // and a new dirty entry has taken its place in the buffer.
+    expect(dirtyKeys(db).has("k")).toBe(true);
+    // Release the first write. markDone for v1 will run, hit the reference-equality guard,
+    // and see that the buffer entry is no longer v1's — so the key MUST remain in _dirtyKeys.
+    releaseFirstWrite!();
+    await new Promise((r) => setImmediate(r));
+    expect(dirtyKeys(db).has("k")).toBe(true);
+    // Wait for the second write to be picked up by the flush loop.
+    await secondWriteSeen;
+    releaseSecondWrite!();
+    await Promise.all([firstWriteP, secondWriteP, flushedP]);
+    // Drain any remaining dirty entry (the v2 write) — it may have been picked up by the
+    // same flush() loop, but to be robust against scheduling we call flush() once more.
+    await db.flush();
+    expect(dirtyKeys(db).size).toBe(0);
+  });
+
+  it("failed write removes the key from _dirtyKeys and rejects the caller", async () => {
+    await createDb({ writeInterval: 1e9 });
+    mock.on("set", (k: any, v: any, cb: any) => cb(new Error("boom")));
+    const writeP = db.set("k", "v");
+    const flushedP = db.flush();
+    await expect(writeP).rejects.toThrow("boom");
+    await flushedP;
+    expect(dirtyKeys(db).size).toBe(0);
+  });
+});
diff --git a/test/mock/test_lazy_flush.spec.ts b/test/mock/test_lazy_flush.spec.ts
new file mode 100644
index 00000000..467959ab
--- /dev/null
+++ b/test/mock/test_lazy_flush.spec.ts
@@ -0,0 +1,111 @@
+import * as ueberdb from "../../index";
+import { ConsoleLogger } from "../../lib/logging";
+import { afterEach, describe, expect, it } from "vitest";
+
+type MockSettings = { mock?: any };
+
+const logger = new ConsoleLogger();
+
+const flushTimer = (db: any): unknown => (db.db as any)._flushTimer;
+
+describe(__filename, () => {
+  let db: any = null;
+  let mock: any = null;
+
+  const createDb = async (wrapperSettings: Record<string, unknown> = {}) => {
+    const settings: MockSettings = {};
+    db = new ueberdb.Database("mock", settings, { json: false, ...wrapperSettings }, logger);
+    await db.init();
+    mock = settings.mock;
+    mock.once("init", (cb: any) => cb());
+  };
+
+  afterEach(async () => {
+    if (mock != null) {
+      mock.removeAllListeners();
+      mock.once("close", (cb: any) => cb());
+      mock = null;
+    }
+    if (db != null) {
+      await db.close();
+      db = null;
+    }
+  });
+
+  it("idle database does not arm the flush timer", async () => {
+    await createDb({ writeInterval: 50 });
+    expect(flushTimer(db)).toBe(null);
+    // Give the event loop a tick or two to confirm nothing schedules itself.
+    await new Promise((r) => setTimeout(r, 30));
+    expect(flushTimer(db)).toBe(null);
+  });
+
+  it("set() arms the timer; flush() leaves it null after draining", async () => {
+    await createDb({ writeInterval: 1e9 }); // huge interval so the timer cannot fire during the test
+    mock.on("set", (k: any, v: any, cb: any) => cb());
+    const writeP = db.set("k", "v");
+    await new Promise((r) => setImmediate(r));
+    // _setLocked runs after `await this._lock(key)`. setImmediate fires after the
+    // microtask queue is drained, so by the time we resume, _scheduleFlush() has fired.
+    expect(flushTimer(db)).not.toBe(null);
+    await Promise.all([writeP, db.flush()]);
+    // After an explicit flush() that drained everything, the timer must be null.
+    expect(flushTimer(db)).toBe(null);
+  });
+
+  it("close() clears a pending flush timer", async () => {
+    await createDb({ writeInterval: 1e9 });
+    mock.on("set", (k: any, v: any, cb: any) => cb());
+    void db.set("k", "v");
+    // Yield so _setLocked runs and arms the timer.
+    await new Promise((r) => setImmediate(r));
+    // Timer must be armed before close().
+    expect(flushTimer(db)).not.toBe(null);
+    mock.once("close", (cb: any) => cb());
+    // close() must cancel the timer itself (no explicit flush() before this call) and complete cleanly.
+    await db.close();
+    expect(flushTimer(db)).toBe(null);
+    db = null; // prevent afterEach from double-closing
+  });
+
+  it("a set() during an in-flight flush leaves no stray timer", async () => {
+    await createDb({ writeInterval: 1e9 }); // huge interval so a timer cannot fire during the test
+    let releaseFirst!: () => void;
+    const firstHeld = new Promise<void>((r) => {
+      releaseFirst = r;
+    });
+    const written: string[] = [];
+    mock.on("set", (k: any, _v: any, cb: any) => {
+      written.push(k);
+      // Hold the first write so flush() parks mid-drain (awaiting _write); release the rest.
+      if (k === "k1") void firstHeld.then(() => cb());
+      else cb();
+    });
+
+    const w1 = db.set("k1", "v1");
+    await new Promise((r) => setImmediate(r));
+    expect(flushTimer(db)).not.toBe(null); // k1 dirtied, timer armed
+
+    const flushP = db.flush(); // cancels the timer, starts draining k1 (held below)
+    await new Promise((r) => setImmediate(r));
+    expect(flushTimer(db)).toBe(null); // flush() cancelled it on entry
+
+    // Write a different key while the flush is in progress. Before the fix this armed a fresh
+    // timer; the in-flight flush then drained k2 too, orphaning that timer on an idle DB.
+    const w2 = db.set("k2", "v2");
+    await new Promise((r) => setImmediate(r));
+
+    releaseFirst(); // let the held write finish; the flush loop picks up k2 next
+    await Promise.all([w1, w2, flushP]);
+
+    expect(written).toEqual(["k1", "k2"]); // both drained by the single flush
+    expect(flushTimer(db)).toBe(null); // no stray timer must remain
+  });
+
+  it("writeInterval=0 mode never arms the timer", async () => {
+    await createDb({ writeInterval: 0 });
+    mock.on("set", (k: any, v: any, cb: any) => cb());
+    await db.set("k", "v");
+    expect(flushTimer(db)).toBe(null);
+  });
+});
diff --git a/test/mock/test_lock_fast_path.spec.ts b/test/mock/test_lock_fast_path.spec.ts
new file mode 100644
index 00000000..fbe3b748
--- /dev/null
+++ b/test/mock/test_lock_fast_path.spec.ts
@@ -0,0 +1,120 @@
+import * as ueberdb from "../../index";
+import { ConsoleLogger } from "../../lib/logging";
+import { afterEach, describe, expect, it } from "vitest";
+
+type MockSettings = { mock?: any };
+
+const logger = new ConsoleLogger();
+
+describe(__filename, () => {
+  let db: any = null;
+  let mock: any = null;
+
+  const createDb = async (wrapperSettings: Record<string, unknown> = {}) => {
+    const settings: MockSettings = {};
+    db = new ueberdb.Database("mock", settings, { json: false, ...wrapperSettings }, logger);
+    await db.init();
+    mock = settings.mock;
+    mock.once("init", (cb: any) => cb());
+  };
+
+  afterEach(async () => {
+    if (mock != null) {
+      mock.removeAllListeners();
+      mock.once("close", (cb: any) => cb());
+      mock = null;
+    }
+    if (db != null) {
+      await db.close();
+      db = null;
+    }
+  });
+
+  it("cache-hit get() does not acquire the per-key lock", async () => {
+    await createDb({ writeInterval: 1e9 });
+    // Prime the cache: a single set+flush is enough to populate the buffer with the value.
+    mock.once("set", (k: any, v: any, cb: any) => cb());
+    await Promise.all([db.set("k", "v"), db.flush()]);
+    // After the write is finished, the buffer holds the value; the lock map is empty.
+    const before = { ...db.metrics };
+    const val = await db.get("k");
+    expect(val).toBe("v");
+    const after = db.metrics;
+    expect(after.lockAcquires - before.lockAcquires).toBe(0);
+    expect(after.lockReleases - before.lockReleases).toBe(0);
+    expect(after.readsFromCache - before.readsFromCache).toBe(1);
+  });
+
+  it("cache-miss get() still acquires the lock", async () => {
+    await createDb({ writeInterval: 1e9 });
+    mock.once("get", (k: any, cb: any) => cb(null, "v"));
+    const before = { ...db.metrics };
+    const val = await db.get("k");
+    expect(val).toBe("v");
+    const after = db.metrics;
+    expect(after.lockAcquires - before.lockAcquires).toBe(1);
+    expect(after.lockReleases - before.lockReleases).toBe(1);
+  });
+
+  it("get() during a write-in-progress with the lock released returns the buffered value via fast path", async () => {
+    await createDb({ writeInterval: 1e9 });
+    let releaseWrite: (() => void) | null = null;
+    const writeStarted = new Promise<void>((resolve) => {
+      mock.once("set", (k: any, v: any, cb: any) => {
+        resolve();
+        releaseWrite = () => cb();
+      });
+    });
+    const writeP = db.set("k", "v2");
+    const flushedP = db.flush();
+    await writeStarted;
+    // At this moment: _write is awaiting the mock's callback. The per-key lock has been released
+    // (set() releases the lock before awaiting entry.dirty). The buffer holds value 'v2'.
+    // The fast path must apply.
+    // Defensive yield: the lock is released synchronously inside set()'s finally block before
+    // _write begins awaiting, so by the time writeStarted fires the lock is already gone.
+    // The yield is belt-and-suspenders.
+    await new Promise<void>((r) => setImmediate(r));
+    const before = { ...db.metrics };
+    const val = await db.get("k");
+    expect(val).toBe("v2");
+    expect(db.metrics.lockAcquires - before.lockAcquires).toBe(0);
+    expect(db.metrics.readsFromCache - before.readsFromCache).toBe(1);
+    releaseWrite!();
+    await Promise.all([writeP, flushedP]);
+  });
+
+  it("get() while a setter holds the lock takes the slow path", async () => {
+    await createDb({ writeInterval: 1e9 });
+    // Hold the first set's database write open so its key-level lock cannot be released
+    // — wait, _setLocked releases the lock BEFORE awaiting entry.dirty. We need contention
+    // on the _LOCK ITSELF, not on _write. Drive that by holding the FIRST _lock open via
+    // an in-flight setSub that does an awaited _getLocked under the lock.
+    //
+    // Simpler approach: a setSub holds the lock through its entire walk (because it awaits
+    // _getLocked under the lock). Pause the mock's get() callback so _getLocked never resolves
+    // — this leaves setSub holding the lock indefinitely. Then issue db.get('k'); it must
+    // take the slow path and increment lockAwaits.
+    let releaseGet: (() => void) | null = null;
+    const getStarted = new Promise<void>((resolve) => {
+      mock.once("get", (k: any, cb: any) => {
+        resolve();
+        releaseGet = () => cb(null, null);
+      });
+    });
+    // setSub triggers a get under the lock; that get is paused by our mock, so the lock is held.
+    const setSubP = db.setSub("k", ["s"], "v2");
+    await getStarted;
+    // At this moment: setSub holds the lock on 'k' (still awaiting _getLocked).
+    // Issue a get; it must observe _locks.has('k') === true and take the slow path.
+    const before = { ...db.metrics };
+    const getP = db.get("k");
+    // After issuing get, lockAwaits should already be incremented (lock-acquire is sync).
+    // But we'll only assert it after we release everything, to avoid a timing race on
+    // the synchronous increment.
+    mock.on("set", (k: any, v: any, cb: any) => cb()); // for setSub's eventual write
+    releaseGet!();
+    await Promise.all([setSubP, getP, db.flush()]);
+    expect(db.metrics.lockAwaits - before.lockAwaits).toBe(1);
+  });
+});
diff --git a/test/mock/test_metrics.spec.ts b/test/mock/test_metrics.spec.ts
index 21c77c38..89beb212 100644
--- a/test/mock/test_metrics.spec.ts
+++ b/test/mock/test_metrics.spec.ts
@@ -159,7 +159,14 @@ describe(__filename, () => {
             if (subtc.err) readFinished = assert.rejects(readFinished, subtc.err);
             if (subtc.wantJsonErr) readFinished = assert.rejects(readFinished, { message: /JSON/ });
             await readFinished;
-            assertMetricsDelta(before, db.metrics, subtc.wantMetrics);
+            // After perf refactor: get() cache hits no longer acquire the per-key lock.
+            // getSub() still locks because the slow path is the only path it uses.
+            const expected = { ...subtc.wantMetrics };
+            if (tc.name === "get" && subtc.cacheHit) {
+              delete (expected as any).lockAcquires;
+              delete (expected as any).lockReleases;
+            }
+            assertMetricsDelta(before, db.metrics, expected);
           });
         }
         it("read of in-progress write", async () => {
@@ -181,13 +188,16 @@ describe(__filename, () => {
           });
           const before = { ...db.metrics };
           await tc.f(key);
-          assertMetricsDelta(before, db.metrics, {
-            lockAcquires: 1,
-            lockReleases: 1,
+          const expected: Record<string, number> = {
             reads: 1,
             readsFinished: 1,
             readsFromCache: 1,
-          });
+          };
+          if (tc.name === "getSub") {
+            expected.lockAcquires = 1;
+            expected.lockReleases = 1;
+          }
+          assertMetricsDelta(before, db.metrics, expected);
           // @ts-ignore
           finishWrite();
           await writeFinished;