From 06beb9a68fefd47055c22870b89298e227c8a9d0 Mon Sep 17 00:00:00 2001 From: John Boyer Date: Sun, 7 Jun 2026 16:38:40 -0700 Subject: [PATCH] Add tools/qmap: qubit-allocation profiler + viewer Optional, opt-in (QMAP env var) profiler + interactive HTML viewer for qubit allocation over circuit time. Purely additive - no src/ changes; instrumentation is a separate apply-yourself patch. Co-Authored-By: Claude Opus 4.8 (1M context) --- tools/README.md | 86 +++++++++++ tools/qmap_explorer.py | 118 ++++++++++++++ tools/qmap_instrumentation.patch | 256 +++++++++++++++++++++++++++++++ 3 files changed, 460 insertions(+) create mode 100644 tools/README.md create mode 100644 tools/qmap_explorer.py create mode 100644 tools/qmap_instrumentation.patch diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 00000000..b6afc656 --- /dev/null +++ b/tools/README.md @@ -0,0 +1,86 @@ +# qmap — qubit-allocation profiler + viewer + +A small, **opt-in** profiler and interactive HTML viewer for *where qubits go over +circuit time* in the point-add builder. Zero cost when disabled — the instrumentation +is gated entirely behind the `QMAP` env var. + +It answers questions like: at any point in the circuit, how many qubits are live, and +which register role are they in (target_x, target_y, the GCD register `u`, the +transcript sidecar, scratch)? + +## Contents + +- `qmap_explorer.py` — interactive HTML viewer. Pure Python stdlib + a browser; no + numpy/matplotlib/PIL required. +- `qmap_instrumentation.patch` — env-gated builder hooks. **No behavior change unless + `QMAP` is set.** +- `README.md` — this file. + +## 1. Install the instrumentation + +```sh +git apply tools/qmap_instrumentation.patch +cargo build --release --bin build_circuit +``` + +The patch is against commit `94927be`. 
The hook sites (`alloc_qubit` / `free` / +`reacquire` / `push_op`, and the register allocations in the dialog GCD) are stable +across revisions, so if it doesn't apply cleanly to a newer tip it can be re-fitted with minor +context adjustment — the changes are: a few struct fields, increment/decrement of a +running per-role counter at alloc/free, and a `set_role(...)` tag wrapped around each +register allocation. + +## 2. Capture a profile + +Whole circuit, anti-aliased (each snapshot records the **max** within its bucket, so a +coarse stride still captures true peaks), ~1 second: + +```sh +QMAP=1 QMAP_STRIDE=500 QMAP_OUT=qmap.tsv ./target/release/build_circuit +``` + +True per-op resolution inside a span (no aliasing — snapshots every op in the window): + +```sh +QMAP=1 QMAP_OP_START=1090000 QMAP_OP_END=1130000 QMAP_OUT=qmap_win.tsv ./target/release/build_circuit +``` + +Environment variables: + +| var | meaning | +|---|---| +| `QMAP=1` | enable profiling (otherwise zero cost). The hook only tests whether the variable is set, so any value — even `QMAP=0` — enables it; leave it unset to disable. | +| `QMAP_STRIDE=N` | snapshot every N ops (N ≥ 1, default 15000). Snapshots record the per-bucket max, so peaks survive coarse strides. Ignored while window mode is active. | +| `QMAP_OP_START` / `QMAP_OP_END` | snapshot **every** op inside `[start, end)` — true 1-op resolution in a span. Window mode is active only when `QMAP_OP_END` > 0 (`QMAP_OP_START` defaults to 0); ops outside the window produce no snapshots, and the first row of the window carries the peak accumulated since the start of the run. | +| `QMAP_OUT=path` | output TSV path (default `/tmp/qmap.tsv`) | + +Snapshots are O(1) (a running per-role count maintained at alloc/free), so even fine +capture is cheap. + +## 3. View it + +```sh +python3 tools/qmap_explorer.py qmap.tsv qmap.html "my run" +open qmap.html # or xdg-open / just open the file in a browser +``` + +Explorer controls: + +- **Stacked-by-role ↔ scratch-only** toggle +- **Per-role show/hide** chips (drop the idle I/O registers to magnify the rest, etc.) 
+- **Minimap** window-slider (drag the window to pan, drag empty space to select a span) +- **Wheel / trackpad zoom**, cursor-centered; **drag to pan**; double-click to reset +- **Y-axis auto-scales** to the currently visible window +- **Hover** for per-snapshot detail (op index, phase, per-role counts) + +## TSV format + +Tab-separated, one row per snapshot: + +``` +op_idx phase active scr_res scr_live tx_res tx_live ty_res ty_live u_res u_live tr_res tr_live +``` + +`active` is the peak number of live qubits since the previous snapshot (the bucket max); the per-role pairs are `[reserved, live]` counts for +scratch / target_x / target_y / `u` / transcript, taken at that peak. The bundled instrumentation always writes 0 for `reserved`, and the viewer plots `reserved + live` per role. Lines starting with `#` (such as the header row the instrumentation emits) and blank lines are skipped. The viewer reads any file in this +format, so you can also generate it from your own tooling. diff --git a/tools/qmap_explorer.py b/tools/qmap_explorer.py new file mode 100644 index 00000000..fc32f8cc --- /dev/null +++ b/tools/qmap_explorer.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +"""Interactive qubit-allocation explorer: toggle stacked-by-role <-> scratch-only, +minimap window-slider, wheel/trackpad zoom (cursor-centered), drag-to-pan, peak markers.""" +import json, sys +SRC = sys.argv[1] if len(sys.argv) > 1 else "/tmp/qmap_frontier.tsv" +OUT = sys.argv[2] if len(sys.argv) > 2 else "/tmp/qmap_explorer.html" +TITLE = sys.argv[3] if len(sys.argv) > 3 else "frontier (1302q)" +rows, phases, pidx = [], [], {} +for line in open(SRC): + if line.startswith("#") or not line.strip(): + continue + c = line.rstrip("\n").split("\t") + op, ph, active = int(c[0]), c[1], int(c[2]) + scr = int(c[3]) + int(c[4]); tx = int(c[5]) + int(c[6]); ty = int(c[7]) + int(c[8]) + u = int(c[9]) + int(c[10]); tr = int(c[11]) + int(c[12]) + if ph not in pidx: + pidx[ph] = len(phases); phases.append(ph) + rows.append([op, pidx[ph], active, scr, tx, ty, u, tr]) +peak = max(r[2] for r in rows); maxscr = max(r[3] for r in rows) +D = json.dumps({"rows": rows, "phases": phases, "peak": peak, "maxscr": maxscr}) +PAGE = r"""qmap explorer 
+

Qubit allocation explorer — __TITLE__

+
+ + + + + + +
+ +
+ +
Minimap: drag the window to pan, drag empty space to select a span. Main view: scroll/trackpad to zoom (cursor-centered), drag to pan, double-click to reset.
+
+""" +open(OUT, "w").write(PAGE.replace("__DATA__", D).replace("__TITLE__", TITLE)) +print(f"wrote {OUT} (N={len(rows)}, peak={peak}, maxscr={maxscr})") diff --git a/tools/qmap_instrumentation.patch b/tools/qmap_instrumentation.patch new file mode 100644 index 00000000..b4bad87b --- /dev/null +++ b/tools/qmap_instrumentation.patch @@ -0,0 +1,256 @@ +diff --git a/src/point_add/mod.rs b/src/point_add/mod.rs +index 9d795de..b995298 100644 +--- a/src/point_add/mod.rs ++++ b/src/point_add/mod.rs +@@ -117,6 +117,17 @@ pub(crate) struct B { + // tobitvector (compute/uncompute) and apply (conditional 2nd double/halve). + // Empty when K=2 is disabled (frontier path byte-identical). + pub k2_shift2_log: Vec, ++ pub qmap_on: bool, ++ pub qmap_stride: u64, ++ pub qmap_ops: u64, ++ pub qmap_out: Option, ++ pub qmap_role: Vec, ++ pub qmap_cur_role: u8, ++ pub qmap_win_start: u64, ++ pub qmap_win_end: u64, ++ pub qmap_live: [u32; 5], // running live count per role (O(1) snapshots) ++ pub qmap_bpeak: u32, // max active in the current bucket (anti-alias) ++ pub qmap_bpeak_live: [u32; 5], // role breakdown at that bucket peak + } + + #[derive(Clone, Copy)] +@@ -169,6 +180,20 @@ impl B { + current_phase_active_max: 0, + phase_transitions: Vec::new(), + k2_shift2_log: Vec::new(), ++ qmap_on: std::env::var("QMAP").is_ok(), ++ qmap_stride: std::env::var("QMAP_STRIDE") ++ .ok() ++ .and_then(|s| s.parse::<u64>().ok()).filter(|&n| n > 0) // reject 0: push_op computes `qmap_ops % qmap_stride`, which would panic ++ .unwrap_or(15000), ++ qmap_ops: 0, ++ qmap_out: None, ++ qmap_role: Vec::new(), ++ qmap_cur_role: 0, ++ qmap_win_start: std::env::var("QMAP_OP_START").ok().and_then(|s| s.parse().ok()).unwrap_or(0), ++ qmap_win_end: std::env::var("QMAP_OP_END").ok().and_then(|s| s.parse().ok()).unwrap_or(0), ++ qmap_live: [0; 5], ++ qmap_bpeak: 0, ++ qmap_bpeak_live: [0; 5], + } + } + fn new_count_only() -> Self { +@@ -180,6 +205,19 @@ impl B { + self.counted_ops += 1; + self.counted_kind_ops[op.kind as usize] += 1; + self.counted_phase_kind_ops[op.kind as usize] += 1; ++ if self.qmap_on && 
!self.count_only { ++ self.qmap_ops += 1; ++ let trig = if self.qmap_win_end > 0 { ++ // window mode: snapshot EVERY op inside [start,end) — true 1-op resolution ++ let op = self.current_ops_len() as u64; ++ op >= self.qmap_win_start && op < self.qmap_win_end ++ } else { ++ self.qmap_ops % self.qmap_stride == 0 ++ }; ++ if trig { ++ self.qmap_snapshot(); ++ } ++ } + if !self.count_only { + self.ops.push(op); + } +@@ -298,13 +336,56 @@ impl B { + self.peak_log + .push((self.active_qubits, self.phase, self.current_ops_len())); + } +- if let Some(q) = self.free_qubits.pop() { +- QubitId(q.into()) ++ let id = if let Some(q) = self.free_qubits.pop() { ++ q + } else { + let q = self.next_qubit; + self.next_qubit += 1; +- QubitId(q.into()) ++ q ++ }; ++ if self.qmap_on { ++ let i = id as usize; ++ if i >= self.qmap_role.len() { ++ self.qmap_role.resize(i + 1, 0); ++ } ++ self.qmap_role[i] = self.qmap_cur_role; ++ let role = (self.qmap_cur_role as usize).min(4); ++ self.qmap_live[role] += 1; ++ if self.active_qubits > self.qmap_bpeak { ++ self.qmap_bpeak = self.active_qubits; ++ self.qmap_bpeak_live = self.qmap_live; ++ } ++ } ++ QubitId(id.into()) ++ } ++ fn set_role(&mut self, r: u8) { ++ self.qmap_cur_role = r; ++ } ++ fn qmap_snapshot(&mut self) { ++ use std::io::Write; ++ if self.qmap_out.is_none() { ++ let path = std::env::var("QMAP_OUT").unwrap_or_else(|_| "/tmp/qmap.tsv".to_string()); ++ self.qmap_out = std::fs::File::create(&path).ok(); ++ if let Some(f) = self.qmap_out.as_mut() { ++ let _ = writeln!(f, "# op_idx\tphase\tactive\tscr_res\tscr_live\ttx_res\ttx_live\tty_res\tty_live\tu_res\tu_live\ttr_res\ttr_live"); ++ } ++ } ++ // O(1): emit the bucket PEAK (max active since last snapshot) + its role ++ // breakdown — anti-aliased, no per-snapshot rescan. 
++ let c = self.qmap_bpeak_live; ++ let op_idx = self.current_ops_len(); ++ let phase = self.phase; ++ let active = self.qmap_bpeak; ++ if let Some(f) = self.qmap_out.as_mut() { ++ let _ = writeln!( ++ f, ++ "{op_idx}\t{phase}\t{active}\t0\t{}\t0\t{}\t0\t{}\t0\t{}\t0\t{}", ++ c[0], c[1], c[2], c[3], c[4] ++ ); + } ++ // reset the bucket tracker to the current live state ++ self.qmap_bpeak = self.active_qubits; ++ self.qmap_bpeak_live = self.qmap_live; + } + fn alloc_qubits(&mut self, n: usize) -> Vec { + (0..n).map(|_| self.alloc_qubit()).collect() +@@ -318,6 +399,15 @@ impl B { + (0..n).map(|_| self.alloc_bit()).collect() + } + fn free(&mut self, q: QubitId) { ++ if self.qmap_on { ++ let i = q.0 as usize; ++ if i < self.qmap_role.len() { ++ let role = (self.qmap_role[i] as usize).min(4); ++ if self.qmap_live[role] > 0 { ++ self.qmap_live[role] -= 1; ++ } ++ } ++ } + self.r(q); + self.free_qubits + .push(q.0.try_into().expect("qubit id fits in u32")); +@@ -338,6 +428,17 @@ impl B { + .expect("reacquire qubit that is not currently free"); + self.free_qubits.swap_remove(pos); + self.active_qubits += 1; ++ if self.qmap_on { ++ let i = q.0 as usize; ++ if i < self.qmap_role.len() { ++ let role = (self.qmap_role[i] as usize).min(4); ++ self.qmap_live[role] += 1; ++ if self.active_qubits > self.qmap_bpeak { ++ self.qmap_bpeak = self.active_qubits; ++ self.qmap_bpeak_live = self.qmap_live; ++ } ++ } ++ } + self.record_phase_active(); + if self.active_qubits > self.peak_qubits { + self.peak_qubits = self.active_qubits; +@@ -1378,10 +1479,14 @@ fn build_builder() -> B { + }; + let b = &mut builder; + // Register 0: target_x (quantum) ++ b.set_role(1); + let tx = b.alloc_qubits(N); ++ b.set_role(0); + b.declare_qubit_register(&tx); + // Register 1: target_y (quantum) ++ b.set_role(2); + let ty = b.alloc_qubits(N); ++ b.set_role(0); + b.declare_qubit_register(&ty); + // Register 2: offset_x (classical bits) + let ox = b.alloc_bits(N); +diff --git 
a/src/point_add/rounds/dialog/compressed.rs b/src/point_add/rounds/dialog/compressed.rs +index 9e06004..fe07dab 100644 +--- a/src/point_add/rounds/dialog/compressed.rs ++++ b/src/point_add/rounds/dialog/compressed.rs +@@ -1431,13 +1431,13 @@ pub(crate) fn emit_dialog_gcd_compressed_sidecar_ipmul_block_lifecycle( + assert_eq!(factor.len(), N); + assert_eq!(target.len(), N); + +- let compressed_log = b.alloc_qubits(dialog_gcd_allocated_compressed_sidecar_bits()); ++ b.set_role(4); let compressed_log = b.alloc_qubits(dialog_gcd_allocated_compressed_sidecar_bits()); b.set_role(0); + let raw_block = if dialog_gcd_host_reverse_raw_block_enabled() { + Vec::new() + } else { + b.alloc_qubits(dialog_gcd_raw_block_len()) + }; +- let u = b.alloc_qubits(N); ++ b.set_role(3); let u = b.alloc_qubits(N); b.set_role(0); + let runway = dialog_gcd_build_compressed_log_u_high_runway(&u, &compressed_log); + let replay_log = runway + .as_ref() +@@ -1571,10 +1571,10 @@ pub(crate) fn emit_dialog_gcd_compressed_sidecar_ipmul( + return; + } + +- let compressed_log = b.alloc_qubits(dialog_gcd_compressed_sidecar_bits()); ++ b.set_role(4); let compressed_log = b.alloc_qubits(dialog_gcd_compressed_sidecar_bits()); b.set_role(0); + let pair = b.alloc_qubits(2); + let compressor_scratch = b.alloc_qubit(); +- let u = b.alloc_qubits(N); ++ b.set_role(3); let u = b.alloc_qubits(N); b.set_role(0); + b.set_phase("dialog_gcd_compressed_sidecar_ipmul_load_p"); + for i in 0..N { + if bit(p, i) { +@@ -1701,13 +1701,13 @@ pub(crate) fn emit_dialog_gcd_compressed_sidecar_quotient_block_lifecycle( + assert_eq!(factor.len(), N); + assert_eq!(target.len(), N); + +- let compressed_log = b.alloc_qubits(dialog_gcd_allocated_compressed_sidecar_bits()); ++ b.set_role(4); let compressed_log = b.alloc_qubits(dialog_gcd_allocated_compressed_sidecar_bits()); b.set_role(0); + let raw_block = if dialog_gcd_host_reverse_raw_block_enabled() { + Vec::new() + } else { + b.alloc_qubits(dialog_gcd_raw_block_len()) + }; +- let 
u = b.alloc_qubits(N); ++ b.set_role(3); let u = b.alloc_qubits(N); b.set_role(0); + let runway = dialog_gcd_build_compressed_log_u_high_runway(&u, &compressed_log); + let replay_log = runway + .as_ref() +@@ -1823,10 +1823,10 @@ pub(crate) fn emit_dialog_gcd_compressed_sidecar_quotient( + return; + } + +- let compressed_log = b.alloc_qubits(dialog_gcd_compressed_sidecar_bits()); ++ b.set_role(4); let compressed_log = b.alloc_qubits(dialog_gcd_compressed_sidecar_bits()); b.set_role(0); + let pair = b.alloc_qubits(2); + let compressor_scratch = b.alloc_qubit(); +- let u = b.alloc_qubits(N); ++ b.set_role(3); let u = b.alloc_qubits(N); b.set_role(0); + b.set_phase("dialog_gcd_compressed_sidecar_quotient_load_p"); + for i in 0..N { + if bit(p, i) { +diff --git a/src/point_add/rounds/dialog/mod.rs b/src/point_add/rounds/dialog/mod.rs +index 4ecb794..4daf650 100644 +--- a/src/point_add/rounds/dialog/mod.rs ++++ b/src/point_add/rounds/dialog/mod.rs +@@ -1594,7 +1594,7 @@ pub(crate) fn emit_dialog_gcd_raw_ipmul(b: &mut B, factor: &[QubitId], target: & + } + + let dialog_log = b.alloc_qubits(DIALOG_GCD_RAW_LOG_BITS); +- let u = b.alloc_qubits(N); ++ b.set_role(3); let u = b.alloc_qubits(N); b.set_role(0); + b.set_phase("dialog_gcd_raw_ipmul_load_p"); + for i in 0..N { + if bit(p, i) { +@@ -1681,7 +1681,7 @@ pub(crate) fn emit_dialog_gcd_raw_quotient(b: &mut B, factor: &[QubitId], target + } + + let dialog_log = b.alloc_qubits(DIALOG_GCD_RAW_LOG_BITS); +- let u = b.alloc_qubits(N); ++ b.set_role(3); let u = b.alloc_qubits(N); b.set_role(0); + b.set_phase("dialog_gcd_raw_quotient_load_p"); + for i in 0..N { + if bit(p, i) {