From 06beb9a68fefd47055c22870b89298e227c8a9d0 Mon Sep 17 00:00:00 2001
From: John Boyer <johnfranklinboyer@gmail.com>
Date: Sun, 7 Jun 2026 16:38:40 -0700
Subject: [PATCH] Add tools/qmap: qubit-allocation profiler + viewer

Optional, opt-in (QMAP env var) profiler + interactive HTML viewer for qubit allocation over circuit time. Purely additive - no src/ changes; instrumentation is a separate apply-yourself patch.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 tools/README.md                  |  86 +++++++++++
 tools/qmap_explorer.py           | 118 ++++++++++++++
 tools/qmap_instrumentation.patch | 256 +++++++++++++++++++++++++++++++
 3 files changed, 460 insertions(+)
 create mode 100644 tools/README.md
 create mode 100644 tools/qmap_explorer.py
 create mode 100644 tools/qmap_instrumentation.patch

diff --git a/tools/README.md b/tools/README.md
new file mode 100644
index 00000000..b6afc656
--- /dev/null
+++ b/tools/README.md
@@ -0,0 +1,86 @@
+# qmap — qubit-allocation profiler + viewer
+
+A small, **opt-in** profiler and interactive HTML viewer for *where qubits go over
+circuit time* in the point-add builder. Zero cost when disabled — the instrumentation
+is gated entirely behind the `QMAP` env var.
+
+It answers questions like: at any point in the circuit, how many qubits are live, and
+which register role are they in (target_x, target_y, the GCD register `u`, the
+transcript sidecar, scratch)?
+
+## Contents
+
+- `qmap_explorer.py` — interactive HTML viewer. Pure Python stdlib + a browser; no
+  numpy/matplotlib/PIL required.
+- `qmap_instrumentation.patch` — env-gated builder hooks. **No behavior change unless
+  `QMAP` is set.**
+- `README.md` — this file.
+
+## 1. Install the instrumentation
+
+```sh
+git apply tools/qmap_instrumentation.patch
+cargo build --release --bin build_circuit
+```
+
+The patch is against commit `94927be`. The hook sites (`alloc_qubit` / `free` /
+`reacquire` / `push_op`, and the register allocations in the dialog GCD) are stable
+across revisions, so if it doesn't apply cleanly to a newer tip, it can be re-fitted
+by hand with minor context adjustments. The changes are: a few struct fields, an
+increment/decrement of a running per-role counter at alloc/free/reacquire, a snapshot
+trigger in `push_op`, and a `set_role(...)` tag wrapped around each register allocation.
+
+## 2. Capture a profile
+
+Whole circuit, anti-aliased (each snapshot records the **max** within its bucket, so a
+coarse stride still captures true peaks), ~1 second:
+
+```sh
+QMAP=1 QMAP_STRIDE=500 QMAP_OUT=qmap.tsv ./target/release/build_circuit
+```
+
+True per-op resolution inside a span (no aliasing — snapshots every op in the window):
+
+```sh
+QMAP=1 QMAP_OP_START=1090000 QMAP_OP_END=1130000 QMAP_OUT=qmap_win.tsv ./target/release/build_circuit
+```
+
+Environment variables:
+
+| var | meaning |
+|---|---|
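+| `QMAP=1` | enable profiling. The builder only checks that `QMAP` is set, so any value (even `QMAP=0`) enables it; leave it unset for zero cost. |
+| `QMAP_STRIDE=N` | snapshot every N ops (default 15000). Snapshots record the per-bucket max, so peaks survive coarse strides. Ignored while window mode is active. |
+| `QMAP_OP_START` / `QMAP_OP_END` | snapshot **every** op inside `[start, end)` — true 1-op resolution in a span. Window mode is active only when `QMAP_OP_END` > 0 (`QMAP_OP_START` defaults to 0) and replaces stride sampling; the first row of the window carries the peak of everything before it. |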
+| `QMAP_OUT=path` | output TSV path (default `/tmp/qmap.tsv`) |
+
+Snapshots are O(1) (a running per-role count maintained at alloc/free), so even fine
+capture is cheap.
+
+## 3. View it
+
+```sh
+python3 tools/qmap_explorer.py qmap.tsv qmap.html "my run"
+open qmap.html      # or xdg-open / just open the file in a browser
+```
+
+Explorer controls:
+
+- **Stacked-by-role ↔ scratch-only** toggle
+- **Per-role show/hide** chips (drop the idle I/O registers to magnify the rest, etc.)
+- **Minimap** window-slider (drag the window to pan, drag empty space to select a span)
+- **Wheel / trackpad zoom**, cursor-centered; **drag to pan**; double-click to reset
+- **Y-axis auto-scales** to the currently visible window
+- **Hover** for per-snapshot detail (op index, phase, per-role counts)
+
+## TSV format
+
+Tab-separated, one row per snapshot:
+
+```
+op_idx  phase  active  scr_res scr_live  tx_res tx_live  ty_res ty_live  u_res u_live  tr_res tr_live
+```
+
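+`active` is the peak live-qubit count within the snapshot's bucket; each per-role pair is `[reserved, live]` for
+scratch / target_x / target_y / `u` / transcript (the instrumentation currently writes `reserved` as 0, and the viewer
+plots `reserved + live`). Lines starting with `#` (such as the header row) and blank lines are skipped, so you can also generate this format from your own tooling.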
diff --git a/tools/qmap_explorer.py b/tools/qmap_explorer.py
new file mode 100644
index 00000000..fc32f8cc
--- /dev/null
+++ b/tools/qmap_explorer.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+"""Interactive qubit-allocation explorer: toggle stacked-by-role <-> scratch-only,
+minimap window-slider, wheel/trackpad zoom (cursor-centered), drag-to-pan, peak markers."""
+import json, sys
+SRC = sys.argv[1] if len(sys.argv) > 1 else "/tmp/qmap_frontier.tsv"
+OUT = sys.argv[2] if len(sys.argv) > 2 else "/tmp/qmap_explorer.html"
+TITLE = sys.argv[3] if len(sys.argv) > 3 else "frontier (1302q)"
+rows, pidx = [], {}  # pidx: phase name -> index in first-seen order (dicts keep insertion order)
+with open(SRC, encoding="utf-8") as src:  # context manager closes the handle; encoding is explicit
+    for line in src:
+        if line.startswith("#") or not line.strip():  # skip the '#' header/comment lines and blanks
+            continue
+        c = line.rstrip("\n").split("\t")
+        op, ph, active = int(c[0]), c[1], int(c[2])
+        scr, tx, ty, u, tr = (int(c[i]) + int(c[i + 1]) for i in range(3, 13, 2))  # reserved + live per role
+        rows.append([op, pidx.setdefault(ph, len(pidx)), active, scr, tx, ty, u, tr])
+if not rows:  # max() below and the page's R[j0] lookup both need at least one snapshot
+    sys.exit(f"{SRC}: no snapshot rows found (was the profile captured with QMAP set?)")
+peak = max(r[2] for r in rows); maxscr = max(r[3] for r in rows)
+D = json.dumps({"rows": rows, "phases": list(pidx), "peak": peak, "maxscr": maxscr})
+PAGE = r"""<!doctype html><html><head><meta charset=utf8><title>qmap explorer</title>
+<style>body{font:13px -apple-system,system-ui,sans-serif;margin:0;background:#0f1115;color:#dde}
+#wrap{padding:14px}h1{font-size:16px;margin:0 0 4px}
+.bar{display:flex;gap:8px;align-items:center;margin:6px 0;flex-wrap:wrap}
+button{background:#1a1e27;border:1px solid #333a48;color:#dde;padding:5px 11px;border-radius:6px;cursor:pointer}
+button.on{background:#2a4d8f;border-color:#3a6fd0}
+#mini{display:block;width:100%;height:56px;background:#0a0c10;border:1px solid #262c38;border-radius:4px;cursor:ew-resize;margin-bottom:4px}
+#cv{display:block;width:100%;background:#0a0c10;border:1px solid #262c38;border-radius:4px;cursor:grab}
+#cv.grab{cursor:grabbing}
+#tip{position:fixed;pointer-events:none;background:#000d;border:1px solid #555;border-radius:4px;padding:6px 8px;font:12px monospace;display:none;z-index:9;white-space:pre;color:#fff}
+.legend{display:flex;flex-wrap:wrap;gap:4px 14px;margin:6px 0;font-size:11px;color:#aab}
+.legend b{display:inline-block;width:11px;height:11px;margin-right:3px;vertical-align:-1px}
+.rtog{cursor:pointer;user-select:none;padding:2px 6px;border-radius:4px;border:1px solid #2a3140}.rtog:hover{background:#ffffff14}.rtog.off{opacity:.32;text-decoration:line-through}
+.hint{color:#6b7280;font-size:11px}</style></head><body><div id=wrap>
+<h1>Qubit allocation explorer &mdash; __TITLE__</h1>
+<div class=bar>
+ <button id=mStack class=on onclick="setMode('stack')">Stacked (all roles)</button>
+ <button id=mScr onclick="setMode('scratch')">Scratch only</button>
+ <span style="width:14px"></span>
+ <button onclick="zreset()">Reset</button>
+ <button id=pk class=on onclick="togPk()">Peak markers</button>
+ <span class=hint id=range></span>
+</div>
+<canvas id=mini></canvas>
+<div class=legend id=leg></div>
+<canvas id=cv></canvas>
+<div class=hint>Minimap: drag the window to pan, drag empty space to select a span. Main view: scroll/trackpad to zoom (cursor-centered), drag to pan, double-click to reset.</div>
+</div><div id=tip></div>
+<script>
+const DB=__DATA__, R=DB.rows, PH=DB.phases, PEAK=DB.peak, MAXS=DB.maxscr, N=R.length;
+let mode='stack', x0=0, x1=N, showPk=true;
+const cv=document.getElementById('cv'),x=cv.getContext('2d');
+const mini=document.getElementById('mini'),mc=mini.getContext('2d');
+const STRIP=22, PAD=26, DH=440, H=DH+PAD+STRIP, W=1500, MH=56;
+cv.width=W; cv.height=H; mini.width=W; mini.height=MH;
+const ROLES=[['tx',4,'#2b6cd0','tx (dx)'],['ty',5,'#2a9d4f','ty (dy)'],['u',6,'#8e44c0','u (GCD)'],['tr',7,'#e07020','transcript'],['scr',3,'#c0392b','scratch']];
+let roleVis={tx:1,ty:1,u:1,tr:1,scr:1};
+function phaseOf(p){const s=p.split('/');return s[2]||s[s.length-1]||p;}
+function niceStep(top){const raw=top/5;const p=Math.pow(10,Math.floor(Math.log10(raw)));const m=raw/p;const s=m<1.5?1:m<3?2:m<7?5:10;return Math.max(1,s*p);}
+function hls(h,l,s){const f=n=>{const k=(n+h*12)%12,a=s*Math.min(l,1-l);return Math.round(255*(l-a*Math.max(-1,Math.min(k-3,Math.min(9-k,1)))));};return[f(0),f(8),f(4)];}
+function pcol(p){let h=0;for(const c of p)h=(h*31+c.charCodeAt(0))%360;const[r,g,b]=hls(h/360,0.56,0.55);return`rgb(${r},${g},${b})`;}
+function clamp(){x0=Math.max(0,x0);x1=Math.min(N,x1);if(x1-x0<3)x1=x0+3;if(x1>N){x1=N;x0=Math.max(0,N-3);}}
+function draw(){
+  clamp();
+  const n=x1-x0;
+  let vmax=1;for(let i=0;i<n;i++){const j=Math.floor(x0)+i;if(j>=N)break;let v;if(mode==='scratch')v=R[j][3];else{v=0;for(const[key,idx]of ROLES)if(roleVis[key])v+=R[j][idx];}if(v>vmax)vmax=v;}
+  const top=Math.max(4,Math.ceil(vmax*1.08)), sy=DH/top, yOf=v=>STRIP+PAD+DH-v*sy;
+  x.fillStyle='#0a0c10';x.fillRect(0,0,W,H);
+  for(let i=0;i<n;i++){const j=Math.floor(x0)+i; if(j>=N)break; const px=i*W/n, pw=Math.ceil(W/n)+1, r=R[j];
+    if(mode==='scratch'){x.fillStyle=pcol(phaseOf(PH[r[1]]));x.fillRect(px,yOf(r[3]),pw,DH-(yOf(r[3])-STRIP-PAD));}
+    else{let acc=0;for(const[key,idx,col]of ROLES){if(!roleVis[key])continue;const v=r[idx];if(v>0){x.fillStyle=col;x.fillRect(px,yOf(acc+v),pw,v*sy);}acc+=v;}}}
+  for(let i=0;i<n;i++){const j=Math.floor(x0)+i;if(j>=N)break;x.fillStyle=pcol(phaseOf(PH[R[j][1]]));x.fillRect(i*W/n,0,Math.ceil(W/n)+1,STRIP-3);}
+  x.strokeStyle='#fff2';x.fillStyle='#fff7';x.font='10px monospace';const step=niceStep(top);
+  for(let v=step;v<top;v+=step){const y=yOf(v)+.5;x.beginPath();x.moveTo(0,y);x.lineTo(W,y);x.stroke();x.fillText(''+v,3,y-2);}
+  x.strokeStyle='#fff8';x.setLineDash([5,4]);x.beginPath();x.moveTo(0,yOf(vmax)+.5);x.lineTo(W,yOf(vmax)+.5);x.stroke();x.setLineDash([]);
+  x.fillStyle='#fff';x.fillText((mode==='scratch'?'window max scratch ':'window max active ')+vmax,3,yOf(vmax)-3);
+  if(showPk&&mode==='stack'){x.fillStyle='#ff3b30';for(let i=0;i<n;i++){const j=Math.floor(x0)+i;if(j<N&&R[j][2]>=PEAK-1)x.fillRect(i*W/n,STRIP,2,DH+PAD);}}
+  const j0=Math.floor(x0),j1=Math.min(N-1,Math.ceil(x1));
+  document.getElementById('range').textContent=`snapshots ${j0}–${j1} of ${N}  ·  ops ${R[j0][0].toLocaleString()}–${R[j1][0].toLocaleString()}`;
+  if(mode==='stack')document.getElementById('leg').innerHTML=ROLES.map(ro=>`<span class="rtog${roleVis[ro[0]]?'':' off'}" onclick="togRole('${ro[0]}')"><b style="background:${ro[2]}"></b>${ro[3]}</span>`).join('')+'<span style="opacity:.55;margin-left:8px"><b style="background:#ff3b30"></b>peak-touch</span>';
+  else{const seen=new Set(),o=[];for(let i=j0;i<=j1;i++){const k=phaseOf(PH[R[i][1]]);if(!seen.has(k)){seen.add(k);o.push(k);}}document.getElementById('leg').innerHTML=o.slice(0,28).map(p=>`<span><b style="background:${pcol(p)}"></b>${p}</span>`).join('');}
+  drawMini();
+}
+function drawMini(){
+  mc.fillStyle='#0a0c10';mc.fillRect(0,0,W,MH);
+  mc.fillStyle='#34507e';for(let j=0;j<N;j++){const h=R[j][2]/PEAK*(MH-2);mc.fillRect(j*W/N,MH-h,Math.ceil(W/N)+1,h);}
+  const a=x0/N*W,b=x1/N*W;mc.fillStyle='#ffffff26';mc.fillRect(a,0,b-a,MH);
+  mc.strokeStyle='#5b8ff0';mc.lineWidth=2;mc.strokeRect(a+1,1,Math.max(2,b-a-2),MH-2);
+}
+function setMode(m){mode=m;document.getElementById('mStack').className=m==='stack'?'on':'';document.getElementById('mScr').className=m==='scratch'?'on':'';draw();}
+function zreset(){x0=0;x1=N;draw();}
+function setRange(a,b){x0=a;x1=b;clamp();draw();}
+function togPk(){showPk=!showPk;document.getElementById('pk').className=showPk?'on':'';draw();}
+function togRole(k){roleVis[k]=!roleVis[k];draw();}
+// wheel/trackpad zoom on main, cursor-centered
+cv.addEventListener('wheel',e=>{e.preventDefault();const r=cv.getBoundingClientRect();const frac=Math.max(0,Math.min(1,(e.clientX-r.left)/r.width));const cx=x0+frac*(x1-x0);let f=Math.exp(e.deltaY*0.0016);f=Math.max(0.5,Math.min(2,f));x0=cx-(cx-x0)*f;x1=cx+(x1-cx)*f;draw();},{passive:false});
+// drag-pan on main
+let pan=null;
+cv.addEventListener('mousedown',e=>{const r=cv.getBoundingClientRect();pan={f:(e.clientX-r.left)/r.width,x0,x1};cv.classList.add('grab');});
+cv.addEventListener('dblclick',zreset);
+// minimap: drag window to pan, drag empty to brush
+let md=null;
+mini.addEventListener('mousedown',e=>{const r=mini.getBoundingClientRect();const frac=(e.clientX-r.left)/r.width,px=frac*N;if(px>=x0&&px<=x1)md={m:'pan',f:frac,x0,x1};else md={m:'brush',s:px};});
+window.addEventListener('mousemove',e=>{
+  if(pan){const r=cv.getBoundingClientRect();const frac=(e.clientX-r.left)/r.width;const d=(pan.f-frac)*(pan.x1-pan.x0);x0=pan.x0+d;x1=pan.x1+d;clamp();draw();return;}
+  if(md){const r=mini.getBoundingClientRect();const frac=(e.clientX-r.left)/r.width,px=frac*N;if(md.m==='pan'){const d=(frac-md.f)*N;x0=md.x0+d;x1=md.x1+d;}else{x0=Math.min(md.s,px);x1=Math.max(md.s,px);}clamp();draw();return;}
+});
+window.addEventListener('mouseup',()=>{pan=null;md=null;cv.classList.remove('grab');});
+// hover tooltip (only when not dragging)
+const tip=document.getElementById('tip');
+cv.addEventListener('mousemove',e=>{if(pan)return;const r=cv.getBoundingClientRect();const i=Math.floor((e.clientX-r.left)/r.width*(x1-x0));const j=Math.floor(x0)+i;if(j<0||j>=N){tip.style.display='none';return;}const d=R[j];
+  tip.textContent=`op ~${d[0].toLocaleString()}\nactive ${d[2]}\nscratch ${d[3]} | tx ${d[4]} ty ${d[5]} u ${d[6]} tr ${d[7]}\nphase: ${phaseOf(PH[d[1]])}`;
+  tip.style.display='block';tip.style.left=(e.clientX+14)+'px';tip.style.top=(e.clientY+14)+'px';});
+cv.addEventListener('mouseleave',()=>tip.style.display='none');
+draw();
+</script></body></html>"""
+with open(OUT, "w", encoding="utf-8") as out: out.write(PAGE.replace("__DATA__", D).replace("__TITLE__", TITLE))  # page declares charset=utf8, so write UTF-8 regardless of locale
+print(f"wrote {OUT}  (N={len(rows)}, peak={peak}, maxscr={maxscr})")
diff --git a/tools/qmap_instrumentation.patch b/tools/qmap_instrumentation.patch
new file mode 100644
index 00000000..b4bad87b
--- /dev/null
+++ b/tools/qmap_instrumentation.patch
@@ -0,0 +1,256 @@
+diff --git a/src/point_add/mod.rs b/src/point_add/mod.rs
+index 9d795de..b995298 100644
+--- a/src/point_add/mod.rs
++++ b/src/point_add/mod.rs
+@@ -117,6 +117,17 @@ pub(crate) struct B {
+     // tobitvector (compute/uncompute) and apply (conditional 2nd double/halve).
+     // Empty when K=2 is disabled (frontier path byte-identical).
+     pub k2_shift2_log: Vec<QubitId>,
++    pub qmap_on: bool,              // true when the QMAP env var is set (any value)
++    pub qmap_stride: u64,           // QMAP_STRIDE: snapshot every N counted ops (default 15000)
++    pub qmap_ops: u64,              // ops counted in push_op while profiling (not in count_only)
++    pub qmap_out: Option<std::fs::File>, // TSV sink, created on the first snapshot
++    pub qmap_role: Vec<u8>,         // role recorded per qubit id at alloc_qubit time
++    pub qmap_cur_role: u8,          // role applied to new allocations (see set_role)
++    pub qmap_win_start: u64,        // QMAP_OP_START: first op index of the per-op window
++    pub qmap_win_end: u64,          // QMAP_OP_END (exclusive); 0 = window mode off
++    pub qmap_live: [u32; 5],        // running live count per role (O(1) snapshots)
++    pub qmap_bpeak: u32,            // max active in the current bucket (anti-alias)
++    pub qmap_bpeak_live: [u32; 5],  // role breakdown at that bucket peak
+ }
+ 
+ #[derive(Clone, Copy)]
+@@ -169,6 +180,20 @@ impl B {
+             current_phase_active_max: 0,
+             phase_transitions: Vec::new(),
+             k2_shift2_log: Vec::new(),
++            qmap_on: std::env::var("QMAP").is_ok(),
++            qmap_stride: std::env::var("QMAP_STRIDE")
++                .ok()
++                .and_then(|s| s.parse().ok())
++                .unwrap_or(15000),
++            qmap_ops: 0,
++            qmap_out: None,
++            qmap_role: Vec::new(),
++            qmap_cur_role: 0,
++            qmap_win_start: std::env::var("QMAP_OP_START").ok().and_then(|s| s.parse().ok()).unwrap_or(0),
++            qmap_win_end: std::env::var("QMAP_OP_END").ok().and_then(|s| s.parse().ok()).unwrap_or(0),
++            qmap_live: [0; 5],
++            qmap_bpeak: 0,
++            qmap_bpeak_live: [0; 5],
+         }
+     }
+     fn new_count_only() -> Self {
+@@ -180,6 +205,19 @@ impl B {
+         self.counted_ops += 1;
+         self.counted_kind_ops[op.kind as usize] += 1;
+         self.counted_phase_kind_ops[op.kind as usize] += 1;
++        if self.qmap_on && !self.count_only {
++            self.qmap_ops += 1;
++            let trig = if self.qmap_win_end > 0 {
++                // window mode: snapshot EVERY op inside [start,end) — true 1-op resolution
++                let op_idx = self.current_ops_len() as u64; // not `op`: avoid shadowing the op being pushed
++                op_idx >= self.qmap_win_start && op_idx < self.qmap_win_end
++            } else {
++                self.qmap_ops % self.qmap_stride.max(1) == 0 // max(1): QMAP_STRIDE=0 must not panic on `% 0`
++            };
++            if trig {
++                self.qmap_snapshot();
++            }
++        }
+         if !self.count_only {
+             self.ops.push(op);
+         }
+@@ -298,13 +336,56 @@ impl B {
+             self.peak_log
+                 .push((self.active_qubits, self.phase, self.current_ops_len()));
+         }
+-        if let Some(q) = self.free_qubits.pop() {
+-            QubitId(q.into())
++        let id = if let Some(q) = self.free_qubits.pop() {
++            q
+         } else {
+             let q = self.next_qubit;
+             self.next_qubit += 1;
+-            QubitId(q.into())
++            q
++        };
++        if self.qmap_on {
++            let i = id as usize;
++            if i >= self.qmap_role.len() {
++                self.qmap_role.resize(i + 1, 0);
++            }
++            self.qmap_role[i] = self.qmap_cur_role;
++            let role = (self.qmap_cur_role as usize).min(4);
++            self.qmap_live[role] += 1;
++            if self.active_qubits > self.qmap_bpeak {
++                self.qmap_bpeak = self.active_qubits;
++                self.qmap_bpeak_live = self.qmap_live;
++            }
++        }
++        QubitId(id.into())
++    }
++    fn set_role(&mut self, r: u8) {
++        self.qmap_cur_role = r; // role for later allocs: 0 scratch, 1 target_x, 2 target_y, 3 u, 4 transcript
++    }
++    /// Emit one TSV row in O(1): the bucket-peak `active` and its per-role live counts; then reset the bucket.
++    fn qmap_snapshot(&mut self) {
++        use std::io::Write;
++        if self.qmap_out.is_none() {
++            let path = std::env::var("QMAP_OUT").unwrap_or_else(|_| "/tmp/qmap.tsv".to_string());
++            match std::fs::File::create(&path) {
++                Ok(mut f) => {
++                    let _ = writeln!(f, "# op_idx\tphase\tactive\tscr_res\tscr_live\ttx_res\ttx_live\tty_res\tty_live\tu_res\tu_live\ttr_res\ttr_live");
++                    self.qmap_out = Some(f);
++                }
++                Err(e) => {
++                    // Warn once and stop profiling instead of silently retrying the create on every snapshot.
++                    eprintln!("qmap: cannot create {path}: {e}; profiling disabled");
++                    self.qmap_on = false;
++                }
++            }
++        }
++        let (op_idx, phase, active) = (self.current_ops_len(), self.phase, self.qmap_bpeak);
++        let c = self.qmap_bpeak_live; // role breakdown captured at the bucket peak; "reserved" columns are written as 0
++        if let Some(f) = self.qmap_out.as_mut() {
++            let _ = writeln!(f, "{op_idx}\t{phase}\t{active}\t0\t{}\t0\t{}\t0\t{}\t0\t{}\t0\t{}", c[0], c[1], c[2], c[3], c[4]);
+         }
++        // reset the bucket tracker to the current live state
++        self.qmap_bpeak = self.active_qubits;
++        self.qmap_bpeak_live = self.qmap_live;
+     }
+     fn alloc_qubits(&mut self, n: usize) -> Vec<QubitId> {
+         (0..n).map(|_| self.alloc_qubit()).collect()
+@@ -318,6 +399,15 @@ impl B {
+         (0..n).map(|_| self.alloc_bit()).collect()
+     }
+     fn free(&mut self, q: QubitId) {
++        if self.qmap_on {
++            let i = q.0 as usize;
++            if i < self.qmap_role.len() {
++                let role = (self.qmap_role[i] as usize).min(4);
++                if self.qmap_live[role] > 0 {
++                    self.qmap_live[role] -= 1;
++                }
++            }
++        }
+         self.r(q);
+         self.free_qubits
+             .push(q.0.try_into().expect("qubit id fits in u32"));
+@@ -338,6 +428,17 @@ impl B {
+             .expect("reacquire qubit that is not currently free");
+         self.free_qubits.swap_remove(pos);
+         self.active_qubits += 1;
++        if self.qmap_on {
++            let i = q.0 as usize;
++            if i < self.qmap_role.len() {
++                let role = (self.qmap_role[i] as usize).min(4);
++                self.qmap_live[role] += 1;
++                if self.active_qubits > self.qmap_bpeak {
++                    self.qmap_bpeak = self.active_qubits;
++                    self.qmap_bpeak_live = self.qmap_live;
++                }
++            }
++        }
+         self.record_phase_active();
+         if self.active_qubits > self.peak_qubits {
+             self.peak_qubits = self.active_qubits;
+@@ -1378,10 +1479,14 @@ fn build_builder() -> B {
+     };
+     let b = &mut builder;
+     // Register 0: target_x (quantum)
++    b.set_role(1);
+     let tx = b.alloc_qubits(N);
++    b.set_role(0);
+     b.declare_qubit_register(&tx);
+     // Register 1: target_y (quantum)
++    b.set_role(2);
+     let ty = b.alloc_qubits(N);
++    b.set_role(0);
+     b.declare_qubit_register(&ty);
+     // Register 2: offset_x (classical bits)
+     let ox = b.alloc_bits(N);
+diff --git a/src/point_add/rounds/dialog/compressed.rs b/src/point_add/rounds/dialog/compressed.rs
+index 9e06004..fe07dab 100644
+--- a/src/point_add/rounds/dialog/compressed.rs
++++ b/src/point_add/rounds/dialog/compressed.rs
+@@ -1431,13 +1431,13 @@ pub(crate) fn emit_dialog_gcd_compressed_sidecar_ipmul_block_lifecycle(
+     assert_eq!(factor.len(), N);
+     assert_eq!(target.len(), N);
+ 
+-    let compressed_log = b.alloc_qubits(dialog_gcd_allocated_compressed_sidecar_bits());
++    b.set_role(4); let compressed_log = b.alloc_qubits(dialog_gcd_allocated_compressed_sidecar_bits()); b.set_role(0);
+     let raw_block = if dialog_gcd_host_reverse_raw_block_enabled() {
+         Vec::new()
+     } else {
+         b.alloc_qubits(dialog_gcd_raw_block_len())
+     };
+-    let u = b.alloc_qubits(N);
++    b.set_role(3); let u = b.alloc_qubits(N); b.set_role(0);
+     let runway = dialog_gcd_build_compressed_log_u_high_runway(&u, &compressed_log);
+     let replay_log = runway
+         .as_ref()
+@@ -1571,10 +1571,10 @@ pub(crate) fn emit_dialog_gcd_compressed_sidecar_ipmul(
+         return;
+     }
+ 
+-    let compressed_log = b.alloc_qubits(dialog_gcd_compressed_sidecar_bits());
++    b.set_role(4); let compressed_log = b.alloc_qubits(dialog_gcd_compressed_sidecar_bits()); b.set_role(0);
+     let pair = b.alloc_qubits(2);
+     let compressor_scratch = b.alloc_qubit();
+-    let u = b.alloc_qubits(N);
++    b.set_role(3); let u = b.alloc_qubits(N); b.set_role(0);
+     b.set_phase("dialog_gcd_compressed_sidecar_ipmul_load_p");
+     for i in 0..N {
+         if bit(p, i) {
+@@ -1701,13 +1701,13 @@ pub(crate) fn emit_dialog_gcd_compressed_sidecar_quotient_block_lifecycle(
+     assert_eq!(factor.len(), N);
+     assert_eq!(target.len(), N);
+ 
+-    let compressed_log = b.alloc_qubits(dialog_gcd_allocated_compressed_sidecar_bits());
++    b.set_role(4); let compressed_log = b.alloc_qubits(dialog_gcd_allocated_compressed_sidecar_bits()); b.set_role(0);
+     let raw_block = if dialog_gcd_host_reverse_raw_block_enabled() {
+         Vec::new()
+     } else {
+         b.alloc_qubits(dialog_gcd_raw_block_len())
+     };
+-    let u = b.alloc_qubits(N);
++    b.set_role(3); let u = b.alloc_qubits(N); b.set_role(0);
+     let runway = dialog_gcd_build_compressed_log_u_high_runway(&u, &compressed_log);
+     let replay_log = runway
+         .as_ref()
+@@ -1823,10 +1823,10 @@ pub(crate) fn emit_dialog_gcd_compressed_sidecar_quotient(
+         return;
+     }
+ 
+-    let compressed_log = b.alloc_qubits(dialog_gcd_compressed_sidecar_bits());
++    b.set_role(4); let compressed_log = b.alloc_qubits(dialog_gcd_compressed_sidecar_bits()); b.set_role(0);
+     let pair = b.alloc_qubits(2);
+     let compressor_scratch = b.alloc_qubit();
+-    let u = b.alloc_qubits(N);
++    b.set_role(3); let u = b.alloc_qubits(N); b.set_role(0);
+     b.set_phase("dialog_gcd_compressed_sidecar_quotient_load_p");
+     for i in 0..N {
+         if bit(p, i) {
+diff --git a/src/point_add/rounds/dialog/mod.rs b/src/point_add/rounds/dialog/mod.rs
+index 4ecb794..4daf650 100644
+--- a/src/point_add/rounds/dialog/mod.rs
++++ b/src/point_add/rounds/dialog/mod.rs
+@@ -1594,7 +1594,7 @@ pub(crate) fn emit_dialog_gcd_raw_ipmul(b: &mut B, factor: &[QubitId], target: &
+     }
+ 
+     let dialog_log = b.alloc_qubits(DIALOG_GCD_RAW_LOG_BITS);
+-    let u = b.alloc_qubits(N);
++    b.set_role(3); let u = b.alloc_qubits(N); b.set_role(0);
+     b.set_phase("dialog_gcd_raw_ipmul_load_p");
+     for i in 0..N {
+         if bit(p, i) {
+@@ -1681,7 +1681,7 @@ pub(crate) fn emit_dialog_gcd_raw_quotient(b: &mut B, factor: &[QubitId], target
+     }
+ 
+     let dialog_log = b.alloc_qubits(DIALOG_GCD_RAW_LOG_BITS);
+-    let u = b.alloc_qubits(N);
++    b.set_role(3); let u = b.alloc_qubits(N); b.set_role(0);
+     b.set_phase("dialog_gcd_raw_quotient_load_p");
+     for i in 0..N {
+         if bit(p, i) {