From 4b5020a17caaad651b34a2a1d854977345c5e9a7 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Thu, 11 Jun 2026 02:46:15 +0800
Subject: [PATCH 01/15] Incremental re-parse v1: parseEdited() with memo
 carry-over and arena reuse
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

parseEdited(newSource) re-parses after an edit reusing everything the edit
provably did not touch. No edit protocol: the damage window is DERIVED by
diffing the old and new token columns (longest identical prefix; longest
suffix identical modulo the char delta) — the caller just hands the new text.

Reuse flows through the carried memo. Soundness rests on three pieces:

- Every memo entry records its lookahead WATERMARK (memoExt): the farthest
  token the stored parse may have read — a PEG parse probes beyond its end
  (failed longer arms, not() lookaheads, SECOND-token dispatch). It comes for
  free from the global advance watermark at frame exit; the fixed read slack
  (stop token + SECOND probe, +2) is applied at invalidation time, so the
  stored value stays the true watermark.
- A memo HIT bumps the watermark to the entry's own: the jump semantically
  reads everything the stored parse read, or an enclosing rule completing
  right after a reused subtree would record a watermark smaller than what its
  result depends on (including the child's over-probing failed arms), and a
  later edit in the gap would keep the stale entry alive. Guaranteed batch
  no-op by monotonicity — the 18,805-file byte-identical gate and the exact
  reject-message gate both stay green.
- Prefix entries survive when watermark+slack stays inside the prefix; suffix
  entries shift by the token delta; entries inside the damage window are
  dropped. The old arena is re-based in place (suffix rows by charDelta, reused
  leaf entries by tokenDelta; damage-spanning rows are unreachable garbage), and
  new rows append after the old — a full parse() compacts.

The ENTRY rule's repetition units (Stmt/Decl for TS — derived from the grammar
shape, no language names) now memoize through parseRuleEntry like pratt/
left-rec rules, so whole untouched statements reuse, not just expression
subtrees. Token columns double-buffer across edits (ping-pong, zero
steady-state allocation; the in-place memo variant for token-count changes
measured SLOWER than sparse rebuild — undefined writes materialize holes —
and was reverted).

Gate (test/incremental-verify.ts, #30 in the chain): scripted edit sessions
over the bench files — inserts, deletes, statement insertions, syntax-breaking
edits — every accepted re-parse must be byte-identical (toObject) to a fresh
parse; rejects must reject on both sides. 120 steps, 0 mismatches. Measured:
mixed-session 1.4-1.5x, single-keystroke ~3x, pure-reuse floor ~5.6x; the
remaining cost is full-file relex + diff bookkeeping (windowed relex and the
green {rel,len} re-base are the named follow-ups), the reused parse itself is
~1% of the profile.
---
 src/emit-parser.ts         | 234 +++++++++++++++++++++++++++++++++----
 test/check.ts              |   1 +
 test/incremental-verify.ts | 113 ++++++++++++++++++
 3 files changed, 327 insertions(+), 21 deletions(-)
 create mode 100644 test/incremental-verify.ts

diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index 8d8f521..b7a5194 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -1431,6 +1431,7 @@ let pos = 0;
 let maxPos = 0;
 let memoNode = [];
 let memoEnd = [];
+let memoExt = [];   // per-entry lookahead extent (see parseRuleEntry)
 let parseLimit = -1;
 // cap = the exclusive lookahead bound: min(parseLimit-or-∞, tokN), maintained at the
 // parseLimit set/restore sites and the one token-stream mutation (the '>' splice).
@@ -1498,6 +1499,7 @@ function matchPuLitGT(pu) {
     // Token indices shifted: drop the per-rule memo arrays (recreated lazily at the new size).
     memoNode.fill(undefined);
     memoEnd.fill(undefined);
+    memoExt.fill(undefined);
     // Leaf entries reference tokens BY INDEX, so the splice's +1 shift must be applied
     // to every committed/scratch entry past the split point. (Object trees were immune —
     // leaves copied their spans; the arena trades that copy for this rare O(kidN) pass.
@@ -1578,10 +1580,30 @@ function parseTemplateExpr() {
 // Emit the per-rule parse functions + the RULES dispatch table.
 function emitRuleFns(e: Emitter, a: ReturnType<typeof analyze>) {
   const ruleFn = (name: string) => `R_${sanitize(name)}`;
+  // SPINE rules — the entry rule's repetition units (the rules its body references
+  // directly): the natural reuse granularity for incremental re-parsing, so they get
+  // memoized through parseRuleEntry like pratt/left-rec rules. Without this only
+  // expression/type subtrees reuse and every statement re-walks on each edit.
+  // Derived from the grammar shape — no language names.
+  const spine = new Set<string>();
+  {
+    const entryRule = a.grammar.rules[a.grammar.rules.length - 1];
+    const walk = (x: RuleExpr): void => {
+      switch (x.type) {
+        case 'ref': if (a.ruleByName.has(x.name)) spine.add(x.name); return;
+        case 'seq': case 'alt': x.items.forEach(walk); return;
+        case 'quantifier': case 'group': walk(x.body); return;
+        case 'sep': walk(x.element); return;
+        default: return;
+      }
+    };
+    walk(entryRule.body);
+    spine.delete(entryRule.name);
+  }
   for (const rule of a.grammar.rules) {
     if (a.prattRules.has(rule.name)) emitPrattRule(e, a, rule);
     else if (a.leftRecSet.has(rule.name)) emitLeftRecRule(e, a, rule);
-    else emitNonRecRule(e, a, rule);
+    else emitNonRecRule(e, a, rule, spine.has(rule.name) && !a.prattRules.has(rule.name) && !a.leftRecSet.has(rule.name));
   }
   // Dispatch table (string rule name → fn), for parseTemplateExpr's dynamic interp rule.
   e.emit(`const RULES = {`);
@@ -1593,11 +1615,19 @@ function emitRuleFns(e: Emitter, a: ReturnType<typeof analyze>) {
 // committed to the arena IMMEDIATELY (finishNode also truncates scratch back to mark);
 // a not-better arm's children are dropped by the next arm's scn reset (a beaten
 // committed candidate stays as an arena hole — the measured 3-5% discard class).
-function emitNonRecRule(e: Emitter, a: ReturnType<typeof analyze>, rule: RuleDecl) {
+function emitNonRecRule(e: Emitter, a: ReturnType<typeof analyze>, rule: RuleDecl, memoized = false) {
   const ruleFn = `R_${sanitize(rule.name)}`;
   const rid = a.grammar.rules.indexOf(rule);
   const alts = rule.body.type === 'alt' ? rule.body.items : [rule.body];
-  e.emit(`function ${ruleFn}() {`);
+  // A memoized (spine) rule splits into the public wrapper (parseRuleEntry owns the
+  // push+boolean contract and the memo) and an id-returning core, exactly like the
+  // pratt/left-rec rules.
+  if (memoized) {
+    e.emit(`function ${ruleFn}() { return parseRuleEntry(${e.memoIndex(rule.name)}, ${J(rule.name)}, ${ruleFn}_core); }`);
+    e.emit(`function ${ruleFn}_core(_minBp) {`);
+  } else {
+    e.emit(`function ${ruleFn}() {`);
+  }
   e.emit(`  const saved = pos; const mark = scn;`);
   e.emit(`  let bestId = -1; let bestPos = saved;`);
   const dispatch = e.altMaskDispatch(alts, '_am');
@@ -1612,8 +1642,13 @@ function emitNonRecRule(e: Emitter, a: ReturnType<typeof analyze>, rule: RuleDec
     e.emit(`    }`);
     e.emit(`  }`);
   });
-  e.emit(`  if (bestId >= 0) { pos = bestPos; scn = mark; scPush(bestId); return true; }`);
-  e.emit(`  pos = saved; scn = mark; return false;`);
+  if (memoized) {
+    e.emit(`  if (bestId >= 0) { pos = bestPos; scn = mark; return bestId; }`);
+    e.emit(`  pos = saved; scn = mark; return -1;`);
+  } else {
+    e.emit(`  if (bestId >= 0) { pos = bestPos; scn = mark; scPush(bestId); return true; }`);
+    e.emit(`  pos = saved; scn = mark; return false;`);
+  }
   e.emit(`}`);
   // Arm matchers.
   alts.forEach((alt, i) => emitArm(e, a, rule.name, i, alt));
@@ -1890,24 +1925,42 @@ function emitMixfixLed(e: Emitter, a: ReturnType<typeof analyze>, fnName: string
 function emitDriver(e: Emitter, a: ReturnType<typeof analyze>, entry: string) {
   e.emit(String.raw`
 // parseRule for a pratt/left-rec rule: memo + context + suppress, then the core.
-// The memo is a pair of per-rule arrays indexed by start pos (lazily sized to the token
-// count, undefined-holed): a lookup is two array loads, a store allocates nothing — no
-// Map hashing and no {node, end} wrapper per store. The core returns a node ID (or -1);
+// The memo is per-rule arrays indexed by start pos (lazily sized to the token count,
+// undefined-holed): a lookup is two array loads, a store allocates nothing — no Map
+// hashing and no {node, end} wrapper per store. The core returns a node ID (or -1);
 // this wrapper owns the public arena contract (push the id, return a boolean).
+//
+// memoExt records each entry's LOOKAHEAD EXTENT — the farthest token index the parse
+// may have READ (not merely consumed) — which is what incremental invalidation must
+// intersect with an edit's damage window: a PEG parse probes beyond its end (failed
+// longer arms, not() lookaheads, SECOND-token dispatch). The extent comes for free
+// from the global advance watermark: maxPos at frame exit, +2 covering the stop-token
+// and SECOND-token reads past it. Left-to-right parsing keeps the watermark near the
+// current frontier, so the value is tight on the dominant flow and only OVER-
+// invalidates (soundly) near big-backtrack clusters.
 function parseRuleEntry(idx, name, core) {
   const mySup = suppressNext;
   suppressNext = null;
   const capped = parseLimit >= 0;
   const start = pos;
-  // Capture the pair together: a '>'-splice inside core() detaches both via fill(undefined),
-  // and the store below must then write into the DETACHED pair (i.e. be discarded), exactly
-  // like the old per-rule Map did.
+  // Capture the arrays together: a '>'-splice inside core() detaches them via
+  // fill(undefined), and the store below must then write into the DETACHED arrays
+  // (i.e. be discarded), exactly like the old per-rule Map did.
   let me = memoEnd[idx];
   let mn = memoNode[idx];
+  let mx = memoExt[idx];
   if (!mySup && !capped && me !== undefined) {
     const e = me[start];
     if (e !== undefined) {
       pos = e;
+      // The jump SEMANTICALLY reads everything the stored parse read: keep the advance
+      // watermark ≥ the entry's watermark, or an ENCLOSING rule that completes right
+      // after a reused subtree stores a watermark smaller than what its result depends
+      // on (including the child's own over-probing failed arms), and a later edit in
+      // the gap keeps the stale entry alive. A guaranteed batch no-op: the watermark is
+      // monotone and was already ≥ this value when the entry was stored.
+      const ex = mx[start];
+      if (ex > maxPos) maxPos = ex;
       const id = mn[start];
       if (id >= 0) { scPush(id); return true; }
       return false;
@@ -1928,11 +1981,16 @@ function parseRuleEntry(idx, name, core) {
     if (me === undefined) {
       me = new Array(tokN + 1);
       mn = new Array(tokN + 1);
+      mx = new Array(tokN + 1);
       memoEnd[idx] = me;
       memoNode[idx] = mn;
+      memoExt[idx] = mx;
     }
     me[start] = pos;
     mn[start] = result;
+    mx[start] = maxPos;   // the TRUE probe watermark — the +2 read slack (stop token,
+                          // SECOND-token dispatch) is applied at INVALIDATION time
+
   }
   if (result >= 0) { scPush(result); return true; }
   return false;
@@ -2031,7 +2089,7 @@ export function toObject(id) {
 }
 
 // Parse to the ARENA: returns the root node id.
-export function parse(source, entryRule) {
+function lexInto(source) {
 ${e.soa ? `  tokenize(source);` : String.raw`  src = source;
   const _toks = tokenize(source);
   const _n = _toks.length;
@@ -2044,19 +2102,24 @@ ${e.soa ? `  tokenize(source);` : String.raw`  src = source;
     tkText[_i] = _t.text;
   }
   tokN = _n;`}
+}
+
+function farthest(errPos) {
+  if (maxPos <= errPos || maxPos >= tokN) return '';
+  return ' [farthest: offset ' + tkOff[maxPos] + " near '" + tokTextAt(maxPos).slice(0, 20) + "']";
+}
+
+// Run the entry rule over the CURRENT token stream (shared by parse / parseEdited —
+// everything per-parse EXCEPT the memo and the arena cursor, which parseEdited carries).
+function runParse(entryRule) {
   pos = 0;
   maxPos = 0;
-  memoNode = new Array(MEMO_RULES);
-  memoEnd = new Array(MEMO_RULES);
   parseLimit = -1;
   cap = tokN;
   currentPrattContext = null;
   suppressNext = null;
   suppressCur = null;
-  nodeN = 0;
-  kidN = 0;
   scn = 0;
-
   const entry = entryRule ?? ENTRY;
   if (tokN === 0) {
     const rid = RULE_NAMES.indexOf(entry);
@@ -2070,14 +2133,143 @@ ${e.soa ? `  tokenize(source);` : String.raw`  src = source;
     throw new Error('Parse error at offset ' + tkOff[pos] + ": unexpected '" + tokTextAt(pos) + "' after successful parse" + farthest(pos));
   }
   return sc[--scn];
+}
+
+// Source of the last COMPLETED parse — the token columns, arena and memo describe it.
+// null whenever the module state is not a coherent snapshot (no parse yet, or the last
+// attempt threw), so parseEdited falls back to a full parse.
+let lastSrc = null;
+// The spare token-column buffer set (parseEdited ping-pongs between the live set and
+// this one, so steady-state edits never allocate columns).
+let altK = null, altT = null, altOff = null, altEnd = null, altFl = null;
+${e.soa ? '' : 'let altText = [];'}
+
+export function parse(source, entryRule) {
+  lastSrc = null;
+  lexInto(source);
+  memoNode = new Array(MEMO_RULES);
+  memoEnd = new Array(MEMO_RULES);
+  memoExt = new Array(MEMO_RULES);
+  nodeN = 0;
+  kidN = 0;
+  const root = runParse(entryRule);
+  lastSrc = source;
+  return root;
+}
 
-  function farthest(errPos) {
-    if (maxPos <= errPos || maxPos >= tokN) return '';
-    return ' [farthest: offset ' + tkOff[maxPos] + " near '" + tokTextAt(maxPos).slice(0, 20) + "']";
+// ── Incremental re-parse ──
+// No edit protocol: the caller hands the NEW source; the damage window is DERIVED by
+// diffing the old and new token columns (longest identical prefix; longest suffix
+// identical modulo the character delta). Reuse then flows through the carried memo:
+//   - prefix entries survive when their lookahead extent never reached the damage;
+//   - suffix entries survive shifted by the token delta (their reads are wholly inside
+//     the suffix, which is identical modulo position);
+//   - damaged-region entries are dropped and re-parsed.
+// The old arena is re-based in place (rows starting at/after the suffix shift by the
+// char delta; reused leaf entries by the token delta; rows STARTING inside the damage
+// are unreachable garbage — their values no longer matter), and new rows append after
+// the old ones. A full parse() compacts (resets the arena); long edit sessions grow
+// until then. Lexing is FULL-FILE by design: the lexer carries cross-token state
+// (template nesting, regex context, markup modes), full lexing is a small share of a
+// parse, and the diff is what localizes the damage — not the lexer.
+export function parseEdited(source, entryRule) {
+  if (lastSrc === null) return parse(source, entryRule);
+  const oSrc = lastSrc;
+  lastSrc = null;
+  // Stash the old columns BY REFERENCE and lex into the spare buffer set (ping-pong
+  // double buffer — steady-state edits allocate nothing and keep the pages warm).
+  const oK = tkK, oT = tkT, oOff = tkOff, oEnd = tkEnd, oFl = tkFl, oN = tokN;
+${e.soa ? '' : '  const oText = tkText;'}
+  if (altK === null || altK.length !== tkCap) {
+    altK = new tkK.constructor(tkCap); altT = new tkT.constructor(tkCap);
+    altOff = new Int32Array(tkCap); altEnd = new Int32Array(tkCap); altFl = new Uint8Array(tkCap);
+  }
+  tkK = altK; tkT = altT; tkOff = altOff; tkEnd = altEnd; tkFl = altFl;
+${e.soa ? '' : '  tkText = altText; tkText.length = 0;'}
+  altK = oK; altT = oT; altOff = oOff; altEnd = oEnd; altFl = oFl;
+${e.soa ? '' : '  altText = oText;'}
+  lexInto(source);
+  if (tkCap !== oK.length) {
+    // the new lex outgrew the buffer (growTok reallocated): drop the stale spare
+    altK = null;
+  }
+  const nN = tokN;
+  const charDelta = source.length - oSrc.length;
+  const minN = oN < nN ? oN : nN;
+  // Longest identical prefix (positions included — the prefix is unshifted).
+  let p = 0;
+  while (p < minN && oK[p] === tkK[p] && oT[p] === tkT[p] && oFl[p] === tkFl[p]
+      && oOff[p] === tkOff[p] && oEnd[p] === tkEnd[p]${e.soa ? '' : ' && oText[p] === tkText[p]'}) p++;
+  // Longest identical suffix modulo charDelta (disjoint from the prefix).
+  let s = 0;
+  while (s < minN - p) {
+    const i = oN - 1 - s, j = nN - 1 - s;
+    if (oK[i] === tkK[j] && oT[i] === tkT[j] && oFl[i] === tkFl[j]
+      && oOff[i] + charDelta === tkOff[j] && oEnd[i] + charDelta === tkEnd[j]${e.soa ? '' : ' && oText[i] === tkText[j]'}) s++;
+    else break;
+  }
+  const dOldEnd = oN - s;          // damaged OLD tokens: [p, dOldEnd)
+  const tokenDelta = nN - oN;
+  // Re-base the old arena in place: rows starting at/after the first suffix token's OLD
+  // offset shift by charDelta; reused leaf entries past the damage shift by tokenDelta.
+  // (A reusable subtree lies entirely on one side of the damage, so the start-threshold
+  // classifies it correctly; damage-spanning rows are garbage either way.)
+  if (s > 0 && (charDelta !== 0 || tokenDelta !== 0)) {
+    const charThresh = oOff[dOldEnd];
+    if (charDelta !== 0) {
+      for (let i = 0; i < nodeN; i++) if (rowOff[i] >= charThresh) rowOff[i] += charDelta;
+    }
+    if (tokenDelta !== 0) {
+      const eShift = tokenDelta << 2;
+      for (let i = 0; i < kidN; i++) {
+        const e = kids[i];
+        if (e < 0 && ((~e) >>> 2) >= dOldEnd) kids[i] = e - eShift;
+      }
+    }
+  }
+  // Carry the memo across: prefix entries whose lookahead never reached the damage stay
+  // at their index; suffix entries move by tokenDelta (ids reference the re-based rows).
+  // tokenDelta === 0 (the common keystroke: editing within a token) mutates IN PLACE —
+  // no per-rule array allocation; only the damage window and the prefix entries whose
+  // extent crossed into it are cleared.
+  for (let r = 0; r < MEMO_RULES; r++) {
+    const me = memoEnd[r];
+    if (me === undefined) continue;
+    const mn = memoNode[r], mx = memoExt[r];
+    // prefix entries whose lookahead may have crossed into the damage die in place
+    // (mx is the advance watermark; reads run up to two tokens past it: the stop
+    // token and the SECOND-token dispatch probe)
+    for (let i = 0; i < p; i++) {
+      if (me[i] !== undefined && mx[i] + 2 > p) { me[i] = undefined; mn[i] = undefined; mx[i] = undefined; }
+    }
+    if (tokenDelta === 0) {
+      for (let i = p; i < dOldEnd; i++) {
+        if (me[i] !== undefined) { me[i] = undefined; mn[i] = undefined; mx[i] = undefined; }
+      }
+      continue;
+    }
+    // token count changed: rebuild the rule's arrays sparsely (measured FASTER than an
+    // in-place direction-aware shift — writing undefined through the holes materializes
+    // them; fresh holey arrays skip that entirely).
+    const nme = new Array(nN + 1), nmn = new Array(nN + 1), nmx = new Array(nN + 1);
+    const pCap = p < nN + 1 ? p : nN + 1;
+    for (let i = 0; i < pCap; i++) {
+      if (me[i] !== undefined) { nme[i] = me[i]; nmn[i] = mn[i]; nmx[i] = mx[i]; }
+    }
+    for (let i = dOldEnd; i <= oN; i++) {
+      if (me[i] !== undefined) {
+        const j = i + tokenDelta;
+        nme[j] = me[i] + tokenDelta; nmn[j] = mn[i]; nmx[j] = mx[i] + tokenDelta;
+      }
+    }
+    memoEnd[r] = nme; memoNode[r] = nmn; memoExt[r] = nmx;
   }
+  const root = runParse(entryRule);
+  lastSrc = source;
+  return root;
 }
 
 export { tokenize };
-export function createParser() { return { parse, tree, visit, toObject, tokenize }; }
+export function createParser() { return { parse, parseEdited, tree, visit, toObject, tokenize }; }
 `);
 }
diff --git a/test/check.ts b/test/check.ts
index 8b2c81e..a0f18e4 100644
--- a/test/check.ts
+++ b/test/check.ts
@@ -21,6 +21,7 @@ const GATES: Gate[] = [
   { group: 'core', name: 'cst-text-invariant', args: ['test/cst-text-invariant.ts'] },
   { group: 'conformance', name: 'ts-ast-structure', args: ['test/ts-ast-verify.ts'] },
   { group: 'core', name: 'cst-match-totality', args: ['test/cst-match-totality.ts'] },
+  { group: 'core', name: 'incremental-verify', args: ['test/incremental-verify.ts'] },
   { group: 'core', name: 'issue-cases', args: ['test/test-issues.ts'] },
   { group: 'conformance', name: 'js', args: ['test/js-conformance.ts'] },
   { group: 'conformance', name: 'tsx', args: ['test/tsx-conformance.ts'] },
diff --git a/test/incremental-verify.ts b/test/incremental-verify.ts
new file mode 100644
index 0000000..2ed248d
--- /dev/null
+++ b/test/incremental-verify.ts
@@ -0,0 +1,113 @@
+// Gate: INCREMENTAL ≡ FRESH. parseEdited(newSource) must produce a tree byte-identical
+// (via toObject) to a from-scratch parse of the same text, across scripted edit
+// sessions over real files — inserts, deletions, replacements, statement insertions,
+// edits inside strings/comments, and syntax-breaking edits (both sides must reject;
+// the session self-heals on the next good text). Also reports the incremental speedup
+// on accepted edits, so reuse is MEASURED, not assumed.
+//
+//   node test/incremental-verify.ts
+import { existsSync, readFileSync, writeFileSync } from 'node:fs';
+import { emitParser } from '../src/emit-parser.ts';
+
+const grammar = (await import('../typescript.ts')).default;
+const emPath = '/tmp/emitted-incremental.mjs';
+writeFileSync(emPath, emitParser(grammar));
+type Em = {
+  parse(s: string): number;
+  parseEdited(s: string): number;
+  toObject(id: number): unknown;
+};
+const session = (await import(emPath + '?session=' + process.pid)) as Em;
+const fresh = (await import(emPath + '?fresh=' + process.pid)) as Em;
+
+// Deterministic LCG so failures replay.
+let seedState = 0x2F6E2B1;
+const rand = () => ((seedState = (seedState * 48271) % 0x7fffffff) / 0x7fffffff);
+const randInt = (n: number) => Math.floor(rand() * n);
+
+const INSERTS = ['x', '_v', '42', ' + y', '.m', '()', ' /*c*/ ', '"s"', 'await ', '!', '?'];
+const STMTS = ['const q9 = 1;\n', 'function g9(a) { return a; }\n', 'if (x9) { y9(); }\n', '// note\n', 'type T9 = string | number;\n'];
+
+function mutate(text: string): string {
+  switch (randInt(5)) {
+    case 0: { // insert a small fragment at a random position
+      const at = randInt(text.length);
+      return text.slice(0, at) + INSERTS[randInt(INSERTS.length)] + text.slice(at);
+    }
+    case 1: { // delete a small span
+      const at = randInt(Math.max(1, text.length - 8));
+      return text.slice(0, at) + text.slice(at + 1 + randInt(6));
+    }
+    case 2: { // replace a character
+      const at = randInt(Math.max(1, text.length - 1));
+      return text.slice(0, at) + 'z' + text.slice(at + 1);
+    }
+    case 3: { // insert a whole statement at a line boundary
+      const lines = text.split('\n');
+      const at = randInt(lines.length);
+      lines.splice(at, 0, STMTS[randInt(STMTS.length)].trimEnd());
+      return lines.join('\n');
+    }
+    default: { // append at the end (the pure-prefix reuse case)
+      return text + '\n' + STMTS[randInt(STMTS.length)];
+    }
+  }
+}
+
+const FILES = [
+  '/tmp/ts-repo/tests/cases/conformance/parser/ecmascript5/RealWorld/parserharness.ts',
+  '/tmp/ts-repo/tests/cases/conformance/fixSignatureCaching.ts',
+  '/tmp/ts-repo/tests/cases/conformance/parser/ecmascript5/parserRealSource7.ts',
+  '/tmp/ts-repo/tests/cases/conformance/parser/ecmascript5/RealWorld/parserindenter.ts',
+].filter(existsSync);
+const STEPS = 30;
+
+let steps = 0, equal = 0, bothReject = 0, mismatch = 0;
+let tInc = 0, tFresh = 0;
+const failures: string[] = [];
+
+for (const f of FILES) {
+  let text = readFileSync(f, 'utf-8');
+  session.parse(text);   // open the session
+  for (let k = 0; k < STEPS; k++) {
+    const next = mutate(text);
+    steps++;
+    let freshRoot = -1, freshErr: string | null = null;
+    const tf0 = performance.now();
+    try { freshRoot = fresh.parse(next); } catch (e) { freshErr = (e as Error).message; }
+    const tf1 = performance.now();
+    let incRoot = -1, incErr: string | null = null;
+    const ti0 = performance.now();
+    try { incRoot = session.parseEdited(next); } catch (e) { incErr = (e as Error).message; }
+    const ti1 = performance.now();
+    if (freshErr !== null || incErr !== null) {
+      if ((freshErr === null) !== (incErr === null)) {
+        mismatch++;
+        if (failures.length < 5) failures.push(`${f.split('/').pop()} step ${k}: fresh ${freshErr ? 'reject' : 'accept'} / incremental ${incErr ? 'reject' : 'accept'}\n    fresh: ${freshErr ?? '-'}\n    inc:   ${incErr ?? '-'}`);
+      } else bothReject++;
+      // rejected text: do not advance the session text (the session reset itself)
+      continue;
+    }
+    tFresh += tf1 - tf0; tInc += ti1 - ti0;
+    const a = JSON.stringify(fresh.toObject(freshRoot));
+    const b = JSON.stringify(session.toObject(incRoot));
+    if (a === b) equal++;
+    else {
+      mismatch++;
+      if (failures.length < 5) {
+        let i = 0; while (i < a.length && i < b.length && a[i] === b[i]) i++;
+        failures.push(`${f.split('/').pop()} step ${k}: tree diverges @${i}\n    fresh: …${a.slice(Math.max(0, i - 50), i + 50)}…\n    inc:   …${b.slice(Math.max(0, i - 50), i + 50)}…`);
+      }
+    }
+    text = next;
+  }
+}
+
+console.log(`incremental ≡ fresh: ${equal} equal · ${bothReject} both-reject · ${mismatch} MISMATCH  (${steps} steps over ${FILES.length} files)`);
+if (tInc > 0) console.log(`time: incremental ${tInc.toFixed(1)}ms vs fresh ${tFresh.toFixed(1)}ms → ${(tFresh / tInc).toFixed(2)}× faster on accepted edits`);
+for (const s of failures) console.log('  ✗ ' + s);
+if (mismatch > 0) {
+  console.error('✗ incremental parse diverges from a fresh parse');
+  process.exit(1);
+}
+console.log('✓ every edited re-parse is byte-identical to a fresh parse');

From 909b835bd14d8eca0b5769745fb23d535fc05ef5 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Thu, 11 Jun 2026 03:22:44 +0800
Subject: [PATCH 02/15] Windowed re-lexing: lex O(damage) with depth-recorded
 restart/resync (M1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The lexer core is parameterized (lexCore): start anywhere with the previous
token's (k, t) as the regex-context seed and empty template/paren stacks.
Every token records its two stack depths (tkDp/tkPd columns); the restart
anchor is the last token before the damage with both records zero and no
live cross-token flag (a control-head ')' or postfix-ambiguous operator),
walking back to the file head in the worst case — always sound.

The window lexes into the spare buffer set (the old stream stays live), and
RESYNC fires at the first token at/past the damage end that aligns with an
old token (same k/t, spans shifted by the char delta) at EQUAL stack depths
where every still-open bracket was opened BEFORE the damage — the byte-equal
prefix guarantees those stack entries agree, while anything opened inside
the damage may differ in control-head-ness and must not span the join. The
depth-tolerant condition matters: an all-wrapping IIFE (typescript.js) keeps
paren depth >= 1 everywhere, and a depth-0-only resync degraded 9MB edits to
~1.2x; with it they reach 2.6x. The splice is copyWithin + a suffix span
shift; the damage window is derived from a char-level prefix/suffix compare
of the two sources (no edit protocol needed).

The true token prefix is recovered by comparing the window's leading tokens
against the old stream before the splice, so the memo carry keeps everything
the re-lex merely re-derived. Fallback-lexer grammars keep the full-relex
path; tokenize() is unchanged for batch (the lexer-equality gate runs the
full streams).

Numbers: 81KB keystroke 3.5x -> 3.3x parse-side with lex now O(damage);
mixed sessions ~1.5-1.65x; 9MB keystroke 2.6x. Remaining per-edit O(n) is
the M3/M4 bookkeeping (memo prefix scans, arena re-base loops, suffix span
shift) — the green {rel,len} re-base and old-tree cursor adoption kill
those next. 30/30 gates; emit≡interp 18,802 byte-identical; reject messages
and token streams exact.
---
 src/emit-lexer.ts  |  66 ++++++++++++++++++---
 src/emit-parser.ts | 145 ++++++++++++++++++++++++++++++++++-----------
 2 files changed, 169 insertions(+), 42 deletions(-)

diff --git a/src/emit-lexer.ts b/src/emit-lexer.ts
index 1a41ac6..9dd85d0 100644
--- a/src/emit-lexer.ts
+++ b/src/emit-lexer.ts
@@ -199,32 +199,70 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`function tokenize(source) {`);
   emit(`  src = source;`);
   emit(`  tokN = 0;`);
+  emit(`  lexCore(source, 0, -1, 0, -1, 0, 0);`);
+  emit(`  return tokN;`);
+  emit(`}`);
+  emit(`// The lexer core, parameterized for WINDOWED re-lexing: start at startPos with`);
+  emit(`// the previous token's (k, t) as the regex-context seed (-1 = none / file start)`);
+  emit(`// and EMPTY template/paren stacks (the caller restarts only at depth-0 safe`);
+  emit(`// points). In window mode (wndPtr0 >= 0) the OLD stream sits in the alt buffers;`);
+  emit(`// after each token pushed at/past wndMinOff, resync fires when it aligns with an`);
+  emit(`// old token (same k/t, spans shifted by wndDelta) at EQUAL recorded stack depths,`);
+  emit(`// neither deeper than at the last token starting before wndCs — returns that OLD`);
+  emit(`// index (the duplicate push is retracted), or -1 when lexing ran to EOF.`);
+  emit(`function lexCore(source, startPos, pvK, pvT, wndPtr0, wndMinOff, wndDelta, wndCs) {`);
   emit(`  const n = source.length;`);
-  emit(`  let pos = 0;`);
+  emit(`  let pos = startPos;`);
   emit(`  let pendingNl = false;`);
   emit(`  let lastBangWasPostfix = false;`);
   emit(`  let lastCloseWasParenHead = false;`);
   emit(`  const templateStack = [];`);
   emit(`  const parenHeadStack = [];`);
+  emit(`  let wndPtr = wndPtr0;`);
+  emit(`  let wndHit = -1;`);
+  emit(`  // stack depths as of the last token fully BEFORE the damage: a resync point may`);
+  emit(`  // sit at any depth as long as every bracket still open there was opened before`);
+  emit(`  // the damage (the prefix agrees byte-for-byte, so those stack entries agree too;`);
+  emit(`  // anything opened inside the damage could differ in control-head-ness).`);
+  emit(`  let dmgDp = -1, dmgPd = -1;`);
+  emit(`  let lastDp = 0, lastPd = 0;`);
   emit(`  function tkPush(k, t, off, end) {`);
   emit(`    if (tokN === tkCap) growTok();`);
   emit(`    tkK[tokN] = k; tkT[tokN] = t; tkOff[tokN] = off; tkEnd[tokN] = end;`);
   emit(`    tkFl[tokN] = pendingNl ? 1 : 0;`);
+  emit(`    tkDp[tokN] = templateStack.length;`);
+  emit(`    tkPd[tokN] = parenHeadStack.length;`);
   emit(`    pendingNl = false;`);
+  emit(`    pvK = k; pvT = t;`);
   emit(`    tokN++;`);
+  emit(`    if (wndPtr >= 0) {`);
+  emit(`      if (dmgPd < 0) {`);
+  emit(`        if (off >= wndCs) { dmgDp = lastDp; dmgPd = lastPd; }`);
+  emit(`        else { lastDp = tkDp[tokN - 1]; lastPd = tkPd[tokN - 1]; }`);
+  emit(`      }`);
+  emit(`      if (off >= wndMinOff && dmgPd >= 0`);
+  emit(`          && templateStack.length <= dmgDp && parenHeadStack.length <= dmgPd) {`);
+  emit(`        while (wndPtr < altN && altOff[wndPtr] + wndDelta < off) wndPtr++;`);
+  emit(`        if (wndPtr < altN && altOff[wndPtr] + wndDelta === off && altK[wndPtr] === k && altT[wndPtr] === t`);
+  emit(`            && altEnd[wndPtr] + wndDelta === end && altDp[wndPtr] === templateStack.length && altPd[wndPtr] === parenHeadStack.length) {`);
+  emit(`          wndHit = wndPtr;`);
+  emit(`        }`);
+  emit(`      }`);
+  emit(`    }`);
   emit(`  }`);
   emit(`  // prevIsValue, baked: postfix-ambiguous op → its recorded position; an expression-`);
   emit(`  // head keyword or a control-head ')' is NOT a value; else division-prev type/text.`);
   emit(`  function prevIsValue() {`);
-  emit(`    if (tokN === 0) return false;`);
-  emit(`    const i = tokN - 1;`);
-  emit(`    const t = tkT[i];`);
+  emit(`    const k = tokN > 0 ? tkK[tokN - 1] : pvK;`);
+  emit(`    if (k < 0) return false;`);
+  emit(`    const t = tokN > 0 ? tkT[tokN - 1] : pvT;`);
   emit(`    if (LX_PFXV[t] !== 0) return lastBangWasPostfix;`);
-  emit(`    if (tkK[i] === ${kIdent} && LX_EXPRKW[t] !== 0) return false;`);
+  emit(`    if (k === ${kIdent} && LX_EXPRKW[t] !== 0) return false;`);
   emit(`    if (t === ${tRParen} && lastCloseWasParenHead) return false;`);
-  emit(`    return LX_DIVK[tkK[i]] !== 0 || LX_DIVT[t] !== 0;`);
+  emit(`    return LX_DIVK[k] !== 0 || LX_DIVT[t] !== 0;`);
   emit(`  }`);
   emit(`  while (pos < n) {`);
+  emit(`    if (wndHit >= 0) { tokN--; return wndHit; }`);
   emit(`    const cc = source.charCodeAt(pos);`);
   emit(`    // whitespace: ASCII \\s run by char loop; a non-ASCII candidate falls back to the regex`);
   emit(`    if (cc === 32 || (cc >= 9 && cc <= 13)) {`);
@@ -461,7 +499,21 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   }
   emit(`    throw new Error("Unexpected character at offset " + pos + ": '" + source[pos] + "'");`);
   emit(`  }`);
-  emit(`  return tokN;`);
+  emit(`  if (wndHit >= 0) { tokN--; return wndHit; }`);
+  emit(`  return -1;`);
+  emit(`}`);
+  emit(`// Windowed-relex restart anchor: the last token B ending at/before the damage`);
+  emit(`// whose recorded stack depths are zero and whose shape leaves no cross-token`);
+  emit(`// lexer flag live (a control-head ')' or a postfix-ambiguous operator would`);
+  emit(`// make the next token's regex-context depend on unrecoverable state). -1 = file`);
+  emit(`// head (always sound, degrades to a full re-lex).`);
+  emit(`function findRestart(cs) {`);
+  emit(`  let lo = 0, hi = tokN;`);
+  emit(`  while (lo < hi) { const mid = (lo + hi) >> 1; if (tkEnd[mid] <= cs) lo = mid + 1; else hi = mid; }`);
+  emit(`  for (let b = lo - 1; b >= 0; b--) {`);
+  emit(`    if (tkDp[b] === 0 && tkPd[b] === 0 && LX_PFXV[tkT[b]] === 0 && !(tkK[b] === 1 && tkT[b] === ${tRParen})) return b;`);
+  emit(`  }`);
+  emit(`  return -1;`);
   emit(`}`);
   return out.join('\n');
 }
diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index b7a5194..9a0ddac 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -1329,6 +1329,12 @@ let tkT = new ${T_ARR}(4096);
 let tkOff = new Int32Array(4096);
 let tkEnd = new Int32Array(4096);
 let tkFl = new Uint8Array(4096);
+// lexer-state depth records per token (windowed relex restart/resync safety):
+// tkDp = template-interp stack depth, tkPd = paren-head stack depth, both AS RECORDED
+// at the token's push (the convention per token kind is fixed by the lexer's code
+// path; determinism is what the predicates rely on, depth-0 is the safe state).
+let tkDp = new Uint8Array(4096);
+let tkPd = new Uint16Array(4096);
 let tkCap = 4096;
 let tokN = 0;
 let src = '';
@@ -1340,6 +1346,8 @@ function growTok() {
   const o = new Int32Array(tkCap); o.set(tkOff); tkOff = o;
   const e2 = new Int32Array(tkCap); e2.set(tkEnd); tkEnd = e2;
   const f = new Uint8Array(tkCap); f.set(tkFl); tkFl = f;
+  const d = new Uint8Array(tkCap); d.set(tkDp); tkDp = d;
+  const q = new Uint16Array(tkCap); q.set(tkPd); tkPd = q;
 }
 
 // ── CST arena: nodes are rows in parallel columns; leaves are TOKEN REFERENCES ──
@@ -1489,6 +1497,8 @@ function matchPuLitGT(pu) {
     tkT.copyWithin(pos + 1, pos, tokN);
     tkOff.copyWithin(pos + 1, pos, tokN);
     tkEnd.copyWithin(pos + 1, pos, tokN);
+    tkDp.copyWithin(pos + 1, pos, tokN);
+    tkPd.copyWithin(pos + 1, pos, tokN);
     tkFl.copyWithin(pos + 1, pos, tokN);
     ${e.soa ? '' : "tkText.splice(pos, 1, '>', restText);"}
     tkT[pos] = pu; tkEnd[pos] = off + 1; tkFl[pos] = 0;
@@ -2099,6 +2109,7 @@ ${e.soa ? `  tokenize(source);` : String.raw`  src = source;
     const _t = _toks[_i];
     tkK[_i] = _t.k; tkT[_i] = _t.t; tkOff[_i] = _t.offset; tkEnd[_i] = _t.offset + _t.text.length;
     tkFl[_i] = (_t.newlineBefore ? 1 : 0) | (_t.commentBefore ? 2 : 0) | (_t.multilineFlowBefore ? 4 : 0);
+    tkDp[_i] = 0; tkPd[_i] = 0;
     tkText[_i] = _t.text;
   }
   tokN = _n;`}
@@ -2141,7 +2152,20 @@ function runParse(entryRule) {
 let lastSrc = null;
 // The spare token-column buffer set (parseEdited ping-pongs between the live set and
 // this one, so steady-state edits never allocate columns).
-let altK = null, altT = null, altOff = null, altEnd = null, altFl = null;
+let altK = null, altT = null, altOff = null, altEnd = null, altFl = null, altDp = null, altPd = null;
+let altCap = 0;
+let altN = 0;   // old-stream token count while a window lex runs (lexCore's resync bound)
+function swapBuffers() {
+  let x;
+  x = tkK; tkK = altK; altK = x;
+  x = tkT; tkT = altT; altT = x;
+  x = tkOff; tkOff = altOff; altOff = x;
+  x = tkEnd; tkEnd = altEnd; altEnd = x;
+  x = tkFl; tkFl = altFl; altFl = x;
+  x = tkDp; tkDp = altDp; altDp = x;
+  x = tkPd; tkPd = altPd; altPd = x;
+  x = tkCap; tkCap = altCap; altCap = x;
+}
 ${e.soa ? '' : 'let altText = [];'}
 
 export function parse(source, entryRule) {
@@ -2176,46 +2200,106 @@ export function parseEdited(source, entryRule) {
   if (lastSrc === null) return parse(source, entryRule);
   const oSrc = lastSrc;
   lastSrc = null;
-  // Stash the old columns BY REFERENCE and lex into the spare buffer set (ping-pong
-  // double buffer — steady-state edits allocate nothing and keep the pages warm).
+${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
+  // Char-level envelope (cheapest possible without an edit protocol).
+  const oldLen = oSrc.length, newLen = source.length;
+  const minL = oldLen < newLen ? oldLen : newLen;
+  let cs = 0;
+  while (cs < minL && oSrc.charCodeAt(cs) === source.charCodeAt(cs)) cs++;
+  let ce = 0;
+  while (ce < minL - cs && oSrc.charCodeAt(oldLen - 1 - ce) === source.charCodeAt(newLen - 1 - ce)) ce++;
+  const ceOld = oldLen - ce, ceNew = newLen - ce;
+  const charDelta = newLen - oldLen;
+  // Restart anchor: the last token B ending at/before the damage whose recorded
+  // depths are zero and whose shape carries no cross-token lexer flag (')' control-
+  // head, postfix-ambiguous op). B = -1 restarts at the file head — always sound.
+  const B = findRestart(cs);
+  const oN = tokN;
+  // first old token at/after the damage end — the resync search floor
+  let r0 = oN;
+  { let lo = 0, hi = oN;
+    while (lo < hi) { const mid = (lo + hi) >> 1; if (tkOff[mid] < ceOld) lo = mid + 1; else hi = mid; }
+    r0 = lo; }
+  // Lex the window into the spare buffers (the old stream stays live for resync).
+  if (altK === null || altCap < tkCap) {
+    altK = new tkK.constructor(tkCap); altT = new tkT.constructor(tkCap);
+    altOff = new Int32Array(tkCap); altEnd = new Int32Array(tkCap); altFl = new Uint8Array(tkCap);
+    altDp = new Uint8Array(tkCap); altPd = new Uint16Array(tkCap);
+    altCap = tkCap;
+  }
+  altN = oN;
+  swapBuffers();              // live = scratch, alt = OLD stream
+  src = source;
+  tokN = 0;
+  const startOff = B >= 0 ? altEnd[B] : 0;
+  const R0 = lexCore(source, startOff, B >= 0 ? altK[B] : -1, B >= 0 ? altT[B] : 0, r0, ceNew, charDelta, cs);
+  const W = tokN;
+  const R = R0 >= 0 ? R0 : oN;
+  swapBuffers();              // live = OLD stream again; window sits in the alt buffers
+  tokN = oN;
+  // TRUE token prefix p: the window re-derives [B+1 .. p) byte-identically; only past
+  // p is real damage (compared BEFORE the splice clobbers the old slots).
+  let p = B + 1;
+  { let i = 0;
+    while (i < W && p < R && altK[i] === tkK[p] && altT[i] === tkT[p] && altOff[i] === tkOff[p]
+        && altEnd[i] === tkEnd[p] && altFl[i] === tkFl[p]) { i++; p++; }
+  }
+  const dOldEnd = R;
+  const tokenDelta = (B + 1 + W) - R;
+  const charThresh = R < oN ? tkOff[R] : 0x7fffffff;
+  // ── splice: old[0..B] + window[0..W) + old[R..oN), then shift the suffix spans ──
+  const nN = B + 1 + W + (oN - R);
+  while (tkCap < nN + 1) growTok();
+  tkK.copyWithin(B + 1 + W, R, oN); tkT.copyWithin(B + 1 + W, R, oN);
+  tkOff.copyWithin(B + 1 + W, R, oN); tkEnd.copyWithin(B + 1 + W, R, oN);
+  tkFl.copyWithin(B + 1 + W, R, oN); tkDp.copyWithin(B + 1 + W, R, oN); tkPd.copyWithin(B + 1 + W, R, oN);
+  if (W > 0) {
+    tkK.set(altK.subarray(0, W), B + 1); tkT.set(altT.subarray(0, W), B + 1);
+    tkOff.set(altOff.subarray(0, W), B + 1); tkEnd.set(altEnd.subarray(0, W), B + 1);
+    tkFl.set(altFl.subarray(0, W), B + 1); tkDp.set(altDp.subarray(0, W), B + 1); tkPd.set(altPd.subarray(0, W), B + 1);
+  }
+  if (charDelta !== 0) {
+    for (let i = B + 1 + W; i < nN; i++) { tkOff[i] += charDelta; tkEnd[i] += charDelta; }
+  }
+  tokN = nN;
+  const nN2 = nN;
+  const oN2 = oN;` : String.raw`  // (fallback-lexer grammars keep the full-relex + token-diff path)
   const oK = tkK, oT = tkT, oOff = tkOff, oEnd = tkEnd, oFl = tkFl, oN = tokN;
-${e.soa ? '' : '  const oText = tkText;'}
+  const oText = tkText;
   if (altK === null || altK.length !== tkCap) {
     altK = new tkK.constructor(tkCap); altT = new tkT.constructor(tkCap);
     altOff = new Int32Array(tkCap); altEnd = new Int32Array(tkCap); altFl = new Uint8Array(tkCap);
+    altDp = new Uint8Array(tkCap); altPd = new Uint16Array(tkCap);
   }
   tkK = altK; tkT = altT; tkOff = altOff; tkEnd = altEnd; tkFl = altFl;
-${e.soa ? '' : '  tkText = altText; tkText.length = 0;'}
+  { const _d = tkDp; tkDp = altDp; altDp = _d; const _q = tkPd; tkPd = altPd; altPd = _q; }
+  tkText = altText; tkText.length = 0;
   altK = oK; altT = oT; altOff = oOff; altEnd = oEnd; altFl = oFl;
-${e.soa ? '' : '  altText = oText;'}
+  altText = oText;
   lexInto(source);
-  if (tkCap !== oK.length) {
-    // the new lex outgrew the buffer (growTok reallocated): drop the stale spare
-    altK = null;
-  }
   const nN = tokN;
   const charDelta = source.length - oSrc.length;
   const minN = oN < nN ? oN : nN;
-  // Longest identical prefix (positions included — the prefix is unshifted).
   let p = 0;
   while (p < minN && oK[p] === tkK[p] && oT[p] === tkT[p] && oFl[p] === tkFl[p]
-      && oOff[p] === tkOff[p] && oEnd[p] === tkEnd[p]${e.soa ? '' : ' && oText[p] === tkText[p]'}) p++;
-  // Longest identical suffix modulo charDelta (disjoint from the prefix).
+      && oOff[p] === tkOff[p] && oEnd[p] === tkEnd[p] && oText[p] === tkText[p]) p++;
   let s = 0;
   while (s < minN - p) {
     const i = oN - 1 - s, j = nN - 1 - s;
     if (oK[i] === tkK[j] && oT[i] === tkT[j] && oFl[i] === tkFl[j]
-      && oOff[i] + charDelta === tkOff[j] && oEnd[i] + charDelta === tkEnd[j]${e.soa ? '' : ' && oText[i] === tkText[j]'}) s++;
+      && oOff[i] + charDelta === tkOff[j] && oEnd[i] + charDelta === tkEnd[j] && oText[i] === tkText[j]) s++;
     else break;
   }
-  const dOldEnd = oN - s;          // damaged OLD tokens: [p, dOldEnd)
+  const dOldEnd = oN - s;
   const tokenDelta = nN - oN;
-  // Re-base the old arena in place: rows starting at/after the first suffix token's OLD
-  // offset shift by charDelta; reused leaf entries past the damage shift by tokenDelta.
-  // (A reusable subtree lies entirely on one side of the damage, so the start-threshold
-  // classifies it correctly; damage-spanning rows are garbage either way.)
-  if (s > 0 && (charDelta !== 0 || tokenDelta !== 0)) {
-    const charThresh = oOff[dOldEnd];
+  const charThresh = s > 0 ? oOff[dOldEnd] : 0x7fffffff;
+  const nN2 = nN;
+  const oN2 = oN;`}
+  // Re-base the old arena in place: rows starting at/after the first kept-suffix
+  // token's OLD offset shift by charDelta; reused leaf entries past the damage shift
+  // by tokenDelta. (A reusable subtree lies entirely on one side of the damage; rows
+  // spanning it are unreachable garbage either way.)
+  if (dOldEnd < oN2 && (charDelta !== 0 || tokenDelta !== 0)) {
     if (charDelta !== 0) {
       for (let i = 0; i < nodeN; i++) if (rowOff[i] >= charThresh) rowOff[i] += charDelta;
     }
@@ -2227,18 +2311,12 @@ ${e.soa ? '' : '  altText = oText;'}
       }
     }
   }
-  // Carry the memo across: prefix entries whose lookahead never reached the damage stay
-  // at their index; suffix entries move by tokenDelta (ids reference the re-based rows).
-  // tokenDelta === 0 (the common keystroke: editing within a token) mutates IN PLACE —
-  // no per-rule array allocation; only the damage window and the prefix entries whose
-  // extent crossed into it are cleared.
+  // Carry the memo across: prefix entries whose lookahead never reached the damage
+  // stay; suffix entries shift by tokenDelta; the damage window drops.
   for (let r = 0; r < MEMO_RULES; r++) {
     const me = memoEnd[r];
     if (me === undefined) continue;
     const mn = memoNode[r], mx = memoExt[r];
-    // prefix entries whose lookahead may have crossed into the damage die in place
-    // (mx is the advance watermark; reads run up to two tokens past it: the stop
-    // token and the SECOND-token dispatch probe)
     for (let i = 0; i < p; i++) {
       if (me[i] !== undefined && mx[i] + 2 > p) { me[i] = undefined; mn[i] = undefined; mx[i] = undefined; }
     }
@@ -2248,15 +2326,12 @@ ${e.soa ? '' : '  altText = oText;'}
       }
       continue;
     }
-    // token count changed: rebuild the rule's arrays sparsely (measured FASTER than an
-    // in-place direction-aware shift — writing undefined through the holes materializes
-    // them; fresh holey arrays skip that entirely).
-    const nme = new Array(nN + 1), nmn = new Array(nN + 1), nmx = new Array(nN + 1);
-    const pCap = p < nN + 1 ? p : nN + 1;
+    const nme = new Array(nN2 + 1), nmn = new Array(nN2 + 1), nmx = new Array(nN2 + 1);
+    const pCap = p < nN2 + 1 ? p : nN2 + 1;
     for (let i = 0; i < pCap; i++) {
       if (me[i] !== undefined) { nme[i] = me[i]; nmn[i] = mn[i]; nmx[i] = mx[i]; }
     }
-    for (let i = dOldEnd; i <= oN; i++) {
+    for (let i = dOldEnd; i <= oN2; i++) {
       if (me[i] !== undefined) {
         const j = i + tokenDelta;
         nme[j] = me[i] + tokenDelta; nmn[j] = mn[i]; nmx[j] = mx[i] + tokenDelta;

From 1c773ca1fc4674da0f354aa05bfecef51a5d7615 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Thu, 11 Jun 2026 03:39:35 +0800
Subject: [PATCH 03/15] Depth-tolerant relex restart: reconstruct the live
 paren stack at the anchor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The restart anchor no longer requires paren depth zero — inside an
all-wrapping IIFE (typescript.js's 8.9MB bundle) no interior token has it,
so the anchor fell back to the file head and the window re-lexed half the
file. A '(' token now records its control-head-ness as tkFl flag 8 (mask 0x08), and
reconstructParens rebuilds the live stack enclosing the anchor by a backward
scan: the first '(' recording exactly depth d is the live opener of level d
(closed openers at that depth are re-opened later, and the re-opener comes
first backward). The anchor still requires template depth zero (interp brace
counters are not reconstructable) and additionally must not be a control
KEYWORD — a '(' lexed first in the window would mis-derive its head-ness
from a missing predecessor.

Two boundary bugs found by measurement on the way: lastDp/lastPd (the
"depth before the damage" baseline that resync compares against) must
initialize from the ANCHOR's depths, not zero — with the anchor adjacent to
the edit there are no pre-damage pushes to set them, the baseline froze at
zero, resync never fired inside the IIFE and the window ran to EOF (306ms
edits, worse than the depth-0 restart it replaced); and tokenize() must keep
returning tokN (the lexer-equality gate consumes it).

incremental ≡ fresh 0/120 mismatches; lexer streams, reject messages, and
the 18,805-file byte-identical gate all green; 30/30. 9MB keystroke edits
land at ~120-160ms (machine-thermal band), now dominated by the named
O(n) bookkeeping (memo prefix scans ~9M iterations/edit, arena re-base,
suffix span shift, char-diff scans) — the green {rel,len} + cursor-adoption
milestones' targets.
---
 src/emit-lexer.ts  | 35 +++++++++++++++++++++++++++++------
 src/emit-parser.ts |  3 ++-
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/src/emit-lexer.ts b/src/emit-lexer.ts
index 9dd85d0..738b529 100644
--- a/src/emit-lexer.ts
+++ b/src/emit-lexer.ts
@@ -210,14 +210,15 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`// old token (same k/t, offsets shifted by wndDelta, both depth records 0) while`);
   emit(`// the window's own stacks are empty — returns that OLD index (the duplicate push`);
   emit(`// is retracted), or -1 when lexing ran to EOF.`);
-  emit(`function lexCore(source, startPos, pvK, pvT, wndPtr0, wndMinOff, wndDelta, wndCs) {`);
+  emit(`function lexCore(source, startPos, pvK, pvT, wndPtr0, wndMinOff, wndDelta, wndCs, initParens) {`);
   emit(`  const n = source.length;`);
   emit(`  let pos = startPos;`);
   emit(`  let pendingNl = false;`);
+  emit(`  let extraFl = 0;`);
   emit(`  let lastBangWasPostfix = false;`);
   emit(`  let lastCloseWasParenHead = false;`);
   emit(`  const templateStack = [];`);
-  emit(`  const parenHeadStack = [];`);
+  emit(`  const parenHeadStack = initParens !== undefined && initParens !== null ? initParens : [];`);
   emit(`  let wndPtr = wndPtr0;`);
   emit(`  let wndHit = -1;`);
   emit(`  // stack depths as of the last token fully BEFORE the damage: a resync point may`);
@@ -225,11 +226,12 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`  // the damage (the prefix agrees byte-for-byte, so those stack entries agree too;`);
   emit(`  // anything opened inside the damage could differ in control-head-ness).`);
   emit(`  let dmgDp = -1, dmgPd = -1;`);
-  emit(`  let lastDp = 0, lastPd = 0;`);
+  emit(`  let lastDp = templateStack.length, lastPd = parenHeadStack.length;`);
   emit(`  function tkPush(k, t, off, end) {`);
   emit(`    if (tokN === tkCap) growTok();`);
   emit(`    tkK[tokN] = k; tkT[tokN] = t; tkOff[tokN] = off; tkEnd[tokN] = end;`);
-  emit(`    tkFl[tokN] = pendingNl ? 1 : 0;`);
+  emit(`    tkFl[tokN] = (pendingNl ? 1 : 0) | extraFl;`);
+  emit(`    extraFl = 0;`);
   emit(`    tkDp[tokN] = templateStack.length;`);
   emit(`    tkPd[tokN] = parenHeadStack.length;`);
   emit(`    pendingNl = false;`);
@@ -372,7 +374,9 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
     // Chars 1..len-1 already known to match when this leaf is reached via the chain below.
     if (lit === '(') {
       emit(`${ind}{ const isMemberName = tokN >= 2 && LX_MEMBER[tkT[tokN - 2]] !== 0;`);
-      emit(`${ind}  parenHeadStack.push(!isMemberName && tokN >= 1 && tkK[tokN - 1] === ${kIdent} && LX_PARENKW[tkT[tokN - 1]] !== 0); }`);
+      emit(`${ind}  const _ph = !isMemberName && tokN >= 1 && tkK[tokN - 1] === ${kIdent} && LX_PARENKW[tkT[tokN - 1]] !== 0;`);
+      emit(`${ind}  parenHeadStack.push(_ph);`);
+      emit(`${ind}  extraFl = _ph ? 8 : 0; }`);
     } else if (lit === ')') {
       emit(`${ind}lastCloseWasParenHead = parenHeadStack.pop() ?? false;`);
     }
@@ -511,10 +515,29 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`  let lo = 0, hi = tokN;`);
   emit(`  while (lo < hi) { const mid = (lo + hi) >> 1; if (tkEnd[mid] <= cs) lo = mid + 1; else hi = mid; }`);
   emit(`  for (let b = lo - 1; b >= 0; b--) {`);
-  emit(`    if (tkDp[b] === 0 && tkPd[b] === 0 && LX_PFXV[tkT[b]] === 0 && !(tkK[b] === 1 && tkT[b] === ${tRParen})) return b;`);
+  emit(`    // template depth must be zero (interp brace counters are not reconstructable),`);
+  emit(`    // and the anchor must leave no cross-token lexer state live: not a control-`);
+  emit(`    // head ')', not a postfix-ambiguous op, not a control KEYWORD (a '(' lexed`);
+  emit(`    // first in the window has no predecessor to test), and not a member-access`);
+  emit(`    // token (a '(' lexed second has no tokN - 2 for its isMemberName check).`);
+  emit(`    // Paren depth may be anything — the stack is rebuilt by reconstructParens.`);
+  emit(`    if (tkDp[b] === 0 && LX_PFXV[tkT[b]] === 0 && LX_PARENKW[tkT[b]] === 0 && LX_MEMBER[tkT[b]] === 0 && !(tkK[b] === 1 && tkT[b] === ${tRParen})) return b;`);
   emit(`  }`);
   emit(`  return -1;`);
   emit(`}`);
+  emit(`// Rebuild the live paren-head stack enclosing token b: scanning backward, the`);
+  emit(`// first '(' recording exactly depth d is the live opener of level d (closed`);
+  emit(`// openers at that depth are re-opened later, and the re-opener comes first`);
+  emit(`// backward). The '(' records its depth INCLUDING itself, and carries its`);
+  emit(`// control-head-ness as tkFl bit 8.`);
+  emit(`function reconstructParens(b) {`);
+  emit(`  let need = b >= 0 ? tkPd[b] : 0;`);
+  emit(`  const out = new Array(need);`);
+  emit(`  for (let i = b; i >= 0 && need > 0; i--) {`);
+  emit(`    if (tkK[i] === 1 && tkT[i] === ${tOf('(')} && tkPd[i] === need) { out[need - 1] = (tkFl[i] & 8) !== 0; need--; }`);
+  emit(`  }`);
+  emit(`  return out;`);
+  emit(`}`);
   return out.join('\n');
 }
 
diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index 9a0ddac..993f6de 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -2214,6 +2214,7 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   // depths are zero and whose shape carries no cross-token lexer flag (')' control-
   // head, postfix-ambiguous op). B = -1 restarts at the file head — always sound.
   const B = findRestart(cs);
+  const initParens = B >= 0 ? reconstructParens(B) : [];
   const oN = tokN;
   // first old token at/after the damage end — the resync search floor
   let r0 = oN;
@@ -2232,7 +2233,7 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   src = source;
   tokN = 0;
   const startOff = B >= 0 ? altEnd[B] : 0;
-  const R0 = lexCore(source, startOff, B >= 0 ? altK[B] : -1, B >= 0 ? altT[B] : 0, r0, ceNew, charDelta, cs);
+  const R0 = lexCore(source, startOff, B >= 0 ? altK[B] : -1, B >= 0 ? altT[B] : 0, r0, ceNew, charDelta, cs, initParens);
   const W = tokN;
   const R = R0 >= 0 ? R0 : oN;
   swapBuffers();              // live = OLD stream again; window sits in the alt buffers

From 7d7fbc5a5011e01fbac3b1fb4b3cb37051836127 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Thu, 11 Jun 2026 03:55:11 +0800
Subject: [PATCH 04/15] Green tree (M3): relative coordinates on parent edges;
 re-base loops vanish
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Nodes no longer store absolute positions. A row owns only its LENGTHS (rowLen
chars, rowTokLen tokens); a node child's relative coordinates (kidRel chars,
kidTokRel tokens, both against the parent's start) live in the PARENT's kids
stream, parallel to the entries — NOT on the child row: a memo-reused subtree
can be a child of several longest-match CANDIDATES, and a losing candidate
completing after the winner would clobber child-side rel fields (928 corpus
mismatches before the edge-ownership fix). Leaf entries are node-relative
token indices. The red layer is a descent: visit/toObject thread (charBase,
tokBase); leaf spans come from the token columns at tokBase + rel.

Build stays absolute in TRANSIENT per-row coordinates (absChar/absTok),
written at finishNode/finishWrap, read by the enclosing parent, never part of
the green tree. A memo HIT refreshes the reused root's transients to the
current stream in O(1) — which is the whole point:

- the arena re-base loops (rowOff O(nodes), kids O(kids) per edit) are GONE;
- the '>'-splice kids/scratch fixup is GONE (completed rows lie wholly before
  the splice point; the carried memo is cleared);
- a reused subtree needs zero rewriting at any depth.

Matchers thread one tokBase parameter (leaf spans come from the token
columns, so they never need charBase); the totality gate's visit supplies it.
The ts-ast lowering moves to the INTERPRETER oracle through a new object-tree
TreeAccess adapter (test/obj-tree.ts, absolute coordinates, tokBase ignored)
— the grammar↔tsc structure contract is engine-independent, and the lowering
needed zero semantic changes.

18,802/18,805 emit ≡ interp byte-identical (toObject reproduces absolute
objects exactly); reject messages exact; incremental ≡ fresh 0/120 with the
mixed session at 1.69× (best yet); totality 32,167 nodes / 0 misses; 30/30.
9MB keystrokes ~148ms — now dominated by the memo prefix scans, the suffix
span shift, and the char-diff scans (the cursor-adoption and chunked-column
milestones' targets).
---
 src/emit-parser.ts         | 196 ++++++++++++++++++++++++-------------
 src/gen-cst-match.ts       |  51 +++++-----
 test/cst-match-totality.ts |  14 +--
 test/obj-tree.ts           |  78 +++++++++++++++
 test/ts-ast-lowering.ts    |   9 +-
 test/ts-ast-verify.ts      |  23 ++---
 6 files changed, 251 insertions(+), 120 deletions(-)
 create mode 100644 test/obj-tree.ts

diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index 993f6de..b0446e4 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -1360,13 +1360,24 @@ function growTok() {
 // Rows store ABSOLUTE offsets in this phase (the green {rel,len} re-base is the
 // incremental round's move; flipping the stored form regenerates matchers only).
 let rowRule = new Uint16Array(8192);    // rule id (index into RULE_NAMES)
-let rowOff = new Int32Array(8192);      // absolute start offset
 let rowLen = new Int32Array(8192);
+let rowTokLen = new Int32Array(8192);   // subtree token count
 let rowStart = new Int32Array(8192);    // first index into kids
 let rowCount = new Int32Array(8192);
+// transient BUILD coordinates (absolute), valid for rows completed in the current
+// parse and REFRESHED at memo-hit time for reused roots — parents read them at
+// finishNode to write the children's relative fields; never part of the green tree.
+let absChar = new Int32Array(8192);
+let absTok = new Int32Array(8192);
 let rowCap = 8192;
 let nodeN = 0;
 let kids = new Int32Array(16384);
+// A node child's RELATIVE coordinates live in the PARENT's kids stream (parallel to
+// kids), not on the child row: a memo-reused subtree can be a child of several
+// longest-match CANDIDATES, and a losing candidate completing after the winner would
+// clobber child-side rel fields. The parent owns its edges; rows own only lengths.
+let kidRel = new Int32Array(16384);
+let kidTokRel = new Int32Array(16384);
 let kidCap = 16384;
 let kidN = 0;
 // Scratch: completed-but-unattached children of in-progress arms. Every
@@ -1377,21 +1388,27 @@ let scn = 0;
 function growRows() {
   rowCap *= 2;
   const r = new Uint16Array(rowCap); r.set(rowRule); rowRule = r;
-  const o = new Int32Array(rowCap); o.set(rowOff); rowOff = o;
   const l = new Int32Array(rowCap); l.set(rowLen); rowLen = l;
+  const tl = new Int32Array(rowCap); tl.set(rowTokLen); rowTokLen = tl;
   const s = new Int32Array(rowCap); s.set(rowStart); rowStart = s;
   const c = new Int32Array(rowCap); c.set(rowCount); rowCount = c;
+  const ac = new Int32Array(rowCap); ac.set(absChar); absChar = ac;
+  const at = new Int32Array(rowCap); at.set(absTok); absTok = at;
 }
 function growKids(n) {
   while (kidN + n > kidCap) kidCap *= 2;
   const k = new Int32Array(kidCap); k.set(kids.subarray(0, kidN)); kids = k;
+  const r = new Int32Array(kidCap); r.set(kidRel.subarray(0, kidN)); kidRel = r;
+  const t = new Int32Array(kidCap); t.set(kidTokRel.subarray(0, kidN)); kidTokRel = t;
 }
 function scPush(e) {
   if (scn === scCap) { scCap *= 2; const s = new Int32Array(scCap); s.set(sc); sc = s; }
   sc[scn++] = e;
 }
-function entryOff(e) { return e >= 0 ? rowOff[e] : tkOff[(~e) >>> 2]; }
-function entryEnd(e) { return e >= 0 ? rowOff[e] + rowLen[e] : tkEnd[(~e) >>> 2]; }
+function entryOff(e) { return e >= 0 ? absChar[e] : tkOff[(~e) >>> 2]; }
+function entryEnd(e) { return e >= 0 ? absChar[e] + rowLen[e] : tkEnd[(~e) >>> 2]; }
+function entryTok(e) { return e >= 0 ? absTok[e] : (~e) >>> 2; }
+function entryTokEnd(e) { return e >= 0 ? absTok[e] + rowTokLen[e] : ((~e) >>> 2) + 1; }
 // Complete a node whose children are scratch[mark..scn): copy them into kids, write
 // the row, truncate scratch, return the id. Empty children = a zero-width node
 // at the current token (the old offset() rule).
@@ -1399,20 +1416,37 @@ function finishNode(rid, mark) {
   const n = scn - mark;
   if (nodeN === rowCap) growRows();
   const id = nodeN++;
-  let myOff, myEnd;
+  let myOff, myEnd, myTok, myTokEnd;
   if (n > 0) {
     if (kidN + n > kidCap) growKids(n);
     const ks = kidN;
-    for (let i = 0; i < n; i++) kids[ks + i] = sc[mark + i];
-    kidN += n;
-    rowStart[id] = ks;
     myOff = entryOff(sc[mark]);
     myEnd = entryEnd(sc[scn - 1]);
+    myTok = entryTok(sc[mark]);
+    myTokEnd = entryTokEnd(sc[scn - 1]);
+    // GREEN conversion: scratch entries carry ABSOLUTE coordinates; the kids span is
+    // written position-independent — a leaf becomes node-relative-token-encoded, a
+    // child node gets its rel fields written here (its own row knows only lengths).
+    for (let i = 0; i < n; i++) {
+      const e = sc[mark + i];
+      if (e < 0) {
+        kids[ks + i] = ~(((((~e) >>> 2) - myTok) << 2) | ((~e) & 3));
+      } else {
+        kids[ks + i] = e;
+        kidRel[ks + i] = absChar[e] - myOff;
+        kidTokRel[ks + i] = absTok[e] - myTok;
+      }
+    }
+    kidN += n;
+    rowStart[id] = ks;
   } else {
     rowStart[id] = kidN;
     myOff = offset(); myEnd = myOff;
+    myTok = pos; myTokEnd = pos;
   }
-  rowRule[id] = rid; rowOff[id] = myOff; rowLen[id] = myEnd - myOff; rowCount[id] = n;
+  rowRule[id] = rid; rowLen[id] = myEnd - myOff; rowCount[id] = n;
+  rowTokLen[id] = myTokEnd - myTok;
+  absChar[id] = myOff; absTok[id] = myTok;
   scn = mark;
   return id;
 }
@@ -1423,13 +1457,28 @@ function finishWrap(rid, lhsId, mark) {
   const id = nodeN++;
   if (kidN + n + 1 > kidCap) growKids(n + 1);
   const ks = kidN;
+  const myOff = absChar[lhsId];
+  const myTok = absTok[lhsId];
+  const myEnd = n > 0 ? entryEnd(sc[scn - 1]) : myOff + rowLen[lhsId];
+  const myTokEnd = n > 0 ? entryTokEnd(sc[scn - 1]) : myTok + rowTokLen[lhsId];
   kids[ks] = lhsId;
-  for (let i = 0; i < n; i++) kids[ks + 1 + i] = sc[mark + i];
+  kidRel[ks] = 0;
+  kidTokRel[ks] = 0;
+  for (let i = 0; i < n; i++) {
+    const e = sc[mark + i];
+    if (e < 0) {
+      kids[ks + 1 + i] = ~(((((~e) >>> 2) - myTok) << 2) | ((~e) & 3));
+    } else {
+      kids[ks + 1 + i] = e;
+      kidRel[ks + 1 + i] = absChar[e] - myOff;
+      kidTokRel[ks + 1 + i] = absTok[e] - myTok;
+    }
+  }
   kidN += n + 1;
-  const myOff = rowOff[lhsId];
-  const myEnd = n > 0 ? entryEnd(sc[scn - 1]) : rowOff[lhsId] + rowLen[lhsId];
-  rowRule[id] = rid; rowOff[id] = myOff; rowLen[id] = myEnd - myOff;
+  rowRule[id] = rid; rowLen[id] = myEnd - myOff;
   rowStart[id] = ks; rowCount[id] = n + 1;
+  rowTokLen[id] = myTokEnd - myTok;
+  absChar[id] = myOff; absTok[id] = myTok;
   scn = mark;
   return id;
 }
@@ -1510,18 +1559,9 @@ function matchPuLitGT(pu) {
     memoNode.fill(undefined);
     memoEnd.fill(undefined);
     memoExt.fill(undefined);
-    // Leaf entries reference tokens BY INDEX, so the splice's +1 shift must be applied
-    // to every committed/scratch entry past the split point. (Object trees were immune —
-    // leaves copied their spans; the arena trades that copy for this rare O(kidN) pass.
-    // Entries AT pos can't exist: that token is being consumed right now.)
-    for (let i = 0; i < kidN; i++) {
-      const ke = kids[i];
-      if (ke < 0 && ((~ke) >>> 2) > pos) kids[i] = ke - 4;
-    }
-    for (let i = 0; i < scn; i++) {
-      const se = sc[i];
-      if (se < 0 && ((~se) >>> 2) > pos) sc[i] = se - 4;
-    }
+    // GREEN tree: no kids/scratch fixup — every completed row and scratch entry lies
+    // wholly BEFORE the splice point (token pos is being consumed right now), and the
+    // carried memo was just cleared, so nothing reachable references shifted indices.
     scPush(~(pos << 2));
     if (++pos > maxPos) maxPos = pos;
     return true;
@@ -1821,7 +1861,7 @@ function emitPrattRule(e: Emitter, a: ReturnType<typeof analyze>, rule: RuleDecl
   e.emit(`        if (NOUNARY_T[tkT[pos]] !== 0 && rowCount[lhs] > 0) {`);
   e.emit(`          const _h = kids[rowStart[lhs]];`);
   e.emit(`          if (_h < 0 && ((~_h) & 3) === 2) {`);
-  e.emit(`            const _ht = (~_h) >>> 2;`);
+  e.emit(`            const _ht = absTok[lhs] + ((~_h) >>> 2);`);
   e.emit(`            const _htext = ${e.soa ? 'src.slice(tkOff[_ht], tkEnd[_ht])' : 'tkText[_ht]'};`);
   e.emit(`            if (prefixOps.has(_htext) && !postfixOpValues.has(_htext)) { return -1; }`);
   e.emit(`          }`);
@@ -1972,7 +2012,15 @@ function parseRuleEntry(idx, name, core) {
       const ex = mx[start];
       if (ex > maxPos) maxPos = ex;
       const id = mn[start];
-      if (id >= 0) { scPush(id); return true; }
+      if (id >= 0) {
+        // refresh the reused root's transient BUILD coordinates to the current stream
+        // (its green internals are position-independent; only the attachment point —
+        // what the enclosing finishNode reads — must be current).
+        absTok[id] = start;
+        absChar[id] = tkOff[start];
+        scPush(id);
+        return true;
+      }
       return false;
     }
   }
@@ -2036,8 +2084,8 @@ export function getText(node, source) {
 // The arena IS the tree: parse() returns the root node id and consumers traverse
 // via visit()/the accessors — nothing is materialized on the parse path. All views
 // are valid until the NEXT parse (the columns are reused).
-function leafTokenType(entry) {
-  const tok = (~entry) >>> 2;
+function leafTokenType(entry, tokBase) {
+  const tok = tokBase + ((~entry) >>> 2);
   const kind = (~entry) & 3;
   return kind === 1 ? '$keyword'
     : kind === 2 ? '$operator'
@@ -2046,11 +2094,21 @@ function leafTokenType(entry) {
 }
 // Raw arena accessors. An ENTRY is a node id (>= 0) or a leaf (< 0, token-encoded);
 // offsetOf/endOf/textOf accept either.
+// GREEN accessors: positions are RELATIVE — a node row stores only its lengths (len,
+// tokLen); its start relative to the parent lives on the parent edge (kidRel, kidTokRel).
+// Consumers descend with (charBase, tokBase) — the node's own absolute start — and
+// leaf spans come from the token columns at tokBase + the entry's node-relative index.
 export const tree = {
   ruleNameOf: (id) => RULE_NAMES[rowRule[id]],
   ruleIdOf: (id) => rowRule[id],
-  offsetOf: (entry) => entry >= 0 ? rowOff[entry] : tkOff[(~entry) >>> 2],
-  endOf: (entry) => entry >= 0 ? rowOff[entry] + rowLen[entry] : tkEnd[(~entry) >>> 2],
+  lenOf: (id) => rowLen[id],
+  tokLenOf: (id) => rowTokLen[id],
+  // a node CHILD's relative coordinates live on the parent edge (kids-parallel)
+  childRelAt: (id, i) => kidRel[rowStart[id] + i],
+  childTokRelAt: (id, i) => kidTokRel[rowStart[id] + i],
+  // base-threaded spans: nodes from their bases, leaves from the token columns
+  offsetOf: (entry, charBase, tokBase) => entry >= 0 ? charBase : tkOff[tokBase + ((~entry) >>> 2)],
+  endOf: (entry, charBase, tokBase) => entry >= 0 ? charBase + rowLen[entry] : tkEnd[tokBase + ((~entry) >>> 2)],
   childCount: (id) => rowCount[id],
   childAt: (id, i) => kids[rowStart[id] + i],
   // Bulk child load into a caller-owned array; returns the count. One call per node
@@ -2062,40 +2120,51 @@ export const tree = {
     return n2;
   },
   isLeaf: (entry) => entry < 0,
-  leafToken: (entry) => (~entry) >>> 2,
+  leafToken: (entry, tokBase) => tokBase + ((~entry) >>> 2),
   leafTokenType,
   // Int-world leaf accessors (the match-path encoding): kind bits — 0 type-derived,
   // 1 '$keyword', 2 '$operator' — and the token's TYPE kind int (1 = punctuation).
   leafKindOf: (entry) => (~entry) & 3,
-  leafTokKindOf: (entry) => tkK[(~entry) >>> 2],
-  textOf: (entry, source) => entry >= 0
-    ? source.slice(rowOff[entry], rowOff[entry] + rowLen[entry])
-    : source.slice(tkOff[(~entry) >>> 2], tkEnd[(~entry) >>> 2]),
+  leafTokKindOf: (entry, tokBase) => tkK[tokBase + ((~entry) >>> 2)],
+  leafOffsetOf: (entry, tokBase) => tkOff[tokBase + ((~entry) >>> 2)],
+  leafEndOf: (entry, tokBase) => tkEnd[tokBase + ((~entry) >>> 2)],
+  textOf: (entry, source, charBase, tokBase) => entry >= 0
+    ? source.slice(charBase, charBase + rowLen[entry])
+    : source.slice(tkOff[tokBase + ((~entry) >>> 2)], tkEnd[tokBase + ((~entry) >>> 2)]),
 };
 // Depth-first traversal from a node id or leaf entry:
 //   enter(id)         — each NODE before its children; return false to skip its subtree
 //   leave(id)         — each node after its children
 //   leaf(entry, tok)  — each leaf (tok = its token index)
-export function visit(entry, fns) {
-  if (entry < 0) { if (fns.leaf) fns.leaf(entry, (~entry) >>> 2); return; }
-  if (fns.enter && fns.enter(entry) === false) return;
+// Depth-first traversal threading the RED coordinates: enter/leave receive the
+// node's absolute (charBase, tokBase); leaf receives its absolute token index.
+// Call with the root only: omitted bases default to rootCharBase/rootTokBase (the last parse root).
+export function visit(entry, fns, charBase, tokBase) {
+  if (charBase === undefined) { charBase = rootCharBase; tokBase = rootTokBase; }
+  if (entry < 0) { if (fns.leaf) fns.leaf(entry, tokBase + ((~entry) >>> 2)); return; }
+  if (fns.enter && fns.enter(entry, charBase, tokBase) === false) return;
   const n = rowCount[entry];
   const cs = rowStart[entry];
-  for (let i = 0; i < n; i++) visit(kids[cs + i], fns);
-  if (fns.leave) fns.leave(entry);
+  for (let i = 0; i < n; i++) {
+    const e = kids[cs + i];
+    if (e < 0) { if (fns.leaf) fns.leaf(e, tokBase + ((~e) >>> 2)); }
+    else visit(e, fns, charBase + kidRel[cs + i], tokBase + kidTokRel[cs + i]);
+  }
+  if (fns.leave) fns.leave(entry, charBase, tokBase);
 }
 // Materialize the classic object CST from a node id — a BRIDGE for tests/debugging
 // (the byte-identical gate against the interpreter), not a parse-path product.
-export function toObject(id) {
+export function toObject(id, charBase, tokBase) {
+  if (charBase === undefined) { charBase = rootCharBase; tokBase = rootTokBase; }
   const n = rowCount[id];
   const cs = rowStart[id];
   const children = new Array(n);
   for (let i = 0; i < n; i++) {
     const entry = kids[cs + i];
-    children[i] = entry >= 0 ? toObject(entry)
-      : { tokenType: leafTokenType(entry), offset: tkOff[(~entry) >>> 2], end: tkEnd[(~entry) >>> 2] };
+    children[i] = entry >= 0 ? toObject(entry, charBase + kidRel[cs + i], tokBase + kidTokRel[cs + i])
+      : { tokenType: leafTokenType(entry, tokBase), offset: tkOff[tokBase + ((~entry) >>> 2)], end: tkEnd[tokBase + ((~entry) >>> 2)] };
   }
-  return { rule: RULE_NAMES[rowRule[id]], children, offset: rowOff[id], end: rowOff[id] + rowLen[id] };
+  return { rule: RULE_NAMES[rowRule[id]], children, offset: charBase, end: charBase + rowLen[id] };
 }
 
 // Parse to the ARENA: returns the root node id.
@@ -2134,7 +2203,9 @@ function runParse(entryRule) {
   const entry = entryRule ?? ENTRY;
   if (tokN === 0) {
     const rid = RULE_NAMES.indexOf(entry);
-    return finishNode(rid < 0 ? 0 : rid, scn);
+    const er = finishNode(rid < 0 ? 0 : rid, scn);
+    rootCharBase = absChar[er]; rootTokBase = absTok[er];
+    return er;
   }
   if (!RULES[entry]()) {
     const hasTok = pos < cap;
@@ -2143,13 +2214,18 @@ function runParse(entryRule) {
   if (pos < tokN) {
     throw new Error('Parse error at offset ' + tkOff[pos] + ": unexpected '" + tokTextAt(pos) + "' after successful parse" + farthest(pos));
   }
-  return sc[--scn];
+  const rootId = sc[--scn];
+  rootCharBase = absChar[rootId]; rootTokBase = absTok[rootId];
+  return rootId;
 }
 
 // Source of the last COMPLETED parse — the token columns, arena and memo describe it.
 // null whenever the module state is not a coherent snapshot (no parse yet, or the last
 // attempt threw), so parseEdited falls back to a full parse.
 let lastSrc = null;
+// the LAST parse root's absolute coordinates (the descent origin — see visit/toObject)
+let rootCharBase = 0;
+let rootTokBase = 0;
 // The spare token-column buffer set (parseEdited ping-pongs between the live set and
 // this one, so steady-state edits never allocate columns).
 let altK = null, altT = null, altOff = null, altEnd = null, altFl = null, altDp = null, altPd = null;
@@ -2247,7 +2323,6 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   }
   const dOldEnd = R;
   const tokenDelta = (B + 1 + W) - R;
-  const charThresh = R < oN ? tkOff[R] : 0x7fffffff;
   // ── splice: old[0..B] + window[0..W) + old[R..oN), then shift the suffix spans ──
   const nN = B + 1 + W + (oN - R);
   while (tkCap < nN + 1) growTok();
@@ -2263,8 +2338,7 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
     for (let i = B + 1 + W; i < nN; i++) { tkOff[i] += charDelta; tkEnd[i] += charDelta; }
   }
   tokN = nN;
-  const nN2 = nN;
-  const oN2 = oN;` : String.raw`  // (fallback-lexer grammars keep the full-relex + token-diff path)
+  const nN2 = nN;` : String.raw`  // (fallback-lexer grammars keep the full-relex + token-diff path)
   const oK = tkK, oT = tkT, oOff = tkOff, oEnd = tkEnd, oFl = tkFl, oN = tokN;
   const oText = tkText;
   if (altK === null || altK.length !== tkCap) {
@@ -2293,25 +2367,7 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   }
   const dOldEnd = oN - s;
   const tokenDelta = nN - oN;
-  const charThresh = s > 0 ? oOff[dOldEnd] : 0x7fffffff;
-  const nN2 = nN;
-  const oN2 = oN;`}
-  // Re-base the old arena in place: rows starting at/after the first kept-suffix
-  // token's OLD offset shift by charDelta; reused leaf entries past the damage shift
-  // by tokenDelta. (A reusable subtree lies entirely on one side of the damage; rows
-  // spanning it are unreachable garbage either way.)
-  if (dOldEnd < oN2 && (charDelta !== 0 || tokenDelta !== 0)) {
-    if (charDelta !== 0) {
-      for (let i = 0; i < nodeN; i++) if (rowOff[i] >= charThresh) rowOff[i] += charDelta;
-    }
-    if (tokenDelta !== 0) {
-      const eShift = tokenDelta << 2;
-      for (let i = 0; i < kidN; i++) {
-        const e = kids[i];
-        if (e < 0 && ((~e) >>> 2) >= dOldEnd) kids[i] = e - eShift;
-      }
-    }
-  }
+  const nN2 = nN;`}
   // Carry the memo across: prefix entries whose lookahead never reached the damage
   // stay; suffix entries shift by tokenDelta; the damage window drops.
   for (let r = 0; r < MEMO_RULES; r++) {
@@ -2332,7 +2388,7 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
     for (let i = 0; i < pCap; i++) {
       if (me[i] !== undefined) { nme[i] = me[i]; nmn[i] = mn[i]; nmx[i] = mx[i]; }
     }
-    for (let i = dOldEnd; i <= oN2; i++) {
+    for (let i = dOldEnd; i <= oN; i++) {
       if (me[i] !== undefined) {
         const j = i + tokenDelta;
         nme[j] = me[i] + tokenDelta; nmn[j] = mn[i]; nmx[j] = mx[i] + tokenDelta;
diff --git a/src/gen-cst-match.ts b/src/gen-cst-match.ts
index 4e8fa91..daa50ff 100644
--- a/src/gen-cst-match.ts
+++ b/src/gen-cst-match.ts
@@ -321,7 +321,7 @@ export function generateCstMatch(grammar: CstGrammar, importFrom: string): strin
       return c.field === c.name ? c.name : `${c.field}: ${c.name}`;
     });
     w(`  return { arm: ${J(plan.name)}${fields.length ? ', ' + fields.join(', ') : ''} };`);
-    emit(`function ${fn}(t: TreeAccess, n: number, cc: number, src: string): ${matchTypeName(rule.name)} | null {`);
+    emit(`function ${fn}(t: TreeAccess, n: number, cc: number, tb: number, src: string): ${matchTypeName(rule.name)} | null {`);
     for (const line of body) emit(line);
     emit(`}`);
     return fn;
@@ -333,7 +333,7 @@ export function generateCstMatch(grammar: CstGrammar, importFrom: string): strin
   }
 
   function litCond(text: string, tt: string): string {
-    return `__lit(t, cc, i, src, ${J(text)}, ${tt === '$keyword' ? 1 : 0})`;
+    return `__lit(t, cc, tb, i, src, ${J(text)}, ${tt === '$keyword' ? 1 : 0})`;
   }
 
   function renderStep(st: Step, w: (s: string) => void, ind: string, fail: () => string): void {
@@ -346,7 +346,7 @@ export function generateCstMatch(grammar: CstGrammar, importFrom: string): strin
       case 'litAlt': {
         const conds = st.texts.map((t, k) => litCond(t, st.tt[k]));
         w(`${ind}if (!(${conds.join(' || ')})) ${fail()}`);
-        if (st.cap) assign(st.cap, `src.slice(t.offsetOf(__SC[i]), t.endOf(__SC[i])) as ${st.cap.tsType}`, w, ind);
+        if (st.cap) assign(st.cap, `src.slice(t.leafOffsetOf(__SC[i], tb), t.leafEndOf(__SC[i], tb)) as ${st.cap.tsType}`, w, ind);
         w(`${ind}i++;`);
         return;
       }
@@ -354,8 +354,8 @@ export function generateCstMatch(grammar: CstGrammar, importFrom: string): strin
         const cond = st.name === '$operator'
           ? `__opTok(t, cc, i)`
           : st.template
-            ? `__tok(t, cc, i, ${typeKind.get(st.name)}) || __nodeOf(t, cc, i, ${ruleId.get('$template')})`
-            : `__tok(t, cc, i, ${typeKind.get(st.name)})`;
+            ? `__tok(t, cc, tb, i, ${typeKind.get(st.name)}) || __nodeOf(t, cc, i, ${ruleId.get('$template')})`
+            : `__tok(t, cc, tb, i, ${typeKind.get(st.name)})`;
         w(`${ind}if (!(${cond})) ${fail()}`);
         if (st.cap) assign(st.cap, `__SC[i] as ${st.cap.tsType}`, w, ind);
         w(`${ind}i++;`);
@@ -564,7 +564,7 @@ export function generateCstMatch(grammar: CstGrammar, importFrom: string): strin
     // ("always") arms appear in every bucket at their declaration position; the buckets
     // are superset filters (each arm fn re-checks exactly).
     const admits = plans.map(p => firstAdmit(p.steps));
-    const tryLine = (k: number) => `    { const m = ${fns[k]}(t, n, cc, src); if (m !== null) return m; }`;
+    const tryLine = (k: number) => `    { const m = ${fns[k]}(t, n, cc, tb, src); if (m !== null) return m; }`;
     const bucketLines = (pred: (keys: Set<string>) => boolean): string[] =>
       plans.map((_, k) => (admits[k].keys.size === 0 || pred(admits[k].keys) ? tryLine(k) : ''))
         .filter(Boolean);
@@ -618,8 +618,8 @@ export function generateCstMatch(grammar: CstGrammar, importFrom: string): strin
       lines.push(`${pad}      break;`);
       lines.push(`${pad}    }`);
       lines.push(`${pad}  }`);
-      lines.push(`${pad}} else if ((_k1 = t.leafKindOf(e1)) === 1 || (_k1 === 0 && t.leafTokKindOf(e1) === 1)) {`);
-      lines.push(`${pad}  switch (src.charCodeAt(t.offsetOf(e1))) {`);
+      lines.push(`${pad}} else if ((_k1 = t.leafKindOf(e1)) === 1 || (_k1 === 0 && t.leafTokKindOf(e1, tb) === 1)) {`);
+      lines.push(`${pad}  switch (src.charCodeAt(t.leafOffsetOf(e1, tb))) {`);
       for (const cc of [...cset].sort((a, b) => a - b)) {
         lines.push(`${pad}    case ${cc}: {`);
         lines.push(...subTry(i => restAdmit[i]!.keys.has('c:' + cc)).map(l => '    ' + l));
@@ -634,7 +634,7 @@ export function generateCstMatch(grammar: CstGrammar, importFrom: string): strin
       lines.push(`${pad}} else if (_k1 === 2) {`);
       lines.push(...subTry(i => restAdmit[i]!.keys.has('t:$operator')));
       lines.push(`${pad}} else {`);
-      lines.push(`${pad}  switch (t.leafTokKindOf(e1)) {`);
+      lines.push(`${pad}  switch (t.leafTokKindOf(e1, tb)) {`);
       for (const t of [...tset].sort()) {
         if (t === '$operator') continue;   // handled by the kind-2 branch above
         lines.push(`${pad}    case ${typeKind.get(t)}: { // ${t}`);
@@ -652,7 +652,7 @@ export function generateCstMatch(grammar: CstGrammar, importFrom: string): strin
     };
 
     const disp: string[] = [];
-    disp.push(`export function match${sanitizeIdent(rule.name)}(t: TreeAccess, n: NodeEntry<${J(rule.name)}>, src: string): ${tName} {`);
+    disp.push(`export function match${sanitizeIdent(rule.name)}(t: TreeAccess, n: NodeEntry<${J(rule.name)}>, tb: number, src: string): ${tName} {`);
     disp.push(`  const cc = __load(t, n);`);
     disp.push(`  let e1 = 0; let _k1 = 0;`);
     disp.push(`  if (cc === 0) {`);
@@ -681,8 +681,8 @@ export function generateCstMatch(grammar: CstGrammar, importFrom: string): strin
     }
     disp.push(`    }`);
     disp.push(`  } else { const _k0 = t.leafKindOf(e0);`);
-    disp.push(`  if (_k0 === 1 || (_k0 === 0 && t.leafTokKindOf(e0) === 1)) {`);
-    disp.push(`    switch (src.charCodeAt(t.offsetOf(e0))) {`);
+    disp.push(`  if (_k0 === 1 || (_k0 === 0 && t.leafTokKindOf(e0, tb) === 1)) {`);
+    disp.push(`    switch (src.charCodeAt(t.leafOffsetOf(e0, tb))) {`);
     for (const cc of [...charCodes].sort((a, b) => a - b)) {
       disp.push(`      case ${cc}: {`);
       for (const l of bucketLines(keys => keys.has('c:' + cc))) disp.push('    ' + l);
@@ -699,7 +699,7 @@ export function generateCstMatch(grammar: CstGrammar, importFrom: string): strin
     disp.push(`  } else if (_k0 === 2) {`);
     for (const l of bucketLines(keys => keys.has('t:$operator'))) disp.push(l);
     disp.push(`  } else {`);
-    disp.push(`    switch (t.leafTokKindOf(e0)) {`);
+    disp.push(`    switch (t.leafTokKindOf(e0, tb)) {`);
     for (const t of [...tokNames].sort()) {
       if (t === '$operator') continue;   // handled by the kind-2 branch above
       disp.push(`      case ${typeKind.get(t)}: { // ${t}`);
@@ -715,7 +715,7 @@ export function generateCstMatch(grammar: CstGrammar, importFrom: string): strin
     }
     disp.push(`    }`);
     disp.push(`  } } }`);
-    disp.push(`  throw new Error(${J(`match${sanitizeIdent(rule.name)}: no arm matches`)} + ' @' + t.offsetOf(n));`);
+    disp.push(`  throw new Error(${J(`match${sanitizeIdent(rule.name)}: no arm matches`)} + ' @tok' + tb);`);
     disp.push(`}`);
     bodyParts.push(disp.join('\n'));
     matcherMapEntries.push(`  ${J(rule.name)}: match${sanitizeIdent(rule.name)},`);
@@ -732,11 +732,10 @@ export function generateCstMatch(grammar: CstGrammar, importFrom: string): strin
   header.push(`  childCount(id: number): number;`);
   header.push(`  childAt(id: number, i: number): number;`);
   header.push(`  childrenInto(id: number, out: number[]): number;`);
-  header.push(`  leafTokenType(entry: number): string;`);
   header.push(`  leafKindOf(entry: number): number;`);
-  header.push(`  leafTokKindOf(entry: number): number;`);
-  header.push(`  offsetOf(entry: number): number;`);
-  header.push(`  endOf(entry: number): number;`);
+  header.push(`  leafTokKindOf(entry: number, tokBase: number): number;`);
+  header.push(`  leafOffsetOf(entry: number, tokBase: number): number;`);
+  header.push(`  leafEndOf(entry: number, tokBase: number): number;`);
   header.push(`}`);
   header.push(`// Branded entry aliases — compile-time discrimination over plain numbers.`);
   header.push(`export type NodeEntry<R extends string = string> = number & { readonly __node?: R };`);
@@ -747,17 +746,17 @@ export function generateCstMatch(grammar: CstGrammar, importFrom: string): strin
   header.push(`const __SC: number[] = [];`);
   header.push(`const __load = (t: TreeAccess, n: number): number => t.childrenInto(n, __SC);`);
   header.push(`// kind: 1 = '$keyword' (leaf kind bit), 0 = '$punct' (type-derived + tok-kind 1).`);
-  header.push(`const __lit = (t: TreeAccess, cc: number, i: number, src: string, text: string, kind: number): boolean => {`);
+  header.push(`const __lit = (t: TreeAccess, cc: number, tb: number, i: number, src: string, text: string, kind: number): boolean => {`);
   header.push(`  if (i >= cc) return false;`);
   header.push(`  const e = __SC[i];`);
-  header.push(`  if (e >= 0 || t.leafKindOf(e) !== kind || (kind === 0 && t.leafTokKindOf(e) !== 1)) return false;`);
-  header.push(`  const off = t.offsetOf(e);`);
-  header.push(`  return t.endOf(e) - off === text.length && src.startsWith(text, off);`);
+  header.push(`  if (e >= 0 || t.leafKindOf(e) !== kind || (kind === 0 && t.leafTokKindOf(e, tb) !== 1)) return false;`);
+  header.push(`  const off = t.leafOffsetOf(e, tb);`);
+  header.push(`  return t.leafEndOf(e, tb) - off === text.length && src.startsWith(text, off);`);
   header.push(`};`);
-  header.push(`const __tok = (t: TreeAccess, cc: number, i: number, k: number): boolean => {`);
+  header.push(`const __tok = (t: TreeAccess, cc: number, tb: number, i: number, k: number): boolean => {`);
   header.push(`  if (i >= cc) return false;`);
   header.push(`  const e = __SC[i];`);
-  header.push(`  return e < 0 && t.leafKindOf(e) === 0 && t.leafTokKindOf(e) === k;`);
+  header.push(`  return e < 0 && t.leafKindOf(e) === 0 && t.leafTokKindOf(e, tb) === k;`);
   header.push(`};`);
   header.push(`const __opTok = (t: TreeAccess, cc: number, i: number): boolean => {`);
   header.push(`  if (i >= cc) return false;`);
@@ -774,11 +773,11 @@ export function generateCstMatch(grammar: CstGrammar, importFrom: string): strin
   const footer = [
     ``,
     `/** rule name → its matcher (generic walking; the totality gate uses this). */`,
-    `export const MATCHERS: Record<string, (t: TreeAccess, n: never, src: string) => { arm: string }> = {`,
+    `export const MATCHERS: Record<string, (t: TreeAccess, n: never, tb: number, src: string) => { arm: string }> = {`,
     ...matcherMapEntries,
     `};`,
     `/** rule ID → matcher (the emitted parser's rowRule ids — declaration order). */`,
-    `export const MATCHERS_BY_ID: ((t: TreeAccess, n: never, src: string) => { arm: string })[] = [`,
+    `export const MATCHERS_BY_ID: ((t: TreeAccess, n: never, tb: number, src: string) => { arm: string })[] = [`,
     ...grammar.rules.map(r => `  match${sanitizeIdent(r.name)},`),
     `];`,
   ];
diff --git a/test/cst-match-totality.ts b/test/cst-match-totality.ts
index fd8a6be..f688cda 100644
--- a/test/cst-match-totality.ts
+++ b/test/cst-match-totality.ts
@@ -24,23 +24,23 @@ const samples: string[] = [];
 
 type Emitted = {
   parse(src: string, entry?: string): number;
-  visit(entry: number, fns: { enter?(id: number): boolean | void; leaf?(e: number, tok: number): void }): void;
-  tree: { ruleNameOf(id: number): string; childCount(id: number): number; childAt(id: number, i: number): number; leafTokenType(e: number): string; offsetOf(e: number): number; endOf(e: number): number };
+  visit(entry: number, fns: { enter?(id: number, charBase: number, tokBase: number): boolean | void; leaf?(e: number, tok: number): void }): void;
+  tree: { ruleNameOf(id: number): string; lenOf(id: number): number };
 };
 
-function checkTree(em: Emitted, root: number, src: string, matchers: Record<string, (t: never, n: never, src: string) => { arm: string }>, tag: string): void {
+function checkTree(em: Emitted, root: number, src: string, matchers: Record<string, (t: never, n: never, tb: number, src: string) => { arm: string }>, tag: string): void {
   em.visit(root, {
-    enter(id) {
+    enter(id, charBase, tokBase) {
       const m = matchers[em.tree.ruleNameOf(id)];
       if (m !== undefined) {
         nodes++;
         try {
-          m(em.tree as never, id as never, src);
+          m(em.tree as never, id as never, tokBase, src);
         } catch (e) {
           misses++;
           if (samples.length < 10) {
-            const off = em.tree.offsetOf(id);
-            samples.push(`${tag} ${em.tree.ruleNameOf(id)} @${off}..${em.tree.endOf(id)} «${src.slice(off, Math.min(em.tree.endOf(id), off + 50)).replace(/\n/g, '\\n')}» — ${(e as Error).message.slice(0, 60)}`);
+            const end = charBase + em.tree.lenOf(id);
+            samples.push(`${tag} ${em.tree.ruleNameOf(id)} @${charBase}..${end} «${src.slice(charBase, Math.min(end, charBase + 50)).replace(/\n/g, '\\n')}» — ${(e as Error).message.slice(0, 60)}`);
           }
         }
       }
diff --git a/test/obj-tree.ts b/test/obj-tree.ts
new file mode 100644
index 0000000..fee0a70
--- /dev/null
+++ b/test/obj-tree.ts
@@ -0,0 +1,78 @@
+// A TreeAccess adapter over an INTERPRETER object CST — absolute coordinates, ids
+// assigned by one post-order walk. It lets matcher consumers (the ts-ast lowering)
+// run against the interp oracle without caring that the EMITTED tree went green
+// (relative coordinates): the adapter ignores every tokBase it is handed.
+//
+// leafTokKindOf is only ever consulted on kind-0 leaves (the generated probes test
+// the kind bit first), where the object leaf's tokenType IS the token name (or
+// '$punct') — so the name→type-kind map (same derivation as the engine: punct 1,
+// template spans 2-4, named tokens from 5 in declaration order) is complete.
+import type { CstGrammar } from '../src/types.ts';
+
+type Leafish = { tokenType: string; offset: number; end: number };   // object-CST leaf: token type + span
+type Nodeish = { rule: string; children: (Leafish | Nodeish)[]; offset: number; end: number };   // object-CST node: rule name, ordered children, span
+
+export interface ObjTree {
+  rootId: number;   // id of the root node (the last id the post-order walk assigns)
+  // matcher-facing (TreeAccess-compatible; tokBase params ignored)
+  ruleNameOf(id: number): string;
+  ruleIdOf(id: number): number;   // index in grammar.rules ('$template' = rules.length), or -1 if unknown
+  childCount(id: number): number;
+  childAt(id: number, i: number): number;   // node id (>= 0) or encoded leaf entry (< 0)
+  childrenInto(id: number, out: number[]): number;   // copies the child entries into out; returns the count
+  leafKindOf(entry: number): number;   // low 2 bits of the entry: 1 '$keyword', 2 '$operator', 0 otherwise
+  leafTokKindOf(entry: number, tokBase?: number): number;
+  leafOffsetOf(entry: number, tokBase?: number): number;
+  leafEndOf(entry: number, tokBase?: number): number;
+  // stateless absolute conveniences (the lowering's toolkit)
+  offsetOf(entry: number): number;   // accepts a node id or a leaf entry
+  endOf(entry: number): number;   // accepts a node id or a leaf entry
+  leafTokenType(entry: number): string;
+}
+
+export function objTree(root: Nodeish, grammar: CstGrammar): ObjTree {
+  const typeKind = new Map<string, number>([['', 1], ['$punct', 1], ['$templateHead', 2], ['$templateMiddle', 3], ['$templateTail', 4]]);
+  { let next = 5; for (const t of grammar.tokens) if (!typeKind.has(t.name)) typeKind.set(t.name, next++); }   // named tokens: 5.. in declaration order
+  const ruleIdM = new Map<string, number>(grammar.rules.map((r, i) => [r.name, i]));
+  ruleIdM.set('$template', grammar.rules.length);   // '$template' takes the id right after the declared rules
+
+  const nodes: Nodeish[] = [];   // node id -> object node
+  const leaves: Leafish[] = [];   // leaf index -> object leaf
+  const kidsOf: number[][] = [];   // node id -> its child entries
+  const walk = (n: Nodeish): number => {   // registers n's subtree and returns n's id
+    const ks: number[] = [];
+    for (const c of n.children) {
+      if ((c as Leafish).tokenType !== undefined) {   // a tokenType field marks a leaf
+        const lf = c as Leafish;
+        const li = leaves.length;
+        leaves.push(lf);
+        const kind = lf.tokenType === '$keyword' ? 1 : lf.tokenType === '$operator' ? 2 : 0;
+        ks.push(~((li << 2) | kind));   // leaf entry (< 0): leaf index above a 2-bit kind
+      } else {
+        ks.push(walk(c as Nodeish));
+      }
+    }
+    const id = nodes.length;   // children were registered first, so ids are post-order
+    nodes.push(n);
+    kidsOf.push(ks);
+    return id;
+  };
+  const rootId = walk(root);
+  const leafOf = (e: number) => leaves[(~e) >>> 2];   // decode a leaf entry back to its object leaf
+
+  return {
+    rootId,
+    ruleNameOf: (id) => nodes[id].rule,
+    ruleIdOf: (id) => ruleIdM.get(nodes[id].rule) ?? -1,
+    childCount: (id) => kidsOf[id].length,
+    childAt: (id, i) => kidsOf[id][i],
+    childrenInto: (id, out) => { const ks = kidsOf[id]; for (let i = 0; i < ks.length; i++) out[i] = ks[i]; return ks.length; },
+    leafKindOf: (e) => (~e) & 3,
+    leafTokKindOf: (e) => typeKind.get(leafOf(e).tokenType) ?? 0,   // 0 when the token type has no mapped kind
+    leafOffsetOf: (e) => leafOf(e).offset,
+    leafEndOf: (e) => leafOf(e).end,
+    offsetOf: (e) => e >= 0 ? nodes[e].offset : leafOf(e).offset,
+    endOf: (e) => e >= 0 ? nodes[e].end : leafOf(e).end,
+    leafTokenType: (e) => leafOf(e).tokenType,
+  };
+}
diff --git a/test/ts-ast-lowering.ts b/test/ts-ast-lowering.ts
index 06edb48..a5268c7 100644
--- a/test/ts-ast-lowering.ts
+++ b/test/ts-ast-lowering.ts
@@ -6,7 +6,8 @@
 //
 // Deliberately NOT complete: unlowered constructs throw Unlowered (the verify driver
 // counts them) — the goal is an honest pain inventory, not a shipped frontend.
-import { matchStmt, type TreeAccess } from '../typescript.cst-match.ts';
+import { matchStmt } from '../typescript.cst-match.ts';
+import type { ObjTree } from './obj-tree.ts';
 
 export type Ast = { kind: string; pos: number; end: number; children: Ast[] };
 const ast = (kind: string, pos: number, end: number, children: Ast[] = []): Ast => ({ kind, pos, end, children });
@@ -27,7 +28,7 @@ export class Unlowered extends Error {
 // against undefined, never truthiness.
 type E = number;
 let SRC = '';
-let T!: TreeAccess;
+let T!: ObjTree;
 const isLeaf = (n: E | undefined): boolean => n !== undefined && n < 0;
 const isNode = (n: E | undefined): boolean => n !== undefined && n >= 0;
 const off = (n: E): number => T.offsetOf(n);
@@ -510,7 +511,7 @@ function lowerBindingElement(n: E): Ast {
 // A few arms still reach into kidsOf(n) for the positions of uncaptured structural
 // keywords ('catch', the switch '{') — a noted destructurer gap.
 function lowerStmt(n: E): Ast {
-  const m = matchStmt(T, n as never, SRC);
+  const m = matchStmt(T as never, n as never, 0, SRC);
   const c = kidsOf(n);
   switch (m.arm) {
     case 'block': return lowerBlock(m.block);
@@ -924,7 +925,7 @@ function lowerExport(n: E, c: E[], i: number, mods: Ast[]): Ast {
 }
 
 // ── Entry ──
-export function lowerProgram(t: TreeAccess, root: E, source: string): Ast {
+export function lowerProgram(t: ObjTree, root: E, source: string): Ast {
   T = t;
   SRC = source;
   const stmts: Ast[] = [];
diff --git a/test/ts-ast-verify.ts b/test/ts-ast-verify.ts
index 9e630fa..ff33f22 100644
--- a/test/ts-ast-verify.ts
+++ b/test/ts-ast-verify.ts
@@ -10,19 +10,15 @@
 //   node test/ts-ast-verify.ts <file.ts> [...]  # real files
 import { existsSync, readFileSync } from 'node:fs';
 import ts from 'typescript';
-import { writeFileSync } from 'node:fs';
-import { emitParser } from '../src/emit-parser.ts';
+import { createParser } from '../src/gen-parser.ts';
 import { lowerProgram, Unlowered, type Ast } from './ts-ast-lowering.ts';
+import { objTree } from './obj-tree.ts';
 
-// The lowering consumes the ARENA through TreeAccess, so parse with the emitted
-// parser (the product representation) — built fresh from the current grammar.
+// The lowering runs against the INTERPRETER oracle through the object-tree adapter
+// (absolute coordinates) — the grammar↔tsc structure contract is engine-independent,
+// and the emitted tree's green (relative) coordinates stay the emitted gates' concern.
 const grammar = (await import('../typescript.ts')).default;
-const emPath = '/tmp/emitted-tsast.mjs';
-writeFileSync(emPath, emitParser(grammar));
-const parser = (await import(emPath + '?v=' + process.pid)) as {
-  parse(src: string, entry?: string): number;
-  tree: import('../typescript.cst-match.ts').TreeAccess;
-};
+const parser = createParser(grammar);
 
 const kindNum = (name: string): number => {
   const v = (ts.SyntaxKind as unknown as Record<string, number>)[name];
@@ -72,11 +68,12 @@ function run(name: string, code: string): { ok: boolean; skipped?: boolean; line
       return { ok: true, skipped: true, line: `${name}: SKIPPED (tsc reports ${probe.parseDiagnostics.length} parse error(s) — recovery shapes are out of contract)`, samples: [] };
     }
   }
-  let root: number;
-  try { root = parser.parse(code); }
+  let rootObj;
+  try { rootObj = parser.parse(code); }
   catch (e) { return { ok: false, line: `${name}: MONOGRAM REJECT ${(e as Error).message.slice(0, 60)}`, samples: [] }; }
+  const adapter = objTree(rootObj as never, grammar);
   let mine: Ast;
-  try { mine = lowerProgram(parser.tree, root, code); }
+  try { mine = lowerProgram(adapter, adapter.rootId, code); }
   catch (e) {
     if (e instanceof Unlowered) return { ok: false, line: `${name}: UNLOWERED ${e.what} @${e.at}`, samples: [] };
     return { ok: false, line: `${name}: LOWER THROW ${(e as Error).message.slice(0, 80)}`, samples: [] };

From 190e2c5583f4d1bb43fd2560b0572f40a86c68a4 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Thu, 11 Jun 2026 04:29:24 +0800
Subject: [PATCH 05/15] Old-tree adoption (M4): reuse via cursor descent; the
 memo carry is gone
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An incremental rule entry now asks the PREVIOUS tree first: adoptSeek walks
the old root toward the mapped old position (cached containment path +
binary search over each node's monotone child starts) and adopts a node when
the rule matches, its lookahead gap stays clear of the damage (rowExt — the
ext-minus-start LENGTH, position-independent like everything green), and the
old parse MEMOIZED it (rowOK): a row built under a suppress (no-'in') or
parseLimit-capped context is a context-dependent parse, and adoption must not
widen the contract beyond what the memo carry offered — skipping that bit produced
real divergences (an incremental reject of text the fresh parse accepts).

Adoption is STATELESS: nothing is consumed, so PEG backtracking needs no
cursor rollback, a node refused under one longest-match candidate can be
adopted by the next, and exploratory descent through same-start chains never
commits to the cache. On adoption: pos jumps by rowTokLen, the watermark
bumps by the gap, the transients refresh — all O(1).

The memo becomes purely intra-parse: parseEdited's whole O(rules × n)
carry/invalidate machinery (the prefix watermark scans, the sparse rebuilds)
is deleted; fresh memo arrays per parse.

incremental ≡ fresh 0/120 with the mixed session at 1.91× (best yet);
9MB keystrokes ~121ms; 18,802/18,805 emit ≡ interp byte-identical; reject
messages exact; 30/30 gates.
---
 src/emit-parser.ts | 175 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 143 insertions(+), 32 deletions(-)

diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index b0446e4..726bb41 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -1364,6 +1364,15 @@ let rowLen = new Int32Array(8192);
 let rowTokLen = new Int32Array(8192);   // subtree token count
 let rowStart = new Int32Array(8192);    // first index into kids
 let rowCount = new Int32Array(8192);
+// lookahead GAP: how far past its own first token the node's parse may have READ
+// (ext − start, a length — position-independent like everything green). Adoption
+// validity across edits compares q + rowExt + slack against the damage start.
+let rowExt = new Int32Array(8192);
+// adoption eligibility: set ONLY where the old parse MEMOIZED the node — a row built
+// under a suppress (no-'in') or parseLimit-capped context is a context-dependent
+// parse and must never be adopted into a normal entry (the memo carry never stored
+// those; adoption must not widen the contract).
+let rowOK = new Uint8Array(8192);
 // transient BUILD coordinates (absolute), valid for rows completed in the current
 // parse and REFRESHED at memo-hit time for reused roots — parents read them at
 // finishNode to write the children's relative fields; never part of the green tree.
@@ -1392,6 +1401,8 @@ function growRows() {
   const tl = new Int32Array(rowCap); tl.set(rowTokLen); rowTokLen = tl;
   const s = new Int32Array(rowCap); s.set(rowStart); rowStart = s;
   const c = new Int32Array(rowCap); c.set(rowCount); rowCount = c;
+  const x = new Int32Array(rowCap); x.set(rowExt); rowExt = x;
+  const ok = new Uint8Array(rowCap); ok.set(rowOK); rowOK = ok;
   const ac = new Int32Array(rowCap); ac.set(absChar); absChar = ac;
   const at = new Int32Array(rowCap); at.set(absTok); absTok = at;
 }
@@ -1446,6 +1457,8 @@ function finishNode(rid, mark) {
   }
   rowRule[id] = rid; rowLen[id] = myEnd - myOff; rowCount[id] = n;
   rowTokLen[id] = myTokEnd - myTok;
+  rowExt[id] = maxPos - myTok;
+  rowOK[id] = 0;
   absChar[id] = myOff; absTok[id] = myTok;
   scn = mark;
   return id;
@@ -1478,6 +1491,8 @@ function finishWrap(rid, lhsId, mark) {
   rowRule[id] = rid; rowLen[id] = myEnd - myOff;
   rowStart[id] = ks; rowCount[id] = n + 1;
   rowTokLen[id] = myTokEnd - myTok;
+  rowExt[id] = maxPos - myTok;
+  rowOK[id] = 0;
   absChar[id] = myOff; absTok[id] = myTok;
   scn = mark;
   return id;
@@ -1673,7 +1688,7 @@ function emitNonRecRule(e: Emitter, a: ReturnType<typeof analyze>, rule: RuleDec
   // push+boolean contract and the memo) and an id-returning core, exactly like the
   // pratt/left-rec rules.
   if (memoized) {
-    e.emit(`function ${ruleFn}() { return parseRuleEntry(${e.memoIndex(rule.name)}, ${J(rule.name)}, ${ruleFn}_core); }`);
+    e.emit(`function ${ruleFn}() { return parseRuleEntry(${e.memoIndex(rule.name)}, ${rid}, ${J(rule.name)}, ${ruleFn}_core); }`);
     e.emit(`function ${ruleFn}_core(_minBp) {`);
   } else {
     e.emit(`function ${ruleFn}() {`);
@@ -1713,8 +1728,8 @@ function emitLeftRecRule(e: Emitter, a: ReturnType<typeof analyze>, rule: RuleDe
   // suppress wrapper in the interpreter — so currentPrattContext is set to this rule
   // (the template-interpolation rule resolution depends on it: a `${…}` hole inside a
   // template-literal TYPE must parse as Type, not the default expression rule).
-  e.emit(`function ${ruleFn}() { return parseRuleEntry(${e.memoIndex(rule.name)}, ${J(rule.name)}, ${ruleFn}_lr); }`);
   const rid = a.grammar.rules.indexOf(rule);
+  e.emit(`function ${ruleFn}() { return parseRuleEntry(${e.memoIndex(rule.name)}, ${rid}, ${J(rule.name)}, ${ruleFn}_lr); }`);
   e.emit(`function ${ruleFn}_lr(_minBp) {`);
   e.emit(`  const saved = pos; const mark = scn;`);
   e.emit(`  let node = -1; let bestAtomPos = saved;`);
@@ -1767,7 +1782,7 @@ function emitPrattRule(e: Emitter, a: ReturnType<typeof analyze>, rule: RuleDecl
 
   // R_<rule>() wraps parseRule's memo/context handling, then calls the bp-taking core.
   const rid = a.grammar.rules.indexOf(rule);
-  e.emit(`function ${ruleFn}() { return parseRuleEntry(${e.memoIndex(rule.name)}, ${J(rule.name)}, ${ruleFn}_pratt); }`);
+  e.emit(`function ${ruleFn}() { return parseRuleEntry(${e.memoIndex(rule.name)}, ${rid}, ${J(rule.name)}, ${ruleFn}_pratt); }`);
   e.emit(`function ${ruleFn}_pratt(minBp) {`);
   e.emit(`  const saved = pos; const mark = scn;`);
   e.emit(`  let lhs = -1; let bestNudPos = saved;`);
@@ -1988,7 +2003,7 @@ function emitDriver(e: Emitter, a: ReturnType<typeof analyze>, entry: string) {
 // and SECOND-token reads past it. Left-to-right parsing keeps the watermark near the
 // current frontier, so the value is tight on the dominant flow and only OVER-
 // invalidates (soundly) near big-backtrack clusters.
-function parseRuleEntry(idx, name, core) {
+function parseRuleEntry(idx, rid, name, core) {
   const mySup = suppressNext;
   suppressNext = null;
   const capped = parseLimit >= 0;
@@ -2024,6 +2039,34 @@ function parseRuleEntry(idx, name, core) {
       return false;
     }
   }
+  if (!mySup && !capped && adoptRoot >= 0) {
+    // map the new position into OLD token coordinates; inside the damage = no mapping
+    const q = start < adoptDmgStart ? start
+      : start >= adoptDmgOldEnd + adoptDelta ? start - adoptDelta : -1;
+    if (q >= 0) {
+      const aid = adoptSeek(q, rid);
+      if (aid >= 0) {
+        pos = start + rowTokLen[aid];
+        const ext = start + rowExt[aid];
+        if (ext > maxPos) maxPos = ext;
+        absTok[aid] = start;
+        absChar[aid] = tkOff[start];
+        if (me === undefined) {
+          me = new Array(tokN + 1);
+          mn = new Array(tokN + 1);
+          mx = new Array(tokN + 1);
+          memoEnd[idx] = me;
+          memoNode[idx] = mn;
+          memoExt[idx] = mx;
+        }
+        me[start] = pos;
+        mn[start] = aid;
+        mx[start] = maxPos;
+        scPush(aid);
+        return true;
+      }
+    }
+  }
   const prevContext = currentPrattContext;
   currentPrattContext = name;
   const prevSup = suppressCur;
@@ -2048,6 +2091,7 @@ function parseRuleEntry(idx, name, core) {
     mn[start] = result;
     mx[start] = maxPos;   // the TRUE probe watermark — the +2 read slack (stop token,
                           // SECOND-token dispatch) is applied at INVALIDATION time
+    if (result >= 0) rowOK[result] = 1;
 
   }
   if (result >= 0) { scPush(result); return true; }
@@ -2226,6 +2270,82 @@ let lastSrc = null;
 // the LAST parse root's absolute coordinates (the descent origin — see visit/toObject)
 let rootCharBase = 0;
 let rootTokBase = 0;
+
+// ── M4: old-tree ADOPTION (cursor reuse) ──
+// During an incremental re-parse, a rule entry first asks the PREVIOUS tree: is there
+// an old node of this rule starting at the corresponding old position whose lookahead
+// stayed clear of the damage? Adoption is STATELESS — nothing is consumed, so PEG
+// backtracking needs no cursor rollback, and a node refused under one candidate arm
+// can be adopted by the next. The memo stays purely intra-parse.
+let lastRoot = -1;           // previous parse's root id + its absolute first token
+let lastRootTok = 0;
+let adoptRoot = -1;          // previous root id (-1 = no adoption)
+let adoptRootTok = 0;        // its absolute first token (old coords)
+let adoptDmgStart = 0;       // damage window in OLD token coords: [adoptDmgStart, adoptDmgOldEnd)
+let adoptDmgOldEnd = 0;
+let adoptDelta = 0;          // new-minus-old token delta past the damage
+// cached descent path (top-down): ids + their absolute old token bases
+let adoptPath = [];
+let adoptBase = [];
+function adoptSeek(q, rid) {
+  // reuse the cached path while it still CONTAINS q (strictly inside, not at start)
+  let depth = 0;
+  while (depth < adoptPath.length) {
+    const id = adoptPath[depth];
+    const b = adoptBase[depth];
+    if (b < q && q < b + rowTokLen[id]) depth++;
+    else break;
+  }
+  adoptPath.length = depth;
+  adoptBase.length = depth;
+  let id, base;
+  if (depth === 0) {
+    if (q < adoptRootTok || q >= adoptRootTok + rowTokLen[adoptRoot]) return -1;
+    id = adoptRoot; base = adoptRootTok;
+    if (base === q) { /* root itself starts at q — fall through to the chain walk */ }
+    adoptPath.push(id); adoptBase.push(base);
+  } else {
+    id = adoptPath[depth - 1]; base = adoptBase[depth - 1];
+  }
+  // descend: containment steps are committed to the cache; the exploratory chain of
+  // nodes starting EXACTLY at q is walked in locals (a later seek with another rule
+  // must see the same chain).
+  for (;;) {
+    // binary search the first child whose END exceeds q
+    const cs = rowStart[id];
+    const n = rowCount[id];
+    let lo = 0, hi = n;
+    while (lo < hi) {
+      const mid = (lo + hi) >> 1;
+      const e = kids[cs + mid];
+      const end = e < 0 ? base + ((~e) >>> 2) + 1 : base + kidTokRel[cs + mid] + rowTokLen[e];
+      if (end <= q) lo = mid + 1; else hi = mid;
+    }
+    if (lo >= n) return -1;
+    const e = kids[cs + lo];
+    if (e < 0) return -1;                                  // the position is a leaf here
+    const cb = base + kidTokRel[cs + lo];
+    if (cb > q) return -1;                                 // a gap — nothing starts at q
+    if (cb === q) {
+      // the exploratory chain: every node from here down whose start is exactly q
+      let xid = e, xb = cb;
+      for (;;) {
+        if (rowOK[xid] !== 0 && rowRule[xid] === rid
+            && (q + rowExt[xid] + 2 <= adoptDmgStart || q >= adoptDmgOldEnd)) {
+          return xid;
+        }
+        const xcs = rowStart[xid];
+        if (rowCount[xid] === 0) return -1;
+        const fe = kids[xcs];
+        if (fe < 0 || kidTokRel[xcs] !== 0) return -1;
+        xid = fe; xb = xb;
+      }
+    }
+    // containment: commit and descend
+    id = e; base = cb;
+    adoptPath.push(id); adoptBase.push(base);
+  }
+}
 // The spare token-column buffer set (parseEdited ping-pongs between the live set and
 // this one, so steady-state edits never allocate columns).
 let altK = null, altT = null, altOff = null, altEnd = null, altFl = null, altDp = null, altPd = null;
@@ -2246,6 +2366,7 @@ ${e.soa ? '' : 'let altText = [];'}
 
 export function parse(source, entryRule) {
   lastSrc = null;
+  adoptRoot = -1;
   lexInto(source);
   memoNode = new Array(MEMO_RULES);
   memoEnd = new Array(MEMO_RULES);
@@ -2253,6 +2374,8 @@ export function parse(source, entryRule) {
   nodeN = 0;
   kidN = 0;
   const root = runParse(entryRule);
+  lastRoot = root;
+  lastRootTok = rootTokBase;
   lastSrc = source;
   return root;
 }
@@ -2368,35 +2491,23 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   const dOldEnd = oN - s;
   const tokenDelta = nN - oN;
   const nN2 = nN;`}
-  // Carry the memo across: prefix entries whose lookahead never reached the damage
-  // stay; suffix entries shift by tokenDelta; the damage window drops.
-  for (let r = 0; r < MEMO_RULES; r++) {
-    const me = memoEnd[r];
-    if (me === undefined) continue;
-    const mn = memoNode[r], mx = memoExt[r];
-    for (let i = 0; i < p; i++) {
-      if (me[i] !== undefined && mx[i] + 2 > p) { me[i] = undefined; mn[i] = undefined; mx[i] = undefined; }
-    }
-    if (tokenDelta === 0) {
-      for (let i = p; i < dOldEnd; i++) {
-        if (me[i] !== undefined) { me[i] = undefined; mn[i] = undefined; mx[i] = undefined; }
-      }
-      continue;
-    }
-    const nme = new Array(nN2 + 1), nmn = new Array(nN2 + 1), nmx = new Array(nN2 + 1);
-    const pCap = p < nN2 + 1 ? p : nN2 + 1;
-    for (let i = 0; i < pCap; i++) {
-      if (me[i] !== undefined) { nme[i] = me[i]; nmn[i] = mn[i]; nmx[i] = mx[i]; }
-    }
-    for (let i = dOldEnd; i <= oN; i++) {
-      if (me[i] !== undefined) {
-        const j = i + tokenDelta;
-        nme[j] = me[i] + tokenDelta; nmn[j] = mn[i]; nmx[j] = mx[i] + tokenDelta;
-      }
-    }
-    memoEnd[r] = nme; memoNode[r] = nmn; memoExt[r] = nmx;
-  }
+  // M4: NO memo carry — the memo is intra-parse; reuse flows through old-tree
+  // adoption (parseRuleEntry consults the previous root via adoptSeek), so the whole
+  // O(rules × n) carry/invalidate machinery is gone.
+  memoNode = new Array(MEMO_RULES);
+  memoEnd = new Array(MEMO_RULES);
+  memoExt = new Array(MEMO_RULES);
+  adoptRoot = lastRoot;
+  adoptRootTok = lastRootTok;
+  adoptDmgStart = p;
+  adoptDmgOldEnd = dOldEnd;
+  adoptDelta = tokenDelta;
+  adoptPath.length = 0;
+  adoptBase.length = 0;
   const root = runParse(entryRule);
+  adoptRoot = -1;
+  lastRoot = root;
+  lastRootTok = rootTokBase;
   lastSrc = source;
   return root;
 }

From e1b3a5cae17ee6ef1711aadbb734870ff781176d Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Thu, 11 Jun 2026 04:34:46 +0800
Subject: [PATCH 06/15] Generation-stamped persistent memo: the per-edit array
 churn dies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The intra-parse memo arrays persist across parses; an entry is live iff its
stamp (a new memoGen Int32Array per rule) equals the current generation, and
bumping the generation counter IS the whole reset — parse(), parseEdited()
and the '>'-splice all just increment it. Allocating fresh multi-million-slot
arrays per edit was ~30% of a large-file edit in GC alone (and pushed V8
toward dictionary elements); now steady-state edits allocate nothing.

9MB keystroke edits: ~121ms -> ~50ms (5.4x vs a full parse); mixed sessions
2.27x. incremental ≡ fresh with 0/120 divergences; 18,802 files
byte-identical; reject messages exact; 30/30 gates.
---
 src/emit-parser.ts | 45 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index 726bb41..04f6b09 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -1504,6 +1504,12 @@ let maxPos = 0;
 let memoNode = [];
 let memoEnd = [];
 let memoExt = [];   // per-entry lookahead extent (see parseRuleEntry)
+// GENERATION-STAMPED memo: the per-rule arrays persist across parses (allocating
+// fresh multi-million-slot arrays per edit cost ~30% of a large-file edit in GC
+// alone); an entry is live iff its stamp equals the current generation — bumping
+// memoGenCur IS the whole reset.
+let memoGen = [];
+let memoGenCur = 0;
 let parseLimit = -1;
 // cap = the exclusive lookahead bound: min(parseLimit-or-∞, tokN), maintained at the
 // parseLimit set/restore sites and the one token-stream mutation (the '>' splice).
@@ -1571,9 +1577,7 @@ function matchPuLitGT(pu) {
     tokN++;
     if (parseLimit < 0) cap = tokN;
     // Token indices shifted: drop the per-rule memo arrays (recreated lazily at the new size).
-    memoNode.fill(undefined);
-    memoEnd.fill(undefined);
-    memoExt.fill(undefined);
+    memoGenCur++;   // positions shifted mid-parse: every stamped entry is stale
     // GREEN tree: no kids/scratch fixup — every completed row and scratch entry lies
     // wholly BEFORE the splice point (token pos is being consumed right now), and the
     // carried memo was just cleared, so nothing reachable references shifted indices.
@@ -2014,7 +2018,8 @@ function parseRuleEntry(idx, rid, name, core) {
   let me = memoEnd[idx];
   let mn = memoNode[idx];
   let mx = memoExt[idx];
-  if (!mySup && !capped && me !== undefined) {
+  let mg = memoGen[idx];
+  if (!mySup && !capped && me !== undefined && mg[start] === memoGenCur) {
     const e = me[start];
     if (e !== undefined) {
       pos = e;
@@ -2051,17 +2056,20 @@ function parseRuleEntry(idx, rid, name, core) {
         if (ext > maxPos) maxPos = ext;
         absTok[aid] = start;
         absChar[aid] = tkOff[start];
-        if (me === undefined) {
+        if (me === undefined || me.length < tokN + 1) {
           me = new Array(tokN + 1);
           mn = new Array(tokN + 1);
           mx = new Array(tokN + 1);
+          mg = new Int32Array(tokN + 1);
           memoEnd[idx] = me;
           memoNode[idx] = mn;
           memoExt[idx] = mx;
+          memoGen[idx] = mg;
         }
         me[start] = pos;
         mn[start] = aid;
         mx[start] = maxPos;
+        mg[start] = memoGenCur;
         scPush(aid);
         return true;
       }
@@ -2079,17 +2087,20 @@ function parseRuleEntry(idx, rid, name, core) {
     suppressCur = prevSup;
   }
   if (!mySup && !capped) {
-    if (me === undefined) {
+    if (me === undefined || me.length < tokN + 1) {
       me = new Array(tokN + 1);
       mn = new Array(tokN + 1);
       mx = new Array(tokN + 1);
+      mg = new Int32Array(tokN + 1);
       memoEnd[idx] = me;
       memoNode[idx] = mn;
       memoExt[idx] = mx;
+      memoGen[idx] = mg;
     }
     me[start] = pos;
     mn[start] = result;
-    mx[start] = maxPos;   // the TRUE probe watermark — the +2 read slack (stop token,
+    mx[start] = maxPos;
+    mg[start] = memoGenCur;   // the TRUE probe watermark — the +2 read slack (stop token,
                           // SECOND-token dispatch) is applied at INVALIDATION time
     if (result >= 0) rowOK[result] = 1;
 
@@ -2368,9 +2379,13 @@ export function parse(source, entryRule) {
   lastSrc = null;
   adoptRoot = -1;
   lexInto(source);
-  memoNode = new Array(MEMO_RULES);
-  memoEnd = new Array(MEMO_RULES);
-  memoExt = new Array(MEMO_RULES);
+  if (memoEnd.length !== MEMO_RULES) {
+    memoNode = new Array(MEMO_RULES);
+    memoEnd = new Array(MEMO_RULES);
+    memoExt = new Array(MEMO_RULES);
+    memoGen = new Array(MEMO_RULES);
+  }
+  memoGenCur++;
   nodeN = 0;
   kidN = 0;
   const root = runParse(entryRule);
@@ -2494,9 +2509,13 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   // M4: NO memo carry — the memo is intra-parse; reuse flows through old-tree
   // adoption (parseRuleEntry consults the previous root via adoptSeek), so the whole
   // O(rules × n) carry/invalidate machinery is gone.
-  memoNode = new Array(MEMO_RULES);
-  memoEnd = new Array(MEMO_RULES);
-  memoExt = new Array(MEMO_RULES);
+  if (memoEnd.length !== MEMO_RULES) {
+    memoNode = new Array(MEMO_RULES);
+    memoEnd = new Array(MEMO_RULES);
+    memoExt = new Array(MEMO_RULES);
+    memoGen = new Array(MEMO_RULES);
+  }
+  memoGenCur++;
   adoptRoot = lastRoot;
   adoptRootTok = lastRootTok;
   adoptDmgStart = p;

From d4535ad5072d97010db9e15cfeb76c843281345b Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Thu, 11 Jun 2026 04:37:17 +0800
Subject: [PATCH 07/15] Edit protocol: parseEdited(source, entry, edits) skips
 the char-diff scans
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An editor knows its edit ranges, so the damage envelope can come from the
caller ([{start, oldEnd, newEnd}], merged over multiple edits) instead of the
char-level prefix/suffix compare — which was the largest remaining O(file)
scan (two charCodeAt sweeps over a 9MB source per keystroke). The compare
stays as the no-protocol fallback.

9MB keystroke edits: ~50ms -> 7.8ms (34.6x vs a full parse), equivalence
verified. What remains per edit is memcpy-grade: the suffix span shift and
the token-column splice (the chunked-columns endgame), the window lex, and
the adoption walk. incremental ≡ fresh with 0/120 divergences (the gate
exercises the fallback path); 30/30 gates.
---
 src/emit-parser.ts | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index 04f6b09..a8e0508 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -2410,19 +2410,33 @@ export function parse(source, entryRule) {
 // until then. Lexing is FULL-FILE by design: the lexer carries cross-token state
 // (template nesting, regex context, markup modes), full lexing is a small share of a
 // parse, and the diff is what localizes the damage — not the lexer.
-export function parseEdited(source, entryRule) {
+export function parseEdited(source, entryRule, edits) {
   if (lastSrc === null) return parse(source, entryRule);
   const oSrc = lastSrc;
   lastSrc = null;
 ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
-  // Char-level envelope (cheapest possible without an edit protocol).
+  // Damage envelope: from the EDIT PROTOCOL when the caller provides it (an editor
+  // knows its edit ranges — [{start, oldEnd, newEnd}] in old/new coordinates), else
+  // derived by the char-level prefix/suffix compare (the cheapest possible fallback,
+  // but O(file) scans).
   const oldLen = oSrc.length, newLen = source.length;
-  const minL = oldLen < newLen ? oldLen : newLen;
-  let cs = 0;
-  while (cs < minL && oSrc.charCodeAt(cs) === source.charCodeAt(cs)) cs++;
-  let ce = 0;
-  while (ce < minL - cs && oSrc.charCodeAt(oldLen - 1 - ce) === source.charCodeAt(newLen - 1 - ce)) ce++;
-  const ceOld = oldLen - ce, ceNew = newLen - ce;
+  let cs, ceOld, ceNew;
+  if (edits !== undefined && edits.length > 0) {
+    cs = edits[0].start; ceOld = edits[0].oldEnd; ceNew = edits[0].newEnd;
+    for (let i = 1; i < edits.length; i++) {
+      const ed = edits[i];
+      if (ed.start < cs) cs = ed.start;
+      if (ed.oldEnd > ceOld) ceOld = ed.oldEnd;
+      if (ed.newEnd > ceNew) ceNew = ed.newEnd;
+    }
+  } else {
+    const minL = oldLen < newLen ? oldLen : newLen;
+    cs = 0;
+    while (cs < minL && oSrc.charCodeAt(cs) === source.charCodeAt(cs)) cs++;
+    let ce = 0;
+    while (ce < minL - cs && oSrc.charCodeAt(oldLen - 1 - ce) === source.charCodeAt(newLen - 1 - ce)) ce++;
+    ceOld = oldLen - ce; ceNew = newLen - ce;
+  }
   const charDelta = newLen - oldLen;
   // Restart anchor: the last token B ending at/before the damage whose recorded
   // depths are zero and whose shape carries no cross-token lexer flag (')' control-

From 597b0f36caba997eea49e3f8aea89c3779b06fa8 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Thu, 11 Jun 2026 05:16:56 +0800
Subject: [PATCH 08/15] Run-adoption: rep loops bulk-adopt old sibling runs;
 session paren-stack cache

A 9MB flat-body keystroke spent 79% of its 61ms re-entering
parseRuleEntry/adoptSeek once per undamaged statement, and another 7ms
re-deriving the live paren stack by backward scan (the IIFE worst case).

- adoptSeek publishes the hit site (old parent row / kid index / base)
  when the adopted node is the parent's direct kid; parseRuleEntry arms
  a (pos, rid, generation)-signed run signal on such adoptions.
- '*'/'+' loops whose element is a parseRuleEntry-routed rule (pratt /
  left-rec / spine) consume the signal via runExtend: following old
  siblings are adopted in one tight loop under exactly the single-adopt
  eligibility (same-rule row, rowOK, contiguous, damage-clear, non-zero
  width). A member's existence proves the loop's FIRST guard true at its
  position; the signature triple keeps an inner rule's adoption from
  feeding elements into an outer loop. Members skip memo stores - a
  backtracking re-probe just re-adopts.
- reconstructParensCached rolls the previous anchor's stack FORWARD over
  the tokens between the anchors (tokens at/before the cached anchor are
  splice-stable); backward jumps fall back to the full scan. Invalidated
  by full lexes and the '>' splice.
- The spine-rule set moved to Emitter.spineSet(), shared by emitRuleFns
  and the quantifier hook.

9MB IIFE keystroke: 61ms -> 10.4ms (parse 50.5 -> 6.9, parens 7.2 -> 0.2).
Gates: 30/30, incremental 0/120 divergences, emit-parser-verify 0 mismatch,
emit-lexer-verify streams equal, batch bench unchanged (11.4x aggregate).
---
 src/emit-lexer.ts        |  24 +++
 src/emit-parser.ts       | 127 ++++++++++--
 src/token-dfa.ts         | 417 +++++++++++++++++++++++++++++++++++++++
 test/token-dfa-verify.ts |  74 +++++++
 4 files changed, 624 insertions(+), 18 deletions(-)
 create mode 100644 src/token-dfa.ts
 create mode 100644 test/token-dfa-verify.ts

diff --git a/src/emit-lexer.ts b/src/emit-lexer.ts
index 738b529..cf4291d 100644
--- a/src/emit-lexer.ts
+++ b/src/emit-lexer.ts
@@ -199,6 +199,7 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`function tokenize(source) {`);
   emit(`  src = source;`);
   emit(`  tokN = 0;`);
+  emit(`  parenCachePos = -1;`);
   emit(`  lexCore(source, 0, -1, 0, -1, 0, 0);`);
   emit(`  return tokN;`);
   emit(`}`);
@@ -538,6 +539,29 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`  }`);
   emit(`  return out;`);
   emit(`}`);
+  emit(`// Session cache for the live paren stack: the previous edit's anchor stack rolled`);
+  emit(`// FORWARD over the tokens between the two anchors (push on '(', pop on ')') — the`);
+  emit(`// backward scan is O(distance to the outermost live opener), which a deep`);
+  emit(`// stationary session would pay per keystroke. Tokens at/before the cached anchor`);
+  emit(`// are splice-stable (every splice begins past its own anchor), so the baseline`);
+  emit(`// stays exact; a backward jump (b < cached) falls back to the full scan.`);
+  emit(`let parenCachePos = -1;`);
+  emit(`let parenCacheStack = [];`);
+  emit(`function reconstructParensCached(b) {`);
+  emit(`  let stack;`);
+  emit(`  if (b < 0) stack = [];`);
+  emit(`  else if (parenCachePos >= 0 && parenCachePos <= b) {`);
+  emit(`    stack = parenCacheStack;`);
+  emit(`    for (let i = parenCachePos + 1; i <= b; i++) {`);
+  emit(`      if (tkK[i] === 1) {`);
+  emit(`        if (tkT[i] === ${tOf('(')}) stack.push((tkFl[i] & 8) !== 0);`);
+  emit(`        else if (tkT[i] === ${tRParen}) { if (stack.length > 0) stack.pop(); }`);
+  emit(`      }`);
+  emit(`    }`);
+  emit(`  } else stack = reconstructParens(b);`);
+  emit(`  parenCachePos = b; parenCacheStack = stack;`);
+  emit(`  return stack.slice();`);
+  emit(`}`);
   return out.join('\n');
 }
 
diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index a8e0508..41d571c 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -689,6 +689,49 @@ class Emitter {
   // Reference to a rule's parse function (token refs are inlined where used).
   private ruleFn(name: string) { return `R_${sanitize(name)}`; }
 
+  // SPINE rules — the entry rule's repetition units (the rules its body references
+  // directly): memoized through parseRuleEntry and therefore the adoption/run-
+  // extension granularity. Shared by emitRuleFns (memoized emission) and the
+  // quantifier run-extension hook. Grammar-shape-derived — no language names.
+  private spine: Set<string> | null = null;
+  spineSet(): Set<string> {
+    if (this.spine !== null) return this.spine;
+    const a = this.a;
+    const spine = new Set<string>();
+    const entryRule = a.grammar.rules[a.grammar.rules.length - 1];
+    const walk = (x: RuleExpr): void => {
+      switch (x.type) {
+        case 'ref': if (a.ruleByName.has(x.name)) spine.add(x.name); return;
+        case 'seq': case 'alt': x.items.forEach(walk); return;
+        case 'quantifier': case 'group': walk(x.body); return;
+        case 'sep': walk(x.element); return;
+        default: return;
+      }
+    };
+    walk(entryRule.body);
+    spine.delete(entryRule.name);
+    return (this.spine = spine);
+  }
+  // The run-extension target of a repetition: when the body unwraps to a plain ref of
+  // a rule that routes through parseRuleEntry (pratt / left-rec / spine), its rule id;
+  // else -1 (the loop gets no extension hook — adoption stays element-by-element).
+  private quantRunRuleId(body: RuleExpr): number {
+    const a = this.a;
+    let expr = body;
+    while (true) {
+      if (expr.type === 'group' && !(expr.suppress && expr.suppress.length)) { expr = expr.body; continue; }
+      if (expr.type === 'seq') {
+        const real = expr.items.filter(it => it.type !== 'op' && it.type !== 'prefix' && it.type !== 'postfix');
+        if (real.length === 1) { expr = real[0]; continue; }
+      }
+      break;
+    }
+    if (expr.type !== 'ref' || !a.ruleByName.has(expr.name)) return -1;
+    const name = expr.name;
+    if (!(a.prattRules.has(name) || a.leftRecSet.has(name) || this.spineSet().has(name))) return -1;
+    return a.grammar.rules.findIndex(r => r.name === name);
+  }
+
   /**
    * Emit (once) a helper fn for a compound `expr` and return its name. The helper
    * has the matchExpr contract: returns the matched children array or null, with pos
@@ -853,13 +896,20 @@ class Emitter {
       // Try once; on failure the helper restored pos/scn itself.
       return `${fn}();`;
     }
+    // Run-extension: after an iteration whose element was ADOPTED from the old tree,
+    // bulk-adopt its following old siblings (runExtend) instead of re-entering the
+    // rule machinery once per element. Only loops over a parseRuleEntry-routed rule
+    // get the hook, and runExtend re-checks rid + generation, so an inner rule's
+    // adoption can never feed elements into an outer loop.
+    const runId = this.quantRunRuleId(body);
+    const ext = runId >= 0 ? `\n  if (adoptRunPos === pos) runExtend(${runId});` : '';
     if (kind === '*') {
       const before = this.id(), bsn = this.id();
       return [
         `while (true) {`,
         `  const ${before} = pos; const ${bsn} = scn;`,
         `  if (!${fn}()) break;`,
-        `  if (pos === ${before} && scn === ${bsn}) break;`,
+        `  if (pos === ${before} && scn === ${bsn}) break;` + ext,
         `}`,
       ].join('\n');
     }
@@ -870,7 +920,7 @@ class Emitter {
       `while (true) {`,
       `  const ${before} = pos; const ${bsn} = scn;`,
       `  if (!${fn}()) break;`,
-      `  if (pos === ${before} && scn === ${bsn}) break;`,
+      `  if (pos === ${before} && scn === ${bsn}) break;` + ext,
       `}`,
     ].join('\n');
   }
@@ -1563,6 +1613,7 @@ function matchPuLitGT(pu) {
     const end0 = tkEnd[pos];
     ${e.soa ? '' : 'const restText = tkText[pos].slice(1);'}
     if (tokN === tkCap) growTok();
+    parenCachePos = -1;
     tkK.copyWithin(pos + 1, pos, tokN);
     tkT.copyWithin(pos + 1, pos, tokN);
     tkOff.copyWithin(pos + 1, pos, tokN);
@@ -1654,21 +1705,7 @@ function emitRuleFns(e: Emitter, a: ReturnType<typeof analyze>) {
   // memoized through parseRuleEntry like pratt/left-rec rules. Without this only
   // expression/type subtrees reuse and every statement re-walks on each edit.
   // Derived from the grammar shape — no language names.
-  const spine = new Set<string>();
-  {
-    const entryRule = a.grammar.rules[a.grammar.rules.length - 1];
-    const walk = (x: RuleExpr): void => {
-      switch (x.type) {
-        case 'ref': if (a.ruleByName.has(x.name)) spine.add(x.name); return;
-        case 'seq': case 'alt': x.items.forEach(walk); return;
-        case 'quantifier': case 'group': walk(x.body); return;
-        case 'sep': walk(x.element); return;
-        default: return;
-      }
-    };
-    walk(entryRule.body);
-    spine.delete(entryRule.name);
-  }
+  const spine = e.spineSet();
   for (const rule of a.grammar.rules) {
     if (a.prattRules.has(rule.name)) emitPrattRule(e, a, rule);
     else if (a.leftRecSet.has(rule.name)) emitLeftRecRule(e, a, rule);
@@ -2056,6 +2093,11 @@ function parseRuleEntry(idx, rid, name, core) {
         if (ext > maxPos) maxPos = ext;
         absTok[aid] = start;
         absChar[aid] = tkOff[start];
+        if (adoptHitP >= 0) {
+          adoptRunPos = pos; adoptRunRid = rid; adoptRunGen = memoGenCur;
+          adoptRunP = adoptHitP; adoptRunKid = adoptHitKid + 1;
+          adoptRunOq = q + rowTokLen[aid]; adoptRunBase = adoptHitBase;
+        }
         if (me === undefined || me.length < tokN + 1) {
           me = new Array(tokN + 1);
           mn = new Array(tokN + 1);
@@ -2298,6 +2340,12 @@ let adoptDelta = 0;          // new-minus-old token delta past the damage
 // cached descent path (top-down): ids + their absolute old token bases
 let adoptPath = [];
 let adoptBase = [];
+// run-extension state: where the last single adoption sat in the old tree (its
+// parent row / kid index / parent token base), published by adoptSeek, plus the
+// (pos, rid, generation) signature a repetition must present to consume it.
+let adoptHitP = -1, adoptHitKid = 0, adoptHitBase = 0;
+let adoptRunPos = -1, adoptRunRid = -1, adoptRunGen = -1;
+let adoptRunP = -1, adoptRunKid = 0, adoptRunOq = 0, adoptRunBase = 0;
 function adoptSeek(q, rid) {
   // reuse the cached path while it still CONTAINS q (strictly inside, not at start)
   let depth = 0;
@@ -2339,6 +2387,7 @@ function adoptSeek(q, rid) {
     if (cb > q) return -1;                                 // a gap — nothing starts at q
     if (cb === q) {
       // the exploratory chain: every node from here down whose start is exactly q
+      adoptHitP = id; adoptHitKid = cs + lo; adoptHitBase = base;
       let xid = e, xb = cb;
       for (;;) {
         if (rowOK[xid] !== 0 && rowRule[xid] === rid
@@ -2349,6 +2398,7 @@ function adoptSeek(q, rid) {
         if (rowCount[xid] === 0) return -1;
         const fe = kids[xcs];
         if (fe < 0 || kidTokRel[xcs] !== 0) return -1;
+        adoptHitP = -1;
         xid = fe; xb = xb;
       }
     }
@@ -2357,6 +2407,45 @@ function adoptSeek(q, rid) {
     adoptPath.push(id); adoptBase.push(base);
   }
 }
+// Run-extension: a repetition whose element was just ADOPTED bulk-adopts the
+// following OLD SIBLINGS in one tight loop — whole-statement reuse without
+// re-entering parseRuleEntry/adoptSeek once per element. Soundness: each member
+// re-passes exactly the single-adoption eligibility (same-rule row, memoized
+// [rowOK], contiguous, lookahead clear of the damage), a member's existence
+// proves the loop's FIRST-set guard true at its position (its first token starts
+// the rule), and the loop's own continuation checks run again after the run
+// breaks. Members get no memo entries — a backtracking re-probe just re-adopts.
+function runExtend(rid) {
+  if (rid !== adoptRunRid || memoGenCur !== adoptRunGen) { adoptRunPos = -1; return; }   // rule or memo generation differs from the published run: drop it
+  adoptRunPos = -1;                   // one-shot: the published run is consumed by this call
+  const P = adoptRunP;                // old parent row recorded for the last single adoption
+  const csEnd = rowStart[P] + rowCount[P];   // one past the last kid slot of P
+  const pb = adoptRunBase;            // token base of P in the old stream
+  let i = adoptRunKid;                // kid slot right after the adopted element
+  let oq = adoptRunOq;                // old-stream token position the next sibling must start at
+  let nq = pos;                       // matching position in the new token stream
+  const sfx = oq >= adoptDmgOldEnd;   // past the damage: monotone, no per-member ext check
+  let mp = maxPos;                    // local copy of the lookahead watermark, written back once at the end
+  while (i < csEnd) {
+    const e = kids[i];
+    if (e < 0) break;                                   // negative entry is not a row id (presumably a packed leaf kid - confirm)
+    if (pb + kidTokRel[i] !== oq) break;                // sibling does not start exactly where the previous member ended
+    if (rowRule[e] !== rid || rowOK[e] === 0) break;    // other rule, or a row that is not eligible for adoption
+    const tl = rowTokLen[e];
+    if (tl === 0) break;                                // a zero-token row would not advance the run
+    const ex = rowExt[e];
+    if (!sfx && oq + ex + 2 > adoptDmgStart) break;     // lookahead plus the +2 read slack would reach the damage
+    absTok[e] = nq; absChar[e] = tkOff[nq];             // re-base the row's absolute build coordinates to the new stream
+    scPush(e);                                          // NOTE(review): presumably appends the row to the current child scratch - confirm
+    const w = nq + ex;                                  // this member's lookahead watermark in new-stream tokens
+    if (w > mp) mp = w;
+    nq += tl; oq += tl;                                 // step both streams past the adopted member
+    i++;
+  }
+  if (mp > maxPos) maxPos = mp;       // publish the farthest watermark any adopted member carried
+  pos = nq;                           // the parser resumes right after the last adopted member
+}
+
 // The spare token-column buffer set (parseEdited ping-pongs between the live set and
 // this one, so steady-state edits never allocate columns).
 let altK = null, altT = null, altOff = null, altEnd = null, altFl = null, altDp = null, altPd = null;
@@ -2378,6 +2467,7 @@ ${e.soa ? '' : 'let altText = [];'}
 export function parse(source, entryRule) {
   lastSrc = null;
   adoptRoot = -1;
+  adoptRunPos = -1;
   lexInto(source);
   if (memoEnd.length !== MEMO_RULES) {
     memoNode = new Array(MEMO_RULES);
@@ -2442,7 +2532,7 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   // depths are zero and whose shape carries no cross-token lexer flag (')' control-
   // head, postfix-ambiguous op). B = -1 restarts at the file head — always sound.
   const B = findRestart(cs);
-  const initParens = B >= 0 ? reconstructParens(B) : [];
+  const initParens = reconstructParensCached(B);
   const oN = tokN;
   // first old token at/after the damage end — the resync search floor
   let r0 = oN;
@@ -2537,6 +2627,7 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   adoptDelta = tokenDelta;
   adoptPath.length = 0;
   adoptBase.length = 0;
+  adoptRunPos = -1;
   const root = runParse(entryRule);
   adoptRoot = -1;
   lastRoot = root;
diff --git a/src/token-dfa.ts b/src/token-dfa.ts
new file mode 100644
index 0000000..12b83ca
--- /dev/null
+++ b/src/token-dfa.ts
@@ -0,0 +1,417 @@
+// ─────────────────────────────────────────────────────────────────────────────
+//  token-dfa.ts — derive a char-code DFA matcher from a token's structured pattern IR
+//  (src/token-pattern.ts), as the forward path to a scanner that dispatches on char
+//  codes instead of executing a regex per token (issue #5).
+//
+//  The lexer matches one token at a time, anchored at `pos`, taking that token's
+//  greedy/longest match (sticky `re.lastIndex = pos; re.exec(s)`). This compiles the
+//  REGULAR subset of the IR — literal · charClass · anyChar · seq · alt · greedy
+//  repeat · never, plus a single TRAILING lookahead over a char class (the `(?!…)`
+//  guard the numeric tokens end with) — to an NFA (Thompson), then a DFA (subset
+//  construction), and runs it over `charCodeAt` code units. `match(s, pos)` returns
+//  the same match length the token's sticky regex would, or -1.
+//
+//  Anything outside that subset (mid-pattern look-around, lookbehind, anchors, a
+//  non-greedy quantifier) → `compileTokenDfa` returns null and the caller keeps using
+//  the regex. So the scanner is byte-identical by construction: a DFA where the IR is
+//  regular, the proven regex elsewhere. Char classes are matched over UTF-16 code
+//  units (0..0xFFFF) exactly like the non-`/u` regexes the lexer emits today.
+// ─────────────────────────────────────────────────────────────────────────────
+
+import type { TokenPattern, TokenCharClassItem } from './types.ts';
+
+// UTF-16 code-unit alphabet. Negated classes complement within [0, MAX_CODE].
+const MAX_CODE = 0xffff;
+
+// Half-open intervals are avoided: every range is the inclusive span [lo, hi] of code units.
+export interface Range { lo: number; hi: number }
+
+// ── Char-class → sorted, merged, inclusive ranges ──
+function classRanges(items: TokenCharClassItem[], negate: boolean): Range[] {
+  // One inclusive range per class item: a single char is a one-unit range; a from/to item
+  // spans the first code units of its two endpoints, ordered low-to-high.
+  const spans = items.map((item): Range => {
+    if (item.type === 'char') {
+      const unit = item.value.charCodeAt(0);
+      return { lo: unit, hi: unit };
+    }
+    const first = item.from.charCodeAt(0), second = item.to.charCodeAt(0);
+    return { lo: Math.min(first, second), hi: Math.max(first, second) };
+  });
+  const normalized = mergeRanges(spans);   // sorted + coalesced
+  return negate ? complementRanges(normalized) : normalized;
+}
+
+// Sort by (lo, hi), then coalesce every overlapping or directly adjacent pair; inputs are copied.
+function mergeRanges(ranges: Range[]): Range[] {
+  const ordered = ranges.slice().sort((x, y) => x.lo - y.lo || x.hi - y.hi);
+  const merged: Range[] = [];
+  for (const { lo, hi } of ordered) {
+    const tail = merged[merged.length - 1];
+    if (tail !== undefined && lo <= tail.hi + 1) tail.hi = Math.max(tail.hi, hi);
+    else merged.push({ lo, hi });
+  }
+  return merged;
+}
+
+// Complement of a sorted+merged range list within [0, MAX_CODE]: emit each gap between ranges.
+function complementRanges(ranges: Range[]): Range[] {
+  const gaps: Range[] = [];
+  let cursor = 0;   // lowest code unit not yet passed by a range of the input
+  ranges.forEach(({ lo, hi }) => {
+    if (cursor < lo) gaps.push({ lo: cursor, hi: lo - 1 });
+    cursor = hi + 1;
+  });
+  if (cursor <= MAX_CODE) gaps.push({ lo: cursor, hi: MAX_CODE });
+  return gaps;
+}
+
+// ── NFA (Thompson) ──
+// A transition is either an epsilon move or a move on any code unit inside `ranges`.
+interface NfaState { eps: number[]; trans: { ranges: Range[]; to: number }[] }
+
+class UnsupportedPattern extends Error {}
+
+class Nfa {   // growable NFA: a state's id is its index in `states`
+  states: NfaState[] = [];
+  newState(): number { this.states.push({ eps: [], trans: [] }); return this.states.length - 1; }   // append an edge-less state, return its id
+  eps(a: number, b: number): void { this.states[a].eps.push(b); }   // epsilon edge a → b
+  move(a: number, ranges: Range[], b: number): void { this.states[a].trans.push({ ranges, to: b }); }   // edge a → b on any code unit inside `ranges`
+}
+
+// Build an NFA fragment for `pattern`; returns [start, accept]. Throws UnsupportedPattern
+// for any non-regular construct so the caller can fall back to the regex.
+function build(nfa: Nfa, pattern: TokenPattern): [number, number] {
+  if (typeof pattern === 'string') return buildLiteral(nfa, pattern);   // a bare string is a literal
+  switch (pattern.type) {
+    case 'anyChar': {
+      const s = nfa.newState(), a = nfa.newState();
+      nfa.move(s, [{ lo: 0, hi: MAX_CODE }], a);   // one edge covering every code unit 0..MAX_CODE
+      return [s, a];
+    }
+    case 'charClass': {
+      const ranges = classRanges(pattern.items, pattern.negate);
+      const s = nfa.newState(), a = nfa.newState();
+      if (ranges.length) nfa.move(s, ranges, a);   // empty class → no edge → never matches
+      return [s, a];
+    }
+    case 'seq': {
+      if (pattern.items.length === 0) { const s = nfa.newState(); return [s, s]; }   // empty seq: one state that is both start and accept
+      let [start, acc] = build(nfa, pattern.items[0]);
+      for (let i = 1; i < pattern.items.length; i++) {
+        const [s2, a2] = build(nfa, pattern.items[i]);
+        nfa.eps(acc, s2);   // previous item's accept flows into the next item's start
+        acc = a2;
+      }
+      return [start, acc];
+    }
+    case 'alt': {
+      const s = nfa.newState(), a = nfa.newState();
+      for (const item of pattern.items) {
+        const [s2, a2] = build(nfa, item);
+        nfa.eps(s, s2);    // shared start fans out to every arm
+        nfa.eps(a2, a);    // every arm's accept joins the shared accept
+      }
+      return [s, a];
+    }
+    case 'repeat': {
+      if (!pattern.greedy) throw new UnsupportedPattern('non-greedy repeat');
+      // min mandatory copies, then either an unbounded star or (max-min) optional copies.
+      const s = nfa.newState();
+      let acc = s;
+      for (let i = 0; i < pattern.min; i++) {
+        const [s2, a2] = build(nfa, pattern.body);
+        nfa.eps(acc, s2);
+        acc = a2;
+      }
+      if (pattern.max === undefined) {
+        // star: acc --eps--> bodyStart, bodyAccept --eps--> bodyStart (loop), both --eps--> accept.
+        const [s2, a2] = build(nfa, pattern.body);
+        const a = nfa.newState();
+        nfa.eps(acc, s2);
+        nfa.eps(a2, s2);   // loop
+        nfa.eps(acc, a);   // skip (zero more)
+        nfa.eps(a2, a);    // exit after >=1
+        return [s, a];
+      } else {
+        const a = nfa.newState();
+        let cur = acc;
+        for (let i = pattern.min; i < pattern.max; i++) {
+          const [s2, a2] = build(nfa, pattern.body);
+          nfa.eps(cur, s2);
+          nfa.eps(cur, a);   // optional: skip the rest
+          cur = a2;
+        }
+        nfa.eps(cur, a);     // accept after the last copy that was built (or straight from acc when max <= min)
+        return [s, a];
+      }
+    }
+    case 'never': {
+      const s = nfa.newState(), a = nfa.newState();   // no edge s→a → never accepts
+      return [s, a];
+    }
+    // Non-regular: the caller must fall back to the regex.
+    case 'lookahead':
+    case 'lookbehind':
+    case 'anchor':
+      throw new UnsupportedPattern(pattern.type);
+  }
+}
+
+// Chain of single-code-unit moves, one per UTF-16 unit of the literal; '' gives start === accept.
+function buildLiteral(nfa: Nfa, literal: string): [number, number] {
+  const first = nfa.newState();
+  let tail = first;
+  for (let k = 0; k < literal.length; k++) {
+    const unit = literal.charCodeAt(k), fresh = nfa.newState();
+    nfa.move(tail, [{ lo: unit, hi: unit }], fresh);
+    tail = fresh;
+  }
+  return [first, tail];
+}
+
+// ── Subset construction → DFA ──
+interface DfaState { accept: boolean; edges: { ranges: Range[]; to: number }[] }
+
+function epsilonClosure(nfa: Nfa, set: Set<number>): Set<number> {
+  // Worklist walk: every state reachable from `set` through epsilon moves alone (input untouched).
+  const closure = new Set(set), pending = Array.from(set);
+  for (let s = pending.pop(); s !== undefined; s = pending.pop()) {
+    for (const t of nfa.states[s].eps) if (!closure.has(t)) { closure.add(t); pending.push(t); }
+  }
+  return closure;
+}
+
+function setKey(set: Set<number>): string {
+  return Array.from(set).sort((x, y) => x - y).join(',');   // canonical key: ascending state ids, comma-joined
+}
+
+// Subset construction. Each DFA state is an epsilon-closed set of NFA states (state 0 is the
+// closure of `start`); per state the alphabet is cut at every range boundary so each interval
+// is uniform across all outgoing NFA transitions — the classic DFA alphabet partition.
+function buildDfa(nfa: Nfa, start: number, accept: number): DfaState[] {
+  const startSet = epsilonClosure(nfa, new Set([start]));
+  const dfa: DfaState[] = [];
+  const index = new Map<string, number>();   // canonical set key → DFA state id
+  const queue: Set<number>[] = [];           // state sets whose outgoing edges are still to be computed
+
+  const intern = (set: Set<number>): number => {   // id of the DFA state for `set`, creating and queueing it on first sight
+    const key = setKey(set);
+    let id = index.get(key);
+    if (id === undefined) {
+      id = dfa.length;
+      index.set(key, id);
+      dfa.push({ accept: set.has(accept), edges: [] });   // accepting iff the set contains the NFA accept state
+      queue.push(set);
+    }
+    return id;
+  };
+
+  intern(startSet);   // becomes DFA state 0
+  while (queue.length) {
+    const set = queue.shift()!;
+    const id = index.get(setKey(set))!;   // always present: every queued set was interned first
+    // Collect this state's outgoing transitions, then split into disjoint intervals.
+    const trans: { ranges: Range[]; to: number }[] = [];
+    for (const ns of set) for (const tr of nfa.states[ns].trans) trans.push(tr);
+    if (trans.length === 0) continue;   // dead end: the state keeps its empty edge list
+    // Cut points: for every range [lo,hi] add boundaries at lo and hi+1.
+    const cuts = new Set<number>();
+    for (const tr of trans) for (const r of tr.ranges) { cuts.add(r.lo); cuts.add(r.hi + 1); }
+    const points = [...cuts].sort((a, b) => a - b);
+    // For each elementary interval [points[i], points[i+1]-1], gather NFA targets.
+    const edges: { ranges: Range[]; to: number }[] = [];
+    for (let i = 0; i < points.length - 1; i++) {
+      const lo = points[i], hi = points[i + 1] - 1;
+      if (hi < lo) continue;
+      const targets = new Set<number>();
+      for (const tr of trans) {
+        for (const r of tr.ranges) if (r.lo <= lo && hi <= r.hi) { targets.add(tr.to); break; }   // interval lies wholly inside one of tr's ranges
+      }
+      if (targets.size === 0) continue;   // a gap between ranges: no edge on this interval
+      const toId = intern(epsilonClosure(nfa, targets));
+      edges.push({ ranges: [{ lo, hi }], to: toId });
+    }
+    // Merge adjacent intervals that go to the same DFA state (compacts the table).
+    edges.sort((a, b) => a.ranges[0].lo - b.ranges[0].lo);
+    const merged: { ranges: Range[]; to: number }[] = [];
+    for (const e of edges) {
+      const last = merged[merged.length - 1];
+      if (last && last.to === e.to && last.ranges[last.ranges.length - 1].hi + 1 === e.ranges[0].lo) {
+        last.ranges[last.ranges.length - 1].hi = e.ranges[0].hi;   // extend the previous edge's range in place
+      } else merged.push({ ranges: [{ ...e.ranges[0] }], to: e.to });
+    }
+    dfa[id].edges = merged;   // every merged edge carries exactly one range, in ascending order
+  }
+  return dfa;
+}
+
+function dfaNext(state: DfaState, code: number): number {   // id of the state reached on `code`, or -1 when no edge covers it
+  for (const e of state.edges) {
+    for (const r of e.ranges) {
+      if (code < r.lo) break;       // ranges are sorted ascending
+      if (code <= r.hi) return e.to;   // r.lo <= code <= r.hi: this edge is taken
+    }
+  }
+  return -1;
+}
+
+// Walk the DFA from `pos` (state 0 is the start state) until it has no edge for the next
+// code unit or the input ends, noting the consumed length whenever the current state accepts.
+// The lengths are returned longest-first — the order the trailing-lookahead retry scans them.
+function runAcceptLengths(dfa: DfaState[], s: string, pos: number): number[] {
+  const lengths: number[] = dfa[0].accept ? [0] : [];
+  let cur = 0;
+  for (let at = pos; at < s.length; ) {
+    const to = dfaNext(dfa[cur], s.charCodeAt(at));
+    if (to < 0) break;
+    cur = to;
+    at += 1;
+    if (dfa[cur].accept) lengths.push(at - pos);
+  }
+  lengths.reverse();
+  return lengths;
+}
+
+// ── Public compile ──
+export interface TokenDfa {
+  /** Match length at `pos`, or -1 — byte-identical to the token's sticky regex exec. */
+  match(s: string, pos: number): number;
+}
+
+// The compiled DFA + any trailing char-class assertion, exposed so a code emitter can
+// turn it into specialized straight-line JS (a generic interpreter over this structure
+// is SLOWER than V8's regex — the win is in emitting tight char-code branches).
+export type { DfaState };
+export interface CompiledTokenDfa { states: DfaState[]; trailing: { ranges: Range[]; negate: boolean } | null }
+
+export function buildTokenDfaRaw(pattern: TokenPattern): CompiledTokenDfa | null {   // null = pattern is outside the supported subset
+  try {
+    const look = trailingLookahead(pattern);   // split off a trailing char-class assertion, if there is one
+    const nfa = new Nfa();
+    const [start, accept] = build(nfa, look ? look.body : pattern);   // only the body (without the assertion) becomes automaton states
+    const states = buildDfa(nfa, start, accept);
+    return { states, trailing: look ? { ranges: look.ranges, negate: look.negate } : null };
+  } catch (e) {
+    if (e instanceof UnsupportedPattern) return null;   // the "keep using the regex" signal raised by build()
+    throw e;                                            // anything else is a genuine error
+  }
+}
+
+// ── DFA → specialized straight-line JS ──
+// A GENERIC interpreter over the DFA is slower than V8's JIT-compiled regex; the win is
+// in emitting tight char-code branches (measured ~1.3–1.6× over the sticky regex on the
+// common tokens). Above this many DFA states the emitted switch stops paying off (a large
+// escape-heavy token like a string literal lands ~even with the regex), so we decline and
+// the caller keeps the regex — correctness is identical either way.
+const MAX_SCANNER_STATES = 64;
+
+function rangesCond(ranges: Range[], v: string): string {   // JS source of a boolean test of variable `v` against `ranges`
+  return ranges.map(r => r.lo === r.hi ? `${v}===${r.lo}` : `${v}>=${r.lo}&&${v}<=${r.hi}`).join('||') || 'false';   // [] → 'false', never '' (callers splice the result into `(…)`, and `()` is a syntax error)
+}
+
+/**
+ * Emit a token scanner as a JS function BODY with parameters `(s, pos, re)`: returns the
+ * match length at `pos` (byte-identical to the token's sticky regex), or -1. `re` is the
+ * token's own regex, used only on the rare trailing-lookahead retry. Returns null when the
+ * pattern is outside the supported subset or its DFA is too large (caller keeps the regex).
+ */
+export function emitTokenScannerBody(pattern: TokenPattern): string | null {
+  const compiled = buildTokenDfaRaw(pattern);
+  if (!compiled) return null;
+  const { states, trailing } = compiled;
+  if (states.length > MAX_SCANNER_STATES) return null;
+  const accept = states.map(s => s.accept);
+  const L: string[] = [];   // emitted source fragments, concatenated without separators at the end
+  L.push(`const n=s.length;let i=pos,st=0,acc=${accept[0] ? 0 : -1};`);   // acc = longest accepted length so far (-1 = none yet)
+  L.push(`for(;;){if(i>=n)break;const c=s.charCodeAt(i);switch(st){`);
+  states.forEach((state, si) => {
+    if (state.edges.length === 0) { L.push(`case ${si}:break;`); return; }   // dead-end state: nothing more can be consumed
+    let body = `case ${si}:{`;
+    for (const e of state.edges) {
+      const cond = rangesCond(e.ranges, 'c');
+      body += `if(${e.ranges.length > 1 ? `(${cond})` : cond}){st=${e.to};i++;${accept[e.to] ? 'acc=i-pos;' : ''}continue;}`;   // take the edge; `continue` re-enters the scan loop
+    }
+    L.push(body + 'break;}');   // no edge matched: leave the switch, then the loop
+  });
+  L.push('}break;}');   // closes the switch; the `break` after it ends the scan loop
+  if (trailing) {
+    // longest accept = acc; a trailing `(?!class)`/`(?=class)` may force a shorter match —
+    // rare (well-formed input ends the token at a boundary), so defer that to the regex.
+    L.push('if(acc<0)return -1;const at=pos+acc;const cc=at<n?s.charCodeAt(at):-1;');
+    L.push(`const present=at<n&&(${rangesCond(trailing.ranges, 'cc')});`);   // is the code unit right after the match inside the asserted class?
+    L.push(`if(${trailing.negate ? '!present' : 'present'})return acc;`);    // assertion holds at the longest match: done
+    L.push('re.lastIndex=pos;const m=re.exec(s);return m?m[0].length:-1;');  // otherwise let the token's own regex decide
+  } else {
+    L.push('return acc;');
+  }
+  return L.join('');
+}
+
+/** Runtime-compile a token scanner (for the interpreted lexer). Null = keep the regex. */
+export function compileTokenScanner(pattern: TokenPattern, regex: RegExp): ((s: string, pos: number) => number) | null {
+  const body = emitTokenScannerBody(pattern);
+  if (body === null) return null;   // unsupported pattern or DFA too large
+  const fn = new Function('s', 'pos', 're', body) as (s: string, pos: number, re: RegExp) => number;   // the body is generated here from the pattern IR
+  return (s, pos) => fn(s, pos, regex);   // bind the token's own regex as the scanner's `re` argument
+}
+
+// A trailing `(?!class)` / `(?=class)` over a single char class is the only look-around
+// the numeric tokens use; supported by retrying shorter body matches until the assertion
+// at the body's end holds. Detected structurally on the IR: returns the body to compile plus
+// the assertion's ranges and polarity, or null (also for an empty seq, which has no last item).
+function trailingLookahead(pattern: TokenPattern): { body: TokenPattern; ranges: Range[]; negate: boolean } | null {
+  if (typeof pattern === 'string' || pattern.type !== 'seq') return null;
+  const last = pattern.items[pattern.items.length - 1];
+  if (last === undefined || typeof last === 'string' || last.type !== 'lookahead') return null;
+  const inner = last.body;
+  if (typeof inner === 'string' || inner.type !== 'charClass') return null;   // only a char-class assertion
+  const head = pattern.items.slice(0, -1);   // everything before the assertion
+  const body: TokenPattern = head.length === 1 ? head[0] : { type: 'seq', items: head };
+  return { body, ranges: classRanges(inner.items, inner.negate), negate: last.negate };
+}
+
+function inRanges(ranges: Range[], code: number): boolean {
+  // true iff some inclusive [lo, hi] range contains the code unit
+  return ranges.some(({ lo, hi }) => lo <= code && code <= hi);
+}
+
+/**
+ * Compile a token's pattern to a char-code DFA matcher, or return null if the pattern
+ * uses a construct outside the supported regular subset (caller falls back to regex).
+ *
+ * The split-off trailing assertion and the DFA come from buildTokenDfaRaw, which also turns
+ * the UnsupportedPattern signal into null and lets every other error propagate.
+ *  - No trailing assertion: the longest accepting length wins (-1 when nothing accepts).
+ *  - Trailing char-class lookahead: accepting lengths are tried longest-first and the first
+ *    whose following code unit satisfies the assertion wins (-1 when none does).
+ */
+export function compileTokenDfa(pattern: TokenPattern): TokenDfa | null {
+  // Same pipeline as the emitter path: split assertion → Thompson NFA → subset-construction DFA.
+  const compiled = buildTokenDfaRaw(pattern);
+  if (compiled === null) return null;
+  const { states, trailing } = compiled;
+
+  if (trailing === null) {
+    return {
+      match(s, pos) {
+        const lens = runAcceptLengths(states, s, pos);
+        return lens.length ? lens[0] : -1;
+      },
+    };
+  }
+
+  // Assertion present: a shorter accepting length may be the one the regex would report.
+  const { ranges, negate } = trailing;
+  return {
+    match(s, pos) {
+      for (const len of runAcceptLengths(states, s, pos)) {   // longest first
+        const at = pos + len;
+        const has = at < s.length && inRanges(ranges, s.charCodeAt(at));
+        // negative lookahead succeeds when the char is absent (incl. EOF); positive needs it present.
+        if (negate ? !has : has) return len;
+      }
+      return -1;
+    },
+  };
+}
diff --git a/test/token-dfa-verify.ts b/test/token-dfa-verify.ts
new file mode 100644
index 0000000..a86f6c8
--- /dev/null
+++ b/test/token-dfa-verify.ts
@@ -0,0 +1,74 @@
+// Correctness + speed gate for token-dfa.ts: for every TS token whose pattern compiles
+// to a DFA, the DFA's match length must equal the token's sticky-regex match length at
+// EVERY position of the corpus (byte-identical), and we measure the per-token speedup.
+//
+//   node test/token-dfa-verify.ts
+import { compileTokenDfa } from '../src/token-dfa.ts';
+import { tokenPatternSource } from '../src/token-pattern.ts';
+import { readFileSync, readdirSync } from 'fs';
+import { join } from 'path';
+
+const grammar = (await import('../typescript.ts')).default;
+
+const base = '/tmp/ts-repo/tests/cases/conformance';
+function walk(d: string): string[] {
+  // Depth-first listing of the .ts files under `d`, declaration files (.d.ts) excluded.
+  return readdirSync(d, { withFileTypes: true }).flatMap(entry => {
+    const full = join(d, entry.name);
+    if (entry.isDirectory()) return walk(full);
+    const wanted = entry.name.endsWith('.ts') && !entry.name.endsWith('.d.ts');
+    return wanted ? [full] : [];
+  });
+}
+const files = walk(base).sort().filter((_, i) => i % 11 === 0);   // ~stride sample
+const sources = files.map(f => { try { return readFileSync(f, 'utf-8'); } catch { return ''; } }).filter(Boolean);
+const totalChars = sources.reduce((a, s) => a + s.length, 0);
+
+// Tokens the per-position lexer loop actually runs through a regex (skip template).
+const tokens = grammar.tokens.filter(t => !t.template);
+
+console.log(`tokens: ${tokens.length} · corpus sample: ${sources.length} files, ${(totalChars / 1024).toFixed(0)} KB\n`);
+console.log('token            DFA?    positions   mism   regex ms   dfa ms   speedup');
+console.log('-'.repeat(78));
+
+let totalMism = 0, compiled = 0, fellBack = 0;
+for (const t of tokens) {
+  let src: string;
+  try { src = tokenPatternSource(t); } catch { src = ''; }
+  const dfa = compileTokenDfa(t.pattern);
+  if (!dfa) {
+    fellBack++;
+    console.log(`${t.name.padEnd(16)} regex   ${'—'.padStart(10)}   ${'—'.padStart(4)}   (unsupported → falls back to regex)`);
+    continue;
+  }
+  compiled++;
+  const re = new RegExp(`(?:${src})`, 'y');
+
+  // Correctness: at every position, DFA length === regex length.
+  let mism = 0, positions = 0;
+  for (const s of sources) {
+    for (let pos = 0; pos < s.length; pos++) {
+      re.lastIndex = pos;
+      const m = re.exec(s);
+      const reLen = m ? m[0].length : -1;
+      const dfaLen = dfa.match(s, pos);
+      positions++;
+      if (reLen !== dfaLen) {
+        if (mism < 3) console.log(`    MISMATCH @${pos} re=${reLen} dfa=${dfaLen} ctx=${JSON.stringify(s.slice(pos, pos + 24))}`);
+        mism++;
+      }
+    }
+  }
+  totalMism += mism;
+
+  // Speed: scan each source once via regex vs DFA (best-of-5).
+  const timeRe = () => { let acc = 0; for (const s of sources) for (let p = 0; p < s.length; p++) { re.lastIndex = p; const m = re.exec(s); acc += m ? m[0].length : 0; } return acc; };
+  const timeDfa = () => { let acc = 0; for (const s of sources) for (let p = 0; p < s.length; p++) { const l = dfa.match(s, p); acc += l > 0 ? l : 0; } return acc; };
+  const best = (fn: () => number) => { for (let w = 0; w < 2; w++) fn(); let b = Infinity; for (let r = 0; r < 5; r++) { const t0 = process.hrtime.bigint(); fn(); const dt = Number(process.hrtime.bigint() - t0) / 1e6; if (dt < b) b = dt; } return b; };
+  const reMs = best(timeRe), dfaMs = best(timeDfa);
+  console.log(`${t.name.padEnd(16)} dfa     ${String(positions).padStart(10)}   ${String(mism).padStart(4)}   ${reMs.toFixed(1).padStart(8)}   ${dfaMs.toFixed(1).padStart(6)}   ${(reMs / dfaMs).toFixed(2)}×`);
+}
+
+console.log('-'.repeat(78));
+console.log(`compiled to DFA: ${compiled} · fell back to regex: ${fellBack} · TOTAL mismatches: ${totalMism}`);
+process.exit(totalMism === 0 ? 0 : 1);

From 0215b24a4bdd443f1478e05f0986c827e6fcb189 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Thu, 11 Jun 2026 06:09:36 +0800
Subject: [PATCH 09/15] Node surgery + EOF-relative spans: a 9MB keystroke
 re-parse in ~0.05ms

After run-adoption, three O(n)-per-edit costs remained on a 9MB flat
body: the damage-path list parent re-collected all 180k kids through
scratch (and the arena grew by that much per edit), the suffix token
spans took a char-delta add-loop, and the spliced parent's suffix kids
took a rel add-loop.

- Node SURGERY patches the damage path in place. Descend the old tree
  along single-affected-row kids; at the deepest PURE container
  (SURG_ELEM: a seq of literals/refs around exactly one '*'/'+' rep of a
  parseRuleEntry-routed rule - no alt/sep/opt/not at the container's own
  level, so every probe is owned by a kid row), re-parse only the
  affected elements with the real rule fn (adoption reuses their
  undamaged subtrees), require exact rejoin at an old kid start, then
  splice the kid range and patch lengths up the path. Every check runs
  before any row is mutated; any failure falls back to the full adoption
  re-parse. Prefix kids are kept under the adoption watermark rule, made
  transitive by rowKC (lazy kid-containment bit). Pure insertions at a
  kid boundary must touch the rep zone (a neighbour element), or the
  splice would stitch the element into a CLOSED node. Char lengths are
  re-DERIVED from the token columns, not patched by the char delta: a
  pure-trivia edit can sit token-inside but char-outside a node (the gap
  belongs to no node).
- EOF-relative token spans: tkOff/tkEnd at/after the damage store
  value - (srcLen + 1); decode adds the current length back, so updating
  srcLenP1 IS the suffix shift. Values self-describe by sign; negFrom
  bounds the flip band (cursor-locality sized). The '>' splice writes
  its pair sign-consistently with the zone it lands in.
- END-relative kid rels: a row kid's kidRel/kidTokRel may be stored
  relative to the parent's END (strictly negative, decoded with the
  parent's current lengths), so a surgical splice shifts the whole kid
  suffix by updating the parent's lengths. Stable across edits while the
  parent row is untouched; rowNF bounds the per-row band. Leaf kids stay
  start-relative (packed) - a pure container's trailing leaves get an
  O(1) backward walk.
- incremental-verify now alternates the edits-protocol and char-diff
  envelopes, and its seeded sessions caught three real holes during
  development (trivia-boundary length leak, closed-node stitching,
  Int32 overflow in the relocated-range boundary remap).

9MB keystroke: 10.4ms -> median 0.04ms / p90 0.07ms (~750x vs fresh,
steady state; the first edit of a session pays the one-time flip +
buffer allocation). 8MB nested real-code shape: median 0.13ms. 81KB:
median 0.10ms. Batch is sign-clean: emitted aggregate 11.4-11.6x
(unchanged band), 30/30 gates, emit-parser-verify 0 mismatches,
emit-lexer-verify byte-identical streams.
---
 src/emit-lexer.ts          |  10 +-
 src/emit-parser.ts         | 457 +++++++++++++++++++++++++++++++++----
 test/incremental-verify.ts |  30 ++-
 3 files changed, 442 insertions(+), 55 deletions(-)

diff --git a/src/emit-lexer.ts b/src/emit-lexer.ts
index cf4291d..07745ea 100644
--- a/src/emit-lexer.ts
+++ b/src/emit-lexer.ts
@@ -200,6 +200,8 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`  src = source;`);
   emit(`  tokN = 0;`);
   emit(`  parenCachePos = -1;`);
+  emit(`  srcLenP1 = source.length + 1;`);
+  emit(`  negFrom = 0x7fffffff;`);
   emit(`  lexCore(source, 0, -1, 0, -1, 0, 0);`);
   emit(`  return tokN;`);
   emit(`}`);
@@ -245,9 +247,9 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`      }`);
   emit(`      if (off >= wndMinOff && dmgPd >= 0`);
   emit(`          && templateStack.length <= dmgDp && parenHeadStack.length <= dmgPd) {`);
-  emit(`        while (wndPtr < altN && altOff[wndPtr] + wndDelta < off) wndPtr++;`);
-  emit(`        if (wndPtr < altN && altOff[wndPtr] + wndDelta === off && altK[wndPtr] === k && altT[wndPtr] === t`);
-  emit(`            && altEnd[wndPtr] + wndDelta === end && altDp[wndPtr] === templateStack.length && altPd[wndPtr] === parenHeadStack.length) {`);
+  emit(`        while (wndPtr < altN && (altOff[wndPtr] < 0 ? altOff[wndPtr] + srcLenP1 : altOff[wndPtr]) + wndDelta < off) wndPtr++;`);
+  emit(`        if (wndPtr < altN && (altOff[wndPtr] < 0 ? altOff[wndPtr] + srcLenP1 : altOff[wndPtr]) + wndDelta === off && altK[wndPtr] === k && altT[wndPtr] === t`);
+  emit(`            && (altEnd[wndPtr] < 0 ? altEnd[wndPtr] + srcLenP1 : altEnd[wndPtr]) + wndDelta === end && altDp[wndPtr] === templateStack.length && altPd[wndPtr] === parenHeadStack.length) {`);
   emit(`          wndHit = wndPtr;`);
   emit(`        }`);
   emit(`      }`);
@@ -514,7 +516,7 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`// head (always sound, degrades to a full re-lex).`);
   emit(`function findRestart(cs) {`);
   emit(`  let lo = 0, hi = tokN;`);
-  emit(`  while (lo < hi) { const mid = (lo + hi) >> 1; if (tkEnd[mid] <= cs) lo = mid + 1; else hi = mid; }`);
+  emit(`  while (lo < hi) { const mid = (lo + hi) >> 1; if (tend(mid) <= cs) lo = mid + 1; else hi = mid; }`);
   emit(`  for (let b = lo - 1; b >= 0; b--) {`);
   emit(`    // template depth must be zero (interp brace counters are not reconstructable),`);
   emit(`    // and the anchor token must leave no cross-token lexer flag live: not a`);
diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index 41d571c..f17a4ac 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -715,7 +715,7 @@ class Emitter {
   // The run-extension target of a repetition: when the body unwraps to a plain ref of
   // a rule that routes through parseRuleEntry (pratt / left-rec / spine), its rule id;
   // else -1 (the loop gets no extension hook — adoption stays element-by-element).
-  private quantRunRuleId(body: RuleExpr): number {
+  quantRunRuleId(body: RuleExpr): number {
     const a = this.a;
     let expr = body;
     while (true) {
@@ -1388,6 +1388,17 @@ let tkPd = new Uint16Array(4096);
 let tkCap = 4096;
 let tokN = 0;
 let src = '';
+// ── EOF-relative spans (incremental sessions) ──
+// A token's tkOff/tkEnd may be stored EOF-RELATIVE (value − (srcLen + 1), strictly
+// negative): the decode adds the CURRENT length back, so a pure suffix never needs
+// the O(suffix) add-loop a char delta would otherwise force — updating srcLenP1 IS
+// the shift. Values self-describe by sign, so mixed zones stay readable; negFrom
+// only bounds where negatives may exist (the flip-band maintenance range). Batch
+// parses are all-positive and the decode branch never fires.
+let srcLenP1 = 1;
+let negFrom = 0x7fffffff;
+function toff(i) { const v = tkOff[i]; return v < 0 ? v + srcLenP1 : v; }
+function tend(i) { const v = tkEnd[i]; return v < 0 ? v + srcLenP1 : v; }
 ${e.soa ? '' : 'let tkText = [];   // fallback-lexer text column (synthetic tokens are not source spans)'}
 function growTok() {
   tkCap *= 2;
@@ -1423,6 +1434,23 @@ let rowExt = new Int32Array(8192);
 // parse and must never be adopted into a normal entry (the memo carry never stored
 // those; adoption must not widen the contract).
 let rowOK = new Uint8Array(8192);
+// kid-containment bit (lazy): 0 unknown, 1 = every kid's probe watermark stays
+// at/below the next kid's start (so a prefix-keep check of the LAST kept kid
+// transitively bounds all earlier ones), 2 = violated somewhere. Computed on
+// first surgical use of a row, maintained across in-place splices.
+let rowKC = new Uint8Array(8192);
+// END-RELATIVE kid rels (incremental sessions): a ROW kid's kidTokRel/kidRel may be
+// stored relative to the parent's END (value − (parentLen + 1), strictly negative);
+// the decode adds the parent's CURRENT length back. A surgical splice then shifts
+// the whole suffix by updating the parent's lengths — no per-kid add-loop — and the
+// values stay correct as long as the parent row is unedited (only surgery changes a
+// row's lengths, and it maintains its own band). Leaf kids pack their rel inside the
+// kids value and always stay start-relative (the trailing-leaf walk shifts them
+// eagerly). rowNF = first kid index (absolute, like rowStart) that may hold an
+// end-relative value; batch parses never flip, so the decode branch never fires.
+let rowNF = new Int32Array(8192).fill(0x7fffffff);
+function ktr(p, k) { const v = kidTokRel[k]; return v < 0 ? v + rowTokLen[p] + 1 : v; }
+function kcr(p, k) { const v = kidRel[k]; return v < 0 ? v + rowLen[p] + 1 : v; }
 // transient BUILD coordinates (absolute), valid for rows completed in the current
 // parse and REFRESHED at memo-hit time for reused roots — parents read them at
 // finishNode to write the children's relative fields; never part of the green tree.
@@ -1453,6 +1481,8 @@ function growRows() {
   const c = new Int32Array(rowCap); c.set(rowCount); rowCount = c;
   const x = new Int32Array(rowCap); x.set(rowExt); rowExt = x;
   const ok = new Uint8Array(rowCap); ok.set(rowOK); rowOK = ok;
+  const kc = new Uint8Array(rowCap); kc.set(rowKC); rowKC = kc;
+  const nf = new Int32Array(rowCap).fill(0x7fffffff); nf.set(rowNF.subarray(0, nodeN)); rowNF = nf;
   const ac = new Int32Array(rowCap); ac.set(absChar); absChar = ac;
   const at = new Int32Array(rowCap); at.set(absTok); absTok = at;
 }
@@ -1466,8 +1496,8 @@ function scPush(e) {
   if (scn === scCap) { scCap *= 2; const s = new Int32Array(scCap); s.set(sc); sc = s; }
   sc[scn++] = e;
 }
-function entryOff(e) { return e >= 0 ? absChar[e] : tkOff[(~e) >>> 2]; }
-function entryEnd(e) { return e >= 0 ? absChar[e] + rowLen[e] : tkEnd[(~e) >>> 2]; }
+function entryOff(e) { return e >= 0 ? absChar[e] : toff((~e) >>> 2); }
+function entryEnd(e) { return e >= 0 ? absChar[e] + rowLen[e] : tend((~e) >>> 2); }
 function entryTok(e) { return e >= 0 ? absTok[e] : (~e) >>> 2; }
 function entryTokEnd(e) { return e >= 0 ? absTok[e] + rowTokLen[e] : ((~e) >>> 2) + 1; }
 // Complete a node whose children are scratch[mark..scn): copy them into kids, write
@@ -1509,6 +1539,8 @@ function finishNode(rid, mark) {
   rowTokLen[id] = myTokEnd - myTok;
   rowExt[id] = maxPos - myTok;
   rowOK[id] = 0;
+  rowKC[id] = 0;
+  rowNF[id] = 0x7fffffff;
   absChar[id] = myOff; absTok[id] = myTok;
   scn = mark;
   return id;
@@ -1543,6 +1575,8 @@ function finishWrap(rid, lhsId, mark) {
   rowTokLen[id] = myTokEnd - myTok;
   rowExt[id] = maxPos - myTok;
   rowOK[id] = 0;
+  rowKC[id] = 0;
+  rowNF[id] = 0x7fffffff;
   absChar[id] = myOff; absTok[id] = myTok;
   scn = mark;
   return id;
@@ -1569,8 +1603,8 @@ let suppressNext = null;
 let suppressCur = null;
 
 function offset() {
-  if (pos < cap) return tkOff[pos];
-  return tokN > 0 ? tkEnd[tokN - 1] : 0;
+  if (pos < cap) return toff(pos);
+  return tokN > 0 ? tend(tokN - 1) : 0;
 }
 
 // ── Lever 1: integer-kind matchers ──
@@ -1600,7 +1634,7 @@ function matchPuLit(pu) {
 }
 function matchPuLitGT(pu) {
   if (pos >= cap) return false;
-  const off = tkOff[pos];
+  const off = toff(pos);
   if (tkT[pos] === pu) {
     scPush(~(pos << 2));
     if (++pos > maxPos) maxPos = pos;
@@ -1609,8 +1643,8 @@ function matchPuLitGT(pu) {
   // Split multi-'>' tokens: '>>', '>>>', '>>=', '>>>=' can yield a single '>': shift the
   // columns up one slot and write the '>' + rest pair in place (both born flag-less,
   // matching the old mkPunct pair).
-  if (tkK[pos] === K_PUNCT && tkEnd[pos] - off > 1 && ${e.soa ? 'src.charCodeAt(off) === 62' : "tkText[pos].charCodeAt(0) === 62"}) {
-    const end0 = tkEnd[pos];
+  if (tkK[pos] === K_PUNCT && tend(pos) - off > 1 && ${e.soa ? 'src.charCodeAt(off) === 62' : "tkText[pos].charCodeAt(0) === 62"}) {
+    const end0 = tend(pos);
     ${e.soa ? '' : 'const restText = tkText[pos].slice(1);'}
     if (tokN === tkCap) growTok();
     parenCachePos = -1;
@@ -1622,8 +1656,17 @@ function matchPuLitGT(pu) {
     tkPd.copyWithin(pos + 1, pos, tokN);
     tkFl.copyWithin(pos + 1, pos, tokN);
     ${e.soa ? '' : "tkText.splice(pos, 1, '>', restText);"}
-    tkT[pos] = pu; tkEnd[pos] = off + 1; tkFl[pos] = 0;
-    tkOff[pos + 1] = off + 1; tkFl[pos + 1] = 0;
+    // Keep the EOF-relative zone invariant: a split at/past negFrom writes the new
+    // pair EOF-relative (a positive value there would not ride later srcLenP1
+    // shifts); below it, the boundary index moves up one slot with the suffix.
+    if (pos < negFrom) {
+      negFrom++;
+      tkT[pos] = pu; tkEnd[pos] = off + 1; tkFl[pos] = 0;
+      tkOff[pos + 1] = off + 1; tkFl[pos + 1] = 0;
+    } else {
+      tkT[pos] = pu; tkEnd[pos] = off + 1 - srcLenP1; tkFl[pos] = 0;
+      tkOff[pos + 1] = off + 1 - srcLenP1; tkFl[pos + 1] = 0;
+    }
     tkT[pos + 1] = ${e.soa ? 'LIT_PU.get(src.slice(off + 1, end0)) ?? 0' : 'LIT_PU.get(restText) ?? 0'};
     tokN++;
     if (parseLimit < 0) cap = tokN;
@@ -1715,6 +1758,37 @@ function emitRuleFns(e: Emitter, a: ReturnType<typeof analyze>) {
   e.emit(`const RULES = {`);
   for (const rule of a.grammar.rules) e.emit(`  ${J(rule.name)}: ${ruleFn(rule.name)},`);
   e.emit(`};`);
+
+  // Surgical-container table: rule id → its repetition element's rule id, for rules
+  // whose body is a PURE seq/group of literals/refs around exactly one '*'/'+' rep
+  // of a parseRuleEntry-routed rule. No alt/sep/opt/not anywhere in the body: a
+  // longest-match arm (or lookahead) at the container's OWN level may probe into
+  // the rep zone without any kid row owning the read, which would break the
+  // prefix-keep watermark argument node surgery relies on.
+  const surg: number[] = a.grammar.rules.map(() => -1);
+  a.grammar.rules.forEach((rule, ri) => {
+    if (a.prattRules.has(rule.name) || a.leftRecSet.has(rule.name)) return;
+    let reps = 0; let bad = false; let elem = -1;
+    const walk = (x: RuleExpr): void => {
+      if (bad) return;
+      switch (x.type) {
+        case 'seq': x.items.forEach(walk); return;
+        case 'group':
+          if (x.suppress && x.suppress.length) { bad = true; return; }
+          walk(x.body); return;
+        case 'literal': case 'ref': case 'op': case 'prefix': case 'postfix': return;
+        case 'quantifier':
+          if (x.kind === '?') { bad = true; return; }
+          reps++; elem = e.quantRunRuleId(x.body);
+          return;
+        default: bad = true; return;
+      }
+    };
+    walk(rule.body);
+    if (!bad && reps === 1 && elem >= 0) surg[ri] = elem;
+  });
+  e.emit(`const SURG_ELEM = new Int32Array([${surg.join(',')}]);`);
+  e.emit(`const RULE_FN_BY_ID = [${a.grammar.rules.map(r => ruleFn(r.name)).join(', ')}];`);
 }
 
 // Non-recursive rule: longest-match over alts (mirrors parseNonRec). A better arm is
@@ -1918,7 +1992,7 @@ function emitPrattRule(e: Emitter, a: ReturnType<typeof analyze>, rule: RuleDecl
   e.emit(`          const _h = kids[rowStart[lhs]];`);
   e.emit(`          if (_h < 0 && ((~_h) & 3) === 2) {`);
   e.emit(`            const _ht = absTok[lhs] + ((~_h) >>> 2);`);
-  e.emit(`            const _htext = ${e.soa ? 'src.slice(tkOff[_ht], tkEnd[_ht])' : 'tkText[_ht]'};`);
+  e.emit(`            const _htext = ${e.soa ? 'src.slice(toff(_ht), tend(_ht))' : 'tkText[_ht]'};`);
   e.emit(`            if (prefixOps.has(_htext) && !postfixOpValues.has(_htext)) { return -1; }`);
   e.emit(`          }`);
   e.emit(`        }`);
@@ -2074,7 +2148,7 @@ function parseRuleEntry(idx, rid, name, core) {
         // (its green internals are position-independent; only the attachment point —
         // what the enclosing finishNode reads — must be current).
         absTok[id] = start;
-        absChar[id] = tkOff[start];
+        absChar[id] = toff(start);
         scPush(id);
         return true;
       }
@@ -2092,7 +2166,7 @@ function parseRuleEntry(idx, rid, name, core) {
         const ext = start + rowExt[aid];
         if (ext > maxPos) maxPos = ext;
         absTok[aid] = start;
-        absChar[aid] = tkOff[start];
+        absChar[aid] = toff(start);
         if (adoptHitP >= 0) {
           adoptRunPos = pos; adoptRunRid = rid; adoptRunGen = memoGenCur;
           adoptRunP = adoptHitP; adoptRunKid = adoptHitKid + 1;
@@ -2153,7 +2227,7 @@ function parseRuleEntry(idx, rid, name, core) {
 
 // Token text at an arbitrary index (cold paths: errors, the tokenAt debug view).
 function tokTextAt(i) {
-  return ${e.soa ? 'src.slice(tkOff[i], tkEnd[i])' : 'tkText[i]'};
+  return ${e.soa ? 'src.slice(toff(i), tend(i))' : 'tkText[i]'};
 }
 // The k → type-name inverse, for reconstructing a token object (tokenAt).
 const K_NAMES = [];
@@ -2163,7 +2237,7 @@ export function tokenAt(i) {
   return {
     type: K_NAMES[tkK[i]] ?? '',
     text: tokTextAt(i),
-    offset: tkOff[i],
+    offset: toff(i),
     k: tkK[i],
     t: tkT[i],
     newlineBefore: (tkFl[i] & 1) !== 0,
@@ -2201,11 +2275,11 @@ export const tree = {
   lenOf: (id) => rowLen[id],
   tokLenOf: (id) => rowTokLen[id],
   // a node CHILD's relative coordinates live on the parent edge (kids-parallel)
-  childRelAt: (id, i) => kidRel[rowStart[id] + i],
-  childTokRelAt: (id, i) => kidTokRel[rowStart[id] + i],
+  childRelAt: (id, i) => kcr(id, rowStart[id] + i),
+  childTokRelAt: (id, i) => ktr(id, rowStart[id] + i),
   // base-threaded spans: nodes from their bases, leaves from the token columns
-  offsetOf: (entry, charBase, tokBase) => entry >= 0 ? charBase : tkOff[tokBase + ((~entry) >>> 2)],
-  endOf: (entry, charBase, tokBase) => entry >= 0 ? charBase + rowLen[entry] : tkEnd[tokBase + ((~entry) >>> 2)],
+  offsetOf: (entry, charBase, tokBase) => entry >= 0 ? charBase : toff(tokBase + ((~entry) >>> 2)),
+  endOf: (entry, charBase, tokBase) => entry >= 0 ? charBase + rowLen[entry] : tend(tokBase + ((~entry) >>> 2)),
   childCount: (id) => rowCount[id],
   childAt: (id, i) => kids[rowStart[id] + i],
   // Bulk child load into a caller-owned array; returns the count. One call per node
@@ -2223,11 +2297,11 @@ export const tree = {
   // 1 '$keyword', 2 '$operator' — and the token's TYPE kind int (1 = punctuation).
   leafKindOf: (entry) => (~entry) & 3,
   leafTokKindOf: (entry, tokBase) => tkK[tokBase + ((~entry) >>> 2)],
-  leafOffsetOf: (entry, tokBase) => tkOff[tokBase + ((~entry) >>> 2)],
-  leafEndOf: (entry, tokBase) => tkEnd[tokBase + ((~entry) >>> 2)],
+  leafOffsetOf: (entry, tokBase) => toff(tokBase + ((~entry) >>> 2)),
+  leafEndOf: (entry, tokBase) => tend(tokBase + ((~entry) >>> 2)),
   textOf: (entry, source, charBase, tokBase) => entry >= 0
     ? source.slice(charBase, charBase + rowLen[entry])
-    : source.slice(tkOff[tokBase + ((~entry) >>> 2)], tkEnd[tokBase + ((~entry) >>> 2)]),
+    : source.slice(toff(tokBase + ((~entry) >>> 2)), tend(tokBase + ((~entry) >>> 2))),
 };
 // Depth-first traversal from a node id or leaf entry:
 //   enter(id)         — each NODE before its children; return false to skip its subtree
@@ -2245,7 +2319,7 @@ export function visit(entry, fns, charBase, tokBase) {
   for (let i = 0; i < n; i++) {
     const e = kids[cs + i];
     if (e < 0) { if (fns.leaf) fns.leaf(e, tokBase + ((~e) >>> 2)); }
-    else visit(e, fns, charBase + kidRel[cs + i], tokBase + kidTokRel[cs + i]);
+    else visit(e, fns, charBase + kcr(entry, cs + i), tokBase + ktr(entry, cs + i));
   }
   if (fns.leave) fns.leave(entry, charBase, tokBase);
 }
@@ -2258,8 +2332,8 @@ export function toObject(id, charBase, tokBase) {
   const children = new Array(n);
   for (let i = 0; i < n; i++) {
     const entry = kids[cs + i];
-    children[i] = entry >= 0 ? toObject(entry, charBase + kidRel[cs + i], tokBase + kidTokRel[cs + i])
-      : { tokenType: leafTokenType(entry, tokBase), offset: tkOff[tokBase + ((~entry) >>> 2)], end: tkEnd[tokBase + ((~entry) >>> 2)] };
+    children[i] = entry >= 0 ? toObject(entry, charBase + kcr(id, cs + i), tokBase + ktr(id, cs + i))
+      : { tokenType: leafTokenType(entry, tokBase), offset: toff(tokBase + ((~entry) >>> 2)), end: tend(tokBase + ((~entry) >>> 2)) };
   }
   return { rule: RULE_NAMES[rowRule[id]], children, offset: charBase, end: charBase + rowLen[id] };
 }
@@ -2283,7 +2357,7 @@ ${e.soa ? `  tokenize(source);` : String.raw`  src = source;
 
 function farthest(errPos) {
   if (maxPos <= errPos || maxPos >= tokN) return '';
-  return ' [farthest: offset ' + tkOff[maxPos] + " near '" + tokTextAt(maxPos).slice(0, 20) + "']";
+  return ' [farthest: offset ' + toff(maxPos) + " near '" + tokTextAt(maxPos).slice(0, 20) + "']";
 }
 
 // Run the entry rule over the CURRENT token stream (shared by parse / parseEdited —
@@ -2306,10 +2380,10 @@ function runParse(entryRule) {
   }
   if (!RULES[entry]()) {
     const hasTok = pos < cap;
-    throw new Error('Parse error at offset ' + (hasTok ? tkOff[pos] : 0) + ': unexpected ' + (hasTok ? "'" + tokTextAt(pos) + "'" : 'end of input') + farthest(pos));
+    throw new Error('Parse error at offset ' + (hasTok ? toff(pos) : 0) + ': unexpected ' + (hasTok ? "'" + tokTextAt(pos) + "'" : 'end of input') + farthest(pos));
   }
   if (pos < tokN) {
-    throw new Error('Parse error at offset ' + tkOff[pos] + ": unexpected '" + tokTextAt(pos) + "' after successful parse" + farthest(pos));
+    throw new Error('Parse error at offset ' + toff(pos) + ": unexpected '" + tokTextAt(pos) + "' after successful parse" + farthest(pos));
   }
   const rootId = sc[--scn];
   rootCharBase = absChar[rootId]; rootTokBase = absTok[rootId];
@@ -2377,13 +2451,13 @@ function adoptSeek(q, rid) {
     while (lo < hi) {
       const mid = (lo + hi) >> 1;
       const e = kids[cs + mid];
-      const end = e < 0 ? base + ((~e) >>> 2) + 1 : base + kidTokRel[cs + mid] + rowTokLen[e];
+      const end = e < 0 ? base + ((~e) >>> 2) + 1 : base + ktr(id, cs + mid) + rowTokLen[e];
       if (end <= q) lo = mid + 1; else hi = mid;
     }
     if (lo >= n) return -1;
     const e = kids[cs + lo];
     if (e < 0) return -1;                                  // the position is a leaf here
-    const cb = base + kidTokRel[cs + lo];
+    const cb = base + ktr(id, cs + lo);
     if (cb > q) return -1;                                 // a gap — nothing starts at q
     if (cb === q) {
       // the exploratory chain: every node from here down whose start is exactly q
@@ -2397,7 +2471,7 @@ function adoptSeek(q, rid) {
         const xcs = rowStart[xid];
         if (rowCount[xid] === 0) return -1;
         const fe = kids[xcs];
-        if (fe < 0 || kidTokRel[xcs] !== 0) return -1;
+        if (fe < 0 || ktr(xid, xcs) !== 0) return -1;
         adoptHitP = -1;
         xid = fe; xb = xb;
       }
@@ -2429,13 +2503,13 @@ function runExtend(rid) {
   while (i < csEnd) {
     const e = kids[i];
     if (e < 0) break;
-    if (pb + kidTokRel[i] !== oq) break;
+    if (pb + ktr(P, i) !== oq) break;
     if (rowRule[e] !== rid || rowOK[e] === 0) break;
     const tl = rowTokLen[e];
     if (tl === 0) break;
     const ex = rowExt[e];
     if (!sfx && oq + ex + 2 > adoptDmgStart) break;
-    absTok[e] = nq; absChar[e] = tkOff[nq];
+    absTok[e] = nq; absChar[e] = toff(nq);
     scPush(e);
     const w = nq + ex;
     if (w > mp) mp = w;
@@ -2446,6 +2520,277 @@ function runExtend(rid) {
   pos = nq;
 }
 
+// ── Node SURGERY: patch the damage path in place ──
+// Even with run-adoption, a keystroke inside one statement of a large list rebuilds
+// every node on the damage path — the list parent re-collects ALL its kids through
+// scratch (and the arena grows by that much per edit). Surgery keeps those rows:
+// descend the old tree to the deepest PURE container (SURG_ELEM), re-parse only the
+// affected elements with the real rule fn (adoption reuses their undamaged
+// subtrees), and when the fresh elements REJOIN an old kid start exactly, splice the
+// container's kid range and shift the suffix rels by the edit deltas. Every check
+// happens BEFORE any row is mutated; any failure falls back to the full adoption
+// re-parse. Prefix kids are kept under the same watermark rule single adoption
+// uses, made transitive by rowKC: each kid's probe watermark stays at/below the
+// next kid's start, so checking the LAST kept kid bounds them all.
+let surgX = [], surgBase = [], surgA = [], surgB = [];
+function rowKCof(id) {  // lazy kid-containment bit of row id: 1 = holds, 2 = violated
+  const c = rowKC[id];
+  if (c !== 0) return c;  // already computed (0 means unknown)
+  const cs = rowStart[id], n = rowCount[id];
+  let ok = 1, prevW = -1;  // prevW: the previous kid's watermark, relative to this row's start
+  for (let k = 0; k < n; k++) {
+    const e = kids[cs + k];
+    const st = e < 0 ? (~e) >>> 2 : ktr(id, cs + k);  // kid start: packed leaf rel, or decoded row rel
+    if (prevW > st) { ok = 2; break; }  // the previous kid probed past this kid's start
+    prevW = e < 0 ? st + 1 : st + rowExt[e];  // a leaf's watermark is just past its own token
+  }
+  rowKC[id] = ok;  // cache the verdict on the row
+  return ok;
+}
+function trySurgery(dmgA, dmgB, tokD, chrD) {  // returns adoptRoot, or -1 (caller then runs the full re-parse)
+  if (adoptRoot < 0) return -1;
+  // the whole-file token math must close, or the shape changed beyond a splice
+  if (adoptRootTok + rowTokLen[adoptRoot] + tokD !== tokN) return -1;
+  // 1. descend along single-affected-row kids, recording the path
+  surgX.length = 0; surgBase.length = 0; surgA.length = 0; surgB.length = 0;
+  let X = adoptRoot, base = adoptRootTok;
+  for (;;) {
+    const cs = rowStart[X], n = rowCount[X];
+    let lo = 0, hi = n;
+    while (lo < hi) {
+      const m = (lo + hi) >> 1;
+      const e = kids[cs + m];
+      const st = base + (e < 0 ? (~e) >>> 2 : ktr(X, cs + m));
+      if (st < dmgB) lo = m + 1; else hi = m;
+    }
+    const b = lo;
+    let a = b;
+    while (a > 0) {
+      const e = kids[cs + a - 1];
+      const st = base + (e < 0 ? (~e) >>> 2 : ktr(X, cs + a - 1));
+      if (e < 0 ? st < dmgA : st + rowExt[e] + 2 <= dmgA) break;
+      a--;
+    }
+    surgX.push(X); surgBase.push(base); surgA.push(a); surgB.push(b);
+    if (b - a !== 1) break;
+    const e = kids[cs + a];
+    if (e < 0 || rowCount[e] === 0) break;
+    base = base + ktr(X, cs + a);
+    X = e;
+  }
+  // 2. choose D: the deepest surgical level whose affected kids are all rep rows
+  let L = -1;
+  for (let i = surgX.length - 1; i >= 0; i--) {
+    const Xi = surgX[i];
+    const elem = SURG_ELEM[rowRule[Xi]];
+    if (elem < 0) continue;
+    const cs = rowStart[Xi];
+    const ai = surgA[i], bi = surgB[i];
+    let okR = true;
+    for (let k = ai; k < bi; k++) {
+      const e = kids[cs + k];
+      if (e < 0 || rowRule[e] !== elem) { okR = false; break; }
+    }
+    if (!okR) continue;
+    if (bi === ai) {
+      // pure insertion at a kid boundary: it must sit INSIDE the rep zone — at
+      // least one neighbour is an element row. Otherwise the insertion belongs to
+      // an enclosing list (e.g. right after this container's closing brace, where
+      // an element-loop alignment would stitch the new element into a CLOSED node).
+      const pe = ai > 0 ? kids[cs + ai - 1] : -1;
+      const ne = ai < rowCount[Xi] ? kids[cs + ai] : -1;
+      const prevOk = pe >= 0 && rowRule[pe] === elem;
+      const nextOk = ne >= 0 && rowRule[ne] === elem;
+      if (!prevOk && !nextOk) continue;
+    }
+    if (ai > 0 && rowKCof(Xi) !== 1) continue;
+    L = i;
+    break;
+  }
+  if (L < 0) return -1;
+  const D = surgX[L], Dbase = surgBase[L], Da = surgA[L];
+  const Db = surgB[L];
+  const elem = SURG_ELEM[rowRule[D]];
+  const csD = rowStart[D], nD = rowCount[D];
+  const DendNew = Dbase + rowTokLen[D] + tokD;
+  // 3. re-parse the affected span with the real rule (adoption live); the first
+  //    affected kid starts at/before the damage, so old == new coordinates there
+  pos = Da < Db
+    ? Dbase + (kids[csD + Da] < 0 ? (~kids[csD + Da]) >>> 2 : ktr(D, csD + Da))
+    : dmgA;
+  maxPos = pos; scn = 0; parseLimit = -1; cap = tokN;
+  currentPrattContext = null; suppressNext = null; suppressCur = null;
+  const genAt = memoGenCur;
+  const fn = RULE_FN_BY_ID[elem];
+  let j = Db, guard = 0;
+  for (;;) {
+    let target;
+    if (j < nD) {
+      const e = kids[csD + j];
+      target = Dbase + (e < 0 ? (~e) >>> 2 : ktr(D, csD + j)) + tokD;
+    } else target = DendNew;
+    if (pos === target) break;
+    if (pos > target) {
+      // the fresh parse consumed past old kid j: only a rep row may be subsumed
+      if (j >= nD) return -1;
+      const e = kids[csD + j];
+      if (e < 0 || rowRule[e] !== elem) return -1;
+      j++;
+      continue;
+    }
+    if (++guard > 65536) return -1;
+    const pp = pos;
+    if (!fn()) return -1;
+    if (memoGenCur !== genAt || pos === pp) return -1;
+  }
+  // 4. POINT OF NO RETURN — splice D's kid range, shift suffix rels, patch the path
+  const f = scn;
+  const removed = j - Da;
+  const DcharBase = toff(Dbase);
+  let csD2 = csD;
+  if (f === removed) {
+    for (let k = 0; k < f; k++) {
+      const id = sc[k];
+      kids[csD + Da + k] = id;
+      kidTokRel[csD + Da + k] = absTok[id] - Dbase;
+      kidRel[csD + Da + k] = absChar[id] - DcharBase;
+    }
+  } else {
+    const n2k = nD - removed + f;
+    if (kidN + n2k > kidCap) growKids(n2k);
+    const ks = kidN;
+    for (let k = 0; k < Da; k++) {
+      kids[ks + k] = kids[csD + k];
+      kidRel[ks + k] = kidRel[csD + k];
+      kidTokRel[ks + k] = kidTokRel[csD + k];
+    }
+    for (let k = 0; k < f; k++) {
+      const id = sc[k];
+      kids[ks + Da + k] = id;
+      kidTokRel[ks + Da + k] = absTok[id] - Dbase;
+      kidRel[ks + Da + k] = absChar[id] - DcharBase;
+    }
+    for (let k = j; k < nD; k++) {
+      kids[ks + Da + f + (k - j)] = kids[csD + k];
+      kidRel[ks + Da + f + (k - j)] = kidRel[csD + k];
+      kidTokRel[ks + Da + f + (k - j)] = kidTokRel[csD + k];
+    }
+    kidN = ks + n2k;
+    rowStart[D] = ks;
+    rowCount[D] = n2k;
+    // remap the end-relative boundary into the relocated range (copied kids keep
+    // their sign-encoded values; only their indices move). The sentinel maps to
+    // itself (never through index arithmetic — Int32-safe); a boundary in the KEPT
+    // PREFIX keeps its offset so the flip-back band below still reaches those kids;
+    // one in the replaced range lands on the suffix start; else it shifts with it.
+    const nfOld = rowNF[D];
+    rowNF[D] = nfOld >= csD + nD ? 0x7fffffff
+      : nfOld < csD + Da ? ks + (nfOld - csD)
+      : nfOld <= csD + j ? ks + Da + f : (nfOld - csD - j) + ks + Da + f;
+    csD2 = ks;
+  }
+  const n2 = rowCount[D];
+  // End-relative band maintenance (old lengths — the bias cancels against the new
+  // ones exactly like the token-level flip): rows entering the suffix flip to
+  // end-relative; rows leaving it flip back to absolute rels. Rows already beyond
+  // the old boundary auto-shift via the length update below. Leaf kids cannot be
+  // sign-encoded (packed): inside the flip-up band they are re-packed eagerly, and
+  // the trailing run (a pure container's only leaves past the rep) gets the same
+  // eager shift by the backward walk.
+  const bnd = csD2 + Da + f;
+  const nf = rowNF[D];
+  const kidsEnd = csD2 + n2;
+  if (nf < bnd) {
+    for (let k = nf; k < bnd; k++) {
+      const v = kidTokRel[k];
+      if (v < 0) { kidTokRel[k] = v + rowTokLen[D] + 1; kidRel[k] += rowLen[D] + 1; }
+    }
+  } else if (nf > bnd) {
+    const hi = nf < kidsEnd ? nf : kidsEnd;
+    for (let k = bnd; k < hi; k++) {
+      const e = kids[k];
+      if (e < 0) { if (tokD !== 0) kids[k] = ~(((((~e) >>> 2) + tokD) << 2) | ((~e) & 3)); }
+      else {
+        const v = kidTokRel[k];
+        if (v >= 0) { kidTokRel[k] = v - rowTokLen[D] - 1; kidRel[k] -= rowLen[D] + 1; }
+      }
+    }
+  }
+  if (tokD !== 0) {
+    const tlFrom = nf > bnd ? (nf < kidsEnd ? nf : kidsEnd) : bnd;
+    for (let k = kidsEnd - 1; k >= tlFrom; k--) {
+      const e = kids[k];
+      if (e >= 0) break;
+      kids[k] = ~(((((~e) >>> 2) + tokD) << 2) | ((~e) & 3));
+    }
+  }
+  rowNF[D] = bnd;
+  rowTokLen[D] += tokD;
+  // Derive the char length from the token columns rather than adding chrD: a pure-
+  // trivia edit can sit at a node's token BOUNDARY (between its last token and the
+  // next sibling's first), token-inside but char-outside — the gap belongs to no
+  // node. tend/toff give the exact new span; when suffix tokens exist inside the
+  // node the delta equals chrD (so the suffix-kid rel adds and the end-relative
+  // bias-cancel stay consistent), and when they don't there are no suffix kids.
+  if (rowTokLen[D] > 0) rowLen[D] = tend(Dbase + rowTokLen[D] - 1) - toff(Dbase);
+  {
+    let x = rowExt[D] + (tokD > 0 ? tokD : 0);
+    const fw = maxPos - Dbase;
+    if (fw > x) x = fw;
+    rowExt[D] = x;
+  }
+  // containment bit: only the pairs around the splice changed
+  if (rowKC[D] === 1) {
+    let okB = 1;
+    const from = Da > 0 ? Da - 1 : 0;
+    for (let k = from; k < Da + f && k + 1 < n2; k++) {
+      const e = kids[csD2 + k];
+      const w = e < 0 ? ((~e) >>> 2) + 1 : ktr(D, csD2 + k) + rowExt[e];
+      const e2 = kids[csD2 + k + 1];
+      const st2 = e2 < 0 ? (~e2) >>> 2 : ktr(D, csD2 + k + 1);
+      if (w > st2) { okB = 2; break; }
+    }
+    rowKC[D] = okB;
+  }
+  // 5. ancestors bottom-up: lengths, suffix rels, ext, containment boundary pair
+  for (let i = L - 1; i >= 0; i--) {
+    const Ai = surgX[i];
+    const csA = rowStart[Ai], nA = rowCount[Ai];
+    const ki = surgA[i];
+    // kids at/before the path kid are NOT suffix for this edit (the damage sits
+    // inside the path kid): any end-relative rel there must flip back to absolute
+    // with the OLD lengths, or the length update below would shift it
+    const nfA = rowNF[Ai];
+    if (nfA <= csA + ki) {
+      for (let k = nfA; k <= csA + ki; k++) {
+        const v = kidTokRel[k];
+        if (v < 0) { kidTokRel[k] = v + rowTokLen[Ai] + 1; kidRel[k] += rowLen[Ai] + 1; }
+      }
+      rowNF[Ai] = csA + ki + 1;
+    }
+    for (let k = ki + 1; k < nA; k++) {
+      const e = kids[csA + k];
+      if (e < 0) kids[csA + k] = ~(((((~e) >>> 2) + tokD) << 2) | ((~e) & 3));
+      else if (kidTokRel[csA + k] >= 0) { kidTokRel[csA + k] += tokD; kidRel[csA + k] += chrD; }
+      // (end-relative kids past the boundary auto-shift via the length update below)
+    }
+    rowTokLen[Ai] += tokD;
+    if (rowTokLen[Ai] > 0) rowLen[Ai] = tend(surgBase[i] + rowTokLen[Ai] - 1) - toff(surgBase[i]);
+    {
+      let x = rowExt[Ai] + (tokD > 0 ? tokD : 0);
+      const cw = ktr(Ai, csA + ki) + rowExt[surgX[i + 1]];
+      if (cw > x) x = cw;
+      rowExt[Ai] = x;
+    }
+    if (rowKC[Ai] === 1 && ki + 1 < nA) {
+      const e2 = kids[csA + ki + 1];
+      const st2 = e2 < 0 ? (~e2) >>> 2 : ktr(Ai, csA + ki + 1);
+      if (ktr(Ai, csA + ki) + rowExt[surgX[i + 1]] > st2) rowKC[Ai] = 2;
+    }
+  }
+  return adoptRoot;
+}
+
 // The spare token-column buffer set (parseEdited ping-pongs between the live set and
 // this one, so steady-state edits never allocate columns).
 let altK = null, altT = null, altOff = null, altEnd = null, altFl = null, altDp = null, altPd = null;
@@ -2537,7 +2882,7 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   // first old token at/after the damage end — the resync search floor
   let r0 = oN;
   { let lo = 0, hi = oN;
-    while (lo < hi) { const mid = (lo + hi) >> 1; if (tkOff[mid] < ceOld) lo = mid + 1; else hi = mid; }
+    while (lo < hi) { const mid = (lo + hi) >> 1; if (toff(mid) < ceOld) lo = mid + 1; else hi = mid; }
     r0 = lo; }
   // Lex the window into the spare buffers (the old stream stays live for resync).
   if (altK === null || altCap < tkCap) {
@@ -2550,12 +2895,31 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   swapBuffers();              // live = scratch, alt = OLD stream
   src = source;
   tokN = 0;
-  const startOff = B >= 0 ? altEnd[B] : 0;
+  const startOff = B >= 0 ? (altEnd[B] < 0 ? altEnd[B] + srcLenP1 : altEnd[B]) : 0;
   const R0 = lexCore(source, startOff, B >= 0 ? altK[B] : -1, B >= 0 ? altT[B] : 0, r0, ceNew, charDelta, cs, initParens);
   const W = tokN;
   const R = R0 >= 0 ? R0 : oN;
   swapBuffers();              // live = OLD stream again; window sits in the alt buffers
   tokN = oN;
+  // EOF-relative maintenance: move the negative-zone boundary to THIS edit's suffix
+  // start R. Tokens dropping out of the suffix ([negFrom, R)) flip back to absolute
+  // (they sit at/before the damage now — EOF-unstable); tokens entering it
+  // ([R, negFrom)) flip to EOF-relative, encoded against the OLD length (their new
+  // absolute is oldValue + charDelta, and newLen = oldLen + charDelta, so the bias
+  // cancels). Both bands are cursor-locality sized; the suffix itself is never
+  // walked again — updating srcLenP1 after the splice IS the char-delta shift the
+  // old O(suffix) add-loop used to apply.
+  if (negFrom < R) {
+    for (let i = negFrom, e2 = R < oN ? R : oN; i < e2; i++) {
+      const o = tkOff[i]; if (o < 0) tkOff[i] = o + srcLenP1;
+      const en = tkEnd[i]; if (en < 0) tkEnd[i] = en + srcLenP1;
+    }
+  } else if (negFrom > R) {
+    for (let i = R, e2 = negFrom < oN ? negFrom : oN; i < e2; i++) {
+      const o = tkOff[i]; if (o >= 0) tkOff[i] = o - srcLenP1;
+      const en = tkEnd[i]; if (en >= 0) tkEnd[i] = en - srcLenP1;
+    }
+  }
   // TRUE token prefix p: the window re-derives [B+1 .. p) byte-identically; only past
   // p is real damage (compared BEFORE the splice clobbers the old slots).
   let p = B + 1;
@@ -2568,17 +2932,18 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   // ── splice: old[0..B] + window[0..W) + old[R..oN), then shift the suffix spans ──
   const nN = B + 1 + W + (oN - R);
   while (tkCap < nN + 1) growTok();
-  tkK.copyWithin(B + 1 + W, R, oN); tkT.copyWithin(B + 1 + W, R, oN);
-  tkOff.copyWithin(B + 1 + W, R, oN); tkEnd.copyWithin(B + 1 + W, R, oN);
-  tkFl.copyWithin(B + 1 + W, R, oN); tkDp.copyWithin(B + 1 + W, R, oN); tkPd.copyWithin(B + 1 + W, R, oN);
+  if (R !== B + 1 + W) {
+    tkK.copyWithin(B + 1 + W, R, oN); tkT.copyWithin(B + 1 + W, R, oN);
+    tkOff.copyWithin(B + 1 + W, R, oN); tkEnd.copyWithin(B + 1 + W, R, oN);
+    tkFl.copyWithin(B + 1 + W, R, oN); tkDp.copyWithin(B + 1 + W, R, oN); tkPd.copyWithin(B + 1 + W, R, oN);
+  }
   if (W > 0) {
     tkK.set(altK.subarray(0, W), B + 1); tkT.set(altT.subarray(0, W), B + 1);
     tkOff.set(altOff.subarray(0, W), B + 1); tkEnd.set(altEnd.subarray(0, W), B + 1);
     tkFl.set(altFl.subarray(0, W), B + 1); tkDp.set(altDp.subarray(0, W), B + 1); tkPd.set(altPd.subarray(0, W), B + 1);
   }
-  if (charDelta !== 0) {
-    for (let i = B + 1 + W; i < nN; i++) { tkOff[i] += charDelta; tkEnd[i] += charDelta; }
-  }
+  negFrom = B + 1 + W;
+  srcLenP1 = newLen + 1;
   tokN = nN;
   const nN2 = nN;` : String.raw`  // (fallback-lexer grammars keep the full-relex + token-diff path)
   const oK = tkK, oT = tkT, oOff = tkOff, oEnd = tkEnd, oFl = tkFl, oN = tokN;
@@ -2628,6 +2993,16 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   adoptPath.length = 0;
   adoptBase.length = 0;
   adoptRunPos = -1;
+  const sroot = trySurgery(p, dOldEnd, tokenDelta, charDelta);
+  if (sroot >= 0) {
+    adoptRoot = -1;
+    rootCharBase = toff(adoptRootTok);
+    rootTokBase = adoptRootTok;
+    lastRoot = sroot;
+    lastRootTok = adoptRootTok;
+    lastSrc = source;
+    return sroot;
+  }
   const root = runParse(entryRule);
   adoptRoot = -1;
   lastRoot = root;
diff --git a/test/incremental-verify.ts b/test/incremental-verify.ts
index 2ed248d..f6f8838 100644
--- a/test/incremental-verify.ts
+++ b/test/incremental-verify.ts
@@ -12,9 +12,10 @@ import { emitParser } from '../src/emit-parser.ts';
 const grammar = (await import('../typescript.ts')).default;
 const emPath = '/tmp/emitted-incremental.mjs';
 writeFileSync(emPath, emitParser(grammar));
+type Edit = { start: number; oldEnd: number; newEnd: number };
 type Em = {
   parse(s: string): number;
-  parseEdited(s: string): number;
+  parseEdited(s: string, entryRule?: string, edits?: Edit[]): number;
   toObject(id: number): unknown;
 };
 const session = (await import(emPath + '?session=' + process.pid)) as Em;
@@ -28,28 +29,36 @@ const randInt = (n: number) => Math.floor(rand() * n);
 const INSERTS = ['x', '_v', '42', ' + y', '.m', '()', ' /*c*/ ', '"s"', 'await ', '!', '?'];
 const STMTS = ['const q9 = 1;\n', 'function g9(a) { return a; }\n', 'if (x9) { y9(); }\n', '// note\n', 'type T9 = string | number;\n'];
 
-function mutate(text: string): string {
+// Mutations return the edit RANGE too ([start, oldEnd) in the OLD text, newEnd in the
+// NEW text; the range is clamped so it never runs past the old text), so half the steps
+// can exercise the edits PROTOCOL path (the editor-facing API), the rest the char-diff fallback.
+function mutate(text: string): { next: string; edit: Edit } {
   switch (randInt(5)) {
     case 0: { // insert a small fragment at a random position
       const at = randInt(text.length);
-      return text.slice(0, at) + INSERTS[randInt(INSERTS.length)] + text.slice(at);
+      const ins = INSERTS[randInt(INSERTS.length)];
+      return { next: text.slice(0, at) + ins + text.slice(at), edit: { start: at, oldEnd: at, newEnd: at + ins.length } };
     }
     case 1: { // delete a small span
       const at = randInt(Math.max(1, text.length - 8));
-      return text.slice(0, at) + text.slice(at + 1 + randInt(6));
+      const n = Math.min(1 + randInt(6), text.length - at);   // slice() clamps silently; the recorded oldEnd must be clamped too
+      return { next: text.slice(0, at) + text.slice(at + n), edit: { start: at, oldEnd: at + n, newEnd: at } };
     }
     case 2: { // replace a character
       const at = randInt(Math.max(1, text.length - 1));
-      return text.slice(0, at) + 'z' + text.slice(at + 1);
+      return { next: text.slice(0, at) + 'z' + text.slice(at + 1), edit: { start: at, oldEnd: Math.min(at + 1, text.length), newEnd: at + 1 } };
     }
     case 3: { // insert a whole statement at a line boundary
       const lines = text.split('\n');
       const at = randInt(lines.length);
-      lines.splice(at, 0, STMTS[randInt(STMTS.length)].trimEnd());
-      return lines.join('\n');
+      const stmt = STMTS[randInt(STMTS.length)].trimEnd();
+      lines.splice(at, 0, stmt);
+      const start = at === 0 ? 0 : lines.slice(0, at).join('\n').length + 1;   // char offset where line `at` begins
+      return { next: lines.join('\n'), edit: { start, oldEnd: start, newEnd: start + stmt.length + 1 } };   // +1: the newline joined after stmt
     }
     default: { // append at the end (the pure-prefix reuse case)
-      return text + '\n' + STMTS[randInt(STMTS.length)];
+      const stmt = '\n' + STMTS[randInt(STMTS.length)];
+      return { next: text + stmt, edit: { start: text.length, oldEnd: text.length, newEnd: text.length + stmt.length } };
     }
   }
 }
@@ -70,15 +79,16 @@ for (const f of FILES) {
   let text = readFileSync(f, 'utf-8');
   session.parse(text);   // open the session
   for (let k = 0; k < STEPS; k++) {
-    const next = mutate(text);
+    const { next, edit } = mutate(text);
     steps++;
     let freshRoot = -1, freshErr: string | null = null;
     const tf0 = performance.now();
     try { freshRoot = fresh.parse(next); } catch (e) { freshErr = (e as Error).message; }
     const tf1 = performance.now();
     let incRoot = -1, incErr: string | null = null;
+    const useProtocol = k % 2 === 1;   // alternate: edits protocol / char-diff fallback
     const ti0 = performance.now();
-    try { incRoot = session.parseEdited(next); } catch (e) { incErr = (e as Error).message; }
+    try { incRoot = session.parseEdited(next, undefined, useProtocol ? [edit] : undefined); } catch (e) { incErr = (e as Error).message; }
     const ti1 = performance.now();
     if (freshErr !== null || incErr !== null) {
       if ((freshErr === null) !== (incErr === null)) {

From 390b715c7eb35b62df7f0e55af6c6e2bf79dd1f5 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Thu, 11 Jun 2026 06:32:21 +0800
Subject: [PATCH 10/15] Pure-container ancestors get the end-relative kid band
 too

A deep edit under a giant flat list paid an O(suffix-kids) eager rel
walk per keystroke on every ANCESTOR with a large suffix - the band so
far existed only on the surgical container itself. Measured on the 9MB
flat body as ancestor: 0.60ms median / 1.85ms p90 per keystroke.

Ancestors whose rule is a pure container (SURG_ELEM: interior = element
rows only, leaves only as a trailing run) now maintain the same
end-relative band as D: rows entering the suffix flip once (old-length
bias cancels), rows beyond the boundary ride the parent length update,
trailing leaves get the O(1) backward re-pack. Mixed-content ancestors
(interleaved leaves cannot sign-encode inside the packed kid entry)
keep the eager walk - those are the grammar's non-list shapes with
small kid counts.

Nested edit on the 9MB flat body: 0.60ms -> 0.031ms median / 0.047ms
p90. List-level keystroke unchanged (0.04ms median), 8MB nested real
shape 0.13ms, batch aggregate in band (11.2x), 30/30 gates,
incremental-verify 0/120, emit-parser-verify 0 mismatches.
---
 src/emit-parser.ts | 42 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index f17a4ac..4656d9d 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -2768,11 +2768,43 @@ function trySurgery(dmgA, dmgB, tokD, chrD) {
       }
       rowNF[Ai] = csA + ki + 1;
     }
-    for (let k = ki + 1; k < nA; k++) {
-      const e = kids[csA + k];
-      if (e < 0) kids[csA + k] = ~(((((~e) >>> 2) + tokD) << 2) | ((~e) & 3));
-      else if (kidTokRel[csA + k] >= 0) { kidTokRel[csA + k] += tokD; kidRel[csA + k] += chrD; }
-      // (end-relative kids past the boundary auto-shift via the length update below)
+    // Suffix kids: a PURE-container ancestor (interior = element rows only, leaves
+    // only as a trailing run) gets the same end-relative band as D — without it, a
+    // deep edit under a giant flat list pays an O(suffix) eager walk per keystroke
+    // (measured: 0.6ms median on the 9MB body as ancestor). Mixed-content ancestors
+    // (interleaved leaves can't sign-encode inside the packed entry) keep the eager
+    // walk; their kid counts are the grammar's non-list shapes.
+    if (SURG_ELEM[rowRule[Ai]] >= 0) {
+      const bndA = csA + ki + 1;
+      const nfA2 = rowNF[Ai];
+      const kidsEndA = csA + nA;
+      if (nfA2 > bndA) {
+        const hi = nfA2 < kidsEndA ? nfA2 : kidsEndA;
+        for (let k = bndA; k < hi; k++) {
+          const e = kids[k];
+          if (e < 0) { if (tokD !== 0) kids[k] = ~(((((~e) >>> 2) + tokD) << 2) | ((~e) & 3)); }
+          else {
+            const v = kidTokRel[k];
+            if (v >= 0) { kidTokRel[k] = v - rowTokLen[Ai] - 1; kidRel[k] -= rowLen[Ai] + 1; }
+          }
+        }
+      }
+      if (tokD !== 0) {
+        const tlFrom = nfA2 > bndA ? (nfA2 < kidsEndA ? nfA2 : kidsEndA) : bndA;
+        for (let k = kidsEndA - 1; k >= tlFrom; k--) {
+          const e = kids[k];
+          if (e >= 0) break;
+          kids[k] = ~(((((~e) >>> 2) + tokD) << 2) | ((~e) & 3));
+        }
+      }
+      rowNF[Ai] = bndA;
+    } else {
+      for (let k = ki + 1; k < nA; k++) {
+        const e = kids[csA + k];
+        if (e < 0) kids[csA + k] = ~(((((~e) >>> 2) + tokD) << 2) | ((~e) & 3));
+        else if (kidTokRel[csA + k] >= 0) { kidTokRel[csA + k] += tokD; kidRel[csA + k] += chrD; }
+        // (end-relative kids past the boundary auto-shift via the length update below)
+      }
     }
     rowTokLen[Ai] += tokD;
     if (rowTokLen[Ai] > 0) rowLen[Ai] = tend(surgBase[i] + rowTokLen[Ai] - 1) - toff(surgBase[i]);

From 6ce7717ed14732daae6dd35da570f6636037e41c Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Thu, 11 Jun 2026 06:59:23 +0800
Subject: [PATCH 11/15] Handle API: explicit tree handles over per-document
 state; strict-< restart anchor

API rework (the session model made the edit base IMPLICIT - parseEdited
acted on whatever was parsed last, and two interleaved documents shared
one module state):

  const p = createParser();
  const cst = p.parse(text);
  const cst2 = p.edit(cst, next[, edits]);

- Each parser instance owns a DOCUMENT: the 51 per-document fields
  (token columns, arena, kids, memo, session, paren cache, spare
  buffers) live in a doc object; the module-level variables stay the
  ACTIVE REGISTER SET and activate() lazily swaps on instance switch -
  the hot paths never indirect through an object (batch unchanged,
  11.0-11.3x band; handle-API keystroke median 0.06ms).
- Handles are generation-stamped: trees are edited IN PLACE (node
  surgery), so an edit invalidates earlier handles of that parser -
  using one throws instead of silently reading a mutated tree. A
  REJECTED edit leaves the previous handle valid and the next edit
  falls back to a full re-parse internally.
- Module-level parse/parseEdited/visit/toObject keep working on a
  default document (gates/back-compat); the interpreter's createParser
  gains edit() (full re-parse - immutable object trees) for API parity.
- NEW gate test/multi-doc.ts: two instances over two sources, edits
  interleaved with the default doc mixed in - every edited tree must
  equal a fresh parse (a missed swap field = cross-document corruption),
  plus the stale/foreign/reject handle contract.

SOUNDNESS FIX the new smoke test exposed (predates this branch, M1-era):
findRestart anchored at tokens ENDING exactly at the damage start, but
maximal munch lets the edit EXTEND such a token ('b' + inserted 'x'
lexes as 'bx', '=' + '=' as '==', deleting a gap glues neighbours) and
the anchor itself is never re-lexed - the spliced stream then carried
'b','x' as two tokens where a batch lex has one, parsing a DIFFERENT
(sometimes still valid) program. The fixed-seed gate sessions never hit
the abutment in 120 steps. Anchor comparison is now STRICT (<), so the
abutting token falls inside the window and the merge is re-derived;
incremental-verify gains a deterministic boundary-glue session (ident
glue, operator glue, gap deletion, '>>' split sites) so the class stays
pinned.

31/31 gates (multi-doc included), incremental-verify 128 steps 0
mismatch, emit-parser-verify 0 mismatches, agnostic 9/9, batch in band.
---
 src/emit-lexer.ts          |   6 +-
 src/emit-parser.ts         | 128 ++++++++++++++++++++++++++++++++++---
 src/gen-parser.ts          |   5 +-
 test/check.ts              |   1 +
 test/incremental-verify.ts |  55 ++++++++++++++--
 test/multi-doc.ts          | 104 ++++++++++++++++++++++++++++++
 6 files changed, 282 insertions(+), 17 deletions(-)
 create mode 100644 test/multi-doc.ts

diff --git a/src/emit-lexer.ts b/src/emit-lexer.ts
index 07745ea..704111e 100644
--- a/src/emit-lexer.ts
+++ b/src/emit-lexer.ts
@@ -516,7 +516,11 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`// head (always sound, degrades to a full re-lex).`);
   emit(`function findRestart(cs) {`);
   emit(`  let lo = 0, hi = tokN;`);
-  emit(`  while (lo < hi) { const mid = (lo + hi) >> 1; if (tend(mid) <= cs) lo = mid + 1; else hi = mid; }`);
+  // STRICTLY before the damage: a token ENDING exactly at cs can be EXTENDED by
+  // the edit under maximal munch ('b' + inserted 'x' = 'bx'; '=' + '=' = '==';
+  // deleting the gap glues neighbours) and the anchor itself is never re-lexed —
+  // with < the abutting token falls inside the window and the merge is re-derived.
+  emit(`  while (lo < hi) { const mid = (lo + hi) >> 1; if (tend(mid) < cs) lo = mid + 1; else hi = mid; }`);
   emit(`  for (let b = lo - 1; b >= 0; b--) {`);
   emit(`    // template depth must be zero (interp brace counters are not reconstructable),`);
   emit(`    // and the anchor token must leave no cross-token lexer flag live: not a`);
diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index 4656d9d..2991fb2 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -2310,7 +2310,7 @@ export const tree = {
 // Depth-first traversal threading the RED coordinates: enter/leave receive the
 // node's absolute (charBase, tokBase); leaf receives its absolute token index.
 // Call with the root only — the bases default from the root's rel fields.
-export function visit(entry, fns, charBase, tokBase) {
+function visitCore(entry, fns, charBase, tokBase) {
   if (charBase === undefined) { charBase = rootCharBase; tokBase = rootTokBase; }
   if (entry < 0) { if (fns.leaf) fns.leaf(entry, tokBase + ((~entry) >>> 2)); return; }
   if (fns.enter && fns.enter(entry, charBase, tokBase) === false) return;
@@ -2319,20 +2319,20 @@ export function visit(entry, fns, charBase, tokBase) {
   for (let i = 0; i < n; i++) {
     const e = kids[cs + i];
     if (e < 0) { if (fns.leaf) fns.leaf(e, tokBase + ((~e) >>> 2)); }
-    else visit(e, fns, charBase + kcr(entry, cs + i), tokBase + ktr(entry, cs + i));
+    else visitCore(e, fns, charBase + kcr(entry, cs + i), tokBase + ktr(entry, cs + i));
   }
   if (fns.leave) fns.leave(entry, charBase, tokBase);
 }
 // Materialize the classic object CST from a node id — a BRIDGE for tests/debugging
 // (the byte-identical gate against the interpreter), not a parse-path product.
-export function toObject(id, charBase, tokBase) {
+function toObjectCore(id, charBase, tokBase) {
   if (charBase === undefined) { charBase = rootCharBase; tokBase = rootTokBase; }
   const n = rowCount[id];
   const cs = rowStart[id];
   const children = new Array(n);
   for (let i = 0; i < n; i++) {
     const entry = kids[cs + i];
-    children[i] = entry >= 0 ? toObject(entry, charBase + kcr(id, cs + i), tokBase + ktr(id, cs + i))
+    children[i] = entry >= 0 ? toObjectCore(entry, charBase + kcr(id, cs + i), tokBase + ktr(id, cs + i))
       : { tokenType: leafTokenType(entry, tokBase), offset: toff(tokBase + ((~entry) >>> 2)), end: tend(tokBase + ((~entry) >>> 2)) };
   }
   return { rule: RULE_NAMES[rowRule[id]], children, offset: charBase, end: charBase + rowLen[id] };
@@ -2828,6 +2828,76 @@ function trySurgery(dmgA, dmgB, tokD, chrD) {
 let altK = null, altT = null, altOff = null, altEnd = null, altFl = null, altDp = null, altPd = null;
 let altCap = 0;
 let altN = 0;   // old-stream token count while a window lex runs (lexCore's resync bound)
+
+// ── Documents: the per-document state set behind the handle API ──
+// The module-level variables above are the ACTIVE REGISTER SET — the hot paths
+// never indirect through an object. A document object stores the same 51 fields;
+// activate() lazily swaps: the active doc's object may be stale while the module
+// variables are the truth, and is written back only when another doc activates.
+// Per-PARSE transients (pos/maxPos/scratch/adopt*/surg*) reset on every entry and
+// are shared safely.
+function makeDoc() {   // fresh EMPTY document: token columns sized 4096, row columns 8192, kid columns 16384, empty memo, no session (lastSrc null), no spare alt* buffers
+  return {
+    tkK: new tkK.constructor(4096), tkT: new tkT.constructor(4096),
+    tkOff: new Int32Array(4096), tkEnd: new Int32Array(4096), tkFl: new Uint8Array(4096),
+    tkDp: new Uint8Array(4096), tkPd: new Uint16Array(4096),
+    tkCap: 4096, tokN: 0, src: '', srcLenP1: 1, negFrom: 0x7fffffff,
+    rowRule: new Uint16Array(8192), rowLen: new Int32Array(8192), rowTokLen: new Int32Array(8192),
+    rowStart: new Int32Array(8192), rowCount: new Int32Array(8192), rowExt: new Int32Array(8192),
+    rowOK: new Uint8Array(8192), rowKC: new Uint8Array(8192),
+    rowNF: new Int32Array(8192).fill(0x7fffffff),
+    absChar: new Int32Array(8192), absTok: new Int32Array(8192),
+    rowCap: 8192, nodeN: 0,
+    kids: new Int32Array(16384), kidRel: new Int32Array(16384), kidTokRel: new Int32Array(16384),
+    kidCap: 16384, kidN: 0,
+    memoNode: [], memoEnd: [], memoExt: [], memoGen: [], memoGenCur: 0,
+    lastSrc: null, rootCharBase: 0, rootTokBase: 0, lastRoot: -1, lastRootTok: 0,
+${e.soa ? '    parenCachePos: -1, parenCacheStack: [],' : ''}
+    altK: null, altT: null, altOff: null, altEnd: null, altFl: null, altDp: null, altPd: null,
+    altCap: 0, altN: 0,
+  };
+}
+function saveDoc(d) {   // write the module-level register set back into d (arrays by reference, scalars by value); must list the same fields as makeDoc/loadDoc
+  d.tkK = tkK; d.tkT = tkT; d.tkOff = tkOff; d.tkEnd = tkEnd; d.tkFl = tkFl;
+  d.tkDp = tkDp; d.tkPd = tkPd; d.tkCap = tkCap; d.tokN = tokN; d.src = src;
+  d.srcLenP1 = srcLenP1; d.negFrom = negFrom;
+  d.rowRule = rowRule; d.rowLen = rowLen; d.rowTokLen = rowTokLen; d.rowStart = rowStart;
+  d.rowCount = rowCount; d.rowExt = rowExt; d.rowOK = rowOK; d.rowKC = rowKC; d.rowNF = rowNF;
+  d.absChar = absChar; d.absTok = absTok; d.rowCap = rowCap; d.nodeN = nodeN;
+  d.kids = kids; d.kidRel = kidRel; d.kidTokRel = kidTokRel; d.kidCap = kidCap; d.kidN = kidN;
+  d.memoNode = memoNode; d.memoEnd = memoEnd; d.memoExt = memoExt; d.memoGen = memoGen;
+  d.memoGenCur = memoGenCur;
+  d.lastSrc = lastSrc; d.rootCharBase = rootCharBase; d.rootTokBase = rootTokBase;
+  d.lastRoot = lastRoot; d.lastRootTok = lastRootTok;
+${e.soa ? '  d.parenCachePos = parenCachePos; d.parenCacheStack = parenCacheStack;' : ''}
+  d.altK = altK; d.altT = altT; d.altOff = altOff; d.altEnd = altEnd; d.altFl = altFl;
+  d.altDp = altDp; d.altPd = altPd; d.altCap = altCap; d.altN = altN;
+}
+function loadDoc(d) {   // inverse of saveDoc: make d's stored fields the module-level register set (field list must mirror saveDoc exactly)
+  tkK = d.tkK; tkT = d.tkT; tkOff = d.tkOff; tkEnd = d.tkEnd; tkFl = d.tkFl;
+  tkDp = d.tkDp; tkPd = d.tkPd; tkCap = d.tkCap; tokN = d.tokN; src = d.src;
+  srcLenP1 = d.srcLenP1; negFrom = d.negFrom;
+  rowRule = d.rowRule; rowLen = d.rowLen; rowTokLen = d.rowTokLen; rowStart = d.rowStart;
+  rowCount = d.rowCount; rowExt = d.rowExt; rowOK = d.rowOK; rowKC = d.rowKC; rowNF = d.rowNF;
+  absChar = d.absChar; absTok = d.absTok; rowCap = d.rowCap; nodeN = d.nodeN;
+  kids = d.kids; kidRel = d.kidRel; kidTokRel = d.kidTokRel; kidCap = d.kidCap; kidN = d.kidN;
+  memoNode = d.memoNode; memoEnd = d.memoEnd; memoExt = d.memoExt; memoGen = d.memoGen;
+  memoGenCur = d.memoGenCur;
+  lastSrc = d.lastSrc; rootCharBase = d.rootCharBase; rootTokBase = d.rootTokBase;
+  lastRoot = d.lastRoot; lastRootTok = d.lastRootTok;
+${e.soa ? '  parenCachePos = d.parenCachePos; parenCacheStack = d.parenCacheStack;' : ''}
+  altK = d.altK; altT = d.altT; altOff = d.altOff; altEnd = d.altEnd; altFl = d.altFl;
+  altDp = d.altDp; altPd = d.altPd; altCap = d.altCap; altN = d.altN;
+}
+const docDefault = makeDoc();   // the document behind the module-level parse/parseEdited/visit/toObject
+let curDoc = docDefault;        // the document whose state currently lives in the module-level variables
+loadDoc(docDefault);            // start with the default document's fresh buffers as the register set
+function activate(d) {   // make d the active document; no-op when it already is
+  if (d === curDoc) return;
+  saveDoc(curDoc);   // lazy write-back: the outgoing doc object is only brought up to date here
+  loadDoc(d);
+  curDoc = d;
+}
 function swapBuffers() {
   let x;
   x = tkK; tkK = altK; altK = x;
@@ -2841,7 +2911,7 @@ function swapBuffers() {
 }
 ${e.soa ? '' : 'let altText = [];'}
 
-export function parse(source, entryRule) {
+function parseCore(source, entryRule) {
   lastSrc = null;
   adoptRoot = -1;
   adoptRunPos = -1;
@@ -2877,8 +2947,8 @@ export function parse(source, entryRule) {
 // until then. Lexing is FULL-FILE by design: the lexer carries cross-token state
 // (template nesting, regex context, markup modes), full lexing is a small share of a
 // parse, and the diff is what localizes the damage — not the lexer.
-export function parseEdited(source, entryRule, edits) {
-  if (lastSrc === null) return parse(source, entryRule);
+function editCore(source, entryRule, edits) {
+  if (lastSrc === null) return parseCore(source, entryRule);
   const oSrc = lastSrc;
   lastSrc = null;
 ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
@@ -3044,6 +3114,48 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
 }
 
 export { tokenize };
-export function createParser() { return { parse, parseEdited, tree, visit, toObject, tokenize }; }
+// ── Module-level API: the DEFAULT document (one shared session; tokenize and the
+// raw tree/tokenAt views read the ACTIVE doc — they are gate/debug surfaces) ──
+export function parse(source, entryRule) { activate(docDefault); return parseCore(source, entryRule); }
+export function parseEdited(source, entryRule, edits) { activate(docDefault); return editCore(source, entryRule, edits); }
+export function visit(entry, fns, charBase, tokBase) { activate(docDefault); return visitCore(entry, fns, charBase, tokBase); }
+export function toObject(id, charBase, tokBase) { activate(docDefault); return toObjectCore(id, charBase, tokBase); }
+// ── Handle API: explicit trees over per-instance documents ──
+// const p = createParser(); const cst = p.parse(text); const cst2 = p.edit(cst, next[, edits]);
+// Trees are edited IN PLACE (node surgery): an edit invalidates every earlier handle
+// of this parser — using one throws instead of silently reading a mutated tree. A
+// REJECTED edit (parse error) leaves the previous handle valid; the next edit falls
+// back to a full re-parse internally.
+export function createParser() {
+  const d = makeDoc();   // this instance's private document
+  let gen = 0;           // generation of the one current handle; bumped only after a parse/edit returns
+  let entryUsed;         // entry rule passed to the latest parse() call; edit() reuses it
+  const chk = (cst) => {   // handle guard: throws unless cst is this instance's current handle (a null/undefined handle also gets the 'foreign' message)
+    if (cst === null || cst === undefined || cst.d !== d) throw new Error('foreign tree handle: it belongs to another parser instance');
+    if (cst.gen !== gen) throw new Error('stale tree handle: trees are edited in place - use the handle returned by the latest parse/edit');
+  };
+  const view = {};   // the members of tree, wrapped so every call activates this document first
+  for (const k of Object.keys(tree)) {
+    const f = tree[k];
+    view[k] = (a, b) => { activate(d); return f(a, b); };   // NOTE(review): forwards two arguments only - confirm no tree member takes more
+  }
+  return {
+    parse(source, entryRule) {
+      activate(d);
+      entryUsed = entryRule;   // recorded before parsing, so it is kept even when parseCore throws
+      const root = parseCore(source, entryRule);
+      return { d, gen: ++gen, root };   // not reached on a throw: gen stays, earlier handles still pass chk
+    },
+    edit(cst, source, edits) {
+      chk(cst);
+      activate(d);
+      const root = editCore(source, entryUsed, edits);
+      return { d, gen: ++gen, root };   // a rejected edit throws above, leaving gen (and the passed handle) current
+    },
+    visit(cst, fns) { chk(cst); activate(d); return visitCore(cst.root, fns); },
+    toObject(cst) { chk(cst); activate(d); return toObjectCore(cst.root); },
+    tree: view,
+  };
+}
 `);
 }
diff --git a/src/gen-parser.ts b/src/gen-parser.ts
index f56f405..830f819 100644
--- a/src/gen-parser.ts
+++ b/src/gen-parser.ts
@@ -1482,7 +1482,10 @@ export function createParser(grammar: CstGrammar) {
     }
   }
 
-  return { parse, tokenize, profCounts };
+  // API parity with the emitted engine's handle surface: the interpreter builds
+  // immutable object trees, so edit() is a full re-parse (no reuse, no staleness).
+  const edit = (_cst: unknown, source: string) => parse(source);
+  return { parse, edit, tokenize, profCounts };
 }
 
 // ── Helpers ──
diff --git a/test/check.ts b/test/check.ts
index a0f18e4..8754566 100644
--- a/test/check.ts
+++ b/test/check.ts
@@ -22,6 +22,7 @@ const GATES: Gate[] = [
   { group: 'conformance', name: 'ts-ast-structure', args: ['test/ts-ast-verify.ts'] },
   { group: 'core', name: 'cst-match-totality', args: ['test/cst-match-totality.ts'] },
   { group: 'core', name: 'incremental-verify', args: ['test/incremental-verify.ts'] },
+  { group: 'core', name: 'multi-doc', args: ['test/multi-doc.ts'] },
   { group: 'core', name: 'issue-cases', args: ['test/test-issues.ts'] },
   { group: 'conformance', name: 'js', args: ['test/js-conformance.ts'] },
   { group: 'conformance', name: 'tsx', args: ['test/tsx-conformance.ts'] },
diff --git a/test/incremental-verify.ts b/test/incremental-verify.ts
index f6f8838..e452c07 100644
--- a/test/incremental-verify.ts
+++ b/test/incremental-verify.ts
@@ -13,12 +13,18 @@ const grammar = (await import('../typescript.ts')).default;
 const emPath = '/tmp/emitted-incremental.mjs';
 writeFileSync(emPath, emitParser(grammar));
 type Edit = { start: number; oldEnd: number; newEnd: number };
+type Cst = { root: number };
+type Parser = {
+  parse(s: string): Cst;
+  edit(cst: Cst, s: string, edits?: Edit[]): Cst;
+  toObject(cst: Cst): unknown;
+};
 type Em = {
   parse(s: string): number;
-  parseEdited(s: string, entryRule?: string, edits?: Edit[]): number;
   toObject(id: number): unknown;
+  createParser(): Parser;
 };
-const session = (await import(emPath + '?session=' + process.pid)) as Em;
+const session = ((await import(emPath + '?session=' + process.pid)) as Em).createParser();
 const fresh = (await import(emPath + '?fresh=' + process.pid)) as Em;
 
 // Deterministic LCG so failures replay.
@@ -71,13 +77,47 @@ const FILES = [
 ].filter(existsSync);
 const STEPS = 30;
 
+// ── Adversarial boundary edits (deterministic) ──
+// The fixed-seed random sessions MISSED the restart-anchor abutment hole (a token
+// ending exactly at the damage start can be EXTENDED under maximal munch — 'b'+'x'
+// = 'bx', '='+'=' = '==', deleting a gap glues neighbours). These cases pin the
+// strict-< restart anchor; every one must match fresh (tree or reject) exactly.
+const GLUE: Array<[string, string]> = [
+  ['const a = 1;\nconst b = 2;\n', 'const a = 1;\nconst bx = 2;\n'],
+  ['let a = b; let c = 1;\n', 'let a = b1; let c = 1;\n'],
+  ['if (a = b) { f(); }\n', 'if (a == b) { f(); }\n'],
+  ['const x = a b;\n', 'const x = ab;\n'],
+  ['const q = w / 2;\n', 'const q = w /= 2;\n'],
+  ['const t = a + b;\n', 'const t = a ++ b;\n'],
+  ['const u = x<y>(z);\n', 'const u = x<y>>(z);\n'],
+  ['f(a, b);\ng(c);\n', 'f(a, bc);\ng(c);\n'],
+];
+
 let steps = 0, equal = 0, bothReject = 0, mismatch = 0;
 let tInc = 0, tFresh = 0;
 const failures: string[] = [];
 
+for (const [base, edited] of GLUE) {   // each case: open the session on base, edit to edited, compare against a fresh parse
+  steps++;
+  const c0 = session.parse(base);
+  let fe: string | null = null, ie: string | null = null;
+  let fr = -1, ic: Cst | null = null;
+  try { fr = fresh.parse(edited); } catch (e) { fe = (e as Error).message; }
+  try { ic = session.edit(c0, edited); } catch (e) { ie = (e as Error).message; }
+  if (fe !== null || ie !== null) {   // at least one side rejected: they must agree on rejecting
+    if ((fe === null) !== (ie === null)) { mismatch++; if (failures.length < 5) failures.push(`glue «${edited.slice(0, 30)}»: fresh ${fe ? 'reject' : 'accept'} / incremental ${ie ? 'reject' : 'accept'}`); }
+    else bothReject++;
+    continue;
+  }
+  const a = JSON.stringify(fresh.toObject(fr));
+  const b = JSON.stringify(session.toObject(ic!));
+  if (a === b) equal++;
+  else { mismatch++; if (failures.length < 5) failures.push(`glue «${edited.slice(0, 30)}»: tree diverges`); }
+}
+
 for (const f of FILES) {
   let text = readFileSync(f, 'utf-8');
-  session.parse(text);   // open the session
+  let cst = session.parse(text);   // open the session
   for (let k = 0; k < STEPS; k++) {
     const { next, edit } = mutate(text);
     steps++;
@@ -85,22 +125,23 @@ for (const f of FILES) {
     const tf0 = performance.now();
     try { freshRoot = fresh.parse(next); } catch (e) { freshErr = (e as Error).message; }
     const tf1 = performance.now();
-    let incRoot = -1, incErr: string | null = null;
+    let incCst: Cst | null = null, incErr: string | null = null;
     const useProtocol = k % 2 === 1;   // alternate: edits protocol / char-diff fallback
     const ti0 = performance.now();
-    try { incRoot = session.parseEdited(next, undefined, useProtocol ? [edit] : undefined); } catch (e) { incErr = (e as Error).message; }
+    try { incCst = session.edit(cst, next, useProtocol ? [edit] : undefined); } catch (e) { incErr = (e as Error).message; }
     const ti1 = performance.now();
     if (freshErr !== null || incErr !== null) {
       if ((freshErr === null) !== (incErr === null)) {
         mismatch++;
         if (failures.length < 5) failures.push(`${f.split('/').pop()} step ${k}: fresh ${freshErr ? 'reject' : 'accept'} / incremental ${incErr ? 'reject' : 'accept'}\n    fresh: ${freshErr ?? '-'}\n    inc:   ${incErr ?? '-'}`);
       } else bothReject++;
-      // rejected text: do not advance the session text (the session reset itself)
+      if (incCst !== null) { cst = incCst; text = next; }   // inc-only accept: the edit landed and staled the old handle, so follow the session; a reject keeps handle and text
       continue;
     }
+    cst = incCst!;
     tFresh += tf1 - tf0; tInc += ti1 - ti0;
     const a = JSON.stringify(fresh.toObject(freshRoot));
-    const b = JSON.stringify(session.toObject(incRoot));
+    const b = JSON.stringify(session.toObject(cst));
     if (a === b) equal++;
     else {
       mismatch++;
diff --git a/test/multi-doc.ts b/test/multi-doc.ts
new file mode 100644
index 0000000..299798c
--- /dev/null
+++ b/test/multi-doc.ts
@@ -0,0 +1,104 @@
+// Gate: DOCUMENTS ARE ISOLATED. The handle API (createParser → parse/edit with
+// explicit tree handles) keeps one document's state per parser instance behind a
+// lazily-swapped register set — a missed swap field shows up as cross-document
+// corruption. Two instances edit two different sources interleaved (plus the
+// module-level default-doc API mixed in between); every edited tree must be
+// byte-identical (toObject) to a fresh parse of the same text. Also pins the
+// handle contract: stale and foreign handles throw instead of silently reading
+// an in-place-mutated tree, and a REJECTED edit leaves the old handle valid.
+//
+//   node test/multi-doc.ts
+import { writeFileSync } from 'node:fs';
+import { emitParser } from '../src/emit-parser.ts';
+
+const grammar = (await import('../typescript.ts')).default;
+const emPath = '/tmp/emitted-multidoc.mjs';
+writeFileSync(emPath, emitParser(grammar));
+type Edit = { start: number; oldEnd: number; newEnd: number };
+type Cst = { root: number };
+type Parser = { parse(s: string): Cst; edit(cst: Cst, s: string, edits?: Edit[]): Cst; toObject(cst: Cst): unknown; visit(cst: Cst, fns: object): void };
+type Em = { parse(s: string): number; toObject(id: number): unknown; createParser(): Parser };
+const em = (await import(emPath + '?v=' + process.pid)) as Em;
+
+// Two synthetic documents (no corpus dependency — the gate always exercises).
+const mk = (tag: string, n: number) => {
+  let s = '';
+  for (let i = 0; i < n; i++) s += `function ${tag}_${i}(a) { if (a > ${i}) { return a * ${i}; } const v_${i} = { x: ${i} }; return v_${i}.x; }\n`;
+  return s;
+};
+let textA = mk('alpha', 400);
+let textB = `(function () {\n${mk('beta', 300)}})();\n`;
+
+let seed = 0x51C0FFEE;
+const rand = () => ((seed = (seed * 48271) % 0x7fffffff) / 0x7fffffff);
+const randInt = (n: number) => Math.floor(rand() * n);
+const INS = ['x', '1', ' + q', '.m', '(/*c*/)', '"s"'];
+function mutate(text: string): string {
+  switch (randInt(3)) {
+    case 0: { const at = randInt(text.length); return text.slice(0, at) + INS[randInt(INS.length)] + text.slice(at); }
+    case 1: { const at = randInt(Math.max(1, text.length - 6)); return text.slice(0, at) + text.slice(at + 1 + randInt(4)); }
+    default: { const at = randInt(Math.max(1, text.length - 1)); return text.slice(0, at) + 'z' + text.slice(at + 1); }
+  }
+}
+
+const p1 = em.createParser();
+const p2 = em.createParser();
+const f = em.createParser();
+let cstA = p1.parse(textA);
+let cstB = p2.parse(textB);
+
+let steps = 0, equal = 0, bothReject = 0, mismatch = 0;
+const failures: string[] = [];
+for (let k = 0; k < 60; k++) {
+  const onA = (k & 1) === 0;
+  const text = onA ? textA : textB;
+  const next = mutate(text);
+  steps++;
+  let fe: string | null = null, ie: string | null = null;
+  let fc: Cst | null = null, ic: Cst | null = null;
+  try { fc = f.parse(next); } catch (e) { fe = (e as Error).message; }
+  try { ic = (onA ? p1 : p2).edit(onA ? cstA : cstB, next); } catch (e) { ie = (e as Error).message; }
+  if (fe !== null || ie !== null) {
+    if ((fe === null) !== (ie === null)) { mismatch++; if (failures.length < 5) failures.push(`step ${k} (${onA ? 'A' : 'B'}): fresh ${fe ? 'reject' : 'accept'} / edit ${ie ? 'reject' : 'accept'}`); }
+    else bothReject++;
+    continue;
+  }
+  // mix the module-level default doc in between: it must not disturb either instance
+  if (k % 5 === 0) em.parse('const mix = ' + k + ';');
+  const a = JSON.stringify(f.toObject(fc!));
+  const b = JSON.stringify((onA ? p1 : p2).toObject(ic!));
+  if (a === b) equal++;
+  else {
+    mismatch++;
+    if (failures.length < 5) {
+      let i = 0; while (i < a.length && a[i] === b[i]) i++;
+      failures.push(`step ${k} (${onA ? 'A' : 'B'}): tree diverges @${i}`);
+    }
+  }
+  if (onA) { textA = next; cstA = ic!; } else { textB = next; cstB = ic!; }
+}
+
+// handle contract
+let contract = 0;
+{
+  const p = em.createParser();
+  const c1 = p.parse('const a = 1;');
+  const c2 = p.edit(c1, 'const ab = 1;');
+  try { p.edit(c1, 'const x = 2;'); failures.push('stale handle did not throw'); } catch { contract++; }
+  try { p.toObject(c1); failures.push('stale toObject did not throw'); } catch { contract++; }
+  try { p2.edit(c2, 'const y = 3;'); failures.push('foreign handle did not throw'); } catch { contract++; }
+  // a rejected edit leaves the handle valid
+  let rejected = false;
+  try { p.edit(c2, 'const ] = ;'); } catch { rejected = true; }
+  const c3 = rejected ? p.edit(c2, 'const ab = 12;') : null;
+  if (!rejected || c3 === null) failures.push('reject-then-edit flow broke');
+  else contract++;
+}
+
+console.log(`multi-doc: ${equal} equal · ${bothReject} both-reject · ${mismatch} MISMATCH (${steps} interleaved steps) · contract ${contract}/4`);
+for (const s of failures) console.log('  ✗ ' + s);
+if (mismatch > 0 || contract !== 4 || failures.length > 0) {
+  console.error('✗ document isolation / handle contract violated');
+  process.exit(1);
+}
+console.log('✓ documents are isolated; handles enforce the in-place-edit contract');

From 70064d24bd47662f1fe978f13060912680c0835d Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Thu, 11 Jun 2026 07:08:38 +0800
Subject: [PATCH 12/15] edit() mutates the handle in place and returns nothing;
 reject-safe contract

Returning a new handle from edit() read like value semantics - as if the
old cst survived and edit produced a clone. There is no clone: surgery
mutates the tree in place. The handle is now the STABLE IDENTITY of the
document's tree: p.edit(cst, next) updates cst.root and returns void;
the same reference always reads the current tree.

Making that honest exposed two holes in the reject path, both now pinned by the contract tests:

- A rejected edit had already spliced the token columns to the rejected
  text (the splice precedes the parse attempt), so the kept tree's leaf
  spans read corrupted data. The reject path now restores the columns by
  re-lexing the LIVE tree's source (treeSrc, which unlike lastSrc
  survives rejects) - O(n) on the reject path only; #39's recovery mode
  is what makes rejects rare.
- The full-parse fallback inside edit (after a previous reject) went
  through parseCore, whose arena reset destroys the live tree BEFORE
  knowing whether the new text parses. edit now falls back in APPEND
  mode; parse() is the only compaction point - and since its reset
  happens before its outcome is known, parse() bumps the generation on
  entry: old handles die when a document is re-opened, success or not.

Handle contract (gated, 5/5): in-place edit updates the same handle; a
rejected edit throws and keeps the handle on the previous tree
(readable); foreign handles throw; re-opening via parse() - including a
REJECTING parse() - invalidates prior handles. The interpreter's edit()
mirrors the in-place semantics by replacing the tree object's fields.

31/31 gates, incremental-verify 128 steps 0 mismatch, parity 0
mismatches, handle-API keystroke median 0.028ms.
---
 src/emit-parser.ts         | 67 ++++++++++++++++++++++++++++++--------
 src/gen-parser.ts          | 11 +++++--
 test/incremental-verify.ts | 17 +++++-----
 test/multi-doc.ts          | 43 ++++++++++++++----------
 4 files changed, 96 insertions(+), 42 deletions(-)

diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index 2991fb2..50f18ed 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -2394,6 +2394,10 @@ function runParse(entryRule) {
 // null whenever the module state is not a coherent snapshot (no parse yet, or the last
 // attempt threw), so parseEdited falls back to a full parse.
 let lastSrc = null;
+// Source text of the LIVE tree (unlike lastSrc it survives a rejected edit): the
+// reject path restores the token columns to it so the handle keeps reading the
+// previous tree; only a successful parse/edit moves it.
+let treeSrc = null;
 // the LAST parse root's absolute coordinates (the descent origin — see visit/toObject)
 let rootCharBase = 0;
 let rootTokBase = 0;
@@ -2851,7 +2855,7 @@ function makeDoc() {
     kids: new Int32Array(16384), kidRel: new Int32Array(16384), kidTokRel: new Int32Array(16384),
     kidCap: 16384, kidN: 0,
     memoNode: [], memoEnd: [], memoExt: [], memoGen: [], memoGenCur: 0,
-    lastSrc: null, rootCharBase: 0, rootTokBase: 0, lastRoot: -1, lastRootTok: 0,
+    lastSrc: null, treeSrc: null, rootCharBase: 0, rootTokBase: 0, lastRoot: -1, lastRootTok: 0,
 ${e.soa ? '    parenCachePos: -1, parenCacheStack: [],' : ''}
     altK: null, altT: null, altOff: null, altEnd: null, altFl: null, altDp: null, altPd: null,
     altCap: 0, altN: 0,
@@ -2867,7 +2871,7 @@ function saveDoc(d) {
   d.kids = kids; d.kidRel = kidRel; d.kidTokRel = kidTokRel; d.kidCap = kidCap; d.kidN = kidN;
   d.memoNode = memoNode; d.memoEnd = memoEnd; d.memoExt = memoExt; d.memoGen = memoGen;
   d.memoGenCur = memoGenCur;
-  d.lastSrc = lastSrc; d.rootCharBase = rootCharBase; d.rootTokBase = rootTokBase;
+  d.lastSrc = lastSrc; d.treeSrc = treeSrc; d.rootCharBase = rootCharBase; d.rootTokBase = rootTokBase;
   d.lastRoot = lastRoot; d.lastRootTok = lastRootTok;
 ${e.soa ? '  d.parenCachePos = parenCachePos; d.parenCacheStack = parenCacheStack;' : ''}
   d.altK = altK; d.altT = altT; d.altOff = altOff; d.altEnd = altEnd; d.altFl = altFl;
@@ -2883,7 +2887,7 @@ function loadDoc(d) {
   kids = d.kids; kidRel = d.kidRel; kidTokRel = d.kidTokRel; kidCap = d.kidCap; kidN = d.kidN;
   memoNode = d.memoNode; memoEnd = d.memoEnd; memoExt = d.memoExt; memoGen = d.memoGen;
   memoGenCur = d.memoGenCur;
-  lastSrc = d.lastSrc; rootCharBase = d.rootCharBase; rootTokBase = d.rootTokBase;
+  lastSrc = d.lastSrc; treeSrc = d.treeSrc; rootCharBase = d.rootCharBase; rootTokBase = d.rootTokBase;
   lastRoot = d.lastRoot; lastRootTok = d.lastRootTok;
 ${e.soa ? '  parenCachePos = d.parenCachePos; parenCacheStack = d.parenCacheStack;' : ''}
   altK = d.altK; altT = d.altT; altOff = d.altOff; altEnd = d.altEnd; altFl = d.altFl;
@@ -2929,6 +2933,7 @@ function parseCore(source, entryRule) {
   lastRoot = root;
   lastRootTok = rootTokBase;
   lastSrc = source;
+  treeSrc = source;
   return root;
 }
 
@@ -2948,7 +2953,40 @@ function parseCore(source, entryRule) {
 // (template nesting, regex context, markup modes), full lexing is a small share of a
 // parse, and the diff is what localizes the damage — not the lexer.
 function editCore(source, entryRule, edits) {
-  if (lastSrc === null) return parseCore(source, entryRule);
+  try {
+    return editCoreRun(source, entryRule, edits);
+  } catch (e) {
+    // REJECTED edit: the splice (and any '>' splits of the failed attempt) already
+    // rewrote the token columns to the rejected text, and the append-mode fallback
+    // may have grown the arena — but the live tree's ROWS are untouched. Re-lexing
+    // the live tree's source restores every read path (leaf spans, visit, next
+    // edit's restart anchors); O(n) on the reject path only.
+    if (treeSrc !== null) { lexInto(treeSrc); lastSrc = null; }
+    throw e;
+  }
+}
+function editCoreRun(source, entryRule, edits) {
+  if (lastSrc === null) {
+    // No coherent edit base (a previous attempt rejected): full re-parse in APPEND
+    // mode — parseCore would reset the arena and destroy the live tree the handle
+    // still exposes if THIS parse rejects too. parse() is the only compaction point.
+    lexInto(source);
+    if (memoEnd.length !== MEMO_RULES) {
+      memoNode = new Array(MEMO_RULES);
+      memoEnd = new Array(MEMO_RULES);
+      memoExt = new Array(MEMO_RULES);
+      memoGen = new Array(MEMO_RULES);
+    }
+    memoGenCur++;
+    adoptRoot = -1;
+    adoptRunPos = -1;
+    const root = runParse(entryRule);
+    lastRoot = root;
+    lastRootTok = rootTokBase;
+    lastSrc = source;
+    treeSrc = source;
+    return root;
+  }
   const oSrc = lastSrc;
   lastSrc = null;
 ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
@@ -3103,6 +3141,7 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
     lastRoot = sroot;
     lastRootTok = adoptRootTok;
     lastSrc = source;
+    treeSrc = source;
     return sroot;
   }
   const root = runParse(entryRule);
@@ -3110,6 +3149,7 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   lastRoot = root;
   lastRootTok = rootTokBase;
   lastSrc = source;
+  treeSrc = source;
   return root;
 }
 
@@ -3121,18 +3161,19 @@ export function parseEdited(source, entryRule, edits) { activate(docDefault); re
 export function visit(entry, fns, charBase, tokBase) { activate(docDefault); return visitCore(entry, fns, charBase, tokBase); }
 export function toObject(id, charBase, tokBase) { activate(docDefault); return toObjectCore(id, charBase, tokBase); }
 // ── Handle API: explicit trees over per-instance documents ──
-// const p = createParser(); const cst = p.parse(text); const cst2 = p.edit(cst, next[, edits]);
-// Trees are edited IN PLACE (node surgery): an edit invalidates every earlier handle
-// of this parser — using one throws instead of silently reading a mutated tree. A
-// REJECTED edit (parse error) leaves the previous handle valid; the next edit falls
-// back to a full re-parse internally.
+// const p = createParser(); const cst = p.parse(text); p.edit(cst, next[, edits]);
+// The handle is the STABLE IDENTITY of this document's tree: edit() mutates it in
+// place (node surgery) and returns nothing — a return value would read as a clone,
+// and there is none. A REJECTED edit (parse error) throws and leaves the handle on
+// the previous tree; the next edit falls back to a full re-parse internally. Only
+// parse() re-opening the document invalidates old handles (they throw).
 export function createParser() {
   const d = makeDoc();
   let gen = 0;
   let entryUsed;
   const chk = (cst) => {
     if (cst === null || cst === undefined || cst.d !== d) throw new Error('foreign tree handle: it belongs to another parser instance');
-    if (cst.gen !== gen) throw new Error('stale tree handle: trees are edited in place - use the handle returned by the latest parse/edit');
+    if (cst.gen !== gen) throw new Error('stale tree handle: parse() re-opened this document - use the handle from the latest parse()');
   };
   const view = {};
   for (const k of Object.keys(tree)) {
@@ -3143,14 +3184,14 @@ export function createParser() {
     parse(source, entryRule) {
       activate(d);
       entryUsed = entryRule;
+      gen++;   // re-opening resets the arena: old handles die even if THIS parse rejects
       const root = parseCore(source, entryRule);
-      return { d, gen: ++gen, root };
+      return { d, gen, root };
     },
     edit(cst, source, edits) {
       chk(cst);
       activate(d);
-      const root = editCore(source, entryUsed, edits);
-      return { d, gen: ++gen, root };
+      cst.root = editCore(source, entryUsed, edits);
     },
     visit(cst, fns) { chk(cst); activate(d); return visitCore(cst.root, fns); },
     toObject(cst) { chk(cst); activate(d); return toObjectCore(cst.root); },
diff --git a/src/gen-parser.ts b/src/gen-parser.ts
index 830f819..66b09c2 100644
--- a/src/gen-parser.ts
+++ b/src/gen-parser.ts
@@ -1482,9 +1482,14 @@ export function createParser(grammar: CstGrammar) {
     }
   }
 
-  // API parity with the emitted engine's handle surface: the interpreter builds
-  // immutable object trees, so edit() is a full re-parse (no reuse, no staleness).
-  const edit = (_cst: unknown, source: string) => parse(source);
+  // API parity with the emitted engine's handle surface: edit() re-parses and
+  // updates the SAME tree object in place (the handle is the document's tree —
+  // edit returns nothing, exactly like the emitted engine; no reuse here).
+  const edit = (cst: { rule: string; children: unknown[]; offset: number; end: number }, source: string): void => {
+    const next = parse(source) as typeof cst;
+    cst.rule = next.rule; cst.children = next.children;
+    cst.offset = next.offset; cst.end = next.end;
+  };
   return { parse, edit, tokenize, profCounts };
 }
 
diff --git a/test/incremental-verify.ts b/test/incremental-verify.ts
index e452c07..241f9c1 100644
--- a/test/incremental-verify.ts
+++ b/test/incremental-verify.ts
@@ -16,7 +16,7 @@ type Edit = { start: number; oldEnd: number; newEnd: number };
 type Cst = { root: number };
 type Parser = {
   parse(s: string): Cst;
-  edit(cst: Cst, s: string, edits?: Edit[]): Cst;
+  edit(cst: Cst, s: string, edits?: Edit[]): void;
   toObject(cst: Cst): unknown;
 };
 type Em = {
@@ -99,18 +99,18 @@ const failures: string[] = [];
 
 for (const [base, edited] of GLUE) {
   steps++;
-  let c0 = session.parse(base);
+  const c0 = session.parse(base);
   let fe: string | null = null, ie: string | null = null;
-  let fr = -1, ic: Cst | null = null;
+  let fr = -1;
   try { fr = fresh.parse(edited); } catch (e) { fe = (e as Error).message; }
-  try { ic = session.edit(c0, edited); } catch (e) { ie = (e as Error).message; }
+  try { session.edit(c0, edited); } catch (e) { ie = (e as Error).message; }
   if (fe !== null || ie !== null) {
     if ((fe === null) !== (ie === null)) { mismatch++; if (failures.length < 5) failures.push(`glue «${edited.slice(0, 30)}»: fresh ${fe ? 'reject' : 'accept'} / incremental ${ie ? 'reject' : 'accept'}`); }
     else bothReject++;
     continue;
   }
   const a = JSON.stringify(fresh.toObject(fr));
-  const b = JSON.stringify(session.toObject(ic!));
+  const b = JSON.stringify(session.toObject(c0));
   if (a === b) equal++;
   else { mismatch++; if (failures.length < 5) failures.push(`glue «${edited.slice(0, 30)}»: tree diverges`); }
 }
@@ -125,20 +125,19 @@ for (const f of FILES) {
     const tf0 = performance.now();
     try { freshRoot = fresh.parse(next); } catch (e) { freshErr = (e as Error).message; }
     const tf1 = performance.now();
-    let incCst: Cst | null = null, incErr: string | null = null;
+    let incErr: string | null = null;
     const useProtocol = k % 2 === 1;   // alternate: edits protocol / char-diff fallback
     const ti0 = performance.now();
-    try { incCst = session.edit(cst, next, useProtocol ? [edit] : undefined); } catch (e) { incErr = (e as Error).message; }
+    try { session.edit(cst, next, useProtocol ? [edit] : undefined); } catch (e) { incErr = (e as Error).message; }
     const ti1 = performance.now();
     if (freshErr !== null || incErr !== null) {
       if ((freshErr === null) !== (incErr === null)) {
         mismatch++;
         if (failures.length < 5) failures.push(`${f.split('/').pop()} step ${k}: fresh ${freshErr ? 'reject' : 'accept'} / incremental ${incErr ? 'reject' : 'accept'}\n    fresh: ${freshErr ?? '-'}\n    inc:   ${incErr ?? '-'}`);
       } else bothReject++;
-      // rejected text: the handle stays valid; the session does not advance
+      // rejected text: the handle stays on the previous tree; do not advance
       continue;
     }
-    cst = incCst!;
     tFresh += tf1 - tf0; tInc += ti1 - ti0;
     const a = JSON.stringify(fresh.toObject(freshRoot));
     const b = JSON.stringify(session.toObject(cst));
diff --git a/test/multi-doc.ts b/test/multi-doc.ts
index 299798c..111fc6c 100644
--- a/test/multi-doc.ts
+++ b/test/multi-doc.ts
@@ -16,7 +16,7 @@ const emPath = '/tmp/emitted-multidoc.mjs';
 writeFileSync(emPath, emitParser(grammar));
 type Edit = { start: number; oldEnd: number; newEnd: number };
 type Cst = { root: number };
-type Parser = { parse(s: string): Cst; edit(cst: Cst, s: string, edits?: Edit[]): Cst; toObject(cst: Cst): unknown; visit(cst: Cst, fns: object): void };
+type Parser = { parse(s: string): Cst; edit(cst: Cst, s: string, edits?: Edit[]): void; toObject(cst: Cst): unknown; visit(cst: Cst, fns: object): void };
 type Em = { parse(s: string): number; toObject(id: number): unknown; createParser(): Parser };
 const em = (await import(emPath + '?v=' + process.pid)) as Em;
 
@@ -55,9 +55,9 @@ for (let k = 0; k < 60; k++) {
   const next = mutate(text);
   steps++;
   let fe: string | null = null, ie: string | null = null;
-  let fc: Cst | null = null, ic: Cst | null = null;
+  let fc: Cst | null = null;
   try { fc = f.parse(next); } catch (e) { fe = (e as Error).message; }
-  try { ic = (onA ? p1 : p2).edit(onA ? cstA : cstB, next); } catch (e) { ie = (e as Error).message; }
+  try { (onA ? p1 : p2).edit(onA ? cstA : cstB, next); } catch (e) { ie = (e as Error).message; }
   if (fe !== null || ie !== null) {
     if ((fe === null) !== (ie === null)) { mismatch++; if (failures.length < 5) failures.push(`step ${k} (${onA ? 'A' : 'B'}): fresh ${fe ? 'reject' : 'accept'} / edit ${ie ? 'reject' : 'accept'}`); }
     else bothReject++;
@@ -66,7 +66,7 @@ for (let k = 0; k < 60; k++) {
   // mix the module-level default doc in between: it must not disturb either instance
   if (k % 5 === 0) em.parse('const mix = ' + k + ';');
   const a = JSON.stringify(f.toObject(fc!));
-  const b = JSON.stringify((onA ? p1 : p2).toObject(ic!));
+  const b = JSON.stringify((onA ? p1 : p2).toObject(onA ? cstA : cstB));
   if (a === b) equal++;
   else {
     mismatch++;
@@ -75,29 +75,38 @@ for (let k = 0; k < 60; k++) {
       failures.push(`step ${k} (${onA ? 'A' : 'B'}): tree diverges @${i}`);
     }
   }
-  if (onA) { textA = next; cstA = ic!; } else { textB = next; cstB = ic!; }
+  if (onA) textA = next; else textB = next;
 }
 
-// handle contract
+// handle contract: edit mutates the handle IN PLACE (no return — no clone illusion);
+// only parse() re-opening the document invalidates old handles; rejects keep the tree.
 let contract = 0;
 {
   const p = em.createParser();
   const c1 = p.parse('const a = 1;');
-  const c2 = p.edit(c1, 'const ab = 1;');
-  try { p.edit(c1, 'const x = 2;'); failures.push('stale handle did not throw'); } catch { contract++; }
-  try { p.toObject(c1); failures.push('stale toObject did not throw'); } catch { contract++; }
-  try { p2.edit(c2, 'const y = 3;'); failures.push('foreign handle did not throw'); } catch { contract++; }
-  // a rejected edit leaves the handle valid
+  const before = JSON.stringify(p.toObject(c1));
+  p.edit(c1, 'const ab = 1;');
+  const after = JSON.stringify(p.toObject(c1));
+  if (after !== before && after.includes('"end":8')) contract++;   // same handle, new tree
+  else failures.push('in-place edit did not update the handle');
+  try { p2.edit(c1, 'const y = 3;'); failures.push('foreign handle did not throw'); } catch { contract++; }
   let rejected = false;
-  try { p.edit(c2, 'const ] = ;'); } catch { rejected = true; }
-  const c3 = rejected ? p.edit(c2, 'const ab = 12;') : null;
-  if (!rejected || c3 === null) failures.push('reject-then-edit flow broke');
-  else contract++;
+  try { p.edit(c1, 'const ] = ;'); } catch { rejected = true; }
+  if (rejected && JSON.stringify(p.toObject(c1)) === after) contract++;   // reject keeps the tree
+  else failures.push('reject-then-read flow broke');
+  const c2 = p.parse('let q = 1;');
+  try { p.toObject(c1); failures.push('re-opened document: old handle did not throw'); } catch { contract++; }
+  // a REJECTING parse() resets the arena too — it must invalidate prior handles
+  try { p.parse('const ] = ;'); } catch { /* expected reject */ }
+  let dead = false;
+  try { p.toObject(c2); } catch { dead = true; }
+  if (dead) contract++;
+  else failures.push('rejecting parse() left the old handle readable over a reset arena');
 }
 
-console.log(`multi-doc: ${equal} equal · ${bothReject} both-reject · ${mismatch} MISMATCH (${steps} interleaved steps) · contract ${contract}/4`);
+console.log(`multi-doc: ${equal} equal · ${bothReject} both-reject · ${mismatch} MISMATCH (${steps} interleaved steps) · contract ${contract}/5`);
 for (const s of failures) console.log('  ✗ ' + s);
-if (mismatch > 0 || contract !== 4 || failures.length > 0) {
+if (mismatch > 0 || contract !== 5 || failures.length > 0) {
   console.error('✗ document isolation / handle contract violated');
   process.exit(1);
 }

From ed5941e4892a778cde22163e1bb48a85095fd151 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Thu, 11 Jun 2026 07:17:23 +0800
Subject: [PATCH 13/15] Remove toObject from the engine: visit + tree accessors
 are the only surface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The arena design's premise (PR #36) is that parse() hands out a tree to
TRAVERSE, not an object tree to materialize - toObject was the
materialization back door left on both the module API and the handle
API, and its only real consumer was the gate layer's byte-identical
JSON comparison. That is a test concern: gates now build the comparison
object through visit + tree accessors (test/emitted-obj.ts, mirroring
the interpreter's object shape and key order exactly, so the emit ≡
interp and incremental ≡ fresh comparisons are unchanged). The emitted
engine's unused getText export is removed along with it; the interpreter
keeps returning its native object trees (that IS its representation, not
a conversion).

31/31 gates, emit-parser-verify 0 mismatches, multi-doc contract 5/5,
handle-API keystroke median 0.028ms (9MB) / 0.089ms (8MB nested).
---
 src/emit-parser.ts         | 20 -------------------
 test/emit-parser-verify.ts |  3 ++-
 test/emitted-obj.ts        | 39 ++++++++++++++++++++++++++++++++++++++
 test/incremental-verify.ts | 15 +++++++++------
 test/multi-doc.ts          | 21 +++++++++++---------
 5 files changed, 62 insertions(+), 36 deletions(-)
 create mode 100644 test/emitted-obj.ts

diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index 50f18ed..c39a622 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -2247,10 +2247,6 @@ export function tokenAt(i) {
 }
 
 // The CST is span-only: a node's text is derived from the source it was parsed from.
-export function getText(node, source) {
-  return source.slice(node.offset, node.end);
-}
-
 // ── Arena tree access ──
 // The arena IS the tree: parse() returns the root node id and consumers traverse
 // via visit()/the accessors — nothing is materialized on the parse path. All views
@@ -2323,20 +2319,6 @@ function visitCore(entry, fns, charBase, tokBase) {
   }
   if (fns.leave) fns.leave(entry, charBase, tokBase);
 }
-// Materialize the classic object CST from a node id — a BRIDGE for tests/debugging
-// (the byte-identical gate against the interpreter), not a parse-path product.
-function toObjectCore(id, charBase, tokBase) {
-  if (charBase === undefined) { charBase = rootCharBase; tokBase = rootTokBase; }
-  const n = rowCount[id];
-  const cs = rowStart[id];
-  const children = new Array(n);
-  for (let i = 0; i < n; i++) {
-    const entry = kids[cs + i];
-    children[i] = entry >= 0 ? toObjectCore(entry, charBase + kcr(id, cs + i), tokBase + ktr(id, cs + i))
-      : { tokenType: leafTokenType(entry, tokBase), offset: toff(tokBase + ((~entry) >>> 2)), end: tend(tokBase + ((~entry) >>> 2)) };
-  }
-  return { rule: RULE_NAMES[rowRule[id]], children, offset: charBase, end: charBase + rowLen[id] };
-}
 
 // Parse to the ARENA: returns the root node id.
 function lexInto(source) {
@@ -3159,7 +3141,6 @@ export { tokenize };
 export function parse(source, entryRule) { activate(docDefault); return parseCore(source, entryRule); }
 export function parseEdited(source, entryRule, edits) { activate(docDefault); return editCore(source, entryRule, edits); }
 export function visit(entry, fns, charBase, tokBase) { activate(docDefault); return visitCore(entry, fns, charBase, tokBase); }
-export function toObject(id, charBase, tokBase) { activate(docDefault); return toObjectCore(id, charBase, tokBase); }
 // ── Handle API: explicit trees over per-instance documents ──
 // const p = createParser(); const cst = p.parse(text); p.edit(cst, next[, edits]);
 // The handle is the STABLE IDENTITY of this document's tree: edit() mutates it in
@@ -3194,7 +3175,6 @@ export function createParser() {
       cst.root = editCore(source, entryUsed, edits);
     },
     visit(cst, fns) { chk(cst); activate(d); return visitCore(cst.root, fns); },
-    toObject(cst) { chk(cst); activate(d); return toObjectCore(cst.root); },
     tree: view,
   };
 }
diff --git a/test/emit-parser-verify.ts b/test/emit-parser-verify.ts
index 2269874..c7c2732 100644
--- a/test/emit-parser-verify.ts
+++ b/test/emit-parser-verify.ts
@@ -9,6 +9,7 @@
 //   node test/emit-parser-verify.ts            # 4 bench files + ~400-file corpus sample
 //   node test/emit-parser-verify.ts <N>        # sample stride N (default ~ to hit ~400)
 //   node test/emit-parser-verify.ts all        # every .ts file under conformance
+import { objectify } from './emitted-obj.ts';
 import { createParser } from '../src/gen-parser.ts';
 import { emitParser } from '../src/emit-parser.ts';
 import { readdir } from 'fs/promises';
@@ -41,7 +42,7 @@ function compare(code: string): { verdict: string; detail?: string } {
   const o = run(oracle.parse, code);
   // The emitted parser returns an arena node id; materialize the object view for the
   // byte-identical comparison against the interpreter's object tree.
-  const e = run((s: string) => emitted.toObject(emitted.parse(s)), code);
+  const e = run((s: string) => { const r = emitted.parse(s); return objectify(emitted.tree, (fns) => emitted.visit(r, fns)); }, code);
   if (!o.ok && o.err.includes('Maximum call stack')) {
     // The interpreter recursed out of stack — a CAPACITY limit, not a parse verdict;
     // the emitted parser's flatter frames can legitimately survive deeper inputs
diff --git a/test/emitted-obj.ts b/test/emitted-obj.ts
new file mode 100644
index 0000000..cc4c123
--- /dev/null
+++ b/test/emitted-obj.ts
@@ -0,0 +1,39 @@
+// Materialize an emitted-engine tree as a plain object — TEST-SIDE ONLY. The engine
+// deliberately exposes a single consumption surface (visit + tree accessors); full
+// materialization is a consumer choice, and the only consumer that needs it is the
+// gate layer's byte-identical JSON comparison (incremental ≡ fresh, emit ≡ interp).
+// The shape (and KEY ORDER — JSON.stringify equality depends on it) mirrors the
+// interpreter's native object trees: nodes { rule, children, offset, end }, leaves
+// { tokenType, offset, end }.
+export interface TreeView {
+  ruleNameOf(id: number): string;
+  lenOf(id: number): number;
+  leafTokenType(entry: number, tokBase: number): string;
+  leafOffsetOf(entry: number, tokBase: number): number;
+  leafEndOf(entry: number, tokBase: number): number;
+}
+type VisitFns = {
+  enter?(id: number, charBase: number, tokBase: number): boolean | void;
+  leave?(id: number, charBase: number, tokBase: number): void;
+  leaf?(entry: number, tok: number): void;
+};
+export type ObjNode = { rule: string; children: (ObjNode | ObjLeaf)[]; offset: number; end: number };
+export type ObjLeaf = { tokenType: string; offset: number; end: number };
+
+export function objectify(tree: TreeView, runVisit: (fns: VisitFns) => void): ObjNode {
+  const rootHolder: { children: (ObjNode | ObjLeaf)[] } = { children: [] };
+  const stack: { children: (ObjNode | ObjLeaf)[] }[] = [rootHolder];
+  runVisit({
+    enter(id, charBase) {
+      const node: ObjNode = { rule: tree.ruleNameOf(id), children: [], offset: charBase, end: charBase + tree.lenOf(id) };
+      stack[stack.length - 1].children.push(node);
+      stack.push(node);
+    },
+    leave() { stack.pop(); },
+    leaf(entry, tok) {
+      const tb = tok - ((~entry) >>> 2);
+      stack[stack.length - 1].children.push({ tokenType: tree.leafTokenType(entry, tb), offset: tree.leafOffsetOf(entry, tb), end: tree.leafEndOf(entry, tb) });
+    },
+  });
+  return rootHolder.children[0] as ObjNode;
+}
diff --git a/test/incremental-verify.ts b/test/incremental-verify.ts
index 241f9c1..3b1f73a 100644
--- a/test/incremental-verify.ts
+++ b/test/incremental-verify.ts
@@ -6,6 +6,7 @@
 // and the arena growth, so reuse is MEASURED, not assumed.
 //
 //   node test/incremental-verify.ts
+import { objectify } from './emitted-obj.ts';
 import { existsSync, readFileSync, writeFileSync } from 'node:fs';
 import { emitParser } from '../src/emit-parser.ts';
 
@@ -17,11 +18,13 @@ type Cst = { root: number };
 type Parser = {
   parse(s: string): Cst;
   edit(cst: Cst, s: string, edits?: Edit[]): void;
-  toObject(cst: Cst): unknown;
+  visit(cst: Cst, fns: object): void;
+  tree: import('./emitted-obj.ts').TreeView;
 };
 type Em = {
   parse(s: string): number;
-  toObject(id: number): unknown;
+  visit(entry: number, fns: object): void;
+  tree: import('./emitted-obj.ts').TreeView;
   createParser(): Parser;
 };
 const session = ((await import(emPath + '?session=' + process.pid)) as Em).createParser();
@@ -109,8 +112,8 @@ for (const [base, edited] of GLUE) {
     else bothReject++;
     continue;
   }
-  const a = JSON.stringify(fresh.toObject(fr));
-  const b = JSON.stringify(session.toObject(c0));
+  const a = JSON.stringify(objectify(fresh.tree, (fns) => fresh.visit(fr, fns)));
+  const b = JSON.stringify(objectify(session.tree, (fns) => session.visit(c0, fns)));
   if (a === b) equal++;
   else { mismatch++; if (failures.length < 5) failures.push(`glue «${edited.slice(0, 30)}»: tree diverges`); }
 }
@@ -139,8 +142,8 @@ for (const f of FILES) {
       continue;
     }
     tFresh += tf1 - tf0; tInc += ti1 - ti0;
-    const a = JSON.stringify(fresh.toObject(freshRoot));
-    const b = JSON.stringify(session.toObject(cst));
+    const a = JSON.stringify(objectify(fresh.tree, (fns) => fresh.visit(freshRoot, fns)));
+    const b = JSON.stringify(objectify(session.tree, (fns) => session.visit(cst, fns)));
     if (a === b) equal++;
     else {
       mismatch++;
diff --git a/test/multi-doc.ts b/test/multi-doc.ts
index 111fc6c..1b18f4e 100644
--- a/test/multi-doc.ts
+++ b/test/multi-doc.ts
@@ -8,6 +8,7 @@
 // an in-place-mutated tree, and a REJECTED edit leaves the old handle valid.
 //
 //   node test/multi-doc.ts
+import { objectify } from './emitted-obj.ts';
 import { writeFileSync } from 'node:fs';
 import { emitParser } from '../src/emit-parser.ts';
 
@@ -16,8 +17,8 @@ const emPath = '/tmp/emitted-multidoc.mjs';
 writeFileSync(emPath, emitParser(grammar));
 type Edit = { start: number; oldEnd: number; newEnd: number };
 type Cst = { root: number };
-type Parser = { parse(s: string): Cst; edit(cst: Cst, s: string, edits?: Edit[]): void; toObject(cst: Cst): unknown; visit(cst: Cst, fns: object): void };
-type Em = { parse(s: string): number; toObject(id: number): unknown; createParser(): Parser };
+type Parser = { parse(s: string): Cst; edit(cst: Cst, s: string, edits?: Edit[]): void; visit(cst: Cst, fns: object): void; tree: import('./emitted-obj.ts').TreeView };
+type Em = { parse(s: string): number; createParser(): Parser };
 const em = (await import(emPath + '?v=' + process.pid)) as Em;
 
 // Two synthetic documents (no corpus dependency — the gate always exercises).
@@ -65,8 +66,9 @@ for (let k = 0; k < 60; k++) {
   }
   // mix the module-level default doc in between: it must not disturb either instance
   if (k % 5 === 0) em.parse('const mix = ' + k + ';');
-  const a = JSON.stringify(f.toObject(fc!));
-  const b = JSON.stringify((onA ? p1 : p2).toObject(onA ? cstA : cstB));
+  const a = JSON.stringify(objectify(f.tree, (fns) => f.visit(fc!, fns)));
+  const q = onA ? p1 : p2;
+  const b = JSON.stringify(objectify(q.tree, (fns) => q.visit(onA ? cstA : cstB, fns)));
   if (a === b) equal++;
   else {
     mismatch++;
@@ -84,22 +86,23 @@ let contract = 0;
 {
   const p = em.createParser();
   const c1 = p.parse('const a = 1;');
-  const before = JSON.stringify(p.toObject(c1));
+  const obj = (h: Cst) => JSON.stringify(objectify(p.tree, (fns) => p.visit(h, fns)));
+  const before = obj(c1);
   p.edit(c1, 'const ab = 1;');
-  const after = JSON.stringify(p.toObject(c1));
+  const after = obj(c1);
   if (after !== before && after.includes('"end":8')) contract++;   // same handle, new tree
   else failures.push('in-place edit did not update the handle');
   try { p2.edit(c1, 'const y = 3;'); failures.push('foreign handle did not throw'); } catch { contract++; }
   let rejected = false;
   try { p.edit(c1, 'const ] = ;'); } catch { rejected = true; }
-  if (rejected && JSON.stringify(p.toObject(c1)) === after) contract++;   // reject keeps the tree
+  if (rejected && obj(c1) === after) contract++;   // reject keeps the tree
   else failures.push('reject-then-read flow broke');
   const c2 = p.parse('let q = 1;');
-  try { p.toObject(c1); failures.push('re-opened document: old handle did not throw'); } catch { contract++; }
+  try { obj(c1); failures.push('re-opened document: old handle did not throw'); } catch { contract++; }
   // a REJECTING parse() resets the arena too — it must invalidate prior handles
   try { p.parse('const ] = ;'); } catch { /* expected reject */ }
   let dead = false;
-  try { p.toObject(c2); } catch { dead = true; }
+  try { obj(c2); } catch { dead = true; }
   if (dead) contract++;
   else failures.push('rejecting parse() left the old handle readable over a reset arena');
 }

From e6c4e6cab8f6a06de36d0086c7e4ea5960bcba87 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Thu, 11 Jun 2026 07:21:56 +0800
Subject: [PATCH 14/15] edit() has ONE usage: the edit ranges are required

The char-diff envelope was the protocol's predecessor left in as a
convenience default - but it silently spends O(file) prefix/suffix
scans, defeating the O(damage) contract exactly for the callers who
reached for the incremental API. Callers that track edits (editors) all
have the ranges; a caller without them passes the whole-file range and
gets an honest full re-parse instead of hidden scans.

The ranges MUST cover every change: over-claiming shrinks via the true
token-prefix compare; under-claiming is the caller's bug (the same
garbage-in contract as tree-sitter's tree.edit, now documented at the
envelope). edit() without ranges throws (gated, contract 6/6); the
seeded sessions and glue cases all pass explicit ranges (the gate keeps
a small test-side diff helper for its constructed pairs).

31/31 gates, parity 0 mismatches, keystroke median 0.028ms.
---
 src/emit-parser.ts         | 36 ++++++++++++++++-------------------
 test/incremental-verify.ts | 16 +++++++++++++---
 test/multi-doc.ts          | 39 +++++++++++++++++++++++++++-----------
 3 files changed, 57 insertions(+), 34 deletions(-)

diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index c39a622..559c1dc 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -2948,6 +2948,9 @@ function editCore(source, entryRule, edits) {
   }
 }
 function editCoreRun(source, entryRule, edits) {
+  if (edits === undefined || edits.length === 0) {
+    throw new Error('edit() requires the edit ranges: [{ start, oldEnd, newEnd }] in old/new character coordinates (covering every change); pass [{ start: 0, oldEnd: <old length>, newEnd: <new length> }] to force a full re-parse');
+  }
   if (lastSrc === null) {
     // No coherent edit base (a previous attempt rejected): full re-parse in APPEND
     // mode — parseCore would reset the arena and destroy the live tree the handle
@@ -2972,27 +2975,20 @@ function editCoreRun(source, entryRule, edits) {
   const oSrc = lastSrc;
   lastSrc = null;
 ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
-  // Damage envelope: from the EDIT PROTOCOL when the caller provides it (an editor
-  // knows its edit ranges — [{start, oldEnd, newEnd}] in old/new coordinates), else
-  // derived by the char-level prefix/suffix compare (the cheapest possible fallback,
-  // but O(file) scans).
+  // Damage envelope: the caller's edit ranges, merged ([{start, oldEnd, newEnd}] in
+  // old/new coordinates — an editor's change events). The ranges MUST cover every
+  // change: over-claiming only shrinks via the true token-prefix compare below;
+  // under-claiming means text outside the window is never re-lexed (the same
+  // garbage-in contract as tree-sitter's tree.edit). There is deliberately no
+  // char-diff fallback — it would silently spend O(file) scans, and a caller
+  // without ranges can pass the whole-file range for an honest full re-parse.
   const oldLen = oSrc.length, newLen = source.length;
-  let cs, ceOld, ceNew;
-  if (edits !== undefined && edits.length > 0) {
-    cs = edits[0].start; ceOld = edits[0].oldEnd; ceNew = edits[0].newEnd;
-    for (let i = 1; i < edits.length; i++) {
-      const ed = edits[i];
-      if (ed.start < cs) cs = ed.start;
-      if (ed.oldEnd > ceOld) ceOld = ed.oldEnd;
-      if (ed.newEnd > ceNew) ceNew = ed.newEnd;
-    }
-  } else {
-    const minL = oldLen < newLen ? oldLen : newLen;
-    cs = 0;
-    while (cs < minL && oSrc.charCodeAt(cs) === source.charCodeAt(cs)) cs++;
-    let ce = 0;
-    while (ce < minL - cs && oSrc.charCodeAt(oldLen - 1 - ce) === source.charCodeAt(newLen - 1 - ce)) ce++;
-    ceOld = oldLen - ce; ceNew = newLen - ce;
+  let cs = edits[0].start, ceOld = edits[0].oldEnd, ceNew = edits[0].newEnd;
+  for (let i = 1; i < edits.length; i++) {
+    const ed = edits[i];
+    if (ed.start < cs) cs = ed.start;
+    if (ed.oldEnd > ceOld) ceOld = ed.oldEnd;
+    if (ed.newEnd > ceNew) ceNew = ed.newEnd;
   }
   const charDelta = newLen - oldLen;
   // Restart anchor: the last token B ending at/before the damage whose recorded
diff --git a/test/incremental-verify.ts b/test/incremental-verify.ts
index 3b1f73a..e7f6826 100644
--- a/test/incremental-verify.ts
+++ b/test/incremental-verify.ts
@@ -85,6 +85,17 @@ const STEPS = 30;
 // ending exactly at the damage start can be EXTENDED under maximal munch — 'b'+'x'
 // = 'bx', '='+'=' = '==', deleting a gap glues neighbours). These cases pin the
 // strict-< restart anchor; every one must match fresh (tree or reject) exactly.
+// Test-side range derivation for constructed pairs (the ENGINE requires explicit
+// ranges — a caller without them passes the whole-file range for a full re-parse).
+function diffRange(a: string, b: string): Edit {   // smallest single { start, oldEnd, newEnd } range turning a into b
+  const minL = Math.min(a.length, b.length);
+  let s = 0;   // length of the common prefix
+  while (s < minL && a.charCodeAt(s) === b.charCodeAt(s)) s++;
+  let e = 0;   // length of the common suffix, capped at minL - s so it never overlaps the prefix
+  while (e < minL - s && a.charCodeAt(a.length - 1 - e) === b.charCodeAt(b.length - 1 - e)) e++;
+  return { start: s, oldEnd: a.length - e, newEnd: b.length - e };   // oldEnd is in a's coordinates, newEnd in b's
+}
+
 const GLUE: Array<[string, string]> = [
   ['const a = 1;\nconst b = 2;\n', 'const a = 1;\nconst bx = 2;\n'],
   ['let a = b; let c = 1;\n', 'let a = b1; let c = 1;\n'],
@@ -106,7 +117,7 @@ for (const [base, edited] of GLUE) {
   let fe: string | null = null, ie: string | null = null;
   let fr = -1;
   try { fr = fresh.parse(edited); } catch (e) { fe = (e as Error).message; }
-  try { session.edit(c0, edited); } catch (e) { ie = (e as Error).message; }
+  try { session.edit(c0, edited, [diffRange(base, edited)]); } catch (e) { ie = (e as Error).message; }
   if (fe !== null || ie !== null) {
     if ((fe === null) !== (ie === null)) { mismatch++; if (failures.length < 5) failures.push(`glue «${edited.slice(0, 30)}»: fresh ${fe ? 'reject' : 'accept'} / incremental ${ie ? 'reject' : 'accept'}`); }
     else bothReject++;
@@ -129,9 +140,8 @@ for (const f of FILES) {
     try { freshRoot = fresh.parse(next); } catch (e) { freshErr = (e as Error).message; }
     const tf1 = performance.now();
     let incErr: string | null = null;
-    const useProtocol = k % 2 === 1;   // alternate: edits protocol / char-diff fallback
     const ti0 = performance.now();
-    try { session.edit(cst, next, useProtocol ? [edit] : undefined); } catch (e) { incErr = (e as Error).message; }
+    try { session.edit(cst, next, [edit]); } catch (e) { incErr = (e as Error).message; }
     const ti1 = performance.now();
     if (freshErr !== null || incErr !== null) {
       if ((freshErr === null) !== (incErr === null)) {
diff --git a/test/multi-doc.ts b/test/multi-doc.ts
index 1b18f4e..dbe5f6e 100644
--- a/test/multi-doc.ts
+++ b/test/multi-doc.ts
@@ -34,11 +34,22 @@ let seed = 0x51C0FFEE;
 const rand = () => ((seed = (seed * 48271) % 0x7fffffff) / 0x7fffffff);
 const randInt = (n: number) => Math.floor(rand() * n);
 const INS = ['x', '1', ' + q', '.m', '(/*c*/)', '"s"'];
-function mutate(text: string): string {
+function mutate(text: string): { next: string; edit: Edit } {   // returns the mutated text plus the edit range describing the mutation
   switch (randInt(3)) {
-    case 0: { const at = randInt(text.length); return text.slice(0, at) + INS[randInt(INS.length)] + text.slice(at); }
-    case 1: { const at = randInt(Math.max(1, text.length - 6)); return text.slice(0, at) + text.slice(at + 1 + randInt(4)); }
-    default: { const at = randInt(Math.max(1, text.length - 1)); return text.slice(0, at) + 'z' + text.slice(at + 1); }
+    case 0: {   // insertion: a random INS snippet at a random offset (oldEnd === start)
+      const at = randInt(text.length);
+      const ins = INS[randInt(INS.length)];
+      return { next: text.slice(0, at) + ins + text.slice(at), edit: { start: at, oldEnd: at, newEnd: at + ins.length } };
+    }
+    case 1: {   // deletion: drop n = 1..4 chars starting at a random offset (newEnd === start)
+      const at = randInt(Math.max(1, text.length - 6));
+      const n = 1 + randInt(4);
+      return { next: text.slice(0, at) + text.slice(at + n), edit: { start: at, oldEnd: at + n, newEnd: at } };
+    }
+    default: {   // replacement: overwrite one char with 'z' (length unchanged)
+      const at = randInt(Math.max(1, text.length - 1));
+      return { next: text.slice(0, at) + 'z' + text.slice(at + 1), edit: { start: at, oldEnd: at + 1, newEnd: at + 1 } };
+    }
   }
 }
 
@@ -53,12 +64,12 @@ const failures: string[] = [];
 for (let k = 0; k < 60; k++) {
   const onA = (k & 1) === 0;
   const text = onA ? textA : textB;
-  const next = mutate(text);
+  const { next, edit } = mutate(text);
   steps++;
   let fe: string | null = null, ie: string | null = null;
   let fc: Cst | null = null;
   try { fc = f.parse(next); } catch (e) { fe = (e as Error).message; }
-  try { (onA ? p1 : p2).edit(onA ? cstA : cstB, next); } catch (e) { ie = (e as Error).message; }
+  try { (onA ? p1 : p2).edit(onA ? cstA : cstB, next, [edit]); } catch (e) { ie = (e as Error).message; }
   if (fe !== null || ie !== null) {
     if ((fe === null) !== (ie === null)) { mismatch++; if (failures.length < 5) failures.push(`step ${k} (${onA ? 'A' : 'B'}): fresh ${fe ? 'reject' : 'accept'} / edit ${ie ? 'reject' : 'accept'}`); }
     else bothReject++;
@@ -88,17 +99,23 @@ let contract = 0;
   const c1 = p.parse('const a = 1;');
   const obj = (h: Cst) => JSON.stringify(objectify(p.tree, (fns) => p.visit(h, fns)));
   const before = obj(c1);
-  p.edit(c1, 'const ab = 1;');
+  p.edit(c1, 'const ab = 1;', [{ start: 7, oldEnd: 7, newEnd: 8 }]);
   const after = obj(c1);
   if (after !== before && after.includes('"end":8')) contract++;   // same handle, new tree
   else failures.push('in-place edit did not update the handle');
-  try { p2.edit(c1, 'const y = 3;'); failures.push('foreign handle did not throw'); } catch { contract++; }
+  try { p2.edit(c1, 'const y = 3;', [{ start: 0, oldEnd: 13, newEnd: 12 }]); failures.push('foreign handle did not throw'); } catch { contract++; }
   let rejected = false;
-  try { p.edit(c1, 'const ] = ;'); } catch { rejected = true; }
+  try { p.edit(c1, 'const ] = ;', [{ start: 6, oldEnd: 13, newEnd: 11 }]); } catch { rejected = true; }
   if (rejected && obj(c1) === after) contract++;   // reject keeps the tree
   else failures.push('reject-then-read flow broke');
   const c2 = p.parse('let q = 1;');
   try { obj(c1); failures.push('re-opened document: old handle did not throw'); } catch { contract++; }
+  // missing ranges: ONE usage only — edit() without ranges must throw, not
+  // silently fall back to O(file) diff scans
+  let needsRanges = false;
+  try { (p as unknown as { edit(c: Cst, s: string): void }).edit(c2, 'let q = 2;'); } catch { needsRanges = true; }
+  if (needsRanges) contract++;
+  else failures.push('edit() without ranges did not throw');
   // a REJECTING parse() resets the arena too — it must invalidate prior handles
   try { p.parse('const ] = ;'); } catch { /* expected reject */ }
   let dead = false;
@@ -107,9 +124,9 @@ let contract = 0;
   else failures.push('rejecting parse() left the old handle readable over a reset arena');
 }
 
-console.log(`multi-doc: ${equal} equal · ${bothReject} both-reject · ${mismatch} MISMATCH (${steps} interleaved steps) · contract ${contract}/5`);
+console.log(`multi-doc: ${equal} equal · ${bothReject} both-reject · ${mismatch} MISMATCH (${steps} interleaved steps) · contract ${contract}/6`);
 for (const s of failures) console.log('  ✗ ' + s);
-if (mismatch > 0 || contract !== 5 || failures.length > 0) {
+if (mismatch > 0 || contract !== 6 || failures.length > 0) {
   console.error('✗ document isolation / handle contract violated');
   process.exit(1);
 }

From 2f8e87a4785170ededb93d164e08d817545d4847 Mon Sep 17 00:00:00 2001
From: Johnson Chu <johnsoncodehk@gmail.com>
Date: Thu, 11 Jun 2026 07:47:23 +0800
Subject: [PATCH 15/15] edit() takes changes, the engine owns the text as
 pieces: end-to-end O(damage)

Why next had to go: it was the only carrier of the inserted content
(the ranges carry positions, not text), which meant the API could
express 'the text and the ranges disagree' - the garbage-in hazard. The
change protocol [{ start, end, text }] is LSP/VS Code's native shape
(each edit in the coordinates of the document after the preceding ones)
and makes the inconsistency unrepresentable: the engine BUILDS the new
text from the changes.

Building it as a string exposed a cost that was always there, hidden in
the caller: slicing the previous edit's cons string flattens it in V8 -
measured 1.18ms per keystroke on 9MB, paid by whoever materializes the
text. The engine now owns the document as PIECES (flat fragments;
applying a change splits via O(1) SlicedString views and never
flattens), with:

- window-materialized relexing: lexCore reads a small flat slice with
  an absolute srcBase bias (biased once per token in tkPush - batch
  cost is two adds per token); running off the window end - including a
  matcher failing at the EDGE (a truncated string literal is not a lex
  error) - signals a retry with a larger window via LEX_RETRY. A cut
  token cannot fake a resync: suffix-zone equality makes its end
  mismatch the old token's.
- doc reads route through docChar/docText (flat fast path, cursor-
  cached piece lookup otherwise); cold paths (errors, debug) flatten
  lazily; pieces consolidate past 256 fragments (amortized join).
- the reject restore re-lexes the live tree's pieces but preserves the
  DOCUMENT pieces (the editor's buffer holds the rejected text; the
  gates' editor model now advances on reject and verifies an UNDO
  revert edit against a fresh parse every time).

End-to-end keystroke (engine builds the text, nothing hidden): 9MB
median 0.024ms / p90 0.047ms; 8MB nested 0.072ms / p90 0.094ms. 31/31
gates, parity 0 mismatches, lexer streams byte-identical, batch in band
(11.5x).
---
 src/emit-lexer.ts          |  27 ++--
 src/emit-parser.ts         | 248 ++++++++++++++++++++++++++++---------
 test/incremental-verify.ts |  41 ++++--
 test/multi-doc.ts          |  62 +++++++---
 4 files changed, 287 insertions(+), 91 deletions(-)

diff --git a/src/emit-lexer.ts b/src/emit-lexer.ts
index 704111e..bf2ce1d 100644
--- a/src/emit-lexer.ts
+++ b/src/emit-lexer.ts
@@ -103,6 +103,11 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`// ── Emitted lexer (emit-lexer.ts): specialized tokenize for this grammar ──`);
   for (const m of matchers) emit(`const ${m.re} = new RegExp(${J(`(?:${m.pattern})`)}, ${J(m.flags)});`);
   emit(`const LX_WS = /\\s+/y;`);
+  emit(`// window-truncation retry: a matcher failing at the WINDOW edge is not a lex`);
+  emit(`// error — the caller re-materializes a larger window (truncation cannot fake a`);
+  emit(`// resync: suffix-zone equality makes a cut token's END mismatch the old one)`);
+  emit(`const LEX_RETRY = { retry: true };`);
+  emit(`let lexWindowMore = false;`);
   emit(`const LX_UNI_IDENT = /[$_\\p{ID_Start}][$\\u200c\\u200d\\p{ID_Continue}]*/uy;`);
   emit(`const LX_UNI_CONT = /[$\\u200c\\u200d\\p{ID_Continue}]+/uy;`);
   emit(`const LX_UNI_FULL = /^[$_\\p{ID_Start}][$\\u200c\\u200d\\p{ID_Continue}]*/u;`);
@@ -177,7 +182,7 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
       emit(`      if (validateEscapes) {`);
       emit(`        LX_TPL_ESC.lastIndex = pos;`);
       emit(`        const m = LX_TPL_ESC.exec(source);`);
-      emit(`        if (!m) throw new Error('Invalid escape sequence in template at offset ' + pos);`);
+      emit(`        if (!m) { if (lexWindowMore) throw LEX_RETRY; throw new Error('Invalid escape sequence in template at offset ' + pos); }`);
       emit(`        pos += m[0].length;`);
       emit(`      } else { pos += 2; }`);
     } else {
@@ -188,6 +193,7 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
     emit(`    if (${startsWithExpr('source', 'pos', tplOpen)}) return { endsWithInterp: false, end: pos + ${tplOpen.length} };`);
     emit(`    pos++;`);
     emit(`  }`);
+    emit(`  if (lexWindowMore) throw LEX_RETRY;`);
     emit(`  throw new Error('Unterminated template literal at offset ' + pos);`);
     emit(`}`);
   }
@@ -197,7 +203,8 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   // when a CST leaf is built. Flag bits: 1 = newlineBefore (the only stamp this emitted
   // lexer ever sets; comment/multilineFlow stamps belong to fallback-only grammars).
   emit(`function tokenize(source) {`);
-  emit(`  src = source;`);
+  emit(`  docPieces = [source]; docPieceOff = [0]; docLen = source.length;`);
+  emit(`  docFlat = source; docCur = 0;`);
   emit(`  tokN = 0;`);
   emit(`  parenCachePos = -1;`);
   emit(`  srcLenP1 = source.length + 1;`);
@@ -213,7 +220,9 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`// old token (same k/t, offsets shifted by wndDelta, both depth records 0) while`);
   emit(`// the window's own stacks are empty — returns that OLD index (the duplicate push`);
   emit(`// is retracted), or -1 when lexing ran to EOF.`);
-  emit(`function lexCore(source, startPos, pvK, pvT, wndPtr0, wndMinOff, wndDelta, wndCs, initParens) {`);
+  emit(`function lexCore(source, startPos, pvK, pvT, wndPtr0, wndMinOff, wndDelta, wndCs, initParens, srcBase, hasMore) {`);
+  emit(`  if (srcBase === undefined) srcBase = 0;`);
+  emit(`  lexWindowMore = hasMore === true;`);
   emit(`  const n = source.length;`);
   emit(`  let pos = startPos;`);
   emit(`  let pendingNl = false;`);
@@ -231,6 +240,7 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   emit(`  let dmgDp = -1, dmgPd = -1;`);
   emit(`  let lastDp = templateStack.length, lastPd = parenHeadStack.length;`);
   emit(`  function tkPush(k, t, off, end) {`);
+  emit(`    off += srcBase; end += srcBase;`);
   emit(`    if (tokN === tkCap) growTok();`);
   emit(`    tkK[tokN] = k; tkT[tokN] = t; tkOff[tokN] = off; tkEnd[tokN] = end;`);
   emit(`    tkFl[tokN] = (pendingNl ? 1 : 0) | extraFl;`);
@@ -360,7 +370,7 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
     emit(`${ind}  if (m !== null) {`);
     if (m.identLike) {
       const plen = (identPrefixByName.get(m.name) ?? '').length;
-      emit(`${ind}    if (!lexIdentValid(m[0], ${plen})) throw new Error("Invalid identifier escape at offset " + pos + ": '" + m[0] + "'");`);
+      emit(`${ind}    if (!lexIdentValid(m[0], ${plen})) { if (lexWindowMore) throw LEX_RETRY; throw new Error("Invalid identifier escape at offset " + pos + ": '" + m[0] + "'"); }`);
     }
     if (m.skip) {
       emit(`${ind}    if (m[0].includes('\\n')) pendingNl = true;`);
@@ -470,13 +480,13 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
     emit(`      const _li = tokN - 1;`);
     const likeKs = [...identLike].map(kOf);
     const likeCond = likeKs.map(k => `tkK[_li] === ${k}`).join(' || ');
-    emit(`      if ((${likeCond}) && tkEnd[_li] === pos) {`);
+    emit(`      if ((${likeCond}) && tkEnd[_li] === pos + srcBase) {`);
     emit(`        LX_UNI_CONT.lastIndex = pos;`);
     emit(`        const cont = LX_UNI_CONT.exec(source);`);
     emit(`        if (cont !== null) {`);
     emit(`          pos += cont[0].length;`);
-    emit(`          tkEnd[_li] = pos;`);
-    emit(`          tkT[_li] = lexKwT(source, tkOff[_li], pos);`);
+    emit(`          tkEnd[_li] = pos + srcBase;`);
+    emit(`          tkT[_li] = lexKwT(source, tkOff[_li] - srcBase, pos);`);
     emit(`          continue;`);
     emit(`        }`);
     emit(`      }`);
@@ -504,10 +514,11 @@ export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
     emit(`      }`);
     emit(`    }`);
   }
+  emit(`    if (lexWindowMore) throw LEX_RETRY;`);
   emit(`    throw new Error("Unexpected character at offset " + pos + ": '" + source[pos] + "'");`);
   emit(`  }`);
   emit(`  if (wndHit >= 0) { tokN--; return wndHit; }`);
-  emit(`  return -1;`);
+  emit(`  return hasMore ? -2 : -1;`);
   emit(`}`);
   emit(`// Windowed-relex restart anchor: the last token B ending at/before the damage`);
   emit(`// whose recorded stack depths are zero and whose shape leaves no cross-token`);
diff --git a/src/emit-parser.ts b/src/emit-parser.ts
index 559c1dc..4498f64 100644
--- a/src/emit-parser.ts
+++ b/src/emit-parser.ts
@@ -1387,7 +1387,85 @@ let tkDp = new Uint8Array(4096);
 let tkPd = new Uint16Array(4096);
 let tkCap = 4096;
 let tokN = 0;
-let src = '';
+// ── The DOCUMENT text layer ──
+// The text lives as PIECES (flat string fragments): applying a change splits the
+// covering pieces (O(1) SlicedString views — never a flatten) and splices the new
+// text in, so a keystroke costs O(pieces), not the O(n) cons-flatten a slice+concat
+// per edit forces in V8 (measured: ~1.2ms per edit on 9MB). docFlat caches the
+// joined form for the cold paths that need one (errors, debug views); batch parses
+// set it directly. Reads route through docChar/docText: flat fast path, piece
+// lookup (cursor-cached) otherwise.
+let docPieces = null;
+let docPieceOff = null;
+let docLen = 0;
+let docFlat = null;
+let docCur = 0;
+function docLocate(i) {   // index of the last piece whose start offset is <= i; -1 if there is none
+  let k = docCur;   // try the cached cursor first
+  const po = docPieceOff;
+  const n = po.length;
+  if (k >= n || po[k] > i || (k + 1 < n && po[k + 1] <= i)) {   // cursor miss: i is not inside piece k
+    let lo = 0, hi = n;
+    while (lo < hi) { const m = (lo + hi) >> 1; if (po[m] <= i) lo = m + 1; else hi = m; }   // binary search: lo ends at the first piece starting after i
+    k = lo - 1;
+    docCur = k;   // remember the hit for the next lookup
+  }
+  return k;
+}
+function docChar(i) {   // char code at document offset i
+  if (docFlat !== null) return docFlat.charCodeAt(i);   // flat fast path
+  const k = docLocate(i);
+  return docPieces[k].charCodeAt(i - docPieceOff[k]);   // offset made piece-relative
+}
+function docText(a, b) {   // document text in [a, b), read from the flat cache or stitched from pieces
+  if (docFlat !== null) return docFlat.slice(a, b);   // flat fast path
+  if (b <= a) return '';
+  let k = docLocate(a);
+  const first = docPieces[k];
+  const lo = a - docPieceOff[k];   // start offset inside the first piece
+  if (b - docPieceOff[k] <= first.length) return first.slice(lo, b - docPieceOff[k]);   // range sits inside one piece: a single slice
+  let out = first.slice(lo);   // otherwise take the rest of the first piece, then walk the following ones
+  k++;
+  while (k < docPieces.length && docPieceOff[k] < b) {
+    const piece = docPieces[k];
+    const need = b - docPieceOff[k];   // chars still wanted, counted from this piece's start
+    out += need >= piece.length ? piece : piece.slice(0, need);   // whole piece, or only its prefix on the last one
+    k++;
+  }
+  return out;
+}
+function flattenDoc() {   // joined document text; computed on first use and cached in docFlat
+  if (docFlat === null) docFlat = docPieces.join('');
+  return docFlat;
+}
+// Replace chars [start, end) of the document with text: split the covering pieces, splice text in.
+function applyChange(start, end, text) {
+  const ks = docLocate(start);
+  const ke = docLocate(end > start ? end - 1 : start);
+  const head = docPieces[ks].slice(0, start - docPieceOff[ks]);
+  const tailPiece = end > start ? docPieces[ke] : docPieces[ks];
+  const tailOff = end - docPieceOff[end > start ? ke : ks];
+  const tail = tailPiece.slice(tailOff);
+  const repl = [];
+  if (head.length > 0) repl.push(head);
+  if (text.length > 0) repl.push(text);
+  if (tail.length > 0) repl.push(tail);
+  docPieces.splice(ks, (end > start ? ke : ks) - ks + 1, ...repl);
+  if (docPieces.length === 0) docPieces.push('');   // delete-all: keep one empty piece, docLocate() needs a piece to land on
+  // consolidate when fragmenting (amortized: a join every ≥256 edits)
+  if (docPieces.length > 256) docPieces = [docPieces.join('')];
+  docLen += text.length - (end - start);
+  // rebuild offsets from the splice point (suffix offsets shifted anyway)
+  if (docPieceOff.length !== docPieces.length) docPieceOff.length = docPieces.length;
+  let off = ks > 0 && ks - 1 < docPieces.length ? docPieceOff[ks - 1] + docPieces[ks - 1].length : 0;
+  for (let k2 = ks > 0 ? ks : 0; k2 < docPieces.length; k2++) {
+    docPieceOff[k2] = off;
+    off += docPieces[k2].length;
+  }
+  if (docPieces.length === 1) docPieceOff[0] = 0;
+  docCur = 0;   // the cached cursor may no longer match the new piece list
+  docFlat = null;   // the joined cache is stale after any change
+}
 // ── EOF-relative spans (incremental sessions) ──
 // A token's tkOff/tkEnd may be stored EOF-RELATIVE (value − (srcLen + 1), strictly
 // negative): the decode adds the CURRENT length back, so a pure suffix never needs
@@ -1643,7 +1721,7 @@ function matchPuLitGT(pu) {
   // Split multi-'>' tokens: '>>', '>>>', '>>=', '>>>=' can yield a single '>': shift the
   // columns up one slot and write the '>' + rest pair in place (both born flag-less,
   // matching the old mkPunct pair).
-  if (tkK[pos] === K_PUNCT && tend(pos) - off > 1 && ${e.soa ? 'src.charCodeAt(off) === 62' : "tkText[pos].charCodeAt(0) === 62"}) {
+  if (tkK[pos] === K_PUNCT && tend(pos) - off > 1 && ${e.soa ? 'docChar(off) === 62' : "tkText[pos].charCodeAt(0) === 62"}) {
     const end0 = tend(pos);
     ${e.soa ? '' : 'const restText = tkText[pos].slice(1);'}
     if (tokN === tkCap) growTok();
@@ -1667,7 +1745,7 @@ function matchPuLitGT(pu) {
       tkT[pos] = pu; tkEnd[pos] = off + 1 - srcLenP1; tkFl[pos] = 0;
       tkOff[pos + 1] = off + 1 - srcLenP1; tkFl[pos + 1] = 0;
     }
-    tkT[pos + 1] = ${e.soa ? 'LIT_PU.get(src.slice(off + 1, end0)) ?? 0' : 'LIT_PU.get(restText) ?? 0'};
+    tkT[pos + 1] = ${e.soa ? 'LIT_PU.get(docText(off + 1, end0)) ?? 0' : 'LIT_PU.get(restText) ?? 0'};
     tokN++;
     if (parseLimit < 0) cap = tokN;
     // Token indices shifted: drop the per-rule memo arrays (recreated lazily at the new size).
@@ -1992,7 +2070,7 @@ function emitPrattRule(e: Emitter, a: ReturnType<typeof analyze>, rule: RuleDecl
   e.emit(`          const _h = kids[rowStart[lhs]];`);
   e.emit(`          if (_h < 0 && ((~_h) & 3) === 2) {`);
   e.emit(`            const _ht = absTok[lhs] + ((~_h) >>> 2);`);
-  e.emit(`            const _htext = ${e.soa ? 'src.slice(toff(_ht), tend(_ht))' : 'tkText[_ht]'};`);
+  e.emit(`            const _htext = ${e.soa ? 'docText(toff(_ht), tend(_ht))' : 'tkText[_ht]'};`);
   e.emit(`            if (prefixOps.has(_htext) && !postfixOpValues.has(_htext)) { return -1; }`);
   e.emit(`          }`);
   e.emit(`        }`);
@@ -2227,7 +2305,7 @@ function parseRuleEntry(idx, rid, name, core) {
 
 // Token text at an arbitrary index (cold paths: errors, the tokenAt debug view).
 function tokTextAt(i) {
-  return ${e.soa ? 'src.slice(toff(i), tend(i))' : 'tkText[i]'};
+  return ${e.soa ? 'docText(toff(i), tend(i))' : 'tkText[i]'};
 }
 // The k → type-name inverse, for reconstructing a token object (tokenAt).
 const K_NAMES = [];
@@ -2322,7 +2400,7 @@ function visitCore(entry, fns, charBase, tokBase) {
 
 // Parse to the ARENA: returns the root node id.
 function lexInto(source) {
-${e.soa ? `  tokenize(source);` : String.raw`  src = source;
+${e.soa ? `  tokenize(source);` : String.raw`  docPieces = [source]; docPieceOff = [0]; docLen = source.length; docFlat = source; docCur = 0;
   const _toks = tokenize(source);
   const _n = _toks.length;
   while (tkCap < _n + 1) growTok();
@@ -2375,11 +2453,14 @@ function runParse(entryRule) {
 // Source of the last COMPLETED parse — the token columns, arena and memo describe it.
 // null whenever the module state is not a coherent snapshot (no parse yet, or the last
 // attempt threw), so parseEdited falls back to a full parse.
-let lastSrc = null;
-// Source text of the LIVE tree (unlike lastSrc it survives a rejected edit): the
-// reject path restores the token columns to it so the handle keeps reading the
-// previous tree; only a successful parse/edit moves it.
-let treeSrc = null;
+// Coherent-edit-base flag: false until a parse or edit completes, and again after a
+// rejected attempt (the next edit then falls back to a full re-parse of the document text).
+let lastOk = false;
+// Pieces snapshot of the LIVE tree's text (survives a rejected edit): the reject
+// path re-lexes it so the handle keeps reading the previous tree. The document
+// pieces above advance on EVERY edit, accepted or rejected — the editor's buffer
+// applied the change regardless, and later coordinates are against it.
+let treePieces = null;
 // the LAST parse root's absolute coordinates (the descent origin — see visit/toObject)
 let rootCharBase = 0;
 let rootTokBase = 0;
@@ -2519,6 +2600,8 @@ function runExtend(rid) {
 // uses, made transitive by rowKC: each kid's probe watermark stays at/below the
 // next kid's start, so checking the LAST kept kid bounds them all.
 let surgX = [], surgBase = [], surgA = [], surgB = [];
+// Composed change envelope handed from the text-application step to the window re-lex: editDmgS in shared-prefix coordinates, editDmgE in final-text coordinates.
+let editDmgS = 0, editDmgE = 0;
 function rowKCof(id) {
   const c = rowKC[id];
   if (c !== 0) return c;
@@ -2827,7 +2910,7 @@ function makeDoc() {
     tkK: new tkK.constructor(4096), tkT: new tkT.constructor(4096),
     tkOff: new Int32Array(4096), tkEnd: new Int32Array(4096), tkFl: new Uint8Array(4096),
     tkDp: new Uint8Array(4096), tkPd: new Uint16Array(4096),
-    tkCap: 4096, tokN: 0, src: '', srcLenP1: 1, negFrom: 0x7fffffff,
+    tkCap: 4096, tokN: 0, srcLenP1: 1, negFrom: 0x7fffffff,
     rowRule: new Uint16Array(8192), rowLen: new Int32Array(8192), rowTokLen: new Int32Array(8192),
     rowStart: new Int32Array(8192), rowCount: new Int32Array(8192), rowExt: new Int32Array(8192),
     rowOK: new Uint8Array(8192), rowKC: new Uint8Array(8192),
@@ -2837,7 +2920,9 @@ function makeDoc() {
     kids: new Int32Array(16384), kidRel: new Int32Array(16384), kidTokRel: new Int32Array(16384),
     kidCap: 16384, kidN: 0,
     memoNode: [], memoEnd: [], memoExt: [], memoGen: [], memoGenCur: 0,
-    lastSrc: null, treeSrc: null, rootCharBase: 0, rootTokBase: 0, lastRoot: -1, lastRootTok: 0,
+    lastOk: false, treePieces: null,
+    docPieces: null, docPieceOff: null, docLen: 0, docFlat: null, docCur: 0,
+    rootCharBase: 0, rootTokBase: 0, lastRoot: -1, lastRootTok: 0,
 ${e.soa ? '    parenCachePos: -1, parenCacheStack: [],' : ''}
     altK: null, altT: null, altOff: null, altEnd: null, altFl: null, altDp: null, altPd: null,
     altCap: 0, altN: 0,
@@ -2845,7 +2930,7 @@ ${e.soa ? '    parenCachePos: -1, parenCacheStack: [],' : ''}
 }
 function saveDoc(d) {
   d.tkK = tkK; d.tkT = tkT; d.tkOff = tkOff; d.tkEnd = tkEnd; d.tkFl = tkFl;
-  d.tkDp = tkDp; d.tkPd = tkPd; d.tkCap = tkCap; d.tokN = tokN; d.src = src;
+  d.tkDp = tkDp; d.tkPd = tkPd; d.tkCap = tkCap; d.tokN = tokN;
   d.srcLenP1 = srcLenP1; d.negFrom = negFrom;
   d.rowRule = rowRule; d.rowLen = rowLen; d.rowTokLen = rowTokLen; d.rowStart = rowStart;
   d.rowCount = rowCount; d.rowExt = rowExt; d.rowOK = rowOK; d.rowKC = rowKC; d.rowNF = rowNF;
@@ -2853,7 +2938,9 @@ function saveDoc(d) {
   d.kids = kids; d.kidRel = kidRel; d.kidTokRel = kidTokRel; d.kidCap = kidCap; d.kidN = kidN;
   d.memoNode = memoNode; d.memoEnd = memoEnd; d.memoExt = memoExt; d.memoGen = memoGen;
   d.memoGenCur = memoGenCur;
-  d.lastSrc = lastSrc; d.treeSrc = treeSrc; d.rootCharBase = rootCharBase; d.rootTokBase = rootTokBase;
+  d.lastOk = lastOk; d.treePieces = treePieces;
+  d.docPieces = docPieces; d.docPieceOff = docPieceOff; d.docLen = docLen; d.docFlat = docFlat; d.docCur = docCur;
+  d.rootCharBase = rootCharBase; d.rootTokBase = rootTokBase;
   d.lastRoot = lastRoot; d.lastRootTok = lastRootTok;
 ${e.soa ? '  d.parenCachePos = parenCachePos; d.parenCacheStack = parenCacheStack;' : ''}
   d.altK = altK; d.altT = altT; d.altOff = altOff; d.altEnd = altEnd; d.altFl = altFl;
@@ -2861,7 +2948,7 @@ ${e.soa ? '  d.parenCachePos = parenCachePos; d.parenCacheStack = parenCacheStac
 }
 function loadDoc(d) {
   tkK = d.tkK; tkT = d.tkT; tkOff = d.tkOff; tkEnd = d.tkEnd; tkFl = d.tkFl;
-  tkDp = d.tkDp; tkPd = d.tkPd; tkCap = d.tkCap; tokN = d.tokN; src = d.src;
+  tkDp = d.tkDp; tkPd = d.tkPd; tkCap = d.tkCap; tokN = d.tokN;
   srcLenP1 = d.srcLenP1; negFrom = d.negFrom;
   rowRule = d.rowRule; rowLen = d.rowLen; rowTokLen = d.rowTokLen; rowStart = d.rowStart;
   rowCount = d.rowCount; rowExt = d.rowExt; rowOK = d.rowOK; rowKC = d.rowKC; rowNF = d.rowNF;
@@ -2869,7 +2956,9 @@ function loadDoc(d) {
   kids = d.kids; kidRel = d.kidRel; kidTokRel = d.kidTokRel; kidCap = d.kidCap; kidN = d.kidN;
   memoNode = d.memoNode; memoEnd = d.memoEnd; memoExt = d.memoExt; memoGen = d.memoGen;
   memoGenCur = d.memoGenCur;
-  lastSrc = d.lastSrc; treeSrc = d.treeSrc; rootCharBase = d.rootCharBase; rootTokBase = d.rootTokBase;
+  lastOk = d.lastOk; treePieces = d.treePieces;
+  docPieces = d.docPieces; docPieceOff = d.docPieceOff; docLen = d.docLen; docFlat = d.docFlat; docCur = d.docCur;
+  rootCharBase = d.rootCharBase; rootTokBase = d.rootTokBase;
   lastRoot = d.lastRoot; lastRootTok = d.lastRootTok;
 ${e.soa ? '  parenCachePos = d.parenCachePos; parenCacheStack = d.parenCacheStack;' : ''}
   altK = d.altK; altT = d.altT; altOff = d.altOff; altEnd = d.altEnd; altFl = d.altFl;
@@ -2898,7 +2987,7 @@ function swapBuffers() {
 ${e.soa ? '' : 'let altText = [];'}
 
 function parseCore(source, entryRule) {
-  lastSrc = null;
+  lastOk = false;
   adoptRoot = -1;
   adoptRunPos = -1;
   lexInto(source);
@@ -2914,8 +3003,8 @@ function parseCore(source, entryRule) {
   const root = runParse(entryRule);
   lastRoot = root;
   lastRootTok = rootTokBase;
-  lastSrc = source;
-  treeSrc = source;
+  lastOk = true;
+  treePieces = docPieces.slice();
   return root;
 }
 
@@ -2934,28 +3023,65 @@ function parseCore(source, entryRule) {
 // until then. Lexing is FULL-FILE by design: the lexer carries cross-token state
 // (template nesting, regex context, markup modes), full lexing is a small share of a
 // parse, and the diff is what localizes the damage — not the lexer.
-function editCore(source, entryRule, edits) {
+function editCore(entryRule, edits) {
   try {
-    return editCoreRun(source, entryRule, edits);
+    return editCoreRun(entryRule, edits);
   } catch (e) {
     // REJECTED edit: the splice (and any '>' splits of the failed attempt) already
     // rewrote the token columns to the rejected text, and the append-mode fallback
     // may have grown the arena — but the live tree's ROWS are untouched. Re-lexing
     // the live tree's source restores every read path (leaf spans, visit, next
     // edit's restart anchors); O(n) on the reject path only.
-    if (treeSrc !== null) { lexInto(treeSrc); lastSrc = null; }
+    if (treePieces !== null) {
+      // restore the token columns to the LIVE TREE's text — but the DOCUMENT text
+      // must stay on the rejected content (lexInto/tokenize resets the doc layer
+      // as a side effect, so save it around the re-lex)
+      const kP = docPieces, kO = docPieceOff, kL = docLen, kF = docFlat;
+      lexInto(treePieces.join(''));
+      docPieces = kP; docPieceOff = kO; docLen = kL; docFlat = kF; docCur = 0;
+      lastOk = false;
+    }
     throw e;
   }
 }
-function editCoreRun(source, entryRule, edits) {
+function editCoreRun(entryRule, edits) {
   if (edits === undefined || edits.length === 0) {
-    throw new Error('edit() requires the edit ranges: [{ start, oldEnd, newEnd }] in old/new character coordinates (covering every change); pass [{ start: 0, oldEnd: <old length>, newEnd: <new length> }] to force a full re-parse');
+    throw new Error('edit() requires the changes: [{ start, end, text }] (LSP-style - each edit in the coordinates of the document AFTER the preceding edits in the array)');
+  }
+  // The engine owns the document text: the new source is BUILT from the changes,
+  // so "the ranges do not match the text" is unrepresentable. Each edit is applied
+  // sequentially (LSP incremental-sync semantics); the damage envelope is composed
+  // alongside: dS in prefix coordinates (identical old/new), dE in FINAL
+  // coordinates, the old end recovered through the total delta. V8 cons strings
+  // make the slice+concat construction cheap; the flat-string cost, where a read
+  // path needs one, is the same the caller would have paid building the text.
+  if (docPieces === null) throw new Error('edit() before parse(): no document');
+  const oldLen = docLen;
+  {
+    let dS = 0x7fffffff;
+    let dE = -1;
+    for (let i = 0; i < edits.length; i++) {
+      const ed = edits[i];
+      const start = ed.start, end = ed.end, text = ed.text;
+      if (!(start >= 0 && start <= end && end <= docLen) || typeof text !== 'string') {
+        throw new Error('edit() change #' + i + ' out of range: [' + start + ', ' + end + ') of ' + docLen);
+      }
+      applyChange(start, end, text);
+      const newEnd = start + text.length;
+      const delta = newEnd - end;
+      if (dE > start) dE = dE >= end ? dE + delta : newEnd;
+      if (newEnd > dE) dE = newEnd;
+      if (start < dS) dS = start;
+    }
+    editDmgS = dS;
+    editDmgE = dE;
   }
-  if (lastSrc === null) {
+  if (!lastOk) {
     // No coherent edit base (a previous attempt rejected): full re-parse in APPEND
     // mode — parseCore would reset the arena and destroy the live tree the handle
     // still exposes if THIS parse rejects too. parse() is the only compaction point.
-    lexInto(source);
+    const whole = flattenDoc();
+    lexInto(whole);
     if (memoEnd.length !== MEMO_RULES) {
       memoNode = new Array(MEMO_RULES);
       memoEnd = new Array(MEMO_RULES);
@@ -2968,28 +3094,18 @@ function editCoreRun(source, entryRule, edits) {
     const root = runParse(entryRule);
     lastRoot = root;
     lastRootTok = rootTokBase;
-    lastSrc = source;
-    treeSrc = source;
+    lastOk = true;
+    treePieces = docPieces.slice();
     return root;
   }
-  const oSrc = lastSrc;
-  lastSrc = null;
+  lastOk = false;
 ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
-  // Damage envelope: the caller's edit ranges, merged ([{start, oldEnd, newEnd}] in
-  // old/new coordinates — an editor's change events). The ranges MUST cover every
-  // change: over-claiming only shrinks via the true token-prefix compare below;
-  // under-claiming means text outside the window is never re-lexed (the same
-  // garbage-in contract as tree-sitter's tree.edit). There is deliberately no
-  // char-diff fallback — it would silently spend O(file) scans, and a caller
-  // without ranges can pass the whole-file range for an honest full re-parse.
-  const oldLen = oSrc.length, newLen = source.length;
-  let cs = edits[0].start, ceOld = edits[0].oldEnd, ceNew = edits[0].newEnd;
-  for (let i = 1; i < edits.length; i++) {
-    const ed = edits[i];
-    if (ed.start < cs) cs = ed.start;
-    if (ed.oldEnd > ceOld) ceOld = ed.oldEnd;
-    if (ed.newEnd > ceNew) ceNew = ed.newEnd;
-  }
+  // Damage envelope from the composed changes: prefix coordinates are shared, the
+  // old end comes back through the total delta.
+  const newLen = docLen;
+  const cs = editDmgS < newLen ? editDmgS : newLen;
+  const ceNew = editDmgE < cs ? cs : editDmgE;
+  const ceOld = ceNew - (newLen - oldLen);
   const charDelta = newLen - oldLen;
   // Restart anchor: the last token B ending at/before the damage whose recorded
   // depths are zero and whose shape carries no cross-token lexer flag (')' control-
@@ -3011,10 +3127,28 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   }
   altN = oN;
   swapBuffers();              // live = scratch, alt = OLD stream
-  src = source;
   tokN = 0;
   const startOff = B >= 0 ? (altEnd[B] < 0 ? altEnd[B] + srcLenP1 : altEnd[B]) : 0;
-  const R0 = lexCore(source, startOff, B >= 0 ? altK[B] : -1, B >= 0 ? altT[B] : 0, r0, ceNew, charDelta, cs, initParens);
+  // Window-materialized relex: lexCore reads a SMALL flat slice of the pieces with
+  // an absolute bias; -2 = ran off the window end before resyncing — re-materialize
+  // a larger window and retry (the common case fits the first one).
+  let R0;
+  {
+    let wHi = ceNew + 4096;
+    for (;;) {
+      if (wHi > docLen) wHi = docLen;
+      const windowStr = docText(startOff, wHi);
+      tokN = 0;
+      try {
+        R0 = lexCore(windowStr, 0, B >= 0 ? altK[B] : -1, B >= 0 ? altT[B] : 0, r0, ceNew, charDelta, cs, initParens.slice(), startOff, wHi < docLen);
+      } catch (e2) {
+        if (e2 !== LEX_RETRY) throw e2;
+        R0 = -2;
+      }
+      if (R0 !== -2) break;
+      wHi = wHi >= docLen ? docLen : (wHi - startOff) * 4 + startOff;
+    }
+  }
   const W = tokN;
   const R = R0 >= 0 ? R0 : oN;
   swapBuffers();              // live = OLD stream again; window sits in the alt buffers
@@ -3076,9 +3210,9 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
   tkText = altText; tkText.length = 0;
   altK = oK; altT = oT; altOff = oOff; altEnd = oEnd; altFl = oFl;
   altText = oText;
-  lexInto(source);
+  lexInto(flattenDoc());
   const nN = tokN;
-  const charDelta = source.length - oSrc.length;
+  const charDelta = docLen - oldLen;
   const minN = oN < nN ? oN : nN;
   let p = 0;
   while (p < minN && oK[p] === tkK[p] && oT[p] === tkT[p] && oFl[p] === tkFl[p]
@@ -3118,16 +3252,16 @@ ${e.soa ? String.raw`  // ── M1: WINDOWED re-lex ──
     rootTokBase = adoptRootTok;
     lastRoot = sroot;
     lastRootTok = adoptRootTok;
-    lastSrc = source;
-    treeSrc = source;
+    lastOk = true;
+    treePieces = docPieces.slice();
     return sroot;
   }
   const root = runParse(entryRule);
   adoptRoot = -1;
   lastRoot = root;
   lastRootTok = rootTokBase;
-  lastSrc = source;
-  treeSrc = source;
+  lastOk = true;
+  treePieces = docPieces.slice();
   return root;
 }
 
@@ -3135,7 +3269,7 @@ export { tokenize };
 // ── Module-level API: the DEFAULT document (one shared session; tokenize and the
 // raw tree/tokenAt views read the ACTIVE doc — they are gate/debug surfaces) ──
 export function parse(source, entryRule) { activate(docDefault); return parseCore(source, entryRule); }
-export function parseEdited(source, entryRule, edits) { activate(docDefault); return editCore(source, entryRule, edits); }
+export function parseEdited(entryRule, edits) { activate(docDefault); return editCore(entryRule, edits); }
 export function visit(entry, fns, charBase, tokBase) { activate(docDefault); return visitCore(entry, fns, charBase, tokBase); }
 // ── Handle API: explicit trees over per-instance documents ──
 // const p = createParser(); const cst = p.parse(text); p.edit(cst, next[, edits]);
@@ -3165,10 +3299,10 @@ export function createParser() {
       const root = parseCore(source, entryRule);
       return { d, gen, root };
     },
-    edit(cst, source, edits) {
+    edit(cst, edits) {
       chk(cst);
       activate(d);
-      cst.root = editCore(source, entryUsed, edits);
+      cst.root = editCore(entryUsed, edits);
     },
     visit(cst, fns) { chk(cst); activate(d); return visitCore(cst.root, fns); },
     tree: view,
diff --git a/test/incremental-verify.ts b/test/incremental-verify.ts
index e7f6826..0178d84 100644
--- a/test/incremental-verify.ts
+++ b/test/incremental-verify.ts
@@ -13,11 +13,11 @@ import { emitParser } from '../src/emit-parser.ts';
 const grammar = (await import('../typescript.ts')).default;
 const emPath = '/tmp/emitted-incremental.mjs';
 writeFileSync(emPath, emitParser(grammar));
-type Edit = { start: number; oldEnd: number; newEnd: number };
+type Edit = { start: number; end: number; text: string };
 type Cst = { root: number };
 type Parser = {
   parse(s: string): Cst;
-  edit(cst: Cst, s: string, edits?: Edit[]): void;
+  edit(cst: Cst, edits: Edit[]): void;
   visit(cst: Cst, fns: object): void;
   tree: import('./emitted-obj.ts').TreeView;
 };
@@ -46,16 +46,16 @@ function mutate(text: string): { next: string; edit: Edit } {
     case 0: { // insert a small fragment at a random position
       const at = randInt(text.length);
       const ins = INSERTS[randInt(INSERTS.length)];
-      return { next: text.slice(0, at) + ins + text.slice(at), edit: { start: at, oldEnd: at, newEnd: at + ins.length } };
+      return { next: text.slice(0, at) + ins + text.slice(at), edit: { start: at, end: at, text: ins } };
     }
     case 1: { // delete a small span
       const at = randInt(Math.max(1, text.length - 8));
       const n = 1 + randInt(6);
-      return { next: text.slice(0, at) + text.slice(at + n), edit: { start: at, oldEnd: at + n, newEnd: at } };
+      return { next: text.slice(0, at) + text.slice(at + n), edit: { start: at, end: at + n, text: '' } };
     }
     case 2: { // replace a character
       const at = randInt(Math.max(1, text.length - 1));
-      return { next: text.slice(0, at) + 'z' + text.slice(at + 1), edit: { start: at, oldEnd: at + 1, newEnd: at + 1 } };
+      return { next: text.slice(0, at) + 'z' + text.slice(at + 1), edit: { start: at, end: at + 1, text: 'z' } };
     }
     case 3: { // insert a whole statement at a line boundary
       const lines = text.split('\n');
@@ -63,11 +63,11 @@ function mutate(text: string): { next: string; edit: Edit } {
       const stmt = STMTS[randInt(STMTS.length)].trimEnd();
       lines.splice(at, 0, stmt);
       const start = at === 0 ? 0 : lines.slice(0, at).join('\n').length + 1;
-      return { next: lines.join('\n'), edit: { start, oldEnd: start, newEnd: start + stmt.length + 1 } };
+      return { next: lines.join('\n'), edit: { start, end: start, text: stmt + '\n' } };
     }
     default: { // append at the end (the pure-prefix reuse case)
       const stmt = '\n' + STMTS[randInt(STMTS.length)];
-      return { next: text + stmt, edit: { start: text.length, oldEnd: text.length, newEnd: text.length + stmt.length } };
+      return { next: text + stmt, edit: { start: text.length, end: text.length, text: stmt } };
     }
   }
 }
@@ -87,13 +87,13 @@ const STEPS = 30;
 // strict-< restart anchor; every one must match fresh (tree or reject) exactly.
 // Test-side range derivation for constructed pairs (the ENGINE requires explicit
 // ranges — a caller without them passes the whole-file range for a full re-parse).
-function diffRange(a: string, b: string): Edit {
+function diffChange(a: string, b: string): Edit {
   const minL = Math.min(a.length, b.length);
   let s = 0;
   while (s < minL && a.charCodeAt(s) === b.charCodeAt(s)) s++;
   let e = 0;
   while (e < minL - s && a.charCodeAt(a.length - 1 - e) === b.charCodeAt(b.length - 1 - e)) e++;
-  return { start: s, oldEnd: a.length - e, newEnd: b.length - e };
+  return { start: s, end: a.length - e, text: b.slice(s, b.length - e) };
 }
 
 const GLUE: Array<[string, string]> = [
@@ -117,7 +117,7 @@ for (const [base, edited] of GLUE) {
   let fe: string | null = null, ie: string | null = null;
   let fr = -1;
   try { fr = fresh.parse(edited); } catch (e) { fe = (e as Error).message; }
-  try { session.edit(c0, edited, [diffRange(base, edited)]); } catch (e) { ie = (e as Error).message; }
+  try { session.edit(c0, [diffChange(base, edited)]); } catch (e) { ie = (e as Error).message; }
   if (fe !== null || ie !== null) {
     if ((fe === null) !== (ie === null)) { mismatch++; if (failures.length < 5) failures.push(`glue «${edited.slice(0, 30)}»: fresh ${fe ? 'reject' : 'accept'} / incremental ${ie ? 'reject' : 'accept'}`); }
     else bothReject++;
@@ -141,14 +141,31 @@ for (const f of FILES) {
     const tf1 = performance.now();
     let incErr: string | null = null;
     const ti0 = performance.now();
-    try { session.edit(cst, next, [edit]); } catch (e) { incErr = (e as Error).message; }
+    try { session.edit(cst, [edit]); } catch (e) { incErr = (e as Error).message; }
     const ti1 = performance.now();
     if (freshErr !== null || incErr !== null) {
       if ((freshErr === null) !== (incErr === null)) {
         mismatch++;
         if (failures.length < 5) failures.push(`${f.split('/').pop()} step ${k}: fresh ${freshErr ? 'reject' : 'accept'} / incremental ${incErr ? 'reject' : 'accept'}\n    fresh: ${freshErr ?? '-'}\n    inc:   ${incErr ?? '-'}`);
       } else bothReject++;
-      // rejected text: the handle stays on the previous tree; do not advance
+      // REJECTED text: the handle stays on the previous tree, but the DOCUMENT
+      // advances (editor-buffer model — the buffer applied the change regardless,
+      // and the engine's document text tracks it). Model the editor's UNDO: revert via a
+      // diff edit in the rejected text's coordinates; it must be accepted and
+      // byte-identical to a fresh parse of the restored text.
+      try {
+        session.edit(cst, [diffChange(next, text)]);
+        const rfr = fresh.parse(text);
+        const ra = JSON.stringify(objectify(fresh.tree, (fns) => fresh.visit(rfr, fns)));
+        const rb = JSON.stringify(objectify(session.tree, (fns) => session.visit(cst, fns)));
+        if (ra !== rb) {
+          mismatch++;
+          if (failures.length < 5) failures.push(`${f.split('/').pop()} step ${k}: REVERT tree diverges`);
+        }
+      } catch (e2) {
+        mismatch++;
+        if (failures.length < 5) failures.push(`${f.split('/').pop()} step ${k}: revert rejected: ${(e2 as Error).message.slice(0, 50)}`);
+      }
       continue;
     }
     tFresh += tf1 - tf0; tInc += ti1 - ti0;
diff --git a/test/multi-doc.ts b/test/multi-doc.ts
index dbe5f6e..d980cbb 100644
--- a/test/multi-doc.ts
+++ b/test/multi-doc.ts
@@ -15,9 +15,9 @@ import { emitParser } from '../src/emit-parser.ts';
 const grammar = (await import('../typescript.ts')).default;
 const emPath = '/tmp/emitted-multidoc.mjs';
 writeFileSync(emPath, emitParser(grammar));
-type Edit = { start: number; oldEnd: number; newEnd: number };
+type Edit = { start: number; end: number; text: string };
 type Cst = { root: number };
-type Parser = { parse(s: string): Cst; edit(cst: Cst, s: string, edits?: Edit[]): void; visit(cst: Cst, fns: object): void; tree: import('./emitted-obj.ts').TreeView };
+type Parser = { parse(s: string): Cst; edit(cst: Cst, edits: Edit[]): void; visit(cst: Cst, fns: object): void; tree: import('./emitted-obj.ts').TreeView };
 type Em = { parse(s: string): number; createParser(): Parser };
 const em = (await import(emPath + '?v=' + process.pid)) as Em;
 
@@ -39,27 +39,36 @@ function mutate(text: string): { next: string; edit: Edit } {
     case 0: {
       const at = randInt(text.length);
       const ins = INS[randInt(INS.length)];
-      return { next: text.slice(0, at) + ins + text.slice(at), edit: { start: at, oldEnd: at, newEnd: at + ins.length } };
+      return { next: text.slice(0, at) + ins + text.slice(at), edit: { start: at, end: at, text: ins } };
     }
     case 1: {
       const at = randInt(Math.max(1, text.length - 6));
       const n = 1 + randInt(4);
-      return { next: text.slice(0, at) + text.slice(at + n), edit: { start: at, oldEnd: at + n, newEnd: at } };
+      return { next: text.slice(0, at) + text.slice(at + n), edit: { start: at, end: at + n, text: '' } };
     }
     default: {
       const at = randInt(Math.max(1, text.length - 1));
-      return { next: text.slice(0, at) + 'z' + text.slice(at + 1), edit: { start: at, oldEnd: at + 1, newEnd: at + 1 } };
+      return { next: text.slice(0, at) + 'z' + text.slice(at + 1), edit: { start: at, end: at + 1, text: 'z' } };
     }
   }
 }
 
+function diffChange(a: string, b: string): Edit { // one change turning a into b (common prefix and suffix trimmed)
+  const minL = Math.min(a.length, b.length);
+  let s = 0; // length of the common prefix
+  while (s < minL && a.charCodeAt(s) === b.charCodeAt(s)) s++;
+  let e = 0; // length of the common suffix, capped at minL - s so it never overlaps the prefix
+  while (e < minL - s && a.charCodeAt(a.length - 1 - e) === b.charCodeAt(b.length - 1 - e)) e++;
+  return { start: s, end: a.length - e, text: b.slice(s, b.length - e) }; // range in a's coordinates, replacement text from b
+}
+
 const p1 = em.createParser();
 const p2 = em.createParser();
 const f = em.createParser();
 let cstA = p1.parse(textA);
 let cstB = p2.parse(textB);
 
-let steps = 0, equal = 0, bothReject = 0, mismatch = 0;
+let steps = 0, equal = 0, bothReject = 0, mismatch = 0, reverts = 0;
 const failures: string[] = [];
 for (let k = 0; k < 60; k++) {
   const onA = (k & 1) === 0;
@@ -69,10 +78,29 @@ for (let k = 0; k < 60; k++) {
   let fe: string | null = null, ie: string | null = null;
   let fc: Cst | null = null;
   try { fc = f.parse(next); } catch (e) { fe = (e as Error).message; }
-  try { (onA ? p1 : p2).edit(onA ? cstA : cstB, next, [edit]); } catch (e) { ie = (e as Error).message; }
+  try { (onA ? p1 : p2).edit(onA ? cstA : cstB, [edit]); } catch (e) { ie = (e as Error).message; }
   if (fe !== null || ie !== null) {
     if ((fe === null) !== (ie === null)) { mismatch++; if (failures.length < 5) failures.push(`step ${k} (${onA ? 'A' : 'B'}): fresh ${fe ? 'reject' : 'accept'} / edit ${ie ? 'reject' : 'accept'}`); }
     else bothReject++;
+    // the DOCUMENT advances on reject (editor-buffer model): later coordinates
+    // are against the rejected text. Model the editor's UNDO: revert to the last
+    // good text via a diff edit in the rejected text's coordinates — it must be
+    // ACCEPTED and byte-identical to a fresh parse (the post-reject recovery path
+    // gets exercised every time a mutation breaks the document).
+    const good = onA ? textA : textB;
+    const rv = diffChange(next, good);
+    try {
+      (onA ? p1 : p2).edit(onA ? cstA : cstB, [rv]);
+      const fb = f.parse(good);
+      const ra = JSON.stringify(objectify(f.tree, (fns) => f.visit(fb, fns)));
+      const qq = onA ? p1 : p2;
+      const rb = JSON.stringify(objectify(qq.tree, (fns) => qq.visit(onA ? cstA : cstB, fns)));
+      if (ra === rb) reverts++;
+      else { mismatch++; if (failures.length < 5) failures.push(`step ${k} (${onA ? 'A' : 'B'}): REVERT tree diverges`); }
+    } catch (e2) {
+      mismatch++;
+      if (failures.length < 5) failures.push(`step ${k} (${onA ? 'A' : 'B'}): revert rejected: ${(e2 as Error).message.slice(0, 50)}`);
+    }
     continue;
   }
   // mix the module-level default doc in between: it must not disturb either instance
@@ -99,23 +127,29 @@ let contract = 0;
   const c1 = p.parse('const a = 1;');
   const obj = (h: Cst) => JSON.stringify(objectify(p.tree, (fns) => p.visit(h, fns)));
   const before = obj(c1);
-  p.edit(c1, 'const ab = 1;', [{ start: 7, oldEnd: 7, newEnd: 8 }]);
+  p.edit(c1, [{ start: 7, end: 7, text: 'b' }]);   // 'const a = 1;' -> 'const ab = 1;'
   const after = obj(c1);
   if (after !== before && after.includes('"end":8')) contract++;   // same handle, new tree
   else failures.push('in-place edit did not update the handle');
-  try { p2.edit(c1, 'const y = 3;', [{ start: 0, oldEnd: 13, newEnd: 12 }]); failures.push('foreign handle did not throw'); } catch { contract++; }
+  try { p2.edit(c1, [{ start: 0, end: 1, text: 'q' }]); failures.push('foreign handle did not throw'); } catch { contract++; }
   let rejected = false;
-  try { p.edit(c1, 'const ] = ;', [{ start: 6, oldEnd: 13, newEnd: 11 }]); } catch { rejected = true; }
+  try { p.edit(c1, [{ start: 6, end: 8, text: ']' }]); } catch { rejected = true; }   // 'const ab…' -> 'const ] = 1;'
   if (rejected && obj(c1) === after) contract++;   // reject keeps the tree
   else failures.push('reject-then-read flow broke');
+  // coordinates after a REJECT are against the editor's buffer (the rejected text):
+  // fixing the same spot in those coordinates must recover the session
+  let recovered = false;
+  try { p.edit(c1, [{ start: 6, end: 7, text: 'ab' }]); recovered = true; } catch { /* must not throw */ }
+  if (recovered && obj(c1).includes('"end":13')) contract++;   // 'const ] = 1;' -> 'const ab = 1;'
+  else failures.push('post-reject coordinates did not track the document text');
   const c2 = p.parse('let q = 1;');
   try { obj(c1); failures.push('re-opened document: old handle did not throw'); } catch { contract++; }
   // missing ranges: ONE usage only — edit() without ranges must throw, not
   // silently fall back to O(file) diff scans
   let needsRanges = false;
-  try { (p as unknown as { edit(c: Cst, s: string): void }).edit(c2, 'let q = 2;'); } catch { needsRanges = true; }
+  try { (p as unknown as { edit(c: Cst): void }).edit(c2); } catch { needsRanges = true; }
   if (needsRanges) contract++;
-  else failures.push('edit() without ranges did not throw');
+  else failures.push('edit() without changes did not throw');
   // a REJECTING parse() resets the arena too — it must invalidate prior handles
   try { p.parse('const ] = ;'); } catch { /* expected reject */ }
   let dead = false;
@@ -124,9 +158,9 @@ let contract = 0;
   else failures.push('rejecting parse() left the old handle readable over a reset arena');
 }
 
-console.log(`multi-doc: ${equal} equal · ${bothReject} both-reject · ${mismatch} MISMATCH (${steps} interleaved steps) · contract ${contract}/6`);
+console.log(`multi-doc: ${equal} equal · ${bothReject} both-reject (${reverts} reverts verified) · ${mismatch} MISMATCH (${steps} interleaved steps) · contract ${contract}/7`);
 for (const s of failures) console.log('  ✗ ' + s);
-if (mismatch > 0 || contract !== 6 || failures.length > 0) {
+if (mismatch > 0 || contract !== 7 || failures.length > 0) {
   console.error('✗ document isolation / handle contract violated');
   process.exit(1);
 }