From ed6affcc53e9c374a33bc2b573bffc4dc6c8cf80 Mon Sep 17 00:00:00 2001
From: Andrew Campbell <andrewcampbell@Andrews-Mini.lan>
Date: Mon, 16 Mar 2026 23:50:00 -0500
Subject: [PATCH 1/6] feat: generate codex skill docs

Add host-aware skill generation so the shared templates produce Codex-native skill files under .agents alongside the existing Claude output.
---
 .agents/skills/gstack-browse/SKILL.md         | 307 +++++++++
 .../gstack-design-consultation/SKILL.md       | 372 ++++++++++
 .../skills/gstack-document-release/SKILL.md   | 428 ++++++++++++
 .../skills/gstack-plan-ceo-review/SKILL.md    | 565 ++++++++++++++++
 .../skills/gstack-plan-design-review/SKILL.md | 554 +++++++++++++++
 .../skills/gstack-plan-eng-review/SKILL.md    | 255 +++++++
 .../skills/gstack-qa-design-review/SKILL.md   | 638 ++++++++++++++++++
 .agents/skills/gstack-qa-only/SKILL.md        | 449 ++++++++++++
 .agents/skills/gstack-qa/SKILL.md             | 611 +++++++++++++++++
 .agents/skills/gstack-retro/SKILL.md          | 543 +++++++++++++++
 .agents/skills/gstack-review/SKILL.md         | 259 +++++++
 .../gstack-setup-browser-cookies/SKILL.md     | 152 +++++
 .agents/skills/gstack-ship/SKILL.md           | 495 ++++++++++++++
 .agents/skills/gstack-upgrade/SKILL.md        | 195 ++++++
 .agents/skills/gstack/CHANGELOG.md            |   1 +
 .agents/skills/gstack/SKILL.md                | 439 ++++++++++++
 .agents/skills/gstack/VERSION                 |   1 +
 .agents/skills/gstack/bin                     |   1 +
 .agents/skills/gstack/browse                  |   1 +
 .agents/skills/gstack/design-consultation     |   1 +
 .agents/skills/gstack/document-release        |   1 +
 .agents/skills/gstack/gstack-upgrade          |   1 +
 .agents/skills/gstack/package.json            |   1 +
 .agents/skills/gstack/plan-ceo-review         |   1 +
 .agents/skills/gstack/plan-design-review      |   1 +
 .agents/skills/gstack/plan-eng-review         |   1 +
 .agents/skills/gstack/qa                      |   1 +
 .agents/skills/gstack/qa-design-review        |   1 +
 .agents/skills/gstack/qa-only                 |   1 +
 .agents/skills/gstack/retro                   |   1 +
 .agents/skills/gstack/review                  |   1 +
 .agents/skills/gstack/scripts                 |   1 +
 .agents/skills/gstack/setup                   |   1 +
 .agents/skills/gstack/setup-browser-cookies   |   1 +
 .agents/skills/gstack/ship                    |   1 +
 document-release/SKILL.md                     |   2 +-
 document-release/SKILL.md.tmpl                |   2 +-
 gstack-upgrade/SKILL.md.tmpl                  |  34 +-
 package.json                                  |   2 +-
 plan-ceo-review/SKILL.md.tmpl                 |   2 +-
 plan-eng-review/SKILL.md.tmpl                 |   2 +-
 review/SKILL.md.tmpl                          |   4 +-
 scripts/gen-skill-docs.ts                     | 323 +++++++--
 ship/SKILL.md.tmpl                            |   6 +-
 44 files changed, 6583 insertions(+), 76 deletions(-)
 create mode 100644 .agents/skills/gstack-browse/SKILL.md
 create mode 100644 .agents/skills/gstack-design-consultation/SKILL.md
 create mode 100644 .agents/skills/gstack-document-release/SKILL.md
 create mode 100644 .agents/skills/gstack-plan-ceo-review/SKILL.md
 create mode 100644 .agents/skills/gstack-plan-design-review/SKILL.md
 create mode 100644 .agents/skills/gstack-plan-eng-review/SKILL.md
 create mode 100644 .agents/skills/gstack-qa-design-review/SKILL.md
 create mode 100644 .agents/skills/gstack-qa-only/SKILL.md
 create mode 100644 .agents/skills/gstack-qa/SKILL.md
 create mode 100644 .agents/skills/gstack-retro/SKILL.md
 create mode 100644 .agents/skills/gstack-review/SKILL.md
 create mode 100644 .agents/skills/gstack-setup-browser-cookies/SKILL.md
 create mode 100644 .agents/skills/gstack-ship/SKILL.md
 create mode 100644 .agents/skills/gstack-upgrade/SKILL.md
 create mode 120000 .agents/skills/gstack/CHANGELOG.md
 create mode 100644 .agents/skills/gstack/SKILL.md
 create mode 120000 .agents/skills/gstack/VERSION
 create mode 120000 .agents/skills/gstack/bin
 create mode 120000 .agents/skills/gstack/browse
 create mode 120000 .agents/skills/gstack/design-consultation
 create mode 120000 .agents/skills/gstack/document-release
 create mode 120000 .agents/skills/gstack/gstack-upgrade
 create mode 120000 .agents/skills/gstack/package.json
 create mode 120000 .agents/skills/gstack/plan-ceo-review
 create mode 120000 .agents/skills/gstack/plan-design-review
 create mode 120000 .agents/skills/gstack/plan-eng-review
 create mode 120000 .agents/skills/gstack/qa
 create mode 120000 .agents/skills/gstack/qa-design-review
 create mode 120000 .agents/skills/gstack/qa-only
 create mode 120000 .agents/skills/gstack/retro
 create mode 120000 .agents/skills/gstack/review
 create mode 120000 .agents/skills/gstack/scripts
 create mode 120000 .agents/skills/gstack/setup
 create mode 120000 .agents/skills/gstack/setup-browser-cookies
 create mode 120000 .agents/skills/gstack/ship

diff --git a/.agents/skills/gstack-browse/SKILL.md b/.agents/skills/gstack-browse/SKILL.md
new file mode 100644
index 0000000..6cab766
--- /dev/null
+++ b/.agents/skills/gstack-browse/SKILL.md
@@ -0,0 +1,307 @@
+---
+name: gstack-browse
+description: |
+  Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with
+  elements, verify page state, diff before/after actions, take annotated screenshots, check
+  responsive layouts, test forms and uploads, handle dialogs, and assert element states.
+  ~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a
+  user flow, or file a bug with evidence.
+---
+<!-- AUTO-GENERATED from browse/SKILL.md.tmpl — do not edit directly -->
+<!-- Regenerate: bun run gen:skill-docs --host codex -->
+
+## Preamble (run first)
+
+```bash
+_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+[ -n "$_UPD" ] && echo "$_UPD" || true
+mkdir -p ~/.gstack/sessions
+touch ~/.gstack/sessions/"$PPID"
+_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
+find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
+echo "BRANCH: $_BRANCH"
+```
+
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+
+## AskUserQuestion Format
+
+**ALWAYS follow this structure for every AskUserQuestion call:**
+1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
+2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called.
+3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]`
+4. **Options:** Lettered options: `A) ... B) ... C) ...`
+
+Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+
+Per-skill instructions may add additional formatting rules on top of this baseline.
+
+## Contributor Mode
+
+If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better.
+
+**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better!
+
+**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore.
+
+**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs.
+
+**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer):
+
+```
+# {Title}
+
+Hey gstack team — ran into this while using $gstack-{skill-name}:
+
+**What I was trying to do:** {what the user/agent was attempting}
+**What happened instead:** {what actually happened}
+**My rating:** {0-10} — {one sentence on why it wasn't a 10}
+
+## Steps to reproduce
+1. {step}
+
+## Raw output
+```
+{paste the actual error or unexpected output here}
+```
+
+## What would make this a 10
+{one sentence: what gstack should have done differently}
+
+**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** $gstack-{skill}
+```
+
+Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
+
+# browse: QA Testing & Dogfooding
+
+Persistent headless Chromium. First call auto-starts (~3s), then ~100ms per command.
+State persists between calls (cookies, tabs, login sessions).
+
+## SETUP (run this check BEFORE any browse command)
+
+```bash
+_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
+B=""
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
+if [ -x "$B" ]; then
+  echo "READY: $B"
+else
+  echo "NEEDS_SETUP"
+fi
+```
+
+If `NEEDS_SETUP`:
+1. Tell the user: "gstack browse needs a one-time build (~10 seconds). OK to proceed?" Then STOP and wait.
+2. Run: `cd <SKILL_DIR> && ./setup --host codex`
+3. If `bun` is not installed: `curl -fsSL https://bun.sh/install | bash`
+
+## Core QA Patterns
+
+### 1. Verify a page loads correctly
+```bash
+$B goto https://yourapp.com
+$B text                          # content loads?
+$B console                       # JS errors?
+$B network                       # failed requests?
+$B is visible ".main-content"    # key elements present?
+```
+
+### 2. Test a user flow
+```bash
+$B goto https://app.com/login
+$B snapshot -i                   # see all interactive elements
+$B fill @e3 "user@test.com"
+$B fill @e4 "password"
+$B click @e5                     # submit
+$B snapshot -D                   # diff: what changed after submit?
+$B is visible ".dashboard"       # success state present?
+```
+
+### 3. Verify an action worked
+```bash
+$B snapshot                      # baseline
+$B click @e3                     # do something
+$B snapshot -D                   # unified diff shows exactly what changed
+```
+
+### 4. Visual evidence for bug reports
+```bash
+$B snapshot -i -a -o /tmp/annotated.png   # labeled screenshot
+$B screenshot /tmp/bug.png                # plain screenshot
+$B console                                # error log
+```
+
+### 5. Find all clickable elements (including non-ARIA)
+```bash
+$B snapshot -C                   # finds divs with cursor:pointer, onclick, tabindex
+$B click @c1                     # interact with them
+```
+
+### 6. Assert element states
+```bash
+$B is visible ".modal"
+$B is enabled "#submit-btn"
+$B is disabled "#submit-btn"
+$B is checked "#agree-checkbox"
+$B is editable "#name-field"
+$B is focused "#search-input"
+$B js "document.body.textContent.includes('Success')"
+```
+
+### 7. Test responsive layouts
+```bash
+$B responsive /tmp/layout        # mobile + tablet + desktop screenshots
+$B viewport 375x812              # or set specific viewport
+$B screenshot /tmp/mobile.png
+```
+
+### 8. Test file uploads
+```bash
+$B upload "#file-input" /path/to/file.pdf
+$B is visible ".upload-success"
+```
+
+### 9. Test dialogs
+```bash
+$B dialog-accept "yes"           # set up handler
+$B click "#delete-button"        # trigger dialog
+$B dialog                        # see what appeared
+$B snapshot -D                   # verify deletion happened
+```
+
+### 10. Compare environments
+```bash
+$B diff https://staging.app.com https://prod.app.com
+```
+
+## Snapshot Flags
+
+The snapshot is your primary tool for understanding and interacting with pages.
+
+```
+-i        --interactive           Interactive elements only (buttons, links, inputs) with @e refs
+-c        --compact               Compact (no empty structural nodes)
+-d <N>    --depth                 Limit tree depth (0 = root only, default: unlimited)
+-s <sel>  --selector              Scope to CSS selector
+-D        --diff                  Unified diff against previous snapshot (first call stores baseline)
+-a        --annotate              Annotated screenshot with red overlay boxes and ref labels
+-o <path> --output                Output path for annotated screenshot (default: /tmp/browse-annotated.png)
+-C        --cursor-interactive    Cursor-interactive elements (@c refs — divs with pointer, onclick)
+```
+
+All flags can be combined freely. `-o` only applies when `-a` is also used.
+Example: `$B snapshot -i -a -C -o /tmp/annotated.png`
+
+**Ref numbering:** @e refs are assigned sequentially (@e1, @e2, ...) in tree order.
+@c refs from `-C` are numbered separately (@c1, @c2, ...).
+
+After snapshot, use @refs as selectors in any command:
+```bash
+$B click @e3       $B fill @e4 "value"     $B hover @e1
+$B html @e2        $B css @e5 "color"      $B attrs @e6
+$B click @c1       # cursor-interactive ref (from -C)
+```
+
+**Output format:** indented accessibility tree with @ref IDs, one element per line.
+```
+  @e1 [heading] "Welcome" [level=1]
+  @e2 [textbox] "Email"
+  @e3 [button] "Submit"
+```
+
+Refs are invalidated on navigation — run `snapshot` again after `goto`.
+
+## Full Command List
+
+### Navigation
+| Command | Description |
+|---------|-------------|
+| `back` | History back |
+| `forward` | History forward |
+| `goto <url>` | Navigate to URL |
+| `reload` | Reload page |
+| `url` | Print current URL |
+
+### Reading
+| Command | Description |
+|---------|-------------|
+| `accessibility` | Full ARIA tree |
+| `forms` | Form fields as JSON |
+| `html [selector]` | innerHTML of selector (throws if not found), or full page HTML if no selector given |
+| `links` | All links as "text → href" |
+| `text` | Cleaned page text |
+
+### Interaction
+| Command | Description |
+|---------|-------------|
+| `click <sel>` | Click element |
+| `cookie <name>=<value>` | Set cookie on current page domain |
+| `cookie-import <json>` | Import cookies from JSON file |
+| `cookie-import-browser [browser] [--domain d]` | Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import) |
+| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt. Optional text is sent as the prompt response |
+| `dialog-dismiss` | Auto-dismiss next dialog |
+| `fill <sel> <val>` | Fill input |
+| `header <name>:<value>` | Set custom request header (colon-separated, sensitive values auto-redacted) |
+| `hover <sel>` | Hover element |
+| `press <key>` | Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter |
+| `scroll [sel]` | Scroll element into view, or scroll to page bottom if no selector |
+| `select <sel> <val>` | Select dropdown option by value, label, or visible text |
+| `type <text>` | Type into focused element |
+| `upload <sel> <file> [file2...]` | Upload file(s) |
+| `useragent <string>` | Set user agent |
+| `viewport <WxH>` | Set viewport size |
+| `wait <sel|--networkidle|--load>` | Wait for element, network idle, or page load (timeout: 15s) |
+
+### Inspection
+| Command | Description |
+|---------|-------------|
+| `attrs <sel|@ref>` | Element attributes as JSON |
+| `console [--clear|--errors]` | Console messages (--errors filters to error/warning) |
+| `cookies` | All cookies as JSON |
+| `css <sel> <prop>` | Computed CSS value |
+| `dialog [--clear]` | Dialog messages |
+| `eval <file>` | Run JavaScript from file and return result as string (path must be under /tmp or cwd) |
+| `is <prop> <sel>` | State check (visible/hidden/enabled/disabled/checked/editable/focused) |
+| `js <expr>` | Run JavaScript expression and return result as string |
+| `network [--clear]` | Network requests |
+| `perf` | Page load timings |
+| `storage [set k v]` | Read all localStorage + sessionStorage as JSON, or set <key> <value> to write localStorage |
+
+### Visual
+| Command | Description |
+|---------|-------------|
+| `diff <url1> <url2>` | Text diff between pages |
+| `pdf [path]` | Save as PDF |
+| `responsive [prefix]` | Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc. |
+| `screenshot [--viewport] [--clip x,y,w,h] [selector|@ref] [path]` | Save screenshot (supports element crop via CSS/@ref, --clip region, --viewport) |
+
+### Snapshot
+| Command | Description |
+|---------|-------------|
+| `snapshot [flags]` | Accessibility tree with @e refs for element selection. Flags: -i interactive only, -c compact, -d N depth limit, -s sel scope, -D diff vs previous, -a annotated screenshot, -o path output, -C cursor-interactive @c refs |
+
+### Meta
+| Command | Description |
+|---------|-------------|
+| `chain` | Run commands from JSON stdin. Format: [["cmd","arg1",...],...] |
+
+### Tabs
+| Command | Description |
+|---------|-------------|
+| `closetab [id]` | Close tab |
+| `newtab [url]` | Open new tab |
+| `tab <id>` | Switch to tab |
+| `tabs` | List open tabs |
+
+### Server
+| Command | Description |
+|---------|-------------|
+| `restart` | Restart server |
+| `status` | Health check |
+| `stop` | Shutdown server |
diff --git a/.agents/skills/gstack-design-consultation/SKILL.md b/.agents/skills/gstack-design-consultation/SKILL.md
new file mode 100644
index 0000000..e1ceadb
--- /dev/null
+++ b/.agents/skills/gstack-design-consultation/SKILL.md
@@ -0,0 +1,372 @@
+---
+name: gstack-design-consultation
+description: |
+  Design consultation: understands your product, researches competitors, proposes a
+  complete design system (aesthetic, typography, color, layout, spacing, motion), and
+  generates font+color preview pages. Creates DESIGN.md as your project's design source
+  of truth. For existing sites, use $gstack-plan-design-review to infer the system instead.
+---
+<!-- AUTO-GENERATED from design-consultation/SKILL.md.tmpl — do not edit directly -->
+<!-- Regenerate: bun run gen:skill-docs --host codex -->
+
+## Preamble (run first)
+
+```bash
+_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+[ -n "$_UPD" ] && echo "$_UPD" || true
+mkdir -p ~/.gstack/sessions
+touch ~/.gstack/sessions/"$PPID"
+_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
+find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
+echo "BRANCH: $_BRANCH"
+```
+
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+
+## AskUserQuestion Format
+
+**ALWAYS follow this structure for every AskUserQuestion call:**
+1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
+2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called.
+3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]`
+4. **Options:** Lettered options: `A) ... B) ... C) ...`
+
+Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+
+Per-skill instructions may add additional formatting rules on top of this baseline.
+
+## Contributor Mode
+
+If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better.
+
+**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better!
+
+**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore.
+
+**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs.
+
+**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer):
+
+```
+# {Title}
+
+Hey gstack team — ran into this while using $gstack-{skill-name}:
+
+**What I was trying to do:** {what the user/agent was attempting}
+**What happened instead:** {what actually happened}
+**My rating:** {0-10} — {one sentence on why it wasn't a 10}
+
+## Steps to reproduce
+1. {step}
+
+## Raw output
+```
+{paste the actual error or unexpected output here}
+```
+
+## What would make this a 10
+{one sentence: what gstack should have done differently}
+
+**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** $gstack-{skill}
+```
+
+Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
+
+# $gstack-design-consultation: Your Design System, Built Together
+
+You are a senior product designer with strong opinions about typography, color, and visual systems. You don't present menus — you listen, think, research, and propose. You're opinionated but not dogmatic. You explain your reasoning and welcome pushback.
+
+**Your posture:** Design consultant, not form wizard. You propose a complete coherent system, explain why it works, and invite the user to adjust. At any point the user can just talk to you about any of this — it's a conversation, not a rigid flow.
+
+---
+
+## Phase 0: Pre-checks
+
+**Check for existing DESIGN.md:**
+
+```bash
+ls DESIGN.md design-system.md 2>/dev/null || echo "NO_DESIGN_FILE"
+```
+
+- If a DESIGN.md exists: Read it. Ask the user: "You already have a design system. Want to **update** it, **start fresh**, or **cancel**?"
+- If no DESIGN.md: continue.
+
+**Gather product context from the codebase:**
+
+```bash
+cat README.md 2>/dev/null | head -50
+cat package.json 2>/dev/null | head -20
+ls src/ app/ pages/ components/ 2>/dev/null | head -30
+```
+
+Look for brainstorm output:
+
+```bash
+SLUG=$(git remote get-url origin 2>/dev/null | sed 's|.*[:/]\([^/]*/[^/]*\)\.git$|\1|;s|.*[:/]\([^/]*/[^/]*\)$|\1|' | tr '/' '-')
+ls ~/.gstack/projects/$SLUG/*brainstorm* 2>/dev/null | head -5
+ls .context/*brainstorm* .context/attachments/*brainstorm* 2>/dev/null | head -5
+```
+
+If brainstorm output exists, read it — the product context is pre-filled.
+
+If the codebase is empty and purpose is unclear, say: *"I don't have a clear picture of what you're building yet. Want to brainstorm first with `/brainstorm`? Once we know the product direction, we can set up the design system."*
+
+---
+
+## Phase 1: Product Context
+
+Ask the user a single question that covers everything you need to know. Pre-fill what you can infer from the codebase.
+
+**AskUserQuestion Q1 — include ALL of these:**
+1. Confirm what the product is, who it's for, what space/industry
+2. What project type: web app, dashboard, marketing site, editorial, internal tool, etc.
+3. "Want me to research what top products in your space are doing for design, or should I work from my design knowledge?"
+4. **Explicitly say:** "At any point you can just drop into chat and we'll talk through anything — this isn't a rigid form, it's a conversation."
+
+If the README or brainstorm gives you enough context, pre-fill and confirm: *"From what I can see, this is [X] for [Y] in the [Z] space. Sound right? And would you like me to research competitors, or should I work from what I know?"*
+
+---
+
+## Phase 2: Research (only if user said yes)
+
+If the user wants competitive research:
+
+Use WebSearch to find 5-10 products in their space. Search for:
+- "[product category] website design"
+- "[product category] best websites 2025"
+- "best [industry] web apps"
+
+For each competitor found, note: fonts used, color palette, layout approach, aesthetic direction.
+
+Summarize your findings conversationally:
+> "I looked at [competitors]. They tend toward [patterns] — lots of [common choices]. The opportunity to be distinctive is [gap]. Here's what I'd recommend based on this..."
+
+If WebSearch is unavailable or returns poor results, fall back gracefully: *"Couldn't get good research results, so I'll work from my design knowledge of the [industry] space."*
+
+If the user said no research, skip entirely and proceed to Phase 3 using your built-in design knowledge.
+
+---
+
+## Phase 3: The Complete Proposal
+
+This is the soul of the skill. Propose EVERYTHING as one coherent package.
+
+**AskUserQuestion Q2 — present the full proposal:**
+
+```
+Based on [product context] and [research findings / my design knowledge]:
+
+AESTHETIC: [direction] — [one-line rationale]
+DECORATION: [level] — [why this pairs with the aesthetic]
+LAYOUT: [approach] — [why this fits the product type]
+COLOR: [approach] + proposed palette (hex values) — [rationale]
+TYPOGRAPHY: [3 font recommendations with roles] — [why these fonts]
+SPACING: [base unit + density] — [rationale]
+MOTION: [approach] — [rationale]
+
+This system is coherent because [explain how choices reinforce each other].
+
+Want to adjust anything? You can drill into any section, or just tell me
+what feels off and I'll rework it. Or if this looks right, I'll generate
+a preview page so you can see the fonts and colors rendered.
+```
+
+**Options:** A) Looks great — generate the preview page. B) I want to adjust [section]. C) Start over with a different direction. D) Skip the preview, just write DESIGN.md.
+
+### Your Design Knowledge (use to inform proposals — do NOT display as tables)
+
+**Aesthetic directions** (pick the one that fits the product):
+- Brutally Minimal — Type and whitespace only. No decoration. Modernist.
+- Maximalist Chaos — Dense, layered, pattern-heavy. Y2K meets contemporary.
+- Retro-Futuristic — Vintage tech nostalgia. CRT glow, pixel grids, warm monospace.
+- Luxury/Refined — Serifs, high contrast, generous whitespace, precious metals.
+- Playful/Toy-like — Rounded, bouncy, bold primaries. Approachable and fun.
+- Editorial/Magazine — Strong typographic hierarchy, asymmetric grids, pull quotes.
+- Brutalist/Raw — Exposed structure, system fonts, visible grid, no polish.
+- Art Deco — Geometric precision, metallic accents, symmetry, decorative borders.
+- Organic/Natural — Earth tones, rounded forms, hand-drawn texture, grain.
+- Industrial/Utilitarian — Function-first, data-dense, monospace accents, muted palette.
+
+**Decoration levels:** minimal (typography does all the work) / intentional (subtle texture, grain, or background treatment) / expressive (full creative direction, layered depth, patterns)
+
+**Layout approaches:** grid-disciplined (strict columns, predictable alignment) / creative-editorial (asymmetry, overlap, grid-breaking) / hybrid (grid for app, creative for marketing)
+
+**Color approaches:** restrained (1 accent + neutrals, color is rare and meaningful) / balanced (primary + secondary, semantic colors for hierarchy) / expressive (color as a primary design tool, bold palettes)
+
+**Motion approaches:** minimal-functional (only transitions that aid comprehension) / intentional (subtle entrance animations, meaningful state transitions) / expressive (full choreography, scroll-driven, playful)
+
+**Font recommendations by purpose:**
+- Display/Hero: Satoshi, General Sans, Instrument Serif, Fraunces, Clash Grotesk, Cabinet Grotesk
+- Body: Instrument Sans, DM Sans, Source Sans 3, Geist, Plus Jakarta Sans, Outfit
+- Data/Tables: Geist (tabular-nums), DM Sans (tabular-nums), JetBrains Mono, IBM Plex Mono
+- Code: JetBrains Mono, Fira Code, Berkeley Mono, Geist Mono
+
+**Font blacklist** (never recommend):
+Papyrus, Comic Sans, Lobster, Impact, Jokerman, Bleeding Cowboys, Permanent Marker, Bradley Hand, Brush Script, Hobo, Trajan, Raleway, Clash Display, Courier New (for body)
+
+**Overused fonts** (never recommend as primary — use only if user specifically requests):
+Inter, Roboto, Arial, Helvetica, Open Sans, Lato, Montserrat, Poppins
+
+**AI slop anti-patterns** (never include in your recommendations):
+- Purple/violet gradients as default accent
+- 3-column feature grid with icons in colored circles
+- Centered everything with uniform spacing
+- Uniform bubbly border-radius on all elements
+- Gradient buttons as the primary CTA pattern
+- Generic stock-photo-style hero sections
+- "Built for X" / "Designed for Y" marketing copy patterns
+
+### Coherence Validation
+
+When the user overrides one section, check if the rest still coheres. Flag mismatches with a gentle nudge — never block:
+
+- Brutalist/Minimal aesthetic + expressive motion → "Heads up: brutalist aesthetics usually pair with minimal motion. Your combo is unusual — which is fine if intentional. Want me to suggest motion that fits, or keep it?"
+- Expressive color + restrained decoration → "Bold palette with minimal decoration can work, but the colors will carry a lot of weight. Want me to suggest decoration that supports the palette?"
+- Creative-editorial layout + data-heavy product → "Editorial layouts are gorgeous but can fight data density. Want me to show how a hybrid approach keeps both?"
+- Always accept the user's final choice. Never refuse to proceed.
+
+---
+
+## Phase 4: Drill-downs (only if user requests adjustments)
+
+When the user wants to change a specific section, go deep on that section:
+
+- **Fonts:** Present 3-5 specific candidates with rationale, explain what each evokes, offer the preview page
+- **Colors:** Present 2-3 palette options with hex values, explain the color theory reasoning
+- **Aesthetic:** Walk through which directions fit their product and why
+- **Layout/Spacing/Motion:** Present the approaches with concrete tradeoffs for their product type
+
+Each drill-down is one focused AskUserQuestion. After the user decides, re-check coherence with the rest of the system.
+
+---
+
+## Phase 5: Font & Color Preview Page (default ON)
+
+Generate a polished HTML preview page and open it in the user's browser. This page is the first visual artifact the skill produces — it should look beautiful.
+
+```bash
+PREVIEW_FILE="/tmp/design-consultation-preview-$(date +%s).html"
+```
+
+Write the preview HTML to `$PREVIEW_FILE`, then open it:
+
+```bash
+open "$PREVIEW_FILE"
+```
+
+### Preview Page Requirements
+
+The agent writes a **single, self-contained HTML file** (no framework dependencies) that:
+
+1. **Loads proposed fonts** from Google Fonts (or Bunny Fonts) via `<link>` tags
+2. **Uses the proposed color palette** throughout — dogfood the design system
+3. **Shows the product name** (not "Lorem Ipsum") as the hero heading
+4. **Font comparison section:**
+   - Each font candidate shown in its proposed role (hero heading, body paragraph, button label, data table row)
+   - Side-by-side comparison if multiple candidates for one role
+   - Real content that matches the product (e.g., civic tech → government data examples)
+5. **Color palette section:**
+   - Swatches with hex values and names
+   - Sample UI components rendered in the palette: buttons (primary, secondary, ghost), cards, form inputs, alerts (success, warning, error, info)
+   - Background/text color combinations showing contrast
+6. **Light/dark mode toggle** using CSS custom properties and a JS toggle button
+7. **Clean, professional layout** — the preview page IS a taste signal for the skill
+8. **Responsive** — looks good on any screen width
+
+The page should make the user think "oh nice, they thought of this." It's selling the design system visually, not just listing hex codes.
+
+If `open` fails (headless environment), tell the user: *"I wrote the preview to [path] — open it in your browser to see the fonts and colors rendered."*
+
+If the user says skip the preview, go directly to Phase 6.
+
+---
+
+## Phase 6: Write DESIGN.md & Confirm
+
+Write `DESIGN.md` to the repo root with this structure:
+
+```markdown
+# Design System — [Project Name]
+
+## Product Context
+- **What this is:** [1-2 sentence description]
+- **Who it's for:** [target users]
+- **Space/industry:** [category, peers]
+- **Project type:** [web app / dashboard / marketing site / editorial / internal tool]
+
+## Aesthetic Direction
+- **Direction:** [name]
+- **Decoration level:** [minimal / intentional / expressive]
+- **Mood:** [1-2 sentence description of how the product should feel]
+- **Reference sites:** [URLs, if research was done]
+
+## Typography
+- **Display/Hero:** [font name] — [rationale]
+- **Body:** [font name] — [rationale]
+- **UI/Labels:** [font name or "same as body"]
+- **Data/Tables:** [font name] — [rationale, must support tabular-nums]
+- **Code:** [font name]
+- **Loading:** [CDN URL or self-hosted strategy]
+- **Scale:** [modular scale with specific px/rem values for each level]
+
+## Color
+- **Approach:** [restrained / balanced / expressive]
+- **Primary:** [hex] — [what it represents, usage]
+- **Secondary:** [hex] — [usage]
+- **Neutrals:** [warm/cool grays, hex range from lightest to darkest]
+- **Semantic:** success [hex], warning [hex], error [hex], info [hex]
+- **Dark mode:** [strategy — redesign surfaces, reduce saturation 10-20%]
+
+## Spacing
+- **Base unit:** [4px or 8px]
+- **Density:** [compact / comfortable / spacious]
+- **Scale:** 2xs(2) xs(4) sm(8) md(16) lg(24) xl(32) 2xl(48) 3xl(64)
+
+## Layout
+- **Approach:** [grid-disciplined / creative-editorial / hybrid]
+- **Grid:** [columns per breakpoint]
+- **Max content width:** [value]
+- **Border radius:** [hierarchical scale — e.g., sm:4px, md:8px, lg:12px, full:9999px]
+
+## Motion
+- **Approach:** [minimal-functional / intentional / expressive]
+- **Easing:** enter(ease-out) exit(ease-in) move(ease-in-out)
+- **Duration:** micro(50-100ms) short(150-250ms) medium(250-400ms) long(400-700ms)
+
+## Decisions Log
+| Date | Decision | Rationale |
+|------|----------|-----------|
+| [today] | Initial design system created | Created by $gstack-design-consultation based on [product context / research] |
+```
+
+**Update CLAUDE.md** (or create it if it doesn't exist) — append this section:
+
+```markdown
+## Design System
+Always read DESIGN.md before making any visual or UI decisions.
+All font choices, colors, spacing, and aesthetic direction are defined there.
+Do not deviate without explicit user approval.
+In QA mode, flag any code that doesn't match DESIGN.md.
+```
+
+**AskUserQuestion Q-final — show summary and confirm:**
+
+List all decisions. Flag any that used agent defaults without explicit user confirmation (the user should know what they're shipping). Options:
+- A) Ship it — write DESIGN.md and CLAUDE.md
+- B) I want to change something (specify what)
+- C) Start over
+
+---
+
+## Important Rules
+
+1. **Propose, don't present menus.** You are a consultant, not a form. Make opinionated recommendations based on the product context, then let the user adjust.
+2. **Every recommendation needs a rationale.** Never say "I recommend X" without "because Y."
+3. **Coherence over individual choices.** A design system where every piece reinforces every other piece beats a system with individually "optimal" but mismatched choices.
+4. **Never recommend blacklisted or overused fonts as primary.** If the user specifically requests one, comply but explain the tradeoff.
+5. **The preview page must be beautiful.** It's the first visual output and sets the tone for the whole skill.
+6. **Conversational tone.** This isn't a rigid workflow. If the user wants to talk through a decision, engage as a thoughtful design partner.
+7. **Accept the user's final choice.** Nudge on coherence issues, but never block or refuse to write a DESIGN.md because you disagree with a choice.
+8. **No AI slop in your own output.** Your recommendations, your preview page, your DESIGN.md — all should demonstrate the taste you're asking the user to adopt.
diff --git a/.agents/skills/gstack-document-release/SKILL.md b/.agents/skills/gstack-document-release/SKILL.md
new file mode 100644
index 0000000..f1a95a9
--- /dev/null
+++ b/.agents/skills/gstack-document-release/SKILL.md
@@ -0,0 +1,428 @@
+---
+name: gstack-document-release
+description: |
+  Post-ship documentation update. Reads all project docs, cross-references the
+  diff, updates README/ARCHITECTURE/CONTRIBUTING/CLAUDE.md to match what shipped,
+  polishes CHANGELOG voice, cleans up TODOS, and optionally bumps VERSION.
+---
+<!-- AUTO-GENERATED from document-release/SKILL.md.tmpl — do not edit directly -->
+<!-- Regenerate: bun run gen:skill-docs --host codex -->
+
+## Preamble (run first)
+
+```bash
+_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+[ -n "$_UPD" ] && echo "$_UPD" || true
+mkdir -p ~/.gstack/sessions
+touch ~/.gstack/sessions/"$PPID"
+_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
+find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
+echo "BRANCH: $_BRANCH"
+```
+
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+
+## AskUserQuestion Format
+
+**ALWAYS follow this structure for every AskUserQuestion call:**
+1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
+2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called.
+3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]`
+4. **Options:** Lettered options: `A) ... B) ... C) ...`
+
+Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+
+Per-skill instructions may add additional formatting rules on top of this baseline.
+
+## Contributor Mode
+
+If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better.
+
+**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better!
+
+**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore.
+
+**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs.
+
+**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer):
+
+```
+# {Title}
+
+Hey gstack team — ran into this while using $gstack-{skill-name}:
+
+**What I was trying to do:** {what the user/agent was attempting}
+**What happened instead:** {what actually happened}
+**My rating:** {0-10} — {one sentence on why it wasn't a 10}
+
+## Steps to reproduce
+1. {step}
+
+## Raw output
+```
+{paste the actual error or unexpected output here}
+```
+
+## What would make this a 10
+{one sentence: what gstack should have done differently}
+
+**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** $gstack-{skill}
+```
+
+Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
+
+## Step 0: Detect base branch
+
+Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps.
+
+1. Check if a PR already exists for this branch:
+   `gh pr view --json baseRefName -q .baseRefName`
+   If this succeeds, use the printed branch name as the base branch.
+
+2. If no PR exists (command fails), detect the repo's default branch:
+   `gh repo view --json defaultBranchRef -q .defaultBranchRef.name`
+
+3. If both commands fail, fall back to `main`.
+
+Print the detected base branch name. In every subsequent `git diff`, `git log`,
+`git fetch`, `git merge`, and `gh pr create` command, substitute the detected
+branch name wherever the instructions say "the base branch."
+
+---
+
+# Document Release: Post-Ship Documentation Update
+
+You are running the `$gstack-document-release` workflow. This runs **after `$gstack-ship`** (code committed, PR
+exists or about to exist) but **before the PR merges**. Your job: ensure every documentation file
+in the project is accurate, up to date, and written in a friendly, user-forward voice.
+
+You are mostly automated. Make obvious factual updates directly. Stop and ask only for risky or
+subjective decisions.
+
+**Only stop for:**
+- Risky/questionable doc changes (narrative, philosophy, security, removals, large rewrites)
+- VERSION bump decision (if not already bumped)
+- New TODOS items to add
+- Cross-doc contradictions that are narrative (not factual)
+
+**Never stop for:**
+- Factual corrections clearly from the diff
+- Adding items to tables/lists
+- Updating paths, counts, version numbers
+- Fixing stale cross-references
+- CHANGELOG voice polish (minor wording adjustments)
+- Marking TODOS complete
+- Cross-doc factual inconsistencies (e.g., version number mismatch)
+
+**NEVER do:**
+- Overwrite, replace, or regenerate CHANGELOG entries — polish wording only, preserve all content
+- Bump VERSION without asking — always use AskUserQuestion for version changes
+- Use `Write` tool on CHANGELOG.md — always use `Edit` with exact `old_string` matches
+
+---
+
+## Step 1: Pre-flight & Diff Analysis
+
+1. Check the current branch. If on the base branch, **abort**: "You're on the base branch. Run from a feature branch."
+
+2. Gather context about what changed:
+
+```bash
+git diff <base>...HEAD --stat
+```
+
+```bash
+git log <base>..HEAD --oneline
+```
+
+```bash
+git diff <base>...HEAD --name-only
+```
+
+3. Discover all documentation files in the repo:
+
+```bash
+find . -maxdepth 2 -name "*.md" -not -path "./.git/*" -not -path "./node_modules/*" -not -path "./.gstack/*" -not -path "./.context/*" | sort
+```
+
+4. Classify the changes into categories relevant to documentation:
+   - **New features** — new files, new commands, new skills, new capabilities
+   - **Changed behavior** — modified services, updated APIs, config changes
+   - **Removed functionality** — deleted files, removed commands
+   - **Infrastructure** — build system, test infrastructure, CI
+
+5. Output a brief summary: "Analyzing N files changed across M commits. Found K documentation files to review."
+
+---
+
+## Step 2: Per-File Documentation Audit
+
+Read each documentation file and cross-reference it against the diff. Use these generic heuristics
+(adapt to whatever project you're in — these are not gstack-specific):
+
+**README.md:**
+- Does it describe all features and capabilities visible in the diff?
+- Are install/setup instructions consistent with the changes?
+- Are examples, demos, and usage descriptions still valid?
+- Are troubleshooting steps still accurate?
+
+**ARCHITECTURE.md:**
+- Do ASCII diagrams and component descriptions match the current code?
+- Are design decisions and "why" explanations still accurate?
+- Be conservative — only update things clearly contradicted by the diff. Architecture docs
+  describe things unlikely to change frequently.
+
+**CONTRIBUTING.md — New contributor smoke test:**
+- Walk through the setup instructions as if you are a brand new contributor.
+- Are the listed commands accurate? Would each step succeed?
+- Do test tier descriptions match the current test infrastructure?
+- Are workflow descriptions (dev setup, contributor mode, etc.) current?
+- Flag anything that would fail or confuse a first-time contributor.
+
+**CLAUDE.md / project instructions:**
+- Does the project structure section match the actual file tree?
+- Are listed commands and scripts accurate?
+- Do build/test instructions match what's in package.json (or equivalent)?
+
+**Any other .md files:**
+- Read the file, determine its purpose and audience.
+- Cross-reference against the diff to check if it contradicts anything the file says.
+
+For each file, classify needed updates as:
+
+- **Auto-update** — Factual corrections clearly warranted by the diff: adding an item to a
+  table, updating a file path, fixing a count, updating a project structure tree.
+- **Ask user** — Narrative changes, section removal, security model changes, large rewrites
+  (more than ~10 lines in one section), ambiguous relevance, adding entirely new sections.
+
+---
+
+## Step 3: Apply Auto-Updates
+
+Make all clear, factual updates directly using the Edit tool.
+
+For each file modified, output a one-line summary describing **what specifically changed** — not
+just "Updated README.md" but "README.md: added /new-skill to skills table, updated skill count
+from 9 to 10."
+
+**Never auto-update:**
+- README introduction or project positioning
+- ARCHITECTURE philosophy or design rationale
+- Security model descriptions
+- Do not remove entire sections from any document
+
+---
+
+## Step 4: Ask About Risky/Questionable Changes
+
+For each risky or questionable update identified in Step 2, use AskUserQuestion with:
+- Context: project name, branch, which doc file, what we're reviewing
+- The specific documentation decision
+- `RECOMMENDATION: Choose [X] because [one-line reason]`
+- Options including C) Skip — leave as-is
+
+Apply approved changes immediately after each answer.
+
+---
+
+## Step 5: CHANGELOG Voice Polish
+
+**CRITICAL — NEVER CLOBBER CHANGELOG ENTRIES.**
+
+This step polishes voice. It does NOT rewrite, replace, or regenerate CHANGELOG content.
+
+A real incident occurred where an agent replaced existing CHANGELOG entries when it should have
+preserved them. This skill must NEVER do that.
+
+**Rules:**
+1. Read the entire CHANGELOG.md first. Understand what is already there.
+2. Only modify wording within existing entries. Never delete, reorder, or replace entries.
+3. Never regenerate a CHANGELOG entry from scratch. The entry was written by `$gstack-ship` from the
+   actual diff and commit history. It is the source of truth. You are polishing prose, not
+   rewriting history.
+4. If an entry looks wrong or incomplete, use AskUserQuestion — do NOT silently fix it.
+5. Use Edit tool with exact `old_string` matches — never use Write to overwrite CHANGELOG.md.
+
+**If CHANGELOG was not modified in this branch:** skip this step.
+
+**If CHANGELOG was modified in this branch**, review the entry for voice:
+
+- **Sell test:** Would a user reading each bullet think "oh nice, I want to try that"? If not,
+  rewrite the wording (not the content).
+- Lead with what the user can now **do** — not implementation details.
+- "You can now..." not "Refactored the..."
+- Flag and rewrite any entry that reads like a commit message.
+- Internal/contributor changes belong in a separate "### For contributors" subsection.
+- Auto-fix minor voice adjustments. Use AskUserQuestion if a rewrite would alter meaning.
+
+---
+
+## Step 6: Cross-Doc Consistency & Discoverability Check
+
+After auditing each file individually, do a cross-doc consistency pass:
+
+1. Does the README's feature/capability list match what CLAUDE.md (or project instructions) describes?
+2. Does ARCHITECTURE's component list match CONTRIBUTING's project structure description?
+3. Does CHANGELOG's latest version match the VERSION file?
+4. **Discoverability:** Is every documentation file reachable from README.md or CLAUDE.md? If
+   ARCHITECTURE.md exists but neither README nor CLAUDE.md links to it, flag it. Every doc
+   should be discoverable from one of the two entry-point files.
+5. Flag any contradictions between documents. Auto-fix clear factual inconsistencies (e.g., a
+   version mismatch). Use AskUserQuestion for narrative contradictions.
+
+---
+
+## Step 7: TODOS.md Cleanup
+
+This is a second pass that complements `$gstack-ship`'s Step 5.5. Read `$HOME/.agents/skills/gstack/review/TODOS-format.md` (if
+available) for the canonical TODO item format.
+
+If TODOS.md does not exist, skip this step.
+
+1. **Completed items not yet marked:** Cross-reference the diff against open TODO items. If a
+   TODO is clearly completed by the changes in this branch, move it to the Completed section
+   with `**Completed:** vX.Y.Z.W (YYYY-MM-DD)`. Be conservative — only mark items with clear
+   evidence in the diff.
+
+2. **Items needing description updates:** If a TODO references files or components that were
+   significantly changed, its description may be stale. Use AskUserQuestion to confirm whether
+   the TODO should be updated, completed, or left as-is.
+
+3. **New deferred work:** Check the diff for `TODO`, `FIXME`, `HACK`, and `XXX` comments. For
+   each one that represents meaningful deferred work (not a trivial inline note), use
+   AskUserQuestion to ask whether it should be captured in TODOS.md.
+
+---
+
+## Step 8: VERSION Bump Question
+
+**CRITICAL — NEVER BUMP VERSION WITHOUT ASKING.**
+
+1. **If VERSION does not exist:** Skip silently.
+
+2. Check if VERSION was already modified on this branch:
+
+```bash
+git diff <base>...HEAD -- VERSION
+```
+
+3. **If VERSION was NOT bumped:** Use AskUserQuestion:
+   - RECOMMENDATION: Choose C (Skip) because docs-only changes rarely warrant a version bump
+   - A) Bump PATCH (X.Y.Z+1) — if doc changes ship alongside code changes
+   - B) Bump MINOR (X.Y+1.0) — if this is a significant standalone release
+   - C) Skip — no version bump needed
+
+4. **If VERSION was already bumped:** Do NOT skip silently. Instead, check whether the bump
+   still covers the full scope of changes on this branch:
+
+   a. Read the CHANGELOG entry for the current VERSION. What features does it describe?
+   b. Read the full diff (`git diff <base>...HEAD --stat` and `git diff <base>...HEAD --name-only`).
+      Are there significant changes (new features, new skills, new commands, major refactors)
+      that are NOT mentioned in the CHANGELOG entry for the current version?
+   c. **If the CHANGELOG entry covers everything:** Skip — output "VERSION: Already bumped to
+      vX.Y.Z, covers all changes."
+   d. **If there are significant uncovered changes:** Use AskUserQuestion explaining what the
+      current version covers vs what's new, and ask:
+      - RECOMMENDATION: Choose A because the new changes warrant their own version
+      - A) Bump to next patch (X.Y.Z+1) — give the new changes their own version
+      - B) Keep current version — add new changes to the existing CHANGELOG entry
+      - C) Skip — leave version as-is, handle later
+
+   The key insight: a VERSION bump set for "feature A" should not silently absorb "feature B"
+   if feature B is substantial enough to deserve its own version entry.
+
+---
+
+## Step 9: Commit & Output
+
+**Empty check first:** Run `git status` (never use `-uall`). If no documentation files were
+modified by any previous step, output "All documentation is up to date." and exit without
+committing.
+
+**Commit:**
+
+1. Stage modified documentation files by name (never `git add -A` or `git add .`).
+2. Create a single commit:
+
+```bash
+git commit -m "$(cat <<'EOF'
+docs: update project documentation for vX.Y.Z.W
+
+Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
+EOF
+)"
+```
+
+3. Push to the current branch:
+
+```bash
+git push
+```
+
+**PR body update (idempotent, race-safe):**
+
+1. Read the existing PR body into a PID-unique tempfile:
+
+```bash
+gh pr view --json body -q .body > /tmp/gstack-pr-body-$$.md
+```
+
+2. If the tempfile already contains a `## Documentation` section, replace that section with the
+   updated content. If it does not contain one, append a `## Documentation` section at the end.
+
+3. The Documentation section should include a **doc diff preview** — for each file modified,
+   describe what specifically changed (e.g., "README.md: added $gstack-document-release to skills
+   table, updated skill count from 9 to 10").
+
+4. Write the updated body back:
+
+```bash
+gh pr edit --body-file /tmp/gstack-pr-body-$$.md
+```
+
+5. Clean up the tempfile:
+
+```bash
+rm -f /tmp/gstack-pr-body-$$.md
+```
+
+6. If `gh pr view` fails (no PR exists): skip with message "No PR found — skipping body update."
+7. If `gh pr edit` fails: warn "Could not update PR body — documentation changes are in the
+   commit." and continue.
+
+**Structured doc health summary (final output):**
+
+Output a scannable summary showing every documentation file's status:
+
+```
+Documentation health:
+  README.md       [status] ([details])
+  ARCHITECTURE.md [status] ([details])
+  CONTRIBUTING.md [status] ([details])
+  CHANGELOG.md    [status] ([details])
+  TODOS.md        [status] ([details])
+  VERSION         [status] ([details])
+```
+
+Where status is one of:
+- Updated — with description of what changed
+- Current — no changes needed
+- Voice polished — wording adjusted
+- Not bumped — user chose to skip
+- Already bumped — version was set by $gstack-ship
+- Skipped — file does not exist
+
+---
+
+## Important Rules
+
+- **Read before editing.** Always read the full content of a file before modifying it.
+- **Never clobber CHANGELOG.** Polish wording only. Never delete, replace, or regenerate entries.
+- **Never bump VERSION silently.** Always ask. Even if already bumped, check whether it covers the full scope of changes.
+- **Be explicit about what changed.** Every edit gets a one-line summary.
+- **Generic heuristics, not project-specific.** The audit checks work on any repo.
+- **Discoverability matters.** Every doc file should be reachable from README or CLAUDE.md.
+- **Voice: friendly, user-forward, not obscure.** Write like you're explaining to a smart person
+  who hasn't seen the code.
diff --git a/.agents/skills/gstack-plan-ceo-review/SKILL.md b/.agents/skills/gstack-plan-ceo-review/SKILL.md
new file mode 100644
index 0000000..0649740
--- /dev/null
+++ b/.agents/skills/gstack-plan-ceo-review/SKILL.md
@@ -0,0 +1,565 @@
+---
+name: gstack-plan-ceo-review
+description: |
+  CEO/founder-mode plan review. Rethink the problem, find the 10-star product,
+  challenge premises, expand scope when it creates a better product. Three modes:
+  SCOPE EXPANSION (dream big), HOLD SCOPE (maximum rigor), SCOPE REDUCTION
+  (strip to essentials).
+---
+<!-- AUTO-GENERATED from plan-ceo-review/SKILL.md.tmpl — do not edit directly -->
+<!-- Regenerate: bun run gen:skill-docs --host codex -->
+
+## Preamble (run first)
+
+```bash
+_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+[ -n "$_UPD" ] && echo "$_UPD" || true
+mkdir -p ~/.gstack/sessions
+touch ~/.gstack/sessions/"$PPID"
+_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
+find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
+echo "BRANCH: $_BRANCH"
+```
+
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+
+## AskUserQuestion Format
+
+**ALWAYS follow this structure for every AskUserQuestion call:**
+1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
+2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called.
+3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]`
+4. **Options:** Lettered options: `A) ... B) ... C) ...`
+
+Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+
+Per-skill instructions may add additional formatting rules on top of this baseline.
+
+## Contributor Mode
+
+If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better.
+
+**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better!
+
+**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore.
+
+**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs.
+
+**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer):
+
+```
+# {Title}
+
+Hey gstack team — ran into this while using $gstack-{skill-name}:
+
+**What I was trying to do:** {what the user/agent was attempting}
+**What happened instead:** {what actually happened}
+**My rating:** {0-10} — {one sentence on why it wasn't a 10}
+
+## Steps to reproduce
+1. {step}
+
+## Raw output
+```
+{paste the actual error or unexpected output here}
+```
+
+## What would make this a 10
+{one sentence: what gstack should have done differently}
+
+**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** $gstack-{skill}
+```
+
+Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
+
+## Step 0: Detect base branch
+
+Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps.
+
+1. Check if a PR already exists for this branch:
+   `gh pr view --json baseRefName -q .baseRefName`
+   If this succeeds, use the printed branch name as the base branch.
+
+2. If no PR exists (command fails), detect the repo's default branch:
+   `gh repo view --json defaultBranchRef -q .defaultBranchRef.name`
+
+3. If both commands fail, fall back to `main`.
+
+Print the detected base branch name. In every subsequent `git diff`, `git log`,
+`git fetch`, `git merge`, and `gh pr create` command, substitute the detected
+branch name wherever the instructions say "the base branch."
+
+---
+
+# Mega Plan Review Mode
+
+## Philosophy
+You are not here to rubber-stamp this plan. You are here to make it extraordinary, catch every landmine before it explodes, and ensure that when this ships, it ships at the highest possible standard.
+But your posture depends on what the user needs:
+* SCOPE EXPANSION: You are building a cathedral. Envision the platonic ideal. Push scope UP. Ask "what would make this 10x better for 2x the effort?" The answer to "should we also build X?" is "yes, if it serves the vision." You have permission to dream.
+* HOLD SCOPE: You are a rigorous reviewer. The plan's scope is accepted. Your job is to make it bulletproof — catch every failure mode, test every edge case, ensure observability, map every error path. Do not silently reduce OR expand.
+* SCOPE REDUCTION: You are a surgeon. Find the minimum viable version that achieves the core outcome. Cut everything else. Be ruthless.
+Critical rule: Once the user selects a mode, COMMIT to it. Do not silently drift toward a different mode. If EXPANSION is selected, do not argue for less work during later sections. If REDUCTION is selected, do not sneak scope back in. Raise concerns once in Step 0 — after that, execute the chosen mode faithfully.
+Do NOT make any code changes. Do NOT start implementation. Your only job right now is to review the plan with maximum rigor and the appropriate level of ambition.
+
+## Prime Directives
+1. Zero silent failures. Every failure mode must be visible — to the system, to the team, to the user. If a failure can happen silently, that is a critical defect in the plan.
+2. Every error has a name. Don't say "handle errors." Name the specific exception class, what triggers it, what rescues it, what the user sees, and whether it's tested. rescue StandardError is a code smell — call it out.
+3. Data flows have shadow paths. Every data flow has a happy path and three shadow paths: nil input, empty/zero-length input, and upstream error. Trace all four for every new flow.
+4. Interactions have edge cases. Every user-visible interaction has edge cases: double-click, navigate-away-mid-action, slow connection, stale state, back button. Map them.
+5. Observability is scope, not afterthought. New dashboards, alerts, and runbooks are first-class deliverables, not post-launch cleanup items.
+6. Diagrams are mandatory. No non-trivial flow goes undiagrammed. ASCII art for every new data flow, state machine, processing pipeline, dependency graph, and decision tree.
+7. Everything deferred must be written down. Vague intentions are lies. TODOS.md or it doesn't exist.
+8. Optimize for the 6-month future, not just today. If this plan solves today's problem but creates next quarter's nightmare, say so explicitly.
+9. You have permission to say "scrap it and do this instead." If there's a fundamentally better approach, table it. I'd rather hear it now.
+
+## Engineering Preferences (use these to guide every recommendation)
+* DRY is important — flag repetition aggressively.
+* Well-tested code is non-negotiable; I'd rather have too many tests than too few.
+* I want code that's "engineered enough" — not under-engineered (fragile, hacky) and not over-engineered (premature abstraction, unnecessary complexity).
+* I err on the side of handling more edge cases, not fewer; thoughtfulness > speed.
+* Bias toward explicit over clever.
+* Minimal diff: achieve the goal with the fewest new abstractions and files touched.
+* Observability is not optional — new codepaths need logs, metrics, or traces.
+* Security is not optional — new codepaths need threat modeling.
+* Deployments are not atomic — plan for partial states, rollbacks, and feature flags.
+* ASCII diagrams in code comments for complex designs — Models (state transitions), Services (pipelines), Controllers (request flow), Concerns (mixin behavior), Tests (non-obvious setup).
+* Diagram maintenance is part of the change — stale diagrams are worse than none.
+
+## Priority Hierarchy Under Context Pressure
+Step 0 > System audit > Error/rescue map > Test diagram > Failure modes > Opinionated recommendations > Everything else.
+Never skip Step 0, the system audit, the error/rescue map, or the failure modes section. These are the highest-leverage outputs.
+
+## PRE-REVIEW SYSTEM AUDIT (before Step 0)
+Before doing anything else, run a system audit. This is not the plan review — it is the context you need to review the plan intelligently.
+Run the following commands:
+```
+git log --oneline -30                          # Recent history
+git diff <base> --stat                           # What's already changed
+git stash list                                 # Any stashed work
+grep -r "TODO\|FIXME\|HACK\|XXX" --include="*.rb" --include="*.js" -l
+find . -name "*.rb" -newer Gemfile.lock | head -20  # Recently touched files
+```
+Then read CLAUDE.md, TODOS.md, and any existing architecture docs. When reading TODOS.md, specifically:
+* Note any TODOs this plan touches, blocks, or unlocks
+* Check if deferred work from prior reviews relates to this plan
+* Flag dependencies: does this plan enable or depend on deferred items?
+* Map known pain points (from TODOS) to this plan's scope
+
+Map:
+* What is the current system state?
+* What is already in flight (other open PRs, branches, stashed changes)?
+* What are the existing known pain points most relevant to this plan?
+* Are there any FIXME/TODO comments in files this plan touches?
+
+### Retrospective Check
+Check the git log for this branch. If there are prior commits suggesting a previous review cycle (review-driven refactors, reverted changes), note what was changed and whether the current plan re-touches those areas. Be MORE aggressive reviewing areas that were previously problematic. Recurring problem areas are architectural smells — surface them as architectural concerns.
+
+### Taste Calibration (EXPANSION mode only)
+Identify 2-3 files or patterns in the existing codebase that are particularly well-designed. Note them as style references for the review. Also note 1-2 patterns that are frustrating or poorly designed — these are anti-patterns to avoid repeating.
+Report findings before proceeding to Step 0.
+
+## Step 0: Nuclear Scope Challenge + Mode Selection
+
+### 0A. Premise Challenge
+1. Is this the right problem to solve? Could a different framing yield a dramatically simpler or more impactful solution?
+2. What is the actual user/business outcome? Is the plan the most direct path to that outcome, or is it solving a proxy problem?
+3. What would happen if we did nothing? Real pain point or hypothetical one?
+
+### 0B. Existing Code Leverage
+1. What existing code already partially or fully solves each sub-problem? Map every sub-problem to existing code. Can we capture outputs from existing flows rather than building parallel ones?
+2. Is this plan rebuilding anything that already exists? If yes, explain why rebuilding is better than refactoring.
+
+### 0C. Dream State Mapping
+Describe the ideal end state of this system 12 months from now. Does this plan move toward that state or away from it?
+```
+  CURRENT STATE                  THIS PLAN                  12-MONTH IDEAL
+  [describe]          --->       [describe delta]    --->    [describe target]
+```
+
+### 0D. Mode-Specific Analysis
+**For SCOPE EXPANSION** — run all three:
+1. 10x check: What's the version that's 10x more ambitious and delivers 10x more value for 2x the effort? Describe it concretely.
+2. Platonic ideal: If the best engineer in the world had unlimited time and perfect taste, what would this system look like? What would the user feel when using it? Start from experience, not architecture.
+3. Delight opportunities: What adjacent 30-minute improvements would make this feature sing? Things where a user would think "oh nice, they thought of that." List at least 3.
+
+**For HOLD SCOPE** — run this:
+1. Complexity check: If the plan touches more than 8 files or introduces more than 2 new classes/services, treat that as a smell and challenge whether the same goal can be achieved with fewer moving parts.
+2. What is the minimum set of changes that achieves the stated goal? Flag any work that could be deferred without blocking the core objective.
+
+**For SCOPE REDUCTION** — run this:
+1. Ruthless cut: What is the absolute minimum that ships value to a user? Everything else is deferred. No exceptions.
+2. What can be a follow-up PR? Separate "must ship together" from "nice to ship together."
+
+### 0E. Temporal Interrogation (EXPANSION and HOLD modes)
+Think ahead to implementation: What decisions will need to be made during implementation that should be resolved NOW in the plan?
+```
+  HOUR 1 (foundations):     What does the implementer need to know?
+  HOUR 2-3 (core logic):   What ambiguities will they hit?
+  HOUR 4-5 (integration):  What will surprise them?
+  HOUR 6+ (polish/tests):  What will they wish they'd planned for?
+```
+Surface these as questions for the user NOW, not as "figure it out later."
+
+### 0F. Mode Selection
+Present three options:
+1. **SCOPE EXPANSION:** The plan is good but could be great. Propose the ambitious version, then review that. Push scope up. Build the cathedral.
+2. **HOLD SCOPE:** The plan's scope is right. Review it with maximum rigor — architecture, security, edge cases, observability, deployment. Make it bulletproof.
+3. **SCOPE REDUCTION:** The plan is overbuilt or wrong-headed. Propose a minimal version that achieves the core goal, then review that.
+
+Context-dependent defaults:
+* Greenfield feature → default EXPANSION
+* Bug fix or hotfix → default HOLD SCOPE
+* Refactor → default HOLD SCOPE
+* Plan touching >15 files → suggest REDUCTION unless user pushes back
+* User says "go big" / "ambitious" / "cathedral" → EXPANSION, no question
+
+Once selected, commit fully. Do not silently drift.
+**STOP.** AskUserQuestion once per issue. Do NOT batch. Recommend + WHY. If no issues or fix is obvious, state what you'll do and move on — don't waste a question. Do NOT proceed until user responds.
+
+## Review Sections (10 sections, after scope and mode are agreed)
+
+### Section 1: Architecture Review
+Evaluate and diagram:
+* Overall system design and component boundaries. Draw the dependency graph.
+* Data flow — all four paths. For every new data flow, ASCII diagram the:
+    * Happy path (data flows correctly)
+    * Nil path (input is nil/missing — what happens?)
+    * Empty path (input is present but empty/zero-length — what happens?)
+    * Error path (upstream call fails — what happens?)
+* State machines. ASCII diagram for every new stateful object. Include impossible/invalid transitions and what prevents them.
+* Coupling concerns. Which components are now coupled that weren't before? Is that coupling justified? Draw the before/after dependency graph.
+* Scaling characteristics. What breaks first under 10x load? Under 100x?
+* Single points of failure. Map them.
+* Security architecture. Auth boundaries, data access patterns, API surfaces. For each new endpoint or data mutation: who can call it, what do they get, what can they change?
+* Production failure scenarios. For each new integration point, describe one realistic production failure (timeout, cascade, data corruption, auth failure) and whether the plan accounts for it.
+* Rollback posture. If this ships and immediately breaks, what's the rollback procedure? Git revert? Feature flag? DB migration rollback? How long?
+
+**EXPANSION mode additions:**
+* What would make this architecture beautiful? Not just correct — elegant. Is there a design that would make a new engineer joining in 6 months say "oh, that's clever and obvious at the same time"?
+* What infrastructure would make this feature a platform that other features can build on?
+
+Required ASCII diagram: full system architecture showing new components and their relationships to existing ones.
+**STOP.** AskUserQuestion once per issue. Do NOT batch. Recommend + WHY. If no issues or fix is obvious, state what you'll do and move on — don't waste a question. Do NOT proceed until user responds.
+
+### Section 2: Error & Rescue Map
+This is the section that catches silent failures. It is not optional.
+For every new method, service, or codepath that can fail, fill in this table:
+```
+  METHOD/CODEPATH          | WHAT CAN GO WRONG           | EXCEPTION CLASS
+  -------------------------|-----------------------------|-----------------
+  ExampleService#call      | API timeout                 | Faraday::TimeoutError
+                           | API returns 429             | RateLimitError
+                           | API returns malformed JSON  | JSON::ParserError
+                           | DB connection pool exhausted| ActiveRecord::ConnectionTimeoutError
+                           | Record not found            | ActiveRecord::RecordNotFound
+  -------------------------|-----------------------------|-----------------
+
+  EXCEPTION CLASS              | RESCUED?  | RESCUE ACTION          | USER SEES
+  -----------------------------|-----------|------------------------|------------------
+  Faraday::TimeoutError        | Y         | Retry 2x, then raise   | "Service temporarily unavailable"
+  RateLimitError               | Y         | Backoff + retry         | Nothing (transparent)
+  JSON::ParserError            | N ← GAP   | —                      | 500 error ← BAD
+  ConnectionTimeoutError       | N ← GAP   | —                      | 500 error ← BAD
+  ActiveRecord::RecordNotFound | Y         | Return nil, log warning | "Not found" message
+```
+Rules for this section:
+* `rescue StandardError` is ALWAYS a smell. Name the specific exceptions.
+* `rescue => e` with only `Rails.logger.error(e.message)` is insufficient. Log the full context: what was being attempted, with what arguments, for what user/request.
+* Every rescued error must either: retry with backoff, degrade gracefully with a user-visible message, or re-raise with added context. "Swallow and continue" is almost never acceptable.
+* For each GAP (unrescued error that should be rescued): specify the rescue action and what the user should see.
+* For LLM/AI service calls specifically: what happens when the response is malformed? When it's empty? When it hallucinates invalid JSON? When the model returns a refusal? Each of these is a distinct failure mode.
+**STOP.** AskUserQuestion once per issue. Do NOT batch. Recommend + WHY. If no issues or fix is obvious, state what you'll do and move on — don't waste a question. Do NOT proceed until user responds.
+
+### Section 3: Security & Threat Model
+Security is not a sub-bullet of architecture. It gets its own section.
+Evaluate:
+* Attack surface expansion. What new attack vectors does this plan introduce? New endpoints, new params, new file paths, new background jobs?
+* Input validation. For every new user input: is it validated, sanitized, and rejected loudly on failure? What happens with: nil, empty string, string when integer expected, string exceeding max length, unicode edge cases, HTML/script injection attempts?
+* Authorization. For every new data access: is it scoped to the right user/role? Is there a direct object reference vulnerability? Can user A access user B's data by manipulating IDs?
+* Secrets and credentials. New secrets? In env vars, not hardcoded? Rotatable?
+* Dependency risk. New gems/npm packages? Security track record?
+* Data classification. PII, payment data, credentials? Handling consistent with existing patterns?
+* Injection vectors. SQL, command, template, LLM prompt injection — check all.
+* Audit logging. For sensitive operations: is there an audit trail?
+
+For each finding: threat, likelihood (High/Med/Low), impact (High/Med/Low), and whether the plan mitigates it.
+**STOP.** AskUserQuestion once per issue. Do NOT batch. Recommend + WHY. If no issues or fix is obvious, state what you'll do and move on — don't waste a question. Do NOT proceed until user responds.
+
+### Section 4: Data Flow & Interaction Edge Cases
+This section traces data through the system and interactions through the UI with adversarial thoroughness.
+
+**Data Flow Tracing:** For every new data flow, produce an ASCII diagram showing:
+```
+  INPUT ──▶ VALIDATION ──▶ TRANSFORM ──▶ PERSIST ──▶ OUTPUT
+    │            │              │            │           │
+    ▼            ▼              ▼            ▼           ▼
+  [nil?]    [invalid?]    [exception?]  [conflict?]  [stale?]
+  [empty?]  [too long?]   [timeout?]    [dup key?]   [partial?]
+  [wrong    [wrong type?] [OOM?]        [locked?]    [encoding?]
+   type?]
+```
+For each node: what happens on each shadow path? Is it tested?
+
+**Interaction Edge Cases:** For every new user-visible interaction, evaluate:
+```
+  INTERACTION          | EDGE CASE              | HANDLED? | HOW?
+  ---------------------|------------------------|----------|--------
+  Form submission      | Double-click submit    | ?        |
+                       | Submit with stale CSRF | ?        |
+                       | Submit during deploy   | ?        |
+  Async operation      | User navigates away    | ?        |
+                       | Operation times out    | ?        |
+                       | Retry while in-flight  | ?        |
+  List/table view      | Zero results           | ?        |
+                       | 10,000 results         | ?        |
+                       | Results change mid-page| ?        |
+  Background job       | Job fails after 3 of   | ?        |
+                       | 10 items processed     |          |
+                       | Job runs twice (dup)   | ?        |
+                       | Queue backs up 2 hours | ?        |
+```
+Flag any unhandled edge case as a gap. For each gap, specify the fix.
+**STOP.** AskUserQuestion once per issue. Do NOT batch. Recommend + WHY. If no issues or fix is obvious, state what you'll do and move on — don't waste a question. Do NOT proceed until user responds.
+
+### Section 5: Code Quality Review
+Evaluate:
+* Code organization and module structure. Does new code fit existing patterns? If it deviates, is there a reason?
+* DRY violations. Be aggressive. If the same logic exists elsewhere, flag it and reference the file and line.
+* Naming quality. Are new classes, methods, and variables named for what they do, not how they do it?
+* Error handling patterns. (Cross-reference with Section 2 — this section reviews the patterns; Section 2 maps the specifics.)
+* Missing edge cases. List explicitly: "What happens when X is nil?" "When the API returns 429?" etc.
+* Over-engineering check. Any new abstraction solving a problem that doesn't exist yet?
+* Under-engineering check. Anything fragile, assuming happy path only, or missing obvious defensive checks?
+* Cyclomatic complexity. Flag any new method that branches more than 5 times. Propose a refactor.
+**STOP.** AskUserQuestion once per issue. Do NOT batch. Recommend + WHY. If no issues or fix is obvious, state what you'll do and move on — don't waste a question. Do NOT proceed until user responds.
+
+### Section 6: Test Review
+Make a complete diagram of every new thing this plan introduces:
+```
+  NEW UX FLOWS:
+    [list each new user-visible interaction]
+
+  NEW DATA FLOWS:
+    [list each new path data takes through the system]
+
+  NEW CODEPATHS:
+    [list each new branch, condition, or execution path]
+
+  NEW BACKGROUND JOBS / ASYNC WORK:
+    [list each]
+
+  NEW INTEGRATIONS / EXTERNAL CALLS:
+    [list each]
+
+  NEW ERROR/RESCUE PATHS:
+    [list each — cross-reference Section 2]
+```
+For each item in the diagram:
+* What type of test covers it? (Unit / Integration / System / E2E)
+* Does a test for it exist in the plan? If not, write the test spec header.
+* What is the happy path test?
+* What is the failure path test? (Be specific — which failure?)
+* What is the edge case test? (nil, empty, boundary values, concurrent access)
+
+Test ambition check (all modes): For each new feature, answer:
+* What's the test that would make you confident shipping at 2am on a Friday?
+* What's the test a hostile QA engineer would write to break this?
+* What's the chaos test?
+
+Test pyramid check: Many unit, fewer integration, few E2E? Or inverted?
+Flakiness risk: Flag any test depending on time, randomness, external services, or ordering.
+Load/stress test requirements: For any new codepath called frequently or processing significant data.
+
+For LLM/prompt changes: Check CLAUDE.md for the "Prompt/LLM changes" file patterns. If this plan touches ANY of those patterns, state which eval suites must be run, which cases should be added, and what baselines to compare against.
+**STOP.** AskUserQuestion once per issue. Do NOT batch. Recommend + WHY. If no issues or fix is obvious, state what you'll do and move on — don't waste a question. Do NOT proceed until user responds.
+
+### Section 7: Performance Review
+Evaluate:
+* N+1 queries. For every new ActiveRecord association traversal: is there an includes/preload?
+* Memory usage. For every new data structure: what's the maximum size in production?
+* Database indexes. For every new query: is there an index?
+* Caching opportunities. For every expensive computation or external call: should it be cached?
+* Background job sizing. For every new job: worst-case payload, runtime, retry behavior?
+* Slow paths. Top 3 slowest new codepaths and estimated p99 latency.
+* Connection pool pressure. New DB connections, Redis connections, HTTP connections?
+**STOP.** AskUserQuestion once per issue. Do NOT batch. Recommend + WHY. If no issues or fix is obvious, state what you'll do and move on — don't waste a question. Do NOT proceed until user responds.
+
+### Section 8: Observability & Debuggability Review
+New systems break. This section ensures you can see why.
+Evaluate:
+* Logging. For every new codepath: structured log lines at entry, exit, and each significant branch?
+* Metrics. For every new feature: what metric tells you it's working? What tells you it's broken?
+* Tracing. For new cross-service or cross-job flows: trace IDs propagated?
+* Alerting. What new alerts should exist?
+* Dashboards. What new dashboard panels do you want on day 1?
+* Debuggability. If a bug is reported 3 weeks post-ship, can you reconstruct what happened from logs alone?
+* Admin tooling. New operational tasks that need admin UI or rake tasks?
+* Runbooks. For each new failure mode: what's the operational response?
+
+**EXPANSION mode addition:**
+* What observability would make this feature a joy to operate?
+**STOP.** AskUserQuestion once per issue. Do NOT batch. Recommend + WHY. If no issues or fix is obvious, state what you'll do and move on — don't waste a question. Do NOT proceed until user responds.
+
+### Section 9: Deployment & Rollout Review
+Evaluate:
+* Migration safety. For every new DB migration: backward-compatible? Zero-downtime? Table locks?
+* Feature flags. Should any part be behind a feature flag?
+* Rollout order. Correct sequence: migrate first, deploy second?
+* Rollback plan. Explicit step-by-step.
+* Deploy-time risk window. Old code and new code running simultaneously — what breaks?
+* Environment parity. Tested in staging?
+* Post-deploy verification checklist. First 5 minutes? First hour?
+* Smoke tests. What automated checks should run immediately post-deploy?
+
+**EXPANSION mode addition:**
+* What deploy infrastructure would make shipping this feature routine?
+**STOP.** AskUserQuestion once per issue. Do NOT batch. Recommend + WHY. If no issues or fix is obvious, state what you'll do and move on — don't waste a question. Do NOT proceed until user responds.
+
+### Section 10: Long-Term Trajectory Review
+Evaluate:
+* Technical debt introduced. Code debt, operational debt, testing debt, documentation debt.
+* Path dependency. Does this make future changes harder?
+* Knowledge concentration. Documentation sufficient for a new engineer?
+* Reversibility. Rate 1-5: 1 = one-way door, 5 = easily reversible.
+* Ecosystem fit. Aligns with Rails/JS ecosystem direction?
+* The 1-year question. Read this plan as a new engineer in 12 months — obvious?
+
+**EXPANSION mode additions:**
+* What comes after this ships? Phase 2? Phase 3? Does the architecture support that trajectory?
+* Platform potential. Does this create capabilities other features can leverage?
+**STOP.** AskUserQuestion once per issue. Do NOT batch. Recommend + WHY. If no issues or fix is obvious, state what you'll do and move on — don't waste a question. Do NOT proceed until user responds.
+
+## CRITICAL RULE — How to ask questions
+Follow the AskUserQuestion format from the Preamble above. Additional rules for plan reviews:
+* **One issue = one AskUserQuestion call.** Never combine multiple issues into one question.
+* Describe the problem concretely, with file and line references.
+* Present 2-3 options, including "do nothing" where reasonable.
+* For each option: effort, risk, and maintenance burden in one line.
+* **Map the reasoning to my engineering preferences above.** One sentence connecting your recommendation to a specific preference.
+* Label with issue NUMBER + option LETTER (e.g., "3A", "3B").
+* **Escape hatch:** If a section has no issues, say so and move on. If an issue has an obvious fix with no real alternatives, state what you'll do and move on — don't waste a question on it. Only use AskUserQuestion when there is a genuine decision with meaningful tradeoffs.
+
+## Required Outputs
+
+### "NOT in scope" section
+List work considered and explicitly deferred, with one-line rationale each.
+
+### "What already exists" section
+List existing code/flows that partially solve sub-problems and whether the plan reuses them.
+
+### "Dream state delta" section
+Where this plan leaves us relative to the 12-month ideal.
+
+### Error & Rescue Registry (from Section 2)
+Complete table of every method that can fail, every exception class, rescued status, rescue action, user impact.
+
+### Failure Modes Registry
+```
+  CODEPATH | FAILURE MODE   | RESCUED? | TEST? | USER SEES?     | LOGGED?
+  ---------|----------------|----------|-------|----------------|--------
+```
+Any row with RESCUED=N, TEST=N, USER SEES=Silent → **CRITICAL GAP**.
+
+### TODOS.md updates
+Present each potential TODO as its own individual AskUserQuestion. Never batch TODOs — one per question. Never silently skip this step. Follow the format in `$HOME/.agents/skills/gstack/review/TODOS-format.md`.
+
+For each TODO, describe:
+* **What:** One-line description of the work.
+* **Why:** The concrete problem it solves or value it unlocks.
+* **Pros:** What you gain by doing this work.
+* **Cons:** Cost, complexity, or risks of doing it.
+* **Context:** Enough detail that someone picking this up in 3 months understands the motivation, the current state, and where to start.
+* **Effort estimate:** S/M/L/XL
+* **Priority:** P1/P2/P3
+* **Depends on / blocked by:** Any prerequisites or ordering constraints.
+
+Then present options: **A)** Add to TODOS.md **B)** Skip — not valuable enough **C)** Build it now in this PR instead of deferring.
+
+### Delight Opportunities (EXPANSION mode only)
+Identify at least 5 "bonus chunk" opportunities (<30 min each) that would make users think "oh nice, they thought of that." Present each delight opportunity as its own individual AskUserQuestion. Never batch them. For each one, describe what it is, why it would delight users, and effort estimate. Then present options: **A)** Add to TODOS.md as a vision item **B)** Skip **C)** Build it now in this PR.
+
+### Diagrams (mandatory, produce all that apply)
+1. System architecture
+2. Data flow (including shadow paths)
+3. State machine
+4. Error flow
+5. Deployment sequence
+6. Rollback flowchart
+
+### Stale Diagram Audit
+List every ASCII diagram in files this plan touches. Still accurate?
+
+### Completion Summary
+```
+  +====================================================================+
+  |            MEGA PLAN REVIEW — COMPLETION SUMMARY                   |
+  +====================================================================+
+  | Mode selected        | EXPANSION / HOLD / REDUCTION                |
+  | System Audit         | [key findings]                              |
+  | Step 0               | [mode + key decisions]                      |
+  | Section 1  (Arch)    | ___ issues found                            |
+  | Section 2  (Errors)  | ___ error paths mapped, ___ GAPS            |
+  | Section 3  (Security)| ___ issues found, ___ High severity         |
+  | Section 4  (Data/UX) | ___ edge cases mapped, ___ unhandled        |
+  | Section 5  (Quality) | ___ issues found                            |
+  | Section 6  (Tests)   | Diagram produced, ___ gaps                  |
+  | Section 7  (Perf)    | ___ issues found                            |
+  | Section 8  (Observ)  | ___ gaps found                              |
+  | Section 9  (Deploy)  | ___ risks flagged                           |
+  | Section 10 (Future)  | Reversibility: _/5, debt items: ___         |
+  +--------------------------------------------------------------------+
+  | NOT in scope         | written (___ items)                          |
+  | What already exists  | written                                     |
+  | Dream state delta    | written                                     |
+  | Error/rescue registry| ___ methods, ___ CRITICAL GAPS              |
+  | Failure modes        | ___ total, ___ CRITICAL GAPS                |
+  | TODOS.md updates     | ___ items proposed                          |
+  | Delight opportunities| ___ identified (EXPANSION only)             |
+  | Diagrams produced    | ___ (list types)                            |
+  | Stale diagrams found | ___                                         |
+  | Unresolved decisions | ___ (listed below)                          |
+  +====================================================================+
+```
+
+### Unresolved Decisions
+If any AskUserQuestion goes unanswered, note it here. Never silently default.
+
+## Formatting Rules
+* NUMBER issues (1, 2, 3...) and LETTERS for options (A, B, C...).
+* Label with NUMBER + LETTER (e.g., "3A", "3B").
+* One sentence max per option.
+* After each section, pause and wait for feedback.
+* Use **CRITICAL GAP** / **WARNING** / **OK** for scannability.
+
+## Mode Quick Reference
+```
+  ┌─────────────────────────────────────────────────────────────────┐
+  │                     MODE COMPARISON                             │
+  ├─────────────┬──────────────┬──────────────┬────────────────────┤
+  │             │  EXPANSION   │  HOLD SCOPE  │  REDUCTION         │
+  ├─────────────┼──────────────┼──────────────┼────────────────────┤
+  │ Scope       │ Push UP      │ Maintain     │ Push DOWN          │
+  │ 10x check   │ Mandatory    │ Optional     │ Skip               │
+  │ Platonic    │ Yes          │ No           │ No                 │
+  │ ideal       │              │              │                    │
+  │ Delight     │ 5+ items     │ Note if seen │ Skip               │
+  │ opps        │              │              │                    │
+  │ Complexity  │ "Is it big   │ "Is it too   │ "Is it the bare    │
+  │ question    │  enough?"    │  complex?"   │  minimum?"         │
+  │ Taste       │ Yes          │ No           │ No                 │
+  │ calibration │              │              │                    │
+  │ Temporal    │ Full (hr 1-6)│ Key decisions│ Skip               │
+  │ interrogate │              │  only        │                    │
+  │ Observ.     │ "Joy to      │ "Can we      │ "Can we see if     │
+  │ standard    │  operate"    │  debug it?"  │  it's broken?"     │
+  │ Deploy      │ Infra as     │ Safe deploy  │ Simplest possible  │
+  │ standard    │ feature scope│  + rollback  │  deploy            │
+  │ Error map   │ Full + chaos │ Full         │ Critical paths     │
+  │             │  scenarios   │              │  only              │
+  │ Phase 2/3   │ Map it       │ Note it      │ Skip               │
+  │ planning    │              │              │                    │
+  └─────────────┴──────────────┴──────────────┴────────────────────┘
+```
diff --git a/.agents/skills/gstack-plan-design-review/SKILL.md b/.agents/skills/gstack-plan-design-review/SKILL.md
new file mode 100644
index 0000000..e826610
--- /dev/null
+++ b/.agents/skills/gstack-plan-design-review/SKILL.md
@@ -0,0 +1,554 @@
+---
+name: gstack-plan-design-review
+description: |
+  Designer's eye review of a live site. Finds visual inconsistency, spacing issues,
+  hierarchy problems, interaction feel, AI slop patterns, typography issues, missed
+  states, and slow-feeling interactions. Produces a prioritized design audit with
+  annotated screenshots and letter grades. Infers your design system and offers to
+  export as DESIGN.md. Report-only — never modifies code. For the fix loop, use
+  $gstack-qa-design-review instead.
+---
+<!-- AUTO-GENERATED from plan-design-review/SKILL.md.tmpl — do not edit directly -->
+<!-- Regenerate: bun run gen:skill-docs --host codex -->
+
+## Preamble (run first)
+
+```bash
+_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+[ -n "$_UPD" ] && echo "$_UPD" || true
+mkdir -p ~/.gstack/sessions
+touch ~/.gstack/sessions/"$PPID"
+_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
+find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
+echo "BRANCH: $_BRANCH"
+```
+
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+
+## AskUserQuestion Format
+
+**ALWAYS follow this structure for every AskUserQuestion call:**
+1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
+2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called.
+3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]`
+4. **Options:** Lettered options: `A) ... B) ... C) ...`
+
+Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+
+Per-skill instructions may add additional formatting rules on top of this baseline.
+
+## Contributor Mode
+
+If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better.
+
+**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better!
+
+**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore.
+
+**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs.
+
+**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer):
+
+```
+# {Title}
+
+Hey gstack team — ran into this while using $gstack-{skill-name}:
+
+**What I was trying to do:** {what the user/agent was attempting}
+**What happened instead:** {what actually happened}
+**My rating:** {0-10} — {one sentence on why it wasn't a 10}
+
+## Steps to reproduce
+1. {step}
+
+## Raw output
+```
+{paste the actual error or unexpected output here}
+```
+
+## What would make this a 10
+{one sentence: what gstack should have done differently}
+
+**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** $gstack-{skill}
+```
+
+Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
+
+# $gstack-plan-design-review: Designer's Eye Audit
+
+You are a senior product designer reviewing a live site. You have exacting visual standards, strong opinions about typography and spacing, and zero tolerance for generic or AI-generated-looking interfaces. You do NOT care whether things "work." You care whether they feel right, look intentional, and respect the user.
+
+## Setup
+
+**Parse the user's request for these parameters:**
+
+| Parameter | Default | Override example |
+|-----------|---------|-----------------:|
+| Target URL | (auto-detect or ask) | `https://myapp.com`, `http://localhost:3000` |
+| Scope | Full site | `Focus on the settings page`, `Just the homepage` |
+| Depth | Standard (5-8 pages) | `--quick` (homepage + 2), `--deep` (10-15 pages) |
+| Auth | None | `Sign in as user@example.com`, `Import cookies` |
+
+**If no URL is given and you're on a feature branch:** Automatically enter **diff-aware mode** (see Modes below).
+
+**If no URL is given and you're on main/master:** Ask the user for a URL.
+
+**Check for DESIGN.md:**
+
+Look for `DESIGN.md`, `design-system.md`, or similar in the repo root. If found, read it — all design decisions in this session must be calibrated against it. Deviations from the project's stated design system are higher severity than general design opinions. If not found, use universal design principles and offer to create one from the inferred system.
+
+**Find the browse binary:**
+
+## SETUP (run this check BEFORE any browse command)
+
+```bash
+_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
+B=""
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
+if [ -x "$B" ]; then
+  echo "READY: $B"
+else
+  echo "NEEDS_SETUP"
+fi
+```
+
+If `NEEDS_SETUP`:
+1. Tell the user: "gstack browse needs a one-time build (~10 seconds). OK to proceed?" Then STOP and wait.
+2. Run: `cd <SKILL_DIR> && ./setup --host codex`
+3. If `bun` is not installed: `curl -fsSL https://bun.sh/install | bash`
+
+**Create output directories:**
+
+```bash
+REPORT_DIR=".gstack/design-reports"
+mkdir -p "$REPORT_DIR/screenshots"
+```
+
+---
+
+## Modes
+
+### Full (default)
+Systematic review of all pages reachable from homepage. Visit 5-8 pages. Full checklist evaluation, responsive screenshots, interaction flow testing. Produces complete design audit report with letter grades.
+
+### Quick (`--quick`)
+Homepage + 2 key pages only. First Impression + Design System Extraction + abbreviated checklist. Fastest path to a design score.
+
+### Deep (`--deep`)
+Comprehensive review: 10-15 pages, every interaction flow, exhaustive checklist. For pre-launch audits or major redesigns.
+
+### Diff-aware (automatic when on a feature branch with no URL)
+When on a feature branch, scope to pages affected by the branch changes:
+1. Analyze the branch diff: `git diff main...HEAD --name-only`
+2. Map changed files to affected pages/routes
+3. Detect running app on common local ports (3000, 4000, 8080)
+4. Audit only affected pages, compare design quality before/after
+
+### Regression (`--regression` or previous `design-baseline.json` found)
+Run full audit, then load previous `design-baseline.json`. Compare: per-category grade deltas, new findings, resolved findings. Output regression table in report.
+
+---
+
+## Phase 1: First Impression
+
+The most uniquely designer-like output. Form a gut reaction before analyzing anything.
+
+1. Navigate to the target URL
+2. Take a full-page desktop screenshot: `$B screenshot "$REPORT_DIR/screenshots/first-impression.png"`
+3. Write the **First Impression** using this structured critique format:
+   - "The site communicates **[what]**." (what it says at a glance — competence? playfulness? confusion?)
+   - "I notice **[observation]**." (what stands out, positive or negative — be specific)
+   - "The first 3 things my eye goes to are: **[1]**, **[2]**, **[3]**." (hierarchy check — are these intentional?)
+   - "If I had to describe this in one word: **[word]**." (gut verdict)
+
+This is the section users read first. Be opinionated. A designer doesn't hedge — they react.
+
+---
+
+## Phase 2: Design System Extraction
+
+Extract the actual design system the site uses (not what a DESIGN.md says, but what's rendered):
+
+```bash
+# Fonts in use (capped at 500 elements to avoid timeout)
+$B js "JSON.stringify([...new Set([...document.querySelectorAll('*')].slice(0,500).map(e => getComputedStyle(e).fontFamily))])"
+
+# Color palette in use
+$B js "JSON.stringify([...new Set([...document.querySelectorAll('*')].slice(0,500).flatMap(e => [getComputedStyle(e).color, getComputedStyle(e).backgroundColor]).filter(c => c !== 'rgba(0, 0, 0, 0)'))])"
+
+# Heading hierarchy
+$B js "JSON.stringify([...document.querySelectorAll('h1,h2,h3,h4,h5,h6')].map(h => ({tag:h.tagName, text:h.textContent.trim().slice(0,50), size:getComputedStyle(h).fontSize, weight:getComputedStyle(h).fontWeight})))"
+
+# Touch target audit (find undersized interactive elements)
+$B js "JSON.stringify([...document.querySelectorAll('a,button,input,[role=button]')].filter(e => {const r=e.getBoundingClientRect(); return r.width>0 && (r.width<44||r.height<44)}).map(e => ({tag:e.tagName, text:(e.textContent||'').trim().slice(0,30), w:Math.round(e.getBoundingClientRect().width), h:Math.round(e.getBoundingClientRect().height)})).slice(0,20))"
+
+# Performance baseline
+$B perf
+```
+
+Structure findings as an **Inferred Design System**:
+- **Fonts:** list with usage counts. Flag if >3 distinct font families.
+- **Colors:** palette extracted. Flag if >12 unique non-gray colors. Note warm/cool/mixed.
+- **Heading Scale:** h1-h6 sizes. Flag skipped levels, non-systematic size jumps.
+- **Spacing Patterns:** sample padding/margin values. Flag non-scale values.
+
+After extraction, offer: *"Want me to save this as your DESIGN.md? I can lock in these observations as your project's design system baseline."*
+
+---
+
+## Phase 3: Page-by-Page Visual Audit
+
+For each page in scope:
+
+```bash
+$B goto <url>
+$B snapshot -i -a -o "$REPORT_DIR/screenshots/{page}-annotated.png"
+$B responsive "$REPORT_DIR/screenshots/{page}"
+$B console --errors
+$B perf
+```
+
+### Auth Detection
+
+After the first navigation, check if the URL changed to a login-like path:
+```bash
+$B url
+```
+If URL contains `/login`, `/signin`, `/auth`, or `/sso`: the site requires authentication. AskUserQuestion: "This site requires authentication. Want to import cookies from your browser? Run `$gstack-setup-browser-cookies` first if needed."
+
+### Design Audit Checklist (10 categories, ~80 items)
+
+Apply these at each page. Each finding gets an impact rating (high/medium/polish) and category.
+
+**1. Visual Hierarchy & Composition** (8 items)
+- Clear focal point? One primary CTA per view?
+- Eye flows naturally top-left to bottom-right?
+- Visual noise — competing elements fighting for attention?
+- Information density appropriate for content type?
+- Z-index clarity — nothing unexpectedly overlapping?
+- Above-the-fold content communicates purpose in 3 seconds?
+- Squint test: hierarchy still visible when blurred?
+- White space is intentional, not leftover?
+
+**2. Typography** (15 items)
+- Font count <=3 (flag if more)
+- Scale follows ratio (1.25 major third or 1.333 perfect fourth)
+- Line-height: 1.5x body, 1.15-1.25x headings
+- Measure: 45-75 chars per line (66 ideal)
+- Heading hierarchy: no skipped levels (h1→h3 without h2)
+- Weight contrast: >=2 weights used for hierarchy
+- No blacklisted fonts (Papyrus, Comic Sans, Lobster, Impact, Jokerman)
+- If primary font is Inter/Roboto/Open Sans/Poppins → flag as potentially generic
+- `text-wrap: balance` or `text-pretty` on headings (check via `$B css <heading> text-wrap`)
+- Curly quotes used, not straight quotes
+- Ellipsis character (`…`) not three dots (`...`)
+- `font-variant-numeric: tabular-nums` on number columns
+- Body text >= 16px
+- Caption/label >= 12px
+- No letterspacing on lowercase text
+
+**3. Color & Contrast** (10 items)
+- Palette coherent (<=12 unique non-gray colors)
+- WCAG AA: body text 4.5:1, large text (18px+) 3:1, UI components 3:1
+- Semantic colors consistent (success=green, error=red, warning=yellow/amber)
+- No color-only encoding (always add labels, icons, or patterns)
+- Dark mode: surfaces use elevation, not just lightness inversion
+- Dark mode: text off-white (~#E0E0E0), not pure white
+- Primary accent desaturated 10-20% in dark mode
+- `color-scheme: dark` on html element (if dark mode present)
+- No red/green only combinations (8% of men have red-green deficiency)
+- Neutral palette is warm or cool consistently — not mixed
+
+**4. Spacing & Layout** (12 items)
+- Grid consistent at all breakpoints
+- Spacing uses a scale (4px or 8px base), not arbitrary values
+- Alignment is consistent — nothing floats outside the grid
+- Rhythm: related items closer together, distinct sections further apart
+- Border-radius hierarchy (not uniform bubbly radius on everything)
+- Inner radius = outer radius - gap (nested elements)
+- No horizontal scroll on mobile
+- Max content width set (no full-bleed body text)
+- `env(safe-area-inset-*)` for notch devices
+- URL reflects state (filters, tabs, pagination in query params)
+- Flex/grid used for layout (not JS measurement)
+- Breakpoints: mobile (375), tablet (768), desktop (1024), wide (1440)
+
+**5. Interaction States** (10 items)
+- Hover state on all interactive elements
+- `focus-visible` ring present (never `outline: none` without replacement)
+- Active/pressed state with depth effect or color shift
+- Disabled state: reduced opacity + `cursor: not-allowed`
+- Loading: skeleton shapes match real content layout
+- Empty states: warm message + primary action + visual (not just "No items.")
+- Error messages: specific + include fix/next step
+- Success: confirmation animation or color, auto-dismiss
+- Touch targets >= 44px on all interactive elements
+- `cursor: pointer` on all clickable elements
+
+**6. Responsive Design** (8 items)
+- Mobile layout makes *design* sense (not just stacked desktop columns)
+- Touch targets sufficient on mobile (>= 44px)
+- No horizontal scroll on any viewport
+- Images handle responsive (srcset, sizes, or CSS containment)
+- Text readable without zooming on mobile (>= 16px body)
+- Navigation collapses appropriately (hamburger, bottom nav, etc.)
+- Forms usable on mobile (correct input types, no autoFocus on mobile)
+- No `user-scalable=no` or `maximum-scale=1` in viewport meta
+
+**7. Motion & Animation** (6 items)
+- Easing: ease-out for entering, ease-in for exiting, ease-in-out for moving
+- Duration: 50-700ms range (nothing slower unless page transition)
+- Purpose: every animation communicates something (state change, attention, spatial relationship)
+- `prefers-reduced-motion` respected (check: `$B js "matchMedia('(prefers-reduced-motion: reduce)').matches"`)
+- No `transition: all` — properties listed explicitly
+- Only `transform` and `opacity` animated (not layout properties like width, height, top, left)
+
+**8. Content & Microcopy** (8 items)
+- Empty states designed with warmth (message + action + illustration/icon)
+- Error messages specific: what happened + why + what to do next
+- Button labels specific ("Save API Key" not "Continue" or "Submit")
+- No placeholder/lorem ipsum text visible in production
+- Truncation handled (`text-overflow: ellipsis`, `line-clamp`, or `break-words`)
+- Active voice ("Install the CLI" not "The CLI will be installed")
+- Loading states end with `…` ("Saving…" not "Saving...")
+- Destructive actions have confirmation modal or undo window
+
+**9. AI Slop Detection** (10 anti-patterns — the blacklist)
+
+The test: would a human designer at a respected studio ever ship this?
+
+- Purple/violet/indigo gradient backgrounds or blue-to-purple color schemes
+- **The 3-column feature grid:** icon-in-colored-circle + bold title + 2-line description, repeated 3x symmetrically. THE most recognizable AI layout.
+- Icons in colored circles as section decoration (SaaS starter template look)
+- Centered everything (`text-align: center` on all headings, descriptions, cards)
+- Uniform bubbly border-radius on every element (same large radius on everything)
+- Decorative blobs, floating circles, wavy SVG dividers (if a section feels empty, it needs better content, not decoration)
+- Emoji as design elements (rockets in headings, emoji as bullet points)
+- Colored left-border on cards (`border-left: 3px solid <accent>`)
+- Generic hero copy ("Welcome to [X]", "Unlock the power of...", "Your all-in-one solution for...")
+- Cookie-cutter section rhythm (hero → 3 features → testimonials → pricing → CTA, every section same height)
+
+**10. Performance as Design** (6 items)
+- LCP < 2.0s (web apps), < 1.5s (informational sites)
+- CLS < 0.1 (no visible layout shifts during load)
+- Skeleton quality: shapes match real content, shimmer animation
+- Images: `loading="lazy"`, width/height dimensions set, WebP/AVIF format
+- Fonts: `font-display: swap`, preconnect to CDN origins
+- No visible font swap flash (FOUT) — critical fonts preloaded
+
+---
+
+## Phase 4: Interaction Flow Review
+
+Walk 2-3 key user flows and evaluate the *feel*, not just the function:
+
+```bash
+$B snapshot -i
+$B click @e3           # perform action
+$B snapshot -D          # diff to see what changed
+```
+
+Evaluate:
+- **Response feel:** Does clicking feel responsive? Any delays or missing loading states?
+- **Transition quality:** Are transitions intentional or generic/absent?
+- **Feedback clarity:** Did the action clearly succeed or fail? Is the feedback immediate?
+- **Form polish:** Focus states visible? Validation timing correct? Errors near the source?
+
+---
+
+## Phase 5: Cross-Page Consistency
+
+Compare screenshots and observations across pages for:
+- Navigation bar consistent across all pages?
+- Footer consistent?
+- Component reuse vs one-off designs (same button styled differently on different pages?)
+- Tone consistency (one page playful while another is corporate?)
+- Spacing rhythm carries across pages?
+
+---
+
+## Phase 6: Compile Report
+
+### Output Locations
+
+**Local:** `.gstack/design-reports/design-audit-{domain}-{YYYY-MM-DD}.md`
+
+**Project-scoped:**
+```bash
+SLUG=$(git remote get-url origin 2>/dev/null | sed 's|.*[:/]\([^/]*/[^/]*\)\.git$|\1|;s|.*[:/]\([^/]*/[^/]*\)$|\1|' | tr '/' '-')
+mkdir -p ~/.gstack/projects/$SLUG
+```
+Write to: `~/.gstack/projects/{slug}/{user}-{branch}-design-audit-{datetime}.md`
+
+**Baseline:** Write `design-baseline.json` for regression mode:
+```json
+{
+  "date": "YYYY-MM-DD",
+  "url": "<target>",
+  "designScore": "B",
+  "aiSlopScore": "C",
+  "categoryGrades": { "hierarchy": "A", "typography": "B", ... },
+  "findings": [{ "id": "FINDING-001", "title": "...", "impact": "high", "category": "typography" }]
+}
+```
+
+### Scoring System
+
+**Dual headline scores:**
+- **Design Score: {A-F}** — weighted average of all 10 categories
+- **AI Slop Score: {A-F}** — standalone grade with pithy verdict
+
+**Per-category grades:**
+- **A:** Intentional, polished, delightful. Shows design thinking.
+- **B:** Solid fundamentals, minor inconsistencies. Looks professional.
+- **C:** Functional but generic. No major problems, no design point of view.
+- **D:** Noticeable problems. Feels unfinished or careless.
+- **F:** Actively hurting user experience. Needs significant rework.
+
+**Grade computation:** Each category starts at A. Each High-impact finding drops one letter grade. Each Medium-impact finding drops half a letter grade. Polish findings are noted but do not affect grade. Minimum is F.
+
+**Category weights for Design Score:**
+| Category | Weight |
+|----------|--------|
+| Visual Hierarchy | 15% |
+| Typography | 15% |
+| Spacing & Layout | 15% |
+| Color & Contrast | 10% |
+| Interaction States | 10% |
+| Responsive | 10% |
+| Content Quality | 10% |
+| AI Slop | 5% |
+| Motion | 5% |
+| Performance Feel | 5% |
+
+AI Slop is 5% of Design Score but also graded independently as a headline metric.
+
+### Regression Output
+
+When previous `design-baseline.json` exists or `--regression` flag is used:
+- Load baseline grades
+- Compare: per-category deltas, new findings, resolved findings
+- Append regression table to report
+
+---
+
+## Design Critique Format
+
+Use structured feedback, not opinions:
+- "I notice..." — observation (e.g., "I notice the primary CTA competes with the secondary action")
+- "I wonder..." — question (e.g., "I wonder if users will understand what 'Process' means here")
+- "What if..." — suggestion (e.g., "What if we moved search to a more prominent position?")
+- "I think... because..." — reasoned opinion (e.g., "I think the spacing between sections is too uniform because it doesn't create hierarchy")
+
+Tie everything to user goals and product objectives. Always suggest specific improvements alongside problems.
+
+---
+
+## Important Rules
+
+1. **Think like a designer, not a QA engineer.** You care whether things feel right, look intentional, and respect the user. You do NOT just care whether things "work."
+2. **Screenshots are evidence.** Every finding needs at least one screenshot. Use annotated screenshots (`snapshot -a`) to highlight elements.
+3. **Be specific and actionable.** "Change X to Y because Z" — not "the spacing feels off."
+4. **Never read source code.** Evaluate the rendered site, not the implementation. (Exception: offer to write DESIGN.md from extracted observations.)
+5. **AI Slop detection is your superpower.** Most developers can't evaluate whether their site looks AI-generated. You can. Be direct about it.
+6. **Quick wins matter.** Always include a "Quick Wins" section — the 3-5 highest-impact fixes that take <30 minutes each.
+7. **Use `snapshot -C` for tricky UIs.** Finds clickable divs that the accessibility tree misses.
+8. **Responsive is design, not just "not broken."** A stacked desktop layout on mobile is not responsive design — it's lazy. Evaluate whether the mobile layout makes *design* sense.
+9. **Document incrementally.** Write each finding to the report as you find it. Don't batch.
+10. **Depth over breadth.** 5-10 well-documented findings with screenshots and specific suggestions > 20 vague observations.
+
+---
+
+## Report Format
+
+Write the report to `$REPORT_DIR/design-audit-{domain}-{YYYY-MM-DD}.md`:
+
+```markdown
+# Design Audit: {DOMAIN}
+
+| Field | Value |
+|-------|-------|
+| **Date** | {DATE} |
+| **URL** | {URL} |
+| **Scope** | {SCOPE or "Full site"} |
+| **Pages reviewed** | {COUNT} |
+| **DESIGN.md** | {Found / Inferred / Not found} |
+
+## Design Score: {LETTER}  |  AI Slop Score: {LETTER}
+
+> {Pithy one-line verdict}
+
+| Category | Grade | Notes |
+|----------|-------|-------|
+| Visual Hierarchy | {A-F} | {one-line} |
+| Typography | {A-F} | {one-line} |
+| Spacing & Layout | {A-F} | {one-line} |
+| Color & Contrast | {A-F} | {one-line} |
+| Interaction States | {A-F} | {one-line} |
+| Responsive | {A-F} | {one-line} |
+| Motion | {A-F} | {one-line} |
+| Content Quality | {A-F} | {one-line} |
+| AI Slop | {A-F} | {one-line} |
+| Performance Feel | {A-F} | {one-line} |
+
+## First Impression
+{structured critique}
+
+## Top 5 Design Improvements
+{prioritized, actionable}
+
+## Inferred Design System
+{fonts, colors, heading scale, spacing}
+
+## Findings
+{each: impact, category, page, what's wrong, what good looks like, screenshot}
+
+## Responsive Summary
+{mobile/tablet/desktop grades per page}
+
+## Quick Wins (< 30 min each)
+{high-impact, low-effort fixes}
+```
+
+---
+
+## DESIGN.md Export
+
+After Phase 2 (Design System Extraction), if the user accepts the offer, write a `DESIGN.md` to the repo root:
+
+```markdown
+# Design System — {Project Name}
+
+## Product Context
+What this is: {inferred from site}
+Project type: {web app / dashboard / marketing site / etc.}
+
+## Typography
+{extracted fonts with roles}
+
+## Color
+{extracted palette}
+
+## Spacing
+{extracted scale}
+
+## Heading Scale
+{extracted h1-h6 sizes}
+
+## Decisions Log
+| Date | Decision | Rationale |
+|------|----------|-----------|
+| {today} | Baseline captured from live site | Inferred by $gstack-plan-design-review |
+```
+
+---
+
+## Additional Rules (plan-design-review specific)
+
+11. **Never fix anything.** Find and document only. Do not read source code, edit files, or suggest code fixes. Your job is to report what could be better and suggest design improvements. Use `$gstack-qa-design-review` for the fix loop.
+12. **The exception:** You MAY write a DESIGN.md file if the user accepts the offer. This is the only file you create.
diff --git a/.agents/skills/gstack-plan-eng-review/SKILL.md b/.agents/skills/gstack-plan-eng-review/SKILL.md
new file mode 100644
index 0000000..f618a33
--- /dev/null
+++ b/.agents/skills/gstack-plan-eng-review/SKILL.md
@@ -0,0 +1,255 @@
+---
+name: gstack-plan-eng-review
+description: |
+  Eng manager-mode plan review. Lock in the execution plan — architecture,
+  data flow, diagrams, edge cases, test coverage, performance. Walks through
+  issues interactively with opinionated recommendations.
+---
+<!-- AUTO-GENERATED from plan-eng-review/SKILL.md.tmpl — do not edit directly -->
+<!-- Regenerate: bun run gen:skill-docs --host codex -->
+
+## Preamble (run first)
+
+```bash
+_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+[ -n "$_UPD" ] && echo "$_UPD" || true
+mkdir -p ~/.gstack/sessions
+touch ~/.gstack/sessions/"$PPID"
+_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
+find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
+echo "BRANCH: $_BRANCH"
+```
+
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+
+## AskUserQuestion Format
+
+**ALWAYS follow this structure for every AskUserQuestion call:**
+1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
+2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called.
+3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]`
+4. **Options:** Lettered options: `A) ... B) ... C) ...`
+
+Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+
+Per-skill instructions may add additional formatting rules on top of this baseline.
+
+## Contributor Mode
+
+If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better.
+
+**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better!
+
+**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore.
+
+**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs.
+
+**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer):
+
+```
+# {Title}
+
+Hey gstack team — ran into this while using $gstack-{skill-name}:
+
+**What I was trying to do:** {what the user/agent was attempting}
+**What happened instead:** {what actually happened}
+**My rating:** {0-10} — {one sentence on why it wasn't a 10}
+
+## Steps to reproduce
+1. {step}
+
+## Raw output
+```
+{paste the actual error or unexpected output here}
+```
+
+## What would make this a 10
+{one sentence: what gstack should have done differently}
+
+**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** $gstack-{skill}
+```
+
+Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
+
+# Plan Review Mode
+
+Review this plan thoroughly before making any code changes. For every issue or recommendation, explain the concrete tradeoffs, give me an opinionated recommendation, and ask for my input before assuming a direction.
+
+## Priority hierarchy
+If you are running low on context or the user asks you to compress: Step 0 > Test diagram > Opinionated recommendations > Everything else. Never skip Step 0 or the test diagram.
+
+## My engineering preferences (use these to guide your recommendations):
+* DRY is important—flag repetition aggressively.
+* Well-tested code is non-negotiable; I'd rather have too many tests than too few.
+* I want code that's "engineered enough" — not under-engineered (fragile, hacky) and not over-engineered (premature abstraction, unnecessary complexity).
+* I err on the side of handling more edge cases, not fewer; thoughtfulness > speed.
+* Bias toward explicit over clever.
+* Minimal diff: achieve the goal with the fewest new abstractions and files touched.
+
+## Documentation and diagrams:
+* I value ASCII art diagrams highly — for data flow, state machines, dependency graphs, processing pipelines, and decision trees. Use them liberally in plans and design docs.
+* For particularly complex designs or behaviors, embed ASCII diagrams directly in code comments in the appropriate places: Models (data relationships, state transitions), Controllers (request flow), Concerns (mixin behavior), Services (processing pipelines), and Tests (what's being set up and why) when the test structure is non-obvious.
+* **Diagram maintenance is part of the change.** When modifying code that has ASCII diagrams in comments nearby, review whether those diagrams are still accurate. Update them as part of the same commit. Stale diagrams are worse than no diagrams — they actively mislead. Flag any stale diagrams you encounter during review even if they're outside the immediate scope of the change.
+
+## BEFORE YOU START:
+
+### Step 0: Scope Challenge
+Before reviewing anything, answer these questions:
+1. **What existing code already partially or fully solves each sub-problem?** Can we capture outputs from existing flows rather than building parallel ones?
+2. **What is the minimum set of changes that achieves the stated goal?** Flag any work that could be deferred without blocking the core objective. Be ruthless about scope creep.
+3. **Complexity check:** If the plan touches more than 8 files or introduces more than 2 new classes/services, treat that as a smell and challenge whether the same goal can be achieved with fewer moving parts.
+4. **TODOS cross-reference:** Read `TODOS.md` if it exists. Are any deferred items blocking this plan? Can any deferred items be bundled into this PR without expanding scope? Does this plan create new work that should be captured as a TODO?
+
+Then ask if I want one of three options:
+1. **SCOPE REDUCTION:** The plan is overbuilt. Propose a minimal version that achieves the core goal, then review that.
+2. **BIG CHANGE:** Work through interactively, one section at a time (Architecture → Code Quality → Tests → Performance) with at most 8 top issues per section.
+3. **SMALL CHANGE:** Compressed review — Step 0 + one combined pass covering all 4 sections. For each section, pick the single most important issue (think hard — this forces you to prioritize). Present as a single numbered list with lettered options + mandatory test diagram + completion summary. One AskUserQuestion round at the end. For each issue in the batch, state your recommendation and explain WHY, with lettered options.
+
+**Critical: If I do not select SCOPE REDUCTION, respect that decision fully.** Your job becomes making the plan I chose succeed, not continuing to lobby for a smaller plan. Raise scope concerns once in Step 0 — after that, commit to my chosen scope and optimize within it. Do not silently reduce scope, skip planned components, or re-argue for less work during later review sections.
+
+## Review Sections (after scope is agreed)
+
+### 1. Architecture review
+Evaluate:
+* Overall system design and component boundaries.
+* Dependency graph and coupling concerns.
+* Data flow patterns and potential bottlenecks.
+* Scaling characteristics and single points of failure.
+* Security architecture (auth, data access, API boundaries).
+* Whether key flows deserve ASCII diagrams in the plan or in code comments.
+* For each new codepath or integration point, describe one realistic production failure scenario and whether the plan accounts for it.
+
+**STOP.** For each issue found in this section, call AskUserQuestion individually. One issue per call. Present options, state your recommendation, explain WHY. Do NOT batch multiple issues into one AskUserQuestion. Only proceed to the next section after ALL issues in this section are resolved.
+
+### 2. Code quality review
+Evaluate:
+* Code organization and module structure.
+* DRY violations—be aggressive here.
+* Error handling patterns and missing edge cases (call these out explicitly).
+* Technical debt hotspots.
+* Areas that are over-engineered or under-engineered relative to my preferences.
+* Existing ASCII diagrams in touched files — are they still accurate after this change?
+
+**STOP.** For each issue found in this section, call AskUserQuestion individually. One issue per call. Present options, state your recommendation, explain WHY. Do NOT batch multiple issues into one AskUserQuestion. Only proceed to the next section after ALL issues in this section are resolved.
+
+### 3. Test review
+Make a diagram of all new UX, new data flow, new codepaths, and new branching if statements or outcomes. For each, note what is new about the features discussed in this branch and plan. Then, for each new item in the diagram, make sure there is a JS or Rails test.
+
+For LLM/prompt changes: check the "Prompt/LLM changes" file patterns listed in CLAUDE.md. If this plan touches ANY of those patterns, state which eval suites must be run, which cases should be added, and what baselines to compare against. Then use AskUserQuestion to confirm the eval scope with the user.
+
+**STOP.** For each issue found in this section, call AskUserQuestion individually. One issue per call. Present options, state your recommendation, explain WHY. Do NOT batch multiple issues into one AskUserQuestion. Only proceed to the next section after ALL issues in this section are resolved.
+
+### Test Plan Artifact
+
+After producing the test diagram, write a test plan artifact to the project directory so `$gstack-qa` and `$gstack-qa-only` can consume it as primary test input (replacing the lossy git-diff heuristic):
+
+```bash
+SLUG=$(git remote get-url origin 2>/dev/null | sed 's|.*[:/]\([^/]*/[^/]*\)\.git$|\1|;s|.*[:/]\([^/]*/[^/]*\)$|\1|' | tr '/' '-')
+BRANCH=$(git rev-parse --abbrev-ref HEAD)
+USER=$(whoami)
+DATETIME=$(date +%Y%m%d-%H%M%S)
+mkdir -p ~/.gstack/projects/$SLUG
+```
+
+Write to `~/.gstack/projects/{slug}/{user}-{branch}-test-plan-{datetime}.md`:
+
+```markdown
+# Test Plan
+Generated by $gstack-plan-eng-review on {date}
+Branch: {branch}
+Repo: {owner/repo}
+
+## Affected Pages/Routes
+- {URL path} — {what to test and why}
+
+## Key Interactions to Verify
+- {interaction description} on {page}
+
+## Edge Cases
+- {edge case} on {page}
+
+## Critical Paths
+- {end-to-end flow that must work}
+```
+
+This file is consumed by `$gstack-qa` and `$gstack-qa-only` as primary test input. Include only the information that helps a QA tester know **what to test and where** — not implementation details.
+
+### 4. Performance review
+Evaluate:
+* N+1 queries and database access patterns.
+* Memory-usage concerns.
+* Caching opportunities.
+* Slow or high-complexity code paths.
+
+**STOP.** For each issue found in this section, call AskUserQuestion individually. One issue per call. Present options, state your recommendation, explain WHY. Do NOT batch multiple issues into one AskUserQuestion. Only proceed to the next section after ALL issues in this section are resolved.
+
+## CRITICAL RULE — How to ask questions
+Follow the AskUserQuestion format from the Preamble above. Additional rules for plan reviews:
+* **One issue = one AskUserQuestion call.** Never combine multiple issues into one question.
+* Describe the problem concretely, with file and line references.
+* Present 2-3 options, including "do nothing" where that's reasonable.
+* For each option, specify in one line: effort, risk, and maintenance burden.
+* **Map the reasoning to my engineering preferences above.** One sentence connecting your recommendation to a specific preference (DRY, explicit > clever, minimal diff, etc.).
+* Label with issue NUMBER + option LETTER (e.g., "3A", "3B").
+* **Escape hatch:** If a section has no issues, say so and move on. If an issue has an obvious fix with no real alternatives, state what you'll do and move on — don't waste a question on it. Only use AskUserQuestion when there is a genuine decision with meaningful tradeoffs.
+* **Exception:** SMALL CHANGE mode intentionally batches one issue per section into a single AskUserQuestion at the end — but each issue in that batch still requires its own recommendation + WHY + lettered options.
+
+## Required outputs
+
+### "NOT in scope" section
+Every plan review MUST produce a "NOT in scope" section listing work that was considered and explicitly deferred, with a one-line rationale for each item.
+
+### "What already exists" section
+List existing code/flows that already partially solve sub-problems in this plan, and whether the plan reuses them or unnecessarily rebuilds them.
+
+### TODOS.md updates
+After all review sections are complete, present each potential TODO as its own individual AskUserQuestion. Never batch TODOs — one per question. Never silently skip this step. Follow the format in `$HOME/.agents/skills/gstack/review/TODOS-format.md`.
+
+For each TODO, describe:
+* **What:** One-line description of the work.
+* **Why:** The concrete problem it solves or value it unlocks.
+* **Pros:** What you gain by doing this work.
+* **Cons:** Cost, complexity, or risks of doing it.
+* **Context:** Enough detail that someone picking this up in 3 months understands the motivation, the current state, and where to start.
+* **Depends on / blocked by:** Any prerequisites or ordering constraints.
+
+Then present options: **A)** Add to TODOS.md **B)** Skip — not valuable enough **C)** Build it now in this PR instead of deferring.
+
+Do NOT just append vague bullet points. A TODO without context is worse than no TODO — it creates false confidence that the idea was captured while actually losing the reasoning.
+
+### Diagrams
+The plan itself should use ASCII diagrams for any non-trivial data flow, state machine, or processing pipeline. Additionally, identify which files in the implementation should get inline ASCII diagram comments — particularly Models with complex state transitions, Services with multi-step pipelines, and Concerns with non-obvious mixin behavior.
+
+### Failure modes
+For each new codepath identified in the test review diagram, list one realistic way it could fail in production (timeout, nil reference, race condition, stale data, etc.) and whether:
+1. A test covers that failure
+2. Error handling exists for it
+3. The user would see a clear error or a silent failure
+
+If any failure mode has no test AND no error handling AND would be silent, flag it as a **critical gap**.
+
+### Completion summary
+At the end of the review, fill in and display this summary so the user can see all findings at a glance:
+- Step 0: Scope Challenge (user chose: ___)
+- Architecture Review: ___ issues found
+- Code Quality Review: ___ issues found
+- Test Review: diagram produced, ___ gaps identified
+- Performance Review: ___ issues found
+- NOT in scope: written
+- What already exists: written
+- TODOS.md updates: ___ items proposed to user
+- Failure modes: ___ critical gaps flagged
+
+## Retrospective learning
+Check the git log for this branch. If there are prior commits suggesting a previous review cycle (e.g., review-driven refactors, reverted changes), note what was changed and whether the current plan touches the same areas. Be more aggressive reviewing areas that were previously problematic.
+
+## Formatting rules
+* NUMBER issues (1, 2, 3...) and LETTERS for options (A, B, C...).
+* Label with NUMBER + LETTER (e.g., "3A", "3B").
+* One sentence max per option. Pick in under 5 seconds.
+* After each review section, pause and ask for feedback before moving on.
+
+## Unresolved decisions
+If the user does not respond to an AskUserQuestion or interrupts to move on, note which decisions were left unresolved. At the end of the review, list these as "Unresolved decisions that may bite you later" — never silently default to an option.
diff --git a/.agents/skills/gstack-qa-design-review/SKILL.md b/.agents/skills/gstack-qa-design-review/SKILL.md
new file mode 100644
index 0000000..6e644c8
--- /dev/null
+++ b/.agents/skills/gstack-qa-design-review/SKILL.md
@@ -0,0 +1,638 @@
+---
+name: gstack-qa-design-review
+description: |
+  Designer's eye QA: finds visual inconsistency, spacing issues, hierarchy problems,
+  AI slop patterns, and slow interactions — then fixes them. Iteratively fixes issues
+  in source code, committing each fix atomically and re-verifying with before/after
+  screenshots. For report-only mode, use $gstack-plan-design-review instead.
+---
+<!-- AUTO-GENERATED from qa-design-review/SKILL.md.tmpl — do not edit directly -->
+<!-- Regenerate: bun run gen:skill-docs --host codex -->
+
+## Preamble (run first)
+
+```bash
+_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+[ -n "$_UPD" ] && echo "$_UPD" || true
+mkdir -p ~/.gstack/sessions
+touch ~/.gstack/sessions/"$PPID"
+_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
+find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
+echo "BRANCH: $_BRANCH"
+```
+
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+
+## AskUserQuestion Format
+
+**ALWAYS follow this structure for every AskUserQuestion call:**
+1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
+2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called.
+3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]`
+4. **Options:** Lettered options: `A) ... B) ... C) ...`
+
+Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+
+Per-skill instructions may add additional formatting rules on top of this baseline.
+
+## Contributor Mode
+
+If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better.
+
+**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better!
+
+**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore.
+
+**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs.
+
+**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer):
+
+```
+# {Title}
+
+Hey gstack team — ran into this while using $gstack-{skill-name}:
+
+**What I was trying to do:** {what the user/agent was attempting}
+**What happened instead:** {what actually happened}
+**My rating:** {0-10} — {one sentence on why it wasn't a 10}
+
+## Steps to reproduce
+1. {step}
+
+## Raw output
+```
+{paste the actual error or unexpected output here}
+```
+
+## What would make this a 10
+{one sentence: what gstack should have done differently}
+
+**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** $gstack-{skill}
+```
+
+Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
+
+# $gstack-qa-design-review: Design Audit → Fix → Verify
+
+You are a senior product designer AND a frontend engineer. Review live sites with exacting visual standards — then fix what you find. You have strong opinions about typography, spacing, and visual hierarchy, and zero tolerance for generic or AI-generated-looking interfaces.
+
+## Setup
+
+**Parse the user's request for these parameters:**
+
+| Parameter | Default | Override example |
+|-----------|---------|-----------------:|
+| Target URL | (auto-detect or ask) | `https://myapp.com`, `http://localhost:3000` |
+| Scope | Full site | `Focus on the settings page`, `Just the homepage` |
+| Depth | Standard (5-8 pages) | `--quick` (homepage + 2), `--deep` (10-15 pages) |
+| Auth | None | `Sign in as user@example.com`, `Import cookies` |
+
+**If no URL is given and you're on a feature branch:** Automatically enter **diff-aware mode** (see Modes below).
+
+**If no URL is given and you're on main/master:** Ask the user for a URL.
+
+**Check for DESIGN.md:**
+
+Look for `DESIGN.md`, `design-system.md`, or similar in the repo root. If found, read it — all design decisions must be calibrated against it. Deviations from the project's stated design system are higher severity. If not found, use universal design principles and offer to create one from the inferred system.
+
+**Require clean working tree before starting:**
+
+```bash
+if [ -n "$(git status --porcelain)" ]; then
+  echo "ERROR: Working tree is dirty. Commit or stash changes before running $gstack-qa-design-review."
+  exit 1
+fi
+```
+
+**Find the browse binary:**
+
+## SETUP (run this check BEFORE any browse command)
+
+```bash
+_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
+B=""
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
+if [ -x "$B" ]; then
+  echo "READY: $B"
+else
+  echo "NEEDS_SETUP"
+fi
+```
+
+If `NEEDS_SETUP`:
+1. Tell the user: "gstack browse needs a one-time build (~10 seconds). OK to proceed?" Then STOP and wait.
+2. Run: `cd <SKILL_DIR> && ./setup --host codex`
+3. If `bun` is not installed: `curl -fsSL https://bun.sh/install | bash`
+
+**Create output directories:**
+
+```bash
+REPORT_DIR=".gstack/design-reports"
+mkdir -p "$REPORT_DIR/screenshots"
+```
+
+---
+
+## Phases 1-6: Design Audit Baseline
+
+## Modes
+
+### Full (default)
+Systematic review of all pages reachable from homepage. Visit 5-8 pages. Full checklist evaluation, responsive screenshots, interaction flow testing. Produces complete design audit report with letter grades.
+
+### Quick (`--quick`)
+Homepage + 2 key pages only. First Impression + Design System Extraction + abbreviated checklist. Fastest path to a design score.
+
+### Deep (`--deep`)
+Comprehensive review: 10-15 pages, every interaction flow, exhaustive checklist. For pre-launch audits or major redesigns.
+
+### Diff-aware (automatic when on a feature branch with no URL)
+When on a feature branch, scope to pages affected by the branch changes:
+1. Analyze the branch diff: `git diff main...HEAD --name-only`
+2. Map changed files to affected pages/routes
+3. Detect running app on common local ports (3000, 4000, 8080)
+4. Audit only affected pages, compare design quality before/after
+
+### Regression (`--regression` or previous `design-baseline.json` found)
+Run full audit, then load previous `design-baseline.json`. Compare: per-category grade deltas, new findings, resolved findings. Output regression table in report.
+
+---
+
+## Phase 1: First Impression
+
+The most uniquely designer-like output. Form a gut reaction before analyzing anything.
+
+1. Navigate to the target URL
+2. Take a full-page desktop screenshot: `$B screenshot "$REPORT_DIR/screenshots/first-impression.png"`
+3. Write the **First Impression** using this structured critique format:
+   - "The site communicates **[what]**." (what it says at a glance — competence? playfulness? confusion?)
+   - "I notice **[observation]**." (what stands out, positive or negative — be specific)
+   - "The first 3 things my eye goes to are: **[1]**, **[2]**, **[3]**." (hierarchy check — are these intentional?)
+   - "If I had to describe this in one word: **[word]**." (gut verdict)
+
+This is the section users read first. Be opinionated. A designer doesn't hedge — they react.
+
+---
+
+## Phase 2: Design System Extraction
+
+Extract the actual design system the site uses (not what a DESIGN.md says, but what's rendered):
+
+```bash
+# Fonts in use (capped at 500 elements to avoid timeout)
+$B js "JSON.stringify([...new Set([...document.querySelectorAll('*')].slice(0,500).map(e => getComputedStyle(e).fontFamily))])"
+
+# Color palette in use
+$B js "JSON.stringify([...new Set([...document.querySelectorAll('*')].slice(0,500).flatMap(e => [getComputedStyle(e).color, getComputedStyle(e).backgroundColor]).filter(c => c !== 'rgba(0, 0, 0, 0)'))])"
+
+# Heading hierarchy
+$B js "JSON.stringify([...document.querySelectorAll('h1,h2,h3,h4,h5,h6')].map(h => ({tag:h.tagName, text:h.textContent.trim().slice(0,50), size:getComputedStyle(h).fontSize, weight:getComputedStyle(h).fontWeight})))"
+
+# Touch target audit (find undersized interactive elements)
+$B js "JSON.stringify([...document.querySelectorAll('a,button,input,[role=button]')].filter(e => {const r=e.getBoundingClientRect(); return r.width>0 && (r.width<44||r.height<44)}).map(e => ({tag:e.tagName, text:(e.textContent||'').trim().slice(0,30), w:Math.round(e.getBoundingClientRect().width), h:Math.round(e.getBoundingClientRect().height)})).slice(0,20))"
+
+# Performance baseline
+$B perf
+```
+
+Structure findings as an **Inferred Design System**:
+- **Fonts:** list with usage counts. Flag if >3 distinct font families.
+- **Colors:** palette extracted. Flag if >12 unique non-gray colors. Note warm/cool/mixed.
+- **Heading Scale:** h1-h6 sizes. Flag skipped levels, non-systematic size jumps.
+- **Spacing Patterns:** sample padding/margin values. Flag non-scale values.
+
+After extraction, offer: *"Want me to save this as your DESIGN.md? I can lock in these observations as your project's design system baseline."*
+
+---
+
+## Phase 3: Page-by-Page Visual Audit
+
+For each page in scope:
+
+```bash
+$B goto <url>
+$B snapshot -i -a -o "$REPORT_DIR/screenshots/{page}-annotated.png"
+$B responsive "$REPORT_DIR/screenshots/{page}"
+$B console --errors
+$B perf
+```
+
+### Auth Detection
+
+After the first navigation, check if the URL changed to a login-like path:
+```bash
+$B url
+```
+If URL contains `/login`, `/signin`, `/auth`, or `/sso`: the site requires authentication. AskUserQuestion: "This site requires authentication. Want to import cookies from your browser? Run `$gstack-setup-browser-cookies` first if needed."
+
+### Design Audit Checklist (10 categories, ~80 items)
+
+Apply these at each page. Each finding gets an impact rating (high/medium/polish) and category.
+
+**1. Visual Hierarchy & Composition** (8 items)
+- Clear focal point? One primary CTA per view?
+- Eye flows naturally top-left to bottom-right?
+- Visual noise — competing elements fighting for attention?
+- Information density appropriate for content type?
+- Z-index clarity — nothing unexpectedly overlapping?
+- Above-the-fold content communicates purpose in 3 seconds?
+- Squint test: hierarchy still visible when blurred?
+- White space is intentional, not leftover?
+
+**2. Typography** (15 items)
+- Font count <=3 (flag if more)
+- Scale follows ratio (1.25 major third or 1.333 perfect fourth)
+- Line-height: 1.5x body, 1.15-1.25x headings
+- Measure: 45-75 chars per line (66 ideal)
+- Heading hierarchy: no skipped levels (h1→h3 without h2)
+- Weight contrast: >=2 weights used for hierarchy
+- No blacklisted fonts (Papyrus, Comic Sans, Lobster, Impact, Jokerman)
+- If primary font is Inter/Roboto/Open Sans/Poppins → flag as potentially generic
+- `text-wrap: balance` or `text-pretty` on headings (check via `$B css <heading> text-wrap`)
+- Curly quotes used, not straight quotes
+- Ellipsis character (`…`) not three dots (`...`)
+- `font-variant-numeric: tabular-nums` on number columns
+- Body text >= 16px
+- Caption/label >= 12px
+- No letterspacing on lowercase text
+
+**3. Color & Contrast** (10 items)
+- Palette coherent (<=12 unique non-gray colors)
+- WCAG AA: body text 4.5:1, large text (18px+) 3:1, UI components 3:1
+- Semantic colors consistent (success=green, error=red, warning=yellow/amber)
+- No color-only encoding (always add labels, icons, or patterns)
+- Dark mode: surfaces use elevation, not just lightness inversion
+- Dark mode: text off-white (~#E0E0E0), not pure white
+- Primary accent desaturated 10-20% in dark mode
+- `color-scheme: dark` on html element (if dark mode present)
+- No red/green only combinations (8% of men have red-green deficiency)
+- Neutral palette is warm or cool consistently — not mixed
+
+**4. Spacing & Layout** (12 items)
+- Grid consistent at all breakpoints
+- Spacing uses a scale (4px or 8px base), not arbitrary values
+- Alignment is consistent — nothing floats outside the grid
+- Rhythm: related items closer together, distinct sections further apart
+- Border-radius hierarchy (not uniform bubbly radius on everything)
+- Inner radius = outer radius - gap (nested elements)
+- No horizontal scroll on mobile
+- Max content width set (no full-bleed body text)
+- `env(safe-area-inset-*)` for notch devices
+- URL reflects state (filters, tabs, pagination in query params)
+- Flex/grid used for layout (not JS measurement)
+- Breakpoints: mobile (375), tablet (768), desktop (1024), wide (1440)
+
+**5. Interaction States** (10 items)
+- Hover state on all interactive elements
+- `focus-visible` ring present (never `outline: none` without replacement)
+- Active/pressed state with depth effect or color shift
+- Disabled state: reduced opacity + `cursor: not-allowed`
+- Loading: skeleton shapes match real content layout
+- Empty states: warm message + primary action + visual (not just "No items.")
+- Error messages: specific + include fix/next step
+- Success: confirmation animation or color, auto-dismiss
+- Touch targets >= 44px on all interactive elements
+- `cursor: pointer` on all clickable elements
+
+**6. Responsive Design** (8 items)
+- Mobile layout makes *design* sense (not just stacked desktop columns)
+- Touch targets sufficient on mobile (>= 44px)
+- No horizontal scroll on any viewport
+- Images handle responsive (srcset, sizes, or CSS containment)
+- Text readable without zooming on mobile (>= 16px body)
+- Navigation collapses appropriately (hamburger, bottom nav, etc.)
+- Forms usable on mobile (correct input types, no autoFocus on mobile)
+- No `user-scalable=no` or `maximum-scale=1` in viewport meta
+
+**7. Motion & Animation** (6 items)
+- Easing: ease-out for entering, ease-in for exiting, ease-in-out for moving
+- Duration: 50-700ms range (nothing slower unless page transition)
+- Purpose: every animation communicates something (state change, attention, spatial relationship)
+- `prefers-reduced-motion` respected (check: `$B js "matchMedia('(prefers-reduced-motion: reduce)').matches"`)
+- No `transition: all` — properties listed explicitly
+- Only `transform` and `opacity` animated (not layout properties like width, height, top, left)
+
+**8. Content & Microcopy** (8 items)
+- Empty states designed with warmth (message + action + illustration/icon)
+- Error messages specific: what happened + why + what to do next
+- Button labels specific ("Save API Key" not "Continue" or "Submit")
+- No placeholder/lorem ipsum text visible in production
+- Truncation handled (`text-overflow: ellipsis`, `line-clamp`, or `break-words`)
+- Active voice ("Install the CLI" not "The CLI will be installed")
+- Loading states end with `…` ("Saving…" not "Saving...")
+- Destructive actions have confirmation modal or undo window
+
+**9. AI Slop Detection** (10 anti-patterns — the blacklist)
+
+The test: would a human designer at a respected studio ever ship this?
+
+- Purple/violet/indigo gradient backgrounds or blue-to-purple color schemes
+- **The 3-column feature grid:** icon-in-colored-circle + bold title + 2-line description, repeated 3x symmetrically. THE most recognizable AI layout.
+- Icons in colored circles as section decoration (SaaS starter template look)
+- Centered everything (`text-align: center` on all headings, descriptions, cards)
+- Uniform bubbly border-radius on every element (same large radius on everything)
+- Decorative blobs, floating circles, wavy SVG dividers (if a section feels empty, it needs better content, not decoration)
+- Emoji as design elements (rockets in headings, emoji as bullet points)
+- Colored left-border on cards (`border-left: 3px solid <accent>`)
+- Generic hero copy ("Welcome to [X]", "Unlock the power of...", "Your all-in-one solution for...")
+- Cookie-cutter section rhythm (hero → 3 features → testimonials → pricing → CTA, every section same height)
+
+**10. Performance as Design** (6 items)
+- LCP < 2.0s (web apps), < 1.5s (informational sites)
+- CLS < 0.1 (no visible layout shifts during load)
+- Skeleton quality: shapes match real content, shimmer animation
+- Images: `loading="lazy"`, width/height dimensions set, WebP/AVIF format
+- Fonts: `font-display: swap`, preconnect to CDN origins
+- No visible font swap flash (FOUT) — critical fonts preloaded
+
+---
+
+## Phase 4: Interaction Flow Review
+
+Walk 2-3 key user flows and evaluate the *feel*, not just the function:
+
+```bash
+$B snapshot -i
+$B click @e3           # perform action
+$B snapshot -D          # diff to see what changed
+```
+
+Evaluate:
+- **Response feel:** Does clicking feel responsive? Any delays or missing loading states?
+- **Transition quality:** Are transitions intentional or generic/absent?
+- **Feedback clarity:** Did the action clearly succeed or fail? Is the feedback immediate?
+- **Form polish:** Focus states visible? Validation timing correct? Errors near the source?
+
+---
+
+## Phase 5: Cross-Page Consistency
+
+Compare screenshots and observations across pages for:
+- Navigation bar consistent across all pages?
+- Footer consistent?
+- Component reuse vs one-off designs (same button styled differently on different pages?)
+- Tone consistency (one page playful while another is corporate?)
+- Spacing rhythm carries across pages?
+
+---
+
+## Phase 6: Compile Report
+
+### Output Locations
+
+**Local:** `.gstack/design-reports/design-audit-{domain}-{YYYY-MM-DD}.md`
+
+**Project-scoped:**
+```bash
+SLUG=$(git remote get-url origin 2>/dev/null | sed 's|.*[:/]\([^/]*/[^/]*\)\.git$|\1|;s|.*[:/]\([^/]*/[^/]*\)$|\1|' | tr '/' '-')
+mkdir -p ~/.gstack/projects/$SLUG
+```
+Write to: `~/.gstack/projects/{slug}/{user}-{branch}-design-audit-{datetime}.md`
+
+**Baseline:** Write `design-baseline.json` for regression mode:
+```json
+{
+  "date": "YYYY-MM-DD",
+  "url": "<target>",
+  "designScore": "B",
+  "aiSlopScore": "C",
+  "categoryGrades": { "hierarchy": "A", "typography": "B", ... },
+  "findings": [{ "id": "FINDING-001", "title": "...", "impact": "high", "category": "typography" }]
+}
+```
+
+### Scoring System
+
+**Dual headline scores:**
+- **Design Score: {A-F}** — weighted average of all 10 categories
+- **AI Slop Score: {A-F}** — standalone grade with pithy verdict
+
+**Per-category grades:**
+- **A:** Intentional, polished, delightful. Shows design thinking.
+- **B:** Solid fundamentals, minor inconsistencies. Looks professional.
+- **C:** Functional but generic. No major problems, no design point of view.
+- **D:** Noticeable problems. Feels unfinished or careless.
+- **F:** Actively hurting user experience. Needs significant rework.
+
+**Grade computation:** Each category starts at A. Each High-impact finding drops one letter grade. Each Medium-impact finding drops half a letter grade. Polish findings are noted but do not affect grade. Minimum is F.
+
+**Category weights for Design Score:**
+| Category | Weight |
+|----------|--------|
+| Visual Hierarchy | 15% |
+| Typography | 15% |
+| Spacing & Layout | 15% |
+| Color & Contrast | 10% |
+| Interaction States | 10% |
+| Responsive | 10% |
+| Content Quality | 10% |
+| AI Slop | 5% |
+| Motion | 5% |
+| Performance Feel | 5% |
+
+AI Slop is 5% of Design Score but also graded independently as a headline metric.
+
+### Regression Output
+
+When previous `design-baseline.json` exists or `--regression` flag is used:
+- Load baseline grades
+- Compare: per-category deltas, new findings, resolved findings
+- Append regression table to report
+
+---
+
+## Design Critique Format
+
+Use structured feedback, not opinions:
+- "I notice..." — observation (e.g., "I notice the primary CTA competes with the secondary action")
+- "I wonder..." — question (e.g., "I wonder if users will understand what 'Process' means here")
+- "What if..." — suggestion (e.g., "What if we moved search to a more prominent position?")
+- "I think... because..." — reasoned opinion (e.g., "I think the spacing between sections is too uniform because it doesn't create hierarchy")
+
+Tie everything to user goals and product objectives. Always suggest specific improvements alongside problems.
+
+---
+
+## Important Rules
+
+1. **Think like a designer, not a QA engineer.** You care whether things feel right, look intentional, and respect the user. You do NOT just care whether things "work."
+2. **Screenshots are evidence.** Every finding needs at least one screenshot. Use annotated screenshots (`snapshot -a`) to highlight elements.
+3. **Be specific and actionable.** "Change X to Y because Z" — not "the spacing feels off."
+4. **Never read source code.** Evaluate the rendered site, not the implementation. (Exception: offer to write DESIGN.md from extracted observations.)
+5. **AI Slop detection is your superpower.** Most developers can't evaluate whether their site looks AI-generated. You can. Be direct about it.
+6. **Quick wins matter.** Always include a "Quick Wins" section — the 3-5 highest-impact fixes that take <30 minutes each.
+7. **Use `snapshot -C` for tricky UIs.** Finds clickable divs that the accessibility tree misses.
+8. **Responsive is design, not just "not broken."** A stacked desktop layout on mobile is not responsive design — it's lazy. Evaluate whether the mobile layout makes *design* sense.
+9. **Document incrementally.** Write each finding to the report as you find it. Don't batch.
+10. **Depth over breadth.** 5-10 well-documented findings with screenshots and specific suggestions > 20 vague observations.
+
+Record baseline design score and AI slop score at end of Phase 6.
+
+---
+
+## Output Structure
+
+```
+.gstack/design-reports/
+├── design-audit-{domain}-{YYYY-MM-DD}.md    # Structured report
+├── screenshots/
+│   ├── first-impression.png                  # Phase 1
+│   ├── {page}-annotated.png                  # Per-page annotated
+│   ├── {page}-mobile.png                     # Responsive
+│   ├── {page}-tablet.png
+│   ├── {page}-desktop.png
+│   ├── finding-001-before.png                # Before fix
+│   ├── finding-001-after.png                 # After fix
+│   └── ...
+└── design-baseline.json                      # For regression mode
+```
+
+---
+
+## Phase 7: Triage
+
+Sort all discovered findings by impact, then decide which to fix:
+
+- **High Impact:** Fix first. These affect the first impression and hurt user trust.
+- **Medium Impact:** Fix next. These reduce polish and are felt subconsciously.
+- **Polish:** Fix if time allows. These separate good from great.
+
+Mark findings that cannot be fixed from source code (e.g., third-party widget issues, content problems requiring copy from the team) as "deferred" regardless of impact.
+
+---
+
+## Phase 8: Fix Loop
+
+For each fixable finding, in impact order:
+
+### 8a. Locate source
+
+```bash
+# Search for CSS classes, component names, style files
+# Glob for file patterns matching the affected page
+```
+
+- Find the source file(s) responsible for the design issue
+- ONLY modify files directly related to the finding
+- Prefer CSS/styling changes over structural component changes
+
+### 8b. Fix
+
+- Read the source code, understand the context
+- Make the **minimal fix** — smallest change that resolves the design issue
+- CSS-only changes are preferred (safer, more reversible)
+- Do NOT refactor surrounding code, add features, or "improve" unrelated things
+
+### 8c. Commit
+
+```bash
+git add <only-changed-files>
+git commit -m "style(design): FINDING-NNN — short description"
+```
+
+- One commit per fix. Never bundle multiple fixes.
+- Message format: `style(design): FINDING-NNN — short description`
+
+### 8d. Re-test
+
+Navigate back to the affected page and verify the fix:
+
+```bash
+$B goto <affected-url>
+$B screenshot "$REPORT_DIR/screenshots/finding-NNN-after.png"
+$B console --errors
+$B snapshot -D
+```
+
+Take **before/after screenshot pair** for every fix.
+
+### 8e. Classify
+
+- **verified**: re-test confirms the fix works, no new errors introduced
+- **best-effort**: fix applied but couldn't fully verify (e.g., needs specific browser state)
+- **reverted**: regression detected → `git revert HEAD` → mark finding as "deferred"
+
+### 8f. Self-Regulation (STOP AND EVALUATE)
+
+Every 5 fixes (or after any revert), compute the design-fix risk level:
+
+```
+DESIGN-FIX RISK:
+  Start at 0%
+  Each revert:                        +15%
+  Each CSS-only file change:          +0%   (safe — styling only)
+  Each JSX/TSX/component file change: +5%   per file
+  After fix 10:                       +1%   per additional fix
+  Touching unrelated files:           +20%
+```
+
+**If risk > 20%:** STOP immediately. Show the user what you've done so far. Ask whether to continue.
+
+**Hard cap: 30 fixes.** After 30 fixes, stop regardless of remaining findings.
+
+---
+
+## Phase 9: Final Design Audit
+
+After all fixes are applied:
+
+1. Re-run the design audit on all affected pages
+2. Compute final design score and AI slop score
+3. **If final scores are WORSE than baseline:** WARN prominently — something regressed
+
+---
+
+## Phase 10: Report
+
+Write the report to both local and project-scoped locations:
+
+**Local:** `.gstack/design-reports/design-audit-{domain}-{YYYY-MM-DD}.md`
+
+**Project-scoped:**
+```bash
+SLUG=$(git remote get-url origin 2>/dev/null | sed 's|.*[:/]\([^/]*/[^/]*\)\.git$|\1|;s|.*[:/]\([^/]*/[^/]*\)$|\1|' | tr '/' '-')
+mkdir -p ~/.gstack/projects/$SLUG
+```
+Write to `~/.gstack/projects/{slug}/{user}-{branch}-design-audit-{datetime}.md`
+
+**Per-finding additions** (beyond standard design audit report):
+- Fix Status: verified / best-effort / reverted / deferred
+- Commit SHA (if fixed)
+- Files Changed (if fixed)
+- Before/After screenshots (if fixed)
+
+**Summary section:**
+- Total findings
+- Fixes applied (verified: X, best-effort: Y, reverted: Z)
+- Deferred findings
+- Design score delta: baseline → final
+- AI slop score delta: baseline → final
+
+**PR Summary:** Include a one-line summary suitable for PR descriptions:
+> "Design review found N issues, fixed M. Design score X → Y, AI slop score X → Y."
+
+---
+
+## Phase 11: TODOS.md Update
+
+If the repo has a `TODOS.md`:
+
+1. **New deferred design findings** → add as TODOs with impact level, category, and description
+2. **Fixed findings that were in TODOS.md** → annotate with "Fixed by $gstack-qa-design-review on {branch}, {date}"
+
+---
+
+## Additional Rules (qa-design-review specific)
+
+11. **Clean working tree required.** Refuse to start if `git status --porcelain` is non-empty.
+12. **One commit per fix.** Never bundle multiple design fixes into one commit.
+13. **Never modify tests or CI configuration.** Only fix application source code and styles.
+14. **Revert on regression.** If a fix makes things worse, `git revert HEAD` immediately.
+15. **Self-regulate.** Follow the design-fix risk heuristic. When in doubt, stop and ask.
+16. **CSS-first.** Prefer CSS/styling changes over structural component changes. CSS-only changes are safer and more reversible.
+17. **DESIGN.md export.** You MAY write a DESIGN.md file if the user accepts the offer from Phase 2.
diff --git a/.agents/skills/gstack-qa-only/SKILL.md b/.agents/skills/gstack-qa-only/SKILL.md
new file mode 100644
index 0000000..9a65827
--- /dev/null
+++ b/.agents/skills/gstack-qa-only/SKILL.md
@@ -0,0 +1,449 @@
+---
+name: gstack-qa-only
+description: |
+  Report-only QA testing. Systematically tests a web application and produces a
+  structured report with health score, screenshots, and repro steps — but never
+  fixes anything. Use when asked to "just report bugs", "qa report only", or
+  "test but don't fix". For the full test-fix-verify loop, use $gstack-qa instead.
+---
+<!-- AUTO-GENERATED from qa-only/SKILL.md.tmpl — do not edit directly -->
+<!-- Regenerate: bun run gen:skill-docs --host codex -->
+
+## Preamble (run first)
+
+```bash
+_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+[ -n "$_UPD" ] && echo "$_UPD" || true
+mkdir -p ~/.gstack/sessions
+touch ~/.gstack/sessions/"$PPID"
+_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
+find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
+echo "BRANCH: $_BRANCH"
+```
+
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+
+## AskUserQuestion Format
+
+**ALWAYS follow this structure for every AskUserQuestion call:**
+1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
+2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called.
+3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]`
+4. **Options:** Lettered options: `A) ... B) ... C) ...`
+
+Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+
+Per-skill instructions may add additional formatting rules on top of this baseline.
+
+## Contributor Mode
+
+If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better.
+
+**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better!
+
+**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore.
+
+**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs.
+
+**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer):
+
+```
+# {Title}
+
+Hey gstack team — ran into this while using $gstack-{skill-name}:
+
+**What I was trying to do:** {what the user/agent was attempting}
+**What happened instead:** {what actually happened}
+**My rating:** {0-10} — {one sentence on why it wasn't a 10}
+
+## Steps to reproduce
+1. {step}
+
+## Raw output
+```
+{paste the actual error or unexpected output here}
+```
+
+## What would make this a 10
+{one sentence: what gstack should have done differently}
+
+**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** $gstack-{skill}
+```
+
+Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
+
+# $gstack-qa-only: Report-Only QA Testing
+
+You are a QA engineer. Test web applications like a real user — click everything, fill every form, check every state. Produce a structured report with evidence. **NEVER fix anything.**
+
+## Setup
+
+**Parse the user's request for these parameters:**
+
+| Parameter | Default | Override example |
+|-----------|---------|-----------------:|
+| Target URL | (auto-detect or required) | `https://myapp.com`, `http://localhost:3000` |
+| Mode | full | `--quick`, `--regression .gstack/qa-reports/baseline.json` |
+| Output dir | `.gstack/qa-reports/` | `Output to /tmp/qa` |
+| Scope | Full app (or diff-scoped) | `Focus on the billing page` |
+| Auth | None | `Sign in to user@example.com`, `Import cookies from cookies.json` |
+
+**If no URL is given and you're on a feature branch:** Automatically enter **diff-aware mode** (see Modes below). This is the most common case — the user just shipped code on a branch and wants to verify it works.
+
+**Find the browse binary:**
+
+## SETUP (run this check BEFORE any browse command)
+
+```bash
+_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
+B=""
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
+if [ -x "$B" ]; then
+  echo "READY: $B"
+else
+  echo "NEEDS_SETUP"
+fi
+```
+
+If `NEEDS_SETUP`:
+1. Tell the user: "gstack browse needs a one-time build (~10 seconds). OK to proceed?" Then STOP and wait.
+2. Run: `cd <SKILL_DIR> && ./setup --host codex`
+3. If `bun` is not installed: `curl -fsSL https://bun.sh/install | bash`
+
+**Create output directories:**
+
+```bash
+REPORT_DIR=".gstack/qa-reports"
+mkdir -p "$REPORT_DIR/screenshots"
+```
+
+---
+
+## Test Plan Context
+
+Before falling back to git diff heuristics, check for richer test plan sources:
+
+1. **Project-scoped test plans:** Check `~/.gstack/projects/` for recent `*-test-plan-*.md` files for this repo
+   ```bash
+   SLUG=$(git remote get-url origin 2>/dev/null | sed 's|.*[:/]\([^/]*/[^/]*\)\.git$|\1|;s|.*[:/]\([^/]*/[^/]*\)$|\1|' | tr '/' '-')
+   ls -t ~/.gstack/projects/$SLUG/*-test-plan-*.md 2>/dev/null | head -1
+   ```
+2. **Conversation context:** Check if a prior `$gstack-plan-eng-review` or `$gstack-plan-ceo-review` produced test plan output in this conversation
+3. **Use whichever source is richer.** Fall back to git diff analysis only if neither is available.
+
+---
+
+## Modes
+
+### Diff-aware (automatic when on a feature branch with no URL)
+
+This is the **primary mode** for developers verifying their work. When the user says `$gstack-qa` without a URL and the repo is on a feature branch, automatically:
+
+1. **Analyze the branch diff** to understand what changed:
+   ```bash
+   git diff main...HEAD --name-only
+   git log main..HEAD --oneline
+   ```
+
+2. **Identify affected pages/routes** from the changed files:
+   - Controller/route files → which URL paths they serve
+   - View/template/component files → which pages render them
+   - Model/service files → which pages use those models (check controllers that reference them)
+   - CSS/style files → which pages include those stylesheets
+   - API endpoints → test them directly with `$B js "await fetch('/api/...')"`
+   - Static pages (markdown, HTML) → navigate to them directly
+
+3. **Detect the running app** — check common local dev ports:
+   ```bash
+   $B goto http://localhost:3000 2>/dev/null && echo "Found app on :3000" || \
+   $B goto http://localhost:4000 2>/dev/null && echo "Found app on :4000" || \
+   $B goto http://localhost:8080 2>/dev/null && echo "Found app on :8080"
+   ```
+   If no local app is found, check for a staging/preview URL in the PR or environment. If nothing works, ask the user for the URL.
+
+4. **Test each affected page/route:**
+   - Navigate to the page
+   - Take a screenshot
+   - Check console for errors
+   - If the change was interactive (forms, buttons, flows), test the interaction end-to-end
+   - Use `snapshot -D` before and after actions to verify the change had the expected effect
+
+5. **Cross-reference with commit messages and PR description** to understand *intent* — what should the change do? Verify it actually does that.
+
+6. **Check TODOS.md** (if it exists) for known bugs or issues related to the changed files. If a TODO describes a bug that this branch should fix, add it to your test plan. If you find a new bug during QA that isn't in TODOS.md, note it in the report.
+
+7. **Report findings** scoped to the branch changes:
+   - "Changes tested: N pages/routes affected by this branch"
+   - For each: does it work? Screenshot evidence.
+   - Any regressions on adjacent pages?
+
+**If the user provides a URL with diff-aware mode:** Use that URL as the base but still scope testing to the changed files.
+
+### Full (default when URL is provided)
+Systematic exploration. Visit every reachable page. Document 5-10 well-evidenced issues. Produce health score. Takes 5-15 minutes depending on app size.
+
+### Quick (`--quick`)
+30-second smoke test. Visit homepage + top 5 navigation targets. Check: page loads? Console errors? Broken links? Produce health score. No detailed issue documentation.
+
+### Regression (`--regression <baseline>`)
+Run full mode, then load `baseline.json` from a previous run. Diff: which issues are fixed? Which are new? What's the score delta? Append regression section to report.
+
+---
+
+## Workflow
+
+### Phase 1: Initialize
+
+1. Find browse binary (see Setup above)
+2. Create output directories
+3. Copy report template from `$HOME/.agents/skills/gstack/qa/templates/qa-report-template.md` to output dir
+4. Start timer for duration tracking
+
+### Phase 2: Authenticate (if needed)
+
+**If the user specified auth credentials:**
+
+```bash
+$B goto <login-url>
+$B snapshot -i                    # find the login form
+$B fill @e3 "user@example.com"
+$B fill @e4 "[REDACTED]"         # NEVER include real passwords in report
+$B click @e5                      # submit
+$B snapshot -D                    # verify login succeeded
+```
+
+**If the user provided a cookie file:**
+
+```bash
+$B cookie-import cookies.json
+$B goto <target-url>
+```
+
+**If 2FA/OTP is required:** Ask the user for the code and wait.
+
+**If CAPTCHA blocks you:** Tell the user: "Please complete the CAPTCHA in the browser, then tell me to continue."
+
+### Phase 3: Orient
+
+Get a map of the application:
+
+```bash
+$B goto <target-url>
+$B snapshot -i -a -o "$REPORT_DIR/screenshots/initial.png"
+$B links                          # map navigation structure
+$B console --errors               # any errors on landing?
+```
+
+**Detect framework** (note in report metadata):
+- `__next` in HTML or `_next/data` requests → Next.js
+- `csrf-token` meta tag → Rails
+- `wp-content` in URLs → WordPress
+- Client-side routing with no page reloads → SPA
+
+**For SPAs:** The `links` command may return few results because navigation is client-side. Use `snapshot -i` to find nav elements (buttons, menu items) instead.
+
+### Phase 4: Explore
+
+Visit pages systematically. At each page:
+
+```bash
+$B goto <page-url>
+$B snapshot -i -a -o "$REPORT_DIR/screenshots/page-name.png"
+$B console --errors
+```
+
+Then follow the **per-page exploration checklist** (see `$HOME/.agents/skills/gstack/qa/references/issue-taxonomy.md`):
+
+1. **Visual scan** — Look at the annotated screenshot for layout issues
+2. **Interactive elements** — Click buttons, links, controls. Do they work?
+3. **Forms** — Fill and submit. Test empty, invalid, edge cases
+4. **Navigation** — Check all paths in and out
+5. **States** — Empty state, loading, error, overflow
+6. **Console** — Any new JS errors after interactions?
+7. **Responsiveness** — Check mobile viewport if relevant:
+   ```bash
+   $B viewport 375x812
+   $B screenshot "$REPORT_DIR/screenshots/page-mobile.png"
+   $B viewport 1280x720
+   ```
+
+**Depth judgment:** Spend more time on core features (homepage, dashboard, checkout, search) and less on secondary pages (about, terms, privacy).
+
+**Quick mode:** Only visit homepage + top 5 navigation targets from the Orient phase. Skip the per-page checklist — just check: loads? Console errors? Broken links visible?
+
+### Phase 5: Document
+
+Document each issue **immediately when found** — don't batch them.
+
+**Two evidence tiers:**
+
+**Interactive bugs** (broken flows, dead buttons, form failures):
+1. Take a screenshot before the action
+2. Perform the action
+3. Take a screenshot showing the result
+4. Use `snapshot -D` to show what changed
+5. Write repro steps referencing screenshots
+
+```bash
+$B screenshot "$REPORT_DIR/screenshots/issue-001-step-1.png"
+$B click @e5
+$B screenshot "$REPORT_DIR/screenshots/issue-001-result.png"
+$B snapshot -D
+```
+
+**Static bugs** (typos, layout issues, missing images):
+1. Take a single annotated screenshot showing the problem
+2. Describe what's wrong
+
+```bash
+$B snapshot -i -a -o "$REPORT_DIR/screenshots/issue-002.png"
+```
+
+**Write each issue to the report immediately** using the template format from `$HOME/.agents/skills/gstack/qa/templates/qa-report-template.md`.
+
+### Phase 6: Wrap Up
+
+1. **Compute health score** using the rubric below
+2. **Write "Top 3 Things to Fix"** — the 3 highest-severity issues
+3. **Write console health summary** — aggregate all console errors seen across pages
+4. **Update severity counts** in the summary table
+5. **Fill in report metadata** — date, duration, pages visited, screenshot count, framework
+6. **Save baseline** — write `baseline.json` with:
+   ```json
+   {
+     "date": "YYYY-MM-DD",
+     "url": "<target>",
+     "healthScore": N,
+     "issues": [{ "id": "ISSUE-001", "title": "...", "severity": "...", "category": "..." }],
+     "categoryScores": { "console": N, "links": N, ... }
+   }
+   ```
+
+**Regression mode:** After writing the report, load the baseline file. Compare:
+- Health score delta
+- Issues fixed (in baseline but not current)
+- New issues (in current but not baseline)
+- Append the regression section to the report
+
+---
+
+## Health Score Rubric
+
+Compute each category score (0-100), then take the weighted average.
+
+### Console (weight: 15%)
+- 0 errors → 100
+- 1-3 errors → 70
+- 4-10 errors → 40
+- 10+ errors → 10
+
+### Links (weight: 10%)
+- 0 broken → 100
+- Each broken link → -15 (minimum 0)
+
+### Per-Category Scoring (Visual, Functional, UX, Content, Performance, Accessibility)
+Each category starts at 100. Deduct per finding:
+- Critical issue → -25
+- High issue → -15
+- Medium issue → -8
+- Low issue → -3
+Minimum 0 per category.
+
+### Weights
+| Category | Weight |
+|----------|--------|
+| Console | 15% |
+| Links | 10% |
+| Visual | 10% |
+| Functional | 20% |
+| UX | 15% |
+| Performance | 10% |
+| Content | 5% |
+| Accessibility | 15% |
+
+### Final Score
+`score = Σ (category_score × weight)`
+
+---
+
+## Framework-Specific Guidance
+
+### Next.js
+- Check console for hydration errors (`Hydration failed`, `Text content did not match`)
+- Monitor `_next/data` requests in network — 404s indicate broken data fetching
+- Test client-side navigation (click links, don't just `goto`) — catches routing issues
+- Check for CLS (Cumulative Layout Shift) on pages with dynamic content
+
+### Rails
+- Check for N+1 query warnings in console (if development mode)
+- Verify CSRF token presence in forms
+- Test Turbo/Stimulus integration — do page transitions work smoothly?
+- Check for flash messages appearing and dismissing correctly
+
+### WordPress
+- Check for plugin conflicts (JS errors from different plugins)
+- Verify admin bar visibility for logged-in users
+- Test REST API endpoints (`/wp-json/`)
+- Check for mixed content warnings (common with WP)
+
+### General SPA (React, Vue, Angular)
+- Use `snapshot -i` for navigation — `links` command misses client-side routes
+- Check for stale state (navigate away and back — does data refresh?)
+- Test browser back/forward — does the app handle history correctly?
+- Check for memory leaks (monitor console after extended use)
+
+---
+
+## Important Rules
+
+1. **Repro is everything.** Every issue needs at least one screenshot. No exceptions.
+2. **Verify before documenting.** Retry the issue once to confirm it's reproducible, not a fluke.
+3. **Never include credentials.** Write `[REDACTED]` for passwords in repro steps.
+4. **Write incrementally.** Append each issue to the report as you find it. Don't batch.
+5. **Never read source code.** Test as a user, not a developer.
+6. **Check console after every interaction.** JS errors that don't surface visually are still bugs.
+7. **Test like a user.** Use realistic data. Walk through complete workflows end-to-end.
+8. **Depth over breadth.** 5-10 well-documented issues with evidence > 20 vague descriptions.
+9. **Never delete output files.** Screenshots and reports accumulate — that's intentional.
+10. **Use `snapshot -C` for tricky UIs.** Finds clickable divs that the accessibility tree misses.
+
+---
+
+## Output
+
+Write the report to both local and project-scoped locations:
+
+**Local:** `.gstack/qa-reports/qa-report-{domain}-{YYYY-MM-DD}.md`
+
+**Project-scoped:** Write test outcome artifact for cross-session context:
+```bash
+SLUG=$(git remote get-url origin 2>/dev/null | sed 's|.*[:/]\([^/]*/[^/]*\)\.git$|\1|;s|.*[:/]\([^/]*/[^/]*\)$|\1|' | tr '/' '-')
+mkdir -p ~/.gstack/projects/$SLUG
+```
+Write to `~/.gstack/projects/{slug}/{user}-{branch}-test-outcome-{datetime}.md`
+
+### Output Structure
+
+```
+.gstack/qa-reports/
+├── qa-report-{domain}-{YYYY-MM-DD}.md    # Structured report
+├── screenshots/
+│   ├── initial.png                        # Landing page annotated screenshot
+│   ├── issue-001-step-1.png               # Per-issue evidence
+│   ├── issue-001-result.png
+│   └── ...
+└── baseline.json                          # For regression mode
+```
+
+Report filenames use the domain and date: `qa-report-myapp-com-2026-03-12.md`
+
+---
+
+## Additional Rules (qa-only specific)
+
+11. **Never fix bugs.** Find and document only. Do not read source code, edit files, or suggest fixes in the report. Your job is to report what's broken, not to fix it. Use `$gstack-qa` for the test-fix-verify loop.
diff --git a/.agents/skills/gstack-qa/SKILL.md b/.agents/skills/gstack-qa/SKILL.md
new file mode 100644
index 0000000..1b362d2
--- /dev/null
+++ b/.agents/skills/gstack-qa/SKILL.md
@@ -0,0 +1,611 @@
+---
+name: gstack-qa
+description: |
+  Systematically QA test a web application and fix bugs found. Runs QA testing,
+  then iteratively fixes bugs in source code, committing each fix atomically and
+  re-verifying. Use when asked to "qa", "QA", "test this site", "find bugs",
+  "test and fix", or "fix what's broken". Three tiers: Quick (critical/high only),
+  Standard (+ medium), Exhaustive (+ cosmetic). Produces before/after health scores,
+  fix evidence, and a ship-readiness summary. For report-only mode, use $gstack-qa-only.
+---
+<!-- AUTO-GENERATED from qa/SKILL.md.tmpl — do not edit directly -->
+<!-- Regenerate: bun run gen:skill-docs --host codex -->
+
+## Preamble (run first)
+
+```bash
+_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+[ -n "$_UPD" ] && echo "$_UPD" || true
+mkdir -p ~/.gstack/sessions
+touch ~/.gstack/sessions/"$PPID"
+_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
+find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
+echo "BRANCH: $_BRANCH"
+```
+
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+
+## AskUserQuestion Format
+
+**ALWAYS follow this structure for every AskUserQuestion call:**
+1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
+2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called.
+3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]`
+4. **Options:** Lettered options: `A) ... B) ... C) ...`
+
+Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+
+Per-skill instructions may add additional formatting rules on top of this baseline.
+
+## Contributor Mode
+
+If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better.
+
+**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better!
+
+**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore.
+
+**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs.
+
+**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer):
+
+```
+# {Title}
+
+Hey gstack team — ran into this while using $gstack-{skill-name}:
+
+**What I was trying to do:** {what the user/agent was attempting}
+**What happened instead:** {what actually happened}
+**My rating:** {0-10} — {one sentence on why it wasn't a 10}
+
+## Steps to reproduce
+1. {step}
+
+## Raw output
+```
+{paste the actual error or unexpected output here}
+```
+
+## What would make this a 10
+{one sentence: what gstack should have done differently}
+
+**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** $gstack-{skill}
+```
+
+Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
+
+## Step 0: Detect base branch
+
+Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps.
+
+1. Check if a PR already exists for this branch:
+   `gh pr view --json baseRefName -q .baseRefName`
+   If this succeeds, use the printed branch name as the base branch.
+
+2. If no PR exists (command fails), detect the repo's default branch:
+   `gh repo view --json defaultBranchRef -q .defaultBranchRef.name`
+
+3. If both commands fail, fall back to `main`.
+
+Print the detected base branch name. In every subsequent `git diff`, `git log`,
+`git fetch`, `git merge`, and `gh pr create` command, substitute the detected
+branch name wherever the instructions say "the base branch."
+
+---
+
+# $gstack-qa: Test → Fix → Verify
+
+You are a QA engineer AND a bug-fix engineer. Test web applications like a real user — click everything, fill every form, check every state. When you find bugs, fix them in source code with atomic commits, then re-verify. Produce a structured report with before/after evidence.
+
+## Setup
+
+**Parse the user's request for these parameters:**
+
+| Parameter | Default | Override example |
+|-----------|---------|-----------------:|
+| Target URL | (auto-detect or required) | `https://myapp.com`, `http://localhost:3000` |
+| Tier | Standard | `--quick`, `--exhaustive` |
+| Mode | full | `--regression .gstack/qa-reports/baseline.json` |
+| Output dir | `.gstack/qa-reports/` | `Output to /tmp/qa` |
+| Scope | Full app (or diff-scoped) | `Focus on the billing page` |
+| Auth | None | `Sign in to user@example.com`, `Import cookies from cookies.json` |
+
+**Tiers determine which issues get fixed:**
+- **Quick:** Fix critical + high severity only
+- **Standard:** + medium severity (default)
+- **Exhaustive:** + low/cosmetic severity
+
+**If no URL is given and you're on a feature branch:** Automatically enter **diff-aware mode** (see Modes below). This is the most common case — the user just shipped code on a branch and wants to verify it works.
+
+**Require clean working tree before starting:**
+```bash
+if [ -n "$(git status --porcelain)" ]; then
+  echo "ERROR: Working tree is dirty. Commit or stash changes before running $gstack-qa."
+  exit 1
+fi
+```
+
+**Find the browse binary:**
+
+## SETUP (run this check BEFORE any browse command)
+
+```bash
+_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
+B=""
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
+if [ -x "$B" ]; then
+  echo "READY: $B"
+else
+  echo "NEEDS_SETUP"
+fi
+```
+
+If `NEEDS_SETUP`:
+1. Tell the user: "gstack browse needs a one-time build (~10 seconds). OK to proceed?" Then STOP and wait.
+2. Run: `cd <SKILL_DIR> && ./setup --host codex`
+3. If `bun` is not installed: `curl -fsSL https://bun.sh/install | bash`
+
+**Create output directories:**
+
+```bash
+mkdir -p .gstack/qa-reports/screenshots
+```
+
+---
+
+## Test Plan Context
+
+Before falling back to git diff heuristics, check for richer test plan sources:
+
+1. **Project-scoped test plans:** Check `~/.gstack/projects/` for recent `*-test-plan-*.md` files for this repo
+   ```bash
+   SLUG=$(git remote get-url origin 2>/dev/null | sed 's|.*[:/]\([^/]*/[^/]*\)\.git$|\1|;s|.*[:/]\([^/]*/[^/]*\)$|\1|' | tr '/' '-')
+   ls -t ~/.gstack/projects/$SLUG/*-test-plan-*.md 2>/dev/null | head -1
+   ```
+2. **Conversation context:** Check if a prior `$gstack-plan-eng-review` or `$gstack-plan-ceo-review` produced test plan output in this conversation
+3. **Use whichever source is richer.** Fall back to git diff analysis only if neither is available.
+
+---
+
+## Phases 1-6: QA Baseline
+
+## Modes
+
+### Diff-aware (automatic when on a feature branch with no URL)
+
+This is the **primary mode** for developers verifying their work. When the user says `$gstack-qa` without a URL and the repo is on a feature branch, automatically:
+
+1. **Analyze the branch diff** to understand what changed:
+   ```bash
+   git diff main...HEAD --name-only
+   git log main..HEAD --oneline
+   ```
+
+2. **Identify affected pages/routes** from the changed files:
+   - Controller/route files → which URL paths they serve
+   - View/template/component files → which pages render them
+   - Model/service files → which pages use those models (check controllers that reference them)
+   - CSS/style files → which pages include those stylesheets
+   - API endpoints → test them directly with `$B js "await fetch('/api/...')"`
+   - Static pages (markdown, HTML) → navigate to them directly
+
+3. **Detect the running app** — check common local dev ports:
+   ```bash
+   $B goto http://localhost:3000 2>/dev/null && echo "Found app on :3000" || \
+   $B goto http://localhost:4000 2>/dev/null && echo "Found app on :4000" || \
+   $B goto http://localhost:8080 2>/dev/null && echo "Found app on :8080"
+   ```
+   If no local app is found, check for a staging/preview URL in the PR or environment. If nothing works, ask the user for the URL.
+
+4. **Test each affected page/route:**
+   - Navigate to the page
+   - Take a screenshot
+   - Check console for errors
+   - If the change was interactive (forms, buttons, flows), test the interaction end-to-end
+   - Use `snapshot -D` before and after actions to verify the change had the expected effect
+
+5. **Cross-reference with commit messages and PR description** to understand *intent* — what should the change do? Verify it actually does that.
+
+6. **Check TODOS.md** (if it exists) for known bugs or issues related to the changed files. If a TODO describes a bug that this branch should fix, add it to your test plan. If you find a new bug during QA that isn't in TODOS.md, note it in the report.
+
+7. **Report findings** scoped to the branch changes:
+   - "Changes tested: N pages/routes affected by this branch"
+   - For each: does it work? Screenshot evidence.
+   - Any regressions on adjacent pages?
+
+**If the user provides a URL with diff-aware mode:** Use that URL as the base but still scope testing to the changed files.
+
+### Full (default when URL is provided)
+Systematic exploration. Visit every reachable page. Document 5-10 well-evidenced issues. Produce health score. Takes 5-15 minutes depending on app size.
+
+### Quick (`--quick`)
+30-second smoke test. Visit homepage + top 5 navigation targets. Check: page loads? Console errors? Broken links? Produce health score. No detailed issue documentation.
+
+### Regression (`--regression <baseline>`)
+Run full mode, then load `baseline.json` from a previous run. Diff: which issues are fixed? Which are new? What's the score delta? Append regression section to report.
+
+---
+
+## Workflow
+
+### Phase 1: Initialize
+
+1. Find browse binary (see Setup above)
+2. Create output directories
+3. Copy report template from `$HOME/.agents/skills/gstack/qa/templates/qa-report-template.md` to output dir
+4. Start timer for duration tracking
+
+### Phase 2: Authenticate (if needed)
+
+**If the user specified auth credentials:**
+
+```bash
+$B goto <login-url>
+$B snapshot -i                    # find the login form
+$B fill @e3 "user@example.com"
+$B fill @e4 "[REDACTED]"         # NEVER include real passwords in report
+$B click @e5                      # submit
+$B snapshot -D                    # verify login succeeded
+```
+
+**If the user provided a cookie file:**
+
+```bash
+$B cookie-import cookies.json
+$B goto <target-url>
+```
+
+**If 2FA/OTP is required:** Ask the user for the code and wait.
+
+**If CAPTCHA blocks you:** Tell the user: "Please complete the CAPTCHA in the browser, then tell me to continue."
+
+### Phase 3: Orient
+
+Get a map of the application:
+
+```bash
+$B goto <target-url>
+$B snapshot -i -a -o "$REPORT_DIR/screenshots/initial.png"
+$B links                          # map navigation structure
+$B console --errors               # any errors on landing?
+```
+
+**Detect framework** (note in report metadata):
+- `__next` in HTML or `_next/data` requests → Next.js
+- `csrf-token` meta tag → Rails
+- `wp-content` in URLs → WordPress
+- Client-side routing with no page reloads → SPA
+
+**For SPAs:** The `links` command may return few results because navigation is client-side. Use `snapshot -i` to find nav elements (buttons, menu items) instead.
+
+### Phase 4: Explore
+
+Visit pages systematically. At each page:
+
+```bash
+$B goto <page-url>
+$B snapshot -i -a -o "$REPORT_DIR/screenshots/page-name.png"
+$B console --errors
+```
+
+Then follow the **per-page exploration checklist** (see `$HOME/.agents/skills/gstack/qa/references/issue-taxonomy.md`):
+
+1. **Visual scan** — Look at the annotated screenshot for layout issues
+2. **Interactive elements** — Click buttons, links, controls. Do they work?
+3. **Forms** — Fill and submit. Test empty, invalid, edge cases
+4. **Navigation** — Check all paths in and out
+5. **States** — Empty state, loading, error, overflow
+6. **Console** — Any new JS errors after interactions?
+7. **Responsiveness** — Check mobile viewport if relevant:
+   ```bash
+   $B viewport 375x812
+   $B screenshot "$REPORT_DIR/screenshots/page-mobile.png"
+   $B viewport 1280x720
+   ```
+
+**Depth judgment:** Spend more time on core features (homepage, dashboard, checkout, search) and less on secondary pages (about, terms, privacy).
+
+**Quick mode:** Only visit homepage + top 5 navigation targets from the Orient phase. Skip the per-page checklist — just check: loads? Console errors? Broken links visible?
+
+### Phase 5: Document
+
+Document each issue **immediately when found** — don't batch them.
+
+**Two evidence tiers:**
+
+**Interactive bugs** (broken flows, dead buttons, form failures):
+1. Take a screenshot before the action
+2. Perform the action
+3. Take a screenshot showing the result
+4. Use `snapshot -D` to show what changed
+5. Write repro steps referencing screenshots
+
+```bash
+$B screenshot "$REPORT_DIR/screenshots/issue-001-step-1.png"
+$B click @e5
+$B screenshot "$REPORT_DIR/screenshots/issue-001-result.png"
+$B snapshot -D
+```
+
+**Static bugs** (typos, layout issues, missing images):
+1. Take a single annotated screenshot showing the problem
+2. Describe what's wrong
+
+```bash
+$B snapshot -i -a -o "$REPORT_DIR/screenshots/issue-002.png"
+```
+
+**Write each issue to the report immediately** using the template format from `$HOME/.agents/skills/gstack/qa/templates/qa-report-template.md`.
+
+### Phase 6: Wrap Up
+
+1. **Compute health score** using the rubric below
+2. **Write "Top 3 Things to Fix"** — the 3 highest-severity issues
+3. **Write console health summary** — aggregate all console errors seen across pages
+4. **Update severity counts** in the summary table
+5. **Fill in report metadata** — date, duration, pages visited, screenshot count, framework
+6. **Save baseline** — write `baseline.json` with:
+   ```json
+   {
+     "date": "YYYY-MM-DD",
+     "url": "<target>",
+     "healthScore": N,
+     "issues": [{ "id": "ISSUE-001", "title": "...", "severity": "...", "category": "..." }],
+     "categoryScores": { "console": N, "links": N, ... }
+   }
+   ```
+
+**Regression mode:** After writing the report, load the baseline file. Compare:
+- Health score delta
+- Issues fixed (in baseline but not current)
+- New issues (in current but not baseline)
+- Append the regression section to the report
+
+---
+
+## Health Score Rubric
+
+Compute each category score (0-100), then take the weighted average.
+
+### Console (weight: 15%)
+- 0 errors → 100
+- 1-3 errors → 70
+- 4-10 errors → 40
+- 10+ errors → 10
+
+### Links (weight: 10%)
+- 0 broken → 100
+- Each broken link → -15 (minimum 0)
+
+### Per-Category Scoring (Visual, Functional, UX, Content, Performance, Accessibility)
+Each category starts at 100. Deduct per finding:
+- Critical issue → -25
+- High issue → -15
+- Medium issue → -8
+- Low issue → -3
+Minimum 0 per category.
+
+### Weights
+| Category | Weight |
+|----------|--------|
+| Console | 15% |
+| Links | 10% |
+| Visual | 10% |
+| Functional | 20% |
+| UX | 15% |
+| Performance | 10% |
+| Content | 5% |
+| Accessibility | 15% |
+
+### Final Score
+`score = Σ (category_score × weight)`
+
+---
+
+## Framework-Specific Guidance
+
+### Next.js
+- Check console for hydration errors (`Hydration failed`, `Text content did not match`)
+- Monitor `_next/data` requests in network — 404s indicate broken data fetching
+- Test client-side navigation (click links, don't just `goto`) — catches routing issues
+- Check for CLS (Cumulative Layout Shift) on pages with dynamic content
+
+### Rails
+- Check for N+1 query warnings in console (if development mode)
+- Verify CSRF token presence in forms
+- Test Turbo/Stimulus integration — do page transitions work smoothly?
+- Check for flash messages appearing and dismissing correctly
+
+### WordPress
+- Check for plugin conflicts (JS errors from different plugins)
+- Verify admin bar visibility for logged-in users
+- Test REST API endpoints (`/wp-json/`)
+- Check for mixed content warnings (common with WP)
+
+### General SPA (React, Vue, Angular)
+- Use `snapshot -i` for navigation — `links` command misses client-side routes
+- Check for stale state (navigate away and back — does data refresh?)
+- Test browser back/forward — does the app handle history correctly?
+- Check for memory leaks (monitor console after extended use)
+
+---
+
+## Important Rules
+
+1. **Repro is everything.** Every issue needs at least one screenshot. No exceptions.
+2. **Verify before documenting.** Retry the issue once to confirm it's reproducible, not a fluke.
+3. **Never include credentials.** Write `[REDACTED]` for passwords in repro steps.
+4. **Write incrementally.** Append each issue to the report as you find it. Don't batch.
+5. **Never read source code.** Test as a user, not a developer.
+6. **Check console after every interaction.** JS errors that don't surface visually are still bugs.
+7. **Test like a user.** Use realistic data. Walk through complete workflows end-to-end.
+8. **Depth over breadth.** 5-10 well-documented issues with evidence > 20 vague descriptions.
+9. **Never delete output files.** Screenshots and reports accumulate — that's intentional.
+10. **Use `snapshot -C` for tricky UIs.** Finds clickable divs that the accessibility tree misses.
+
+Record baseline health score at end of Phase 6.
+
+---
+
+## Output Structure
+
+```
+.gstack/qa-reports/
+├── qa-report-{domain}-{YYYY-MM-DD}.md    # Structured report
+├── screenshots/
+│   ├── initial.png                        # Landing page annotated screenshot
+│   ├── issue-001-step-1.png               # Per-issue evidence
+│   ├── issue-001-result.png
+│   ├── issue-001-before.png               # Before fix (if fixed)
+│   ├── issue-001-after.png                # After fix (if fixed)
+│   └── ...
+└── baseline.json                          # For regression mode
+```
+
+Report filenames use the domain and date: `qa-report-myapp-com-2026-03-12.md`
+
+---
+
+## Phase 7: Triage
+
+Sort all discovered issues by severity, then decide which to fix based on the selected tier:
+
+- **Quick:** Fix critical + high only. Mark medium/low as "deferred."
+- **Standard:** Fix critical + high + medium. Mark low as "deferred."
+- **Exhaustive:** Fix all, including cosmetic/low severity.
+
+Mark issues that cannot be fixed from source code (e.g., third-party widget bugs, infrastructure issues) as "deferred" regardless of tier.
+
+---
+
+## Phase 8: Fix Loop
+
+For each fixable issue, in severity order:
+
+### 8a. Locate source
+
+```bash
+# Grep for error messages, component names, route definitions
+# Glob for file patterns matching the affected page
+```
+
+- Find the source file(s) responsible for the bug
+- ONLY modify files directly related to the issue
+
+### 8b. Fix
+
+- Read the source code, understand the context
+- Make the **minimal fix** — smallest change that resolves the issue
+- Do NOT refactor surrounding code, add features, or "improve" unrelated things
+
+### 8c. Commit
+
+```bash
+git add <only-changed-files>
+git commit -m "fix(qa): ISSUE-NNN — short description"
+```
+
+- One commit per fix. Never bundle multiple fixes.
+- Message format: `fix(qa): ISSUE-NNN — short description`
+
+### 8d. Re-test
+
+- Navigate back to the affected page
+- Take **before/after screenshot pair**
+- Check console for errors
+- Use `snapshot -D` to verify the change had the expected effect
+
+```bash
+$B goto <affected-url>
+$B screenshot "$REPORT_DIR/screenshots/issue-NNN-after.png"
+$B console --errors
+$B snapshot -D
+```
+
+### 8e. Classify
+
+- **verified**: re-test confirms the fix works, no new errors introduced
+- **best-effort**: fix applied but couldn't fully verify (e.g., needs auth state, external service)
+- **reverted**: regression detected → `git revert HEAD` → mark issue as "deferred"
+
+### 8f. Self-Regulation (STOP AND EVALUATE)
+
+Every 5 fixes (or after any revert), compute the WTF-likelihood:
+
+```
+WTF-LIKELIHOOD:
+  Start at 0%
+  Each revert:                +15%
+  Each fix touching >3 files: +5%
+  After fix 15:               +1% per additional fix
+  All remaining Low severity: +10%
+  Touching unrelated files:   +20%
+```
+
+**If WTF > 20%:** STOP immediately. Show the user what you've done so far. Ask whether to continue.
+
+**Hard cap: 50 fixes.** After 50 fixes, stop regardless of remaining issues.
+
+---
+
+## Phase 9: Final QA
+
+After all fixes are applied:
+
+1. Re-run QA on all affected pages
+2. Compute final health score
+3. **If final score is WORSE than baseline:** WARN prominently — something regressed
+
+---
+
+## Phase 10: Report
+
+Write the report to both local and project-scoped locations:
+
+**Local:** `.gstack/qa-reports/qa-report-{domain}-{YYYY-MM-DD}.md`
+
+**Project-scoped:** Write test outcome artifact for cross-session context:
+```bash
+SLUG=$(git remote get-url origin 2>/dev/null | sed 's|.*[:/]\([^/]*/[^/]*\)\.git$|\1|;s|.*[:/]\([^/]*/[^/]*\)$|\1|' | tr '/' '-')
+mkdir -p ~/.gstack/projects/$SLUG
+```
+Write to `~/.gstack/projects/{slug}/{user}-{branch}-test-outcome-{datetime}.md`
+
+**Per-issue additions** (beyond standard report template):
+- Fix Status: verified / best-effort / reverted / deferred
+- Commit SHA (if fixed)
+- Files Changed (if fixed)
+- Before/After screenshots (if fixed)
+
+**Summary section:**
+- Total issues found
+- Fixes applied (verified: X, best-effort: Y, reverted: Z)
+- Deferred issues
+- Health score delta: baseline → final
+
+**PR Summary:** Include a one-line summary suitable for PR descriptions:
+> "QA found N issues, fixed M, health score X → Y."
+
+---
+
+## Phase 11: TODOS.md Update
+
+If the repo has a `TODOS.md`:
+
+1. **New deferred bugs** → add as TODOs with severity, category, and repro steps
+2. **Fixed bugs that were in TODOS.md** → annotate with "Fixed by $gstack-qa on {branch}, {date}"
+
+---
+
+## Additional Rules (qa-specific)
+
+11. **Clean working tree required.** Refuse to start if `git status --porcelain` is non-empty.
+12. **One commit per fix.** Never bundle multiple fixes into one commit.
+13. **Never modify tests or CI configuration.** Only fix application source code.
+14. **Revert on regression.** If a fix makes things worse, `git revert HEAD` immediately.
+15. **Self-regulate.** Follow the WTF-likelihood heuristic. When in doubt, stop and ask.
diff --git a/.agents/skills/gstack-retro/SKILL.md b/.agents/skills/gstack-retro/SKILL.md
new file mode 100644
index 0000000..e65932e
--- /dev/null
+++ b/.agents/skills/gstack-retro/SKILL.md
@@ -0,0 +1,543 @@
+---
+name: gstack-retro
+description: |
+  Weekly engineering retrospective. Analyzes commit history, work patterns,
+  and code quality metrics with persistent history and trend tracking.
+  Team-aware: breaks down per-person contributions with praise and growth areas.
+---
+<!-- AUTO-GENERATED from retro/SKILL.md.tmpl — do not edit directly -->
+<!-- Regenerate: bun run gen:skill-docs --host codex -->
+
+## Preamble (run first)
+
+```bash
+_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+[ -n "$_UPD" ] && echo "$_UPD" || true
+mkdir -p ~/.gstack/sessions
+touch ~/.gstack/sessions/"$PPID"
+_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
+find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
+echo "BRANCH: $_BRANCH"
+```
+
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+
+## AskUserQuestion Format
+
+**ALWAYS follow this structure for every AskUserQuestion call:**
+1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
+2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called.
+3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]`
+4. **Options:** Lettered options: `A) ... B) ... C) ...`
+
+Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+
+Per-skill instructions may add additional formatting rules on top of this baseline.
+
+## Contributor Mode
+
+If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better.
+
+**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better!
+
+**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore.
+
+**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs.
+
+**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer):
+
+```
+# {Title}
+
+Hey gstack team — ran into this while using $gstack-{skill-name}:
+
+**What I was trying to do:** {what the user/agent was attempting}
+**What happened instead:** {what actually happened}
+**My rating:** {0-10} — {one sentence on why it wasn't a 10}
+
+## Steps to reproduce
+1. {step}
+
+## Raw output
+```
+{paste the actual error or unexpected output here}
+```
+
+## What would make this a 10
+{one sentence: what gstack should have done differently}
+
+**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** $gstack-{skill}
+```
+
+Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
+
+## Detect default branch
+
+Before gathering data, detect the repo's default branch name:
+`gh repo view --json defaultBranchRef -q .defaultBranchRef.name`
+
+If this fails, fall back to `main`. Use the detected name wherever the instructions
+say `origin/<default>` below.
+
+---
+
+# $gstack-retro — Weekly Engineering Retrospective
+
+Generates a comprehensive engineering retrospective analyzing commit history, work patterns, and code quality metrics. Team-aware: identifies the user running the command, then analyzes every contributor with per-person praise and growth opportunities. Designed for a senior IC/CTO-level builder using Claude Code as a force multiplier.
+
+## User-invocable
+When the user types `$gstack-retro`, run this skill.
+
+## Arguments
+- `$gstack-retro` — default: last 7 days
+- `$gstack-retro 24h` — last 24 hours
+- `$gstack-retro 14d` — last 14 days
+- `$gstack-retro 30d` — last 30 days
+- `$gstack-retro compare` — compare current window vs prior same-length window
+- `$gstack-retro compare 14d` — compare with explicit window
+
+## Instructions
+
+Parse the argument to determine the time window. Default to 7 days if no argument given. Use `--since="N days ago"`, `--since="N hours ago"`, or `--since="N weeks ago"` (for `w` units) for git log queries. All times should be reported in **Pacific time** (use `TZ=America/Los_Angeles` when converting timestamps).
+
+**Argument validation:** If the argument doesn't match a number followed by `d`, `h`, or `w`, the word `compare`, or `compare` followed by a number and `d`/`h`/`w`, show this usage and stop:
+```
+Usage: $gstack-retro [window]
+  $gstack-retro              — last 7 days (default)
+  $gstack-retro 24h          — last 24 hours
+  $gstack-retro 14d          — last 14 days
+  $gstack-retro 30d          — last 30 days
+  $gstack-retro compare      — compare this period vs prior period
+  $gstack-retro compare 14d  — compare with explicit window
+```
+
+### Step 1: Gather Raw Data
+
+First, fetch origin and identify the current user:
+```bash
+git fetch origin <default> --quiet
+# Identify who is running the retro
+git config user.name
+git config user.email
+```
+
+The name returned by `git config user.name` is **"you"** — the person reading this retro. All other authors are teammates. Use this to orient the narrative: "your" commits vs teammate contributions.
+
+Run ALL of these git commands in parallel (they are independent):
+
+```bash
+# 1. All commits in window with timestamps, subject, hash, AUTHOR, files changed, insertions, deletions
+git log origin/<default> --since="<window>" --format="%H|%aN|%ae|%ai|%s" --shortstat
+
+# 2. Per-commit test vs total LOC breakdown with author
+#    Each commit block starts with COMMIT:<hash>|<author>, followed by numstat lines.
+#    Separate test files (matching test/|spec/|__tests__/) from production files.
+git log origin/<default> --since="<window>" --format="COMMIT:%H|%aN" --numstat
+
+# 3. Commit timestamps for session detection and hourly distribution (with author)
+#    Use TZ=America/Los_Angeles for Pacific time conversion
+TZ=America/Los_Angeles git log origin/<default> --since="<window>" --format="%at|%aN|%ai|%s" | sort -n
+
+# 4. Files most frequently changed (hotspot analysis)
+git log origin/<default> --since="<window>" --format="" --name-only | grep -v '^$' | sort | uniq -c | sort -rn
+
+# 5. PR numbers from commit messages (extract #NNN patterns)
+git log origin/<default> --since="<window>" --format="%s" | grep -oE '#[0-9]+' | sed 's/^#//' | sort -n | uniq | sed 's/^/#/'
+
+# 6. Per-author file hotspots (who touches what)
+git log origin/<default> --since="<window>" --format="AUTHOR:%aN" --name-only
+
+# 7. Per-author commit counts (quick summary)
+git shortlog origin/<default> --since="<window>" -sn --no-merges
+
+# 8. Greptile triage history (if available)
+cat ~/.gstack/greptile-history.md 2>/dev/null || true
+
+# 9. TODOS.md backlog (if available)
+cat TODOS.md 2>/dev/null || true
+```
+
+### Step 2: Compute Metrics
+
+Calculate and present these metrics in a summary table:
+
+| Metric | Value |
+|--------|-------|
+| Commits to main | N |
+| Contributors | N |
+| PRs merged | N |
+| Total insertions | N |
+| Total deletions | N |
+| Net LOC added | N |
+| Test LOC (insertions) | N |
+| Test LOC ratio | N% |
+| Version range | vX.Y.Z.W → vX.Y.Z.W |
+| Active days | N |
+| Detected sessions | N |
+| Avg LOC/session-hour | N |
+| Greptile signal | N% (Y catches, Z FPs) |
+
+Then show a **per-author leaderboard** immediately below:
+
+```
+Contributor         Commits   +/-          Top area
+You (garry)              32   +2400/-300   browse/
+alice                    12   +800/-150    app/services/
+bob                       3   +120/-40     tests/
+```
+
+Sort by commits descending. The current user (from `git config user.name`) always appears first, labeled "You (name)".
+
+**Greptile signal (if history exists):** Read `~/.gstack/greptile-history.md` (fetched in Step 1, command 8). Filter entries within the retro time window by date. Count entries by type: `fix`, `fp`, `already-fixed`. Compute signal ratio: `(fix + already-fixed) / (fix + already-fixed + fp)`. If no entries exist in the window or the file doesn't exist, skip the Greptile metric row. Skip unparseable lines silently.
+
+**Backlog Health (if TODOS.md exists):** Read `TODOS.md` (fetched in Step 1, command 9). Compute:
+- Total open TODOs (exclude items in `## Completed` section)
+- P0/P1 count (critical/urgent items)
+- P2 count (important items)
+- Items completed this period (items in Completed section with dates within the retro window)
+- Items added this period (cross-reference git log for commits that modified TODOS.md within the window)
+
+Include in the metrics table:
+```
+| Backlog Health | N open (X P0/P1, Y P2) · Z completed this period |
+```
+
+If TODOS.md doesn't exist, skip the Backlog Health row.
+
+### Step 3: Commit Time Distribution
+
+Show hourly histogram in Pacific time using bar chart:
+
+```
+Hour  Commits  ████████████████
+ 00:    4      ████
+ 07:    5      █████
+ ...
+```
+
+Identify and call out:
+- Peak hours
+- Dead zones
+- Whether pattern is bimodal (morning/evening) or continuous
+- Late-night coding clusters (after 10pm)
+
+### Step 4: Work Session Detection
+
+Detect sessions using **45-minute gap** threshold between consecutive commits. For each session report:
+- Start/end time (Pacific)
+- Number of commits
+- Duration in minutes
+
+Classify sessions:
+- **Deep sessions** (50+ min)
+- **Medium sessions** (20-50 min)
+- **Micro sessions** (<20 min, typically single-commit fire-and-forget)
+
+Calculate:
+- Total active coding time (sum of session durations)
+- Average session length
+- LOC per hour of active time
+
+### Step 5: Commit Type Breakdown
+
+Categorize by conventional commit prefix (feat/fix/refactor/test/chore/docs). Show as percentage bar:
+
+```
+feat:     20  (40%)  ████████████████████
+fix:      27  (54%)  ███████████████████████████
+refactor:  2  ( 4%)  ██
+```
+
+Flag if fix ratio exceeds 50% — this signals a "ship fast, fix fast" pattern that may indicate review gaps.
+
+### Step 6: Hotspot Analysis
+
+Show top 10 most-changed files. Flag:
+- Files changed 5+ times (churn hotspots)
+- Test files vs production files in the hotspot list
+- VERSION/CHANGELOG frequency (version discipline indicator)
+
+### Step 7: PR Size Distribution
+
+From commit diffs, estimate PR sizes and bucket them:
+- **Small** (<100 LOC)
+- **Medium** (100-500 LOC)
+- **Large** (500-1500 LOC)
+- **XL** (1500+ LOC) — flag these with file counts
+
+### Step 8: Focus Score + Ship of the Week
+
+**Focus score:** Calculate the percentage of commits touching the single most-changed top-level directory (e.g., `app/services/`, `app/views/`). Higher score = deeper focused work. Lower score = scattered context-switching. Report as: "Focus score: 62% (app/services/)"
+
+**Ship of the week:** Auto-identify the single highest-LOC PR in the window. Highlight it:
+- PR number and title
+- LOC changed
+- Why it matters (infer from commit messages and files touched)
+
+### Step 9: Team Member Analysis
+
+For each contributor (including the current user), compute:
+
+1. **Commits and LOC** — total commits, insertions, deletions, net LOC
+2. **Areas of focus** — which directories/files they touched most (top 3)
+3. **Commit type mix** — their personal feat/fix/refactor/test breakdown
+4. **Session patterns** — when they code (their peak hours), session count
+5. **Test discipline** — their personal test LOC ratio
+6. **Biggest ship** — their single highest-impact commit or PR in the window
+
+**For the current user ("You"):** This section gets the deepest treatment. Include all the detail from the solo retro — session analysis, time patterns, focus score. Frame it in first person: "Your peak hours...", "Your biggest ship..."
+
+**For each teammate:** Write 2-3 sentences covering what they worked on and their pattern. Then:
+
+- **Praise** (1-2 specific things): Anchor in actual commits. Not "great work" — say exactly what was good. Examples: "Shipped the entire auth middleware rewrite in 3 focused sessions with 45% test coverage", "Every PR under 200 LOC — disciplined decomposition."
+- **Opportunity for growth** (1 specific thing): Frame as a leveling-up suggestion, not criticism. Anchor in actual data. Examples: "Test ratio was 12% this week — adding test coverage to the payment module before it gets more complex would pay off", "5 fix commits on the same file suggest the original PR could have used a review pass."
+
+**If only one contributor (solo repo):** Skip the team breakdown and proceed as before — the retro is personal.
+
+**If there are Co-Authored-By trailers:** Parse `Co-Authored-By:` lines in commit messages. Credit those authors for the commit alongside the primary author. Note AI co-authors (e.g., `noreply@anthropic.com`) but do not include them as team members — instead, track "AI-assisted commits" as a separate metric.
+
+### Step 10: Week-over-Week Trends (if window >= 14d)
+
+If the time window is 14 days or more, split into weekly buckets and show trends:
+- Commits per week (total and per-author)
+- LOC per week
+- Test ratio per week
+- Fix ratio per week
+- Session count per week
+
+### Step 11: Streak Tracking
+
+Count consecutive days with at least 1 commit to origin/<default>, going back from today. Track both team streak and personal streak:
+
+```bash
+# Team streak: all unique commit dates (Pacific time) — no hard cutoff
+TZ=America/Los_Angeles git log origin/<default> --format="%ad" --date=format:"%Y-%m-%d" | sort -u
+
+# Personal streak: only the current user's commits
+TZ=America/Los_Angeles git log origin/<default> --author="<user_name>" --format="%ad" --date=format:"%Y-%m-%d" | sort -u
+```
+
+Count backward from today — how many consecutive days have at least one commit? This queries the full history so streaks of any length are reported accurately. Display both:
+- "Team shipping streak: 47 consecutive days"
+- "Your shipping streak: 32 consecutive days"
+
+### Step 12: Load History & Compare
+
+Before saving the new snapshot, check for prior retro history:
+
+```bash
+ls -t .context/retros/*.json 2>/dev/null
+```
+
+**If prior retros exist:** Load the most recent one using the Read tool. Calculate deltas for key metrics and include a **Trends vs Last Retro** section:
+```
+                    Last        Now         Delta
+Test ratio:         22%    →    41%         ↑19pp
+Sessions:           10     →    14          ↑4
+LOC/hour:           200    →    350         ↑75%
+Fix ratio:          54%    →    30%         ↓24pp (improving)
+Commits:            32     →    47          ↑47%
+Deep sessions:      3      →    5           ↑2
+```
+
+**If no prior retros exist:** Skip the comparison section and append: "First retro recorded — run again next week to see trends."
+
+### Step 13: Save Retro History
+
+After computing all metrics (including streak) and loading any prior history for comparison, save a JSON snapshot:
+
+```bash
+mkdir -p .context/retros
+```
+
+Determine the next sequence number for today (substitute the actual date for `$(date +%Y-%m-%d)`):
+```bash
+# Count existing retros for today to get next sequence number
+today=$(TZ=America/Los_Angeles date +%Y-%m-%d)
+existing=$(ls .context/retros/${today}-*.json 2>/dev/null | wc -l | tr -d ' ')
+next=$((existing + 1))
+# Save as .context/retros/${today}-${next}.json
+```
+
+Use the Write tool to save the JSON file with this schema:
+```json
+{
+  "date": "2026-03-08",
+  "window": "7d",
+  "metrics": {
+    "commits": 47,
+    "contributors": 3,
+    "prs_merged": 12,
+    "insertions": 3200,
+    "deletions": 800,
+    "net_loc": 2400,
+    "test_loc": 1300,
+    "test_ratio": 0.41,
+    "active_days": 6,
+    "sessions": 14,
+    "deep_sessions": 5,
+    "avg_session_minutes": 42,
+    "loc_per_session_hour": 350,
+    "feat_pct": 0.40,
+    "fix_pct": 0.30,
+    "peak_hour": 22,
+    "ai_assisted_commits": 32
+  },
+  "authors": {
+    "Garry Tan": { "commits": 32, "insertions": 2400, "deletions": 300, "test_ratio": 0.41, "top_area": "browse/" },
+    "Alice": { "commits": 12, "insertions": 800, "deletions": 150, "test_ratio": 0.35, "top_area": "app/services/" }
+  },
+  "version_range": ["1.16.0.0", "1.16.1.0"],
+  "streak_days": 47,
+  "tweetable": "Week of Mar 1: 47 commits (3 contributors), 3.2k LOC, 38% tests, 12 PRs, peak: 10pm",
+  "greptile": {
+    "fixes": 3,
+    "fps": 1,
+    "already_fixed": 2,
+    "signal_pct": 83
+  }
+}
+```
+
+**Note:** Only include the `greptile` field if `~/.gstack/greptile-history.md` exists and has entries within the time window. Only include the `backlog` field if `TODOS.md` exists. If either has no data, omit the field entirely.
+
+Include backlog data in the JSON when TODOS.md exists:
+```json
+  "backlog": {
+    "total_open": 28,
+    "p0_p1": 2,
+    "p2": 8,
+    "completed_this_period": 3,
+    "added_this_period": 1
+  }
+```
+
+### Step 14: Write the Narrative
+
+Structure the output as:
+
+---
+
+**Tweetable summary** (first line, before everything else):
+```
+Week of Mar 1: 47 commits (3 contributors), 3.2k LOC, 38% tests, 12 PRs, peak: 10pm | Streak: 47d
+```
+
+## Engineering Retro: [date range]
+
+### Summary Table
+(from Step 2)
+
+### Trends vs Last Retro
+(from Step 11, loaded before save — skip if first retro)
+
+### Time & Session Patterns
+(from Steps 3-4)
+
+Narrative interpreting what the team-wide patterns mean:
+- When the most productive hours are and what drives them
+- Whether sessions are getting longer or shorter over time
+- Estimated hours per day of active coding (team aggregate)
+- Notable patterns: do team members code at the same time or in shifts?
+
+### Shipping Velocity
+(from Steps 5-7)
+
+Narrative covering:
+- Commit type mix and what it reveals
+- PR size discipline (are PRs staying small?)
+- Fix-chain detection (sequences of fix commits on the same subsystem)
+- Version bump discipline
+
+### Code Quality Signals
+- Test LOC ratio trend
+- Hotspot analysis (are the same files churning?)
+- Any XL PRs that should have been split
+- Greptile signal ratio and trend (if history exists): "Greptile: X% signal (Y valid catches, Z false positives)"
+
+### Focus & Highlights
+(from Step 8)
+- Focus score with interpretation
+- Ship of the week callout
+
+### Your Week (personal deep-dive)
+(from Step 9, for the current user only)
+
+This is the section the user cares most about. Include:
+- Their personal commit count, LOC, test ratio
+- Their session patterns and peak hours
+- Their focus areas
+- Their biggest ship
+- **What you did well** (2-3 specific things anchored in commits)
+- **Where to level up** (1-2 specific, actionable suggestions)
+
+### Team Breakdown
+(from Step 9, for each teammate — skip if solo repo)
+
+For each teammate (sorted by commits descending), write a section:
+
+#### [Name]
+- **What they shipped**: 2-3 sentences on their contributions, areas of focus, and commit patterns
+- **Praise**: 1-2 specific things they did well, anchored in actual commits. Be genuine — what would you actually say in a 1:1? Examples:
+  - "Cleaned up the entire auth module in 3 small, reviewable PRs — textbook decomposition"
+  - "Added integration tests for every new endpoint, not just happy paths"
+  - "Fixed the N+1 query that was causing 2s load times on the dashboard"
+- **Opportunity for growth**: 1 specific, constructive suggestion. Frame as investment, not criticism. Examples:
+  - "Test coverage on the payment module is at 8% — worth investing in before the next feature lands on top of it"
+  - "3 of the 5 PRs were 800+ LOC — breaking these up would catch issues earlier and make review easier"
+  - "All commits land between 1-4am — sustainable pace matters for code quality long-term"
+
+**AI collaboration note:** If many commits have `Co-Authored-By` AI trailers (e.g., Claude, Copilot), note the AI-assisted commit percentage as a team metric. Frame it neutrally — "N% of commits were AI-assisted" — without judgment.
+
+### Top 3 Team Wins
+Identify the 3 highest-impact things shipped in the window across the whole team. For each:
+- What it was
+- Who shipped it
+- Why it matters (product/architecture impact)
+
+### 3 Things to Improve
+Specific, actionable, anchored in actual commits. Mix personal and team-level suggestions. Phrase as "to get even better, the team could..."
+
+### 3 Habits for Next Week
+Small, practical, realistic. Each must be something that takes <5 minutes to adopt. At least one should be team-oriented (e.g., "review each other's PRs same-day").
+
+### Week-over-Week Trends
+(if applicable, from Step 10)
+
+---
+
+## Compare Mode
+
+When the user runs `$gstack-retro compare` (or `$gstack-retro compare 14d`):
+
+1. Compute metrics for the current window (default 7d) using `--since="7 days ago"`
+2. Compute metrics for the immediately prior same-length window using both `--since` and `--until` to avoid overlap (e.g., `--since="14 days ago" --until="7 days ago"` for a 7d window)
+3. Show a side-by-side comparison table with deltas and arrows
+4. Write a brief narrative highlighting the biggest improvements and regressions
+5. Save only the current-window snapshot to `.context/retros/` (same as a normal retro run); do **not** persist the prior-window metrics.
+
+## Tone
+
+- Encouraging but candid, no coddling
+- Specific and concrete — always anchor in actual commits/code
+- Skip generic praise ("great job!") — say exactly what was good and why
+- Frame improvements as leveling up, not criticism
+- **Praise should feel like something you'd actually say in a 1:1** — specific, earned, genuine
+- **Growth suggestions should feel like investment advice** — "this is worth your time because..." not "you failed at..."
+- Never compare teammates against each other negatively. Each person's section stands on its own.
+- Keep total output around 3000-4500 words (slightly longer to accommodate team sections)
+- Use markdown tables and code blocks for data, prose for narrative
+- Output directly to the conversation — do NOT write to filesystem (except the `.context/retros/` JSON snapshot)
+
+## Important Rules
+
+- ALL narrative output goes directly to the user in the conversation. The ONLY file written is the `.context/retros/` JSON snapshot.
+- Use `origin/<default>` for all git queries (not local main which may be stale)
+- Convert all timestamps to Pacific time for display (use `TZ=America/Los_Angeles`)
+- If the window has zero commits, say so and suggest a different window
+- Round LOC/hour to nearest 50
+- Treat merge commits as PR boundaries
+- Do not read CLAUDE.md or other docs — this skill is self-contained
+- On first run (no prior retros), skip comparison sections gracefully
diff --git a/.agents/skills/gstack-review/SKILL.md b/.agents/skills/gstack-review/SKILL.md
new file mode 100644
index 0000000..5348284
--- /dev/null
+++ b/.agents/skills/gstack-review/SKILL.md
@@ -0,0 +1,259 @@
+---
+name: gstack-review
+description: |
+  Pre-landing PR review. Analyzes diff against the base branch for SQL safety, LLM trust
+  boundary violations, conditional side effects, and other structural issues.
+---
+<!-- AUTO-GENERATED from review/SKILL.md.tmpl — do not edit directly -->
+<!-- Regenerate: bun run gen:skill-docs --host codex -->
+
+## Preamble (run first)
+
+```bash
+_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+[ -n "$_UPD" ] && echo "$_UPD" || true
+mkdir -p ~/.gstack/sessions
+touch ~/.gstack/sessions/"$PPID"
+_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
+find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
+echo "BRANCH: $_BRANCH"
+```
+
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+
+## AskUserQuestion Format
+
+**ALWAYS follow this structure for every AskUserQuestion call:**
+1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
+2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called.
+3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]`
+4. **Options:** Lettered options: `A) ... B) ... C) ...`
+
+Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+
+Per-skill instructions may add additional formatting rules on top of this baseline.
+
+## Contributor Mode
+
+If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better.
+
+**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better!
+
+**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore.
+
+**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs.
+
+**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer):
+
+```
+# {Title}
+
+Hey gstack team — ran into this while using $gstack-{skill-name}:
+
+**What I was trying to do:** {what the user/agent was attempting}
+**What happened instead:** {what actually happened}
+**My rating:** {0-10} — {one sentence on why it wasn't a 10}
+
+## Steps to reproduce
+1. {step}
+
+## Raw output
+```
+{paste the actual error or unexpected output here}
+```
+
+## What would make this a 10
+{one sentence: what gstack should have done differently}
+
+**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** $gstack-{skill}
+```
+
+Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
+
+## Step 0: Detect base branch
+
+Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps.
+
+1. Check if a PR already exists for this branch:
+   `gh pr view --json baseRefName -q .baseRefName`
+   If this succeeds, use the printed branch name as the base branch.
+
+2. If no PR exists (command fails), detect the repo's default branch:
+   `gh repo view --json defaultBranchRef -q .defaultBranchRef.name`
+
+3. If both commands fail, fall back to `main`.
+
+Print the detected base branch name. In every subsequent `git diff`, `git log`,
+`git fetch`, `git merge`, and `gh pr create` command, substitute the detected
+branch name wherever the instructions say "the base branch."
+
+---
+
+# Pre-Landing PR Review
+
+You are running the `$gstack-review` workflow. Analyze the current branch's diff against the base branch for structural issues that tests don't catch.
+
+---
+
+## Step 1: Check branch
+
+1. Run `git branch --show-current` to get the current branch.
+2. If on the base branch, output: **"Nothing to review — you're on the base branch or have no changes against it."** and stop.
+3. Run `git fetch origin <base> --quiet && git diff origin/<base> --stat` to check if there's a diff. If no diff, output the same message and stop.
+
+---
+
+## Step 2: Read the checklist
+
+Read `$HOME/.agents/skills/gstack/review/checklist.md`.
+
+**If the file cannot be read, STOP and report the error.** Do not proceed without the checklist.
+
+---
+
+## Step 2.5: Check for Greptile review comments
+
+Read `$HOME/.agents/skills/gstack/review/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps.
+
+**If no PR exists, `gh` fails, API returns an error, or there are zero Greptile comments:** Skip this step silently. Greptile integration is additive — the review works without it.
+
+**If Greptile comments are found:** Store the classifications (VALID & ACTIONABLE, VALID BUT ALREADY FIXED, FALSE POSITIVE, SUPPRESSED) — you will need them in Step 5.
+
+---
+
+## Step 3: Get the diff
+
+Fetch the latest base branch to avoid false positives from stale local state:
+
+```bash
+git fetch origin <base> --quiet
+```
+
+Run `git diff origin/<base>` to get the full diff. This includes both committed and uncommitted changes against the latest base branch.
+
+---
+
+## Step 4: Two-pass review
+
+Apply the checklist against the diff in two passes:
+
+1. **Pass 1 (CRITICAL):** SQL & Data Safety, Race Conditions & Concurrency, LLM Output Trust Boundary, Enum & Value Completeness
+2. **Pass 2 (INFORMATIONAL):** Conditional Side Effects, Magic Numbers & String Coupling, Dead Code & Consistency, LLM Prompt Issues, Test Gaps, View/Frontend
+
+**Enum & Value Completeness requires reading code OUTSIDE the diff.** When the diff introduces a new enum value, status, tier, or type constant, use Grep to find all files that reference sibling values, then Read those files to check if the new value is handled. This is the one category where within-diff review is insufficient.
+
+Follow the output format specified in the checklist. Respect the suppressions — do NOT flag items listed in the "DO NOT flag" section.
+
+---
+
+## Step 5: Fix-First Review
+
+**Every finding gets action — not just critical ones.**
+
+Output a summary header: `Pre-Landing Review: N issues (X critical, Y informational)`
+
+### Step 5a: Classify each finding
+
+For each finding, classify as AUTO-FIX or ASK per the Fix-First Heuristic in
+checklist.md. Critical findings lean toward ASK; informational findings lean
+toward AUTO-FIX.
+
+### Step 5b: Auto-fix all AUTO-FIX items
+
+Apply each fix directly. For each one, output a one-line summary:
+`[AUTO-FIXED] [file:line] Problem → what you did`
+
+### Step 5c: Batch-ask about ASK items
+
+If there are ASK items remaining, present them in ONE AskUserQuestion:
+
+- List each item with a number, the severity label, the problem, and a recommended fix
+- For each item, provide options: A) Fix as recommended, B) Skip
+- Include an overall RECOMMENDATION
+
+Example format:
+```
+I auto-fixed 5 issues. 2 need your input:
+
+1. [CRITICAL] app/models/post.rb:42 — Race condition in status transition
+   Fix: Add `WHERE status = 'draft'` to the UPDATE
+   → A) Fix  B) Skip
+
+2. [INFORMATIONAL] app/services/generator.rb:88 — LLM output not type-checked before DB write
+   Fix: Add JSON schema validation
+   → A) Fix  B) Skip
+
+RECOMMENDATION: Fix both — #1 is a real race condition, #2 prevents silent data corruption.
+```
+
+If 3 or fewer ASK items, you may use individual AskUserQuestion calls instead of batching.
+
+### Step 5d: Apply user-approved fixes
+
+Apply fixes for items where the user chose "Fix." Output what was fixed.
+
+If no ASK items exist (everything was AUTO-FIX), skip the question entirely.
+
+### Greptile comment resolution
+
+After outputting your own findings, if Greptile comments were classified in Step 2.5:
+
+**Include a Greptile summary in your output header:** `+ N Greptile comments (X valid, Y fixed, Z FP)`
+
+Before replying to any comment, run the **Escalation Detection** algorithm from greptile-triage.md to determine whether to use Tier 1 (friendly) or Tier 2 (firm) reply templates.
+
+1. **VALID & ACTIONABLE comments:** These are included in your findings — they follow the Fix-First flow (auto-fixed if mechanical, batched into ASK if not) (A: Fix it now, B: Acknowledge, C: False positive). If the user chooses A (fix), reply using the **Fix reply template** from greptile-triage.md (include inline diff + explanation). If the user chooses C (false positive), reply using the **False Positive reply template** (include evidence + suggested re-rank), save to both per-project and global greptile-history.
+
+2. **FALSE POSITIVE comments:** Present each one via AskUserQuestion:
+   - Show the Greptile comment: file:line (or [top-level]) + body summary + permalink URL
+   - Explain concisely why it's a false positive
+   - Options:
+     - A) Reply to Greptile explaining why this is incorrect (recommended if clearly wrong)
+     - B) Fix it anyway (if low-effort and harmless)
+     - C) Ignore — don't reply, don't fix
+
+   If the user chooses A, reply using the **False Positive reply template** from greptile-triage.md (include evidence + suggested re-rank), save to both per-project and global greptile-history.
+
+3. **VALID BUT ALREADY FIXED comments:** Reply using the **Already Fixed reply template** from greptile-triage.md — no AskUserQuestion needed:
+   - Include what was done and the fixing commit SHA
+   - Save to both per-project and global greptile-history
+
+4. **SUPPRESSED comments:** Skip silently — these are known false positives from previous triage.
+
+---
+
+## Step 5.5: TODOS cross-reference
+
+Read `TODOS.md` in the repository root (if it exists). Cross-reference the PR against open TODOs:
+
+- **Does this PR close any open TODOs?** If yes, note which items in your output: "This PR addresses TODO: <title>"
+- **Does this PR create work that should become a TODO?** If yes, flag it as an informational finding.
+- **Are there related TODOs that provide context for this review?** If yes, reference them when discussing related findings.
+
+If TODOS.md doesn't exist, skip this step silently.
+
+---
+
+## Step 5.6: Documentation staleness check
+
+Cross-reference the diff against documentation files. For each `.md` file in the repo root (README.md, ARCHITECTURE.md, CONTRIBUTING.md, CLAUDE.md, etc.):
+
+1. Check if code changes in the diff affect features, components, or workflows described in that doc file.
+2. If the doc file was NOT updated in this branch but the code it describes WAS changed, flag it as an INFORMATIONAL finding:
+   "Documentation may be stale: [file] describes [feature/component] but code changed in this branch. Consider running `$gstack-document-release`."
+
+This is informational only — never critical. The fix action is `$gstack-document-release`.
+
+If no documentation files exist, skip this step silently.
+
+---
+
+## Important Rules
+
+- **Read the FULL diff before commenting.** Do not flag issues already addressed in the diff.
+- **Fix-first, not read-only.** AUTO-FIX items are applied directly. ASK items are only applied after user approval. Never commit, push, or create PRs — that's $gstack-ship's job.
+- **Be terse.** One line problem, one line fix. No preamble.
+- **Only flag real problems.** Skip anything that's fine.
+- **Use Greptile reply templates from greptile-triage.md.** Every reply includes evidence. Never post vague replies.
diff --git a/.agents/skills/gstack-setup-browser-cookies/SKILL.md b/.agents/skills/gstack-setup-browser-cookies/SKILL.md
new file mode 100644
index 0000000..7585b2c
--- /dev/null
+++ b/.agents/skills/gstack-setup-browser-cookies/SKILL.md
@@ -0,0 +1,152 @@
+---
+name: gstack-setup-browser-cookies
+description: |
+  Import cookies from your real browser (Comet, Chrome, Arc, Brave, Edge) into the
+  headless browse session. Opens an interactive picker UI where you select which
+  cookie domains to import. Use before QA testing authenticated pages.
+---
+<!-- AUTO-GENERATED from setup-browser-cookies/SKILL.md.tmpl — do not edit directly -->
+<!-- Regenerate: bun run gen:skill-docs --host codex -->
+
+## Preamble (run first)
+
+```bash
+_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+[ -n "$_UPD" ] && echo "$_UPD" || true
+mkdir -p ~/.gstack/sessions
+touch ~/.gstack/sessions/"$PPID"
+_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
+find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
+echo "BRANCH: $_BRANCH"
+```
+
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+
+## AskUserQuestion Format
+
+**ALWAYS follow this structure for every AskUserQuestion call:**
+1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
+2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called.
+3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]`
+4. **Options:** Lettered options: `A) ... B) ... C) ...`
+
+Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+
+Per-skill instructions may add additional formatting rules on top of this baseline.
+
+## Contributor Mode
+
+If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better.
+
+**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better!
+
+**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore.
+
+**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs.
+
+**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer):
+
+```
+# {Title}
+
+Hey gstack team — ran into this while using $gstack-{skill-name}:
+
+**What I was trying to do:** {what the user/agent was attempting}
+**What happened instead:** {what actually happened}
+**My rating:** {0-10} — {one sentence on why it wasn't a 10}
+
+## Steps to reproduce
+1. {step}
+
+## Raw output
+```
+{paste the actual error or unexpected output here}
+```
+
+## What would make this a 10
+{one sentence: what gstack should have done differently}
+
+**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** $gstack-{skill}
+```
+
+Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
+
+# Setup Browser Cookies
+
+Import logged-in sessions from your real Chromium browser into the headless browse session.
+
+## How it works
+
+1. Find the browse binary
+2. Run `cookie-import-browser` to detect installed browsers and open the picker UI
+3. User selects which cookie domains to import in their browser
+4. Cookies are decrypted and loaded into the Playwright session
+
+## Steps
+
+### 1. Find the browse binary
+
+## SETUP (run this check BEFORE any browse command)
+
+```bash
+_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
+B=""
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
+if [ -x "$B" ]; then
+  echo "READY: $B"
+else
+  echo "NEEDS_SETUP"
+fi
+```
+
+If `NEEDS_SETUP`:
+1. Tell the user: "gstack browse needs a one-time build (~10 seconds). OK to proceed?" Then STOP and wait.
+2. Run: `cd <SKILL_DIR> && ./setup --host codex`
+3. If `bun` is not installed: `curl -fsSL https://bun.sh/install | bash`
+
+### 2. Open the cookie picker
+
+```bash
+$B cookie-import-browser
+```
+
+This auto-detects installed Chromium browsers (Comet, Chrome, Arc, Brave, Edge) and opens
+an interactive picker UI in your default browser where you can:
+- Switch between installed browsers
+- Search domains
+- Click "+" to import a domain's cookies
+- Click trash to remove imported cookies
+
+Tell the user: **"Cookie picker opened — select the domains you want to import in your browser, then tell me when you're done."**
+
+### 3. Direct import (alternative)
+
+If the user specifies a domain directly (e.g., `$gstack-setup-browser-cookies github.com`), skip the UI:
+
+```bash
+$B cookie-import-browser comet --domain github.com
+```
+
+Replace `comet` with the appropriate browser if specified.
+
+### 4. Verify
+
+After the user confirms they're done:
+
+```bash
+$B cookies
+```
+
+Show the user a summary of imported cookies (domain counts).
+
+## Notes
+
+- First import per browser may trigger a macOS Keychain dialog — click "Allow" / "Always Allow"
+- Cookie picker is served on the same port as the browse server (no extra process)
+- Only domain names and cookie counts are shown in the UI — no cookie values are exposed
+- The browse session persists cookies between commands, so imported cookies work immediately
diff --git a/.agents/skills/gstack-ship/SKILL.md b/.agents/skills/gstack-ship/SKILL.md
new file mode 100644
index 0000000..9a8bffb
--- /dev/null
+++ b/.agents/skills/gstack-ship/SKILL.md
@@ -0,0 +1,495 @@
+---
+name: gstack-ship
+description: |
+  Ship workflow: detect + merge base branch, run tests, review diff, bump VERSION, update CHANGELOG, commit, push, create PR.
+---
+<!-- AUTO-GENERATED from ship/SKILL.md.tmpl — do not edit directly -->
+<!-- Regenerate: bun run gen:skill-docs --host codex -->
+
+## Preamble (run first)
+
+```bash
+_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+[ -n "$_UPD" ] && echo "$_UPD" || true
+mkdir -p ~/.gstack/sessions
+touch ~/.gstack/sessions/"$PPID"
+_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
+find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
+echo "BRANCH: $_BRANCH"
+```
+
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+
+## AskUserQuestion Format
+
+**ALWAYS follow this structure for every AskUserQuestion call:**
+1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
+2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called.
+3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]`
+4. **Options:** Lettered options: `A) ... B) ... C) ...`
+
+Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+
+Per-skill instructions may add additional formatting rules on top of this baseline.
+
+## Contributor Mode
+
+If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better.
+
+**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better!
+
+**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore.
+
+**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs.
+
+**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer):
+
+```
+# {Title}
+
+Hey gstack team — ran into this while using $gstack-{skill-name}:
+
+**What I was trying to do:** {what the user/agent was attempting}
+**What happened instead:** {what actually happened}
+**My rating:** {0-10} — {one sentence on why it wasn't a 10}
+
+## Steps to reproduce
+1. {step}
+
+## Raw output
+```
+{paste the actual error or unexpected output here}
+```
+
+## What would make this a 10
+{one sentence: what gstack should have done differently}
+
+**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** $gstack-{skill}
+```
+
+Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
+
+## Step 0: Detect base branch
+
+Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps.
+
+1. Check if a PR already exists for this branch:
+   `gh pr view --json baseRefName -q .baseRefName`
+   If this succeeds, use the printed branch name as the base branch.
+
+2. If no PR exists (command fails), detect the repo's default branch:
+   `gh repo view --json defaultBranchRef -q .defaultBranchRef.name`
+
+3. If both commands fail, fall back to `main`.
+
+Print the detected base branch name. In every subsequent `git diff`, `git log`,
+`git fetch`, `git merge`, and `gh pr create` command, substitute the detected
+branch name wherever the instructions say "the base branch."
+
+---
+
+# Ship: Fully Automated Ship Workflow
+
+You are running the `$gstack-ship` workflow. This is a **non-interactive, fully automated** workflow. Do NOT ask for confirmation at any step. The user said `$gstack-ship` which means DO IT. Run straight through and output the PR URL at the end.
+
+**Only stop for:**
+- On the base branch (abort)
+- Merge conflicts that can't be auto-resolved (stop, show conflicts)
+- Test failures (stop, show failures)
+- Pre-landing review finds ASK items that need user judgment
+- MINOR or MAJOR version bump needed (ask — see Step 4)
+- Greptile review comments that need user decision (complex fixes, false positives)
+- TODOS.md missing and user wants to create one (ask — see Step 5.5)
+- TODOS.md disorganized and user wants to reorganize (ask — see Step 5.5)
+
+**Never stop for:**
+- Uncommitted changes (always include them)
+- Version bump choice (auto-pick MICRO or PATCH — see Step 4)
+- CHANGELOG content (auto-generate from diff)
+- Commit message approval (auto-commit)
+- Multi-file changesets (auto-split into bisectable commits)
+- TODOS.md completed-item detection (auto-mark)
+- Auto-fixable review findings (dead code, N+1, stale comments — fixed automatically)
+
+---
+
+## Step 1: Pre-flight
+
+1. Check the current branch. If on the base branch or the repo's default branch, **abort**: "You're on the base branch. Ship from a feature branch."
+
+2. Run `git status` (never use `-uall`). Uncommitted changes are always included — no need to ask.
+
+3. Run `git diff <base>...HEAD --stat` and `git log <base>..HEAD --oneline` to understand what's being shipped.
+
+---
+
+## Step 2: Merge the base branch (BEFORE tests)
+
+Fetch and merge the base branch into the feature branch so tests run against the merged state:
+
+```bash
+git fetch origin <base> && git merge origin/<base> --no-edit
+```
+
+**If there are merge conflicts:** Try to auto-resolve if they are simple (VERSION, schema.rb, CHANGELOG ordering). If conflicts are complex or ambiguous, **STOP** and show them.
+
+**If already up to date:** Continue silently.
+
+---
+
+## Step 3: Run tests (on merged code)
+
+**Do NOT run `RAILS_ENV=test bin/rails db:migrate`** — `bin/test-lane` already calls
+`db:test:prepare` internally, which loads the schema into the correct lane database.
+Running bare test migrations without INSTANCE hits an orphan DB and corrupts structure.sql.
+
+Run both test suites in parallel:
+
+```bash
+bin/test-lane 2>&1 | tee /tmp/ship_tests.txt &
+npm run test 2>&1 | tee /tmp/ship_vitest.txt &
+wait
+```
+
+After both complete, read the output files and check pass/fail.
+
+**If any test fails:** Show the failures and **STOP**. Do not proceed.
+
+**If all pass:** Continue silently — just note the counts briefly.
+
+---
+
+## Step 3.25: Eval Suites (conditional)
+
+Evals are mandatory when prompt-related files change. Skip this step entirely if no prompt files are in the diff.
+
+**1. Check if the diff touches prompt-related files:**
+
+```bash
+git diff origin/<base> --name-only
+```
+
+Match against these patterns (from CLAUDE.md):
+- `app/services/*_prompt_builder.rb`
+- `app/services/*_generation_service.rb`, `*_writer_service.rb`, `*_designer_service.rb`
+- `app/services/*_evaluator.rb`, `*_scorer.rb`, `*_classifier_service.rb`, `*_analyzer.rb`
+- `app/services/concerns/*voice*.rb`, `*writing*.rb`, `*prompt*.rb`, `*token*.rb`
+- `app/services/chat_tools/*.rb`, `app/services/x_thread_tools/*.rb`
+- `config/system_prompts/*.txt`
+- `test/evals/**/*` (eval infrastructure changes affect all suites)
+
+**If no matches:** Print "No prompt-related files changed — skipping evals." and continue to Step 3.5.
+
+**2. Identify affected eval suites:**
+
+Each eval runner (`test/evals/*_eval_runner.rb`) declares `PROMPT_SOURCE_FILES` listing which source files affect it. Grep these to find which suites match the changed files:
+
+```bash
+grep -l "changed_file_basename" test/evals/*_eval_runner.rb
+```
+
+Map runner → test file: `post_generation_eval_runner.rb` → `post_generation_eval_test.rb`.
+
+**Special cases:**
+- Changes to `test/evals/judges/*.rb`, `test/evals/support/*.rb`, or `test/evals/fixtures/` affect ALL suites that use those judges/support files. Check imports in the eval test files to determine which.
+- Changes to `config/system_prompts/*.txt` — grep eval runners for the prompt filename to find affected suites.
+- If unsure which suites are affected, run ALL suites that could plausibly be impacted. Over-testing is better than missing a regression.
+
+**3. Run affected suites at `EVAL_JUDGE_TIER=full`:**
+
+`$gstack-ship` is a pre-merge gate, so always use full tier (Sonnet structural + Opus persona judges).
+
+```bash
+EVAL_JUDGE_TIER=full EVAL_VERBOSE=1 bin/test-lane --eval test/evals/<suite>_eval_test.rb 2>&1 | tee /tmp/ship_evals.txt
+```
+
+If multiple suites need to run, run them sequentially (each needs a test lane). If the first suite fails, stop immediately — don't burn API cost on remaining suites.
+
+**4. Check results:**
+
+- **If any eval fails:** Show the failures, the cost dashboard, and **STOP**. Do not proceed.
+- **If all pass:** Note pass counts and cost. Continue to Step 3.5.
+
+**5. Save eval output** — include eval results and cost dashboard in the PR body (Step 8).
+
+**Tier reference (for context — $gstack-ship always uses `full`):**
+| Tier | When | Speed (cached) | Cost |
+|------|------|----------------|------|
+| `fast` (Haiku) | Dev iteration, smoke tests | ~5s (14x faster) | ~$0.07/run |
+| `standard` (Sonnet) | Default dev, `bin/test-lane --eval` | ~17s (4x faster) | ~$0.37/run |
+| `full` (Opus persona) | **`$gstack-ship` and pre-merge** | ~72s (baseline) | ~$1.27/run |
+
+---
+
+## Step 3.5: Pre-Landing Review
+
+Review the diff for structural issues that tests don't catch.
+
+1. Read `$HOME/.agents/skills/gstack/review/checklist.md`. If the file cannot be read, **STOP** and report the error.
+
+2. Run `git diff origin/<base>` to get the full diff (scoped to feature changes against the freshly-fetched base branch).
+
+3. Apply the review checklist in two passes:
+   - **Pass 1 (CRITICAL):** SQL & Data Safety, LLM Output Trust Boundary
+   - **Pass 2 (INFORMATIONAL):** All remaining categories
+
+4. **Classify each finding as AUTO-FIX or ASK** per the Fix-First Heuristic in
+   checklist.md. Critical findings lean toward ASK; informational lean toward AUTO-FIX.
+
+5. **Auto-fix all AUTO-FIX items.** Apply each fix. Output one line per fix:
+   `[AUTO-FIXED] [file:line] Problem → what you did`
+
+6. **If ASK items remain,** present them in ONE AskUserQuestion:
+   - List each with number, severity, problem, recommended fix
+   - Per-item options: A) Fix  B) Skip
+   - Overall RECOMMENDATION
+   - If 3 or fewer ASK items, you may use individual AskUserQuestion calls instead
+
+7. **After all fixes (auto + user-approved):**
+   - If ANY fixes were applied: commit fixed files by name (`git add <fixed-files> && git commit -m "fix: pre-landing review fixes"`), then **STOP** and tell the user to run `$gstack-ship` again to re-test.
+   - If no fixes applied (all ASK items skipped, or no issues found): continue to Step 4.
+
+8. Output summary: `Pre-Landing Review: N issues — M auto-fixed, K asked (J fixed, L skipped)`
+
+   If no issues found: `Pre-Landing Review: No issues found.`
+
+Save the review output — it goes into the PR body in Step 8.
+
+---
+
+## Step 3.75: Address Greptile review comments (if PR exists)
+
+Read `$HOME/.agents/skills/gstack/review/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps.
+
+**If no PR exists, `gh` fails, API returns an error, or there are zero Greptile comments:** Skip this step silently. Continue to Step 4.
+
+**If Greptile comments are found:**
+
+Include a Greptile summary in your output: `+ N Greptile comments (X valid, Y fixed, Z FP)`
+
+Before replying to any comment, run the **Escalation Detection** algorithm from greptile-triage.md to determine whether to use Tier 1 (friendly) or Tier 2 (firm) reply templates.
+
+For each classified comment:
+
+**VALID & ACTIONABLE:** Use AskUserQuestion with:
+- The comment (file:line or [top-level] + body summary + permalink URL)
+- `RECOMMENDATION: Choose A because [one-line reason]`
+- Options: A) Fix now, B) Acknowledge and ship anyway, C) It's a false positive
+- If user chooses A: apply the fix, commit the fixed files (`git add <fixed-files> && git commit -m "fix: address Greptile review — <brief description>"`), reply using the **Fix reply template** from greptile-triage.md (include inline diff + explanation), and save to both per-project and global greptile-history (type: fix).
+- If user chooses C: reply using the **False Positive reply template** from greptile-triage.md (include evidence + suggested re-rank), save to both per-project and global greptile-history (type: fp).
+
+**VALID BUT ALREADY FIXED:** Reply using the **Already Fixed reply template** from greptile-triage.md — no AskUserQuestion needed:
+- Include what was done and the fixing commit SHA
+- Save to both per-project and global greptile-history (type: already-fixed)
+
+**FALSE POSITIVE:** Use AskUserQuestion:
+- Show the comment and why you think it's wrong (file:line or [top-level] + body summary + permalink URL)
+- Options:
+  - A) Reply to Greptile explaining the false positive (recommended if clearly wrong)
+  - B) Fix it anyway (if trivial)
+  - C) Ignore silently
+- If user chooses A: reply using the **False Positive reply template** from greptile-triage.md (include evidence + suggested re-rank), save to both per-project and global greptile-history (type: fp)
+
+**SUPPRESSED:** Skip silently — these are known false positives from previous triage.
+
+**After all comments are resolved:** If any fixes were applied, the tests from Step 3 are now stale. **Re-run tests** (Step 3) before continuing to Step 4. If no fixes were applied, continue to Step 4.
+
+---
+
+## Step 4: Version bump (auto-decide)
+
+1. Read the current `VERSION` file (4-digit format: `MAJOR.MINOR.PATCH.MICRO`)
+
+2. **Auto-decide the bump level based on the diff:**
+   - Count lines changed (`git diff origin/<base>...HEAD --stat | tail -1`)
+   - **MICRO** (4th digit): < 50 lines changed, trivial tweaks, typos, config
+   - **PATCH** (3rd digit): 50+ lines changed, bug fixes, small-medium features
+   - **MINOR** (2nd digit): **ASK the user** — only for major features or significant architectural changes
+   - **MAJOR** (1st digit): **ASK the user** — only for milestones or breaking changes
+
+3. Compute the new version:
+   - Bumping a digit resets all digits to its right to 0
+   - Example: `0.19.1.0` + PATCH → `0.19.2.0`
+
+4. Write the new version to the `VERSION` file.
+
+---
+
+## Step 5: CHANGELOG (auto-generate)
+
+1. Read `CHANGELOG.md` header to know the format.
+
+2. Auto-generate the entry from **ALL commits on the branch** (not just recent ones):
+   - Use `git log <base>..HEAD --oneline` to see every commit being shipped
+   - Use `git diff <base>...HEAD` to see the full diff against the base branch
+   - The CHANGELOG entry must be comprehensive of ALL changes going into the PR
+   - If existing CHANGELOG entries on the branch already cover some commits, replace them with one unified entry for the new version
+   - Categorize changes into applicable sections:
+     - `### Added` — new features
+     - `### Changed` — changes to existing functionality
+     - `### Fixed` — bug fixes
+     - `### Removed` — removed features
+   - Write concise, descriptive bullet points
+   - Insert after the file header (line 5), dated today
+   - Format: `## [X.Y.Z.W] - YYYY-MM-DD`
+
+**Do NOT ask the user to describe changes.** Infer from the diff and commit history.
+
+---
+
+## Step 5.5: TODOS.md (auto-update)
+
+Cross-reference the project's TODOS.md against the changes being shipped. Mark completed items automatically; prompt only if the file is missing or disorganized.
+
+Read `$HOME/.agents/skills/gstack/review/TODOS-format.md` for the canonical format reference.
+
+**1. Check if TODOS.md exists** in the repository root.
+
+**If TODOS.md does not exist:** Use AskUserQuestion:
+- Message: "GStack recommends maintaining a TODOS.md organized by skill/component, then priority (P0 at top through P4, then Completed at bottom). See TODOS-format.md for the full format. Would you like to create one?"
+- Options: A) Create it now, B) Skip for now
+- If A: Create `TODOS.md` with a skeleton (# TODOS heading + ## Completed section). Continue to step 3.
+- If B: Skip the rest of Step 5.5. Continue to Step 6.
+
+**2. Check structure and organization:**
+
+Read TODOS.md and verify it follows the recommended structure:
+- Items grouped under `## <Skill/Component>` headings
+- Each item has `**Priority:**` field with P0-P4 value
+- A `## Completed` section at the bottom
+
+**If disorganized** (missing priority fields, no component groupings, no Completed section): Use AskUserQuestion:
+- Message: "TODOS.md doesn't follow the recommended structure (skill/component groupings, P0-P4 priority, Completed section). Would you like to reorganize it?"
+- Options: A) Reorganize now (recommended), B) Leave as-is
+- If A: Reorganize in-place following TODOS-format.md. Preserve all content — only restructure, never delete items.
+- If B: Continue to step 3 without restructuring.
+
+**3. Detect completed TODOs:**
+
+This step is fully automatic — no user interaction.
+
+Use the diff and commit history already gathered in earlier steps:
+- `git diff <base>...HEAD` (full diff against the base branch)
+- `git log <base>..HEAD --oneline` (all commits being shipped)
+
+For each TODO item, check if the changes in this PR complete it by:
+- Matching commit messages against the TODO title and description
+- Checking if files referenced in the TODO appear in the diff
+- Checking if the TODO's described work matches the functional changes
+
+**Be conservative:** Only mark a TODO as completed if there is clear evidence in the diff. If uncertain, leave it alone.
+
+**4. Move completed items** to the `## Completed` section at the bottom. Append: `**Completed:** vX.Y.Z (YYYY-MM-DD)`
+
+**5. Output summary:**
+- `TODOS.md: N items marked complete (item1, item2, ...). M items remaining.`
+- Or: `TODOS.md: No completed items detected. M items remaining.`
+- Or: `TODOS.md: Created.` / `TODOS.md: Reorganized.`
+
+**6. Defensive:** If TODOS.md cannot be written (permission error, disk full), warn the user and continue. Never stop the ship workflow for a TODOS failure.
+
+Save this summary — it goes into the PR body in Step 8.
+
+---
+
+## Step 6: Commit (bisectable chunks)
+
+**Goal:** Create small, logical commits that work well with `git bisect` and help LLMs understand what changed.
+
+1. Analyze the diff and group changes into logical commits. Each commit should represent **one coherent change** — not one file, but one logical unit.
+
+2. **Commit ordering** (earlier commits first):
+   - **Infrastructure:** migrations, config changes, route additions
+   - **Models & services:** new models, services, concerns (with their tests)
+   - **Controllers & views:** controllers, views, JS/React components (with their tests)
+   - **VERSION + CHANGELOG + TODOS.md:** always in the final commit
+
+3. **Rules for splitting:**
+   - A model and its test file go in the same commit
+   - A service and its test file go in the same commit
+   - A controller, its views, and its test go in the same commit
+   - Migrations are their own commit (or grouped with the model they support)
+   - Config/route changes can group with the feature they enable
+   - If the total diff is small (< 50 lines across < 4 files), a single commit is fine
+
+4. **Each commit must be independently valid** — no broken imports, no references to code that doesn't exist yet. Order commits so dependencies come first.
+
+5. Compose each commit message:
+   - First line: `<type>: <summary>` (type = feat/fix/chore/refactor/docs)
+   - Body: brief description of what this commit contains
+   - Only the **final commit** (VERSION + CHANGELOG) gets the version tag and co-author trailer:
+
+```bash
+git commit -m "$(cat <<'EOF'
+chore: bump version and changelog (vX.Y.Z.W)
+
+Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+## Step 7: Push
+
+Push to the remote with upstream tracking:
+
+```bash
+git push -u origin <branch-name>
+```
+
+---
+
+## Step 8: Create PR
+
+Create a pull request using `gh`:
+
+```bash
+gh pr create --base <base> --title "<type>: <summary>" --body "$(cat <<'EOF'
+## Summary
+<bullet points from CHANGELOG>
+
+## Pre-Landing Review
+<findings from Step 3.5, or "No issues found.">
+
+## Eval Results
+<If evals ran: suite names, pass/fail counts, cost dashboard summary. If skipped: "No prompt-related files changed — evals skipped.">
+
+## Greptile Review
+<If Greptile comments were found: bullet list with [FIXED] / [FALSE POSITIVE] / [ALREADY FIXED] tag + one-line summary per comment>
+<If no Greptile comments found: "No Greptile comments.">
+<If no PR existed during Step 3.75: omit this section entirely>
+
+## TODOS
+<If items marked complete: bullet list of completed items with version>
+<If no items completed: "No TODO items completed in this PR.">
+<If TODOS.md created or reorganized: note that>
+<If TODOS.md doesn't exist and user skipped: omit this section>
+
+## Test plan
+- [x] All Rails tests pass (N runs, 0 failures)
+- [x] All Vitest tests pass (N tests)
+
+🤖 Generated with [Claude Code](https://claude.com/claude-code)
+EOF
+)"
+```
+
+**Output the PR URL** — this should be the final output the user sees.
+
+---
+
+## Important Rules
+
+- **Never skip tests.** If tests fail, stop.
+- **Never skip the pre-landing review.** If checklist.md is unreadable, stop.
+- **Never force push.** Use regular `git push` only.
+- **Never ask for confirmation** except for MINOR/MAJOR version bumps and pre-landing review ASK items (batched into at most one AskUserQuestion).
+- **Always use the 4-digit version format** from the VERSION file.
+- **Date format in CHANGELOG:** `YYYY-MM-DD`
+- **Split commits for bisectability** — each commit = one logical change.
+- **TODOS.md completion detection must be conservative.** Only mark items as completed when the diff clearly shows the work is done.
+- **Use Greptile reply templates from greptile-triage.md.** Every reply includes evidence (inline diff, code references, re-rank suggestion). Never post vague replies.
+- **The goal is: user says `$gstack-ship`, next thing they see is the review + PR URL.**
diff --git a/.agents/skills/gstack-upgrade/SKILL.md b/.agents/skills/gstack-upgrade/SKILL.md
new file mode 100644
index 0000000..80d6d8b
--- /dev/null
+++ b/.agents/skills/gstack-upgrade/SKILL.md
@@ -0,0 +1,195 @@
+---
+name: gstack-upgrade
+description: |
+  Upgrade gstack to the latest version. Detects global vs vendored install,
+  runs the upgrade, and shows what's new.
+---
+<!-- AUTO-GENERATED from gstack-upgrade/SKILL.md.tmpl — do not edit directly -->
+<!-- Regenerate: bun run gen:skill-docs --host codex -->
+
+# $gstack-upgrade
+
+Upgrade gstack to the latest version and show what's new.
+
+## Inline upgrade flow
+
+This section is referenced by all skill preambles when they detect `UPGRADE_AVAILABLE`.
+
+### Step 1: Ask the user (or auto-upgrade)
+
+First, check if auto-upgrade is enabled:
+```bash
+_AUTO=""
+[ "${GSTACK_AUTO_UPGRADE:-}" = "1" ] && _AUTO="true"
+[ -z "$_AUTO" ] && _AUTO=$($HOME/.agents/skills/gstack/bin/gstack-config get auto_upgrade 2>/dev/null || true)
+echo "AUTO_UPGRADE=$_AUTO"
+```
+
+**If `AUTO_UPGRADE=true` or `AUTO_UPGRADE=1`:** Skip AskUserQuestion. Log "Auto-upgrading gstack v{old} → v{new}..." and proceed directly to Step 2. If `./setup` fails during auto-upgrade, restore from backup (`.bak` directory) and warn the user: "Auto-upgrade failed — restored previous version. Run `$gstack-upgrade` manually to retry."
+
+**Otherwise**, use AskUserQuestion:
+- Question: "gstack **v{new}** is available (you're on v{old}). Upgrade now?"
+- Options: ["Yes, upgrade now", "Always keep me up to date", "Not now", "Never ask again"]
+
+**If "Yes, upgrade now":** Proceed to Step 2.
+
+**If "Always keep me up to date":**
+```bash
+$HOME/.agents/skills/gstack/bin/gstack-config set auto_upgrade true
+```
+Tell user: "Auto-upgrade enabled. Future updates will install automatically." Then proceed to Step 2.
+
+**If "Not now":** Write snooze state with escalating backoff (first snooze = 24h, second = 48h, third+ = 1 week), then continue with the current skill. Do not mention the upgrade again.
+```bash
+_SNOOZE_FILE=~/.gstack/update-snoozed
+_REMOTE_VER="{new}"
+_CUR_LEVEL=0
+if [ -f "$_SNOOZE_FILE" ]; then
+  _SNOOZED_VER=$(awk '{print $1}' "$_SNOOZE_FILE")
+  if [ "$_SNOOZED_VER" = "$_REMOTE_VER" ]; then
+    _CUR_LEVEL=$(awk '{print $2}' "$_SNOOZE_FILE")
+    case "$_CUR_LEVEL" in *[!0-9]*) _CUR_LEVEL=0 ;; esac
+  fi
+fi
+_NEW_LEVEL=$((_CUR_LEVEL + 1))
+[ "$_NEW_LEVEL" -gt 3 ] && _NEW_LEVEL=3
+echo "$_REMOTE_VER $_NEW_LEVEL $(date +%s)" > "$_SNOOZE_FILE"
+```
+Note: `{new}` is the remote version from the `UPGRADE_AVAILABLE` output — substitute it from the update check result.
+
+Tell user the snooze duration: "Next reminder in 24h" (or 48h or 1 week, depending on level). Tip: "Set `auto_upgrade: true` in `~/.gstack/config.yaml` for automatic upgrades."
+
+**If "Never ask again":**
+```bash
+$HOME/.agents/skills/gstack/bin/gstack-config set update_check false
+```
+Tell user: "Update checks disabled. Run `$HOME/.agents/skills/gstack/bin/gstack-config set update_check true` to re-enable."
+Continue with the current skill.
+
+### Step 2: Detect install type
+
+```bash
+if [ -d "$HOME/.agents/skills/gstack/.git" ]; then
+  INSTALL_TYPE="global-git"
+  INSTALL_DIR="$HOME/.agents/skills/gstack"
+elif [ -d ".agents/skills/gstack/.git" ]; then
+  INSTALL_TYPE="local-git"
+  INSTALL_DIR=".agents/skills/gstack"
+elif [ -d ".agents/skills/gstack" ]; then
+  INSTALL_TYPE="vendored"
+  INSTALL_DIR=".agents/skills/gstack"
+elif [ -d "$HOME/.agents/skills/gstack" ]; then
+  INSTALL_TYPE="vendored-global"
+  INSTALL_DIR="$HOME/.agents/skills/gstack"
+else
+  echo "ERROR: gstack not found"
+  exit 1
+fi
+echo "Install type: $INSTALL_TYPE at $INSTALL_DIR"
+```
+
+The install type and directory path printed above will be used in all subsequent steps.
+
+### Step 3: Save old version
+
+Use the install directory from Step 2's output below:
+
+```bash
+OLD_VERSION=$(cat "$INSTALL_DIR/VERSION" 2>/dev/null || echo "unknown")
+```
+
+### Step 4: Upgrade
+
+Use the install type and directory detected in Step 2:
+
+**For git installs** (global-git, local-git):
+```bash
+cd "$INSTALL_DIR"
+STASH_OUTPUT=$(git stash 2>&1)
+git fetch origin
+git reset --hard origin/main
+./setup
+```
+If `$STASH_OUTPUT` contains "Saved working directory", warn the user: "Note: local changes were stashed. Run `git stash pop` in the skill directory to restore them."
+
+**For vendored installs** (vendored, vendored-global):
+```bash
+PARENT=$(dirname "$INSTALL_DIR")
+TMP_DIR=$(mktemp -d)
+git clone --depth 1 https://github.com/garrytan/gstack.git "$TMP_DIR/gstack"
+mv "$INSTALL_DIR" "$INSTALL_DIR.bak"
+mv "$TMP_DIR/gstack" "$INSTALL_DIR"
+cd "$INSTALL_DIR" && ./setup
+rm -rf "$INSTALL_DIR.bak" "$TMP_DIR"
+```
+
+### Step 4.5: Sync local vendored copy
+
+Use the install directory from Step 2. Check if there's also a local vendored copy that needs updating:
+
+```bash
+_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
+LOCAL_GSTACK=""
+if [ -n "$_ROOT" ] && [ -d "$_ROOT/.agents/skills/gstack" ]; then
+  _RESOLVED_LOCAL=$(cd "$_ROOT/.agents/skills/gstack" && pwd -P)
+  _RESOLVED_PRIMARY=$(cd "$INSTALL_DIR" && pwd -P)
+  if [ "$_RESOLVED_LOCAL" != "$_RESOLVED_PRIMARY" ]; then
+    LOCAL_GSTACK="$_ROOT/.agents/skills/gstack"
+  fi
+fi
+echo "LOCAL_GSTACK=$LOCAL_GSTACK"
+```
+
+If `LOCAL_GSTACK` is non-empty, update it by copying from the freshly-upgraded primary install (same approach as README vendored install):
+```bash
+mv "$LOCAL_GSTACK" "$LOCAL_GSTACK.bak"
+cp -Rf "$INSTALL_DIR" "$LOCAL_GSTACK"
+rm -rf "$LOCAL_GSTACK/.git"
+cd "$LOCAL_GSTACK" && ./setup
+rm -rf "$LOCAL_GSTACK.bak"
+```
+Tell user: "Also updated vendored copy at `$LOCAL_GSTACK` — commit `.agents/skills/gstack/` when you're ready."
+
+### Step 5: Write marker + clear cache
+
+```bash
+mkdir -p ~/.gstack
+echo "$OLD_VERSION" > ~/.gstack/just-upgraded-from
+rm -f ~/.gstack/last-update-check
+rm -f ~/.gstack/update-snoozed
+```
+
+### Step 6: Show What's New
+
+Read `$INSTALL_DIR/CHANGELOG.md`. Find all version entries between the old version and the new version. Summarize as 5-7 bullets grouped by theme. Don't overwhelm — focus on user-facing changes. Skip internal refactors unless they're significant.
+
+Format:
+```
+gstack v{new} — upgraded from v{old}!
+
+What's new:
+- [bullet 1]
+- [bullet 2]
+- ...
+
+Happy shipping!
+```
+
+### Step 7: Continue
+
+After showing What's New, continue with whatever skill the user originally invoked. The upgrade is done — no further action needed.
+
+---
+
+## Standalone usage
+
+When invoked directly as `$gstack-upgrade` (not from a preamble):
+
+1. Force a fresh update check (bypass cache):
+```bash
+$HOME/.agents/skills/gstack/bin/gstack-update-check --force
+```
+Use the output to determine if an upgrade is available.
+
+2. If `UPGRADE_AVAILABLE <old> <new>`: follow Steps 2-6 above.
+3. If no output (up to date): tell the user "You're already on the latest version (v{version})."
diff --git a/.agents/skills/gstack/CHANGELOG.md b/.agents/skills/gstack/CHANGELOG.md
new file mode 120000
index 0000000..79b747a
--- /dev/null
+++ b/.agents/skills/gstack/CHANGELOG.md
@@ -0,0 +1 @@
+../../../CHANGELOG.md
\ No newline at end of file
diff --git a/.agents/skills/gstack/SKILL.md b/.agents/skills/gstack/SKILL.md
new file mode 100644
index 0000000..a262f71
--- /dev/null
+++ b/.agents/skills/gstack/SKILL.md
@@ -0,0 +1,439 @@
+---
+name: gstack
+description: |
+  Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with
+  elements, verify page state, diff before/after actions, take annotated screenshots, check
+  responsive layouts, test forms and uploads, handle dialogs, and assert element states.
+  ~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a
+  user flow, or file a bug with evidence.
+---
+<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
+<!-- Regenerate: bun run gen:skill-docs --host codex -->
+
+## Preamble (run first)
+
+```bash
+_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+[ -n "$_UPD" ] && echo "$_UPD" || true
+mkdir -p ~/.gstack/sessions
+touch ~/.gstack/sessions/"$PPID"
+_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
+find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
+echo "BRANCH: $_BRANCH"
+```
+
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+
+## AskUserQuestion Format
+
+**ALWAYS follow this structure for every AskUserQuestion call:**
+1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
+2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called.
+3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]`
+4. **Options:** Lettered options: `A) ... B) ... C) ...`
+
+Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+
+Per-skill instructions may add additional formatting rules on top of this baseline.
+
+## Contributor Mode
+
+If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better.
+
+**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better!
+
+**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore.
+
+**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs.
+
+**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer):
+
+```
+# {Title}
+
+Hey gstack team — ran into this while using $gstack-{skill-name}:
+
+**What I was trying to do:** {what the user/agent was attempting}
+**What happened instead:** {what actually happened}
+**My rating:** {0-10} — {one sentence on why it wasn't a 10}
+
+## Steps to reproduce
+1. {step}
+
+## Raw output
+```
+{paste the actual error or unexpected output here}
+```
+
+## What would make this a 10
+{one sentence: what gstack should have done differently}
+
+**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** $gstack-{skill}
+```
+
+Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"
+
+# gstack browse: QA Testing & Dogfooding
+
+Persistent headless Chromium. First call auto-starts (~3s), then ~100-200ms per command.
+Auto-shuts down after 30 min idle. State persists between calls (cookies, tabs, sessions).
+
+## SETUP (run this check BEFORE any browse command)
+
+```bash
+_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
+B=""
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
+if [ -x "$B" ]; then
+  echo "READY: $B"
+else
+  echo "NEEDS_SETUP"
+fi
+```
+
+If `NEEDS_SETUP`:
+1. Tell the user: "gstack browse needs a one-time build (~10 seconds). OK to proceed?" Then STOP and wait.
+2. Run: `cd <SKILL_DIR> && ./setup --host codex`
+3. If `bun` is not installed: `curl -fsSL https://bun.sh/install | bash`
+
+## IMPORTANT
+
+- Use the compiled binary via Bash: `$B <command>`
+- NEVER use `mcp__claude-in-chrome__*` tools. They are slow and unreliable.
+- Browser persists between calls — cookies, login sessions, and tabs carry over.
+- Dialogs (alert/confirm/prompt) are auto-accepted by default — no browser lockup.
+
+## QA Workflows
+
+### Test a user flow (login, signup, checkout, etc.)
+
+```bash
+# 1. Go to the page
+$B goto https://app.example.com/login
+
+# 2. See what's interactive
+$B snapshot -i
+
+# 3. Fill the form using refs
+$B fill @e3 "test@example.com"
+$B fill @e4 "password123"
+$B click @e5
+
+# 4. Verify it worked
+$B snapshot -D              # diff shows what changed after clicking
+$B is visible ".dashboard"  # assert the dashboard appeared
+$B screenshot /tmp/after-login.png
+```
+
+### Verify a deployment / check prod
+
+```bash
+$B goto https://yourapp.com
+$B text                          # read the page — does it load?
+$B console                       # any JS errors?
+$B network                       # any failed requests?
+$B js "document.title"           # correct title?
+$B is visible ".hero-section"    # key elements present?
+$B screenshot /tmp/prod-check.png
+```
+
+### Dogfood a feature end-to-end
+
+```bash
+# Navigate to the feature
+$B goto https://app.example.com/new-feature
+
+# Take annotated screenshot — shows every interactive element with labels
+$B snapshot -i -a -o /tmp/feature-annotated.png
+
+# Find ALL clickable things (including divs with cursor:pointer)
+$B snapshot -C
+
+# Walk through the flow
+$B snapshot -i          # baseline
+$B click @e3            # interact
+$B snapshot -D          # what changed? (unified diff)
+
+# Check element states
+$B is visible ".success-toast"
+$B is enabled "#next-step-btn"
+$B is checked "#agree-checkbox"
+
+# Check console for errors after interactions
+$B console
+```
+
+### Test responsive layouts
+
+```bash
+# Quick: 3 screenshots at mobile/tablet/desktop
+$B goto https://yourapp.com
+$B responsive /tmp/layout
+
+# Manual: specific viewport
+$B viewport 375x812     # iPhone
+$B screenshot /tmp/mobile.png
+$B viewport 1440x900    # Desktop
+$B screenshot /tmp/desktop.png
+
+# Element screenshot (crop to specific element)
+$B screenshot "#hero-banner" /tmp/hero.png
+$B snapshot -i
+$B screenshot @e3 /tmp/button.png
+
+# Region crop
+$B screenshot --clip 0,0,800,600 /tmp/above-fold.png
+
+# Viewport only (no scroll)
+$B screenshot --viewport /tmp/viewport.png
+```
+
+### Test file upload
+
+```bash
+$B goto https://app.example.com/upload
+$B snapshot -i
+$B upload @e3 /path/to/test-file.pdf
+$B is visible ".upload-success"
+$B screenshot /tmp/upload-result.png
+```
+
+### Test forms with validation
+
+```bash
+$B goto https://app.example.com/form
+$B snapshot -i
+
+# Submit empty — check validation errors appear
+$B click @e10                        # submit button
+$B snapshot -D                       # diff shows error messages appeared
+$B is visible ".error-message"
+
+# Fill and resubmit
+$B fill @e3 "valid input"
+$B click @e10
+$B snapshot -D                       # diff shows errors gone, success state
+```
+
+### Test dialogs (delete confirmations, prompts)
+
+```bash
+# Set up dialog handling BEFORE triggering
+$B dialog-accept              # will auto-accept next alert/confirm
+$B click "#delete-button"     # triggers confirmation dialog
+$B dialog                     # see what dialog appeared
+$B snapshot -D                # verify the item was deleted
+
+# For prompts that need input
+$B dialog-accept "my answer"  # accept with text
+$B click "#rename-button"     # triggers prompt
+```
+
+### Test authenticated pages (import real browser cookies)
+
+```bash
+# Import cookies from your real browser (opens interactive picker)
+$B cookie-import-browser
+
+# Or import a specific domain directly
+$B cookie-import-browser comet --domain .github.com
+
+# Now test authenticated pages
+$B goto https://github.com/settings/profile
+$B snapshot -i
+$B screenshot /tmp/github-profile.png
+```
+
+### Compare two pages / environments
+
+```bash
+$B diff https://staging.app.com https://prod.app.com
+```
+
+### Multi-step chain (efficient for long flows)
+
+```bash
+echo '[
+  ["goto","https://app.example.com"],
+  ["snapshot","-i"],
+  ["fill","@e3","test@test.com"],
+  ["fill","@e4","password"],
+  ["click","@e5"],
+  ["snapshot","-D"],
+  ["screenshot","/tmp/result.png"]
+]' | $B chain
+```
+
+## Quick Assertion Patterns
+
+```bash
+# Element exists and is visible
+$B is visible ".modal"
+
+# Button is enabled/disabled
+$B is enabled "#submit-btn"
+$B is disabled "#submit-btn"
+
+# Checkbox state
+$B is checked "#agree"
+
+# Input is editable
+$B is editable "#name-field"
+
+# Element has focus
+$B is focused "#search-input"
+
+# Page contains text
+$B js "document.body.textContent.includes('Success')"
+
+# Element count
+$B js "document.querySelectorAll('.list-item').length"
+
+# Specific attribute value
+$B attrs "#logo"    # returns all attributes as JSON
+
+# CSS property
+$B css ".button" "background-color"
+```
+
+## Snapshot System
+
+The snapshot is your primary tool for understanding and interacting with pages.
+
+```
+-i        --interactive           Interactive elements only (buttons, links, inputs) with @e refs
+-c        --compact               Compact (no empty structural nodes)
+-d <N>    --depth                 Limit tree depth (0 = root only, default: unlimited)
+-s <sel>  --selector              Scope to CSS selector
+-D        --diff                  Unified diff against previous snapshot (first call stores baseline)
+-a        --annotate              Annotated screenshot with red overlay boxes and ref labels
+-o <path> --output                Output path for annotated screenshot (default: /tmp/browse-annotated.png)
+-C        --cursor-interactive    Cursor-interactive elements (@c refs — divs with pointer, onclick)
+```
+
+All flags can be combined freely. `-o` only applies when `-a` is also used.
+Example: `$B snapshot -i -a -C -o /tmp/annotated.png`
+
+**Ref numbering:** @e refs are assigned sequentially (@e1, @e2, ...) in tree order.
+@c refs from `-C` are numbered separately (@c1, @c2, ...).
+
+After snapshot, use @refs as selectors in any command:
+```bash
+$B click @e3       $B fill @e4 "value"     $B hover @e1
+$B html @e2        $B css @e5 "color"      $B attrs @e6
+$B click @c1       # cursor-interactive ref (from -C)
+```
+
+**Output format:** indented accessibility tree with @ref IDs, one element per line.
+```
+  @e1 [heading] "Welcome" [level=1]
+  @e2 [textbox] "Email"
+  @e3 [button] "Submit"
+```
+
+Refs are invalidated on navigation — run `snapshot` again after `goto`.
+
+## Command Reference
+
+### Navigation
+| Command | Description |
+|---------|-------------|
+| `back` | History back |
+| `forward` | History forward |
+| `goto <url>` | Navigate to URL |
+| `reload` | Reload page |
+| `url` | Print current URL |
+
+### Reading
+| Command | Description |
+|---------|-------------|
+| `accessibility` | Full ARIA tree |
+| `forms` | Form fields as JSON |
+| `html [selector]` | innerHTML of selector (throws if not found), or full page HTML if no selector given |
+| `links` | All links as "text → href" |
+| `text` | Cleaned page text |
+
+### Interaction
+| Command | Description |
+|---------|-------------|
+| `click <sel>` | Click element |
+| `cookie <name>=<value>` | Set cookie on current page domain |
+| `cookie-import <json>` | Import cookies from JSON file |
+| `cookie-import-browser [browser] [--domain d]` | Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import) |
+| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt. Optional text is sent as the prompt response |
+| `dialog-dismiss` | Auto-dismiss next dialog |
+| `fill <sel> <val>` | Fill input |
+| `header <name>:<value>` | Set custom request header (colon-separated, sensitive values auto-redacted) |
+| `hover <sel>` | Hover element |
+| `press <key>` | Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter |
+| `scroll [sel]` | Scroll element into view, or scroll to page bottom if no selector |
+| `select <sel> <val>` | Select dropdown option by value, label, or visible text |
+| `type <text>` | Type into focused element |
+| `upload <sel> <file> [file2...]` | Upload file(s) |
+| `useragent <string>` | Set user agent |
+| `viewport <WxH>` | Set viewport size |
+| `wait <sel|--networkidle|--load>` | Wait for element, network idle, or page load (timeout: 15s) |
+
+### Inspection
+| Command | Description |
+|---------|-------------|
+| `attrs <sel|@ref>` | Element attributes as JSON |
+| `console [--clear|--errors]` | Console messages (--errors filters to error/warning) |
+| `cookies` | All cookies as JSON |
+| `css <sel> <prop>` | Computed CSS value |
+| `dialog [--clear]` | Dialog messages |
+| `eval <file>` | Run JavaScript from file and return result as string (path must be under /tmp or cwd) |
+| `is <prop> <sel>` | State check (visible/hidden/enabled/disabled/checked/editable/focused) |
+| `js <expr>` | Run JavaScript expression and return result as string |
+| `network [--clear]` | Network requests |
+| `perf` | Page load timings |
+| `storage [set k v]` | Read all localStorage + sessionStorage as JSON, or set <key> <value> to write localStorage |
+
+### Visual
+| Command | Description |
+|---------|-------------|
+| `diff <url1> <url2>` | Text diff between pages |
+| `pdf [path]` | Save as PDF |
+| `responsive [prefix]` | Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc. |
+| `screenshot [--viewport] [--clip x,y,w,h] [selector|@ref] [path]` | Save screenshot (supports element crop via CSS/@ref, --clip region, --viewport) |
+
+### Snapshot
+| Command | Description |
+|---------|-------------|
+| `snapshot [flags]` | Accessibility tree with @e refs for element selection. Flags: -i interactive only, -c compact, -d N depth limit, -s sel scope, -D diff vs previous, -a annotated screenshot, -o path output, -C cursor-interactive @c refs |
+
+### Meta
+| Command | Description |
+|---------|-------------|
+| `chain` | Run commands from JSON stdin. Format: [["cmd","arg1",...],...] |
+
+### Tabs
+| Command | Description |
+|---------|-------------|
+| `closetab [id]` | Close tab |
+| `newtab [url]` | Open new tab |
+| `tab <id>` | Switch to tab |
+| `tabs` | List open tabs |
+
+### Server
+| Command | Description |
+|---------|-------------|
+| `restart` | Restart server |
+| `status` | Health check |
+| `stop` | Shutdown server |
+
+## Tips
+
+1. **Navigate once, query many times.** `goto` loads the page; then `text`, `js`, `screenshot` all hit the loaded page instantly.
+2. **Use `snapshot -i` first.** See all interactive elements, then click/fill by ref. No CSS selector guessing.
+3. **Use `snapshot -D` to verify.** Baseline → action → diff. See exactly what changed.
+4. **Use `is` for assertions.** `is visible .modal` is faster and more reliable than parsing page text.
+5. **Use `snapshot -a` for evidence.** Annotated screenshots are great for bug reports.
+6. **Use `snapshot -C` for tricky UIs.** Finds clickable divs that the accessibility tree misses.
+7. **Check `console` after actions.** Catch JS errors that don't surface visually.
+8. **Use `chain` for long flows.** Single command, no per-step CLI overhead.
diff --git a/.agents/skills/gstack/VERSION b/.agents/skills/gstack/VERSION
new file mode 120000
index 0000000..d62dc73
--- /dev/null
+++ b/.agents/skills/gstack/VERSION
@@ -0,0 +1 @@
+../../../VERSION
\ No newline at end of file
diff --git a/.agents/skills/gstack/bin b/.agents/skills/gstack/bin
new file mode 120000
index 0000000..1660911
--- /dev/null
+++ b/.agents/skills/gstack/bin
@@ -0,0 +1 @@
+../../../bin
\ No newline at end of file
diff --git a/.agents/skills/gstack/browse b/.agents/skills/gstack/browse
new file mode 120000
index 0000000..93859bb
--- /dev/null
+++ b/.agents/skills/gstack/browse
@@ -0,0 +1 @@
+../../../browse
\ No newline at end of file
diff --git a/.agents/skills/gstack/design-consultation b/.agents/skills/gstack/design-consultation
new file mode 120000
index 0000000..1d20b5c
--- /dev/null
+++ b/.agents/skills/gstack/design-consultation
@@ -0,0 +1 @@
+../../../design-consultation
\ No newline at end of file
diff --git a/.agents/skills/gstack/document-release b/.agents/skills/gstack/document-release
new file mode 120000
index 0000000..b174b6a
--- /dev/null
+++ b/.agents/skills/gstack/document-release
@@ -0,0 +1 @@
+../../../document-release
\ No newline at end of file
diff --git a/.agents/skills/gstack/gstack-upgrade b/.agents/skills/gstack/gstack-upgrade
new file mode 120000
index 0000000..05f8ec7
--- /dev/null
+++ b/.agents/skills/gstack/gstack-upgrade
@@ -0,0 +1 @@
+../../../gstack-upgrade
\ No newline at end of file
diff --git a/.agents/skills/gstack/package.json b/.agents/skills/gstack/package.json
new file mode 120000
index 0000000..8627c75
--- /dev/null
+++ b/.agents/skills/gstack/package.json
@@ -0,0 +1 @@
+../../../package.json
\ No newline at end of file
diff --git a/.agents/skills/gstack/plan-ceo-review b/.agents/skills/gstack/plan-ceo-review
new file mode 120000
index 0000000..c0eda16
--- /dev/null
+++ b/.agents/skills/gstack/plan-ceo-review
@@ -0,0 +1 @@
+../../../plan-ceo-review
\ No newline at end of file
diff --git a/.agents/skills/gstack/plan-design-review b/.agents/skills/gstack/plan-design-review
new file mode 120000
index 0000000..9efd991
--- /dev/null
+++ b/.agents/skills/gstack/plan-design-review
@@ -0,0 +1 @@
+../../../plan-design-review
\ No newline at end of file
diff --git a/.agents/skills/gstack/plan-eng-review b/.agents/skills/gstack/plan-eng-review
new file mode 120000
index 0000000..9cc9813
--- /dev/null
+++ b/.agents/skills/gstack/plan-eng-review
@@ -0,0 +1 @@
+../../../plan-eng-review
\ No newline at end of file
diff --git a/.agents/skills/gstack/qa b/.agents/skills/gstack/qa
new file mode 120000
index 0000000..3b4e224
--- /dev/null
+++ b/.agents/skills/gstack/qa
@@ -0,0 +1 @@
+../../../qa
\ No newline at end of file
diff --git a/.agents/skills/gstack/qa-design-review b/.agents/skills/gstack/qa-design-review
new file mode 120000
index 0000000..20bac80
--- /dev/null
+++ b/.agents/skills/gstack/qa-design-review
@@ -0,0 +1 @@
+../../../qa-design-review
\ No newline at end of file
diff --git a/.agents/skills/gstack/qa-only b/.agents/skills/gstack/qa-only
new file mode 120000
index 0000000..3cac28e
--- /dev/null
+++ b/.agents/skills/gstack/qa-only
@@ -0,0 +1 @@
+../../../qa-only
\ No newline at end of file
diff --git a/.agents/skills/gstack/retro b/.agents/skills/gstack/retro
new file mode 120000
index 0000000..7b814e4
--- /dev/null
+++ b/.agents/skills/gstack/retro
@@ -0,0 +1 @@
+../../../retro
\ No newline at end of file
diff --git a/.agents/skills/gstack/review b/.agents/skills/gstack/review
new file mode 120000
index 0000000..bc49ca2
--- /dev/null
+++ b/.agents/skills/gstack/review
@@ -0,0 +1 @@
+../../../review
\ No newline at end of file
diff --git a/.agents/skills/gstack/scripts b/.agents/skills/gstack/scripts
new file mode 120000
index 0000000..bbaa944
--- /dev/null
+++ b/.agents/skills/gstack/scripts
@@ -0,0 +1 @@
+../../../scripts
\ No newline at end of file
diff --git a/.agents/skills/gstack/setup b/.agents/skills/gstack/setup
new file mode 120000
index 0000000..17bf0c4
--- /dev/null
+++ b/.agents/skills/gstack/setup
@@ -0,0 +1 @@
+../../../setup
\ No newline at end of file
diff --git a/.agents/skills/gstack/setup-browser-cookies b/.agents/skills/gstack/setup-browser-cookies
new file mode 120000
index 0000000..93251df
--- /dev/null
+++ b/.agents/skills/gstack/setup-browser-cookies
@@ -0,0 +1 @@
+../../../setup-browser-cookies
\ No newline at end of file
diff --git a/.agents/skills/gstack/ship b/.agents/skills/gstack/ship
new file mode 120000
index 0000000..e445d5c
--- /dev/null
+++ b/.agents/skills/gstack/ship
@@ -0,0 +1 @@
+../../../ship
\ No newline at end of file
diff --git a/document-release/SKILL.md b/document-release/SKILL.md
index 694d64d..c53a45d 100644
--- a/document-release/SKILL.md
+++ b/document-release/SKILL.md
@@ -285,7 +285,7 @@ After auditing each file individually, do a cross-doc consistency pass:
 
 ## Step 7: TODOS.md Cleanup
 
-This is a second pass that complements `/ship`'s Step 5.5. Read `review/TODOS-format.md` (if
+This is a second pass that complements `/ship`'s Step 5.5. Read `.claude/skills/review/TODOS-format.md` (if
 available) for the canonical TODO item format.
 
 If TODOS.md does not exist, skip this step.
diff --git a/document-release/SKILL.md.tmpl b/document-release/SKILL.md.tmpl
index e38c243..57ad814 100644
--- a/document-release/SKILL.md.tmpl
+++ b/document-release/SKILL.md.tmpl
@@ -203,7 +203,7 @@ After auditing each file individually, do a cross-doc consistency pass:
 
 ## Step 7: TODOS.md Cleanup
 
-This is a second pass that complements `/ship`'s Step 5.5. Read `review/TODOS-format.md` (if
+This is a second pass that complements `/ship`'s Step 5.5. Read `{{REVIEW_ROOT}}/TODOS-format.md` (if
 available) for the canonical TODO item format.
 
 If TODOS.md does not exist, skip this step.
diff --git a/gstack-upgrade/SKILL.md.tmpl b/gstack-upgrade/SKILL.md.tmpl
index a441b8d..e19166a 100644
--- a/gstack-upgrade/SKILL.md.tmpl
+++ b/gstack-upgrade/SKILL.md.tmpl
@@ -25,7 +25,7 @@ First, check if auto-upgrade is enabled:
 ```bash
 _AUTO=""
 [ "${GSTACK_AUTO_UPGRADE:-}" = "1" ] && _AUTO="true"
-[ -z "$_AUTO" ] && _AUTO=$(~/.claude/skills/gstack/bin/gstack-config get auto_upgrade 2>/dev/null || true)
+[ -z "$_AUTO" ] && _AUTO=$({{SKILL_ROOT}}/bin/gstack-config get auto_upgrade 2>/dev/null || true)
 echo "AUTO_UPGRADE=$_AUTO"
 ```
 
@@ -39,7 +39,7 @@ echo "AUTO_UPGRADE=$_AUTO"
 
 **If "Always keep me up to date":**
 ```bash
-~/.claude/skills/gstack/bin/gstack-config set auto_upgrade true
+{{SKILL_ROOT}}/bin/gstack-config set auto_upgrade true
 ```
 Tell user: "Auto-upgrade enabled. Future updates will install automatically." Then proceed to Step 2.
 
@@ -65,26 +65,26 @@ Tell user the snooze duration: "Next reminder in 24h" (or 48h or 1 week, dependi
 
 **If "Never ask again":**
 ```bash
-~/.claude/skills/gstack/bin/gstack-config set update_check false
+{{SKILL_ROOT}}/bin/gstack-config set update_check false
 ```
-Tell user: "Update checks disabled. Run `~/.claude/skills/gstack/bin/gstack-config set update_check true` to re-enable."
+Tell user: "Update checks disabled. Run `{{SKILL_ROOT}}/bin/gstack-config set update_check true` to re-enable."
 Continue with the current skill.
 
 ### Step 2: Detect install type
 
 ```bash
-if [ -d "$HOME/.claude/skills/gstack/.git" ]; then
+if [ -d "$HOME/{{LOCAL_SKILL_ROOT}}/.git" ]; then
   INSTALL_TYPE="global-git"
-  INSTALL_DIR="$HOME/.claude/skills/gstack"
-elif [ -d ".claude/skills/gstack/.git" ]; then
+  INSTALL_DIR="$HOME/{{LOCAL_SKILL_ROOT}}"
+elif [ -d "{{LOCAL_SKILL_ROOT}}/.git" ]; then
   INSTALL_TYPE="local-git"
-  INSTALL_DIR=".claude/skills/gstack"
-elif [ -d ".claude/skills/gstack" ]; then
+  INSTALL_DIR="{{LOCAL_SKILL_ROOT}}"
+elif [ -d "{{LOCAL_SKILL_ROOT}}" ]; then
   INSTALL_TYPE="vendored"
-  INSTALL_DIR=".claude/skills/gstack"
-elif [ -d "$HOME/.claude/skills/gstack" ]; then
+  INSTALL_DIR="{{LOCAL_SKILL_ROOT}}"
+elif [ -d "$HOME/{{LOCAL_SKILL_ROOT}}" ]; then
   INSTALL_TYPE="vendored-global"
-  INSTALL_DIR="$HOME/.claude/skills/gstack"
+  INSTALL_DIR="$HOME/{{LOCAL_SKILL_ROOT}}"
 else
   echo "ERROR: gstack not found"
   exit 1
@@ -134,11 +134,11 @@ Use the install directory from Step 2. Check if there's also a local vendored co
 ```bash
 _ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
 LOCAL_GSTACK=""
-if [ -n "$_ROOT" ] && [ -d "$_ROOT/.claude/skills/gstack" ]; then
-  _RESOLVED_LOCAL=$(cd "$_ROOT/.claude/skills/gstack" && pwd -P)
+if [ -n "$_ROOT" ] && [ -d "$_ROOT/{{LOCAL_SKILL_ROOT}}" ]; then
+  _RESOLVED_LOCAL=$(cd "$_ROOT/{{LOCAL_SKILL_ROOT}}" && pwd -P)
   _RESOLVED_PRIMARY=$(cd "$INSTALL_DIR" && pwd -P)
   if [ "$_RESOLVED_LOCAL" != "$_RESOLVED_PRIMARY" ]; then
-    LOCAL_GSTACK="$_ROOT/.claude/skills/gstack"
+    LOCAL_GSTACK="$_ROOT/{{LOCAL_SKILL_ROOT}}"
   fi
 fi
 echo "LOCAL_GSTACK=$LOCAL_GSTACK"
@@ -152,7 +152,7 @@ rm -rf "$LOCAL_GSTACK/.git"
 cd "$LOCAL_GSTACK" && ./setup
 rm -rf "$LOCAL_GSTACK.bak"
 ```
-Tell user: "Also updated vendored copy at `$LOCAL_GSTACK` — commit `.claude/skills/gstack/` when you're ready."
+Tell user: "Also updated vendored copy at `$LOCAL_GSTACK` — commit `{{LOCAL_SKILL_ROOT}}/` when you're ready."
 
 ### Step 5: Write marker + clear cache
 
@@ -191,7 +191,7 @@ When invoked directly as `/gstack-upgrade` (not from a preamble):
 
 1. Force a fresh update check (bypass cache):
 ```bash
-~/.claude/skills/gstack/bin/gstack-update-check --force
+{{SKILL_ROOT}}/bin/gstack-update-check --force
 ```
 Use the output to determine if an upgrade is available.
 
diff --git a/package.json b/package.json
index a5044b7..d277b0c 100644
--- a/package.json
+++ b/package.json
@@ -8,7 +8,7 @@
     "browse": "./browse/dist/browse"
   },
   "scripts": {
-    "build": "bun run gen:skill-docs && bun build --compile browse/src/cli.ts --outfile browse/dist/browse && bun build --compile browse/src/find-browse.ts --outfile browse/dist/find-browse && git rev-parse HEAD > browse/dist/.version && rm -f .*.bun-build",
+    "build": "bun run gen:skill-docs && bun run gen:skill-docs --host codex && bun build --compile browse/src/cli.ts --outfile browse/dist/browse && bun build --compile browse/src/find-browse.ts --outfile browse/dist/find-browse && git rev-parse HEAD > browse/dist/.version && rm -f .*.bun-build",
     "gen:skill-docs": "bun run scripts/gen-skill-docs.ts",
     "dev": "bun run browse/src/cli.ts",
     "server": "bun run browse/src/server.ts",
diff --git a/plan-ceo-review/SKILL.md.tmpl b/plan-ceo-review/SKILL.md.tmpl
index 9902faf..c658a0d 100644
--- a/plan-ceo-review/SKILL.md.tmpl
+++ b/plan-ceo-review/SKILL.md.tmpl
@@ -389,7 +389,7 @@ Complete table of every method that can fail, every exception class, rescued sta
 Any row with RESCUED=N, TEST=N, USER SEES=Silent → **CRITICAL GAP**.
 
 ### TODOS.md updates
-Present each potential TODO as its own individual AskUserQuestion. Never batch TODOs — one per question. Never silently skip this step. Follow the format in `.claude/skills/review/TODOS-format.md`.
+Present each potential TODO as its own individual AskUserQuestion. Never batch TODOs — one per question. Never silently skip this step. Follow the format in `{{REVIEW_ROOT}}/TODOS-format.md`.
 
 For each TODO, describe:
 * **What:** One-line description of the work.
diff --git a/plan-eng-review/SKILL.md.tmpl b/plan-eng-review/SKILL.md.tmpl
index 410b072..b30d3df 100644
--- a/plan-eng-review/SKILL.md.tmpl
+++ b/plan-eng-review/SKILL.md.tmpl
@@ -148,7 +148,7 @@ Every plan review MUST produce a "NOT in scope" section listing work that was co
 List existing code/flows that already partially solve sub-problems in this plan, and whether the plan reuses them or unnecessarily rebuilds them.
 
 ### TODOS.md updates
-After all review sections are complete, present each potential TODO as its own individual AskUserQuestion. Never batch TODOs — one per question. Never silently skip this step. Follow the format in `.claude/skills/review/TODOS-format.md`.
+After all review sections are complete, present each potential TODO as its own individual AskUserQuestion. Never batch TODOs — one per question. Never silently skip this step. Follow the format in `{{REVIEW_ROOT}}/TODOS-format.md`.
 
 For each TODO, describe:
 * **What:** One-line description of the work.
diff --git a/review/SKILL.md.tmpl b/review/SKILL.md.tmpl
index c122ada..dc0c2d7 100644
--- a/review/SKILL.md.tmpl
+++ b/review/SKILL.md.tmpl
@@ -34,7 +34,7 @@ You are running the `/review` workflow. Analyze the current branch's diff agains
 
 ## Step 2: Read the checklist
 
-Read `.claude/skills/review/checklist.md`.
+Read `{{REVIEW_ROOT}}/checklist.md`.
 
 **If the file cannot be read, STOP and report the error.** Do not proceed without the checklist.
 
@@ -42,7 +42,7 @@ Read `.claude/skills/review/checklist.md`.
 
 ## Step 2.5: Check for Greptile review comments
 
-Read `.claude/skills/review/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps.
+Read `{{REVIEW_ROOT}}/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps.
 
 **If no PR exists, `gh` fails, API returns an error, or there are zero Greptile comments:** Skip this step silently. Greptile integration is additive — the review works without it.
 
diff --git a/scripts/gen-skill-docs.ts b/scripts/gen-skill-docs.ts
index 7a0b85a..e4453f8 100644
--- a/scripts/gen-skill-docs.ts
+++ b/scripts/gen-skill-docs.ts
@@ -16,10 +16,106 @@ import * as path from 'path';
 
 const ROOT = path.resolve(import.meta.dir, '..');
 const DRY_RUN = process.argv.includes('--dry-run');
+const HOST_ARG_INDEX = process.argv.indexOf('--host');
+const HOST = HOST_ARG_INDEX === -1 ? 'claude' : process.argv[HOST_ARG_INDEX + 1];
+
+type Host = 'claude' | 'codex';
+
+interface TemplateContext {
+  host: Host;
+  sourcePath: string;
+  relSourcePath: string;
+  skillDirName: string | null;
+}
+
+if (HOST !== 'claude' && HOST !== 'codex') {
+  throw new Error(`Invalid --host value: ${HOST}. Expected "claude" or "codex".`);
+}
+
+const KNOWN_SKILLS = [
+  'browse',
+  'design-consultation',
+  'document-release',
+  'gstack-upgrade',
+  'plan-ceo-review',
+  'plan-design-review',
+  'plan-eng-review',
+  'qa',
+  'qa-design-review',
+  'qa-only',
+  'retro',
+  'review',
+  'setup-browser-cookies',
+  'ship',
+] as const;
+
+function resolveCodexSkillName(skillDirName: string | null): string {
+  if (!skillDirName) return 'gstack';
+  return skillDirName.startsWith('gstack-') ? skillDirName : `gstack-${skillDirName}`;
+}
+
+function getSkillRoot(host: Host): string {
+  return host === 'codex' ? '$HOME/.agents/skills/gstack' : '~/.claude/skills/gstack';
+}
+
+function getInvocation(host: Host, skill: string): string {
+  return host === 'codex' ? `$${resolveCodexSkillName(skill)}` : `/${skill}`;
+}
+
+function escapeRegExp(value: string): string {
+  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+}
+
+function parseDescription(frontmatter: string): string {
+  const blockMatch = frontmatter.match(/^description:\s*\|\n((?:^[ \t].*\n?)*)/m);
+  if (blockMatch) {
+    return blockMatch[1]
+      .split('\n')
+      .map(line => line.replace(/^[ \t]{2}/, ''))
+      .join('\n')
+      .trim();
+  }
+
+  const inlineMatch = frontmatter.match(/^description:\s*(.+)$/m);
+  if (!inlineMatch) {
+    throw new Error('Missing description in template frontmatter');
+  }
+  return inlineMatch[1].trim();
+}
+
+function splitFrontmatter(content: string): { frontmatter: string; body: string } {
+  if (!content.startsWith('---\n')) {
+    return { frontmatter: '', body: content };
+  }
+
+  const end = content.indexOf('\n---\n', 4);
+  if (end === -1) {
+    throw new Error('Unterminated frontmatter');
+  }
+
+  return {
+    frontmatter: content.slice(4, end),
+    body: content.slice(end + 5),
+  };
+}
+
+function generateFrontmatter(context: TemplateContext, frontmatter: string): string {
+  if (!frontmatter) return '';
+  if (context.host === 'claude') {
+    return `---\n${frontmatter}\n---\n`;
+  }
+
+  const description = replaceInvocations(parseDescription(frontmatter), context.host)
+    .split('\n')
+    .map(line => `  ${line}`.trimEnd())
+    .join('\n');
+
+  return `---\nname: ${resolveCodexSkillName(context.skillDirName)}\ndescription: |\n${description}\n---\n`;
+}
 
 // ─── Placeholder Resolvers ──────────────────────────────────
 
-function generateCommandReference(): string {
+function generateCommandReference(_context: TemplateContext): string {
   // Group commands by category
   const groups = new Map<string, Array<{ command: string; description: string; usage?: string }>>();
   for (const [cmd, meta] of Object.entries(COMMAND_DESCRIPTIONS)) {
@@ -55,7 +151,7 @@ function generateCommandReference(): string {
   return sections.join('\n').trimEnd();
 }
 
-function generateSnapshotFlags(): string {
+function generateSnapshotFlags(_context: TemplateContext): string {
   const lines: string[] = [
     'The snapshot is your primary tool for understanding and interacting with pages.',
     '',
@@ -94,8 +190,9 @@ function generateSnapshotFlags(): string {
   return lines.join('\n');
 }
 
-function generatePreamble(): string {
-  return `## Preamble (run first)
+function generatePreamble(context: TemplateContext): string {
+  if (context.host === 'claude') {
+    return `## Preamble (run first)
 
 \`\`\`bash
 _UPD=$(~/.claude/skills/gstack/bin/gstack-update-check 2>/dev/null || .claude/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
@@ -158,11 +255,78 @@ Hey gstack team — ran into this while using /{skill-name}:
 **Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill}
 \`\`\`
 
+Slug: lowercase, hyphens, max 60 chars (e.g. \`browse-js-no-await\`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"`;
+  }
+
+  return `## Preamble (run first)
+
+\`\`\`bash
+_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+[ -n "$_UPD" ] && echo "$_UPD" || true
+mkdir -p ~/.gstack/sessions
+touch ~/.gstack/sessions/"$PPID"
+_SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
+find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
+_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
+echo "BRANCH: $_BRANCH"
+\`\`\`
+
+If output shows \`UPGRADE_AVAILABLE <old> <new>\`: read \`${getSkillRoot(context.host)}/gstack-upgrade/SKILL.md\` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If \`JUST_UPGRADED <from> <to>\`: tell user "Running gstack v{to} (just updated!)" and continue.
+
+## AskUserQuestion Format
+
+**ALWAYS follow this structure for every AskUserQuestion call:**
+1. **Re-ground:** State the project, the current branch (use the \`_BRANCH\` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences)
+2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called.
+3. **Recommend:** \`RECOMMENDATION: Choose [X] because [one-line reason]\`
+4. **Options:** Lettered options: \`A) ... B) ... C) ...\`
+
+Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex.
+
+Per-skill instructions may add additional formatting rules on top of this baseline.
+
+## Contributor Mode
+
+If \`_CONTRIB\` is \`true\`: you are in **contributor mode**. You're a gstack user who also helps make it better.
+
+**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better!
+
+**Calibration — this is the bar:** For example, \`$B js "await fetch(...)"\` used to fail with \`SyntaxError: await is only valid in async functions\` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore.
+
+**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs.
+
+**To file:** write \`~/.gstack/contributor-logs/{slug}.md\` with **all sections below** (do not truncate — include every section through the Date/Version footer):
+
+\`\`\`
+# {Title}
+
+Hey gstack team — ran into this while using ${getInvocation(context.host, '{skill-name}')}:
+
+**What I was trying to do:** {what the user/agent was attempting}
+**What happened instead:** {what actually happened}
+**My rating:** {0-10} — {one sentence on why it wasn't a 10}
+
+## Steps to reproduce
+1. {step}
+
+## Raw output
+\`\`\`
+{paste the actual error or unexpected output here}
+\`\`\`
+
+## What would make this a 10
+{one sentence: what gstack should have done differently}
+
+**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** ${getInvocation(context.host, '{skill}')}
+\`\`\`
+
 Slug: lowercase, hyphens, max 60 chars (e.g. \`browse-js-no-await\`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}"`;
 }
 
-function generateBrowseSetup(): string {
-  return `## SETUP (run this check BEFORE any browse command)
+function generateBrowseSetup(context: TemplateContext): string {
+  if (context.host === 'claude') {
+    return `## SETUP (run this check BEFORE any browse command)
 
 \`\`\`bash
 _ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
@@ -182,7 +346,29 @@ If \`NEEDS_SETUP\`:
 3. If \`bun\` is not installed: \`curl -fsSL https://bun.sh/install | bash\``;
 }
 
-function generateBaseBranchDetect(): string {
+  return `## SETUP (run this check BEFORE any browse command)
+
+\`\`\`bash
+_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
+B=""
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
+if [ -x "$B" ]; then
+  echo "READY: $B"
+else
+  echo "NEEDS_SETUP"
+fi
+\`\`\`
+
+If \`NEEDS_SETUP\`:
+1. Tell the user: "gstack browse needs a one-time build (~10 seconds). OK to proceed?" Then STOP and wait.
+2. Run: \`cd <SKILL_DIR> && ./setup --host codex\`
+3. If \`bun\` is not installed: \`curl -fsSL https://bun.sh/install | bash\``;
+}
+
+function generateBaseBranchDetect(_context: TemplateContext): string {
   return `## Step 0: Detect base branch
 
 Determine which branch this PR targets. Use the result as "the base branch" in all subsequent steps.
@@ -203,7 +389,7 @@ branch name wherever the instructions say "the base branch."
 ---`;
 }
 
-function generateQAMethodology(): string {
+function generateQAMethodology(context: TemplateContext): string {
   return `## Modes
 
 ### Diff-aware (automatic when on a feature branch with no URL)
@@ -267,7 +453,7 @@ Run full mode, then load \`baseline.json\` from a previous run. Diff: which issu
 
 1. Find browse binary (see Setup above)
 2. Create output directories
-3. Copy report template from \`qa/templates/qa-report-template.md\` to output dir
+3. Copy report template from \`${context.host === 'codex' ? `${getSkillRoot(context.host)}/qa` : 'qa'}/templates/qa-report-template.md\` to output dir
 4. Start timer for duration tracking
 
 ### Phase 2: Authenticate (if needed)
@@ -323,7 +509,7 @@ $B snapshot -i -a -o "$REPORT_DIR/screenshots/page-name.png"
 $B console --errors
 \`\`\`
 
-Then follow the **per-page exploration checklist** (see \`qa/references/issue-taxonomy.md\`):
+Then follow the **per-page exploration checklist** (see \`${context.host === 'codex' ? `${getSkillRoot(context.host)}/qa` : 'qa'}/references/issue-taxonomy.md\`):
 
 1. **Visual scan** — Look at the annotated screenshot for layout issues
 2. **Interactive elements** — Click buttons, links, controls. Do they work?
@@ -370,7 +556,7 @@ $B snapshot -D
 $B snapshot -i -a -o "$REPORT_DIR/screenshots/issue-002.png"
 \`\`\`
 
-**Write each issue to the report immediately** using the template format from \`qa/templates/qa-report-template.md\`.
+**Write each issue to the report immediately** using the template format from \`${context.host === 'codex' ? `${getSkillRoot(context.host)}/qa` : 'qa'}/templates/qa-report-template.md\`.
 
 ### Phase 6: Wrap Up
 
@@ -812,7 +998,7 @@ Tie everything to user goals and product objectives. Always suggest specific imp
 10. **Depth over breadth.** 5-10 well-documented findings with screenshots and specific suggestions > 20 vague observations.`;
 }
 
-const RESOLVERS: Record<string, () => string> = {
+const RESOLVERS: Record<string, (context: TemplateContext) => string> = {
   COMMAND_REFERENCE: generateCommandReference,
   SNAPSHOT_FLAGS: generateSnapshotFlags,
   PREAMBLE: generatePreamble,
@@ -820,39 +1006,78 @@ const RESOLVERS: Record<string, () => string> = {
   BASE_BRANCH_DETECT: generateBaseBranchDetect,
   QA_METHODOLOGY: generateQAMethodology,
   DESIGN_METHODOLOGY: generateDesignMethodology,
+  SKILL_ROOT: context => getSkillRoot(context.host),
+  LOCAL_SKILL_ROOT: context => context.host === 'codex' ? '.agents/skills/gstack' : '.claude/skills/gstack',
+  REVIEW_ROOT: context => context.host === 'codex' ? `${getSkillRoot(context.host)}/review` : '.claude/skills/review',
 };
 
 // ─── Template Processing ────────────────────────────────────
 
-const GENERATED_HEADER = `<!-- AUTO-GENERATED from {{SOURCE}} — do not edit directly -->\n<!-- Regenerate: bun run gen:skill-docs -->\n`;
+const GENERATED_HEADER = `<!-- AUTO-GENERATED from {{SOURCE}} — do not edit directly -->\n<!-- Regenerate: {{COMMAND}} -->\n`;
+
+function resolveOutputPath(context: TemplateContext): string {
+  if (context.host === 'claude') {
+    return context.sourcePath.replace(/\.tmpl$/, '');
+  }
+
+  return path.join(ROOT, '.agents', 'skills', resolveCodexSkillName(context.skillDirName), 'SKILL.md');
+}
+
+function buildHeader(context: TemplateContext): string {
+  const source = context.host === 'claude'
+    ? path.basename(context.sourcePath)
+    : context.relSourcePath;
+  const command = context.host === 'claude'
+    ? 'bun run gen:skill-docs'
+    : 'bun run gen:skill-docs --host codex';
+
+  return GENERATED_HEADER
+    .replace('{{SOURCE}}', source)
+    .replace('{{COMMAND}}', command);
+}
+
+function replaceInvocations(content: string, host: Host): string {
+  if (host === 'claude') return content;
+
+  let transformed = content;
+  for (const skill of KNOWN_SKILLS) {
+    const pattern = new RegExp(`(^|[^\\w.-])/${escapeRegExp(skill)}(?=\\b)`, 'gm');
+    transformed = transformed.replace(pattern, (_match, prefix) => `${prefix}${getInvocation(host, skill)}`);
+  }
+
+  return transformed;
+}
+
+function transformForHost(content: string, context: TemplateContext): string {
+  return replaceInvocations(content, context.host);
+}
 
 function processTemplate(tmplPath: string): { outputPath: string; content: string } {
   const tmplContent = fs.readFileSync(tmplPath, 'utf-8');
-  const relTmplPath = path.relative(ROOT, tmplPath);
-  const outputPath = tmplPath.replace(/\.tmpl$/, '');
+  const context: TemplateContext = {
+    host: HOST,
+    sourcePath: tmplPath,
+    relSourcePath: path.relative(ROOT, tmplPath),
+    skillDirName: path.dirname(path.relative(ROOT, tmplPath)) === '.' ? null : path.dirname(path.relative(ROOT, tmplPath)),
+  };
+  const outputPath = resolveOutputPath(context);
+  const { frontmatter, body } = splitFrontmatter(tmplContent);
 
   // Replace placeholders
-  let content = tmplContent.replace(/\{\{(\w+)\}\}/g, (match, name) => {
+  let content = body.replace(/\{\{(\w+)\}\}/g, (match, name) => {
     const resolver = RESOLVERS[name];
-    if (!resolver) throw new Error(`Unknown placeholder {{${name}}} in ${relTmplPath}`);
-    return resolver();
+    if (!resolver) throw new Error(`Unknown placeholder {{${name}}} in ${context.relSourcePath}`);
+    return resolver(context);
   });
 
   // Check for any remaining unresolved placeholders
   const remaining = content.match(/\{\{(\w+)\}\}/g);
   if (remaining) {
-    throw new Error(`Unresolved placeholders in ${relTmplPath}: ${remaining.join(', ')}`);
+    throw new Error(`Unresolved placeholders in ${context.relSourcePath}: ${remaining.join(', ')}`);
   }
 
-  // Prepend generated header (after frontmatter)
-  const header = GENERATED_HEADER.replace('{{SOURCE}}', path.basename(tmplPath));
-  const fmEnd = content.indexOf('---', content.indexOf('---') + 3);
-  if (fmEnd !== -1) {
-    const insertAt = content.indexOf('\n', fmEnd) + 1;
-    content = content.slice(0, insertAt) + header + content.slice(insertAt);
-  } else {
-    content = header + content;
-  }
+  content = transformForHost(content, context);
+  content = `${generateFrontmatter(context, frontmatter)}${buildHeader(context)}${content}`;
 
   return { outputPath, content };
 }
@@ -861,27 +1086,25 @@ function processTemplate(tmplPath: string): { outputPath: string; content: strin
 
 function findTemplates(): string[] {
   const templates: string[] = [];
-  const candidates = [
-    path.join(ROOT, 'SKILL.md.tmpl'),
-    path.join(ROOT, 'browse', 'SKILL.md.tmpl'),
-    path.join(ROOT, 'qa', 'SKILL.md.tmpl'),
-    path.join(ROOT, 'qa-only', 'SKILL.md.tmpl'),
-    path.join(ROOT, 'setup-browser-cookies', 'SKILL.md.tmpl'),
-    path.join(ROOT, 'ship', 'SKILL.md.tmpl'),
-    path.join(ROOT, 'review', 'SKILL.md.tmpl'),
-    path.join(ROOT, 'plan-ceo-review', 'SKILL.md.tmpl'),
-    path.join(ROOT, 'plan-eng-review', 'SKILL.md.tmpl'),
-    path.join(ROOT, 'retro', 'SKILL.md.tmpl'),
-    path.join(ROOT, 'gstack-upgrade', 'SKILL.md.tmpl'),
-    path.join(ROOT, 'plan-design-review', 'SKILL.md.tmpl'),
-    path.join(ROOT, 'qa-design-review', 'SKILL.md.tmpl'),
-    path.join(ROOT, 'design-consultation', 'SKILL.md.tmpl'),
-    path.join(ROOT, 'document-release', 'SKILL.md.tmpl'),
-  ];
-  for (const p of candidates) {
-    if (fs.existsSync(p)) templates.push(p);
+  const stack = [ROOT];
+
+  while (stack.length > 0) {
+    const current = stack.pop()!;
+    for (const entry of fs.readdirSync(current, { withFileTypes: true })) {
+      if (entry.name === '.git' || entry.name === '.claude' || entry.name === '.agents' || entry.name === 'node_modules') {
+        continue;
+      }
+
+      const fullPath = path.join(current, entry.name);
+      if (entry.isDirectory()) {
+        stack.push(fullPath);
+      } else if (entry.isFile() && entry.name === 'SKILL.md.tmpl') {
+        templates.push(fullPath);
+      }
+    }
   }
-  return templates;
+
+  return templates.sort((left, right) => left.localeCompare(right));
 }
 
 let hasChanges = false;
@@ -899,12 +1122,14 @@ for (const tmplPath of findTemplates()) {
       console.log(`FRESH: ${relOutput}`);
     }
   } else {
+    fs.mkdirSync(path.dirname(outputPath), { recursive: true });
     fs.writeFileSync(outputPath, content);
     console.log(`GENERATED: ${relOutput}`);
   }
 }
 
 if (DRY_RUN && hasChanges) {
-  console.error('\nGenerated SKILL.md files are stale. Run: bun run gen:skill-docs');
+  const command = HOST === 'claude' ? 'bun run gen:skill-docs' : 'bun run gen:skill-docs --host codex';
+  console.error(`\nGenerated SKILL.md files are stale. Run: ${command}`);
   process.exit(1);
 }
diff --git a/ship/SKILL.md.tmpl b/ship/SKILL.md.tmpl
index 9339e90..7883a1f 100644
--- a/ship/SKILL.md.tmpl
+++ b/ship/SKILL.md.tmpl
@@ -154,7 +154,7 @@ If multiple suites need to run, run them sequentially (each needs a test lane).
 
 Review the diff for structural issues that tests don't catch.
 
-1. Read `.claude/skills/review/checklist.md`. If the file cannot be read, **STOP** and report the error.
+1. Read `{{REVIEW_ROOT}}/checklist.md`. If the file cannot be read, **STOP** and report the error.
 
 2. Run `git diff origin/<base>` to get the full diff (scoped to feature changes against the freshly-fetched base branch).
 
@@ -188,7 +188,7 @@ Save the review output — it goes into the PR body in Step 8.
 
 ## Step 3.75: Address Greptile review comments (if PR exists)
 
-Read `.claude/skills/review/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps.
+Read `{{REVIEW_ROOT}}/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps.
 
 **If no PR exists, `gh` fails, API returns an error, or there are zero Greptile comments:** Skip this step silently. Continue to Step 4.
 
@@ -270,7 +270,7 @@ For each classified comment:
 
 Cross-reference the project's TODOS.md against the changes being shipped. Mark completed items automatically; prompt only if the file is missing or disorganized.
 
-Read `.claude/skills/review/TODOS-format.md` for the canonical format reference.
+Read `{{REVIEW_ROOT}}/TODOS-format.md` for the canonical format reference.
 
 **1. Check if TODOS.md exists** in the repository root.
 

From 79610157ef611a87176be771eb6437d1d8200eea Mon Sep 17 00:00:00 2001
From: Andrew Campbell <andrewcampbell@Andrews-Mini.lan>
Date: Mon, 16 Mar 2026 23:50:04 -0500
Subject: [PATCH 2/6] feat: add codex setup and browse discovery

Teach the install and development helpers how to wire Codex skill directories, and prefer .agents paths when locating the browse binary.
---
 bin/dev-setup             |  42 +++++++---
 bin/dev-teardown          |   7 +-
 browse/src/find-browse.ts |   6 ++
 setup                     | 165 +++++++++++++++++++++++++++++---------
 4 files changed, 164 insertions(+), 56 deletions(-)

diff --git a/bin/dev-setup b/bin/dev-setup
index 6c5619d..a679ec5 100755
--- a/bin/dev-setup
+++ b/bin/dev-setup
@@ -1,14 +1,13 @@
 #!/usr/bin/env bash
 # Set up gstack for local development — test skills from within this repo.
 #
-# Creates .claude/skills/gstack → (symlink to repo root) so Claude Code
-# discovers skills from your working tree. Changes take effect immediately.
+# Creates .claude/skills/gstack → repo root so Claude Code resolves skills from
+# your working tree. Codex reads the checked-in .agents/skills tree directly, so
+# this script also ensures the support symlinks inside .agents/skills/gstack are
+# present for browse/setup/helper-doc access.
 #
-# Also copies .env from the main worktree if this is a Conductor workspace
-# or git worktree (so API keys carry over automatically).
-#
-# Usage: bin/dev-setup       # set up
-#        bin/dev-teardown    # clean up
+# Also copies .env from the main worktree if this is a git worktree so API keys
+# carry over automatically.
 set -e
 
 REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
@@ -32,10 +31,8 @@ fi
 mkdir -p "$REPO_ROOT/.claude/skills"
 
 # 4. Symlink .claude/skills/gstack → repo root
-# This makes setup think it's inside a real .claude/skills/ directory
 GSTACK_LINK="$REPO_ROOT/.claude/skills/gstack"
 if [ -L "$GSTACK_LINK" ]; then
-  echo "Updating existing symlink..."
   rm "$GSTACK_LINK"
 elif [ -d "$GSTACK_LINK" ]; then
   echo "Error: .claude/skills/gstack is a real directory, not a symlink." >&2
@@ -47,8 +44,31 @@ ln -s "$REPO_ROOT" "$GSTACK_LINK"
 # 5. Run setup via the symlink so it detects .claude/skills/ as its parent
 "$GSTACK_LINK/setup"
 
+# 6. Ensure Codex support links exist inside .agents/skills/gstack
+mkdir -p "$REPO_ROOT/.agents/skills/gstack"
+ln -snf ../../../bin "$REPO_ROOT/.agents/skills/gstack/bin"
+ln -snf ../../../browse "$REPO_ROOT/.agents/skills/gstack/browse"
+ln -snf ../../../design-consultation "$REPO_ROOT/.agents/skills/gstack/design-consultation"
+ln -snf ../../../document-release "$REPO_ROOT/.agents/skills/gstack/document-release"
+ln -snf ../../../gstack-upgrade "$REPO_ROOT/.agents/skills/gstack/gstack-upgrade"
+ln -snf ../../../plan-ceo-review "$REPO_ROOT/.agents/skills/gstack/plan-ceo-review"
+ln -snf ../../../plan-design-review "$REPO_ROOT/.agents/skills/gstack/plan-design-review"
+ln -snf ../../../plan-eng-review "$REPO_ROOT/.agents/skills/gstack/plan-eng-review"
+ln -snf ../../../qa "$REPO_ROOT/.agents/skills/gstack/qa"
+ln -snf ../../../qa-design-review "$REPO_ROOT/.agents/skills/gstack/qa-design-review"
+ln -snf ../../../qa-only "$REPO_ROOT/.agents/skills/gstack/qa-only"
+ln -snf ../../../review "$REPO_ROOT/.agents/skills/gstack/review"
+ln -snf ../../../retro "$REPO_ROOT/.agents/skills/gstack/retro"
+ln -snf ../../../scripts "$REPO_ROOT/.agents/skills/gstack/scripts"
+ln -snf ../../../setup "$REPO_ROOT/.agents/skills/gstack/setup"
+ln -snf ../../../setup-browser-cookies "$REPO_ROOT/.agents/skills/gstack/setup-browser-cookies"
+ln -snf ../../../ship "$REPO_ROOT/.agents/skills/gstack/ship"
+ln -snf ../../../CHANGELOG.md "$REPO_ROOT/.agents/skills/gstack/CHANGELOG.md"
+ln -snf ../../../package.json "$REPO_ROOT/.agents/skills/gstack/package.json"
+ln -snf ../../../VERSION "$REPO_ROOT/.agents/skills/gstack/VERSION"
+
 echo ""
-echo "Dev mode active. Skills resolve from this working tree."
-echo "Edit any SKILL.md and test immediately — no copy/deploy needed."
+echo "Dev mode active. Claude resolves from .claude/skills/gstack; Codex resolves from .agents/skills/."
+echo "Edit skill docs, regenerate if needed, and test immediately — no copy/deploy needed."
 echo ""
 echo "To tear down: bin/dev-teardown"
diff --git a/bin/dev-teardown b/bin/dev-teardown
index e333a75..1dd3cd4 100755
--- a/bin/dev-teardown
+++ b/bin/dev-teardown
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Remove local dev skill symlinks. Restores global gstack as the active install.
+# Remove local Claude dev symlinks. Codex uses the checked-in .agents/skills tree.
 set -e
 
 REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
@@ -10,7 +10,6 @@ if [ ! -d "$SKILLS_DIR" ]; then
   exit 0
 fi
 
-# Remove individual skill symlinks
 removed=()
 for link in "$SKILLS_DIR"/*/; do
   name="$(basename "$link")"
@@ -21,13 +20,11 @@ for link in "$SKILLS_DIR"/*/; do
   fi
 done
 
-# Remove the gstack symlink
 if [ -L "$SKILLS_DIR/gstack" ]; then
   rm "$SKILLS_DIR/gstack"
   removed+=("gstack")
 fi
 
-# Clean up empty dirs
 rmdir "$SKILLS_DIR" 2>/dev/null || true
 rmdir "$REPO_ROOT/.claude" 2>/dev/null || true
 
@@ -36,4 +33,4 @@ if [ ${#removed[@]} -gt 0 ]; then
 else
   echo "No symlinks found."
 fi
-echo "Dev mode deactivated. Global gstack (~/.claude/skills/gstack) is now active."
+echo "Dev mode deactivated. Global gstack (~/.claude/skills/gstack or ~/.agents/skills/gstack) is now active."
diff --git a/browse/src/find-browse.ts b/browse/src/find-browse.ts
index 44d76b4..3fdc7f1 100644
--- a/browse/src/find-browse.ts
+++ b/browse/src/find-browse.ts
@@ -30,10 +30,16 @@ export function locateBinary(): string | null {
 
   // Workspace-local takes priority (for development)
   if (root) {
+    const localAgents = join(root, '.agents', 'skills', 'gstack', 'browse', 'dist', 'browse');
+    if (existsSync(localAgents)) return localAgents;
+
     const local = join(root, '.claude', 'skills', 'gstack', 'browse', 'dist', 'browse');
     if (existsSync(local)) return local;
   }
 
+  const globalAgents = join(home, '.agents', 'skills', 'gstack', 'browse', 'dist', 'browse');
+  if (existsSync(globalAgents)) return globalAgents;
+
   // Global fallback
   const global = join(home, '.claude', 'skills', 'gstack', 'browse', 'dist', 'browse');
   if (existsSync(global)) return global;
diff --git a/setup b/setup
index 607c277..8f0b4de 100755
--- a/setup
+++ b/setup
@@ -1,40 +1,119 @@
 #!/usr/bin/env bash
-# gstack setup — build browser binary + register all skills with Claude Code
+# gstack setup — build browser binary + register skills for Claude and/or Codex
 set -e
 
+HOST="claude"
+if [ "${1:-}" = "--host" ] && [ -n "${2:-}" ]; then
+  HOST="$2"
+fi
+
 GSTACK_DIR="$(cd "$(dirname "$0")" && pwd)"
+GSTACK_DIR_REAL="$(cd "$(dirname "$0")" && pwd -P)"
 SKILLS_DIR="$(dirname "$GSTACK_DIR")"
-BROWSE_BIN="$GSTACK_DIR/browse/dist/browse"
+
+if [ -d "$GSTACK_DIR_REAL/.agents/skills" ]; then
+  REPO_ROOT="$GSTACK_DIR_REAL"
+  CODEX_ROOT="$GSTACK_DIR_REAL/.agents/skills/gstack"
+else
+  REPO_ROOT="$(cd "$GSTACK_DIR_REAL/../../.." && pwd -P)"
+  CODEX_ROOT="$GSTACK_DIR_REAL"
+fi
+
+BROWSE_BIN="$REPO_ROOT/browse/dist/browse"
 
 ensure_playwright_browser() {
   (
-    cd "$GSTACK_DIR"
+    cd "$REPO_ROOT"
     bun --eval 'import { chromium } from "playwright"; const browser = await chromium.launch(); await browser.close();'
   ) >/dev/null 2>&1
 }
 
+link_claude_local() {
+  local skills_basename
+  skills_basename="$(basename "$SKILLS_DIR")"
+
+  if [ "$skills_basename" != "skills" ]; then
+    echo "  (skipped Claude skill symlinks — not inside .claude/skills/)"
+    return
+  fi
+
+  local linked=()
+  for skill_dir in "$GSTACK_DIR"/*/; do
+    [ -f "$skill_dir/SKILL.md" ] || continue
+    local skill_name
+    skill_name="$(basename "$skill_dir")"
+    [ "$skill_name" = "node_modules" ] && continue
+    local target="$SKILLS_DIR/$skill_name"
+    if [ -L "$target" ] || [ ! -e "$target" ]; then
+      ln -snf "gstack/$skill_name" "$target"
+      linked+=("$skill_name")
+    fi
+  done
+
+  if [ ${#linked[@]} -gt 0 ]; then
+    echo "  linked Claude skills: ${linked[*]}"
+  fi
+}
+
+link_claude_global() {
+  local skills_dir="$HOME/.claude/skills"
+  mkdir -p "$skills_dir"
+
+  ln -snf "$REPO_ROOT" "$skills_dir/gstack"
+
+  local linked=()
+  for skill_dir in "$REPO_ROOT"/*/; do
+    [ -f "$skill_dir/SKILL.md" ] || continue
+    local skill_name
+    skill_name="$(basename "$skill_dir")"
+    [ "$skill_name" = "node_modules" ] && continue
+    ln -snf "gstack/$skill_name" "$skills_dir/$skill_name"
+    linked+=("$skill_name")
+  done
+
+  echo "  linked Claude skills: ${linked[*]}"
+}
+
+link_codex_global() {
+  local skills_dir="$HOME/.agents/skills"
+  mkdir -p "$skills_dir"
+
+  ln -snf "$CODEX_ROOT" "$skills_dir/gstack"
+
+  local linked=()
+  for skill_dir in "$REPO_ROOT/.agents/skills"/gstack*; do
+    [ -d "$skill_dir" ] || continue
+    local skill_name
+    skill_name="$(basename "$skill_dir")"
+    [ "$skill_name" = "gstack" ] && continue
+    ln -snf "$skill_dir" "$skills_dir/$skill_name"
+    linked+=("$skill_name")
+  done
+
+  echo "  linked Codex skills: gstack${linked:+ ${linked[*]}}"
+}
+
 # 1. Build browse binary if needed (smart rebuild: stale sources, package.json, lock)
 NEEDS_BUILD=0
 if [ ! -x "$BROWSE_BIN" ]; then
   NEEDS_BUILD=1
-elif [ -n "$(find "$GSTACK_DIR/browse/src" -type f -newer "$BROWSE_BIN" -print -quit 2>/dev/null)" ]; then
+elif [ -n "$(find "$REPO_ROOT/browse/src" -type f -newer "$BROWSE_BIN" -print -quit 2>/dev/null)" ]; then
   NEEDS_BUILD=1
-elif [ "$GSTACK_DIR/package.json" -nt "$BROWSE_BIN" ]; then
+elif [ "$REPO_ROOT/package.json" -nt "$BROWSE_BIN" ]; then
   NEEDS_BUILD=1
-elif [ -f "$GSTACK_DIR/bun.lock" ] && [ "$GSTACK_DIR/bun.lock" -nt "$BROWSE_BIN" ]; then
+elif [ -f "$REPO_ROOT/bun.lock" ] && [ "$REPO_ROOT/bun.lock" -nt "$BROWSE_BIN" ]; then
   NEEDS_BUILD=1
 fi
 
 if [ "$NEEDS_BUILD" -eq 1 ]; then
   echo "Building browse binary..."
   (
-    cd "$GSTACK_DIR"
+    cd "$REPO_ROOT"
     bun install
     bun run build
   )
-  # Safety net: write .version if build script didn't (e.g., git not available during build)
-  if [ ! -f "$GSTACK_DIR/browse/dist/.version" ]; then
-    git -C "$GSTACK_DIR" rev-parse HEAD > "$GSTACK_DIR/browse/dist/.version" 2>/dev/null || true
+  if [ ! -f "$REPO_ROOT/browse/dist/.version" ]; then
+    git -C "$REPO_ROOT" rev-parse HEAD > "$REPO_ROOT/browse/dist/.version" 2>/dev/null || true
   fi
 fi
 
@@ -47,7 +126,7 @@ fi
 if ! ensure_playwright_browser; then
   echo "Installing Playwright Chromium..."
   (
-    cd "$GSTACK_DIR"
+    cd "$REPO_ROOT"
     bunx playwright install chromium
   )
 fi
@@ -60,38 +139,44 @@ fi
 # 3. Ensure ~/.gstack global state directory exists
 mkdir -p "$HOME/.gstack/projects"
 
-# 4. Only create skill symlinks if we're inside a .claude/skills directory
-SKILLS_BASENAME="$(basename "$SKILLS_DIR")"
-if [ "$SKILLS_BASENAME" = "skills" ]; then
-  linked=()
-  for skill_dir in "$GSTACK_DIR"/*/; do
-    if [ -f "$skill_dir/SKILL.md" ]; then
-      skill_name="$(basename "$skill_dir")"
-      # Skip node_modules
-      [ "$skill_name" = "node_modules" ] && continue
-      target="$SKILLS_DIR/$skill_name"
-      # Create or update symlink; skip if a real file/directory exists
-      if [ -L "$target" ] || [ ! -e "$target" ]; then
-        ln -snf "gstack/$skill_name" "$target"
-        linked+=("$skill_name")
-      fi
-    fi
-  done
+echo "gstack ready."
+echo "  browse: $BROWSE_BIN"
 
-  echo "gstack ready."
-  echo "  browse: $BROWSE_BIN"
-  if [ ${#linked[@]} -gt 0 ]; then
-    echo "  linked skills: ${linked[*]}"
-  fi
-else
-  echo "gstack ready."
-  echo "  browse: $BROWSE_BIN"
-  echo "  (skipped skill symlinks — not inside .claude/skills/)"
-fi
+case "$HOST" in
+  claude)
+    link_claude_local
+    ;;
+  codex)
+    link_codex_global
+    ;;
+  auto)
+    if command -v codex >/dev/null 2>&1; then
+      link_codex_global
+    fi
+    if [ -d "$HOME/.claude" ]; then
+      link_claude_global
+    fi
+    ;;
+  *)
+    echo "Unknown host: $HOST" >&2
+    echo "Expected one of: claude, codex, auto" >&2
+    exit 1
+    ;;
+esac
 
-# 4. First-time welcome + legacy cleanup
 if [ ! -d "$HOME/.gstack" ]; then
   mkdir -p "$HOME/.gstack"
-  echo "  Welcome! Run /gstack-upgrade anytime to stay current."
+  case "$HOST" in
+    codex)
+      echo "  Welcome! Run \$gstack-upgrade anytime to stay current."
+      ;;
+    auto)
+      echo "  Welcome! Run /gstack-upgrade or \$gstack-upgrade anytime to stay current."
+      ;;
+    *)
+      echo "  Welcome! Run /gstack-upgrade anytime to stay current."
+      ;;
+  esac
 fi
+
 rm -f /tmp/gstack-latest-version

From ae1bc0c97fdfb1892b93cec057e167f71e29a7a7 Mon Sep 17 00:00:00 2001
From: Andrew Campbell <andrewcampbell@Andrews-Mini.lan>
Date: Mon, 16 Mar 2026 23:50:07 -0500
Subject: [PATCH 3/6] test: cover codex skill generation

Extend the skill generation and validation checks so Codex outputs are exercised in tests and CI.
---
 .github/workflows/skill-docs.yml |  4 +-
 test/gen-skill-docs.test.ts      | 76 ++++++++++++++++++++++++--------
 test/skill-validation.test.ts    | 58 ++++++++++++++++++++++++
 3 files changed, 118 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/skill-docs.yml b/.github/workflows/skill-docs.yml
index 6f8f174..50dcd58 100644
--- a/.github/workflows/skill-docs.yml
+++ b/.github/workflows/skill-docs.yml
@@ -7,5 +7,5 @@ jobs:
       - uses: actions/checkout@v4
       - uses: oven-sh/setup-bun@v2
       - run: bun install
-      - run: bun run gen:skill-docs
-      - run: git diff --exit-code || (echo "Generated SKILL.md files are stale. Run: bun run gen:skill-docs" && exit 1)
+      - run: bun run gen:skill-docs --dry-run
+      - run: bun run gen:skill-docs --host codex --dry-run
diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts
index def042c..4f634db 100644
--- a/test/gen-skill-docs.test.ts
+++ b/test/gen-skill-docs.test.ts
@@ -5,6 +5,32 @@ import * as fs from 'fs';
 import * as path from 'path';
 
 const ROOT = path.resolve(import.meta.dir, '..');
+const ALL_SKILLS = [
+  { dir: '.', name: 'root gstack' },
+  { dir: 'browse', name: 'browse' },
+  { dir: 'qa', name: 'qa' },
+  { dir: 'qa-only', name: 'qa-only' },
+  { dir: 'review', name: 'review' },
+  { dir: 'ship', name: 'ship' },
+  { dir: 'plan-ceo-review', name: 'plan-ceo-review' },
+  { dir: 'plan-eng-review', name: 'plan-eng-review' },
+  { dir: 'retro', name: 'retro' },
+  { dir: 'setup-browser-cookies', name: 'setup-browser-cookies' },
+  { dir: 'gstack-upgrade', name: 'gstack-upgrade' },
+  { dir: 'plan-design-review', name: 'plan-design-review' },
+  { dir: 'qa-design-review', name: 'qa-design-review' },
+  { dir: 'design-consultation', name: 'design-consultation' },
+  { dir: 'document-release', name: 'document-release' },
+];
+
+function codexSkillName(dir: string): string {
+  if (dir === '.') return 'gstack';
+  return dir.startsWith('gstack-') ? dir : `gstack-${dir}`;
+}
+
+function codexSkillPath(dir: string): string {
+  return path.join(ROOT, '.agents', 'skills', codexSkillName(dir), 'SKILL.md');
+}
 
 describe('gen-skill-docs', () => {
   test('generated SKILL.md contains all command categories', () => {
@@ -56,24 +82,6 @@ describe('gen-skill-docs', () => {
     }
   });
 
-  // All skills that must have templates — single source of truth
-  const ALL_SKILLS = [
-    { dir: '.', name: 'root gstack' },
-    { dir: 'browse', name: 'browse' },
-    { dir: 'qa', name: 'qa' },
-    { dir: 'qa-only', name: 'qa-only' },
-    { dir: 'review', name: 'review' },
-    { dir: 'ship', name: 'ship' },
-    { dir: 'plan-ceo-review', name: 'plan-ceo-review' },
-    { dir: 'plan-eng-review', name: 'plan-eng-review' },
-    { dir: 'retro', name: 'retro' },
-    { dir: 'setup-browser-cookies', name: 'setup-browser-cookies' },
-    { dir: 'gstack-upgrade', name: 'gstack-upgrade' },
-    { dir: 'plan-design-review', name: 'plan-design-review' },
-    { dir: 'qa-design-review', name: 'qa-design-review' },
-    { dir: 'design-consultation', name: 'design-consultation' },
-  ];
-
   test('every skill has a SKILL.md.tmpl template', () => {
     for (const skill of ALL_SKILLS) {
       const tmplPath = path.join(ROOT, skill.dir, 'SKILL.md.tmpl');
@@ -116,6 +124,21 @@ describe('gen-skill-docs', () => {
     expect(output).not.toContain('STALE');
   });
 
+  test('codex generated files are fresh (match --host codex --dry-run)', () => {
+    const result = Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--host', 'codex', '--dry-run'], {
+      cwd: ROOT,
+      stdout: 'pipe',
+      stderr: 'pipe',
+    });
+    expect(result.exitCode).toBe(0);
+    const output = result.stdout.toString();
+    for (const skill of ALL_SKILLS) {
+      const file = path.relative(ROOT, codexSkillPath(skill.dir));
+      expect(output).toContain(`FRESH: ${file}`);
+    }
+    expect(output).not.toContain('STALE');
+  });
+
   test('no generated SKILL.md contains unresolved placeholders', () => {
     for (const skill of ALL_SKILLS) {
       const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
@@ -124,6 +147,23 @@ describe('gen-skill-docs', () => {
     }
   });
 
+  test('every Codex skill has a generated SKILL.md with a host-specific header', () => {
+    for (const skill of ALL_SKILLS) {
+      const content = fs.readFileSync(codexSkillPath(skill.dir), 'utf-8');
+      const source = skill.dir === '.' ? 'SKILL.md.tmpl' : `${skill.dir}/SKILL.md.tmpl`;
+      expect(content).toContain(`AUTO-GENERATED from ${source}`);
+      expect(content).toContain('Regenerate: bun run gen:skill-docs --host codex');
+    }
+  });
+
+  test('Codex-generated SKILL.md files do not reference Claude skill paths', () => {
+    for (const skill of ALL_SKILLS) {
+      const content = fs.readFileSync(codexSkillPath(skill.dir), 'utf-8');
+      expect(content).not.toContain('.claude/skills');
+      expect(content).not.toContain('~/.claude/skills');
+    }
+  });
+
   test('templates contain placeholders', () => {
     const rootTmpl = fs.readFileSync(path.join(ROOT, 'SKILL.md.tmpl'), 'utf-8');
     expect(rootTmpl).toContain('{{COMMAND_REFERENCE}}');
diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts
index 83cdca6..1514936 100644
--- a/test/skill-validation.test.ts
+++ b/test/skill-validation.test.ts
@@ -6,6 +6,39 @@ import * as fs from 'fs';
 import * as path from 'path';
 
 const ROOT = path.resolve(import.meta.dir, '..');
+const CODEX_SKILLS = [
+  '.',
+  'browse',
+  'design-consultation',
+  'document-release',
+  'gstack-upgrade',
+  'plan-ceo-review',
+  'plan-design-review',
+  'plan-eng-review',
+  'qa',
+  'qa-design-review',
+  'qa-only',
+  'retro',
+  'review',
+  'setup-browser-cookies',
+  'ship',
+];
+
+function codexSkillName(dir: string): string {
+  if (dir === '.') return 'gstack';
+  return dir.startsWith('gstack-') ? dir : `gstack-${dir}`;
+}
+
+function codexSkillPath(dir: string): string {
+  return path.join(ROOT, '.agents', 'skills', codexSkillName(dir), 'SKILL.md');
+}
+
+function frontmatter(content: string): string {
+  expect(content.startsWith('---\n')).toBe(true);
+  const end = content.indexOf('\n---\n', 4);
+  expect(end).toBeGreaterThan(3);
+  return content.slice(4, end);
+}
 
 describe('SKILL.md command validation', () => {
   test('all $B commands in SKILL.md are valid browse commands', () => {
@@ -194,6 +227,31 @@ describe('Generated SKILL.md freshness', () => {
   });
 });
 
+describe('Codex-generated SKILL.md validation', () => {
+  test('every Codex-generated SKILL.md has YAML frontmatter with the expected name', () => {
+    for (const skill of CODEX_SKILLS) {
+      const content = fs.readFileSync(codexSkillPath(skill), 'utf-8');
+      const yaml = frontmatter(content);
+      expect(yaml).toContain(`name: ${codexSkillName(skill)}`);
+      expect(yaml).toContain('description: |');
+    }
+  });
+
+  test('every Codex-generated SKILL.md has a non-empty description', () => {
+    for (const skill of CODEX_SKILLS) {
+      const content = fs.readFileSync(codexSkillPath(skill), 'utf-8');
+      const yaml = frontmatter(content);
+      const match = yaml.match(/description:\s*\|\n([\s\S]+)/);
+      expect(match).not.toBeNull();
+      const lines = match![1]
+        .split('\n')
+        .map(line => line.trim())
+        .filter(Boolean);
+      expect(lines.length).toBeGreaterThan(0);
+    }
+  });
+});
+
 // --- Update check preamble validation ---
 
 describe('Update check preamble', () => {

From dae406f9cf386a4c0ca11a7eda3f6c51c8269284 Mon Sep 17 00:00:00 2001
From: Andrew Campbell <andrewcampbell@Andrews-Mini.lan>
Date: Mon, 16 Mar 2026 23:50:15 -0500
Subject: [PATCH 4/6] docs: add codex host documentation

Document the Codex install path, contributor workflow, and repository metadata for the new host.
---
 AGENTS.md          | 122 +++++++++++++++++++++++++++++++++++++++++++++
 CONTRIBUTING.md    |  29 +++++++----
 README.md          |  30 +++++++++--
 agents/openai.yaml |   5 ++
 pr-body.md         |  53 ++++++++++++++++++++
 5 files changed, 225 insertions(+), 14 deletions(-)
 create mode 100644 AGENTS.md
 create mode 100644 agents/openai.yaml
 create mode 100644 pr-body.md

diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..ab8f154
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,122 @@
+# gstack development
+
+## Commands
+
+```bash
+bun install          # install dependencies
+bun test             # run free tests (browse + snapshot + skill validation)
+bun run test:evals   # run paid evals: LLM judge + E2E (~$4/run)
+bun run test:e2e     # run E2E tests only (~$3.85/run)
+bun run dev <cmd>    # run CLI in dev mode, e.g. bun run dev goto https://example.com
+bun run build        # gen docs for Claude + Codex, then compile binaries
+bun run gen:skill-docs            # regenerate Claude SKILL.md files
+bun run gen:skill-docs --host codex  # regenerate Codex SKILL.md files
+bun run skill:check  # health dashboard for all skills
+bun run dev:skill    # watch mode: auto-regen + validate on change
+bun run eval:list    # list all eval runs from ~/.gstack-dev/evals/
+bun run eval:compare # compare two eval runs (auto-picks most recent)
+bun run eval:summary # aggregate stats across all eval runs
+```
+
+`test:evals` requires `ANTHROPIC_API_KEY`. E2E tests stream progress in real-time
+(tool-by-tool via `--output-format stream-json --verbose`). Results are persisted
+to `~/.gstack-dev/evals/` with auto-comparison against the previous run.
+
+## Project structure
+
+```text
+gstack/
+├── .agents/skills/  # Generated Codex skill tree (don't hand-edit)
+├── browse/          # Headless browser CLI (Playwright)
+│   ├── src/         # CLI + server + commands
+│   ├── test/        # Integration tests + fixtures
+│   └── dist/        # Compiled binary
+├── scripts/         # Build + DX tooling
+├── test/            # Skill validation + eval tests
+├── ship/            # $gstack-ship / /ship workflow skill
+├── review/          # $gstack-review / /review workflow skill
+├── setup            # One-time setup: build binary + symlink skills
+├── AGENTS.md        # Codex guidance
+├── CLAUDE.md        # Claude guidance
+├── SKILL.md         # Generated Claude browse skill
+├── SKILL.md.tmpl    # Shared template source
+└── package.json     # Build scripts for browse + doc generation
+```
+
+## SKILL.md workflow
+
+SKILL.md files are **generated** from `.tmpl` templates. To update docs:
+
+1. Edit the `.tmpl` file.
+2. Run `bun run gen:skill-docs` for Claude and `bun run gen:skill-docs --host codex` for Codex.
+3. Commit both the template and generated outputs.
+
+Codex outputs live in `.agents/skills/gstack*/SKILL.md`. Do not hand-edit them.
+
+## Writing SKILL templates
+
+SKILL.md.tmpl files are prompt templates read by hosts, not bash scripts. Each
+bash code block runs in a separate shell, so variables do not persist between blocks.
+
+Rules:
+- Use natural language for logic and state.
+- Don't hardcode branch names. Use `{{BASE_BRANCH_DETECT}}`.
+- Keep bash blocks self-contained.
+- Express conditionals as English instead of nested shell control flow.
+- Keep the template system as the single source of truth for both hosts.
+
+## Browser interaction
+
+When you need to interact with a browser (QA, dogfooding, cookie setup), use
+`$gstack-browse` or run the browse binary directly via `$B <command>`. Never use
+`mcp__claude-in-chrome__*` tools.
+
+## Vendored symlink awareness
+
+When developing gstack, `.agents/skills/gstack` may be the checked-in Codex tree
+and `.claude/skills/gstack` may be a gitignored symlink back to this working
+directory. Skill changes can go live immediately for both hosts.
+
+Check once per session:
+
+```bash
+ls -la .agents/skills/gstack
+ls -la .claude/skills/gstack 2>/dev/null || true
+```
+
+If `.claude/skills/gstack` points at your working tree, template changes plus
+`bun run gen:skill-docs` immediately affect active Claude sessions. Codex reads
+the checked-in `.agents/skills/` tree, so regenerate Codex docs before testing.
+
+## CHANGELOG style
+
+CHANGELOG.md is for users, not contributors.
+
+- Lead with what the user can now do.
+- Use plain language, not implementation details.
+- Put contributor/internal notes in a separate "For contributors" section.
+
+## Local plans
+
+Contributors can store long-range vision docs and design documents in
+`~/.gstack-dev/plans/`. These are local-only and not checked in.
+
+## E2E eval failure blame protocol
+
+If an E2E eval fails, never claim it is unrelated without proving it on the base
+branch first. Prompt and skill changes have invisible couplings.
+
+## Deploying to the active skill
+
+The active Codex skill lives at `~/.agents/skills/gstack/`. After making changes:
+
+1. Push your branch.
+2. Refresh the checkout you installed from.
+3. Run `./setup --host codex`.
+
+Or copy binaries directly:
+
+```bash
+cp browse/dist/browse ~/.agents/skills/gstack/browse/dist/browse
+cp browse/dist/find-browse ~/.agents/skills/gstack/browse/dist/find-browse
+```
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4af2e88..f14b4f7 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -4,9 +4,14 @@ Thanks for wanting to make gstack better. Whether you're fixing a typo in a skil
 
 ## Quick start
 
-gstack skills are Markdown files that Claude Code discovers from a `skills/` directory. Normally they live at `~/.claude/skills/gstack/` (your global install). But when you're developing gstack itself, you want Claude Code to use the skills *in your working tree* — so edits take effect instantly without copying or deploying anything.
+gstack skills are Markdown files discovered from a `skills/` directory. Claude uses
+`~/.claude/skills/gstack/`; Codex uses the generated `.agents/skills/` tree and
+installs it into `~/.agents/skills/`. When you're developing gstack itself, you
+want both hosts to read from your working tree so edits take effect instantly.
 
-That's what dev mode does. It symlinks your repo into the local `.claude/skills/` directory so Claude Code reads skills straight from your checkout.
+That's what dev mode does. It symlinks your repo into the local `.claude/skills/`
+directory for Claude and ensures the checked-in `.agents/skills/gstack` tree has
+the support links Codex expects.
 
 ```bash
 git clone <repo> && cd gstack
@@ -14,7 +19,8 @@ bun install                    # install dependencies
 bin/dev-setup                  # activate dev mode
 ```
 
-Now edit any `SKILL.md`, invoke it in Claude Code (e.g. `/review`), and see your changes live. When you're done developing:
+Now edit any `SKILL.md`, invoke it in Claude Code (e.g. `/review`) or Codex
+(e.g. `$gstack-review`), and see your changes live. When you're done developing:
 
 ```bash
 bin/dev-teardown               # deactivate — back to your global install
@@ -58,11 +64,12 @@ project where you actually felt the pain.
 
 When you're editing gstack skills and want to test them by actually using gstack
 in the same repo, `bin/dev-setup` wires this up. It creates `.claude/skills/`
-symlinks (gitignored) pointing back to your working tree, so Claude Code uses
-your local edits instead of the global install.
+symlinks (gitignored) pointing back to your working tree, and keeps
+`.agents/skills/gstack` ready for Codex.
 
 ```
 gstack/                          <- your working tree
+├── .agents/skills/              <- generated Codex skill tree (checked in)
 ├── .claude/skills/              <- created by dev-setup (gitignored)
 │   ├── gstack -> ../../         <- symlink back to repo root
 │   ├── review -> gstack/review
@@ -87,8 +94,9 @@ bin/dev-setup
 # 2. Edit a skill
 vim review/SKILL.md
 
-# 3. Test it in Claude Code — changes are live
+# 3. Test it in Claude Code or Codex — changes are live
 #    > /review
+#    > $gstack-review
 
 # 4. Editing browse source? Rebuild the binary
 bun run build
@@ -133,7 +141,7 @@ Runs automatically with `bun test`. No API keys needed.
 
 - **Skill parser tests** (`test/skill-parser.test.ts`) — Extracts every `$B` command from SKILL.md bash code blocks and validates against the command registry in `browse/src/commands.ts`. Catches typos, removed commands, and invalid snapshot flags.
 - **Skill validation tests** (`test/skill-validation.test.ts`) — Validates that SKILL.md files reference only real commands and flags, and that command descriptions meet quality thresholds.
-- **Generator tests** (`test/gen-skill-docs.test.ts`) — Tests the template system: verifies placeholders resolve correctly, output includes value hints for flags (e.g. `-d <N>` not just `-d`), enriched descriptions for key commands (e.g. `is` lists valid states, `press` lists key examples).
+- **Generator tests** (`test/gen-skill-docs.test.ts`) — Tests the template system for both Claude and Codex outputs: placeholder resolution, output routing, value hints for flags (e.g. `-d <N>` not just `-d`), enriched descriptions for key commands, and no Claude-only paths in Codex docs.
 
 ### Tier 2: E2E via `claude -p` (~$3.85/run)
 
@@ -197,7 +205,7 @@ Each dimension is scored 1-5. Threshold: every dimension must score **≥ 4**. T
 
 ### CI
 
-A GitHub Action (`.github/workflows/skill-docs.yml`) runs `bun run gen:skill-docs --dry-run` on every push and PR. If the generated SKILL.md files differ from what's committed, CI fails. This catches stale docs before they merge.
+A GitHub Action (`.github/workflows/skill-docs.yml`) runs both `bun run gen:skill-docs --dry-run` and `bun run gen:skill-docs --host codex --dry-run` on every push and PR. If either generated skill tree differs from what's committed, CI fails.
 
 Tests run against the browse binary directly — they don't require dev mode.
 
@@ -211,6 +219,7 @@ vim SKILL.md.tmpl              # or browse/SKILL.md.tmpl
 
 # 2. Regenerate
 bun run gen:skill-docs
+bun run gen:skill-docs --host codex
 
 # 3. Check health
 bun run skill:check
@@ -238,10 +247,10 @@ When Conductor creates a new workspace, `bin/dev-setup` runs automatically. It d
 
 ## Things to know
 
-- **SKILL.md files are generated.** Edit the `.tmpl` template, not the `.md`. Run `bun run gen:skill-docs` to regenerate.
+- **SKILL.md files are generated.** Edit the `.tmpl` template, not the `.md`. Run `bun run gen:skill-docs` and `bun run gen:skill-docs --host codex` (or just `bun run build`) to regenerate both hosts.
 - **TODOS.md is the unified backlog.** Organized by skill/component with P0-P4 priorities. `/ship` auto-detects completed items. All planning/review/retro skills read it for context.
 - **Browse source changes need a rebuild.** If you touch `browse/src/*.ts`, run `bun run build`.
-- **Dev mode shadows your global install.** Project-local skills take priority over `~/.claude/skills/gstack`. `bin/dev-teardown` restores the global one.
+- **Dev mode shadows your global Claude install and refreshes Codex support links.** Project-local Claude skills take priority over `~/.claude/skills/gstack`, while Codex reads the checked-in `.agents/skills/` tree. `bin/dev-teardown` restores the global Claude install.
 - **Conductor workspaces are independent.** Each workspace is its own git worktree. `bin/dev-setup` runs automatically via `conductor.json`.
 - **`.env` propagates across worktrees.** Set it once in the main repo, all Conductor workspaces get it.
 - **`.claude/skills/` is gitignored.** The symlinks never get committed.
diff --git a/README.md b/README.md
index 1db8d1d..5ae358d 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,8 @@
 # gstack
 
-**gstack turns Claude Code from one generic assistant into a team of specialists you can summon on demand.**
+**gstack turns Claude Code and Codex CLI from one generic assistant into a team of specialists you can summon on demand.**
 
-Twelve opinionated workflow skills for [Claude Code](https://docs.anthropic.com/en/docs/claude-code). Plan review, design review, code review, one-command shipping, browser automation, QA testing, engineering retrospectives, and post-ship documentation — all as slash commands.
+Twelve opinionated workflow skills for [Claude Code](https://docs.anthropic.com/en/docs/claude-code) and Codex CLI. Plan review, design review, code review, one-command shipping, browser automation, QA testing, engineering retrospectives, and post-ship documentation — all from one template system.
 
 ### Without gstack
 
@@ -125,7 +125,7 @@ This is the setup I use. One person, ten parallel agents, each with the right co
 
 ## Install
 
-**Requirements:** [Claude Code](https://docs.anthropic.com/en/docs/claude-code), [Git](https://git-scm.com/), [Bun](https://bun.sh/) v1.0+. `/browse` compiles a native binary — works on macOS and Linux (x64 and arm64).
+**Requirements:** [Claude Code](https://docs.anthropic.com/en/docs/claude-code) or Codex CLI, [Git](https://git-scm.com/), [Bun](https://bun.sh/) v1.0+. `/browse` compiles a native binary — works on macOS and Linux (x64 and arm64).
 
 ### Step 1: Install on your machine
 
@@ -133,6 +133,27 @@ Open Claude Code and paste this. Claude will do the rest.
 
 > Install gstack: run `git clone https://github.com/garrytan/gstack.git ~/.claude/skills/gstack && cd ~/.claude/skills/gstack && ./setup` then add a "gstack" section to CLAUDE.md that says to use the /browse skill from gstack for all web browsing, never use mcp\_\_claude-in-chrome\_\_\* tools, and lists the available skills: /plan-ceo-review, /plan-eng-review, /plan-design-review, /review, /ship, /browse, /qa, /qa-only, /qa-design-review, /setup-browser-cookies, /retro, /document-release. Then ask the user if they also want to add gstack to the current project so teammates get it.
 
+### Codex Setup
+
+Clone gstack anywhere convenient, then let `setup --host codex` register the generated
+Codex skill tree in `~/.agents/skills/`:
+
+```bash
+git clone https://github.com/garrytan/gstack.git ~/.codex/skills/gstack
+cd ~/.codex/skills/gstack
+./setup --host codex
+```
+
+This links:
+
+- `~/.agents/skills/gstack/` → the generated root Codex skill
+- `~/.agents/skills/gstack-review`, `~/.agents/skills/gstack-ship`, etc.
+- the shared browse binary and helper docs through the checked-in `.agents/skills/` tree
+
+If you keep repo-level agent instructions, add a short `gstack` section to `AGENTS.md`
+that tells Codex to use `$gstack-browse` for web browsing, never use
+`mcp__claude-in-chrome__*`, and lists the available `$gstack-*` skills.
+
 ### Step 2: Add to your repo so teammates get it (optional)
 
 > Add gstack to this project: run `cp -Rf ~/.claude/skills/gstack .claude/skills/gstack && rm -rf .claude/skills/gstack/.git && cd .claude/skills/gstack && ./setup` then add a "gstack" section to this project's CLAUDE.md that says to use the /browse skill from gstack for all web browsing, never use mcp\_\_claude-in-chrome\_\_\* tools, lists the available skills: /plan-ceo-review, /plan-eng-review, /plan-design-review, /review, /ship, /browse, /qa, /qa-only, /qa-design-review, /setup-browser-cookies, /retro, /document-release, and tells Claude that if gstack skills aren't working, run `cd .claude/skills/gstack && ./setup` to build the binary and register skills.
@@ -143,11 +164,12 @@ Real files get committed to your repo (not a submodule), so `git clone` just wor
 
 - Skill files (Markdown prompts) in `~/.claude/skills/gstack/` (or `.claude/skills/gstack/` for project installs)
 - Symlinks at `~/.claude/skills/browse`, `~/.claude/skills/qa`, `~/.claude/skills/review`, etc. pointing into the gstack directory
+- Codex skill files in `~/.agents/skills/gstack*/`
 - Browser binary at `browse/dist/browse` (~58MB, gitignored)
 - `node_modules/` (gitignored)
 - `/retro` saves JSON snapshots to `.context/retros/` in your project for trend tracking
 
-Everything lives inside `.claude/`. Nothing touches your PATH or runs in the background.
+Everything lives inside `.claude/` or `.agents/`. Nothing touches your PATH or runs in the background.
 
 ---
 
diff --git a/agents/openai.yaml b/agents/openai.yaml
new file mode 100644
index 0000000..de76b4d
--- /dev/null
+++ b/agents/openai.yaml
@@ -0,0 +1,5 @@
+interface:
+  display_name: "gstack"
+  short_description: "Ship, review, QA, plan, and retro — powered by a headless browser"
+  brand_color: "#18181B"
+  default_prompt: "What would you like to ship today?"
diff --git a/pr-body.md b/pr-body.md
new file mode 100644
index 0000000..b758173
--- /dev/null
+++ b/pr-body.md
@@ -0,0 +1,53 @@
+## Summary
+
+- add Codex as a first-class gstack host without duplicating workflow logic
+- generate `.agents/skills/gstack*/SKILL.md` from the same `.tmpl` templates as Claude
+- make setup, browse discovery, tests, CI, and docs understand both Claude and Codex installs
+- add Codex-facing repo docs (`AGENTS.md`, `agents/openai.yaml`) and generated support links under `.agents/skills/gstack`
+
+## What Changed
+
+- `scripts/gen-skill-docs.ts`
+  - adds `--host codex`
+  - routes Codex output into `.agents/skills/`
+  - injects Codex frontmatter (`name`, `description`)
+  - keeps shared resolvers host-aware
+  - rewrites skill invocation syntax to `$gstack-*` for Codex
+- shared templates
+  - replace hardcoded helper-doc paths with minimal placeholders (`{{SKILL_ROOT}}`, `{{LOCAL_SKILL_ROOT}}`, `{{REVIEW_ROOT}}`)
+  - keep one template source for both hosts
+- install/runtime
+  - `setup` now supports `--host claude|codex|auto`
+  - `bin/dev-setup` and `bin/dev-teardown` cover the Codex support tree alongside Claude dev mode
+  - `browse/src/find-browse.ts` checks `.agents` before `.claude`
+- quality gates
+  - extend tests for Codex freshness/frontmatter/path safety
+  - run Codex dry-run generation in CI
+- docs
+  - add `AGENTS.md`
+  - add `agents/openai.yaml`
+  - update README/CONTRIBUTING for dual-host development
+  - add user-facing CHANGELOG entry for Codex support
+
+## Testing
+
+```bash
+bash -n setup bin/dev-setup bin/dev-teardown
+bun test
+bun run gen:skill-docs --dry-run
+bun run gen:skill-docs --host codex --dry-run
+bun run skill:check
+TMP_HOME=$(mktemp -d)
+HOME="$TMP_HOME" ./setup --host codex
+HOME="$TMP_HOME" "$TMP_HOME/.agents/skills/gstack/setup" --host codex
+```
+
+## Pre-Landing Review
+
+Pre-Landing Review: No issues found.
+
+## Notes
+
+- Codex skills are generated only. Do not hand-edit `.agents/skills/*/SKILL.md`.
+- Codex-generated markdown intentionally contains no `.claude/skills` paths.
+- The root Codex skill directory includes support symlinks (`bin`, `browse`, `review`, `setup`, etc.) so helper-doc lookups and browse setup work after `./setup --host codex`.

From 5aad796e16e06a1c56f375d3d2999745ca7aebe3 Mon Sep 17 00:00:00 2001
From: Andrew Campbell <andrewcampbell@Andrews-Mini.lan>
Date: Mon, 16 Mar 2026 23:50:21 -0500
Subject: [PATCH 5/6] chore: bump version and changelog (v0.5.1)

Co-Authored-By: GPT-5-Codex <noreply@openai.com>
---
 CHANGELOG.md | 12 ++++++++++++
 VERSION      |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a86569c..8fd14ff 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,17 @@
 # Changelog
 
+## 0.5.1 — 2026-03-16
+
+- **You can now use gstack from Codex CLI.** The same skill templates now generate native Codex skill files in `.agents/skills/`, so you get `$gstack-review`, `$gstack-ship`, `$gstack-qa`, and the rest without maintaining a second prompt stack.
+- **Claude and Codex stay in sync automatically.** One `.tmpl` template now produces both hosts' `SKILL.md` files, which means new workflow improvements ship to both environments from a single source of truth.
+- **Codex setup is one command.** Run `./setup --host codex` to register the generated Codex skills in `~/.agents/skills/`, including the shared browse binary and helper docs.
+- **Contributing now covers the dual-host workflow.** The docs, tests, and CI checks all verify both Claude and Codex generation, so it is much harder to accidentally fix one host and break the other.
+
+### For contributors
+
+- Added host-aware generation to `scripts/gen-skill-docs.ts`, including Codex frontmatter, `.agents/skills/` output routing, and shared placeholder resolution.
+- Added Codex support to `setup`, `bin/dev-setup`, `bin/dev-teardown`, `browse/src/find-browse.ts`, tests, and `skill-docs.yml`.
+
 ## 0.5.0 — 2026-03-16
 
 - **Your site just got a design review.** `/plan-design-review` opens your site and reviews it like a senior product designer — typography, spacing, hierarchy, color, responsive, interactions, and AI slop detection. Get letter grades (A-F) per category, a dual headline "Design Score" + "AI Slop Score", and a structured first impression that doesn't pull punches.
diff --git a/VERSION b/VERSION
index 8f0916f..4b9fcbe 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.0
+0.5.1

From 8da4d5467972a258750a5fe753df609f1f9e25be Mon Sep 17 00:00:00 2001
From: Andrew Campbell <andrewcampbell@Andrews-Mini.lan>
Date: Tue, 17 Mar 2026 00:23:26 -0500
Subject: [PATCH 6/6] fix: harden codex install and browse discovery

Make ~/.codex/skills the canonical Codex install root, teach browse discovery to prefer it consistently, and update generated docs/tests so the bootstrap path works out of the box.
---
 .agents/skills/gstack-browse/SKILL.md         | 12 +++--
 .../gstack-design-consultation/SKILL.md       |  6 +--
 .../skills/gstack-document-release/SKILL.md   |  8 ++--
 .../skills/gstack-plan-ceo-review/SKILL.md    |  8 ++--
 .../skills/gstack-plan-design-review/SKILL.md | 12 +++--
 .../skills/gstack-plan-eng-review/SKILL.md    |  8 ++--
 .../skills/gstack-qa-design-review/SKILL.md   | 12 +++--
 .agents/skills/gstack-qa-only/SKILL.md        | 18 ++++---
 .agents/skills/gstack-qa/SKILL.md             | 18 ++++---
 .agents/skills/gstack-retro/SKILL.md          |  6 +--
 .agents/skills/gstack-review/SKILL.md         | 10 ++--
 .../gstack-setup-browser-cookies/SKILL.md     | 12 +++--
 .agents/skills/gstack-ship/SKILL.md           | 12 ++---
 .agents/skills/gstack-upgrade/SKILL.md        | 10 ++--
 .agents/skills/gstack/SKILL.md                | 12 +++--
 AGENTS.md                                     |  6 +--
 CHANGELOG.md                                  |  3 +-
 CONTRIBUTING.md                               |  2 +-
 README.md                                     | 14 +++---
 bin/dev-setup                                 |  9 ++--
 bin/dev-teardown                              |  5 +-
 browse/bin/find-browse                        | 24 ++++++----
 browse/src/find-browse.ts                     | 32 +++++++------
 browse/test/find-browse.test.ts               | 48 +++++++++++++++++--
 pr-body.md                                    |  7 ++-
 scripts/gen-skill-docs.ts                     | 12 +++--
 scripts/skill-check.ts                        | 22 +++++++++
 setup                                         |  4 +-
 test/gen-skill-docs.test.ts                   |  6 +++
 test/skill-validation.test.ts                 | 22 +++++++++
 30 files changed, 259 insertions(+), 121 deletions(-)

diff --git a/.agents/skills/gstack-browse/SKILL.md b/.agents/skills/gstack-browse/SKILL.md
index 6cab766..fcbdfa9 100644
--- a/.agents/skills/gstack-browse/SKILL.md
+++ b/.agents/skills/gstack-browse/SKILL.md
@@ -13,18 +13,18 @@ description: |
 ## Preamble (run first)
 
 ```bash
-_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+_UPD=$($HOME/.codex/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
 [ -n "$_UPD" ] && echo "$_UPD" || true
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
 find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
-_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_CONTRIB=$($HOME/.codex/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
 echo "BRANCH: $_BRANCH"
 ```
 
-If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.codex/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
 
 ## AskUserQuestion Format
 
@@ -85,8 +85,12 @@ State persists between calls (cookies, tabs, login sessions).
 ```bash
 _ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
 B=""
-[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.codex/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/browse" ] && B="$HOME/.codex/skills/gstack/browse/dist/browse"
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
 if [ -x "$B" ]; then
diff --git a/.agents/skills/gstack-design-consultation/SKILL.md b/.agents/skills/gstack-design-consultation/SKILL.md
index e1ceadb..dcee1be 100644
--- a/.agents/skills/gstack-design-consultation/SKILL.md
+++ b/.agents/skills/gstack-design-consultation/SKILL.md
@@ -12,18 +12,18 @@ description: |
 ## Preamble (run first)
 
 ```bash
-_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+_UPD=$($HOME/.codex/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
 [ -n "$_UPD" ] && echo "$_UPD" || true
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
 find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
-_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_CONTRIB=$($HOME/.codex/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
 echo "BRANCH: $_BRANCH"
 ```
 
-If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.codex/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
 
 ## AskUserQuestion Format
 
diff --git a/.agents/skills/gstack-document-release/SKILL.md b/.agents/skills/gstack-document-release/SKILL.md
index f1a95a9..41eaff3 100644
--- a/.agents/skills/gstack-document-release/SKILL.md
+++ b/.agents/skills/gstack-document-release/SKILL.md
@@ -11,18 +11,18 @@ description: |
 ## Preamble (run first)
 
 ```bash
-_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+_UPD=$($HOME/.codex/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
 [ -n "$_UPD" ] && echo "$_UPD" || true
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
 find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
-_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_CONTRIB=$($HOME/.codex/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
 echo "BRANCH: $_BRANCH"
 ```
 
-If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.codex/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
 
 ## AskUserQuestion Format
 
@@ -276,7 +276,7 @@ After auditing each file individually, do a cross-doc consistency pass:
 
 ## Step 7: TODOS.md Cleanup
 
-This is a second pass that complements `$gstack-ship`'s Step 5.5. Read `$HOME/.agents/skills/gstack/review/TODOS-format.md` (if
+This is a second pass that complements `$gstack-ship`'s Step 5.5. Read `$HOME/.codex/skills/gstack/review/TODOS-format.md` (if
 available) for the canonical TODO item format.
 
 If TODOS.md does not exist, skip this step.
diff --git a/.agents/skills/gstack-plan-ceo-review/SKILL.md b/.agents/skills/gstack-plan-ceo-review/SKILL.md
index 0649740..e7e8ad8 100644
--- a/.agents/skills/gstack-plan-ceo-review/SKILL.md
+++ b/.agents/skills/gstack-plan-ceo-review/SKILL.md
@@ -12,18 +12,18 @@ description: |
 ## Preamble (run first)
 
 ```bash
-_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+_UPD=$($HOME/.codex/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
 [ -n "$_UPD" ] && echo "$_UPD" || true
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
 find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
-_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_CONTRIB=$($HOME/.codex/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
 echo "BRANCH: $_BRANCH"
 ```
 
-If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.codex/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
 
 ## AskUserQuestion Format
 
@@ -464,7 +464,7 @@ Complete table of every method that can fail, every exception class, rescued sta
 Any row with RESCUED=N, TEST=N, USER SEES=Silent → **CRITICAL GAP**.
 
 ### TODOS.md updates
-Present each potential TODO as its own individual AskUserQuestion. Never batch TODOs — one per question. Never silently skip this step. Follow the format in `$HOME/.agents/skills/gstack/review/TODOS-format.md`.
+Present each potential TODO as its own individual AskUserQuestion. Never batch TODOs — one per question. Never silently skip this step. Follow the format in `$HOME/.codex/skills/gstack/review/TODOS-format.md`.
 
 For each TODO, describe:
 * **What:** One-line description of the work.
diff --git a/.agents/skills/gstack-plan-design-review/SKILL.md b/.agents/skills/gstack-plan-design-review/SKILL.md
index e826610..6d06294 100644
--- a/.agents/skills/gstack-plan-design-review/SKILL.md
+++ b/.agents/skills/gstack-plan-design-review/SKILL.md
@@ -14,18 +14,18 @@ description: |
 ## Preamble (run first)
 
 ```bash
-_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+_UPD=$($HOME/.codex/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
 [ -n "$_UPD" ] && echo "$_UPD" || true
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
 find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
-_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_CONTRIB=$($HOME/.codex/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
 echo "BRANCH: $_BRANCH"
 ```
 
-If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.codex/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
 
 ## AskUserQuestion Format
 
@@ -106,8 +106,12 @@ Look for `DESIGN.md`, `design-system.md`, or similar in the repo root. If found,
 ```bash
 _ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
 B=""
-[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.codex/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/browse" ] && B="$HOME/.codex/skills/gstack/browse/dist/browse"
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
 if [ -x "$B" ]; then
diff --git a/.agents/skills/gstack-plan-eng-review/SKILL.md b/.agents/skills/gstack-plan-eng-review/SKILL.md
index f618a33..720cc68 100644
--- a/.agents/skills/gstack-plan-eng-review/SKILL.md
+++ b/.agents/skills/gstack-plan-eng-review/SKILL.md
@@ -11,18 +11,18 @@ description: |
 ## Preamble (run first)
 
 ```bash
-_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+_UPD=$($HOME/.codex/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
 [ -n "$_UPD" ] && echo "$_UPD" || true
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
 find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
-_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_CONTRIB=$($HOME/.codex/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
 echo "BRANCH: $_BRANCH"
 ```
 
-If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.codex/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
 
 ## AskUserQuestion Format
 
@@ -205,7 +205,7 @@ Every plan review MUST produce a "NOT in scope" section listing work that was co
 List existing code/flows that already partially solve sub-problems in this plan, and whether the plan reuses them or unnecessarily rebuilds them.
 
 ### TODOS.md updates
-After all review sections are complete, present each potential TODO as its own individual AskUserQuestion. Never batch TODOs — one per question. Never silently skip this step. Follow the format in `$HOME/.agents/skills/gstack/review/TODOS-format.md`.
+After all review sections are complete, present each potential TODO as its own individual AskUserQuestion. Never batch TODOs — one per question. Never silently skip this step. Follow the format in `$HOME/.codex/skills/gstack/review/TODOS-format.md`.
 
 For each TODO, describe:
 * **What:** One-line description of the work.
diff --git a/.agents/skills/gstack-qa-design-review/SKILL.md b/.agents/skills/gstack-qa-design-review/SKILL.md
index 6e644c8..f142682 100644
--- a/.agents/skills/gstack-qa-design-review/SKILL.md
+++ b/.agents/skills/gstack-qa-design-review/SKILL.md
@@ -12,18 +12,18 @@ description: |
 ## Preamble (run first)
 
 ```bash
-_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+_UPD=$($HOME/.codex/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
 [ -n "$_UPD" ] && echo "$_UPD" || true
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
 find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
-_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_CONTRIB=$($HOME/.codex/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
 echo "BRANCH: $_BRANCH"
 ```
 
-If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.codex/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
 
 ## AskUserQuestion Format
 
@@ -113,8 +113,12 @@ fi
 ```bash
 _ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
 B=""
-[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.codex/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/browse" ] && B="$HOME/.codex/skills/gstack/browse/dist/browse"
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
 if [ -x "$B" ]; then
diff --git a/.agents/skills/gstack-qa-only/SKILL.md b/.agents/skills/gstack-qa-only/SKILL.md
index 9a65827..011c107 100644
--- a/.agents/skills/gstack-qa-only/SKILL.md
+++ b/.agents/skills/gstack-qa-only/SKILL.md
@@ -12,18 +12,18 @@ description: |
 ## Preamble (run first)
 
 ```bash
-_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+_UPD=$($HOME/.codex/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
 [ -n "$_UPD" ] && echo "$_UPD" || true
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
 find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
-_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_CONTRIB=$($HOME/.codex/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
 echo "BRANCH: $_BRANCH"
 ```
 
-If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.codex/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
 
 ## AskUserQuestion Format
 
@@ -99,8 +99,12 @@ You are a QA engineer. Test web applications like a real user — click everythi
 ```bash
 _ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
 B=""
-[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.codex/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/browse" ] && B="$HOME/.codex/skills/gstack/browse/dist/browse"
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
 if [ -x "$B" ]; then
@@ -201,7 +205,7 @@ Run full mode, then load `baseline.json` from a previous run. Diff: which issues
 
 1. Find browse binary (see Setup above)
 2. Create output directories
-3. Copy report template from `$HOME/.agents/skills/gstack/qa/templates/qa-report-template.md` to output dir
+3. Copy report template from `$HOME/.codex/skills/gstack/qa/templates/qa-report-template.md` to output dir
 4. Start timer for duration tracking
 
 ### Phase 2: Authenticate (if needed)
@@ -257,7 +261,7 @@ $B snapshot -i -a -o "$REPORT_DIR/screenshots/page-name.png"
 $B console --errors
 ```
 
-Then follow the **per-page exploration checklist** (see `$HOME/.agents/skills/gstack/qa/references/issue-taxonomy.md`):
+Then follow the **per-page exploration checklist** (see `$HOME/.codex/skills/gstack/qa/references/issue-taxonomy.md`):
 
 1. **Visual scan** — Look at the annotated screenshot for layout issues
 2. **Interactive elements** — Click buttons, links, controls. Do they work?
@@ -304,7 +308,7 @@ $B snapshot -D
 $B snapshot -i -a -o "$REPORT_DIR/screenshots/issue-002.png"
 ```
 
-**Write each issue to the report immediately** using the template format from `$HOME/.agents/skills/gstack/qa/templates/qa-report-template.md`.
+**Write each issue to the report immediately** using the template format from `$HOME/.codex/skills/gstack/qa/templates/qa-report-template.md`.
 
 ### Phase 6: Wrap Up
 
diff --git a/.agents/skills/gstack-qa/SKILL.md b/.agents/skills/gstack-qa/SKILL.md
index 1b362d2..31555e7 100644
--- a/.agents/skills/gstack-qa/SKILL.md
+++ b/.agents/skills/gstack-qa/SKILL.md
@@ -14,18 +14,18 @@ description: |
 ## Preamble (run first)
 
 ```bash
-_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+_UPD=$($HOME/.codex/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
 [ -n "$_UPD" ] && echo "$_UPD" || true
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
 find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
-_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_CONTRIB=$($HOME/.codex/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
 echo "BRANCH: $_BRANCH"
 ```
 
-If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.codex/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
 
 ## AskUserQuestion Format
 
@@ -134,8 +134,12 @@ fi
 ```bash
 _ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
 B=""
-[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.codex/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/browse" ] && B="$HOME/.codex/skills/gstack/browse/dist/browse"
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
 if [ -x "$B" ]; then
@@ -237,7 +241,7 @@ Run full mode, then load `baseline.json` from a previous run. Diff: which issues
 
 1. Find browse binary (see Setup above)
 2. Create output directories
-3. Copy report template from `$HOME/.agents/skills/gstack/qa/templates/qa-report-template.md` to output dir
+3. Copy report template from `$HOME/.codex/skills/gstack/qa/templates/qa-report-template.md` to output dir
 4. Start timer for duration tracking
 
 ### Phase 2: Authenticate (if needed)
@@ -293,7 +297,7 @@ $B snapshot -i -a -o "$REPORT_DIR/screenshots/page-name.png"
 $B console --errors
 ```
 
-Then follow the **per-page exploration checklist** (see `$HOME/.agents/skills/gstack/qa/references/issue-taxonomy.md`):
+Then follow the **per-page exploration checklist** (see `$HOME/.codex/skills/gstack/qa/references/issue-taxonomy.md`):
 
 1. **Visual scan** — Look at the annotated screenshot for layout issues
 2. **Interactive elements** — Click buttons, links, controls. Do they work?
@@ -340,7 +344,7 @@ $B snapshot -D
 $B snapshot -i -a -o "$REPORT_DIR/screenshots/issue-002.png"
 ```
 
-**Write each issue to the report immediately** using the template format from `$HOME/.agents/skills/gstack/qa/templates/qa-report-template.md`.
+**Write each issue to the report immediately** using the template format from `$HOME/.codex/skills/gstack/qa/templates/qa-report-template.md`.
 
 ### Phase 6: Wrap Up
 
diff --git a/.agents/skills/gstack-retro/SKILL.md b/.agents/skills/gstack-retro/SKILL.md
index e65932e..0e0bf42 100644
--- a/.agents/skills/gstack-retro/SKILL.md
+++ b/.agents/skills/gstack-retro/SKILL.md
@@ -11,18 +11,18 @@ description: |
 ## Preamble (run first)
 
 ```bash
-_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+_UPD=$($HOME/.codex/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
 [ -n "$_UPD" ] && echo "$_UPD" || true
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
 find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
-_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_CONTRIB=$($HOME/.codex/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
 echo "BRANCH: $_BRANCH"
 ```
 
-If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.codex/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
 
 ## AskUserQuestion Format
 
diff --git a/.agents/skills/gstack-review/SKILL.md b/.agents/skills/gstack-review/SKILL.md
index 5348284..9a65bb3 100644
--- a/.agents/skills/gstack-review/SKILL.md
+++ b/.agents/skills/gstack-review/SKILL.md
@@ -10,18 +10,18 @@ description: |
 ## Preamble (run first)
 
 ```bash
-_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+_UPD=$($HOME/.codex/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
 [ -n "$_UPD" ] && echo "$_UPD" || true
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
 find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
-_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_CONTRIB=$($HOME/.codex/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
 echo "BRANCH: $_BRANCH"
 ```
 
-If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.codex/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
 
 ## AskUserQuestion Format
 
@@ -107,7 +107,7 @@ You are running the `$gstack-review` workflow. Analyze the current branch's diff
 
 ## Step 2: Read the checklist
 
-Read `$HOME/.agents/skills/gstack/review/checklist.md`.
+Read `$HOME/.codex/skills/gstack/review/checklist.md`.
 
 **If the file cannot be read, STOP and report the error.** Do not proceed without the checklist.
 
@@ -115,7 +115,7 @@ Read `$HOME/.agents/skills/gstack/review/checklist.md`.
 
 ## Step 2.5: Check for Greptile review comments
 
-Read `$HOME/.agents/skills/gstack/review/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps.
+Read `$HOME/.codex/skills/gstack/review/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps.
 
 **If no PR exists, `gh` fails, API returns an error, or there are zero Greptile comments:** Skip this step silently. Greptile integration is additive — the review works without it.
 
diff --git a/.agents/skills/gstack-setup-browser-cookies/SKILL.md b/.agents/skills/gstack-setup-browser-cookies/SKILL.md
index 7585b2c..351395f 100644
--- a/.agents/skills/gstack-setup-browser-cookies/SKILL.md
+++ b/.agents/skills/gstack-setup-browser-cookies/SKILL.md
@@ -11,18 +11,18 @@ description: |
 ## Preamble (run first)
 
 ```bash
-_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+_UPD=$($HOME/.codex/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
 [ -n "$_UPD" ] && echo "$_UPD" || true
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
 find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
-_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_CONTRIB=$($HOME/.codex/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
 echo "BRANCH: $_BRANCH"
 ```
 
-If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.codex/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
 
 ## AskUserQuestion Format
 
@@ -93,8 +93,12 @@ Import logged-in sessions from your real Chromium browser into the headless brow
 ```bash
 _ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
 B=""
-[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.codex/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/browse" ] && B="$HOME/.codex/skills/gstack/browse/dist/browse"
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
 if [ -x "$B" ]; then
diff --git a/.agents/skills/gstack-ship/SKILL.md b/.agents/skills/gstack-ship/SKILL.md
index 9a8bffb..6656092 100644
--- a/.agents/skills/gstack-ship/SKILL.md
+++ b/.agents/skills/gstack-ship/SKILL.md
@@ -9,18 +9,18 @@ description: |
 ## Preamble (run first)
 
 ```bash
-_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+_UPD=$($HOME/.codex/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
 [ -n "$_UPD" ] && echo "$_UPD" || true
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
 find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
-_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_CONTRIB=$($HOME/.codex/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
 echo "BRANCH: $_BRANCH"
 ```
 
-If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.codex/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
 
 ## AskUserQuestion Format
 
@@ -227,7 +227,7 @@ If multiple suites need to run, run them sequentially (each needs a test lane).
 
 Review the diff for structural issues that tests don't catch.
 
-1. Read `$HOME/.agents/skills/gstack/review/checklist.md`. If the file cannot be read, **STOP** and report the error.
+1. Read `$HOME/.codex/skills/gstack/review/checklist.md`. If the file cannot be read, **STOP** and report the error.
 
 2. Run `git diff origin/<base>` to get the full diff (scoped to feature changes against the freshly-fetched base branch).
 
@@ -261,7 +261,7 @@ Save the review output — it goes into the PR body in Step 8.
 
 ## Step 3.75: Address Greptile review comments (if PR exists)
 
-Read `$HOME/.agents/skills/gstack/review/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps.
+Read `$HOME/.codex/skills/gstack/review/greptile-triage.md` and follow the fetch, filter, classify, and **escalation detection** steps.
 
 **If no PR exists, `gh` fails, API returns an error, or there are zero Greptile comments:** Skip this step silently. Continue to Step 4.
 
@@ -343,7 +343,7 @@ For each classified comment:
 
 Cross-reference the project's TODOS.md against the changes being shipped. Mark completed items automatically; prompt only if the file is missing or disorganized.
 
-Read `$HOME/.agents/skills/gstack/review/TODOS-format.md` for the canonical format reference.
+Read `$HOME/.codex/skills/gstack/review/TODOS-format.md` for the canonical format reference.
 
 **1. Check if TODOS.md exists** in the repository root.
 
diff --git a/.agents/skills/gstack-upgrade/SKILL.md b/.agents/skills/gstack-upgrade/SKILL.md
index 80d6d8b..6c0d86b 100644
--- a/.agents/skills/gstack-upgrade/SKILL.md
+++ b/.agents/skills/gstack-upgrade/SKILL.md
@@ -21,7 +21,7 @@ First, check if auto-upgrade is enabled:
 ```bash
 _AUTO=""
 [ "${GSTACK_AUTO_UPGRADE:-}" = "1" ] && _AUTO="true"
-[ -z "$_AUTO" ] && _AUTO=$($HOME/.agents/skills/gstack/bin/gstack-config get auto_upgrade 2>/dev/null || true)
+[ -z "$_AUTO" ] && _AUTO=$($HOME/.codex/skills/gstack/bin/gstack-config get auto_upgrade 2>/dev/null || true)
 echo "AUTO_UPGRADE=$_AUTO"
 ```
 
@@ -35,7 +35,7 @@ echo "AUTO_UPGRADE=$_AUTO"
 
 **If "Always keep me up to date":**
 ```bash
-$HOME/.agents/skills/gstack/bin/gstack-config set auto_upgrade true
+$HOME/.codex/skills/gstack/bin/gstack-config set auto_upgrade true
 ```
 Tell user: "Auto-upgrade enabled. Future updates will install automatically." Then proceed to Step 2.
 
@@ -61,9 +61,9 @@ Tell user the snooze duration: "Next reminder in 24h" (or 48h or 1 week, dependi
 
 **If "Never ask again":**
 ```bash
-$HOME/.agents/skills/gstack/bin/gstack-config set update_check false
+$HOME/.codex/skills/gstack/bin/gstack-config set update_check false
 ```
-Tell user: "Update checks disabled. Run `$HOME/.agents/skills/gstack/bin/gstack-config set update_check true` to re-enable."
+Tell user: "Update checks disabled. Run `$HOME/.codex/skills/gstack/bin/gstack-config set update_check true` to re-enable."
 Continue with the current skill.
 
 ### Step 2: Detect install type
@@ -187,7 +187,7 @@ When invoked directly as `$gstack-upgrade` (not from a preamble):
 
 1. Force a fresh update check (bypass cache):
 ```bash
-$HOME/.agents/skills/gstack/bin/gstack-update-check --force
+$HOME/.codex/skills/gstack/bin/gstack-update-check --force
 ```
 Use the output to determine if an upgrade is available.
 
diff --git a/.agents/skills/gstack/SKILL.md b/.agents/skills/gstack/SKILL.md
index a262f71..8604b11 100644
--- a/.agents/skills/gstack/SKILL.md
+++ b/.agents/skills/gstack/SKILL.md
@@ -13,18 +13,18 @@ description: |
 ## Preamble (run first)
 
 ```bash
-_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+_UPD=$($HOME/.codex/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
 [ -n "$_UPD" ] && echo "$_UPD" || true
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
 find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
-_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_CONTRIB=$($HOME/.codex/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
 echo "BRANCH: $_BRANCH"
 ```
 
-If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.agents/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
+If output shows `UPGRADE_AVAILABLE <old> <new>`: read `$HOME/.codex/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue.
 
 ## AskUserQuestion Format
 
@@ -85,8 +85,12 @@ Auto-shuts down after 30 min idle. State persists between calls (cookies, tabs,
 ```bash
 _ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
 B=""
-[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.codex/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/browse" ] && B="$HOME/.codex/skills/gstack/browse/dist/browse"
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
 if [ -x "$B" ]; then
diff --git a/AGENTS.md b/AGENTS.md
index ab8f154..1072bc6 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -108,7 +108,7 @@ branch first. Prompt and skill changes have invisible couplings.
 
 ## Deploying to the active skill
 
-The active Codex skill lives at `~/.agents/skills/gstack/`. After making changes:
+The active Codex skill lives at `~/.codex/skills/gstack/`. After making changes:
 
 1. Push your branch.
 2. Refresh the checkout you installed from.
@@ -117,6 +117,6 @@ The active Codex skill lives at `~/.agents/skills/gstack/`. After making changes
 Or copy binaries directly:
 
 ```bash
-cp browse/dist/browse ~/.agents/skills/gstack/browse/dist/browse
-cp browse/dist/find-browse ~/.agents/skills/gstack/browse/dist/find-browse
+cp browse/dist/browse ~/.codex/skills/gstack/browse/dist/browse
+cp browse/dist/find-browse ~/.codex/skills/gstack/browse/dist/find-browse
 ```
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8fd14ff..59a6110 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,8 @@
 
 - **You can now use gstack from Codex CLI.** The same skill templates now generate native Codex skill files in `.agents/skills/`, so you get `$gstack-review`, `$gstack-ship`, `$gstack-qa`, and the rest without maintaining a second prompt stack.
 - **Claude and Codex stay in sync automatically.** One `.tmpl` template now produces both hosts' `SKILL.md` files, which means new workflow improvements ship to both environments from a single source of truth.
-- **Codex setup is one command.** Run `./setup --host codex` to register the generated Codex skills in `~/.agents/skills/`, including the shared browse binary and helper docs.
+- **Codex setup is one command.** Run `./setup --host codex` to register the generated Codex skills in `~/.codex/skills/`, including the shared browse binary and helper docs.
+- **Codex installs now resolve the right browse binary out of the box.** The Codex installer and browse discovery logic now agree on `~/.codex/skills/` as the active install root, while still using `.agents/skills/` as the generated source tree inside the repo.
 - **Contributing now covers the dual-host workflow.** The docs, tests, and CI checks all verify both Claude and Codex generation, so it is much harder to accidentally fix one host and break the other.
 
 ### For contributors
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index f14b4f7..c1296bd 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -6,7 +6,7 @@ Thanks for wanting to make gstack better. Whether you're fixing a typo in a skil
 
 gstack skills are Markdown files discovered from a `skills/` directory. Claude uses
 `~/.claude/skills/gstack/`; Codex uses the generated `.agents/skills/` tree and
-installs it into `~/.agents/skills/`. When you're developing gstack itself, you
+installs it into `~/.codex/skills/`. When you're developing gstack itself, you
 want both hosts to read from your working tree so edits take effect instantly.
 
 That's what dev mode does. It symlinks your repo into the local `.claude/skills/`
diff --git a/README.md b/README.md
index 5ae358d..4c6e8a1 100644
--- a/README.md
+++ b/README.md
@@ -136,18 +136,18 @@ Open Claude Code and paste this. Claude will do the rest.
 ### Codex Setup
 
 Clone gstack anywhere convenient, then let `setup --host codex` register the generated
-Codex skill tree in `~/.agents/skills/`:
+Codex skill tree in `~/.codex/skills/`:
 
 ```bash
-git clone https://github.com/garrytan/gstack.git ~/.codex/skills/gstack
-cd ~/.codex/skills/gstack
+git clone https://github.com/garrytan/gstack.git ~/src/gstack
+cd ~/src/gstack
 ./setup --host codex
 ```
 
 This links:
 
-- `~/.agents/skills/gstack/` → the generated root Codex skill
-- `~/.agents/skills/gstack-review`, `~/.agents/skills/gstack-ship`, etc.
+- `~/.codex/skills/gstack/` → the generated root Codex skill
+- `~/.codex/skills/gstack-review`, `~/.codex/skills/gstack-ship`, etc.
 - the shared browse binary and helper docs through the checked-in `.agents/skills/` tree
 
 If you keep repo-level agent instructions, add a short `gstack` section to `AGENTS.md`
@@ -164,12 +164,12 @@ Real files get committed to your repo (not a submodule), so `git clone` just wor
 
 - Skill files (Markdown prompts) in `~/.claude/skills/gstack/` (or `.claude/skills/gstack/` for project installs)
 - Symlinks at `~/.claude/skills/browse`, `~/.claude/skills/qa`, `~/.claude/skills/review`, etc. pointing into the gstack directory
-- Codex skill files in `~/.agents/skills/gstack*/`
+- Codex skill files in `~/.codex/skills/gstack*/`
 - Browser binary at `browse/dist/browse` (~58MB, gitignored)
 - `node_modules/` (gitignored)
 - `/retro` saves JSON snapshots to `.context/retros/` in your project for trend tracking
 
-Everything lives inside `.claude/` or `.agents/`. Nothing touches your PATH or runs in the background.
+Everything lives inside `.claude/`, `.agents/`, or `~/.codex/skills/`. Nothing touches your PATH or runs in the background.
 
 ---
 
diff --git a/bin/dev-setup b/bin/dev-setup
index a679ec5..3323740 100755
--- a/bin/dev-setup
+++ b/bin/dev-setup
@@ -2,9 +2,9 @@
 # Set up gstack for local development — test skills from within this repo.
 #
 # Creates .claude/skills/gstack → repo root so Claude Code resolves skills from
-# your working tree. Codex reads the checked-in .agents/skills tree directly, so
-# this script also ensures the support symlinks inside .agents/skills/gstack are
-# present for browse/setup/helper-doc access.
+# your working tree. Codex installs are linked from ~/.codex/skills into the
+# checked-in .agents/skills tree, so this script also ensures the support
+# symlinks inside .agents/skills/gstack are present for browse/setup/helper-doc access.
 #
 # Also copies .env from the main worktree if this is a git worktree so API keys
 # carry over automatically.
@@ -68,7 +68,8 @@ ln -snf ../../../package.json "$REPO_ROOT/.agents/skills/gstack/package.json"
 ln -snf ../../../VERSION "$REPO_ROOT/.agents/skills/gstack/VERSION"
 
 echo ""
-echo "Dev mode active. Claude resolves from .claude/skills/gstack; Codex resolves from .agents/skills/."
+echo "Dev mode active. Claude resolves from .claude/skills/gstack."
+echo "Codex uses the generated .agents/skills tree once you run ./setup --host codex."
 echo "Edit skill docs, regenerate if needed, and test immediately — no copy/deploy needed."
 echo ""
 echo "To tear down: bin/dev-teardown"
diff --git a/bin/dev-teardown b/bin/dev-teardown
index 1dd3cd4..e55e5c1 100755
--- a/bin/dev-teardown
+++ b/bin/dev-teardown
@@ -1,5 +1,6 @@
 #!/usr/bin/env bash
-# Remove local Claude dev symlinks. Codex uses the checked-in .agents/skills tree.
+# Remove local Claude dev symlinks. Codex installs continue to point at the
+# generated .agents/skills tree via ~/.codex/skills.
 set -e
 
 REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
@@ -33,4 +34,4 @@ if [ ${#removed[@]} -gt 0 ]; then
 else
   echo "No symlinks found."
 fi
-echo "Dev mode deactivated. Global gstack (~/.claude/skills/gstack or ~/.agents/skills/gstack) is now active."
+echo "Dev mode deactivated. Global gstack (~/.claude/skills/gstack or ~/.codex/skills/gstack) is now active."
diff --git a/browse/bin/find-browse b/browse/bin/find-browse
index 9cbd7f8..911ddf3 100755
--- a/browse/bin/find-browse
+++ b/browse/bin/find-browse
@@ -7,11 +7,19 @@ if test -x "$DIR/find-browse"; then
 fi
 # Fallback: basic discovery
 ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
-if [ -n "$ROOT" ] && test -x "$ROOT/.claude/skills/gstack/browse/dist/browse"; then
-  echo "$ROOT/.claude/skills/gstack/browse/dist/browse"
-elif test -x "$HOME/.claude/skills/gstack/browse/dist/browse"; then
-  echo "$HOME/.claude/skills/gstack/browse/dist/browse"
-else
-  echo "ERROR: browse binary not found. Run: cd <skill-dir> && ./setup" >&2
-  exit 1
-fi
+for candidate in \
+  "${ROOT:+$ROOT/.codex/skills/gstack/browse/dist/browse}" \
+  "${ROOT:+$ROOT/.agents/skills/gstack/browse/dist/browse}" \
+  "${ROOT:+$ROOT/.claude/skills/gstack/browse/dist/browse}" \
+  "$HOME/.codex/skills/gstack/browse/dist/browse" \
+  "$HOME/.agents/skills/gstack/browse/dist/browse" \
+  "$HOME/.claude/skills/gstack/browse/dist/browse"
+do
+  if [ -n "$candidate" ] && test -x "$candidate"; then
+    echo "$candidate"
+    exit 0
+  fi
+done
+
+echo "ERROR: browse binary not found. Run: cd <skill-dir> && ./setup" >&2
+exit 1
diff --git a/browse/src/find-browse.ts b/browse/src/find-browse.ts
index 3fdc7f1..fcfd530 100644
--- a/browse/src/find-browse.ts
+++ b/browse/src/find-browse.ts
@@ -9,6 +9,12 @@ import { existsSync } from 'fs';
 import { join } from 'path';
 import { homedir } from 'os';
 
+const SKILL_ROOT_MARKERS = [
+  ['.codex', 'skills', 'gstack'],
+  ['.agents', 'skills', 'gstack'],
+  ['.claude', 'skills', 'gstack'],
+] as const;
+
 // ─── Binary Discovery ───────────────────────────────────────────
 
 function getGitRoot(): string | null {
@@ -24,25 +30,21 @@ function getGitRoot(): string | null {
   }
 }
 
-export function locateBinary(): string | null {
-  const root = getGitRoot();
-  const home = homedir();
+export function locateBinary(options: { root?: string | null; home?: string } = {}): string | null {
+  const root = options.root ?? getGitRoot();
+  const home = options.home ?? homedir();
 
-  // Workspace-local takes priority (for development)
   if (root) {
-    const localAgents = join(root, '.agents', 'skills', 'gstack', 'browse', 'dist', 'browse');
-    if (existsSync(localAgents)) return localAgents;
-
-    const local = join(root, '.claude', 'skills', 'gstack', 'browse', 'dist', 'browse');
-    if (existsSync(local)) return local;
+    for (const marker of SKILL_ROOT_MARKERS) {
+      const local = join(root, ...marker, 'browse', 'dist', 'browse');
+      if (existsSync(local)) return local;
+    }
   }
 
-  const globalAgents = join(home, '.agents', 'skills', 'gstack', 'browse', 'dist', 'browse');
-  if (existsSync(globalAgents)) return globalAgents;
-
-  // Global fallback
-  const global = join(home, '.claude', 'skills', 'gstack', 'browse', 'dist', 'browse');
-  if (existsSync(global)) return global;
+  for (const marker of SKILL_ROOT_MARKERS) {
+    const global = join(home, ...marker, 'browse', 'dist', 'browse');
+    if (existsSync(global)) return global;
+  }
 
   return null;
 }
diff --git a/browse/test/find-browse.test.ts b/browse/test/find-browse.test.ts
index 7ac5a3f..07cba88 100644
--- a/browse/test/find-browse.test.ts
+++ b/browse/test/find-browse.test.ts
@@ -2,14 +2,26 @@
  * Tests for find-browse binary locator.
  */
 
-import { describe, test, expect } from 'bun:test';
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
 import { locateBinary } from '../src/find-browse';
-import { existsSync } from 'fs';
+import { existsSync, mkdtempSync, rmSync, writeFileSync, mkdirSync } from 'fs';
+import { dirname, join } from 'path';
+import { tmpdir } from 'os';
+
+let tempDir: string;
+
+beforeEach(() => {
+  tempDir = mkdtempSync(join(tmpdir(), 'find-browse-test-'));
+});
+
+afterEach(() => {
+  rmSync(tempDir, { recursive: true, force: true });
+});
 
 describe('locateBinary', () => {
   test('returns null when no binary exists at known paths', () => {
-    // This test depends on the test environment — if a real binary exists at
-    // ~/.claude/skills/gstack/browse/dist/browse, it will find it.
+    // This test depends on the test environment — if a real binary exists at a
+    // known skill path, it will find it.
     // We mainly test that the function doesn't throw.
     const result = locateBinary();
     expect(result === null || typeof result === 'string').toBe(true);
@@ -21,4 +33,32 @@ describe('locateBinary', () => {
       expect(existsSync(result)).toBe(true);
     }
   });
+
+  test('prefers .codex over .agents and .claude when multiple binaries exist', () => {
+    const root = join(tempDir, 'repo');
+    const home = join(tempDir, 'home');
+    const codex = join(root, '.codex', 'skills', 'gstack', 'browse', 'dist', 'browse');
+    const agents = join(root, '.agents', 'skills', 'gstack', 'browse', 'dist', 'browse');
+    const claude = join(root, '.claude', 'skills', 'gstack', 'browse', 'dist', 'browse');
+
+    for (const file of [claude, agents, codex]) {
+      mkdirSync(dirname(file), { recursive: true });
+      writeFileSync(file, '');
+    }
+
+    expect(locateBinary({ root, home })).toBe(codex);
+  });
+
+  test('falls back to global Codex install before older locations', () => {
+    const home = join(tempDir, 'home');
+    const codex = join(home, '.codex', 'skills', 'gstack', 'browse', 'dist', 'browse');
+    const agents = join(home, '.agents', 'skills', 'gstack', 'browse', 'dist', 'browse');
+
+    for (const file of [agents, codex]) {
+      mkdirSync(dirname(file), { recursive: true });
+      writeFileSync(file, '');
+    }
+
+    expect(locateBinary({ root: null, home })).toBe(codex);
+  });
 });
diff --git a/pr-body.md b/pr-body.md
index b758173..095b91d 100644
--- a/pr-body.md
+++ b/pr-body.md
@@ -3,6 +3,7 @@
 - add Codex as a first-class gstack host without duplicating workflow logic
 - generate `.agents/skills/gstack*/SKILL.md` from the same `.tmpl` templates as Claude
 - make setup, browse discovery, tests, CI, and docs understand both Claude and Codex installs
+- harden the canonical Codex bootstrap path so installs land in `~/.codex/skills/` and browse resolves there first
 - add Codex-facing repo docs (`AGENTS.md`, `agents/openai.yaml`) and generated support links under `.agents/skills/gstack`
 
 ## What Changed
@@ -19,7 +20,8 @@
 - install/runtime
   - `setup` now supports `--host claude|codex|auto`
   - `bin/dev-setup` and `bin/dev-teardown` cover the Codex support tree alongside Claude dev mode
-  - `browse/src/find-browse.ts` checks `.agents` before `.claude`
+  - Codex installs now link into `~/.codex/skills/`, backed by the generated `.agents/skills/` tree
+  - `browse/src/find-browse.ts` and `browse/bin/find-browse` now prefer `.codex`, then `.agents`, then legacy Claude paths
 - quality gates
   - extend tests for Codex freshness/frontmatter/path safety
   - run Codex dry-run generation in CI
@@ -39,7 +41,7 @@ bun run gen:skill-docs --host codex --dry-run
 bun run skill:check
 TMP_HOME=$(mktemp -d)
 HOME="$TMP_HOME" ./setup --host codex
-HOME="$TMP_HOME" "$TMP_HOME/.agents/skills/gstack/setup" --host codex
+HOME="$TMP_HOME" "$TMP_HOME/.codex/skills/gstack/browse/bin/find-browse"
 ```
 
 ## Pre-Landing Review
@@ -51,3 +53,4 @@ Pre-Landing Review: No issues found.
 - Codex skills are generated only. Do not hand-edit `.agents/skills/*/SKILL.md`.
 - Codex-generated markdown intentionally contains no `.claude/skills` paths.
 - The root Codex skill directory includes support symlinks (`bin`, `browse`, `review`, `setup`, etc.) so helper-doc lookups and browse setup work after `./setup --host codex`.
+- `.agents/skills/` remains the generated source tree in the repo; `~/.codex/skills/` is the active installed skill location.
diff --git a/scripts/gen-skill-docs.ts b/scripts/gen-skill-docs.ts
index e4453f8..abc6d9f 100644
--- a/scripts/gen-skill-docs.ts
+++ b/scripts/gen-skill-docs.ts
@@ -55,7 +55,7 @@ function resolveCodexSkillName(skillDirName: string | null): string {
 }
 
 function getSkillRoot(host: Host): string {
-  return host === 'codex' ? '$HOME/.agents/skills/gstack' : '~/.claude/skills/gstack';
+  return host === 'codex' ? '$HOME/.codex/skills/gstack' : '~/.claude/skills/gstack';
 }
 
 function getInvocation(host: Host, skill: string): string {
@@ -261,13 +261,13 @@ Slug: lowercase, hyphens, max 60 chars (e.g. \`browse-js-no-await\`). Skip if fi
   return `## Preamble (run first)
 
 \`\`\`bash
-_UPD=$($HOME/.agents/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
+_UPD=$($HOME/.codex/skills/gstack/bin/gstack-update-check 2>/dev/null || .agents/skills/gstack/bin/gstack-update-check 2>/dev/null || true)
 [ -n "$_UPD" ] && echo "$_UPD" || true
 mkdir -p ~/.gstack/sessions
 touch ~/.gstack/sessions/"$PPID"
 _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr -d ' ')
 find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true
-_CONTRIB=$($HOME/.agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
+_CONTRIB=$($HOME/.codex/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || .agents/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true)
 _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown")
 echo "BRANCH: $_BRANCH"
 \`\`\`
@@ -351,8 +351,12 @@ If \`NEEDS_SETUP\`:
 \`\`\`bash
 _ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
 B=""
-[ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.codex/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.codex/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$_ROOT/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -n "$_ROOT" ] && [ -x "$_ROOT/.agents/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.agents/skills/gstack/browse/dist/browse"
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.codex/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
+[ -z "$B" ] && [ -x "$HOME/.codex/skills/gstack/browse/dist/browse" ] && B="$HOME/.codex/skills/gstack/browse/dist/browse"
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/find-browse" ] && B=$("$HOME/.agents/skills/gstack/browse/dist/find-browse" 2>/dev/null || true)
 [ -z "$B" ] && [ -x "$HOME/.agents/skills/gstack/browse/dist/browse" ] && B="$HOME/.agents/skills/gstack/browse/dist/browse"
 if [ -x "$B" ]; then
diff --git a/scripts/skill-check.ts b/scripts/skill-check.ts
index 97c417e..97475a2 100644
--- a/scripts/skill-check.ts
+++ b/scripts/skill-check.ts
@@ -33,6 +33,12 @@ const SKILL_FILES = [
   'document-release/SKILL.md',
 ].filter(f => fs.existsSync(path.join(ROOT, f)));
 
+const CODEX_SKILLS = fs.existsSync(path.join(ROOT, '.agents', 'skills'))
+  ? fs.readdirSync(path.join(ROOT, '.agents', 'skills'))
+      .map(dir => path.join('.agents', 'skills', dir, 'SKILL.md'))
+      .filter(file => fs.existsSync(path.join(ROOT, file)))
+  : [];
+
 let hasErrors = false;
 
 // ─── Skills ─────────────────────────────────────────────────
@@ -67,6 +73,21 @@ for (const file of SKILL_FILES) {
 
 // ─── Templates ──────────────────────────────────────────────
 
+console.log('\n  Codex skills:');
+for (const file of CODEX_SKILLS) {
+  const content = fs.readFileSync(path.join(ROOT, file), 'utf-8');
+  const nameMatch = content.match(/^name:\s+(.+)$/m);
+  const descMatch = content.match(/^description:\s+\|$/m);
+  if (!nameMatch || !descMatch || content.includes('.claude/skills/')) {
+    hasErrors = true;
+    console.log(`  \u274c ${file.padEnd(30)} — invalid frontmatter or legacy Claude path`);
+    continue;
+  }
+  console.log(`  \u2705 ${file.padEnd(30)} — frontmatter + paths look good`);
+}
+
+// ─── Templates ──────────────────────────────────────────────
+
 console.log('\n  Templates:');
 const TEMPLATES = [
   { tmpl: 'SKILL.md.tmpl', output: 'SKILL.md' },
@@ -101,6 +122,7 @@ for (const file of SKILL_FILES) {
 console.log('\n  Freshness:');
 try {
   execSync('bun run scripts/gen-skill-docs.ts --dry-run', { cwd: ROOT, stdio: 'pipe' });
+  execSync('bun run scripts/gen-skill-docs.ts --host codex --dry-run', { cwd: ROOT, stdio: 'pipe' });
   console.log('  \u2705 All generated files are fresh');
 } catch (err: any) {
   hasErrors = true;
diff --git a/setup b/setup
index 8f0b4de..08f27f2 100755
--- a/setup
+++ b/setup
@@ -75,7 +75,7 @@ link_claude_global() {
 }
 
 link_codex_global() {
-  local skills_dir="$HOME/.agents/skills"
+  local skills_dir="$HOME/.codex/skills"
   mkdir -p "$skills_dir"
 
   ln -snf "$CODEX_ROOT" "$skills_dir/gstack"
@@ -150,7 +150,7 @@ case "$HOST" in
     link_codex_global
     ;;
   auto)
-    if command -v codex >/dev/null 2>&1; then
+    if command -v codex >/dev/null 2>&1 || [ -d "$HOME/.codex" ]; then
       link_codex_global
     fi
     if [ -d "$HOME/.claude" ]; then
diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts
index 4f634db..8e290e9 100644
--- a/test/gen-skill-docs.test.ts
+++ b/test/gen-skill-docs.test.ts
@@ -164,6 +164,12 @@ describe('gen-skill-docs', () => {
     }
   });
 
+  test('root Codex skill uses the canonical install path', () => {
+    const content = fs.readFileSync(codexSkillPath('.'), 'utf-8');
+    expect(content).toContain('$HOME/.codex/skills/gstack');
+    expect(content).not.toContain('$HOME/.claude/skills/gstack');
+  });
+
   test('templates contain placeholders', () => {
     const rootTmpl = fs.readFileSync(path.join(ROOT, 'SKILL.md.tmpl'), 'utf-8');
     expect(rootTmpl).toContain('{{COMMAND_REFERENCE}}');
diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts
index 1514936..b4c2ff6 100644
--- a/test/skill-validation.test.ts
+++ b/test/skill-validation.test.ts
@@ -250,6 +250,28 @@ describe('Codex-generated SKILL.md validation', () => {
       expect(lines.length).toBeGreaterThan(0);
     }
   });
+
+  test('generated Codex skills do not contain Claude install paths', () => {
+    for (const skill of CODEX_SKILLS) {
+      const content = fs.readFileSync(codexSkillPath(skill), 'utf-8');
+      expect(content).not.toContain('.claude/skills/');
+    }
+  });
+
+  test('root Codex skill points at the canonical Codex install path', () => {
+    const content = fs.readFileSync(codexSkillPath('.'), 'utf-8');
+    expect(content).toContain('$HOME/.codex/skills/gstack');
+    expect(content).not.toContain('$HOME/.claude/skills/gstack');
+  });
+
+  test('Codex browse setup prefers ~/.codex before repo fallbacks', () => {
+    const content = fs.readFileSync(codexSkillPath('.'), 'utf-8');
+    expect(content).toContain('$HOME/.codex/skills/gstack/browse/dist/find-browse');
+    expect(content).toContain('$HOME/.agents/skills/gstack/browse/dist/find-browse');
+    expect(content.indexOf('$HOME/.codex/skills/gstack/browse/dist/find-browse')).toBeLessThan(
+      content.indexOf('$HOME/.agents/skills/gstack/browse/dist/find-browse')
+    );
+  });
 });
 
 // --- Update check preamble validation ---