hack-ink · yvette-carlisle · Apr 20, 2026 · Apr 20, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -8,7 +8,7 @@ These instructions define repository-specific execution rules and scope limits f
 
 ## 1.1 Workspace Automation (cargo make)
 
-- `Makefile.toml` is the source of truth for task names and behavior.
-- Run `cargo make` from the repository root, and use it whenever an equivalent task exists.
-- Run standalone commands only when `Makefile.toml` does not cover the capability or cannot produce the required effect for the current task.
-- When task details are needed, inspect `Makefile.toml` directly or run `cargo make --list-all-steps`.
+- `Makefile.toml` is the source of truth for generic repo gates: `fmt`, `lint`, `test`, and `checks`.
+- Run `cargo make` from the repository root when you need one of those generic repo gates.
+- Smoke and performance validation entrypoints live under `scripts/smoke/` and `scripts/perf/`; run those scripts directly instead of adding them back to `Makefile.toml`.
+- When task details are needed, inspect `Makefile.toml` for generic gates and the relevant script or runbook for smoke/perf flows.
diff --git a/Makefile.toml b/Makefile.toml
@@ -125,147 +125,6 @@ args = [
 ]
 
 
-# Smoke
-# | task                         | type      | cwd |
-# | ---------------------------- | --------- | --- |
-# | smoke-macos                  | composite |     |
-# | smoke-self-check-macos       | composite |     |
-# | replay-scroll-capture        | command   |     |
-# | replay-scroll-capture-self-check | command |   |
-# | smoke-live-loupe-perf-macos  | command   |     |
-# | smoke-live-loupe-self-check-macos     | command | |
-# | analyze-scroll-capture-trace | command   |     |
-
-[tasks.smoke-macos]
-workspace = false
-dependencies = [
-	"smoke-live-loupe-perf-macos",
-	"replay-scroll-capture",
-]
-
-[tasks.smoke-self-check-macos]
-workspace = false
-dependencies = [
-	"smoke-live-loupe-self-check-macos",
-	"replay-scroll-capture-self-check",
-]
-
-[tasks.replay-scroll-capture]
-workspace = false
-command = "cargo"
-args = [
-	"run",
-	"-p",
-	"rsnap-overlay",
-	"--example",
-	"scroll_capture_replay",
-	"--",
-	"--force-worker-pairwise",
-]
-
-[tasks.analyze-scroll-capture-trace]
-workspace = false
-command = "cargo"
-args = [
-	"run",
-	"-p",
-	"rsnap-overlay",
-	"--example",
-	"scroll_capture_replay",
-	"--",
-	"--force-worker-pairwise",
-	"--json",
-	"--summary-only",
-]
-
-[tasks.smoke-live-loupe-perf-macos]
-workspace = false
-command = "scripts/live-loupe-perf-smoke-macos.sh"
-
-[tasks.replay-scroll-capture-self-check]
-workspace = false
-command = "cargo"
-args = [
-	"test",
-	"-p",
-	"rsnap-overlay",
-	"replay_recorded_live_trace_round_trips_one_commit",
-	"--lib",
-]
-
-[tasks.smoke-live-loupe-self-check-macos]
-workspace = false
-command = "scripts/live-loupe-perf-smoke-macos.sh"
-args = ["--self-check"]
-
-
-# Performance
-# | task                          | type      | cwd |
-# | ----------------------------- | --------- | --- |
-# | perf-local                    | composite |     |
-# | perf-macos                    | composite |     |
-# | perf-self-check-macos         | composite |     |
-# | perf-bench-settings-window    | command   |     |
-# | perf-bench-scroll-capture     | command   |     |
-
-[tasks.perf-local]
-workspace = false
-dependencies = [
-	"perf-bench-settings-window",
-	"perf-bench-scroll-capture",
-]
-
-[tasks.perf-macos]
-workspace = false
-dependencies = [
-	"perf-local",
-	"smoke-macos",
-]
-
-[tasks.perf-self-check-macos]
-workspace = false
-dependencies = [
-	"perf-local",
-	"smoke-self-check-macos",
-]
-
-[tasks.perf-bench-settings-window]
-workspace = false
-command = "cargo"
-args = [
-	"bench",
-	"-p",
-	"rsnap",
-	"--bench",
-	"settings_window",
-	"--",
-	"--sample-size",
-	"10",
-	"--warm-up-time",
-	"0.1",
-	"--measurement-time",
-	"0.1",
-]
-
-[tasks.perf-bench-scroll-capture]
-workspace = false
-command = "cargo"
-args = [
-	"bench",
-	"-p",
-	"rsnap-overlay",
-	"--bench",
-	"scroll_capture",
-	"--",
-	"--sample-size",
-	"10",
-	"--warm-up-time",
-	"0.1",
-	"--measurement-time",
-	"0.1",
-]
-
-
 # Format
 # | task           | type      | cwd |
 # | -------------- | --------- | --- |

diff --git a/README.md b/README.md
@@ -117,60 +117,61 @@ cargo make test
 Scroll-capture verification now starts with deterministic replay instead of the old GUI smoke:
 
 ```sh
-cargo make replay-scroll-capture
-cargo make replay-scroll-capture-self-check
+scripts/smoke/replay-scroll-capture.sh
+scripts/smoke/replay-scroll-capture-self-check.sh
 ```
 
 For semantic trace analysis (first bad frame, under-consumption, overshoot), use:
 
 ```sh
-cargo make analyze-scroll-capture-trace
+scripts/smoke/analyze-scroll-capture-trace.sh
 ```
 
 The remaining macOS GUI smoke harnesses are still available for live-loupe and
 desktop-session checks:
 
 ```sh
-cargo make smoke-self-check-macos
-cargo make smoke-macos
+scripts/smoke/self-check-macos.sh
+scripts/smoke/macos.sh
 ```
 
-`cargo make replay-scroll-capture` and `cargo make analyze-scroll-capture-trace`
-now force the latest recorded live trace through the same worker-pairwise
-commit path that current macOS production scroll capture uses. They are
-trace-driven rather than scenario-driven, so they expect at least one recorded
-trace under `~/Library/Application Support/ink.hack.rsnap/scroll-capture-traces/`
-unless you pass `--trace <manifest-path>` directly to the example. Use the
-direct example without `--force-worker-pairwise` only when you intentionally
-want to compare the legacy recorded-source replay mode. `cargo make
-replay-scroll-capture-self-check` is the repo-local fallback when you want to
-verify the replay harness itself without relying on a user-recorded trace.
-`cargo make smoke-self-check-macos` and `cargo make smoke-macos` still drive
-the logged-in macOS live-loupe smoke path and require the expected Screen
-Recording / automation permissions.
+`scripts/smoke/replay-scroll-capture.sh` and
+`scripts/smoke/analyze-scroll-capture-trace.sh` now force the latest recorded
+live trace through the same worker-pairwise commit path that current macOS
+production scroll capture uses. They are trace-driven rather than
+scenario-driven, so they expect at least one recorded trace under
+`~/Library/Application Support/ink.hack.rsnap/scroll-capture-traces/` unless
+you pass `--trace <manifest-path>` directly to the `scroll_capture_replay`
+example. Run that example directly without `--force-worker-pairwise` only when
+you intentionally want to compare the legacy recorded-source replay mode.
+`scripts/smoke/replay-scroll-capture-self-check.sh` is the repo-local fallback
+when you want to verify the replay harness itself without relying on a
+user-recorded trace. `scripts/smoke/self-check-macos.sh` and
+`scripts/smoke/macos.sh` still drive the logged-in macOS live-loupe smoke path
+and require the expected Screen Recording / automation permissions.
 
 For `XY-185` style downward scroll-capture work, treat the verification order as:
 
 1. deterministic tests and `cargo make checks`
-2. `cargo make replay-scroll-capture`
-3. `cargo make analyze-scroll-capture-trace`
+2. `scripts/smoke/replay-scroll-capture.sh`
+3. `scripts/smoke/analyze-scroll-capture-trace.sh`
 4. one fresh release live touchpad run with a newly recorded trace
 
 Repo-native performance entrypoints are available for deterministic benches and
 dedicated smoke:
 
 ```sh
-cargo make perf-local
-cargo make perf-self-check-macos
-cargo make perf-macos
+scripts/perf/local.sh
+scripts/perf/self-check-macos.sh
+scripts/perf/macos.sh
 ```
 
-Use `cargo make perf-local` for component-render and scroll-capture regressions
-that should stay comparable on a normal development machine. Use
-`cargo make perf-self-check-macos` to validate the dedicated macOS smoke
-environment, and `cargo make perf-macos` only on a logged-in desktop session
-when you need end-to-end GUI performance evidence. The durable runbook for
-command selection and baseline comparison lives at
+Use `scripts/perf/local.sh` for component-render and scroll-capture regressions
+that should stay comparable on a normal development machine. Use
+`scripts/perf/self-check-macos.sh` to validate the dedicated macOS smoke
+environment, and run `scripts/perf/macos.sh` only on a logged-in desktop session
+when you need end-to-end GUI performance evidence. The durable runbook for
+command selection and baseline comparison lives at
 `docs/runbook/performance-validation.md`.
 
 The capture-session contract lives at `docs/spec/capture-session.md`.

diff --git a/WORKFLOW.md b/WORKFLOW.md
@@ -36,7 +36,8 @@ read_first = [
   "docs/policy.md",
 ]
 +++
-Use `cargo make` whenever an equivalent task exists.
+Use `cargo make` for the generic repo gates defined in `Makefile.toml`.
+Use `scripts/smoke/` and `scripts/perf/` for smoke/perf validation entrypoints that intentionally live outside `Makefile.toml`.
 
 Use single-line `maestro/commit/1` JSON commit messages for local commits. Commit messages describe the tree change only; do not encode landing, CI, or closeout state.
 
@@ -58,7 +59,7 @@ Route documentation updates by class: behavioral or schema changes go in `docs/s
 
 When capture-session behavior, scroll-capture behavior, or performance contracts change, update the relevant docs in the same lane. The usual authority is `docs/spec/capture-session.md`, `docs/spec/performance.md`, and `docs/runbook/performance-validation.md`.
 
-Use deterministic validation first. Reach for `cargo make replay-scroll-capture`, `cargo make analyze-scroll-capture-trace`, or the macOS smoke and perf tasks only when the changed surface actually needs that evidence; do not treat dedicated live macOS smoke as a default PR gate.
+Use deterministic validation first. Reach for `scripts/smoke/replay-scroll-capture.sh`, `scripts/smoke/analyze-scroll-capture-trace.sh`, or the `scripts/smoke/` and `scripts/perf/` macOS entrypoints only when the changed surface actually needs that evidence; do not treat dedicated live macOS smoke as a default PR gate.
 
 Do not claim work is complete, fixed, or passing without fresh verification evidence from the selected repo gate or another command that directly proves the claim.
 

diff --git a/docs/index.md b/docs/index.md
@@ -8,7 +8,8 @@ The active split below is by question type, not by human-versus-agent audience.
 ## Read order
 
 - Read `docs/policy.md` for document contracts and placement rules.
-- Read `Makefile.toml` when the task depends on repo task names or execution entrypoints.
+- Read `Makefile.toml` when the task depends on generic repo gates such as `fmt`, `lint`, `test`, or `checks`.
+- Read `scripts/smoke/` and `scripts/perf/` when the task depends on smoke or performance validation entrypoints.
 - Then choose one primary lane:
   - `docs/spec/index.md` when the question is "what must be true?"
   - `docs/runbook/index.md` when the question is "which sequence should I execute?"
@@ -34,7 +35,8 @@ The active split below is by question type, not by human-versus-agent audience.
   `docs/reference/workspace-layout.md`
 - Need durable rationale for the architecture reset ->
   `docs/decisions/native-host-rust-core-reset.md`
-- Need repo task names or automation entrypoints -> `Makefile.toml`
+- Need generic repo gate names -> `Makefile.toml`
+- Need smoke or perf validation entrypoints -> `scripts/smoke/` and `scripts/perf/`
 - Need documentation placement or authoring rules -> `docs/policy.md`
 
 ## Retrieval rules

diff --git a/docs/policy.md b/docs/policy.md
@@ -98,7 +98,8 @@ Decision header:
 - Procedural router: `docs/runbook/index.md`
 - Descriptive router: `docs/reference/index.md`
 - Decision router: `docs/decisions/index.md`
-- Repo task and automation entrypoints: `Makefile.toml`
+- Generic repo gates: `Makefile.toml`
+- Smoke/perf validation entrypoints: `scripts/smoke/` and `scripts/perf/`
 
 ## LLM reading guidance
 
@@ -110,7 +111,8 @@ When answering a repository question:
    - "Which sequence should I execute?" -> `docs/runbook/index.md`
    - "How is it currently organized or implemented?" -> `docs/reference/index.md`
    - "Why was this tradeoff accepted?" -> `docs/decisions/index.md`
-3. Read `Makefile.toml` when the task depends on repository automation or named tasks.
+3. Read `Makefile.toml` when the task depends on generic repo gates such as `fmt`, `lint`, `test`, or `checks`.
+4. Read `scripts/smoke/`, `scripts/perf/`, and the relevant runbook when the task depends on smoke or performance validation entrypoints.
 
 ## Update workflow
 

diff --git a/docs/reference/workspace-layout.md b/docs/reference/workspace-layout.md
@@ -34,7 +34,7 @@ For the active target architecture and migration direction, read:
 | `packages/rsnap-overlay/` | Rust-core session/rendering crate: capture-session logic, overlay rendering, capture backend integration, worker runtime, and scroll-capture stitching/replay semantics, with any remaining macOS host adapters quarantined behind explicit host modules |
 | `docs/` | Agent-facing repository docs split into `spec`, `runbook`, `reference`, and `decisions` |
 | `assets/` | Shared app-icon and tray-icon source plus generated bundle/runtime assets |
-| `scripts/` | Packaging and dedicated macOS smoke helpers |
+| `scripts/` | Packaging helpers plus structured smoke/perf entrypoints under `scripts/smoke/` and `scripts/perf/` |
 | `.github/` | CI workflows and repository rules |
 
 This top-level split reflects the codebase as checked in today. It remains useful for navigation,

diff --git a/docs/runbook/architecture-reset-implementation.md b/docs/runbook/architecture-reset-implementation.md
@@ -71,11 +71,11 @@ Start with the smallest checked-in command set that matches the slice:
 - Rust logic or cross-platform product behavior:
   run `cargo make test`
 - scroll-capture behavior or replay-sensitive stitching logic:
-  run `cargo make replay-scroll-capture`
+  run `scripts/smoke/replay-scroll-capture.sh`
 - performance-sensitive rendering or interaction:
-  run `cargo make perf-local`
+  run `scripts/perf/local.sh`
 - dedicated macOS desktop readiness without claiming full live acceptance:
-  run `cargo make smoke-self-check-macos`
+  run `scripts/smoke/self-check-macos.sh`
 
 If the slice changes live desktop behavior materially, finish with the relevant manual validation
 from `docs/runbook/architecture-reset-validation.md`.

diff --git a/docs/runbook/index.md b/docs/runbook/index.md
@@ -20,7 +20,7 @@ Question this index answers: "which sequence should I execute?"
 - You need durable rationale for why an accepted tradeoff exists; read
   `docs/decisions/index.md`.
 - You need broad documentation policy or repo task-entrypoint rules; read
-  `docs/policy.md` or `Makefile.toml` instead.
+  `docs/policy.md`, `Makefile.toml`, or the scripts under `scripts/smoke/` and `scripts/perf/` instead.
 
 ## What belongs in `docs/runbook/`