From 6007b4b6bb2a70edf7f251544d0eb44cd262dc72 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Sat, 25 Apr 2026 09:28:02 +0200 Subject: [PATCH] feat: add mythos slop-hunt + three patterns colliding posts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - New post: "Mythos slop-hunt: oracle-gated audits in practice" — concrete oracle pair, three pitfalls, and three findings from running the Mythos scaffold over the rivet codebase - New post: "Three patterns colliding: Karpathy's LLM Wiki, oracle-gated agents, and typed compliance" — reflection on what the three patterns together mean for safety-critical software - Add tree shortcode (templates/shortcodes/tree.html + sass/_tree.scss) used by the mythos post for clean directory listings; wire into main.scss Co-Authored-By: Claude Opus 4.7 (1M context) --- content/blog/2026-04-25-mythos-slop-hunt.md | 239 ++++++++++++++++++ .../2026-04-25-three-patterns-colliding.md | 140 ++++++++++ sass/_tree.scss | 79 ++++++ sass/main.scss | 1 + templates/shortcodes/tree.html | 38 +++ 5 files changed, 497 insertions(+) create mode 100644 content/blog/2026-04-25-mythos-slop-hunt.md create mode 100644 content/blog/2026-04-25-three-patterns-colliding.md create mode 100644 sass/_tree.scss create mode 100644 templates/shortcodes/tree.html diff --git a/content/blog/2026-04-25-mythos-slop-hunt.md b/content/blog/2026-04-25-mythos-slop-hunt.md new file mode 100644 index 0000000..ed4a558 --- /dev/null +++ b/content/blog/2026-04-25-mythos-slop-hunt.md @@ -0,0 +1,239 @@ ++++ +title = "Mythos slop-hunt: oracle-gated audits in practice" +description = "The Anthropic red-team Mythos scaffold runs four prompts — rank → discover → validate → emit — gated by a mechanical oracle that either fires or it doesn't. We pointed it at our own codebase to hunt slop: typed-but-unwired code, orphan modules, aspirational scaffolding. 
Here is the v2.2 oracle pair we settled on, the three pitfalls the audit surfaced, and three concrete findings that produced ~370 lines of orphan deletion plus three approved-but-unrealized requirements wired to live tested implementations." +date = 2026-04-25 +draft = false +[taxonomies] +tags = ["verification", "process", "deep-dive"] +authors = ["Ralf Anton Beier"] ++++ + +{% note(kind="tip") %} +**Reading order for this stack** — if you arrived here cold, [*Three patterns colliding*](/blog/three-patterns-colliding/) is the synthesis: why this method belongs alongside Karpathy's LLM Wiki and typed compliance. This post is the method itself. The [v0.1.0 announcement](/blog/rivet-v0.1.0/) covers what rivet is and how to install it. +{% end %} + +{% insight() %} +The Anthropic red team's Mythos scaffold is four prompts and one rule: a mechanical oracle either fires or it doesn't, and a fresh-session validator re-proves anything before it ships as an artifact. We adapted it to hunt *slop* — typed-but-unwired code, orphan modules, aspirational scaffolding — by pairing two oracles instead of one: excision (primary, ground-truth reachability) and symbol-scoped trace (interpretive, classifies the kind of slop). Round one exposed three bugs in our own oracle design. Round two, with the v2.2 fixes, produced three confirmed findings — DD-064, DD-065, DD-066 — and three approved-but-unrealized requirements newly wired and tested. ~370 LOC of orphan code deleted, traceability coverage 36.9% → 39.5% on a single branch. The audit reached, in a single session, a depth that would have taken quarters of work as a manual review. +{% end %} + +## The four prompts + +`scripts/mythos/` in [rivet](https://github.com/pulseengine/rivet/tree/feat/agent-pipelines-foundation/scripts/mythos) ships the pipeline. 
The shape is the same as the [SDD post](/blog/spec-driven-development-is-half-the-loop/) described in the abstract; this is the concrete oracle-and-prompt pair tuned for slop. + +{% tree(root="scripts/mythos/") %} +HOWTO.md | pipeline overview + oracle design +rank.md | score every source file 1-5 by slop likelihood +discover.md | minimal prompt + oracle requirement; one agent per file +validate.md | fresh-session validator; reruns the oracle +emit.md | confirmed finding → draft design-decision artifact +{% end %} + +The flow: + +{% mermaid() %} +flowchart TB + rank["rank.md · 1 agent · read-only
score each file 1-5"] + subgraph discover_block["discover.md — N agents in parallel · isolated worktrees"] + direction LR + d1["agent · file A"] + d2["agent · file B"] + dn["agent · file N"] + end + excision["excision oracle · primary
stub symbol · cargo build/test/clippy + rivet validate + commits
baseline-match required"] + trace["symbol trace · interpretive
git log -L · rivet artifacts · inline annotations
classifies orphan vs aspirational"] + validate["validate.md · fresh session · no discovery context
re-runs both oracles"] + emit["emit.md
draft design-decision artifact · verbatim oracle output in rationale"] + + rank --> discover_block + discover_block --> excision + excision --> trace + trace --> validate + validate --> emit + + classDef phase fill:#13161f,stroke:#3d4258,color:#8b90a0; + classDef agent fill:#1a1d27,stroke:#6c8cff,color:#e1e4ed; + classDef gate fill:#1a1d27,stroke:#fbbf24,color:#e1e4ed; + classDef interp fill:#1a1d27,stroke:#c084fc,color:#e1e4ed; + classDef good fill:#1a1d27,stroke:#4ade80,color:#e1e4ed; + + class discover_block phase; + class rank,d1,d2,dn,validate agent; + class excision gate; + class trace interp; + class emit good; +{% end %} + +Run order: ranker classifies the corpus → for each rank-5 file, one parallel discover agent in an isolated git worktree → excision must pass before trace runs → validator in a *separate* fresh session re-runs both oracles → emit produces the audit-trail artifact. One agent per file is load-bearing — parallel coverage of independent files finds diverse bugs; one agent across the whole codebase converges on surface issues. + +The agent topology is *one supervisor, many workers*. The supervisor holds the plan, dispatches workers, collects their structured outputs; the workers run in fresh contexts with narrow tasks and no awareness of each other. This is what keeps the discipline — a worker that hallucinates a finding cannot influence its peers, and the validator that re-runs the oracle has no exposure to the discovery agent's reasoning. + +{% mermaid() %} +flowchart TB + human["human curator
scopes the run · approves emit drafts"] + rank_out["rank.md output
file scores 1-5"] + sup["supervisor agent
holds plan · dispatches workers · collects results"] + + subgraph discoverers["discover workers · parallel · isolated git worktrees"] + direction LR + d1["worker A
file rivet-core/foo.rs"] + d2["worker B
file rivet-core/bar.rs"] + dn["worker N
file rivet-cli/baz.rs"] + end + + subgraph validators["validator workers · fresh sessions · no discovery context"] + direction LR + v1["validator A
finding from worker A"] + vn["validator N
finding from worker N"] + end + + artifact["audit artifact
design-decision · status: draft
rationale carries verbatim oracle output"] + + human -->|"intent · what to audit"| sup + rank_out --> sup + sup -->|"spawn 1-per-file"| discoverers + discoverers -->|"structured findings"| sup + sup -->|"spawn fresh-session validators"| validators + validators -->|"verdicts"| sup + sup -->|"emit confirmed only"| artifact + artifact -->|"review · promote draft → approved"| human + + classDef human fill:#1a1d27,stroke:#fbbf24,color:#e1e4ed,stroke-width:1.5px; + classDef sup fill:#1a1d27,stroke:#6c8cff,color:#e1e4ed,stroke-width:2px; + classDef worker fill:#0f1117,stroke:#4a5068,color:#8b90a0; + classDef artifact fill:#1a1d27,stroke:#4ade80,color:#e1e4ed; + classDef grp fill:#13161f,stroke:#3d4258,color:#8b90a0; + + class human human; + class sup sup; + class d1,d2,dn,v1,vn worker; + class artifact artifact; + class discoverers,validators grp; +{% end %} + +The supervisor is the only agent with full plan visibility. Discoverers have one file each; validators have one finding each. The "fresh session" property is structural — validators can't see what the discoverer hypothesised, only the patch and the claim, so they verify the claim mechanically rather than rationalising it. + +## The two oracles + +The original Mythos paper uses one oracle (failing PoC test). Slop hunting needs two because the bug class — *unexercised typed-but-unwired code* — has two distinguishable signatures: + +**Excision (primary, ground-truth reachability).** The agent submits a patch that stubs the target symbol with `unimplemented!("slop-hunt excision: {{file}}::{{symbol}}")` or — for whole-module excision — annotates the `mod` declaration with `#[cfg(not(all()))]` (not `#[cfg(never)]`; that one trips `unexpected_cfgs` under `-D warnings` and fabricates a non-baseline lint error, see *pitfalls* below). 
Then run, on the excised tree: + +``` +cargo build --workspace --all-targets +cargo test --workspace --no-fail-fast +cargo clippy --workspace --all-targets -- -D warnings +cargo run --bin rivet --quiet -- validate +cargo run --bin rivet --quiet -- commits +``` + +`build` and `test` must exit 0. `clippy`, `validate`, `commits` must match a *baseline* recorded on a pristine checkout — pristine main is often non-zero on these for unrelated reasons (pre-existing lint noise, schema drift). Any *new* error after excision means the symbol is exercised; the finding is rejected. If the matrix passes, the symbol is unreachable. + +**Symbol-scoped trace (interpretive, classifies kind).** Trace does not gate the finding — it tells us what kind of slop we have: + +``` +git log -L ':SYMBOL:path/to/file.rs' --format="%H" | + grep -oE "[0-9a-f]{40}" | sort -u | while read sha; do + git log -1 --format="%B" "$sha" | + grep -qE "^(Implements|Refs|Fixes|Verifies): " && echo "$sha traced" + done + +rg -n "// rivet: (verifies|implements|refs|fixes) [A-Z]+-[0-9]+" path/to/file.rs + +rivet list --format json | jq -r --arg p path/to/file.rs --arg s SYMBOL ' + .[] | select( + (.description // "" | (contains($p) and contains($s))) or + (.fields["source-ref"] // "" | (contains($p) and contains($s))) + ) | .id' +``` + +Three queries. All empty → **orphan-slop** — nobody specced it, nobody calls it, propose `delete`. Any non-empty → **aspirational-slop** — somebody specced it, nobody wired it up, propose `add-test` or `document-as-non-goal`. The classification matters because the right disposition is different for each: orphans get cut; aspirations get either built or formally rescinded. + +{% mermaid() %} +flowchart TB + excise["excision oracle
tests still pass with symbol stubbed?"] + excise -- "no — symbol is exercised" --> reject["finding rejected
not slop"] + excise -- "yes — symbol is unreachable" --> trace + trace["any of:
git log -L trailers
artifact source-ref
inline // verifies REQ-N"] + trace -- "all three empty" --> orphan["orphan-slop
delete"] + trace -- "any non-empty" --> aspir["aspirational-slop
add-test or document-as-non-goal"] + + classDef gate fill:#1a1d27,stroke:#fbbf24,color:#e1e4ed; + classDef interp fill:#1a1d27,stroke:#c084fc,color:#e1e4ed; + classDef bad fill:#1a1d27,stroke:#f87171,color:#e1e4ed; + classDef good fill:#1a1d27,stroke:#4ade80,color:#e1e4ed; + classDef warn fill:#1a1d27,stroke:#fbbf24,color:#e1e4ed; + + class excise gate; + class trace interp; + class reject bad; + class orphan good; + class aspir warn; +{% end %} + +## Three pitfalls v2.2 caught + +Round one had a working oracle but three bugs the second round exposed. Each is small individually; collectively they're the difference between a method that flatters the practitioner and one that catches what the practitioner missed. + +**Trailer passthrough.** v1's trace was *file-level*: any commit touching the file with `Implements:` / `Refs:` / `Fixes:` / `Verifies:` counted. That gave file-wide credit to unrelated refactor commits. The `wasm_runtime.rs` file passed v1 trace because a trailer-carrying Phase-6-rowan refactor touched it for unrelated reasons; the four genuinely unwired methods (`call_id`, `call_name`, `call_supported_types`, `call_analyze`) hid behind that. Fix: switch to `git log -L :SYMBOL:file` so only commits that modified the *specific symbol* count. + +**`#[cfg(never)]` fabricates lint errors.** Module-level excision needs an always-false cfg. `#[cfg(never)]` works on stable Rust but trips `unexpected_cfgs` under `-D warnings` (post-Rust 1.80), generating a clippy error that looks like the code is exercised when it is not. Use `#[cfg(not(all()))]` — recognised, always false, no lint noise. + +**Inline-annotation blindness.** Tests in rivet carry `// rivet: verifies REQ-N` comments that tie tests to requirements. The artifact corpus does not expose these via `rivet list`; the v1 trace query missed them entirely. v2 greps the source for the inline form too. 
The `providers.rs` audit illustrates the difference: ten tests inline-tagged `verifies REQ-027`, REQ-027 status `approved`, no artifact `source-ref` mentioned the file. Strict v1 oracle classified it orphan; v2 classified it aspirational and we wired it up instead of deleting. + +## Three case studies + +### DD-064 — orphan-slop, four `WasmAdapter` methods + +Excision target: `call_id`, `call_name`, `call_supported_types`, `call_analyze` in `rivet-core/src/wasm_runtime.rs`. Each `#[allow(dead_code)]` or `pub fn` with no caller in the workspace. + +Excision diff: each method body replaced with `unimplemented!("slop-hunt excision: wasm_runtime.rs::METHOD_NAME")`, `_root` / `_aadl_dir` prefixed for unused parameters. Build, test, clippy, validate, commits all baseline-match. + +Trace per symbol: introducing commits `50c5107` and `3b04f01` are both trailer-less; `git log -L` returns those single SHAs and they fail the trailer regex. No artifact references any of the four symbol names. Inline-annotation grep: empty. **Three queries empty → orphan-slop → delete.** + +Result: 155 lines removed in [`75f3916`](https://github.com/pulseengine/rivet/pull/205/commits/75f3916). The `Adapter` trait impl at L590-619 had a `// TODO: call self.call_id() and cache` comment with no surrounding work — that comment was the smoking gun. Cleaned up alongside the deletion. + +### DD-065 — orphan-slop, four narrow symbols across four files + +| File | Symbol | Reason orphan | +|---|---|---| +| `rivet-core/src/sexpr.rs` | `line_starts`, `offset_to_line_col`, `SyntaxToken` alias | Duplicates of `yaml_cst::*` with no caller. LSP and db diagnostics use the `yaml_cst` versions. | +| `rivet-core/src/commits.rs` | `CommitClass::Exempt` variant + its match arm | `classify_commit_refs` has three return sites (`Linked`, `BrokenRef`, `Orphan`); none yields `Exempt`. The match arm was the author's `// for completeness` confession. 
| +| `rivet-core/src/reqif.rs` | `build_reqif` shorthand | Backward-compat wrapper for `build_reqif_with_schema(_, None)`. Zero callers. Every test goes through `ReqIfAdapter::export` to the schema-aware path. | +| `rivet-core/src/formats/needs_json.rs` | `import_needs_json_directory` | Adapter dispatch arm `AdapterSource::Directory` unreached at runtime; nothing in the corpus declares `format: needs-json` as a directory source. | + +All four passed excision oracle with baseline match across build/test/clippy/validate/commits. All four trace-empty across the three queries. 75 lines removed in [`8c17daa`](https://github.com/pulseengine/rivet/pull/205/commits/8c17daa). The discovery agent on `formats/generic.rs` proposed a fifth fictional symbol `build_reqif_with_schema_unused`; grep refuted it before action — a reminder that bonus-finding hallucinations happen in agent NOTES even when the main finding is sound. Always grep-verify before deletion. + +### DD-066 — orphan-slop, the entire `NeedsJsonAdapter` chain + +Whole-block excision target: `pub struct NeedsJsonAdapter`, its `Default`/`Adapter` impls, the `adapter_config_to_needs_config` helper, the helper-only round-trip test, *plus* the `"needs-json" =>` arm in `rivet-core/src/lib.rs::load_artifacts` (the dispatch arm is also dead because no source declares `format: needs-json`). 129 LOC across two files. + +Excision used `#[cfg(not(all()))]` on the relevant items. Live path preserved: the CLI command `cmd_import_results_needs_json` and the fuzz target both call `import_needs_json` directly, bypassing the adapter. Trace queries: empty for both `NeedsJsonAdapter` and `adapter_config_to_needs_config`. Inline-annotation grep: one hit on the helper-only test (which gets deleted with the helper). 12 retained tests on the live path still verify `REQ-025`. Removed in [`48ff990`](https://github.com/pulseengine/rivet/pull/205/commits/48ff990). 
+ +## What this also closed + +Three approved-but-unrealized requirements where the audit found existing implementations had no live caller: + +- **`REQ-027`** ("Build-system-aware cross-repo discovery") — `providers.rs` had the implementation, no CLI command exposed it. Wired up via `rivet externals discover` with three integration tests; commit [`aa257cd`](https://github.com/pulseengine/rivet/pull/205/commits/aa257cd). +- **`REQ-006`** ("OSLC-based tool synchronization") + **`FEAT-011`** ("OSLC client for bidirectional sync") — `OslcSyncAdapter::push` was a fire-and-forget POST loop; the doc comment admitted it was incomplete. Implemented diff-then-POST-or-PUT semantics with five wiremock integration tests; commit [`cc735f2`](https://github.com/pulseengine/rivet/pull/205/commits/cc735f2). Both promoted from `draft` to `approved`. + +These are aspirational-slop cases where the right disposition was add-wire-up, not delete. The v2 oracle made the call correctly because the inline-annotation query surfaced `verifies REQ-027` on the providers tests and lifted the classification from orphan to aspirational. + +## What's reusable + +The oracle is interchangeable. Anything mechanical your domain produces — failing PoC test, fuzzer crash, type error, proof obligation, `rivet validate` diagnostic, sanitizer fault — can be the oracle. The Mythos paper's failing-PoC oracle for vulnerability research, the SDD post's `rivet validate` oracle for traceability gap closure, and this post's excision-plus-trace oracle for slop hunting are three instantiations of the same scaffold against three oracle types. The four prompts and the parallel-agent-plus-fresh-validator discipline transfer; the oracle is the parameter. + +The discipline transfers to any codebase that has typed validators and audit-grade artifacts — not just rivet. A C++/CMake project with `clang-tidy` plus a configuration-validator has both halves; the same pipeline shape works there. 
+ +## Numbers from PR #205 + +`scripts/mythos/` shipped, then ran against rivet's own codebase: + +- ~370 lines of orphan code deleted across DD-064/065/066 +- 3 approved-but-unrealized requirements wired and tested +- 8 new integration tests (3 externals discover, 5 OSLC push) +- Traceability coverage 87/236 (36.9%) → 94/238 (39.5%) +- 12 typed audit artifacts produced; every one carries the verbatim oracle output in its `rationale` field + +The session that produced these took roughly six hours of wall time, almost entirely waiting on cargo builds in parallel worktrees. The bookkeeping the LLM agents did would have taken weeks of human review at the same depth — and a human reviewer would have produced prose, not typed audit artifacts. + +`scripts/mythos/` is the file. `rivet validate` is the oracle. `git log -L :symbol:file` is the trace. Everything else is discipline. diff --git a/content/blog/2026-04-25-three-patterns-colliding.md b/content/blog/2026-04-25-three-patterns-colliding.md new file mode 100644 index 0000000..88e1df8 --- /dev/null +++ b/content/blog/2026-04-25-three-patterns-colliding.md @@ -0,0 +1,140 @@ ++++ +title = "Three patterns colliding: Karpathy's LLM Wiki, oracle-gated agents, and typed compliance" +description = "Andrej Karpathy posted his LLM Wiki gist in April 2026. From most LLM-tooling vantage points it's a clean idea about knowledge bases. From a cybersecurity and safety-critical vantage point it's the third pillar of a pattern that has been forming for years. The other two — oracle-gated agent verification, and typed compliance traceability — are running here already. This is the reflection on what the three together actually mean." +date = 2026-04-25 +draft = false +[taxonomies] +tags = ["knowledge-base", "process", "deep-dive"] +authors = ["Ralf Anton Beier"] ++++ + +{% note(kind="tip") %} +**Reading order for this stack** — start here for the synthesis. 
Then [*Mythos slop-hunt: oracle-gated audits in practice*](/blog/mythos-slop-hunt/) for the audit method that produced PR #205. The [v0.1.0 announcement](/blog/rivet-v0.1.0/) covers what rivet is and how to install it. +{% end %} + +{% insight() %} +Karpathy named the missing piece. From most LLM-tooling vantage points his April 2026 LLM Wiki gist is a clean idea about personal knowledge bases. From the desk of someone who has spent fifteen years in cybersecurity and safety-critical engineering, it is the third pillar of a pattern that has been forming since early 2025 — alongside [oracle-gated agent verification](/blog/spec-driven-development-is-half-the-loop/) and [typed compliance traceability in rivet](/blog/rivet-v0.1.0/). Each pillar by itself is incomplete. Knowledge accumulation alone hallucinates over time. Oracle-gated verification alone has nothing to remember. Typed compliance alone has no narrative and no acceleration. Together — agents read sources, run oracles, write typed artifacts, humans curate at the edges, the auditor queries the result — they are a candidate state of the art for AI-assisted engineering on systems that have to be both fast and provable. From where I sit, in my direct experience of both communities, that union is unheard of. This post is why. +{% end %} + +## The three patterns + +### Karpathy's LLM Wiki — *knowledge compounds* + +Andrej Karpathy's [gist](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f) describes a persistent, LLM-maintained, interlinked knowledge corpus as a replacement for RAG. The LLM does the bookkeeping. You curate sources and ask questions; the LLM writes summary pages, updates indexes, flags contradictions, maintains cross-references. The output compounds across sessions instead of being re-derived on every query. + +Three quotes that change how you read it: + +> *"The LLM writes and maintains all of the data of the wiki. 
I rarely touch it directly."* + +> *"Obsidian is the IDE; the LLM is the programmer; the wiki is the codebase."* + +> *"This document is intentionally abstract. It describes the idea, not a specific implementation. The exact directory structure, the schema conventions, the page formats, the tooling — all of that will depend on your domain, your preferences, and your LLM of choice."* + +Markdown shows up in his stack because Obsidian renders markdown. He's explicit that the format is open. The pattern is what matters. + +### Oracle-gated agents — *verification mechanically gates* + +From [the spec-driven-development-is-half-the-loop post](/blog/spec-driven-development-is-half-the-loop/): a minimal prompt, a strong mechanical oracle, parallel agents, a fresh-session validator. The oracle either fires or it does not. Findings that pass the oracle land as artifacts; findings that don't are rejected. The same shape Anthropic's red team uses for vulnerability research; we run two pipelines on it — bug hunting in sigil's `scripts/mythos/`, traceability-gap closure across rivet-managed corpora — and the oracle is interchangeable. Different domains; same scaffold. + +This is the verification half of the loop spec-driven development omits. SDD's QA-lens agent is a soft oracle: another LLM reading the spec back. A soft oracle cannot find what the spec did not think to say, and bug classes are almost by definition what the spec did not think to say. + +### rivet — *audit reads the result* + +[rivet](/blog/rivet-v0.1.0/) keeps SDLC traceability — requirements, design decisions, hazards, tests — as YAML in git, validated on every commit, designed for an LLM agent to read and write. Two co-equal content layers: typed atomic artifacts with typed links between them, and Markdown documents that cite the atoms by ID. One Rust binary the agent drives three ways — CLI, [MCP](https://modelcontextprotocol.io) server, or LSP backend. 
+ +The data-model lineage runs through [sphinx-needs](https://sphinx-needs.readthedocs.io/) (which we used across PulseEngine projects from early 2025 through early 2026, before rivet replaced it everywhere) and DOORS-style typed traceability with decades of safety-critical practice on it. Stable typed IDs, typed link predicates, schema-validated fields. The auditor queries the graph and the graph answers. + +## Why each is incomplete alone + +| Pattern | What it does well | What it cannot do alone | +|---|---|---| +| LLM Wiki | Compounds knowledge across sessions; LLM does the bookkeeping | No mechanical truth signal — *@SEO-Warlord* on Karpathy's gist: *"prose that may have been silently revised three ingests ago"* | +| Oracle-gated agents | Catches what spec-as-oracle misses; rejects hallucinations | Has nothing to accumulate into; verification with no memory | +| Typed compliance | Audit-grade; queryable; provenance-stamped | No narrative; no agent-scale labor; no compounding outside the typed graph | + +Run any one in isolation and you reproduce one of three failure modes that experienced engineers know by heart: the wiki that drifted into fiction, the test suite that goes green on stale assumptions, the traceability matrix nobody actually consults. Run all three together — agent reads sources, runs the oracle, writes the typed artifact, the auditor and the next agent both query it — and the failure modes cancel. + +## What this looks like, drawn + +{% mermaid() %} +flowchart TB + src["external sources
papers · clippings · mirrored wikis · transcripts"] + agent["LLM agent
reads · runs the oracle · writes typed artifacts"] + oracle["mechanical oracle
fires pass/fail · never maybe
(rivet validate · failing PoC · Kani · sanitizer)"] + + subgraph dests["where the work lands"] + direction LR + subgraph rivetBox["rivet — one binary"] + direction TB + rivetContent["typed atoms · documents · link graph
auditor and human both read this"] + rivetCli["CLI"] + rivetMcp["MCP"] + rivetLsp["LSP"] + end + blog["pulseengine.eu
cross-project memory
long-form posts"] + end + + src --> agent + agent <-. "validates" .-> oracle + agent --> rivetCli + agent --> rivetMcp + agent --> rivetLsp + agent --> blog + rivetBox -. "context" .-> agent + blog -. "context" .-> agent + + classDef src fill:#13161f,stroke:#4a5068,color:#8b90a0; + classDef agent fill:#1a1d27,stroke:#fbbf24,color:#e1e4ed; + classDef oracle fill:#1a1d27,stroke:#f87171,color:#e1e4ed; + classDef tspine fill:#1a1d27,stroke:#4ade80,color:#e1e4ed; + classDef tcross fill:#1a1d27,stroke:#c084fc,color:#e1e4ed; + classDef grp fill:#13161f,stroke:#3d4258,color:#8b90a0; + classDef inner fill:#0f1117,stroke:#4ade80,color:#8b90a0; + + class src src; + class agent agent; + class oracle oracle; + class rivetContent tspine; + class rivetCli,rivetMcp,rivetLsp inner; + class blog tcross; + class dests,rivetBox grp; +{% end %} + +The agent is the verb. The oracle is the gate. The typed corpus is the auditable result. The blog is the cross-project compounding layer that survives session resets and seeds context for the next agent. Karpathy's pattern names the third surface; the SDD post named the second; rivet was already the first. + +## Why this is unheard of from where I sit + +Cybersecurity and safety-critical engineering have their own knowledge architectures. Threat models, hazard analyses, traceability matrices, safety cases. Most of the people doing this work are still doing the bookkeeping by hand and dismissing AI agents as too unreliable to trust with the audit artifact. The view from there is: agents accelerate code production, and that's a problem because traceability cannot keep up. + +The LLM-tooling community lives elsewhere. Building knowledge bases, building agent scaffolds, building MCP integrations — but typically without knowing what an ASPICE assessor actually asks for, what an ISO 26262 auditor reads, what a safety case argues. The view from there is: knowledge bases are interesting; rigor is a separate problem. 
+ +The pattern that bridges both communities — agents do the labor, mechanical oracles do the verification, typed schemas do the bookkeeping, humans curate the inputs and approve the outputs — sits at an intersection that almost nobody occupies. The first community thinks AI is unreliable; the second community has not met an auditor. It is genuinely rare to see a working stack that takes both seriously, and it took the three patterns landing in public over the same six-week window — Karpathy's gist (April 2026), Anthropic's Mythos preview (April 2026), rivet's v0.1.0 (March 2026) — for me to articulate why the union is the thing that matters, not any one of the patterns. + +This is not a foresight claim. The data model in rivet is sphinx-needs and DOORS lineage; the agent scaffold in our `scripts/mythos/` is openly modeled on Anthropic's preview. What the lineage gives me is a vantage point: when the LLM-tooling community independently arrives at structural patterns the safety-critical community settled decades ago, I can recognise the convergence quickly and ship a Rust-native MCP-exposed implementation faster than someone who has to rediscover the pattern. That advantage is real and durable but it is community arbitrage, not prediction. The thing genuinely worth saying is that the *union* is unheard of, not that I anticipated any single pillar of it. + +## What rivet should borrow from Karpathy + +Two small things — both extending the typed model rather than loosening it. Both are open as issues against the rivet repo: + +- **[#206](https://github.com/pulseengine/rivet/issues/206) — `rivet bundle --depth N`.** Emit one self-contained YAML or JSONL document with the root + transitively-linked neighbours, link types as inline annotations. Format-equivalent to Karpathy's whole-wiki-paste; YAML preserves typed structure where markdown would flatten it. 
+- **[#207](https://github.com/pulseengine/rivet/issues/207) — Inline ID-reference detection.** When a description names `REQ-028`, `rivet validate` warns if no typed link exists. The `[[wikilinks]]` discipline for the typed-graph world. + +Notably absent: an ingest verb. The agent runs the ingest. rivet's job is the validators. + +## What rivet should not borrow + +The format. Several gist commenters argue forcefully that LLM-generated markdown is *"linguistic fraud"*[^gnusupport-comment] — un-audited prose written by the same LLM that reads it is a closed loop with no external truth. rivet's escape from that loop is the typed schema and the mechanical validators. A free-form markdown surface inside rivet would bypass both, and that is exactly the failure mode the safety-critical community has spent decades learning to refuse. + +The librarian metaphor — *"the librarian writes index cards and essays, not revised encyclopedia entries"*[^seo-warlord-comment] — is the rivet stance. Atoms are typed artifacts; synthesis is a future `synthesis` artifact type that links to its atoms via `derived-from`. + +## What this changes + +[PR #205](https://github.com/pulseengine/rivet/pull/205) is the union doing the work. The Mythos slop-hunt audit pipeline at `scripts/mythos/` ran the oracle-gated pattern over rivet's own codebase: it confirmed three orphan-slop chains (DD-064, DD-065, DD-066), wired three approved-but-unrealized requirements (REQ-027, REQ-006, FEAT-011) into live tested implementations, and moved traceability coverage from 87/236 (36.9%) to 94/238 (39.5%) on a single branch. Twelve typed artifacts, mechanical oracles confirming each, ~370 lines of orphan code deleted, no LLM hallucinations because the oracle either fires or it doesn't. + +That run does not happen — not in the same way, not with the same audit-readable evidence — if you take any one of the three pillars away. Without typed compliance, there is no place for the audit findings to land. 
Without oracle-gated agents, there is no mechanical signal separating real findings from confident-sounding hallucinations. Without LLM-maintained knowledge, there is no agent doing the labor that lets you do an audit at this depth in a session, instead of in a quarter. + +That is the synthesis worth defending. Not as a roadmap claim, not as foresight, but as a working observation from a cybersecurity and safety-critical desk: anyone running fewer than all three of these pillars is leaving rigor or labor on the table. Karpathy named the third one. PR #205 gave it teeth. + +[^seo-warlord-comment]: Comment by `@SEO-Warlord` on Karpathy's gist, 2026-04-23. The thread itself is worth reading end-to-end — the architectural debate between "LLM Wiki" and "Zettelkasten" framings is more useful than the gist alone. + +[^gnusupport-comment]: Comment by `@gnusupport` on the same gist. The post is intemperate ("ARCHITECTURAL CRIME SCENE") but the underlying critique — un-audited LLM prose summarising LLM prose with no source provenance is dangerous — targets the exact failure mode rivet's typed schema and `Provenance.reviewed_by` are designed to prevent. diff --git a/sass/_tree.scss b/sass/_tree.scss new file mode 100644 index 0000000..a365c5a --- /dev/null +++ b/sass/_tree.scss @@ -0,0 +1,79 @@ +// Directory-tree shortcode (templates/shortcodes/tree.html). +// Two-column grid: file names left in monospace + accent, descriptions +// right in dimmed text colour. No ASCII tree characters — clean card style +// that matches the rest of the dark theme. 
+ +.tree { + margin: 1.6rem 0; + padding: 1rem 1.25rem; + background: var(--surface, #1a1d27); + border: 1px solid var(--border-faint, #2e3345); + border-radius: 6px; + font-size: 0.92rem; + line-height: 1.55; + color: var(--text, #e1e4ed); +} + +.tree__root { + display: flex; + align-items: baseline; + gap: 0.45rem; + margin: 0 0 0.7rem; + padding-bottom: 0.55rem; + border-bottom: 1px solid var(--border-faint, #2e3345); +} + +.tree__root-icon { + color: var(--accent, #6c8cff); + font-size: 0.85em; + user-select: none; +} + +.tree__root-name { + color: var(--text, #e1e4ed); + font-family: 'Atkinson Hyperlegible Mono', ui-monospace, SFMono-Regular, + Menlo, monospace; + font-weight: 600; +} + +.tree__items { + display: grid; + grid-template-columns: max-content 1fr; + column-gap: 1.6rem; + row-gap: 0.4rem; + margin: 0; +} + +.tree__name { + font-family: 'Atkinson Hyperlegible Mono', ui-monospace, SFMono-Regular, + Menlo, monospace; + color: var(--accent, #6c8cff); + font-weight: 500; + margin: 0; + white-space: nowrap; +} + +.tree__desc { + color: var(--text-dim, #8b90a0); + margin: 0; +} + +// Empty descriptions still occupy a grid row so the file name aligns; +// hide them visually so they don't print a stray dot or whitespace. +.tree__desc:empty { + visibility: hidden; +} + +// Narrow screens: stack name above description and indent the description. 
+@media (max-width: 640px) { + .tree__items { + grid-template-columns: 1fr; + row-gap: 0.6rem; + } + .tree__name { + margin-top: 0.15rem; + } + .tree__desc { + padding-left: 1rem; + } +} diff --git a/sass/main.scss b/sass/main.scss index 0ce6fb5..191d8e9 100644 --- a/sass/main.scss +++ b/sass/main.scss @@ -5,4 +5,5 @@ @import "glass"; @import "blog"; @import "pipeline"; +@import "tree"; @import "responsive"; diff --git a/templates/shortcodes/tree.html b/templates/shortcodes/tree.html new file mode 100644 index 0000000..ad289ff --- /dev/null +++ b/templates/shortcodes/tree.html @@ -0,0 +1,38 @@ +{# tree: render a directory listing as a clean two-column grid. + Files left, descriptions right. Monospace file names, regular text + descriptions. No tree characters, no terminal-output mimicry — + just a card with structured rows. + + Usage: + {% tree(root="scripts/mythos/") %} + HOWTO.md | pipeline overview + oracle design + rank.md | score every source file 1-5 by slop likelihood + {% end %} + + - `root` is optional; rendered as a small heading with a folder glyph. + - Each line of the body is one entry: `name | description`. The + description is optional. Empty body lines are skipped. +#} +{%- set lines = body | trim | split(pat="\n") -%} +
+{%- if root %} +
+ + {{ root }} +
+{%- endif %} +
+{%- for line in lines -%} +{%- set entry = line | trim -%} +{%- if entry %} +{%- set parts = entry | split(pat=" | ") -%} +
{{ parts | nth(n=0) }}
+
+{%- if parts | length > 1 -%} +{{ parts | slice(start=1) | join(sep=" | ") }} +{%- endif -%} +
+{%- endif -%} +{%- endfor %} +
+