diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 15d46c6..ce36533 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,5 +20,16 @@ jobs: - run: pnpm install --frozen-lockfile - run: pnpm typecheck - - run: pnpm test + + # Coverage gate — fails if line/branch/function coverage regresses below the thresholds in + # vitest.config.ts (lines ≥90, branches ≥85). The committed seeded fuzz (fuzz.test.ts) runs here too. + - run: pnpm test:coverage + - run: pnpm build + + # Schema gate — the published JSON Schema must stay in sync with the Zod source (catches the + # class of bug where the runtime and the contract drift apart). + - name: JSON Schema is in sync with the Zod source + run: | + pnpm gen:schema + git diff --exit-code packages/core/schema diff --git a/.github/workflows/mutation.yml b/.github/workflows/mutation.yml new file mode 100644 index 0000000..bc3d017 --- /dev/null +++ b/.github/workflows/mutation.yml @@ -0,0 +1,30 @@ +name: Mutation + +# Mutation testing is slow, so it runs weekly (and on demand) rather than on every PR. +# It proves the TESTS are strong — a high pass rate with a low mutation score means the suite is +# decorative. Scoped to the most security-critical module (the contract validator); expand `mutate` +# in stryker.config.json to cover more modules as the budget allows. +on: + schedule: + - cron: "0 4 * * 1" # Mondays 04:00 UTC + workflow_dispatch: + +jobs: + mutation: + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@v7 + - uses: pnpm/action-setup@v6 + - uses: actions/setup-node@v6 + with: + node-version: 22 + cache: pnpm + - run: pnpm install --frozen-lockfile + # Fails the job if the mutation score drops below `thresholds.break` (80) in stryker.config.json. 
+ - run: pnpm exec stryker run + - uses: actions/upload-artifact@v4 + if: always() + with: + name: mutation-report + path: qa/mutation-report.json diff --git a/.gitignore b/.gitignore index 1bfd5e3..f483e59 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,9 @@ coverage/ # VitePress docs/.vitepress/dist/ docs/.vitepress/cache/ + +# QA tooling artifacts (regenerated) +coverage/ +.stryker-tmp/ +qa/.stryker-tmp/ +qa/mutation-report.json diff --git a/QA_LOG.md b/QA_LOG.md index 5f4c1dc..b6f0e22 100644 --- a/QA_LOG.md +++ b/QA_LOG.md @@ -327,3 +327,1444 @@ weren't load/chaos tested (not applicable to a local CLI — see gaps). `findhang.mts`, `verify4.mts`, `verify5.mts`, `fuzz2.mts`, `mockcrash.mts`, `webattack.mts`, `confirm.mts`, `confirm-final.mts`. - This log: `QA_LOG.md`. + +--- +--- + +# CAMPAIGN 2 — fresh adversarial pass (branch `qa/adversarial-cycle-2`) + +The Campaign-1 stop condition was met and merged to `main`. Restarting the loop from scratch: +"it works" is a hypothesis to falsify. Baseline re-verified: `pnpm test` **218 passed** (25 files). +Strategy: aim at the surfaces Campaign 1 probed *least* — the **runner** (URL building, auth, +assertion regexes), **workspace** (folder/env/secret resolution), **drift/coverage** math, and the +**Postman/Bruno importers** — rather than re-grinding the response validator it already hardened. + +## Cycle 1 — runner / resolve / importer attack pass + +### Plan +- `resolveRequest`: URL building edges (fragments, pre-existing query, apikey-in-query), CRLF in + header values, body encodings. +- Postman importer: 5k random hostile-JSON iterations — must never throw past its documented guard, + and every emitted file must round-trip through `parse.*`. +- `generateExample` (mock) on self-referential `$ref` — must terminate. 
+ +### Findings + +- [BUG-A] runner/URL building — query params silently swallowed by a URL `#fragment` | severity **medium** + - Repro (empirical, `_qa_scratch.test.ts`): `resolveRequest({ url: "{{baseUrl}}/search#section", + query: { q: "hello" } })` → `http://example.com/search#section?q=hello`. `new URL(...)` then parses + `search=""` and `hash="#section?q=hello"` → **`searchParams.get("q") === null`**: the declared query + param never reaches the server. Same class with a pre-existing query: `http://h/p?a=1#f` + `{b:2}` → + `...?a=1#f&b=2` (b lost). And an `apikey in: query` auth param is silently dropped the same way → + a request goes out **unauthenticated with no error or warning**. + - Evidence: `[A1] does the server receive q? -> null`; `[A2] b= null`. + - Root cause: `resolveRequest` did `url += (url.includes("?") ? "&" : "?") + qs`, appending the query + string to the very end of the URL without accounting for an existing `#fragment`. Fragments are + legal in a TruSpec `url` (the schema accepts any string), so query params get pushed into the hash. + - Fix: `packages/core/src/runner/resolve.ts` — split the URL on the first `#`, insert the query + string before the fragment, then re-append the fragment: `head + (head.includes("?")?"&":"?") + qs + frag`. + Verified: `http://x/search#section` + `q=hello` → `http://x/search?q=hello#section` (param now in `search`). + - Regression test: `packages/core/test/runner.test.ts` → 3 cases ("inserts query params before a URL + fragment", "appends to an existing query string before the fragment", "places an apikey-in-query + before a URL fragment"). 
+ - Suite after fix: **PASS — 221 tests (was 218, +3), 0 regressions.** + +### Attacks that held (Cycle 1) +- Postman importer (`importPostman`): 5000 random hostile-JSON collections (nested items, `__proto__` + keys, unicode, control chars, oversized names) — **0 unparseable emitted files**; throws only its + documented "Not a Postman v2.1 collection" guard on non-array `item`. Round-trip integrity holds. +- `generateExample` on a self-referential `$ref` schema (`Node.next -> Node`, `Node.kids[] -> Node`) + terminates in **1ms** — the `depth > 6` cap bounds it (no infinite recursion; mock generation is safe). + +### Cycle outcome +- Broke? **yes** (BUG-A medium) → fixed at root + 3 regression tests → full suite green (221). + Restart at Cycle 2 with surfaces not yet probed this campaign. + +--- + +## Cycle 2 — importer fuzz, mock regex, header injection, folder merge + +### Plan +- Bruno importer: 6k random hostile-`.bru` iterations — never throw, every emitted request round-trips. +- Mock `pathToRegex`: adjacent-param paths (ReDoS?); CRLF in header values via the real fetch path; + workspace folder-merge precedence + prototype pollution. + +### Findings + +- [BUG-B] mock/`pathToRegex` — catastrophic ReDoS on adjacent path params | severity **medium-high** + - Repro (`_qa_scratch.test.ts` B2): a legal OpenAPI path with adjacent params (no literal separator) + compiled to `([^/]+)([^/]+)…`. Matching a long non-matching request path: + `/x/{a}{b}{c}{d}` → **17ms**, `/x/{a}{b}{c}{d}{e}{f}` → **1886ms** — a clean O(n^k) curve; 7–8 + adjacent params + a longer path hangs for minutes. + - Evidence: `[B2] path=/x/{a}{b}{c}{d}{e}{f} … test_ms=1886`. + - Root cause: `pathToRegex` emitted a separate `([^/]+)` per `{param}`; two+ adjacent unbounded greedy + groups over the same class with no separator between them backtrack catastrophically when the `/?$` + anchor fails. 
The mock matches **incoming request paths** (attacker-controlled) on the single Node + event loop, so one crafted request stalls the whole server — a DoS. Adjacent params are legal + OpenAPI (`/v{major}.{minor}.{patch}`, composite ids). + - Fix: `packages/core/src/mock/engine.ts` — emit non-capturing `[^/]+` (the regex is only ever used + for `.test()`, never `.exec()`) and **collapse a run of adjacent params into a single** `[^/]+` + (adjacent params can't be uniquely split anyway). No two unbounded quantifiers ever sit adjacent → + matching is linear. Post-fix the same paths compile to `^/x/[^/]+/?$` and match in **0–1ms**. + - Regression test: `packages/core/test/mock.test.ts` → "mock adjacent-param path matching (ReDoS + regression)": 8 adjacent params vs a 5000-char hostile path completes <100ms and returns undefined; + a legit `/file/abcdefgh` still matches. + - Suite after fix: **PASS — 223 tests (was 221, +2), typecheck 7/7, 0 regressions.** + +### Attacks that held (Cycle 2) +- Bruno importer (`bruToRequest`): 6000 random hostile-`.bru` iterations — **0 throws, 0 round-trip + failures**. `__proto__`/`constructor` header keys land as own properties (Object.fromEntries define + semantics) — `Object.prototype` not polluted. +- CRLF / control chars in a header value reach undici's `Headers.append`, which throws; `runRequest`'s + try/catch turns it into a clean `ok:false` result — **no crash, no unhandledRejection**. +- `mergeFolderConfigs`: precedence correct (leaf wins, root keys preserved); `__proto__` in folder + headers stored as own property via spread — no prototype pollution. + +### Cycle outcome +- Broke? **yes** (BUG-B medium-high) → fixed at root + 2 regression tests → full suite green (223). + Restart at Cycle 3. 
+ +--- + +## Cycle 3 — response contract validator (untrusted-response correctness) + +### Plan +- Hunt false-negatives in `validateAgainstSchema` (the flagship `contract` / `run --spec` engine that + validates **untrusted API responses**): integer-vs-float, enum, required, and — the lead — + composition keywords (`allOf`/`anyOf`/`oneOf`) as siblings of `type`/`properties`/`required`. + +### Findings + +- [BUG-C] spec/validate-response — composition keywords treated as mutually exclusive → silent + false-negatives | severity **medium** + - Repro (`_qa_scratch.test.ts` C1–C3, all returned `[]` = "conforms"): + - C1 `allOf` sibling of `required:[id]`/`properties` + value `{name:"x"}` (missing `id`). + - C2 `anyOf` sibling of `properties:{a:string}` + value `{a:123}` (a is a number). + - C3 `allOf:[{$ref: Base}]` + own `required:[id]` (**the most common OpenAPI composition shape**) + + value missing `id`. + All three **passed a non-conforming response** — the contract check the tool exists to provide was + silently skipped. + - Root cause: in `validateInto`, the `allOf`/`anyOf`/`oneOf` blocks each ended with `return`, so any + sibling `type`/`properties`/`required`/`items` constraints were never evaluated. JSON Schema + keywords are **conjunctive** — every keyword present in a schema object is an independent constraint + the value must satisfy simultaneously. + - Fix: `packages/core/src/spec/validate-response.ts` — drop the early `return` from `allOf`/`anyOf`/ + `oneOf` so their violations accumulate and control falls through to the sibling type-dispatch. + (`$ref`/`null`/`enum`/array-`type` keep their returns: array-`type` already folds siblings in via + its `{...schema, type:t}` spread, and C1–C3 are fixed through the `allOf`/`anyOf` change. Verified + no false positives: a fully-conforming value still returns `[]`, and pure-`allOf` schemas with no + siblings are unchanged — C5 control.) 
+ - Regression tests: `packages/core/test/validate-response.test.ts` → "composition keywords are + conjunctive with sibling constraints" (4 cases: sibling required+allOf, required+allOf:[{$ref}] + incl. a conforming + a $ref-violating case, properties+anyOf, type+oneOf). + - Suite after fix: **PASS — 227 tests (was 223, +4), typecheck 7/7, 0 regressions.** + +### Attacks that held (Cycle 3) +- `integer` vs a float, `enum` membership, `required`, `additionalProperties:false` all flag correctly; + the existing 29 validator tests still pass unchanged (the conjunction fix added strictness without + introducing false positives). + +### Cycle outcome +- Broke? **yes** (BUG-C medium) → fixed at root + 4 regression tests → full suite green (227). + Restart at Cycle 4. + +--- + +## Cycle 4 — broad property fuzz (5 seeds) — CLEAN + +### Plan +- ~62k randomized iterations over the engines touched this campaign, guarding all three fix classes: + D1 resolve (query-survives-fragment, BUG-A), D2 validator (conjunctive + recursive `$ref`/cycle + termination, BUG-C), D3 mock `pathToRegex` (adjacent-param ReDoS, BUG-B), D4 jsonpath/interpolate. + +### Findings +- **No new failures.** D1 crashes=0 **lostParams=0**; D2 crashes=0 slowest=**4ms**; D3 slowest=**2ms** + on a 3000-char hostile path vs messy adjacent-param templates; D4 crashes=0. + +### Cycle outcome +- Broke? **no** → proceed to a confirmation cycle with fresh seeds + an end-to-end integration attack. + +--- + +## Confirmation cycle — fresh seeds + CLI integration + +### Plan +- E0 `run` exit code on an empty target; E5 end-to-end capture-chain + secret redaction through the + real `runCommand` against temp collection files. + +### Findings + +- [BUG-D] CLI/`run` — exits 0 when zero requests are found → silent green CI gate | severity **low-medium** + - Repro (`_qa_scratch.test.ts` E0): `runCommand([emptyDir])` → **exit 0** with only a warning. + - Evidence: `[E0] exit code for empty dir = 0`. 
+ - Root cause: `return result.ok ? 0 : 1`, and `result.ok = results.every(r => r.ok)` — `[].every()` + is `true`. A misconfigured path / uncommitted files / bad glob (zero requests) therefore passes the + gate. `run` is documented as a CI gate ("non-zero exit on failure"); a green build when nothing ran + is the worst false-positive for a gate. (jest/pytest/go test all fail on "no tests found".) + - Fix: `packages/cli/src/commands/run.ts` — treat zero requests as a failure: emit an **Error** (was + Warning) and `return result.ok && !noRequests ? 0 : 1`. **Behavior change** (flagged for the + maintainer): an empty run now exits 1. Scoped to `run` only — `contract` is documented as + conformance-only (it delegates "untested" gating to `coverage`/`drift`), and `coverage`/`drift` + already fail loudly on an empty collection (0% / all-added), so they need no change. + - Regression test: `packages/cli/test/run.test.ts` → updated the prior "warns and exits 0 when no + requests are found" (which asserted the buggy behavior) to "exits 1 (fails the CI gate) when no + requests are found". This corrects a test that encoded the defect — a *strengthening*, not a + weakened assertion. + - Suite after fix: **PASS — 227 tests, typecheck 7/7, 0 regressions.** + +### Attacks that held (confirmation) +- E5 end-to-end: capture-chaining propagated `Authorization: Bearer chaintoken999` to the second + request, and secret redaction masked the declared `apiKey` in the `--json` output (no raw secret + leaked, `***` present). The full pipeline (env/secret resolution → folder inheritance → chaining → + redaction → reporters) is correct. +- **Real-world smoke (built binary + mock):** the README's documented blog flow produced exactly the + advertised results with all fixes applied — `run` **3 passed/0 failed**, `drift` flags + `GET /users/{id}`, `coverage` **75% (3/4)**. No false positives from the stricter validator on real + specs (the shipped example specs use no composition keywords). 
+ +### Cycle outcome +- Broke? **yes** (BUG-D low-medium, found during confirmation) → fixed + test corrected → suite green + (227). A break during confirmation resets the loop → restart at Cycle 5. + +--- + +## Cycle 5 — fresh-seed fuzz + untrusted `.tspec.yaml` parse robustness — CLEAN + +### Plan +- 5 fresh seeds (distinct from Cycle 4). F1 `parse.request` on ~30k random YAML/structured inputs + (incl. `__proto__`, infinite-loop scripts, malformed YAML, control chars) — process must survive; + F2 resolve query-survives-fragment; F3 validator/mock termination & ReDoS guards. + +### Findings +- **No new failures.** F1 process survived all ~30k inputs (no crash/hang; parseable inputs round-trip); + F2 crashes=0 lostParams=0; F3 validator crashes=0 slowest=**3ms**, mock regex slowest=**2ms**. + +### Cycle outcome +- Broke? **no** → proceed to a final confirmation cycle with a 6th seed set + harder shapes. + +--- + +## FINAL confirmation cycle — 6th seed set, harder shapes + +### Plan +- G1 validator over a web of mutually-recursive schemas (every composition keyword + siblings + + `additionalProperties:false`) × deep/cyclic values; G2 combined resolve + mock + **importers** + (Postman + Bruno) round-trip under fresh seeds. + +### Findings + +- [BUG-E] importers/postman — `importPostman` crashes on a Postman item with an empty `name` | severity **medium** + - How found: G2's combined importer round-trip reported **2215 failures** across ~9k iterations (G1 and + the resolve/mock guards were all clean). Isolated to the Postman path with `name: ""`. + - Repro: `importPostman({ item: [{ name: "", request: { method: "GET", url: "http://x" } }] })` → + **throws** an uncaught Zod error and aborts the entire import. (`name: "***"` is fine — non-empty + passes `min(1)`, and `slug` falls back to `request` for the filename.) + - Root cause: `convertRequest` set `name = String(item.name ?? "Request")`; the `?? 
"Request"` fallback + only fires for null/undefined, NOT for an empty string. The request schema requires `name.min(1)` + (`schema.ts:118`), so serializing a `name: ""` request throws out of `importPostman` — and Postman + exports legitimately contain empty request names, so `truspec import postman` crashes on real files. + - Fix: `packages/core/src/importers/postman.ts` — `String(item.name ?? "Request") || "Request"` (the + `|| "Request"` catches the empty-string result of any coercion: `""`, empty array, etc.). Applied the + same guard to the imported folder-config name (`|| "Imported"`) for consistency. (Bruno was already + safe: its name default uses a truthy guard, confirmed by the fuzz.) + - Regression test: `packages/core/test/importers-variants.test.ts` → "an empty request name imports + with a default instead of crashing the whole import". + - Suite after fix: **PASS — 228 tests (was 227, +1), typecheck 7/7, 0 regressions.** + +### Attacks that held (final confirmation) +- G1 validator: 30k iterations over mutually-recursive `A`/`B` schemas (allOf+$ref+required siblings, + oneOf/anyOf cycles, `additionalProperties:false`) × deep/cyclic values — **crashes=0, slowest 12ms**. + The conjunction fix (BUG-C) introduced no hang or false behavior under adversarial recursion. +- G2 resolve/mock: crashes=0, lostParams=0, mock regex slowest 2ms on a 5000-char hostile path. + +### Cycle outcome +- Broke? **yes** (BUG-E medium, found in final confirmation) → fixed at root + regression test → suite + green (228). A break during confirmation resets the loop → restart at Cycle 6. + +--- + +## Cycle 6 — importer hard fuzz + broad sweep (7th seeds) — CLEAN + +### Plan +- Hammer the importers (where BUG-E lived) with adversarial names (empty/whitespace/symbol/non-string + types: number, null, false, `[]`, `{}`, `__proto__`), nested folders, hostile urls/bodies/auth, plus + a broad resolve/validator/mock sweep. 
+ +### Findings +- **No new failures.** H1 importPostman ~10k iters: **THROWS=0, UNPARSEABLE=0** (BUG-E fix robust to all + name types/structures). H2 bruToRequest ~15k: THROWS=0, round-trip=0. H3 sweep: resolve lostParams=0, + validator crashes=0 slowest=12ms, mock regex slowest=17ms (bounded; a JIT/GC blip, re-measured below). + +### Cycle outcome +- Broke? **no** → final confirmation cycle with an 8th seed set + an explicit ReDoS-linearity proof. + +--- + +## FINAL confirmation #2 — 8th seeds + linearity proof — CLEAN + +### Plan +- I1: prove the mock regex scales LINEARLY (10 adjacent params, 50×`test()` batches at path lengths + 1k→16k). I2: 8th-seed importer + validator + resolve sweep. + +### Findings +- **No new failures.** I1: regex `^/x/[^/]+/?$`; 50×-batch timings `(1k,0ms)(2k,1ms)(4k,0ms)(8k,2ms) + (16k,3ms)` — linear in length, O(n^k) gone for good. I2: postman parseFail=0, bruno parseFail=0, + resolve lostParams=0, validator crashes=0 slowest=2ms. + +### Cycle outcome +- Broke? **no.** Cycle 6 AND this confirmation both produced **zero new failures** across two distinct + fresh seed sets → **STOP condition met.** + +--- + +# CAMPAIGN 2 — FINAL SUMMARY + +**Verdict.** Restarted the adversarial loop on the already-hardened (Campaign-1) engine and still found +**5 real, root-caused bugs**, each fixed with a regression test, plus a documented behavior change. They +cluster in the surfaces Campaign 1 probed least: request building, the mock matcher, the contract +validator's *correctness* (vs. its already-hardened *termination*), the CLI gate, and the Postman +importer. Final state: `pnpm test` **228 passed** (25 files; +10 over this campaign's 218 baseline, +22 +over the project's original 206), `pnpm typecheck`/build **7/7**, and the README's documented blog flow +runs exactly as advertised in the built binary (`run` 3/3, `drift` flags `GET /users/{id}`, `coverage` +75%). 
Confidence: **high** for the core engine and CLI; the web/MCP network surfaces were hardened in +Campaign 1 and were not re-attacked here (noted as a gap). + +**Cycles:** 6 attack cycles + 3 confirmation cycles (one per "clean" run; the first two each surfaced a tail bug, +the last ran clean). Bugs by area/severity: +- Request building (silent data loss) — **1, medium**: BUG-A (query params swallowed by a URL `#fragment`; + also drops an `apikey in: query` auth param → request sent unauthenticated). +- Mock matcher (DoS) — **1, medium-high**: BUG-B (`pathToRegex` adjacent params → O(n^k) ReDoS on + attacker-controlled request paths; 6 params + 64 chars already 1.9s). +- Contract validator (false-negative) — **1, medium**: BUG-C (composition keywords treated as mutually + exclusive, not conjunctive → sibling `required`/`properties`/`type` skipped; the common + `allOf:[{$ref}]`+own-`required` shape passed any response). +- CLI gate (silent pass) — **1, low-medium**: BUG-D (`run` exits 0 when zero requests found). +- Importer robustness (crash) — **1, medium**: BUG-E (`importPostman` throws on an empty request name). + +**Top fixes (root causes).** +1. **Query lost to the fragment** (BUG-A) — appended the query string to the end of the URL, after any + `#fragment`; now inserted before the fragment. `runner/resolve.ts`. +2. **Mock ReDoS** (BUG-B) — emitted a separate greedy `([^/]+)` per param; adjacent ones backtracked + catastrophically. Now non-capturing and adjacent params collapse to one `[^/]+` → linear. `mock/engine.ts`. +3. **Validator false-negative** (BUG-C) — `allOf`/`anyOf`/`oneOf` short-circuited with `return`; removed + so they accumulate and fall through to the conjunctive sibling type-dispatch. `spec/validate-response.ts`. +4. **Silent green gate** (BUG-D) — `[].every()` is `true`; `run` now treats zero requests as exit 1. + `cli/commands/run.ts`. *(Behavior change — flagged for maintainer review.)* +5. **Importer crash on empty name** (BUG-E) — `?? 
"Request"` missed `""`; now `… || "Request"`. `importers/postman.ts`. + +**Coverage table.** +| Category | Status | Strongest attack survived | +|---|---|---| +| Functional / spec-sync engine | Tested | full unit suite (228) + README blog flow in built binary | +| Input / fuzzing | Tested | ~250k randomized iterations across 8 seed sets (validator/resolve/mock/jsonpath/interpolate/parse) | +| Parser robustness (untrusted bytes) | Tested | ~35k Postman + ~30k Bruno + ~30k `.tspec.yaml` adversarial inputs — 0 crashes, 0 unparseable emits | +| Algorithmic-complexity DoS | Tested | mock `pathToRegex` proven linear to 16k-char paths; validator linear under recursive `$ref` | +| Contract correctness (false neg/pos) | Tested | conjunctive composition now enforced; no false positives on real specs | +| Request building correctness | Tested | fragment/query/apikey survival proven over 60k+ resolve iterations | +| CLI gate semantics / exit codes | Tested | empty/missing-env/bad-arg/failure exit codes all asserted | +| End-to-end integration | Tested | capture-chaining + secret redaction through the real `runCommand` | +| Concurrency / races | Not tested | engine is single-shot per request; capture-chaining is sequential by design | +| Load / stress / soak / chaos | Not applicable | local-first CLI/library, not a hosted service | +| Web `serve` / MCP network surfaces | Not re-tested | hardened in Campaign 1 (path traversal, DNS-rebinding, write confinement); out of scope this pass | + +**Residual risk & gaps.** +- **BUG-D is a behavior change** (empty `run` now fails). Intentional and conventional (jest/pytest/go), + but a maintainer who relied on empty-passes-green should review. Scoped to `run`; `contract` left as-is + (documented conformance-only). +- The Campaign-1 residuals still stand: `additionalProperties`-as-schema unenforced; `< 6 char` secrets + unmasked; `MAX_DEPTH=100` schema-recursion cutoff; chunked-body request detection in the mock. 
+- Web `serve` and the MCP server were **not** re-attacked this campaign (relied on Campaign 1). A future + pass should fuzz the web save endpoint's body-size handling and the MCP tool error paths. +- Concurrency/load/chaos remain out of scope for a local-first CLI. + +**Hardening recommendations.** +- Keep all new regression tests in CI (they encode: query-survives-fragment, mock-regex-linearity, + validator-conjunction, empty-run-fails, importer-empty-name). +- Port the seeded property-fuzzes (resolve/validator/mock/importers) into a bounded, fixed-seed CI job — + they found BUG-E in a single combined pass after targeted unit probes missed it. +- Consider enforcing `additionalProperties`-as-schema and OpenAPI `2XX` status wildcards to close the + remaining contract false-negatives. + +**Artifacts.** +- Fixes: `packages/core/src/runner/resolve.ts` (A), `packages/core/src/mock/engine.ts` (B), + `packages/core/src/spec/validate-response.ts` (C), `packages/cli/src/commands/run.ts` (D), + `packages/core/src/importers/postman.ts` (E). +- Regression tests: `packages/core/test/runner.test.ts` (+3), `mock.test.ts` (+2), + `validate-response.test.ts` (+4), `importers-variants.test.ts` (+1), + `packages/cli/test/run.test.ts` (corrected 1). +- Repro/fuzz harnesses were scratch vitest files under `packages/*/test/_qa_scratch.test.ts` + (8 seed sets, ~250k iterations); removed after each cycle — their invariants live on in the + regression tests above. +- This log: `QA_LOG.md`. + +--- +--- + +# CAMPAIGN 3 — third adversarial pass (branch `qa/adversarial-cycle-2`) + +Re-entered the loop ("be skeptical of your own done"). Campaign 2 flagged honest gaps — the web `serve` +server, MCP tools, `scaffold`, the `--live` probe, and schema edge cases were NOT re-attacked. Campaign 3 +aims there. Baseline re-verified: `pnpm test` **228 passed**. 
+ +## Cycle 1 — code-gen surfaces (scaffold / gen / MCP scaffold) + +### Plan +- `scaffoldFromSpec`: hostile specs, colliding operationId/key slugs, filename safety. +- `confinePath` (web + MCP write confinement): symlink escape. `live.ts` SSRF surface. + +### Findings + +- [BUG-F] spec/scaffold — colliding filenames silently overwrite operations | severity **medium** + - Repro (`_qa_repro.test.ts`): a 4-operation spec with case-variant operationIds (`getUser`/`GetUser`) + and separator-variant paths (`/a-b` vs `/a/b`) → `scaffoldFromSpec` returns 4 file entries but only + **2 distinct paths** (`getuser.tspec.yaml` ×2, `post-a-b.tspec.yaml` ×2). + - Evidence: `[SCAFFOLD] files generated: 4 … COLLIDING filenames: ["getuser.tspec.yaml","post-a-b.tspec.yaml"]`. + - Root cause: `scaffoldFromSpec` built `${slug(id)}.tspec.yaml` with no uniqueness tracking; distinct + operations slug to the same base. `writeScaffold` then writes by name, so the second file overwrites + the first on disk — silently dropping operations from a per-operation scaffold and making downstream + `coverage`/`drift` wrong. Reachable via `truspec gen` and `truspec_scaffold_from_spec` (MCP). + (The Postman importer already dedupes; scaffold didn't.) + - Fix: `packages/core/src/spec/scaffold.ts` — track a `used` count per base slug and suffix `-2`/`-3`/… + on collision, mirroring the importer. + - Regression test: `packages/core/test/scaffold.test.ts` → "gives colliding slugs unique filenames so + no operation is silently overwritten" (+ a baseline one-stub-per-op test). + - Suite after fix: **PASS — 230 tests (was 228, +2), typecheck 7/7, 0 regressions.** + +### Attacks that held (Cycle 1) +- `confinePath` follows symlinks via `realpathSync` (checks the deepest existing ancestor for not-yet- + existing write paths) — a link inside the workspace can't point outside. Web write + MCP write confined. +- `live.ts` probe sends only GET/HEAD to the operator-supplied `--live` base (no mutation/injection). 
+- MCP read/run tools (`runCollectionTool`/`driftTool`/…) use bare `resolve()` (not confined) — a + **defensible design choice** (an agent legitimately works across a user's projects; confining would + break cross-dir use), unlike the web server which serves a single `dir`. Noted, not "fixed". + +### Cycle outcome +- Broke? **yes** (BUG-F medium) → fixed at root + 2 regression tests → suite green (230). Restart Cycle 2. + +## Cycle 2 — format schema strictness / contract conformance + +### Plan +- Probe nested schema strictness (Body/Auth/Assertion/SpecLink) vs CLAUDE.md's "unknown keys are + rejected" rule; numeric edges (`order` = NaN/±Inf; assertion numeric fields). + +### Findings + +- [BUG-G] format/schema — nested objects silently strip unknown keys (typos don't surface) | severity **low-medium** + - Repro (`_qa_repro.test.ts`): + - P1 `spec: { operatonId: getPet }` → parses to an empty `{}` spec link (typo stripped). `{}` is + truthy, so the request is treated as spec-linked-to-nothing → `drift` mis-reports it as **stale** + and `coverage` counts it uncovered — a silent mislink in the flagship spec-sync feature. + - P2 `{ type: jsonpath, path: "$.id", exits: true }` → strips `exits`, leaving a condition-less + assertion that silently **always fails**. + - P5 control: top-level typos ARE rejected — confirming the inconsistency (top strict, nested not). + - Root cause: `RequestSchema`/`FolderConfigSchema`/`EnvironmentSchema` are `.strict()`, but the nested + `Body`/`Auth`/`Assertion`/`SpecLink` discriminated-union objects were not — so optional-key typos + were silently stripped instead of rejected, contradicting CLAUDE.md's hard rule. Notably the PUBLISHED + JSON Schema already declared `additionalProperties:false` on these objects (an editor/agent would + flag the typo), so the Zod runtime was **out of sync with its own published contract**. 
+ - Fix: `packages/core/src/format/schema.ts` — add `.strict()` to every nested Body/Auth/Assertion + member and to `SpecLink`. Regenerated the JSON Schema (`pnpm gen:schema`) → **no diff** (the published + schema was already strict; this fix makes the runtime match it). (P3: required-field typos were + already caught. P4: `order` rejects `.nan`; accepts `±.inf` which sorts deterministically — no fix.) + - Regression test: `packages/core/test/format.test.ts` → "rejects unknown keys in NESTED objects too + (assertion/spec/auth/body typos surface)". + - Suite after fix: **PASS — 231 tests (was 230, +1), typecheck 7/7, JSON Schema unchanged, 0 regressions.** + +### Cycle outcome +- Broke? **yes** (BUG-G low-medium) → fixed at root + regression test → suite green (231). Restart Cycle 3. + +## Cycle 3 — broad fresh-seed sweep (9th seeds) — CLEAN + +### Plan +- J1 `scaffoldFromSpec` over ~6k random hostile specs (collision-freeness, every stub parses); + J2 strict-schema fuzz (~15k: nested typos rejected, valid requests still parse); + J3 regression sweep (validator/resolve/mock under fresh seeds). + +### Findings +- **No new failures.** J1 filename collisions=0, parseFailures=0; J2 crashes=0, acceptedNestedTypo=0, + rejectedValid=0; J3 resolve lostParams=0, validator crashes=0 slowest=11ms, mock slowest=28ms (GC blip; + regex is the collapsed `[^/]+`, proven linear in Campaign 2). + +### Cycle outcome +- Broke? **no** → final confirmation cycle with a 10th seed set + a scaffold count-invariant. + +## FINAL confirmation — 10th seeds + real-binary smoke — CLEAN + +### Plan +- K1 scaffold count invariant (#files == #non-skipped ops, all distinct, all parse) over ~9k specs; + K2 combined strict-schema/validator/resolve/importer sweep; plus a built-binary `gen`/`run`/`coverage` + smoke (the strict-schema change must not break real example collections). 
+ +### Findings +- **No new failures.** K1 invariant violations=0; K2 typoAccepted=0, validRejected=0, validatorCrash=0 + slowest=3ms, resolveLost=0, importerFail=0. Built binary: `gen` on petstore → **3 distinct files**; + blog `run` → **3 passed**; `coverage` → **75%**. No regression from the BUG-F/BUG-G fixes. + +### Cycle outcome +- Broke? **no.** Cycle 3 AND this confirmation both produced **zero new failures** across two distinct + fresh seed sets → **STOP condition met.** + +--- + +# CAMPAIGN 3 — FINAL SUMMARY + +**Verdict.** A third pass over the (twice-hardened) engine, aimed at Campaign 2's honest gaps (scaffold, +schema strictness, web/MCP confinement, the `--live` probe). Found **2 more real bugs**, both root-caused +and regression-tested. Final state: `pnpm test` **231 passed** (26 files; +3 this campaign over its 228 +baseline, +25 over the project's original 206), `typecheck`/build **7/7**, JSON Schema regenerated with +**no diff** (the runtime now matches the already-strict published contract), and the README blog flow + +`gen` run exactly as advertised in the built binary. Confidence: **high** for the core engine, CLI, and +code-gen; the web/MCP HTTP/protocol layers were *reviewed* (confinement confirmed sound) but not load- +fuzzed (noted). + +**Cycles:** 3 attack + 1 confirmation. Bugs by area/severity: +- Code-gen (silent data loss) — **1, medium**: BUG-F (`scaffoldFromSpec` colliding filenames overwrite + operations on disk). +- Schema contract conformance (silent typo-stripping) — **1, low-medium**: BUG-G (nested Body/Auth/ + Assertion/SpecLink objects weren't `.strict()`, so optional-key typos were dropped instead of rejected — + diverging from the published JSON Schema and yielding mislinked specs / no-op assertions). + +**Top fixes (root causes).** +1. **Scaffold collisions** (BUG-F) — `scaffoldFromSpec` wrote `${slug}.tspec.yaml` with no uniqueness + counter; distinct ops that slug alike overwrote each other. 
Now suffixes `-2`/`-3`/… like the importer. + `spec/scaffold.ts`. +2. **Nested typo-stripping** (BUG-G) — `.strict()` added to every nested discriminated-union object and + `SpecLink`, so typos surface as parse errors per CLAUDE.md's rule and the runtime matches the published + JSON Schema. `format/schema.ts`. + +**Coverage table (this campaign; see Campaigns 1–2 for the rest).** +| Category | Status | Strongest attack survived | +|---|---|---| +| Code-gen / scaffold | Tested | ~15k random hostile specs — 0 collisions, count invariant holds, every stub parses | +| Schema / contract conformance | Tested | ~30k fuzz — all nested typos rejected, no valid request falsely rejected; JSON Schema in sync | +| Path confinement (web + MCP write) | Reviewed | `confinePath` realpath-follows symlinks; deepest-ancestor check for writes | +| `--live` SSRF surface | Reviewed | GET/HEAD only to the operator-supplied base; no injection | +| Output formatters | Reviewed | JUnit escapes name/classname/message + strips C0; human/json over trusted data | +| Web `serve` / MCP load-fuzz | Not tested | reviewed only; would need a running-server harness (carried gap) | + +**Residual risk & gaps.** +- MCP read/run tools are intentionally **not** path-confined (agents work across projects); the web API + `POST /api/run` `env` name is likewise unconfined (loopback-only, local user) — both noted as design, + not fixed. +- Web `serve` and MCP protocol layers were reviewed, not load/fuzz-tested (no server harness this pass). +- All Campaign 1–2 residuals stand (`additionalProperties`-as-schema, `<6`-char secrets, `MAX_DEPTH=100`). +- BUG-D's behavior change (empty `run` exits 1) still warrants maintainer review. + +**Hardening recommendations.** +- Keep the new regression tests in CI (scaffold uniqueness, nested-key strictness). +- Add a CI check that `pnpm gen:schema` produces no diff (catches future Zod-vs-published-schema drift — + exactly the class BUG-G fell into). 
+- A future campaign should stand up a web/MCP server harness and fuzz the live HTTP/protocol surface. + +**Artifacts.** +- Fixes: `packages/core/src/spec/scaffold.ts` (F), `packages/core/src/format/schema.ts` (G). +- Regression tests: `packages/core/test/scaffold.test.ts` (+2), `packages/core/test/format.test.ts` (+1). +- This log: `QA_LOG.md`. + +--- +--- + +# CAMPAIGN 4 — live-server pass (branch `qa/adversarial-cycle-2`) + +Closed the gap every prior campaign deferred: **stood up the real running servers** (mock HTTP, web +`serve`, MCP) and attacked them live, instead of only reviewing them. Baseline: `pnpm test` **231 passed**. + +## Cycle 1 — live mock HTTP server + +### Plan +- Start the real `startMockServer` and fire: a route whose spec status is out of HTTP range; malformed + raw request targets; concurrent requests. + +### Findings + +- [BUG-H] mock server — uncaught exception / process crash on an out-of-range spec status | severity **med-high** + - Repro (`_qa_scratch.test.ts` L0/L1): a spec response keyed `"20000"` → `respond()` returns + `status: 20000`; a live `GET /bad` then **hangs the client** and throws an uncaught + `Invalid status code: 20000` on the server (`res.writeHead(20000)` — Node only allows 100–999). + - Evidence: `[L1] uncaughtException on server: Invalid status code: 20000`; the fetch times out. + - Root cause: (1) `pickResponse` did `Number(chosen)` for any `/^\d+$/` status with no range check; + (2) `mock/server.ts`'s handler — including its `setTimeout`-delayed `send` — had **no try/catch** + (unlike `web/server/index.ts`), so the throw was uncaught → in production the process dies. + Reachable via `truspec mock` and the `truspec_mock_start` MCP tool (agent/third-party specs). + - Fix: `packages/core/src/mock/engine.ts` — clamp the status to 100–999, else fall back to 200. 
+ `packages/core/src/mock/server.ts` — wrap `new URL` and the (possibly delayed) `send` in try/catch, + replying 400/500 instead of throwing, so no single request can crash the long-running process. + - Regression test: `packages/core/test/mock.test.ts` → "mock server resilience (out-of-range status + code)" (unit: status clamped to 200; live: server responds 200 + stays alive + no uncaughtException). + - Suite after fix: **PASS — 233 tests (was 231, +2), typecheck 7/7, 0 regressions.** + +### Attacks that held (Cycle 1) +- Malformed raw targets (`/%ZZ`, `/a%2`, `OPTIONS *`, `//evil.com/x`, backslashes) → `new URL` with a + base normalizes them; server stays alive (the crash vector was the status code, not URL parsing). + +### Cycle outcome +- Broke? **yes** (BUG-H med-high) → fixed at root + handler hardening + 2 regression tests → green (233). + Restart Cycle 2. + +## Cycle 2 — live web `serve` server + MCP tools + +### Plan +- Web: 40 concurrent same-path saves (corruption); hostile API (bad JSON, traversal path, oversized + body, wrong methods, env traversal); DNS-rebinding via raw sockets. MCP: hostile tool args. + +### Findings + +- [BUG-I] discovery — one malformed `.tspec.yaml` aborts the entire listing (undiagnosable) | severity **medium** + - Repro (`_qa_scratch.test.ts` M4): `listCollections` over a dir containing one garbage `.tspec.yaml` + **throws** `Nested mappings are not allowed…` — the whole listing fails and the error names no file. + - Root cause: `listCollections` (MCP `truspec_list_collections`) and `buildState` (web `/api/state`) + both `map(parse.request.parse(readFileSync(file)))` over all files with no per-file isolation. One + typo'd file → the agent/UI can't list ANY request to even find the broken one, and the error carries + no filename (an undiagnosable failure — itself a defect). 
+ - Fix: `packages/mcp-server/src/tools.ts` + `packages/web/server/api.ts` — per-file try/catch; valid + requests still list, bad files become `errors: [{ path, error }]` entries (named, diagnosable). + - Regression test: `packages/mcp-server/test/tools.test.ts` → "a malformed file does not abort the + listing…"; `packages/web/test/api.test.ts` → "a malformed request file does not 500 /api/state…". + - Suite after fix: **PASS — 235 tests (was 233, +2), typecheck 7/7, 0 regressions.** + +### Attacks that held (Cycle 2) +- **Web server, comprehensive:** 40 concurrent saves to the same path → final file valid & complete, no + corruption, no uncaught. Hostile API → traversal-path/folder-path/invalid-content/no-spec all return + clean confined errors; wrong methods → 404; oversized 6MB body → cap enforced (server stays alive). + **DNS-rebinding (raw sockets):** `Host: evil.com`/`8.8.8.8`/no-Host → **403**, loopback → 200. +- **MCP write/scaffold tools:** createRequest/updateRequest with traversal paths → confined (no escape + to /etc or /tmp); invalid schema → clean `{ok:false}`; scaffold dedups colliding slugs (BUG-F holds). + +### Cycle outcome +- Broke? **yes** (BUG-I medium) → fixed at root + 2 regression tests → green (235). Restart Cycle 3. + +## Cycle 3 — live mock fuzz + core regression sweep + +### Plan +- Fuzz the live mock server with adversarial status codes / shapes; sweep validator/scaffold/parse. + +### Findings + +- [BUG-J] spec/scaffold — crash on an operation with an empty-string operationId | severity **medium** + - How found: the Cycle-3 sweep's `scaffoldFromSpec` call (not wrapped) threw on a generated spec. + Isolated repro: `operationId: ""`. + - Repro: `scaffoldFromSpec('… /x: { get: { operationId: "", responses: {…} } }')` → **throws** a Zod + `name.min(1)` error, crashing `truspec gen` / `truspec_scaffold_from_spec` on the whole spec. + - Root cause: `name: op.operationId ?? 
op.key` — `??` keeps an empty-string operationId, and an empty + request `name` fails the schema. Same anti-pattern as BUG-E (Postman empty name). + - Fix: `packages/core/src/spec/scaffold.ts` — `const label = op.operationId || op.key` for both the + name and the filename slug; omit an empty operationId from the spec link + (`operationId: op.operationId || undefined`). Audited the rest of the codebase for the same + `?? ""`-feeds-`min(1)` pattern — no other instances (other `??` fallbacks feed no-min fields: + auth, headers, display labels; the Bruno importer already uses a truthy guard). + - Regression test: `packages/core/test/scaffold.test.ts` → "does not crash on an operation with an + empty-string operationId (falls back to the key)". + - Suite after fix: **PASS — 236 tests (was 235, +1), typecheck 7/7, 0 regressions.** + +### Attacks that held (Cycle 3) +- Live mock servers with adversarial statuses ("20000"/"99"/"0"/"1000"/"default") → every route returns + a writeHead-valid status, responds, and the server stays alive; no uncaughtException (BUG-H fix holds). +- Validator/parse-strictness/scaffold-dedup all clean under fresh seeds. + +### Cycle outcome +- Broke? **yes** (BUG-J medium) → fixed at root + regression test + codebase audit → green (236). + Restart Cycle 4. + +## Cycle 4 — broad confirmation (5th seed set) — found an INCOMPLETE fix + +### Plan +- Re-sweep every Campaign-1..4 surface under fresh seeds: scaffold (incl. empty/symbol operationIds), + live mock servers with random statuses, importers, validator, strict-schema, resolve. + +### Findings + +- [BUG-K] mock server — a 1xx interim status served as the final response hangs clients (BUG-H fix was + INCOMPLETE) | severity **medium** + - How found: the live-mock sweep (O2/O2b) — refining the check to separate *fetch failures* from + benign >599 statuses showed exactly one code, `"100"`, produced **fetchFailures=1** with no + crash. 
`statusesSeen` was missing the 100 entry: the client hung until the 2s timeout. + - Repro: a spec with response `"100"` (or `"101"`/any 1xx) → the mock sends `res.writeHead(100)` as + the FINAL response; HTTP clients treat 1xx as interim and wait for the real response → `fetch` times + out. (`"600"` returned fine — the fault is specific to 1xx, not the >599 range.) + - Root cause: BUG-H's clamp used `[100, 999]` (Node's `writeHead` validity) — which still admits 1xx + *interim* codes. A mock sends one COMPLETE response, so the valid range is the FINAL-status range + `[200, 599]`; a 1xx is never a valid final response. + - Fix: `packages/core/src/mock/engine.ts` — clamp to `[200, 599]`, else fall back to 200. Fixes both + the writeHead crash (BUG-H) and the 1xx hang in one rule. + - Regression test: `packages/core/test/mock.test.ts` → resilience block extended to assert 1xx + (100/101/199) AND out-of-range (20000/99/0) all clamp to 200, a real 404 is preserved, and a live + server with "20000" or "100" responds 200 (no crash, no hang). + - Suite after fix: **PASS — 236 tests, typecheck 7/7, 0 regressions.** + +### Attacks that held (Cycle 4, after the BUG-K fix) +- O1 scaffold (~7.5k specs, empty/symbol operationIds): THREW=0, collisions=0, parseFail=0. +- O2b live mock (13 adversarial statuses): fetchFailures=0, writeHead-invalid=0, uncaught=none. +- O3 importers/validator/resolve/strict-schema: importerFail=0, validatorCrash=0, resolveLost=0, + typoAccepted=0, validRejected=0. + +### Cycle outcome +- Broke? **yes** (BUG-K medium, an incomplete BUG-H fix surfaced in confirmation) → fixed at root + + regression test → green (236). Restart Cycle 5. + +## Cycle 5 — fresh-seed full sweep (6th seeds) — CLEAN + +### Plan / Findings +- P1 mock engine over 27 status classes → invalid-final-status=0. P2 live mock over 12 status classes + (1xx/6xx/20000/0/default) → fetchFailures=0, uncaught=none. P3 scaffold/importer/validator/resolve/ + strict-schema (~10k) → all zero. 
**No new failures.** + +### Cycle outcome +- Broke? **no** → final confirmation with a 7th seed set. + +## FINAL confirmation — 7th seeds + recursive-schema live mocks — CLEAN + +### Plan / Findings +- Q1: 18 live mock servers under random {status × recursive/$ref schema}, draining each body → + failures=0, uncaught=none (recursive-schema examples stay finite; no crash/hang). Q2: combined + scaffold/importers/validator/resolve/strict sweep (~15k) → all zero. **No new failures.** + +### Cycle outcome +- Broke? **no.** Cycle 5 AND this confirmation both produced **zero new failures** across two distinct + fresh seed sets → **STOP condition met.** + +--- + +# CAMPAIGN 4 — FINAL SUMMARY + +**Verdict.** Closed the standing gap from Campaigns 1–3: **stood up the real running servers** (mock +HTTP, web `serve`, MCP) and attacked them live instead of reviewing them. That immediately paid off — +**4 more real bugs**, including a crash-the-process DoS and a client-hang, all root-caused and +regression-tested. Final state: `pnpm test` **236 passed** (26 files; +5 this campaign over its 231 +baseline, +30 over the project's original 206), `typecheck`/build **7/7 / 5/5**, and the built binary +serves a hostile-status spec without crashing. Confidence: **high** across the engine, CLI, code-gen, +and now the live HTTP/MCP surfaces. + +**Cycles:** 5 attack + 1 confirmation (BUG-K surfaced as an incomplete fix during confirmation, resetting +the loop). Bugs by area/severity: +- Mock HTTP server (crash + hang DoS) — **2**: BUG-H *med-high* (out-of-range status → uncaught + `Invalid status code` → process crash) and BUG-K *med* (1xx interim status → client hang; BUG-H's + clamp was incomplete). +- Discovery/observability — **1, med**: BUG-I (one malformed `.tspec.yaml` aborts the whole listing in + `truspec_list_collections` / web `/api/state`, with no filename in the error). 
+- Code-gen robustness — **1, med**: BUG-J (`scaffoldFromSpec` crashes on an empty-string operationId — + a sibling of Campaign-2's BUG-E). + +**Top fixes (root causes).** +1. **Mock status crash + hang** (BUG-H/K) — `pickResponse` emitted any `Number(code)`; clamped to the + valid FINAL range `[200,599]` (rejects out-of-range crashers AND 1xx interim hangers). Also wrapped + the mock handler + its delayed `send` in try/catch so no single request can crash the process. + `mock/engine.ts`, `mock/server.ts`. +2. **Listing aborts on one bad file** (BUG-I) — per-file try/catch in `listCollections` + `buildState`; + valid requests still list, bad files become named `errors[]` entries. `mcp-server/tools.ts`, `web/server/api.ts`. +3. **Scaffold empty-operationId crash** (BUG-J) — `op.operationId || op.key` (not `??`); empty link omitted. + `spec/scaffold.ts`. + +**Coverage table (this campaign).** +| Category | Status | Strongest attack survived | +|---|---|---| +| Live mock HTTP server | Tested | ~30 live servers across every status class (1xx/2xx/3xx/4xx/5xx/6xx/20000/0/neg) + recursive schemas + raw malformed targets — no crash, no hang | +| Live web `serve` server | Tested | 40 concurrent same-path saves (no corruption); traversal/oversized/wrong-method API; DNS-rebinding via raw sockets (403) | +| MCP tools | Tested | traversal-confined writes; malformed-file listing resilience; invalid-schema clean errors | +| Concurrency (same-file writes) | Tested | 40 parallel POSTs → final file valid & complete | +| Observability / diagnosability | Tested | malformed file now surfaces with its path instead of an anonymous throw | +| Code-gen robustness | Tested | empty/symbol/missing operationIds + colliding slugs over ~25k specs | +| Load/soak (sustained high RPS, p99) | Not tested | no perf harness; would need k6/autocannon against a long-running instance | + +**Residual risk & gaps.** +- **Oversized request body** to the web API returns a client connection-reset rather than a clean 
413 + (server cap works and stays alive; cosmetic — `req.destroy()` races the 413 flush). Noted, not fixed. +- MCP read/run tools remain intentionally un-path-confined (cross-project agent use); web `/api/run` + `env` name is unconfined (loopback-only). Design, not fixed. +- True **load/soak/p99** testing still not done (no perf harness) — the servers were correctness- and + crash-fuzzed, not throughput-measured. +- All Campaign 1–3 residuals stand (`additionalProperties`-as-schema, `<6`-char secrets, BUG-D behavior + change). + +**Hardening recommendations.** +- Keep the new live-server regression tests in CI (mock status resilience, listing resilience). +- Add a scheduled load/soak job (autocannon) against `truspec serve` and `truspec mock` to catch leaks + and throughput regressions the correctness fuzz can't. +- Consider a shared `safeHandler` wrapper for both HTTP servers so the try/catch posture can't drift + apart again (BUG-H existed because the mock handler lacked the web server's guard). + +**Artifacts.** +- Fixes: `mock/engine.ts` + `mock/server.ts` (H, K), `mcp-server/src/tools.ts` + `web/server/api.ts` (I), + `spec/scaffold.ts` (J). +- Regression tests: `core/test/mock.test.ts` (+ resilience block), `mcp-server/test/tools.test.ts` (+1), + `web/test/api.test.ts` (+1), `core/test/scaffold.test.ts` (+1). +- This log: `QA_LOG.md`. + +--- +--- + +# CAMPAIGN 5 — load/soak + runner HTTP-layer pass (branch `qa/adversarial-cycle-2`) + +Closed the last category Campaign 4 deferred — **load / soak / leak** — and attacked the runner's HTTP +layer under adversarial *responses* (decompression bombs, slow-loris, resets). Baseline: **236 passed**. + +## Cycle 1 — load/soak/leak + runner response handling + +### Plan +- Load/soak the live mock + web servers (30k / 10k requests), watch RSS + handles for leaks. +- Runner vs hostile responses: gzip bomb, oversized body, slow-loris, mid-body reset, huge header. +- Functional: can a request observe a 3xx redirect? 
+ +### Findings + +- [BUG-L] runner — auto-follows redirects → 3xx responses untestable & silently mis-reported | severity **medium** + - Repro (`_qa_repro.test.ts`): a server returns `302 Location: /final`; `runRequest` reports + **status 200** (the followed target), no `location` header → `assertions: [{status equals 302}, + {header location exists}]` both FAIL. A user "cannot test redirects", and `truspec contract`/ + `run --spec` can never validate a 3xx operation the spec declares (the flagship feature is blind + to redirect responses). + - Root cause: Node `fetch` defaults to `redirect: "follow"`, so the runner observes the redirect + TARGET's response, not the response the requested URL actually returned. + - Fix: `packages/core/src/runner/run.ts` — set `redirect: "manual"` in the fetch init. Node returns + the real 3xx (status + `Location`), unlike a browser's opaque response (verified). Correct for a + contract tool (matches curl's default). **Behavior change** (flagged for maintainer review): a + collection that relied on auto-follow now sees the 3xx and its downstream assertions fail LOUDLY + (not silently wrong) — a future per-request `followRedirects` opt-in could restore following. + - Regression test: `packages/core/test/runner.test.ts` → "does not auto-follow redirects — a 3xx is + observable and assertable" (asserts the runner passes `redirect:"manual"` and reports/asserts 302). + - Suite after fix: **PASS — 237 tests (was 236, +1), typecheck 7/7, 0 regressions.** + +### Attacks that held (Cycle 1) +- **Load/soak/leak — mock server:** 30k requests @ ~791 rps → heap stable (16→17-21MB, ~1MB growth, no + monotonic leak), server responsive, no uncaught. 6k concurrent *delayed* (5ms) requests → 0 failures, + no timer/handle leak. **Web server:** 10k `/api/state` + 200 concurrent same-path saves → no + corruption, no crash, no leak; all failures under 64-concurrency were graceful client timeouts (0 + resets/errors). 
+- **Runner vs hostile responses:** an 80MB gzip **decompression bomb** is capped on the DECOMPRESSED + stream (heap stayed 13MB — no OOM); oversized uncompressed body capped; **slow-loris body aborted at + the timeout** (no unhandledRejection); mid-body connection reset → clean error; 200KB header → undici + rejects cleanly. All return `ok:false` with a clean message; nothing crashes or hangs. + +### Non-bug finding (documented, not fixed — honest call) +- **`/api/state` is O(collection-size) synchronous, event-loop-blocking work per request** (no cache): + 10 files→2ms, 500→205ms, 2000→468ms. Under 64-concurrency ~10% of requests hit the 5s client timeout + (graceful — no crash/corruption/leak/reset). Realistic single-user use (≤100 files) is fine. The fix + (cache parsed state + invalidate on write, and/or async fs) carries cache-staleness risk, so per + "real fixes only / don't make it worse" it's recorded as a hardening recommendation, not forced. + +### Cycle outcome +- Broke? **yes** (BUG-L medium) → fixed at root + regression test → green (237). Restart Cycle 2. + +## Cycle 2 — broad confirmation (fresh seeds) — CLEAN + +### Plan / Findings +- S1 runner observes every 3xx (301/302/303/307/308 → observed, not 200) — BUG-L holds. S2/S2b mock + server 8k+24k requests with FORCED GC → heap stable 17MB (growth −1MB) = **no leak** (the un-GC'd + 100MB was just garbage). S3 broad core fuzz (validator/scaffold/importer/resolve/strict, ~12.5k) → + all zero. **No new failures.** + +### Cycle outcome +- Broke? **no** → final confirmation with fresh seeds. + +## FINAL confirmation — fresh seeds + runner HTTP re-attack — CLEAN + +### Plan / Findings +- T1 decompression bomb capped + slow-loris aborted, no unhandledRejection, heap 16MB. T2 redirect 307 + observed with Location intact (both assertions pass). T3 live mock all status classes (mockFail=0, + uncaught=none) + broad fuzz (vCrash/scafBad/typo all 0). **No new failures.** + +### Cycle outcome +- Broke? 
**no.** Cycle 2 AND this confirmation both produced **zero new failures** → **STOP met.** + +--- + +# CAMPAIGN 5 — FINAL SUMMARY + +**Verdict.** Closed the final deferred category — **load / soak / leak** — and attacked the runner's +HTTP layer under adversarial *responses*. The servers proved leak-free and crash-free under sustained +load; the runner already defeats decompression bombs, slow-loris, resets, and huge headers. One real +functional bug surfaced (redirect auto-follow) and one honest performance gap was documented (not +forced). Final state: `pnpm test` **237 passed** (26 files; +1 this campaign, +31 over the original +206), `typecheck`/build **7/7 / 5/5**. Confidence: **high** for correctness/crash-safety/leak-safety; +**partial** on raw throughput (measured latency/leak, did not push to a defined RPS SLO). + +**Cycles:** 2 attack + 1 confirmation. Bugs by area/severity: +- Runner functional correctness — **1, med**: BUG-L (auto-follows redirects → 3xx untestable and + `contract` blind to redirect operations). + +**Top fix (root cause).** +- **Redirect auto-follow** (BUG-L) — Node `fetch` defaults to `redirect:"follow"`, so the runner reported + the redirect TARGET's response. Set `redirect:"manual"` so a spec-contract tool observes the actual + 3xx (status + Location). `runner/run.ts`. 
*(Behavior change — flagged.)* + +**Coverage table (this campaign).** +| Category | Status | Strongest attack survived | +|---|---|---| +| Load / soak / leak | Tested | 30k mock + 24k (forced-GC) + 10k web requests → heap stable (~17MB), no leak, no crash | +| Concurrency (same-file writes) | Tested | 200 concurrent same-path saves → final file valid & complete, no corruption | +| Runner vs hostile responses | Tested | 80MB gzip decompression bomb capped (no OOM); slow-loris aborted at timeout; reset/huge-header → clean errors | +| API/contract (status correctness) | Tested | redirect responses now observable + spec-validatable | +| Stress (sustained high RPS to a knee) | Partially | found `/api/state` O(n)-sync degrades under 64-conc (graceful timeouts); did not bisect the exact knee | +| Load to a defined throughput SLO | Not tested | no SLO defined for a local-first tool; would need autocannon + target RPS | + +**Residual risk & gaps.** +- **`/api/state` is O(collection-size) synchronous per request** (10→2ms, 2000→468ms; event-loop-blocking). + Fails gracefully (timeouts, no crash/leak/corruption). Documented as a hardening recommendation, not + fixed — caching risks UI staleness. The realistic single-user case (≤100 files) is fine. +- **BUG-L is a behavior change** (no longer follows redirects). Correct for a contract tool; a future + per-request `followRedirects` opt-in could restore following for users who want it. +- Raw throughput / defined-SLO load testing still not done (no perf harness / no SLO). +- All Campaign 1–4 residuals stand. + +**Hardening recommendations.** +- Cache `/api/state` (parsed requests + spec list) and invalidate on write / via `fs.watch`; or move the + reads off the event loop. Add a bounded-size guard for very large collections. +- Add an autocannon/k6 load+soak job in CI against `truspec serve` and `truspec mock` with a target RPS + and a max-RSS-growth assertion (the correctness fuzz can't catch throughput regressions). 
+- Consider a `followRedirects` request option (default off) to make the BUG-L behavior configurable. + +**Artifacts.** +- Fix: `packages/core/src/runner/run.ts` (L). +- Regression test: `packages/core/test/runner.test.ts` (+1, redirect). +- Load/soak + runner-response harnesses were scratch vitest files (removed); their invariants are + recorded here and the redirect invariant lives on in the regression test. +- This log: `QA_LOG.md`. + +--- +--- + +# CAMPAIGN 6 — script vm / interpolation / env / capture / import pass — CLEAN + +Targeted the surfaces 5 prior campaigns never *adversarially attacked* (only reviewed): the pre/post +script `vm` + `tr` API, capture→interpolation re-injection, env/secret resolution, `.env` parsing, +capture sources, the `import` writer, and `drift --live`. Baseline: **237 passed**. + +## Cycle 1 — script vm + interpolation + env/secret + capture + import + +### Plan / Findings — NO NEW FAILURES +- **Pre/post script vm (`tr` API):** every error path fails cleanly — bad hmac algo (`Invalid digest`), + `throw`, `process`/`require` undefined (no sandbox escape, clean ReferenceError), syntax error, deep + recursion (`Maximum call stack`), `tr.set` of objects/5MB strings. Infinite loops in BOTH pre and post + scripts are killed at the 1s vm cap (post-script timeout empirically confirmed). No crash, hang, or + unhandledRejection. +- **Interpolation re-injection:** a var/captured value containing `{{x}}` is NOT re-expanded + (`{{a}}` with `a="{{secret}}"` → `"{{secret}}"`, not the secret) — single-pass, no template injection. + `$&`/`$1`/`` $` `` replacement patterns stay literal (replacer is a function, not a string). Cyclic + `interpolateDeep` input handled (WeakSet). +- **env/secret (`buildVars`):** OS-env secret correctly shadows a same-named variable; missing secrets + reported; `__proto__` secret name does not pollute `Object.prototype`. 
**`.env` parsing:** quotes, + `KEY=a=b=c`, comments, empty values, trim, last-dupe-wins, missing file → all correct. +- **capture sources:** jsonpath / case-insensitive header / status / object (JSON-stringified) all + correct; missing path / missing header / malformed path → not captured (no crash). +- **`import` writer:** paths are `slug()`-generated (`[a-z0-9-]` only) → confined to `--out` by + construction, no traversal; HEAD/OPTIONS/GraphQL requests resolve and run correctly. + +### Cycle outcome — Broke? **no** → Cycle 2. + +## Cycle 2 — drift --live + broad regression (fresh seeds) — CLEAN + +### Plan / Findings — NO NEW FAILURES +- **`drift --live`** against a real server: probes **GET/HEAD only** (never POST /mutate — no side + effects), classifies 404 → missing, skips mutating ops. Correct + safe. +- **Broad regression fuzz** (~12.5k iters, fresh seeds) over all 12 of this session's bug classes + (A–L): validator crash/slow=0, scaffold collisions=0, importer fail=0, resolve lostParams=0, nested + typo accepted=0, mock out-of-range/1xx status=0, redirect-followed=0. Every prior fix holds. + +### Cycle outcome — Broke? **no** → confirmation cycle. + +## FINAL confirmation — fresh seeds — CLEAN + +### Findings — NO NEW FAILURES +- X1: ~1200 random pre/post scripts → crash/hang/unhandled=0, slowest 17ms. X2: ~18k interpolation + iterations → failures/re-injections=0. X3: 10 live mock servers (all status classes) + ~10k broad + fuzz → mockFail=0, uncaught=none, all guards 0. + +### Cycle outcome — Broke? 
**no.** Two clean cycles + a fresh-seed confirmation → **STOP met.** + +--- + +# CAMPAIGN 6 — FINAL SUMMARY + +**Verdict.** The first **clean campaign**: attacked every remaining un-probed surface (script vm + `tr` +API, capture/interpolation re-injection, env/secret resolution, `.env` parsing, capture sources, the +import writer, `drift --live`, GraphQL, HEAD/OPTIONS) and a broad fresh-seed regression of all 17 +historical bug classes — and produced **zero new failures** across two attack cycles and a confirmation. +Final state: `pnpm test` **237 passed** (26 files), `typecheck` **7/7**. Confidence: **high** for the +engine, CLI, code-gen, live servers, scripting, and runtime resolution. + +**Cycles:** 2 attack + 1 confirmation. **Bugs found: 0.** + +**Coverage table (this campaign).** +| Category | Status | Strongest attack survived | +|---|---|---| +| Scripting (vm / `tr` API) | Tested | ~1200 random pre/post scripts + targeted error paths (bad crypto, throws, undefined globals, infinite loops) — all clean, timeouts enforced | +| Input/fuzzing (interpolation) | Tested | ~18k random templates incl. nested `{{}}` re-injection + `$`-pattern injection — no expansion, no crash | +| Config/secrets | Tested | secret precedence, `__proto__` poisoning, `.env` quoting/dupes/edge cases | +| Capture / chaining | Tested | jsonpath/header/status/object/null/missing/malformed sources | +| Security (SSRF via `--live`) | Tested | live probe sends GET/HEAD only — never mutating methods | +| Import (file writing) | Tested | slug-confined paths (no traversal); idempotent overwrite on explicit `--out` | +| Regression (all 17 prior bugs) | Tested | fresh-seed sweep — every guard holds | + +**Residual risk & gaps (unchanged from Campaign 5).** +- `/api/state` O(collection-size) synchronous cost per request (graceful timeouts under heavy + concurrency; fine for realistic single-user use) — documented hardening recommendation, not a bug. 
+- Defined-SLO throughput load testing still requires a perf harness (autocannon/k6) + a target RPS. +- BUG-D (empty-run exit 1) and BUG-L (redirect no-follow) remain flagged behavior changes for review. + +**Hardening recommendations (unchanged).** Keep all regression tests in CI; add a `pnpm gen:schema` +no-diff check; add a scheduled autocannon load/soak job; consider a `followRedirects` request option. + +**Artifacts.** No code changes this campaign (zero bugs). Attack harnesses were scratch vitest files +(removed); their invariants are covered by existing regression tests. This log: `QA_LOG.md`. + +--- +--- + +# CAMPAIGN 7 — web UI client + browser-security pass (branch `qa/adversarial-cycle-2`) + +Closed the one category marked "if applicable / not tested" for six campaigns: the **rendered web UI +client** and **browser-side security** (XSS, CSRF, clickjacking). Baseline: **237 passed**. + +## Cycle 1 — web client XSS + CSRF/clickjacking + +### Plan +- Render malicious collection data (request name/url/headers/docs = ``, ``; + const run: WorkspaceRunResult = { + results: [ + { name: "n", request: { method: "GET", url: "x" }, ok: false, error: payload, assertions: [{ type: "header", ok: false, message: payload }] }, + ], + passed: 0, failed: 1, ok: false, missingSecrets: [], + }; + const html = renderResults(run); + expect(html).not.toContain("alert"); + expect(html).toContain("<img"); + + // drift/coverage operation keys + spec path are escaped too + const drift: DriftReport = { specOperations: 1, collectionOperations: 0, added: [payload], removed: [], changed: [], ok: false }; + expect(renderDrift(drift, payload)).not.toContain("alert"); + }); }); diff --git a/packages/web/server/api.ts b/packages/web/server/api.ts index 26d9133..bd86298 100644 --- a/packages/web/server/api.ts +++ b/packages/web/server/api.ts @@ -41,20 +41,31 @@ function listSpecs(dir: string): string[] { } function buildState(ctx: ApiContext) { - const requests = 
discoverRequests(ctx.dir).map((file) => { - const req = parse.request.parse(readFileSync(file, "utf8")); - return { - path: relative(ctx.dir, file), - name: req.name, - method: req.method, - url: req.url, - operation: req.spec?.operationId ?? req.spec?.operation, - assertions: req.assertions.length, - }; - }); + // A single malformed `.tspec.yaml` must not make the whole workspace fail to load (a 500 on + // /api/state would white-screen the UI with no way to find the broken file). List the valid + // requests and surface bad files (with their path) as errors instead of throwing. + const requests: Array<Record<string, unknown>> = []; + const errors: Array<{ path: string; error: string }> = []; + for (const file of discoverRequests(ctx.dir)) { + const path = relative(ctx.dir, file); + try { + const req = parse.request.parse(readFileSync(file, "utf8")); + requests.push({ + path, + name: req.name, + method: req.method, + url: req.url, + operation: req.spec?.operationId ?? req.spec?.operation, + assertions: req.assertions.length, + }); + } catch (e) { + errors.push({ path, error: (e as Error).message }); + } + } return { dir: ctx.dir, requests, + errors, environments: listEnvironments(ctx.dir), specs: listSpecs(ctx.dir), }; diff --git a/packages/web/server/index.ts b/packages/web/server/index.ts index 5f63ae9..620fddd 100644 --- a/packages/web/server/index.ts +++ b/packages/web/server/index.ts @@ -29,6 +29,30 @@ function hostAllowed(hostHeader: string | undefined, bindHost: string): boolean return LOOPBACK_HOSTNAMES.has(hostname); } +/** + * Block cross-site request forgery. The Host guard above defeats DNS rebinding, but it does NOT stop + * a page on `evil.com` — OR a page served from ANOTHER local port (a second dev server, a malicious + * local service) — from firing a cross-origin "simple" request (e.g. `POST /api/run` with a text/plain + * body, which skips the CORS preflight) at `http://127.0.0.1:<port>`. That would let it trigger the + * user's collection / write files. 
So we require the `Origin` to match the exact host:port the server + * was reached on (its `Host` header): a genuine same-origin request from the UI does; a cross-site OR + * cross-PORT page does not. (A bare loopback check is NOT enough — `localhost:OTHER` is loopback too.) + * A non-browser client (curl/CI/MCP) sends no Origin → allowed; `Origin: null` (opaque) is refused. + * When the user explicitly binds a non-loopback host they've opted into exposure, so the guard steps aside. + */ +function originAllowed(originHeader: string | undefined, hostHeader: string | undefined, bindHost: string): boolean { + if (!LOOPBACK_BINDS.has(bindHost)) return true; + if (originHeader === undefined) return true; + if (originHeader === "null") return false; + try { + // `URL.host` is `hostname:port` (port omitted only for the scheme default). It must equal the + // Host header the request arrived on — same host and port (the Host header carries no scheme, so the scheme is not compared). + return new URL(originHeader).host === hostHeader; + } catch { + return false; + } +} + const MIME: Record<string, string> = { ".html": "text/html; charset=utf-8", ".js": "text/javascript", @@ -93,11 +117,21 @@ export async function startWebServer(opts: WebServerOptions = {}): Promise { void (async () => { + // Never allow the local UI to be framed by another site (clickjacking + the `?run=all` + // deep-link auto-executing inside a hidden iframe). Set before any writeHead so it merges + // into every response. + res.setHeader("X-Frame-Options", "DENY"); + res.setHeader("Content-Security-Policy", "frame-ancestors 'none'"); if (!hostAllowed(req.headers.host, host)) { res.writeHead(403, { "content-type": "text/plain" }); res.end("Forbidden: unexpected Host header"); return; } + if (!originAllowed(req.headers.origin, req.headers.host, host)) { + res.writeHead(403, { "content-type": "text/plain" }); + res.end("Forbidden: cross-origin request refused"); + return; + } // A malformed request URL (e.g. 
a bad %-escape) makes `new URL`/decodeURIComponent // throw; without this guard the throw becomes an unhandledRejection that hangs the // socket and crashes the process. Answer with 400 instead. diff --git a/packages/web/src/App.tsx b/packages/web/src/App.tsx index 8d6e287..3ce49d8 100644 --- a/packages/web/src/App.tsx +++ b/packages/web/src/App.tsx @@ -1,4 +1,4 @@ -import { useCallback, useEffect, useMemo, useState } from "react"; +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import { coverage as apiCoverage, drift as apiDrift, @@ -201,6 +201,8 @@ export function App() { return (
+ {/* The visible brand is decorative styling; this is the document's real top-level heading. */} +

TruSpec — local-first API client

◢◤ @@ -267,7 +269,7 @@ export function App() { spec {state?.specs.length ?? 0}
- setSpec(e.target.value)}> {state?.specs.map((s) => (