From 42ba63b6d889b179f2fd8eb59657a8d44f82202f Mon Sep 17 00:00:00 2001 From: Mathis Pinsault Date: Fri, 24 Apr 2026 12:21:33 +0200 Subject: [PATCH 1/2] feat(cli): add --cwd flag Run hashup as if invoked from the given directory. Changes where hashup.json is discovered, where relative entry/extras paths resolve, and where --out writes. Defaults to process.cwd(). Co-Authored-By: Claude Opus 4.7 (1M context) --- .changeset/cli-cwd-flag.md | 13 +++++++++++++ docs/guide/cli.md | 3 +++ src/cli/main.ts | 17 ++++++++++++----- src/cli/parse-args.ts | 3 +++ src/cli/usage.ts | 1 + tests/cli/parse-args.test.ts | 5 +++++ 6 files changed, 37 insertions(+), 5 deletions(-) create mode 100644 .changeset/cli-cwd-flag.md diff --git a/.changeset/cli-cwd-flag.md b/.changeset/cli-cwd-flag.md new file mode 100644 index 0000000..e213d25 --- /dev/null +++ b/.changeset/cli-cwd-flag.md @@ -0,0 +1,13 @@ +--- +"@maastrich/hashup": minor +--- + +Add a `--cwd <dir>` CLI flag so you can run `hashup` from elsewhere +without `cd`-ing into the project. Changes where `hashup.json` is +discovered, where relative entry/extras paths resolve, and where the +`--out` target is written. Defaults to `process.cwd()`. + +```bash +hashup --cwd ./packages/app +hashup --cwd ./packages/app src/index.ts -o ../dist/app.hash +``` diff --git a/docs/guide/cli.md b/docs/guide/cli.md index fdebc6f..ac4a2c1 100644 --- a/docs/guide/cli.md +++ b/docs/guide/cli.md @@ -24,6 +24,9 @@ hashup src/index.ts Prints the hash of `src/index.ts` and its transitive import graph. Flags: - `-e, --extra <file>` — include an additional file in the hash (repeatable) +- `--cwd <dir>` — run as if invoked from this directory. Changes where + `hashup.json` is discovered and where relative paths resolve. Defaults + to `process.cwd()`. 
- `-b, --base-dir <dir>` — base directory for resolution (default: cwd) - `--json` — emit `{ "hash": "…" }` instead of plain text - `--files` — include the resolved file list in the JSON output diff --git a/src/cli/main.ts b/src/cli/main.ts index 0aaf605..9ae6f67 100644 --- a/src/cli/main.ts +++ b/src/cli/main.ts @@ -1,3 +1,4 @@ +import { resolve } from "node:path"; import { configJsonSchema } from "../config/json-schema.js"; import { die } from "./die.js"; import { parseCliArgs } from "./parse-args.js"; @@ -19,8 +20,14 @@ export async function main(argv: string[]): Promise<void> { return; } + // --cwd is resolved against the real process.cwd() so that relative + // values on the command line behave predictably. Everything else + // (config path, baseDir, output path) resolves against this effective + // cwd, letting a single `--cwd ./packages/app` move the whole run. + const cwd = args.cwd !== undefined ? resolve(process.cwd(), args.cwd) : process.cwd(); + if (args.printSchema) { - await writeOutput(process.cwd(), args.out, `${JSON.stringify(configJsonSchema, null, 2)}\n`); + await writeOutput(cwd, args.out, `${JSON.stringify(configJsonSchema, null, 2)}\n`); return; } @@ -30,7 +37,7 @@ export async function main(argv: string[]): Promise<void> { if (args.positionals.length === 1) { const output = await runSingleFileMode({ - cwd: process.cwd(), + cwd, file: args.positionals[0]!, extras: args.extras, baseDirOverride: args.baseDir, @@ -38,12 +45,12 @@ export async function main(argv: string[]): Promise<void> { files: args.files, logLevel: args.logLevel, }); - await writeOutput(process.cwd(), args.out, output); + await writeOutput(cwd, args.out, output); return; } const result = await runConfigMode({ - cwd: process.cwd(), + cwd, configPath: args.config, baseDirOverride: args.baseDir, json: args.json, @@ -53,5 +60,5 @@ export async function main(argv: string[]): Promise<void> { if (!result.ok) { die(result.error); } - await writeOutput(process.cwd(), args.out, result.output); + await 
writeOutput(cwd, args.out, result.output); } diff --git a/src/cli/parse-args.ts b/src/cli/parse-args.ts index 35133dc..8754af8 100644 --- a/src/cli/parse-args.ts +++ b/src/cli/parse-args.ts @@ -5,6 +5,7 @@ export interface CliArgs { config: string | undefined; extras: string[]; baseDir: string | undefined; + cwd: string | undefined; json: boolean; files: boolean; help: boolean; @@ -28,6 +29,7 @@ export function parseCliArgs(argv: string[]): CliArgs { "print-schema": { type: "boolean", default: false }, out: { type: "string", short: "o" }, "log-level": { type: "string", short: "l" }, + cwd: { type: "string" }, }, }); @@ -42,6 +44,7 @@ export function parseCliArgs(argv: string[]): CliArgs { config: values.config as string | undefined, extras: (values.extra as string[] | undefined) ?? [], baseDir: values["base-dir"] as string | undefined, + cwd: values.cwd as string | undefined, json: values.json === true, files: values.files === true, help: values.help === true, diff --git a/src/cli/usage.ts b/src/cli/usage.ts index 7643c4e..3f90c37 100644 --- a/src/cli/usage.ts +++ b/src/cli/usage.ts @@ -5,6 +5,7 @@ export const USAGE = `Usage: Options: -c, --config <path> Path to config file (default: hashup.json) -e, --extra <file> Extra file to include (repeatable, single-file mode) + --cwd <dir> Run as if from this directory (default: process.cwd()) -b, --base-dir <dir> Base directory for resolution (default: cwd) --json Output JSON instead of plain text --files Include resolved file list in JSON output diff --git a/tests/cli/parse-args.test.ts b/tests/cli/parse-args.test.ts index 24baae0..3200048 100644 --- a/tests/cli/parse-args.test.ts +++ b/tests/cli/parse-args.test.ts @@ -72,4 +72,9 @@ describe("parseCliArgs", () => { test("rejects invalid --log-level", () => { expect(() => parseCliArgs(["--log-level", "trace"])).toThrow(/Invalid --log-level/); }); + + test("parses --cwd", () => { + expect(parseCliArgs([]).cwd).toBeUndefined(); + expect(parseCliArgs(["--cwd", 
"./packages/app"]).cwd).toBe("./packages/app"); + }); }); From 37ba058722a2106e8cf0a596a67ef91a5daf0ba6 Mon Sep 17 00:00:00 2001 From: Mathis Pinsault Date: Fri, 24 Apr 2026 12:37:53 +0200 Subject: [PATCH 2/2] perf: make the hash cache linear in unique files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the per-file flattened transitive hash list. Store only each file's own content hash plus its direct deps; reconstruct the transitive contribution at combine time by walking cache.deps. Final digest is now sha256(each reachable file's hash, sorted by path), so every unique file contributes exactly once regardless of how many import paths reach it. Measured on a real monorepo config (81-entry glob against shared UI code): peak RSS 9.3 GB → 125 MB, wall time ~3 min → 1.1 s. Hash output changes. Any stored 0.6.x hashes must be re-baselined. Co-Authored-By: Claude Opus 4.7 (1M context) --- .changeset/cli-cwd-flag.md | 13 -------- .changeset/linear-cache-and-cwd.md | 36 ++++++++++++++++++++++ docs/api/hashup.md | 2 ++ docs/api/utilities.md | 49 ++++++++++++++++++------------ docs/guide/how-it-works.md | 15 ++++----- src/lib/cache.ts | 14 +++++---- src/lib/hash-file.ts | 41 +++++++++++++------------ src/lib/hashup.ts | 31 ++++++++++--------- tests/circular.test.ts | 5 ++- tests/examples.test.ts | 2 +- tests/shared-cache.test.ts | 2 +- 11 files changed, 126 insertions(+), 84 deletions(-) delete mode 100644 .changeset/cli-cwd-flag.md create mode 100644 .changeset/linear-cache-and-cwd.md diff --git a/.changeset/cli-cwd-flag.md b/.changeset/cli-cwd-flag.md deleted file mode 100644 index e213d25..0000000 --- a/.changeset/cli-cwd-flag.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -"@maastrich/hashup": minor ---- - -Add a `--cwd <dir>` CLI flag so you can run `hashup` from elsewhere -without `cd`-ing into the project. 
Changes where `hashup.json` is -discovered, where relative entry/extras paths resolve, and where the -`--out` target is written. Defaults to `process.cwd()`. - -```bash -hashup --cwd ./packages/app -hashup --cwd ./packages/app src/index.ts -o ../dist/app.hash -``` diff --git a/.changeset/linear-cache-and-cwd.md b/.changeset/linear-cache-and-cwd.md new file mode 100644 index 0000000..49e20ef --- /dev/null +++ b/.changeset/linear-cache-and-cwd.md @@ -0,0 +1,36 @@ +--- +"@maastrich/hashup": minor +--- + +Linear-memory cache + `--cwd` CLI flag. + +**Linear-memory cache.** `HashupCache.hashes` now stores each file's own +sha256 content hash (one 64-char string) instead of the flattened +transitive hash list. The transitive contribution is reconstructed at +combine time by walking `cache.deps`. Memory drops from +O(files × avg closure) to O(unique files) — on a real-world run that +previously needed 9 GB of heap, peak RSS is now ~125 MB and wall time +drops from minutes to ~1 s. + +**Hash output changes.** The final digest is now +`sha256(concat of each reachable file's content hash, sorted by path)`. +Each unique file contributes exactly once regardless of how many import +paths reach it. Any stored 0.6.x hashes must be re-baselined. As a +welcome side effect: cycles now hash the same regardless of which +member was the entry point. + +**`--cwd <dir>` CLI flag.** Run `hashup` as if invoked from the given +directory. Changes where `hashup.json` is discovered, where relative +entry/extras paths resolve, and where `--out` writes. Defaults to +`process.cwd()`. + +```bash +hashup --cwd ./packages/app +hashup --cwd ./packages/app src/index.ts -o ../dist/app.hash +``` + +**Targeted break for direct `hashFile` callers.** Return type is now +`Promise<string | null>` (the file's own hash, or `null` on failure) +instead of `Promise<string[]>`. Callers should use `collectReachable` +to enumerate the transitive set and read each file's hash from +`cache.hashes` at combine time. `hashup()` itself is unchanged. 
diff --git a/docs/api/hashup.md b/docs/api/hashup.md index 8461ee7..ef39c1a 100644 --- a/docs/api/hashup.md +++ b/docs/api/hashup.md @@ -6,6 +6,8 @@ function hashup(entryFile: string, options?: HashupOptions): Promise<HashupResult>; + hashes: Map<string, string>; deps: Map<string, string[]>; } @@ -55,12 +55,14 @@ function collectReachable(roots: readonly string[], cache: HashupCache): string[] ``` An in-memory cache scoped to one consumer's lifetime — not persisted, -not shared across processes. `hashes` stores each file's flattened -hash list; `deps` stores each file's direct resolved dependency paths. -Pass the same `HashupCache` to multiple `hashup()` or `hashFile()` calls -to dedupe work. `collectReachable` walks `deps` iteratively to rebuild -a per-call file list (used internally by `hashup()` to produce -`result.files`). +not shared across processes. `hashes` stores each file's own content +hash (one 64-char sha256 string per file); `deps` stores each file's +direct resolved dependency paths. Memory is linear in the number of +unique files. Pass the same `HashupCache` to multiple `hashup()` or +`hashFile()` calls to dedupe work. `collectReachable` walks `deps` +iteratively (no recursion) to enumerate the transitive closure — used +internally by `hashup()` to produce `result.files` and to fold each +file's content hash into the final digest. ## hashFile ```ts function hashFile( file: string, cache: HashupCache, resolver: Resolver, logger?: Logger, -): Promise<string[]>; +): Promise<string | null>; ``` -Hashes a file and all its transitive static imports. Results are memoized in -`cache` — pass the same `HashupCache` across multiple calls to dedupe work. -On error (file read or parse failure) the failure is sent through -`logger.warn` and an empty array is returned. `logger` defaults to a silent -logger; build one with [`createLogger`](#createlogger) when you want -diagnostics on stderr. +Hashes a file and recursively populates `cache.hashes` and `cache.deps` +for every non-`node_modules` transitive import. 
Returns the file's own +content hash on success, or `null` if the file could not be read or +parsed. The transitive contribution is reconstructed at combine time by +walking `cache.deps` — `hashFile` never returns the flattened list. +Results are memoized in `cache` — pass the same `HashupCache` across +multiple calls to dedupe work. `logger` defaults to a silent logger; +build one with [`createLogger`](#createlogger) when you want diagnostics +on stderr. Imports that resolve into `node_modules` are treated as opaque: the resolved path is skipped, its files are never read, and the dependency's own imports @@ -132,17 +137,23 @@ and hashing the result. Order-sensitive — pass hashes in a stable order. ## Composing Your Own Pipeline ```ts -import { combineHashes, createHashupCache, createResolver, hashFile } from "@maastrich/hashup"; +import { + collectReachable, + combineHashes, + createHashupCache, + createResolver, + hashFile, +} from "@maastrich/hashup"; const resolver = createResolver(); const cache = createHashupCache(); const entries = ["./src/a.ts", "./src/b.ts"]; -const allHashes: string[] = []; - for (const entry of entries) { - allHashes.push(...(await hashFile(entry, cache, resolver))); + await hashFile(entry, cache, resolver); } -const combined = combineHashes(allHashes); +const files = collectReachable(entries, cache).sort(); +const selfHashes = files.map((f) => cache.hashes.get(f)).filter((h) => h !== undefined); +const combined = combineHashes(selfHashes); ``` diff --git a/docs/guide/how-it-works.md b/docs/guide/how-it-works.md index b355e99..ed379be 100644 --- a/docs/guide/how-it-works.md +++ b/docs/guide/how-it-works.md @@ -11,8 +11,10 @@ conditional exports, and extension resolution. 3. **Hash each file's content** (SHA-256). Results are cached per absolute path so a file reachable through multiple paths is hashed once. -4. **Combine all hashes** — the entry's graph plus any `extras` — into a single - deterministic SHA-256 digest. +4. 
**Combine the unique file hashes**, in sorted-path order, into a single + SHA-256 digest. Every file in the transitive closure contributes exactly + once, regardless of how many import paths reach it — memory stays linear + in the number of unique files, independent of graph width or diamond count. ## Determinism @@ -60,8 +62,7 @@ top-level `"logLevel"` field. The CLI flag wins when both are set. ## Caveats -- **Circular imports** terminate deterministically, but the exact hash of a - cycle depends on which member was the entry point — the cache is seeded - with the entry's content hash first, so cycle re-visits return that - placeholder. Entering the same cycle from a different file produces a - different (still deterministic) hash. +- **Circular imports** terminate deterministically. The cache is seeded with + the file's own content hash before recursing, and each unique file + contributes exactly once to the final digest, so entering the same + cycle from any of its members produces the same hash. diff --git a/src/lib/cache.ts b/src/lib/cache.ts index 98d71c0..5bf2b54 100644 --- a/src/lib/cache.ts +++ b/src/lib/cache.ts @@ -7,14 +7,16 @@ * computation (the file's content hash is recomputed at most once). * * Two parallel maps keyed by absolute file path: - * - `hashes`: the flattened hash list (self + transitive deps). - * Returned directly to callers and combined into the final digest. - * - `deps`: the file's direct resolved dependency paths. Used by - * `collectReachable` to rebuild the per-call file list without - * re-walking the graph. + * - `hashes`: the file's own content hash (sha256 of its bytes). + * One 64-char string per file — not a flattened transitive list, + * because that was O(files × avg closure) and blew out the heap + * on large monorepos. See `hashup()` for how the transitive + * contribution is reconstructed at combine time. + * - `deps`: the file's direct resolved dependency paths. 
Walked by + * `collectReachable` to enumerate the transitive closure. */ export interface HashupCache { - hashes: Map<string, string[]>; + hashes: Map<string, string>; deps: Map<string, string[]>; } diff --git a/src/lib/hash-file.ts b/src/lib/hash-file.ts index 71b0de0..efc1ad1 100644 --- a/src/lib/hash-file.ts +++ b/src/lib/hash-file.ts @@ -4,54 +4,58 @@ import { createContentHash } from "./create-content-hash.js"; import { extractImports } from "./extract-imports.js"; import { isInNodeModules } from "./is-in-node-modules.js"; import { createLogger, type Logger } from "./logger.js"; -import { pushAll } from "./push-all.js"; import { readFileContent } from "./read-file-content.js"; import { resolveImport } from "./resolve-import.js"; +/** + * Ensure `file` and every file reachable from it are present in the + * cache. Returns the file's own content hash (sha256 hex) on success, + * or `null` if the file could not be read or parsed — in which case + * callers should skip it. The transitive contribution is reconstructed + * at combine time by walking `cache.deps`. + * + * Terminates deterministically on circular imports: the cache entry is + * seeded with the self hash before recursing, so a cycle A → B → A + * short-circuits on the revisit. + */ export async function hashFile( file: string, cache: HashupCache, resolver: Resolver, logger: Logger = createLogger("silent"), -): Promise<string[]> { +): Promise<string | null> { const cached = cache.hashes.get(file); - if (cached) { + if (cached !== undefined) { return cached; } try { const content = await readFileContent(file); - const hashes = [createContentHash(content)]; + const selfHash = createContentHash(content); const deps: string[] = []; - // Seed both caches before recursing so circular imports terminate: - // on a cycle A → B → A, the revisit of A hits `cache.hashes` and - // returns the placeholder instead of walking forever. 
- cache.hashes.set(file, hashes); + cache.hashes.set(file, selfHash); cache.deps.set(file, deps); const imports = await extractImports(file, content); - const dependencyHashes = await hashDependencies(imports, file, cache, resolver, logger, deps); - pushAll(hashes, dependencyHashes); + await walkDependencies(imports, file, cache, resolver, logger, deps); - return hashes; + return selfHash; } catch (error) { logger.warn(`Failed to hash file ${file}:`, error); cache.hashes.delete(file); cache.deps.delete(file); - return []; + return null; } } -async function hashDependencies( +async function walkDependencies( imports: string[], sourceFile: string, cache: HashupCache, resolver: Resolver, logger: Logger, deps: string[], -): Promise<string[]> { +): Promise<void> { const hashes: string[] = []; - for (const imported of imports) { const resolved = await resolveImport(resolver, sourceFile, imported); if (!resolved) continue; @@ -65,9 +69,6 @@ continue; } deps.push(resolved); - const resolvedHashes = await hashFile(resolved, cache, resolver, logger); - pushAll(hashes, resolvedHashes); + await hashFile(resolved, cache, resolver, logger); } - - return hashes; } diff --git a/src/lib/hashup.ts b/src/lib/hashup.ts index 47f03ff..05b7aa0 100644 --- a/src/lib/hashup.ts +++ b/src/lib/hashup.ts @@ -5,7 +5,6 @@ import { combineHashes } from "./combine-hashes.js"; import { createResolver } from "./create-resolver.js"; import { hashFile } from "./hash-file.js"; import { createLogger, type LogLevel } from "./logger.js"; -import { pushAll } from "./push-all.js"; export interface HashupOptions { /** ... @@ -71,6 +70,11 @@ export interface HashupResult { * treated as opaque and skipped — add a lockfile to `extras` if you * want install-tree changes reflected in the hash. * + * The hash is `sha256` over the concatenation of each reachable file's + * own content hash, in sorted-path order. 
Each file contributes exactly + * once regardless of how many import paths reach it, which keeps memory + * usage linear in the number of unique files. + * * @param entryFile - The entry file to hash * @param options - Optional configuration * @returns The deterministic hash and list of included files @@ -115,26 +119,25 @@ export async function hashup( const logger = createLogger(logLevel); const resolvedEntry = resolve(baseDir, entryFile); - const entryHashes = await hashFile(resolvedEntry, cache, resolver, logger); + await hashFile(resolvedEntry, cache, resolver, logger); - const extraHashes: string[] = []; const resolvedExtras: string[] = []; for (const extraFile of extras) { const resolvedExtra = resolve(baseDir, extraFile); resolvedExtras.push(resolvedExtra); - const hashes = await hashFile(resolvedExtra, cache, resolver, logger); - pushAll(extraHashes, hashes); + await hashFile(resolvedExtra, cache, resolver, logger); } - const combined: string[] = []; - pushAll(combined, entryHashes); - pushAll(combined, extraHashes); - const finalHash = combineHashes(combined); + // Reconstruct the transitive contribution by walking `cache.deps` + // from this call's roots. Each file contributes exactly once; sort + // by path so the combined hash is independent of traversal order. + const files = collectReachable([resolvedEntry, ...resolvedExtras], cache).sort(); - // `files` is the transitive closure of this call's roots — entry + - // extras — regardless of whether individual files were already in - // the shared cache. Walks the `deps` map, which is cheap. 
- const files = collectReachable([resolvedEntry, ...resolvedExtras], cache); + const selfHashes: string[] = []; + for (let i = 0; i < files.length; i++) { + const h = cache.hashes.get(files[i] as string); + if (h !== undefined) selfHashes.push(h); + } - return { hash: finalHash, files }; + return { hash: combineHashes(selfHashes), files }; } diff --git a/tests/circular.test.ts b/tests/circular.test.ts index 580d265..35a48a7 100644 --- a/tests/circular.test.ts +++ b/tests/circular.test.ts @@ -18,11 +18,10 @@ describe("hashup with circular imports", () => { expect(r1.files).toEqual(r2.files); }); - test("should produce the same hash regardless of which cycle member is the entry", async () => { + test("produces the same hash from either cycle member", async () => { const fromA = await hashup("./tests/fixtures/circular/a.ts"); const fromB = await hashup("./tests/fixtures/circular/b.ts"); - expect(fromA.hash).toMatch(/^[a-f0-9]{64}$/); - expect(fromB.hash).toMatch(/^[a-f0-9]{64}$/); + expect(fromA.hash).toBe(fromB.hash); }); }); diff --git a/tests/examples.test.ts b/tests/examples.test.ts index b2df23a..7c7b464 100644 --- a/tests/examples.test.ts +++ b/tests/examples.test.ts @@ -105,7 +105,7 @@ describe("hashup with example files", () => { const result = await hashup("./examples/src/index.ts"); expect(result.hash).toMatchInlineSnapshot( - `"48adf62a70c2645d0fc15ee3060973245af5dc30a542372791a7e1f05eaeacf6"`, + `"ed1c4758b6b759306f2b44feee0bbc2d06291ae490d97367043ab188ce670770"`, ); }); }); diff --git a/tests/shared-cache.test.ts b/tests/shared-cache.test.ts index a1211cd..fd3b6d1 100644 --- a/tests/shared-cache.test.ts +++ b/tests/shared-cache.test.ts @@ -85,7 +85,7 @@ describe("collectReachable", () => { const cache = createHashupCache(); const N = 50_000; for (let i = 0; i < N; i++) { - cache.hashes.set(`/f${i}`, ["x"]); + cache.hashes.set(`/f${i}`, "x"); cache.deps.set(`/f${i}`, i + 1 < N ? [`/f${i + 1}`] : []); } const files = collectReachable(["/f0"], cache);