diff --git a/.changeset/linear-cache-and-cwd.md b/.changeset/linear-cache-and-cwd.md
new file mode 100644
index 0000000..49e20ef
--- /dev/null
+++ b/.changeset/linear-cache-and-cwd.md
@@ -0,0 +1,59 @@
+---
+"@maastrich/hashup": minor
+---
+
+Linear-memory cache + `--cwd` CLI flag.
+
+**Linear-memory cache.** `HashupCache.hashes` now stores each file's own
+sha256 content hash (one 64-char string) instead of the flattened
+transitive hash list. The transitive contribution is reconstructed at
+combine time by walking `cache.deps`. Memory drops from
+O(files × avg closure) to O(unique files) — on a real-world run that
+previously needed 9 GB of heap, peak RSS is now ~125 MB and wall time
+drops from minutes to ~1 s.
+
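+A sketch of the shape change (the interface as in `src/lib/cache.ts`;
+the comments here are illustrative):
+
+```ts
+interface HashupCache {
+  hashes: Map<string, string>; // one sha256 hex hash per file (was Map<string, string[]>)
+  deps: Map<string, string[]>; // direct resolved dependency paths
+}
+```
+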
+**Hash output changes.** The final digest is now
+`sha256(concat of each reachable file's content hash, sorted by path)`.
+Each unique file contributes exactly once regardless of how many import
+paths reach it. Any stored 0.6.x hashes must be re-baselined. As a
+welcome side effect: cycles now hash the same regardless of which
+member was the entry point.
+
+**`--cwd` CLI flag.** Run `hashup` as if invoked from the given
+directory. Changes where `hashup.json` is discovered, where relative
+entry/extras paths resolve, and where `--out` writes. Defaults to
+`process.cwd()`.
+
+```bash
+hashup --cwd ./packages/app
+hashup --cwd ./packages/app src/index.ts -o ../dist/app.hash
+```
+
+**Targeted break for direct `hashFile` callers.** Return type is now
+`Promise<string | null>` (the file's own hash, or `null` on failure)
+instead of `Promise<string[]>`. Callers should use `collectReachable`
+to enumerate the transitive set and read each file's hash from
+`cache.hashes` at combine time. `hashup()` itself is unchanged.
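+
+A migration sketch (hedged; `entry` stands for your resolved entry
+path, and `cache` and `resolver` come from `createHashupCache()` and
+`createResolver()`):
+
+```ts
+// 0.6.x: const hashes = await hashFile(entry, cache, resolver);
+await hashFile(entry, cache, resolver);
+const files = collectReachable([entry], cache).sort();
+const hashes = files
+  .map((f) => cache.hashes.get(f))
+  .filter((h): h is string => h !== undefined);
+```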
diff --git a/docs/api/hashup.md b/docs/api/hashup.md
index 8461ee7..ef39c1a 100644
--- a/docs/api/hashup.md
+++ b/docs/api/hashup.md
@@ -6,6 +6,8 @@ function hashup(entryFile: string, options?: HashupOptions): Promise<HashupResult>;
+  hashes: Map<string, string>;
  deps: Map<string, string[]>;
}
@@ -55,12 +55,14 @@ function collectReachable(roots: readonly string[], cache: HashupCache): string[]
```
An in-memory cache scoped to one consumer's lifetime — not persisted,
-not shared across processes. `hashes` stores each file's flattened
-hash list; `deps` stores each file's direct resolved dependency paths.
-Pass the same `HashupCache` to multiple `hashup()` or `hashFile()` calls
-to dedupe work. `collectReachable` walks `deps` iteratively to rebuild
-a per-call file list (used internally by `hashup()` to produce
-`result.files`).
+not shared across processes. `hashes` stores each file's own content
+hash (one 64-char sha256 string per file); `deps` stores each file's
+direct resolved dependency paths. Memory is linear in the number of
+unique files. Pass the same `HashupCache` to multiple `hashup()` or
+`hashFile()` calls to dedupe work. `collectReachable` walks `deps`
+iteratively (no recursion) to enumerate the transitive closure — used
+internally by `hashup()` to produce `result.files` and to fold each
+file's content hash into the final digest.
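+
+A sketch of that walk (illustrative; `collectReachableSketch` is not
+the shipped implementation, only the contract it follows):
+
+```ts
+function collectReachableSketch(
+  roots: readonly string[],
+  cache: HashupCache,
+): string[] {
+  const seen = new Set<string>(roots);
+  const stack = [...roots]; // explicit stack, so deep graphs cannot overflow
+  while (stack.length > 0) {
+    const file = stack.pop()!;
+    for (const dep of cache.deps.get(file) ?? []) {
+      if (!seen.has(dep)) {
+        seen.add(dep);
+        stack.push(dep);
+      }
+    }
+  }
+  return [...seen];
+}
+```
+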
## hashFile
@@ -70,15 +72,18 @@ function hashFile(
cache: HashupCache,
resolver: Resolver,
logger?: Logger,
-): Promise<string[]>;
+): Promise<string | null>;
```
-Hashes a file and all its transitive static imports. Results are memoized in
-`cache` — pass the same `HashupCache` across multiple calls to dedupe work.
-On error (file read or parse failure) the failure is sent through
-`logger.warn` and an empty array is returned. `logger` defaults to a silent
-logger; build one with [`createLogger`](#createlogger) when you want
-diagnostics on stderr.
+Hashes a file and recursively populates `cache.hashes` and `cache.deps`
+for every non-`node_modules` transitive import. Returns the file's own
+content hash on success, or `null` if the file could not be read or
+parsed. The transitive contribution is reconstructed at combine time by
+walking `cache.deps` — `hashFile` never returns the flattened list.
+Results are memoized in `cache` — pass the same `HashupCache` across
+multiple calls to dedupe work. `logger` defaults to a silent logger;
+build one with [`createLogger`](#createlogger) when you want diagnostics
+on stderr.
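+
+For example (a sketch; `file`, `cache`, and `resolver` as set up in
+the pipeline example below):
+
+```ts
+const selfHash = await hashFile(file, cache, resolver, logger);
+if (selfHash === null) {
+  // read or parse failure; already reported through logger.warn
+}
+```
+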
Imports that resolve into `node_modules` are treated as opaque: the resolved
path is skipped, its files are never read, and the dependency's own imports
@@ -132,17 +137,23 @@ and hashing the result. Order-sensitive — pass hashes in a stable order.
## Composing Your Own Pipeline
```ts
-import { combineHashes, createHashupCache, createResolver, hashFile } from "@maastrich/hashup";
+import {
+ collectReachable,
+ combineHashes,
+ createHashupCache,
+ createResolver,
+ hashFile,
+} from "@maastrich/hashup";
const resolver = createResolver();
const cache = createHashupCache();
const entries = ["./src/a.ts", "./src/b.ts"];
-const allHashes: string[] = [];
-
for (const entry of entries) {
- allHashes.push(...(await hashFile(entry, cache, resolver)));
+ await hashFile(entry, cache, resolver);
}
-const combined = combineHashes(allHashes);
+const files = collectReachable(entries, cache).sort();
+const selfHashes = files
+  .map((f) => cache.hashes.get(f))
+  .filter((h): h is string => h !== undefined);
+const combined = combineHashes(selfHashes);
```
diff --git a/docs/guide/cli.md b/docs/guide/cli.md
index fdebc6f..ac4a2c1 100644
--- a/docs/guide/cli.md
+++ b/docs/guide/cli.md
@@ -24,6 +24,9 @@ hashup src/index.ts
Prints the hash of `src/index.ts` and its transitive import graph. Flags:
- `-e, --extra ` — include an additional file in the hash (repeatable)
+- `--cwd ` — run as if invoked from this directory. Changes where
+ `hashup.json` is discovered and where relative paths resolve. Defaults
+ to `process.cwd()`.
- `-b, --base-dir ` — base directory for resolution (default: cwd)
- `--json` — emit `{ "hash": "…" }` instead of plain text
- `--files` — include the resolved file list in the JSON output
diff --git a/docs/guide/how-it-works.md b/docs/guide/how-it-works.md
index b355e99..ed379be 100644
--- a/docs/guide/how-it-works.md
+++ b/docs/guide/how-it-works.md
@@ -11,8 +11,10 @@
conditional exports, and extension resolution.
3. **Hash each file's content** (SHA-256). Results are cached per absolute path
so a file reachable through multiple paths is hashed once.
-4. **Combine all hashes** — the entry's graph plus any `extras` — into a single
- deterministic SHA-256 digest.
+4. **Combine the unique file hashes**, in sorted-path order, into a single
+ SHA-256 digest. Every file in the transitive closure contributes exactly
+ once, regardless of how many import paths reach it — memory stays linear
+   in the number of unique files, independent of graph width or diamond
+   count (see the sketch below).
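+
+A sketch of the combine step (`entry` and `extras` stand for the
+resolved roots; `combineHashes` concatenates in the given order and
+hashes the result):
+
+```ts
+const files = collectReachable([entry, ...extras], cache).sort();
+const digest = combineHashes(files.flatMap((f) => cache.hashes.get(f) ?? []));
+```
+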
## Determinism
@@ -60,8 +62,7 @@ top-level `"logLevel"` field. The CLI flag wins when both are set.
## Caveats
-- **Circular imports** terminate deterministically, but the exact hash of a
- cycle depends on which member was the entry point — the cache is seeded
- with the entry's content hash first, so cycle re-visits return that
- placeholder. Entering the same cycle from a different file produces a
- different (still deterministic) hash.
+- **Circular imports** terminate deterministically. The cache is seeded with
+ the file's own content hash before recursing, and each unique file
+ contributes exactly once to the final digest, so entering the same
+ cycle from any of its members produces the same hash.
diff --git a/src/cli/main.ts b/src/cli/main.ts
index 0aaf605..9ae6f67 100644
--- a/src/cli/main.ts
+++ b/src/cli/main.ts
@@ -1,3 +1,4 @@
+import { resolve } from "node:path";
import { configJsonSchema } from "../config/json-schema.js";
import { die } from "./die.js";
import { parseCliArgs } from "./parse-args.js";
@@ -19,8 +20,14 @@ export async function main(argv: string[]): Promise<void> {
return;
}
+ // --cwd is resolved against the real process.cwd() so that relative
+ // values on the command line behave predictably. Everything else
+ // (config path, baseDir, output path) resolves against this effective
+ // cwd, letting a single `--cwd ./packages/app` move the whole run.
+ const cwd = args.cwd !== undefined ? resolve(process.cwd(), args.cwd) : process.cwd();
+
if (args.printSchema) {
- await writeOutput(process.cwd(), args.out, `${JSON.stringify(configJsonSchema, null, 2)}\n`);
+ await writeOutput(cwd, args.out, `${JSON.stringify(configJsonSchema, null, 2)}\n`);
return;
}
@@ -30,7 +37,7 @@ export async function main(argv: string[]): Promise {
if (args.positionals.length === 1) {
const output = await runSingleFileMode({
- cwd: process.cwd(),
+ cwd,
file: args.positionals[0]!,
extras: args.extras,
baseDirOverride: args.baseDir,
@@ -38,12 +45,12 @@ export async function main(argv: string[]): Promise {
files: args.files,
logLevel: args.logLevel,
});
- await writeOutput(process.cwd(), args.out, output);
+ await writeOutput(cwd, args.out, output);
return;
}
const result = await runConfigMode({
- cwd: process.cwd(),
+ cwd,
configPath: args.config,
baseDirOverride: args.baseDir,
json: args.json,
@@ -53,5 +60,5 @@ export async function main(argv: string[]): Promise {
if (!result.ok) {
die(result.error);
}
- await writeOutput(process.cwd(), args.out, result.output);
+ await writeOutput(cwd, args.out, result.output);
}
diff --git a/src/cli/parse-args.ts b/src/cli/parse-args.ts
index 35133dc..8754af8 100644
--- a/src/cli/parse-args.ts
+++ b/src/cli/parse-args.ts
@@ -5,6 +5,7 @@ export interface CliArgs {
config: string | undefined;
extras: string[];
baseDir: string | undefined;
+ cwd: string | undefined;
json: boolean;
files: boolean;
help: boolean;
@@ -28,6 +29,7 @@ export function parseCliArgs(argv: string[]): CliArgs {
"print-schema": { type: "boolean", default: false },
out: { type: "string", short: "o" },
"log-level": { type: "string", short: "l" },
+ cwd: { type: "string" },
},
});
@@ -42,6 +44,7 @@ export function parseCliArgs(argv: string[]): CliArgs {
config: values.config as string | undefined,
extras: (values.extra as string[] | undefined) ?? [],
baseDir: values["base-dir"] as string | undefined,
+ cwd: values.cwd as string | undefined,
json: values.json === true,
files: values.files === true,
help: values.help === true,
diff --git a/src/cli/usage.ts b/src/cli/usage.ts
index 7643c4e..3f90c37 100644
--- a/src/cli/usage.ts
+++ b/src/cli/usage.ts
@@ -5,6 +5,7 @@ export const USAGE = `Usage:
Options:
-c, --config Path to config file (default: hashup.json)
-e, --extra Extra file to include (repeatable, single-file mode)
+ --cwd Run as if from this directory (default: process.cwd())
-b, --base-dir Base directory for resolution (default: cwd)
--json Output JSON instead of plain text
--files Include resolved file list in JSON output
diff --git a/src/lib/cache.ts b/src/lib/cache.ts
index 98d71c0..5bf2b54 100644
--- a/src/lib/cache.ts
+++ b/src/lib/cache.ts
@@ -7,14 +7,16 @@
* computation (the file's content hash is recomputed at most once).
*
* Two parallel maps keyed by absolute file path:
- * - `hashes`: the flattened hash list (self + transitive deps).
- * Returned directly to callers and combined into the final digest.
- * - `deps`: the file's direct resolved dependency paths. Used by
- * `collectReachable` to rebuild the per-call file list without
- * re-walking the graph.
+ * - `hashes`: the file's own content hash (sha256 of its bytes).
+ * One 64-char string per file — not a flattened transitive list,
+ * because that was O(files × avg closure) and blew out the heap
+ * on large monorepos. See `hashup()` for how the transitive
+ * contribution is reconstructed at combine time.
+ * - `deps`: the file's direct resolved dependency paths. Walked by
+ * `collectReachable` to enumerate the transitive closure.
*/
export interface HashupCache {
-  hashes: Map<string, string[]>;
+  hashes: Map<string, string>;
   deps: Map<string, string[]>;
}
diff --git a/src/lib/hash-file.ts b/src/lib/hash-file.ts
index 71b0de0..efc1ad1 100644
--- a/src/lib/hash-file.ts
+++ b/src/lib/hash-file.ts
@@ -4,54 +4,58 @@ import { createContentHash } from "./create-content-hash.js";
import { extractImports } from "./extract-imports.js";
import { isInNodeModules } from "./is-in-node-modules.js";
import { createLogger, type Logger } from "./logger.js";
-import { pushAll } from "./push-all.js";
import { readFileContent } from "./read-file-content.js";
import { resolveImport } from "./resolve-import.js";
+/**
+ * Ensure `file` and every file reachable from it are present in the
+ * cache. Returns the file's own content hash (sha256 hex) on success,
+ * or `null` if the file could not be read or parsed — in which case
+ * callers should skip it. The transitive contribution is reconstructed
+ * at combine time by walking `cache.deps`.
+ *
+ * Terminates deterministically on circular imports: the cache entry is
+ * seeded with the self hash before recursing, so a cycle A → B → A
+ * short-circuits on the revisit.
+ */
export async function hashFile(
file: string,
cache: HashupCache,
resolver: Resolver,
logger: Logger = createLogger("silent"),
-): Promise<string[]> {
+): Promise<string | null> {
const cached = cache.hashes.get(file);
- if (cached) {
+ if (cached !== undefined) {
return cached;
}
try {
const content = await readFileContent(file);
- const hashes = [createContentHash(content)];
+ const selfHash = createContentHash(content);
const deps: string[] = [];
- // Seed both caches before recursing so circular imports terminate:
- // on a cycle A → B → A, the revisit of A hits `cache.hashes` and
- // returns the placeholder instead of walking forever.
- cache.hashes.set(file, hashes);
+ cache.hashes.set(file, selfHash);
cache.deps.set(file, deps);
const imports = await extractImports(file, content);
-  const dependencyHashes = await hashDependencies(imports, file, cache, resolver, logger, deps);
-  pushAll(hashes, dependencyHashes);
+  await walkDependencies(imports, file, cache, resolver, logger, deps);
 
-  return hashes;
+  return selfHash;
} catch (error) {
logger.warn(`Failed to hash file ${file}:`, error);
cache.hashes.delete(file);
cache.deps.delete(file);
- return [];
+ return null;
}
}
-async function hashDependencies(
+async function walkDependencies(
imports: string[],
sourceFile: string,
cache: HashupCache,
resolver: Resolver,
logger: Logger,
deps: string[],
-): Promise<string[]> {
- const hashes: string[] = [];
-
+): Promise<void> {
for (const imported of imports) {
const resolved = await resolveImport(resolver, sourceFile, imported);
if (!resolved) continue;
@@ -65,9 +69,6 @@ async function hashDependencies(
continue;
}
deps.push(resolved);
- const resolvedHashes = await hashFile(resolved, cache, resolver, logger);
- pushAll(hashes, resolvedHashes);
+ await hashFile(resolved, cache, resolver, logger);
}
-
- return hashes;
}
diff --git a/src/lib/hashup.ts b/src/lib/hashup.ts
index 47f03ff..05b7aa0 100644
--- a/src/lib/hashup.ts
+++ b/src/lib/hashup.ts
@@ -5,7 +5,6 @@ import { combineHashes } from "./combine-hashes.js";
import { createResolver } from "./create-resolver.js";
import { hashFile } from "./hash-file.js";
import { createLogger, type LogLevel } from "./logger.js";
-import { pushAll } from "./push-all.js";
export interface HashupOptions {
/**
@@ -71,6 +70,11 @@ export interface HashupResult {
* treated as opaque and skipped — add a lockfile to `extras` if you
* want install-tree changes reflected in the hash.
*
+ * The hash is `sha256` over the concatenation of each reachable file's
+ * own content hash, in sorted-path order. Each file contributes exactly
+ * once regardless of how many import paths reach it, which keeps memory
+ * usage linear in the number of unique files.
+ *
* @param entryFile - The entry file to hash
* @param options - Optional configuration
* @returns The deterministic hash and list of included files
@@ -115,26 +119,25 @@ export async function hashup(
const logger = createLogger(logLevel);
const resolvedEntry = resolve(baseDir, entryFile);
- const entryHashes = await hashFile(resolvedEntry, cache, resolver, logger);
+ await hashFile(resolvedEntry, cache, resolver, logger);
- const extraHashes: string[] = [];
const resolvedExtras: string[] = [];
for (const extraFile of extras) {
const resolvedExtra = resolve(baseDir, extraFile);
resolvedExtras.push(resolvedExtra);
- const hashes = await hashFile(resolvedExtra, cache, resolver, logger);
- pushAll(extraHashes, hashes);
+ await hashFile(resolvedExtra, cache, resolver, logger);
}
- const combined: string[] = [];
- pushAll(combined, entryHashes);
- pushAll(combined, extraHashes);
- const finalHash = combineHashes(combined);
-  // `files` is the transitive closure of this call's roots — entry +
-  // extras — regardless of whether individual files were already in
-  // the shared cache. Walks the `deps` map, which is cheap.
-  const files = collectReachable([resolvedEntry, ...resolvedExtras], cache);
+  // Reconstruct the transitive contribution by walking `cache.deps`
+  // from this call's roots. Each file contributes exactly once; sort
+  // by path so the combined hash is independent of traversal order.
+  const files = collectReachable([resolvedEntry, ...resolvedExtras], cache).sort();
+  const selfHashes: string[] = [];
+  for (const file of files) {
+    const hash = cache.hashes.get(file);
+    if (hash !== undefined) selfHashes.push(hash);
+  }
 
-  return { hash: finalHash, files };
+  return { hash: combineHashes(selfHashes), files };
}
diff --git a/tests/circular.test.ts b/tests/circular.test.ts
index 580d265..35a48a7 100644
--- a/tests/circular.test.ts
+++ b/tests/circular.test.ts
@@ -18,11 +18,10 @@ describe("hashup with circular imports", () => {
expect(r1.files).toEqual(r2.files);
});
- test("should produce the same hash regardless of which cycle member is the entry", async () => {
+ test("produces the same hash from either cycle member", async () => {
const fromA = await hashup("./tests/fixtures/circular/a.ts");
const fromB = await hashup("./tests/fixtures/circular/b.ts");
- expect(fromA.hash).toMatch(/^[a-f0-9]{64}$/);
- expect(fromB.hash).toMatch(/^[a-f0-9]{64}$/);
+ expect(fromA.hash).toBe(fromB.hash);
});
});
diff --git a/tests/cli/parse-args.test.ts b/tests/cli/parse-args.test.ts
index 24baae0..3200048 100644
--- a/tests/cli/parse-args.test.ts
+++ b/tests/cli/parse-args.test.ts
@@ -72,4 +72,9 @@ describe("parseCliArgs", () => {
test("rejects invalid --log-level", () => {
expect(() => parseCliArgs(["--log-level", "trace"])).toThrow(/Invalid --log-level/);
});
+
+ test("parses --cwd", () => {
+ expect(parseCliArgs([]).cwd).toBeUndefined();
+ expect(parseCliArgs(["--cwd", "./packages/app"]).cwd).toBe("./packages/app");
+ });
});
diff --git a/tests/examples.test.ts b/tests/examples.test.ts
index b2df23a..7c7b464 100644
--- a/tests/examples.test.ts
+++ b/tests/examples.test.ts
@@ -105,7 +105,7 @@ describe("hashup with example files", () => {
const result = await hashup("./examples/src/index.ts");
expect(result.hash).toMatchInlineSnapshot(
- `"48adf62a70c2645d0fc15ee3060973245af5dc30a542372791a7e1f05eaeacf6"`,
+ `"ed1c4758b6b759306f2b44feee0bbc2d06291ae490d97367043ab188ce670770"`,
);
});
});
diff --git a/tests/shared-cache.test.ts b/tests/shared-cache.test.ts
index a1211cd..fd3b6d1 100644
--- a/tests/shared-cache.test.ts
+++ b/tests/shared-cache.test.ts
@@ -85,7 +85,7 @@ describe("collectReachable", () => {
const cache = createHashupCache();
const N = 50_000;
for (let i = 0; i < N; i++) {
- cache.hashes.set(`/f${i}`, ["x"]);
+ cache.hashes.set(`/f${i}`, "x");
cache.deps.set(`/f${i}`, i + 1 < N ? [`/f${i + 1}`] : []);
}
const files = collectReachable(["/f0"], cache);