diff --git a/.changeset/shared-cache.md b/.changeset/shared-cache.md new file mode 100644 index 0000000..9fab973 --- /dev/null +++ b/.changeset/shared-cache.md @@ -0,0 +1,22 @@ +--- +"@maastrich/hashup": minor +--- + +Share the hash cache across entries — shared utilities are now read and +hashed exactly once per `hashup` invocation instead of once per entry. + +- `HashupOptions` gains `cache?: HashupCache` and `resolver?: Resolver`. + Pass the same values across multiple `hashup()` calls to reuse work. + The CLI's config mode does this automatically: every named entry in a + single `hashup` invocation now shares one cache + one resolver. +- New exports: `createHashupCache()`, `collectReachable()`, and the + `HashupCache` type. +- Hash output is byte-identical to 0.5.0 on every existing input — the + inline snapshot in `tests/examples.test.ts` still matches. Shared cache + is a pure dedupe, not a semantic change. + +**Targeted break for direct `hashFile` callers:** the cache parameter is +now `HashupCache` (an object with `hashes` and `deps` maps) instead of +`Map`. `hashup()` itself is untouched — the new +`cache` option is additive. Callers of `hashFile` directly should swap +`new Map()` for `createHashupCache()`. diff --git a/docs/api/hashup.md b/docs/api/hashup.md index 7d8352d..8461ee7 100644 --- a/docs/api/hashup.md +++ b/docs/api/hashup.md @@ -37,6 +37,20 @@ interface HashupOptions { * @default "silent" */ logLevel?: "silent" | "warn" | "info" | "debug"; + + /** + * Optional shared cache. Pass the same cache across multiple calls + * to dedupe work — a file visited by entry A is reused by entry B. + * Create with `createHashupCache()`. + */ + cache?: HashupCache; + + /** + * Optional shared `enhanced-resolve` resolver. Pass a shared + * instance to reuse its internal filesystem cache across calls. + * Create with `createResolver()`. 
+ */ + resolver?: Resolver; } ``` @@ -64,6 +78,22 @@ console.log(result.hash); console.log(result.files); ``` +### Sharing a cache across entries + +```ts +import { createHashupCache, createResolver, hashup } from "@maastrich/hashup"; + +const cache = createHashupCache(); +const resolver = createResolver(); + +const app = await hashup("./src/app.ts", { cache, resolver }); +const worker = await hashup("./src/worker.ts", { cache, resolver }); +// Files imported by both app and worker are read + hashed exactly once. +``` + +`result.files` still returns only the files reachable from each call's +own roots (entry + extras), not the full cache contents. + ## Notes - The `files` array contains every file that was hashed, including files reached diff --git a/docs/api/index.md b/docs/api/index.md index 1dc6b06..8062a12 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -25,6 +25,10 @@ lower-level utilities for advanced use cases. internally. Exports `type LogLevel` and `type Logger`. - [`isInNodeModules(file)`](./utilities#isinnodemodules) — predicate used by the hasher to decide whether to walk into a resolved path. +- [`createHashupCache()`](./utilities#createhashupcache) — build a + `HashupCache` to share across multiple `hashup()` or `hashFile()` calls. +- [`collectReachable(roots, cache)`](./utilities#createhashupcache) — rebuild + a per-call file list from the cache's dependency edges. ## Config (subpath export) @@ -53,6 +57,9 @@ import { createLogger, isLogLevel, isInNodeModules, + createHashupCache, + collectReachable, + type HashupCache, type Logger, type LogLevel, } from "@maastrich/hashup"; diff --git a/docs/api/utilities.md b/docs/api/utilities.md index e8bbbdf..16079ee 100644 --- a/docs/api/utilities.md +++ b/docs/api/utilities.md @@ -42,22 +42,43 @@ Parses a file's source with `es-module-lexer` and returns its static import specifiers. Type-only imports and dynamic imports with non-literal specifiers are excluded. 
+## createHashupCache + +```ts +interface HashupCache { + hashes: Map<string, string[]>; + deps: Map<string, string[]>; +} + +function createHashupCache(): HashupCache; +function collectReachable(roots: readonly string[], cache: HashupCache): string[]; +``` + +An in-memory cache scoped to one consumer's lifetime — not persisted, +not shared across processes. `hashes` stores each file's flattened +hash list; `deps` stores each file's direct resolved dependency paths. +Pass the same `HashupCache` to multiple `hashup()` or `hashFile()` calls +to dedupe work. `collectReachable` walks `deps` iteratively to rebuild +a per-call file list (used internally by `hashup()` to produce +`result.files`). + ## hashFile ```ts function hashFile( file: string, - cache: Map<string, string[]>, + cache: HashupCache, resolver: Resolver, logger?: Logger, ): Promise<string[]>; ``` Hashes a file and all its transitive static imports. Results are memoized in -`cache` — pass the same `Map` across multiple calls to dedupe work. On error -(file read or parse failure) the failure is sent through `logger.warn` and an -empty array is returned. `logger` defaults to a silent logger; build one with -[`createLogger`](#createlogger) when you want diagnostics on stderr. +`cache` — pass the same `HashupCache` across multiple calls to dedupe work. +On error (file read or parse failure) the failure is sent through +`logger.warn` and an empty array is returned. `logger` defaults to a silent +logger; build one with [`createLogger`](#createlogger) when you want +diagnostics on stderr. Imports that resolve into `node_modules` are treated as opaque: the resolved path is skipped, its files are never read, and the dependency's own imports @@ -111,10 +132,10 @@ and hashing the result. Order-sensitive — pass hashes in a stable order. 
## Composing Your Own Pipeline ```ts -import { createResolver, hashFile, combineHashes } from "@maastrich/hashup"; +import { combineHashes, createHashupCache, createResolver, hashFile } from "@maastrich/hashup"; const resolver = createResolver(); -const cache = new Map(); +const cache = createHashupCache(); const entries = ["./src/a.ts", "./src/b.ts"]; const allHashes: string[] = []; diff --git a/src/cli/run-config-mode.ts b/src/cli/run-config-mode.ts index 84446da..65e5afb 100644 --- a/src/cli/run-config-mode.ts +++ b/src/cli/run-config-mode.ts @@ -1,5 +1,7 @@ import { dirname, resolve } from "node:path"; +import { createHashupCache, type HashupCache } from "../lib/cache.js"; import { combineHashes } from "../lib/combine-hashes.js"; +import { createResolver } from "../lib/create-resolver.js"; import { hashup, type HashupResult } from "../lib/hashup.js"; import type { LogLevel } from "../lib/logger.js"; import { expandPaths } from "./expand-paths.js"; @@ -33,6 +35,12 @@ export async function runConfigMode(input: RunConfigModeInput): Promise = {}; for (const [name, entry] of Object.entries(loaded.data.entries)) { const baseDir = entry.baseDir !== undefined ? resolveFrom(configDir, entry.baseDir) : rootBase; @@ -45,7 +53,7 @@ export async function runConfigMode(input: RunConfigModeInput): Promise, ): Promise { const perFile: HashupResult[] = []; for (let i = 0; i < entryFiles.length; i++) { const entry = entryFiles[i]!; const options = - i === 0 && extras.length > 0 ? { extras, baseDir, logLevel } : { baseDir, logLevel }; + i === 0 && extras.length > 0 + ? 
{ extras, baseDir, logLevel, cache, resolver } + : { baseDir, logLevel, cache, resolver }; perFile.push(await hashup(entry, options)); } diff --git a/src/index.ts index ad3b907..493fd09 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,6 +1,7 @@ export { hashup, type HashupOptions, type HashupResult } from "./lib/hashup.js"; export { createLogger, isLogLevel, type Logger, type LogLevel } from "./lib/logger.js"; export { isInNodeModules } from "./lib/is-in-node-modules.js"; +export { createHashupCache, collectReachable, type HashupCache } from "./lib/cache.js"; export { combineHashes } from "./lib/combine-hashes.js"; export { createContentHash } from "./lib/create-content-hash.js"; export { createResolver } from "./lib/create-resolver.js"; diff --git a/src/lib/cache.ts new file mode 100644 index 0000000..98d71c0 --- /dev/null +++ b/src/lib/cache.ts @@ -0,0 +1,48 @@ +/** + * In-memory memoization for the hasher. Scoped to one consumer's + * lifetime — not persisted, not shared across processes. + * + * Passing the same cache to multiple `hashup()` calls dedupes both + * work (a file visited by entry A is reused by entry B) and + * computation (the file's content hash is recomputed at most once). + * + * Two parallel maps keyed by absolute file path: + * - `hashes`: the flattened hash list (self + transitive deps). + * Returned directly to callers and combined into the final digest. + * - `deps`: the file's direct resolved dependency paths. Used by + * `collectReachable` to rebuild the per-call file list without + * re-walking the graph. + */ +export interface HashupCache { + hashes: Map<string, string[]>; + deps: Map<string, string[]>; +} + +export function createHashupCache(): HashupCache { + return { hashes: new Map(), deps: new Map() }; +} + +/** + * Compute the transitive closure of `roots` against the cache's direct + * dependency edges. Iterative — never recurses — so deep graphs cannot + * blow the stack. 
*/ +export function collectReachable(roots: readonly string[], cache: HashupCache): string[] { + const visited = new Set<string>(); + const stack: string[] = []; + for (let i = 0; i < roots.length; i++) { + stack.push(roots[i] as string); + } + while (stack.length > 0) { + const file = stack.pop() as string; + if (visited.has(file)) continue; + visited.add(file); + const depList = cache.deps.get(file); + if (!depList) continue; + for (let i = 0; i < depList.length; i++) { + const d = depList[i] as string; + if (!visited.has(d)) stack.push(d); + } + } + return Array.from(visited); +} diff --git a/src/lib/hash-file.ts index be6755e..71b0de0 100644 --- a/src/lib/hash-file.ts +++ b/src/lib/hash-file.ts @@ -1,4 +1,5 @@ import type { Resolver } from "enhanced-resolve"; +import { type HashupCache } from "./cache.js"; import { createContentHash } from "./create-content-hash.js"; import { extractImports } from "./extract-imports.js"; import { isInNodeModules } from "./is-in-node-modules.js"; @@ -9,11 +10,11 @@ import { resolveImport } from "./resolve-import.js"; export async function hashFile( file: string, - cache: Map<string, string[]>, + cache: HashupCache, resolver: Resolver, logger: Logger = createLogger("silent"), ): Promise<string[]> { - const cached = cache.get(file); + const cached = cache.hashes.get(file); if (cached) { return cached; } @@ -21,19 +22,22 @@ export async function hashFile( try { const content = await readFileContent(file); const hashes = [createContentHash(content)]; - // Seed the cache before recursing so that circular imports terminate: - // on a cycle A → B → A, the revisit of A returns this placeholder - // instead of walking forever until the stack blows. - cache.set(file, hashes); + const deps: string[] = []; + // Seed both caches before recursing so circular imports terminate: + // on a cycle A → B → A, the revisit of A hits `cache.hashes` and + // returns the placeholder instead of walking forever. 
+ cache.hashes.set(file, hashes); + cache.deps.set(file, deps); const imports = await extractImports(file, content); - const dependencyHashes = await hashDependencies(imports, file, cache, resolver, logger); + const dependencyHashes = await hashDependencies(imports, file, cache, resolver, logger, deps); pushAll(hashes, dependencyHashes); return hashes; } catch (error) { logger.warn(`Failed to hash file ${file}:`, error); - cache.delete(file); + cache.hashes.delete(file); + cache.deps.delete(file); return []; } } @@ -41,9 +45,10 @@ export async function hashFile( async function hashDependencies( imports: string[], sourceFile: string, - cache: Map<string, string[]>, + cache: HashupCache, resolver: Resolver, logger: Logger, + deps: string[], ): Promise<string[]> { const hashes: string[] = []; @@ -59,6 +64,7 @@ async function hashDependencies( logger.debug(`Skipping node_modules dependency: ${resolved}`); continue; } + deps.push(resolved); const resolvedHashes = await hashFile(resolved, cache, resolver, logger); pushAll(hashes, resolvedHashes); } diff --git a/src/lib/hashup.ts index 642a6bd..47f03ff 100644 --- a/src/lib/hashup.ts +++ b/src/lib/hashup.ts @@ -1,4 +1,6 @@ import { resolve } from "node:path"; +import type { Resolver } from "enhanced-resolve"; +import { collectReachable, createHashupCache, type HashupCache } from "./cache.js"; import { combineHashes } from "./combine-hashes.js"; import { createResolver } from "./create-resolver.js"; import { hashFile } from "./hash-file.js"; @@ -30,6 +32,23 @@ export interface HashupOptions { * @default "silent" */ logLevel?: LogLevel; + + /** + * Optional shared cache. Pass the same cache across multiple + * `hashup()` calls to dedupe work (a file visited by entry A is + * reused by entry B). Scoped to one consumer's lifetime — not + * persisted and not shared across processes. + * + * Create with `createHashupCache()`. + */ + cache?: HashupCache; + + /** + * Optional shared `enhanced-resolve` resolver. 
Pass a shared instance + * across many calls to reuse its internal filesystem cache. Create + * with `createResolver()`. + */ + resolver?: Resolver; } export interface HashupResult { @@ -39,7 +58,9 @@ export interface HashupResult { hash: string; /** - * All file paths that were included in the hash calculation + * All file paths that contributed to this call's hash (entry + + * extras + their transitive non-`node_modules` imports). Accurate + * whether or not a shared cache was used. */ files: string[]; } @@ -67,23 +88,40 @@ export interface HashupResult { * extras: ['./pnpm-lock.yaml', './package.json'] * }); * ``` + * + * @example + * ```typescript + * // Share a cache across entries to dedupe work + * import { hashup, createHashupCache, createResolver } from '@maastrich/hashup'; + * + * const cache = createHashupCache(); + * const resolver = createResolver(); + * const app = await hashup('./src/app.ts', { cache, resolver }); + * const worker = await hashup('./src/worker.ts', { cache, resolver }); + * ``` */ export async function hashup( entryFile: string, options: HashupOptions = {}, ): Promise { - const { extras = [], baseDir = process.cwd(), logLevel = "silent" } = options; + const { + extras = [], + baseDir = process.cwd(), + logLevel = "silent", + cache = createHashupCache(), + resolver = createResolver(), + } = options; const logger = createLogger(logLevel); const resolvedEntry = resolve(baseDir, entryFile); - const cache = new Map(); - const resolver = createResolver(); const entryHashes = await hashFile(resolvedEntry, cache, resolver, logger); const extraHashes: string[] = []; + const resolvedExtras: string[] = []; for (const extraFile of extras) { const resolvedExtra = resolve(baseDir, extraFile); + resolvedExtras.push(resolvedExtra); const hashes = await hashFile(resolvedExtra, cache, resolver, logger); pushAll(extraHashes, hashes); } @@ -92,7 +130,11 @@ export async function hashup( pushAll(combined, entryHashes); pushAll(combined, extraHashes); const 
finalHash = combineHashes(combined); - const files = Array.from(cache.keys()); + + // `files` is the transitive closure of this call's roots — entry + + // extras — regardless of whether individual files were already in + // the shared cache. Walks the `deps` map, which is cheap. + const files = collectReachable([resolvedEntry, ...resolvedExtras], cache); return { hash: finalHash, files }; } diff --git a/tests/shared-cache.test.ts b/tests/shared-cache.test.ts new file mode 100644 index 0000000..a1211cd --- /dev/null +++ b/tests/shared-cache.test.ts @@ -0,0 +1,118 @@ +import { describe, expect, test } from "vite-plus/test"; +import { collectReachable, createHashupCache, createResolver, hashup } from "../src/index.js"; + +const ENTRY = "./examples/src/index.ts"; + +describe("shared cache", () => { + test("second identical call adds no new cache entries (work was reused)", async () => { + const cache = createHashupCache(); + const resolver = createResolver(); + + await hashup(ENTRY, { cache, resolver }); + const sizeAfterFirst = cache.hashes.size; + expect(sizeAfterFirst).toBeGreaterThan(1); + + await hashup(ENTRY, { cache, resolver }); + expect(cache.hashes.size).toBe(sizeAfterFirst); + }); + + test("produces the same final hash whether cache is shared or fresh", async () => { + const fresh = await hashup(ENTRY); + + const cache = createHashupCache(); + const resolver = createResolver(); + // Pre-warm with an unrelated-but-overlapping call + await hashup("./examples/src/utils/math.ts", { cache, resolver }); + const reused = await hashup(ENTRY, { cache, resolver }); + + expect(reused.hash).toBe(fresh.hash); + }); + + test("files list is the per-call reachable set, not the whole cache", async () => { + const cache = createHashupCache(); + const resolver = createResolver(); + + const mathOnly = await hashup("./examples/src/utils/math.ts", { cache, resolver }); + const full = await hashup(ENTRY, { cache, resolver }); + + // Second call's files should match a fresh run's 
files exactly — + // even though `cache.hashes.size` now covers both calls. + const freshFull = await hashup(ENTRY); + expect(new Set(full.files)).toEqual(new Set(freshFull.files)); + + // And the narrower call's files must not include the broader set. + expect(mathOnly.files.length).toBeLessThan(full.files.length); + expect(full.files.some((f) => !mathOnly.files.includes(f))).toBe(true); + }); + + test("extras are tracked as roots in files", async () => { + const result = await hashup(ENTRY, { extras: ["./package.json"] }); + expect(result.files.some((f) => f.endsWith("package.json"))).toBe(true); + }); + + test("extras' reachability is folded in with a shared cache too", async () => { + const cache = createHashupCache(); + const resolver = createResolver(); + const r = await hashup(ENTRY, { extras: ["./package.json"], cache, resolver }); + expect(r.files.some((f) => f.endsWith("package.json"))).toBe(true); + }); + + test("cross-entry sharing: overlap between entries is walked once", async () => { + const cache = createHashupCache(); + const resolver = createResolver(); + + // Two different entry files that share a transitive dep + // (examples/src/utils/math.ts is imported by index.ts). + const shared = "./examples/src/utils/math.ts"; + await hashup(shared, { cache, resolver }); + const mathSize = cache.hashes.size; + + await hashup(ENTRY, { cache, resolver }); + const finalSize = cache.hashes.size; + + // ENTRY pulls in everything math.ts did plus more. The cache grew + // by strictly less than a fresh run would have produced. 
+ const freshCache = createHashupCache(); + await hashup(ENTRY, { cache: freshCache, resolver: createResolver() }); + + expect(finalSize).toBe(freshCache.hashes.size); + expect(finalSize - mathSize).toBeLessThan(freshCache.hashes.size); + }); +}); + +describe("collectReachable", () => { + test("iterative walk handles deep chains without stack overflow", () => { + const cache = createHashupCache(); + const N = 50_000; + for (let i = 0; i < N; i++) { + cache.hashes.set(`/f${i}`, ["x"]); + cache.deps.set(`/f${i}`, i + 1 < N ? [`/f${i + 1}`] : []); + } + const files = collectReachable(["/f0"], cache); + expect(files.length).toBe(N); + }); + + test("multiple roots produce the union of reachable sets", () => { + const cache = createHashupCache(); + cache.deps.set("/a", ["/c"]); + cache.deps.set("/b", ["/d"]); + cache.deps.set("/c", []); + cache.deps.set("/d", []); + const files = collectReachable(["/a", "/b"], cache); + expect(new Set(files)).toEqual(new Set(["/a", "/b", "/c", "/d"])); + }); + + test("cycles don't loop forever", () => { + const cache = createHashupCache(); + cache.deps.set("/a", ["/b"]); + cache.deps.set("/b", ["/a"]); + const files = collectReachable(["/a"], cache); + expect(new Set(files)).toEqual(new Set(["/a", "/b"])); + }); + + test("tolerates missing deps entries", () => { + const cache = createHashupCache(); + const files = collectReachable(["/missing"], cache); + expect(files).toEqual(["/missing"]); + }); +});