From 38add9d0b28874aa49cd03ea3b76ee1d40efecb2 Mon Sep 17 00:00:00 2001 From: Caio Pizzol Date: Mon, 8 Jun 2026 17:14:21 -0300 Subject: [PATCH 1/4] refactor(corpus): improve compare scoring models --- README.md | 5 +- packages/fallbacks/README.md | 3 +- packages/fallbacks/compare.test.ts | 141 ++++++++++++++++ packages/fallbacks/scripts/compare.ts | 225 ++++++++++++++++++++++---- 4 files changed, 343 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 078db4b..0bfe00e 100644 --- a/README.md +++ b/README.md @@ -22,8 +22,9 @@ Built by the team behind [SuperDoc](https://github.com/superdoc-dev/superdoc). S - Runtime: install `@docfonts/fallbacks` and call the lookup helpers. - Acquire: run `bun run --cwd packages/fallbacks acquire` to download reviewed open-font source archives into an ignored local cache and write local hash snapshots. -- Compare: planned local tooling. Results should stay local unless deliberately published through a - curated product surface. +- Compare: run `bun run --cwd packages/fallbacks compare` to rank acquired open fonts against a + licensed local reference. Results stay local unless deliberately published through a curated + product surface. ## API diff --git a/packages/fallbacks/README.md b/packages/fallbacks/README.md index 28f88d4..0553400 100644 --- a/packages/fallbacks/README.md +++ b/packages/fallbacks/README.md @@ -125,8 +125,9 @@ bun run --cwd packages/fallbacks compare -- \ - `--reference` (required) - path to the font to measure against. - `--family` - a label shown in the report header. - `--source` - restrict to one or more acquired source ids (repeat the flag or comma-separate). Defaults to every acquired source. +- `--model` - `latin` by default. Use `monospace` for mono references so matching cells report `cell_width_only`, not `metric_safe`. -The comparison is a lead finder, not an automatic verdict. 
It measures Latin advance widths over a fixed sample and reports the tier, coverage, outlier counts, and worst glyphs for each candidate. +The comparison is a lead finder, not an automatic verdict. For proportional Latin fonts, tier, mean, and max use a text-carrying subset while outlier counts and worst glyphs still use the full Latin sample. ## Provenance diff --git a/packages/fallbacks/compare.test.ts b/packages/fallbacks/compare.test.ts index a08a832..e67c4c8 100644 --- a/packages/fallbacks/compare.test.ts +++ b/packages/fallbacks/compare.test.ts @@ -8,6 +8,8 @@ import { collectCandidates, type FontMetrics, LATIN_SAMPLE, + LATIN_TEXT_SAMPLE, + parseArgs, parseFont, renderReport, type SnapshotSource, @@ -190,6 +192,29 @@ describe("LATIN_SAMPLE", () => { }); }); +describe("LATIN_TEXT_SAMPLE", () => { + test("keeps text carriers and excludes symbol outliers", () => { + expect(LATIN_TEXT_SAMPLE).toContain(0x41); // 'A' + expect(LATIN_TEXT_SAMPLE).toContain(0x39); // '9' + expect(LATIN_TEXT_SAMPLE).toContain(0x00e9); // e with acute + expect(LATIN_TEXT_SAMPLE).toContain(0x00a0); // no-break space + expect(LATIN_TEXT_SAMPLE).toContain(0x2014); // em dash codepoint + expect(LATIN_TEXT_SAMPLE).not.toContain(0x00af); // macron + expect(LATIN_TEXT_SAMPLE).not.toContain(0x00b5); // micro sign + expect(LATIN_TEXT_SAMPLE).not.toContain(0x00b7); // middle dot + expect(LATIN_TEXT_SAMPLE).not.toContain(0x00b1); // plus-minus sign + }); + + test("is a sorted subset of the full Latin sample", () => { + const full = new Set(LATIN_SAMPLE); + expect(LATIN_TEXT_SAMPLE.every((cp) => full.has(cp))).toBe(true); + expect(new Set(LATIN_TEXT_SAMPLE).size).toBe(LATIN_TEXT_SAMPLE.length); + expect([...LATIN_TEXT_SAMPLE]).toEqual( + [...LATIN_TEXT_SAMPLE].sort((a, b) => a - b), + ); + }); +}); + // --- Tiers ------------------------------------------------------------------ describe("classifyTier", () => { @@ -202,6 +227,18 @@ describe("classifyTier", () => { expect(classifyTier(0.01, 
0.026)).toBe("visual_only"); expect(classifyTier(0, 0)).toBe("metric_safe"); }); + + test("monospace model collapses the metric bands to cell_width_only", () => { + // What the latin model calls metric_safe or near_metric is only proof of cell width here. + expect(classifyTier(0, 0, "monospace")).toBe("cell_width_only"); + expect(classifyTier(0.005, 0.01, "monospace")).toBe("cell_width_only"); + expect(classifyTier(0.01, 0.025, "monospace")).toBe("cell_width_only"); + // Non-matching candidates stay visual_only under both models. + expect(classifyTier(0.0101, 0.025, "monospace")).toBe("visual_only"); + // The latin model is the default and is unchanged. + expect(classifyTier(0, 0, "latin")).toBe("metric_safe"); + expect(classifyTier(0, 0)).toBe("metric_safe"); + }); }); // --- Scoring ---------------------------------------------------------------- @@ -260,6 +297,58 @@ describe("scoreAdvances", () => { expect(Number.isNaN(score.meanDelta)).toBe(true); expect(Number.isNaN(score.maxDelta)).toBe(true); }); + + test("monospace model downgrades a matching candidate to cell_width_only", () => { + const reference = new Map([ + [0x41, 0.6], + [0x42, 0.6], + [0x43, 0.6], + ]); + const matching = new Map([ + [0x41, 0.6], + [0x42, 0.6], + [0x43, 0.6], + ]); + const diverging = new Map([ + [0x41, 0.6], + [0x42, 0.7], + [0x43, 0.8], + ]); + // A matching cell is metric_safe under latin but only cell_width_only under monospace. + expect(scoreAdvances(reference, matching, sample).tier).toBe("metric_safe"); + expect( + scoreAdvances(reference, matching, sample, 3, "monospace").tier, + ).toBe("cell_width_only"); + // A non-matching candidate stays visual_only under either model. 
+ expect( + scoreAdvances(reference, diverging, sample, 3, "monospace").tier, + ).toBe("visual_only"); + }); + + test("can rank on text carriers while reporting full-sample outliers", () => { + const reportSample = [0x41, 0x00af, 0x00b5, 0x00b7]; + const tierSample = [0x41]; + const reference = new Map(reportSample.map((cp) => [cp, 0.5])); + const candidate = new Map([ + [0x41, 0.5], + [0x00af, 0.33], + [0x00b5, 0.58], + [0x00b7, 0.42], + ]); + const score = scoreAdvances(reference, candidate, { + reportSample, + tierSample, + }); + expect(score.tier).toBe("metric_safe"); + expect(score.compared).toBe(1); + expect(score.total).toBe(1); + expect(score.meanDelta).toBe(0); + expect(score.maxDelta).toBe(0); + expect(score.over2_5Percent).toBe(3); + expect(score.worstGlyphs.map((g) => g.codepoint)).toEqual([ + 0x00af, 0x00b7, 0x00b5, + ]); + }); }); // --- SFNT parsing ----------------------------------------------------------- @@ -360,6 +449,37 @@ describe("renderReport", () => { expect(lines[2]).toContain("visual_only"); }); + test("ranks cell_width_only after near_metric and before visual_only", () => { + const reference = new Map([[0x41, 0.6]]); + const sample = [0x41]; + // mean 0, max 0 -> near_metric is impossible from a perfect match, so build a near_metric by a + // small delta, a cell_width_only via the monospace model, and a visual_only via a large delta. 
+ const near = scoreAdvances(reference, new Map([[0x41, 0.607]]), sample); + const cell = scoreAdvances( + reference, + new Map([[0x41, 0.6]]), + sample, + 3, + "monospace", + ); + const visual = scoreAdvances(reference, new Map([[0x41, 0.9]]), sample); + expect(near.tier).toBe("near_metric"); + expect(cell.tier).toBe("cell_width_only"); + expect(visual.tier).toBe("visual_only"); + const report = renderReport([ + { sourceId: "visual-src", file: "v.otf", score: visual }, + { sourceId: "cell-src", file: "c.otf", score: cell }, + { sourceId: "near-src", file: "n.otf", score: near }, + ]); + const lines = report.split("\n"); + expect(lines[1]).toContain("near-src"); + expect(lines[1]).toContain("near_metric"); + expect(lines[2]).toContain("cell-src"); + expect(lines[2]).toContain("cell_width_only"); + expect(lines[3]).toContain("visual-src"); + expect(lines[3]).toContain("visual_only"); + }); + test("can limit the rendered table to the top rows", () => { const reference = sampleMetrics(mockFont(0.5), [0x41]); const close = scoreAdvances( @@ -405,6 +525,27 @@ describe("renderReport", () => { }); }); +// --- Argument parsing ------------------------------------------------------- + +describe("parseArgs", () => { + test("defaults to the latin model", () => { + expect(parseArgs(["--reference", "ref.otf"]).model).toBe("latin"); + }); + + test("accepts --model monospace", () => { + expect(parseArgs(["--model", "monospace"]).model).toBe("monospace"); + expect(parseArgs(["--model", "latin"]).model).toBe("latin"); + }); + + test("rejects an unknown model", () => { + expect(() => parseArgs(["--model", "serif"])).toThrow(/--model requires/); + }); + + test("rejects --model without a value", () => { + expect(() => parseArgs(["--model"])).toThrow(/requires a value/); + }); +}); + // --- Cached-file candidate collection --------------------------------------- describe("collectCandidates (GitHub tree sources)", () => { diff --git a/packages/fallbacks/scripts/compare.ts 
b/packages/fallbacks/scripts/compare.ts index ceca031..dd9bf7e 100644 --- a/packages/fallbacks/scripts/compare.ts +++ b/packages/fallbacks/scripts/compare.ts @@ -36,25 +36,96 @@ export const LATIN_SAMPLE: readonly number[] = (() => { return [...new Set(all)].sort((a, b) => a - b); })(); +const TEXT_PUNCTUATION = new Set([ + 0x20, // space + 0x21, // ! + 0x22, // " + 0x23, // # + 0x26, // & + 0x27, // ' + 0x28, // ( + 0x29, // ) + 0x2c, // , + 0x2d, // - + 0x2e, // . + 0x2f, // / + 0x3a, // : + 0x3b, // ; + 0x3f, // ? + 0x40, // @ + 0x5b, // [ + 0x5d, // ] + 0x7b, // { + 0x7d, // } + 0x00a0, // no-break space + 0x2013, // en dash + 0x2014, // em dash + 0x2018, // left single quote + 0x2019, // right single quote + 0x201c, // left double quote + 0x201d, // right double quote + 0x2026, // ellipsis +]); + +const EXCLUDED_TEXT_LETTERS = new Set([ + 0x00b5, // micro sign: Unicode treats it as a letter, but it behaves like a symbol here. +]); + +function isTextLetterOrDigit(codepoint: number): boolean { + if (EXCLUDED_TEXT_LETTERS.has(codepoint)) return false; + return /^[\p{L}\p{N}]$/u.test(String.fromCodePoint(codepoint)); +} + +/** + * Text-carrying Latin sample used to rank proportional-font candidates. The full sample still reports + * outliers, but rare symbols should not hide a strong body-text lead. + */ +export const LATIN_TEXT_SAMPLE: readonly number[] = LATIN_SAMPLE.filter( + (cp) => TEXT_PUNCTUATION.has(cp) || isTextLetterOrDigit(cp), +); + // --- Tiers ------------------------------------------------------------------ /** * Advance-fidelity tier. Thresholds mirror the package's verdict language (see `src/types.ts`): * metric_safe is the DIRECT band, near_metric the LIKELY band, everything else visual_only. + * cell_width_only is the monospace model's verdict for a matching cell: it proves line width, not + * glyph-shape fidelity. 
*/ -export type CompareTier = "metric_safe" | "near_metric" | "visual_only"; +export type CompareTier = + | "metric_safe" + | "near_metric" + | "cell_width_only" + | "visual_only"; + +/** + * Classification model. `latin` is the default proportional comparison. `monospace` treats a matching + * advance as proof of cell width only, since every glyph in a monospace cell shares one advance. + */ +export type CompareModel = "latin" | "monospace"; const TIER_RANK: Record<CompareTier, number> = { metric_safe: 0, near_metric: 1, - visual_only: 2, + cell_width_only: 2, + visual_only: 3, }; -/** Classify a (mean, max) advance-delta pair into a fidelity tier. Deltas are fractions of the em. */ -export function classifyTier(meanDelta: number, maxDelta: number): CompareTier { - if (meanDelta <= 0.005 && maxDelta <= 0.01) return "metric_safe"; - if (meanDelta <= 0.01 && maxDelta <= 0.025) return "near_metric"; - return "visual_only"; +/** + * Classify a (mean, max) advance-delta pair into a fidelity tier. Deltas are fractions of the em. Under + * the monospace model a matching cell only vouches for line width, so the metric bands collapse to + * cell_width_only while non-matching candidates stay visual_only. + */ +export function classifyTier( + meanDelta: number, + maxDelta: number, + model: CompareModel = "latin", +): CompareTier { + let tier: CompareTier = "visual_only"; + if (meanDelta <= 0.005 && maxDelta <= 0.01) tier = "metric_safe"; + else if (meanDelta <= 0.01 && maxDelta <= 0.025) tier = "near_metric"; + if (model === "monospace" && tier !== "visual_only") return "cell_width_only"; + return tier; } // --- SFNT parsing ----------------------------------------------------------- @@ -260,33 +331,48 @@ export interface GlyphDelta { /** The advance-parity score of one candidate font against the reference, over a fixed sample. */ export interface CompareScore { - /** codepoints in the sample that both fonts map. */ + /** codepoints in the tier sample that both fonts map.
*/ compared: number; - /** sample size. */ + /** tier sample size. */ total: number; - /** sample codepoints not mapped by both fonts. */ + /** tier sample codepoints not mapped by both fonts. */ missing: number; meanDelta: number; maxDelta: number; - /** shared sample codepoints whose advance delta exceeds the metric_safe max threshold. */ + /** shared report-sample codepoints whose advance delta exceeds the metric_safe max threshold. */ over1Percent: number; - /** shared sample codepoints whose advance delta exceeds the near_metric max threshold. */ + /** shared report-sample codepoints whose advance delta exceeds the near_metric max threshold. */ over2_5Percent: number; tier: CompareTier; worstGlyphs: GlyphDelta[]; } -/** - * Score one candidate against the reference over the sample. Both inputs are normalized advance maps - * (codepoint -> advance/unitsPerEm); only codepoints present in both are compared. Pure, so it can be - * tested with mocked metric maps and never needs a real font. - */ -export function scoreAdvances( +export interface ScoreOptions { + /** Sample used for outlier reporting and worst-glyph display. */ + reportSample?: readonly number[]; + /** Sample used for tier classification and mean/max columns. Defaults to `reportSample`. 
*/ + tierSample?: readonly number[]; + worstCount?: number; + model?: CompareModel; +} + +interface MeasuredDeltas { + compared: number; + total: number; + missing: number; + meanDelta: number; + maxDelta: number; + over1Percent: number; + over2_5Percent: number; + worstGlyphs: GlyphDelta[]; +} + +function measureDeltas( reference: ReadonlyMap<number, number>, candidate: ReadonlyMap<number, number>, - sample: readonly number[] = LATIN_SAMPLE, - worstCount = 3, -): CompareScore { + sample: readonly number[], + worstCount: number, +): MeasuredDeltas { const deltas: GlyphDelta[] = []; let sum = 0; let max = 0; @@ -320,12 +406,78 @@ export function scoreAdvances( maxDelta, over1Percent, over2_5Percent, - // With no shared codepoints there is nothing to vouch for: report the floor tier. - tier: compared === 0 ? "visual_only" : classifyTier(meanDelta, maxDelta), worstGlyphs, }; } +function normalizeScoreOptions( + optionsOrSample: ScoreOptions | readonly number[] | undefined, + worstCount: number | undefined, + model: CompareModel | undefined, +): Required<ScoreOptions> { + if (!optionsOrSample || Array.isArray(optionsOrSample)) { + const reportSample = optionsOrSample ?? LATIN_SAMPLE; + return { + reportSample, + tierSample: reportSample, + worstCount: worstCount ?? 3, + model: model ?? "latin", + }; + } + + const options = optionsOrSample as ScoreOptions; + const reportSample = options.reportSample ?? LATIN_SAMPLE; + return { + reportSample, + tierSample: options.tierSample ?? reportSample, + worstCount: options.worstCount ?? 3, + model: options.model ?? "latin", + }; +} + +/** + * Score one candidate against the reference. The tier can use a narrower text sample while the report + * still surfaces full-sample outliers. Both inputs are normalized advance maps (codepoint -> + * advance/unitsPerEm); only codepoints present in both are compared.
+ */ +export function scoreAdvances( + reference: ReadonlyMap<number, number>, + candidate: ReadonlyMap<number, number>, + optionsOrSample?: ScoreOptions | readonly number[], + worstCount?: number, + model?: CompareModel, +): CompareScore { + const options = normalizeScoreOptions(optionsOrSample, worstCount, model); + const report = measureDeltas( + reference, + candidate, + options.reportSample, + options.worstCount, + ); + const tierMetrics = + options.tierSample === options.reportSample + ? report + : measureDeltas(reference, candidate, options.tierSample, 0); + return { + compared: tierMetrics.compared, + total: tierMetrics.total, + missing: tierMetrics.missing, + meanDelta: tierMetrics.meanDelta, + maxDelta: tierMetrics.maxDelta, + over1Percent: report.over1Percent, + over2_5Percent: report.over2_5Percent, + tier: + tierMetrics.compared === 0 + ? "visual_only" + : classifyTier( + tierMetrics.meanDelta, + tierMetrics.maxDelta, + options.model, + ), + worstGlyphs: report.worstGlyphs, + }; +} + /** Build a font's normalized-advance map over the sample (only codepoints it maps are included).
*/ export function sampleMetrics( font: FontMetrics, @@ -506,15 +658,16 @@ interface CompareRow { score: CompareScore; } -interface ParsedArgs { +export interface ParsedArgs { reference?: string; family?: string; limit: number | null; sources: string[]; + model: CompareModel; } -function parseArgs(argv: string[]): ParsedArgs { - const args: ParsedArgs = { limit: 50, sources: [] }; +export function parseArgs(argv: string[]): ParsedArgs { + const args: ParsedArgs = { limit: 50, sources: [], model: "latin" }; const readValue = (flag: string, index: number): string => { const value = argv[index + 1]; if (!value || value.startsWith("--")) @@ -540,6 +693,14 @@ function parseArgs(argv: string[]): ParsedArgs { args.sources.push(id); i++; break; + case "--model": { + const value = readValue(flag, i); + if (value !== "latin" && value !== "monospace") + throw new Error("--model requires 'latin' or 'monospace'"); + args.model = value; + i++; + break; + } case "--limit": { const value = readValue(flag, i); if (value === "all") { @@ -683,7 +844,11 @@ function main(): void { for (const candidate of collectCandidates(source, cacheDir)) { try { const font = parseFont(candidate.bytes); - const score = scoreAdvances(reference, sampleMetrics(font)); + const score = scoreAdvances(reference, sampleMetrics(font), { + reportSample: LATIN_SAMPLE, + tierSample: args.model === "latin" ? LATIN_TEXT_SAMPLE : LATIN_SAMPLE, + model: args.model, + }); rows.push({ sourceId: source.sourceId, file: candidate.file, score }); } catch { skipped++; @@ -695,8 +860,12 @@ function main(): void { const shown = args.limit === null ? rows.length : Math.min(args.limit, rows.length); const skippedText = skipped === 0 ? "" : `; skipped ${skipped} unsupported`; + const modelText = + args.model === "latin" + ? 
`; tier/mean/max ${LATIN_TEXT_SAMPLE.length} text codepoints` + : `; model ${args.model}`; console.log( - `reference ${basename(args.reference)} as "${label}" vs ${rows.length} candidate(s) over ${LATIN_SAMPLE.length} Latin codepoints; showing ${shown}${skippedText}\n`, + `reference ${basename(args.reference)} as "${label}" vs ${rows.length} candidate(s) over ${LATIN_SAMPLE.length} Latin codepoints${modelText}; showing ${shown}${skippedText}\n`, ); console.log(renderReport(rows, { limit: args.limit })); } From e1a309f5b5020144c4ab781c5b1589251741dfd3 Mon Sep 17 00:00:00 2001 From: Caio Pizzol Date: Mon, 8 Jun 2026 17:45:11 -0300 Subject: [PATCH 2/4] refactor: move corpus tooling out of fallbacks package --- .gitignore | 1 + README.md | 25 ++-- package.json | 4 +- packages/fallbacks/README.md | 107 +++++------------- packages/fallbacks/package.json | 2 - packages/fallbacks/src/types.ts | 2 +- tools/corpus/README.md | 38 +++++++ .../corpus}/acquire.test.ts | 7 +- .../scripts => tools/corpus}/acquire.ts | 6 +- .../corpus}/compare.test.ts | 2 +- .../scripts => tools/corpus}/compare.ts | 14 +-- tsconfig.json | 2 +- 12 files changed, 90 insertions(+), 120 deletions(-) create mode 100644 tools/corpus/README.md rename {packages/fallbacks => tools/corpus}/acquire.test.ts (98%) rename {packages/fallbacks/scripts => tools/corpus}/acquire.ts (99%) rename {packages/fallbacks => tools/corpus}/compare.test.ts (99%) rename {packages/fallbacks/scripts => tools/corpus}/compare.ts (98%) diff --git a/.gitignore b/.gitignore index 7d6ae54..12837e4 100644 --- a/.gitignore +++ b/.gitignore @@ -7,5 +7,6 @@ dev/ .wrangler/ .mcp.json mockups/ +.cache/ packages/fallbacks/.cache/ STATE.md diff --git a/README.md b/README.md index 0bfe00e..45737fc 100644 --- a/README.md +++ b/README.md @@ -6,32 +6,21 @@ > Document font substitution, measured. docfonts publishes `@docfonts/fallbacks`, a small runtime package for document renderers. 
-It maps common proprietary document fonts to reviewed open-font fallback decisions. - -The package ships no font binaries and no proprietary data. It contains a public evidence snapshot, -asset-aware lookup helpers, and tests that prove the npm package only includes supported runtime files. +It maps common proprietary document fonts to reviewed open-font fallback decisions. It ships no font binaries and no proprietary data. Built by the team behind [SuperDoc](https://github.com/superdoc-dev/superdoc). Standalone and neutral. -## Package +## Structure - `packages/fallbacks` - runtime fallback decisions and lookup helpers. +- `tools/corpus` - local source acquisition and comparison tools. -## Workflows +## Use - Runtime: install `@docfonts/fallbacks` and call the lookup helpers. -- Acquire: run `bun run --cwd packages/fallbacks acquire` to download reviewed open-font source - archives into an ignored local cache and write local hash snapshots. -- Compare: run `bun run --cwd packages/fallbacks compare` to rank acquired open fonts against a - licensed local reference. Results stay local unless deliberately published through a curated - product surface. - -## API - -- `getRenderableFallback` - returns the open family to render, or `null` when none is renderable. -- `getFallbackDecision` - explains the outcome for UI, diagnostics, and reporting. -- `createFallbackMap` - builds a resolver map from only the font families you can render. -- `normalizeFamilyName` - normalizes map lookup keys. +- Acquire: run `bun run corpus:acquire` to download open-font sources into an ignored local cache. +- Compare: run `bun run corpus:compare` to rank acquired open fonts against a licensed local + reference. Results stay local unless deliberately published through a curated product surface. 
## Install diff --git a/package.json b/package.json index 4b98d59..584935a 100644 --- a/package.json +++ b/package.json @@ -11,7 +11,9 @@ "typecheck": "tsc --noEmit", "lint": "biome check .", "format": "biome check --write .", - "test": "bun test packages/fallbacks", + "test": "bun test packages/fallbacks tools/corpus", + "corpus:acquire": "bun run tools/corpus/acquire.ts", + "corpus:compare": "bun run tools/corpus/compare.ts", "check": "bun run typecheck && bun run test && bun run lint && bun run build", "check:fast": "bun run typecheck && bun run lint", "prepare": "if [ -z \"$CI\" ]; then bunx lefthook install; fi" diff --git a/packages/fallbacks/README.md b/packages/fallbacks/README.md index 0553400..25d4502 100644 --- a/packages/fallbacks/README.md +++ b/packages/fallbacks/README.md @@ -1,10 +1,8 @@ # @docfonts/fallbacks -Document font substitution, measured. +Measured open-font fallbacks for proprietary document fonts. -Measured open-font fallbacks for proprietary document fonts. Use it to decide whether a requested document font can render with an open family you actually ship. - -It ships no fonts and no proprietary binaries. It ships decisions: the recommended open family when one exists, the fidelity verdict, and the honest cases where no open family should be used. +The package ships decisions, not fonts: which open family to render when one is reviewed, how faithful it is, and when no open fallback should be used. ## Install @@ -12,11 +10,11 @@ It ships no fonts and no proprietary binaries. It ships decisions: the recommend npm install @docfonts/fallbacks ``` -ESM-only. Use `import`, or let your bundler handle it. CommonJS `require()` is not supported. +ESM-only. -## Render A Font +## Render a font -Use `getRenderableFallback` when you need one font family to render now. Pass `canRenderFamily` so docfonts only returns families your app can load. +Use `getRenderableFallback` when you need one family to render now. 
Pass `canRenderFamily` so the result only includes fonts your app can load. ```ts import { getRenderableFallback } from "@docfonts/fallbacks"; @@ -24,15 +22,13 @@ import { getRenderableFallback } from "@docfonts/fallbacks"; const fallback = getRenderableFallback("Helvetica", { canRenderFamily: (family) => bundledFamilies.has(family), }); - -// { substituteFamily: "Liberation Sans", policyAction: "substitute", verdict: "metric_safe", lineBreakSafe: true, evidenceId: "helvetica", generic: "sans-serif" } ``` -The result is `null` when there is nothing renderable from your available assets. Use `getFallbackDecision` when you need to know why. +Returns `null` when docfonts has no renderable fallback from your available assets. -## Explain A Decision +## Explain a decision -Use `getFallbackDecision` for UI, diagnostics, and reporting. It distinguishes known fonts with no recommended fallback from fonts docfonts has never seen. +Use `getFallbackDecision` for UI, diagnostics, and reports. ```ts import { getFallbackDecision } from "@docfonts/fallbacks"; @@ -40,31 +36,23 @@ import { getFallbackDecision } from "@docfonts/fallbacks"; getFallbackDecision("Aptos"); // { kind: "customer_supplied", evidenceId: "aptos", generic: "sans-serif" } -getFallbackDecision("Tahoma"); -// { kind: "no_recommended_fallback", evidenceId: "tahoma", generic: "sans-serif" } - -getFallbackDecision("Made Up Font"); -// { kind: "unknown" } - getFallbackDecision("Georgia", { canRenderFamily: (family) => bundledFamilies.has(family), }); -// { kind: "asset_missing", substituteFamily: "Gelasio", verdict: "near_metric", evidenceId: "georgia", generic: "serif" } +// { kind: "asset_missing", substituteFamily: "Gelasio", verdict: "near_metric", ... } ``` -Decision kinds: +Important decision kinds: -- `fallback` - render the returned `substituteFamily`. -- `asset_missing` - docfonts has a fallback, but your app does not load that family. 
-- `face_missing` - (face-aware lookups only) the family has a substitute, but not for the requested face. Route that face through your absence handling; do not substitute it. -- `no_recommended_fallback` - docfonts knows the font but recommends no renderable open family. -- `customer_supplied` - the real font should come from the customer or environment. -- `preserve_only` - keep the original family name. Do not substitute. +- `fallback` - render `fallback.substituteFamily`. +- `asset_missing` - docfonts has a fallback, but your app does not load it. +- `face_missing` - the fallback does not provide the requested face. +- `customer_supplied`, `preserve_only`, or `no_recommended_fallback` - do not substitute. - `unknown` - docfonts has no evidence for this family. -## Create A Resolver Map +## Build a resolver map -Use `createFallbackMap` when wiring a resolver. `canRenderFamily` is required because a resolver map must never point at fonts you cannot load. +Use `createFallbackMap` when wiring a resolver. `canRenderFamily` is required so the map never points at fonts you cannot load. ```ts import { createFallbackMap, normalizeFamilyName } from "@docfonts/fallbacks"; @@ -73,64 +61,21 @@ const map = createFallbackMap({ canRenderFamily: (family) => bundledFamilies.has(family), }); -map[normalizeFamilyName("Times New Roman")]; // { substituteFamily: "Liberation Serif", ... } -``` - -Keys are normalized. Use `normalizeFamilyName` for lookups. Rows whose substitute family is not available are omitted. Each entry carries `faces`: a Regular-only entry is only safe in a **face-aware** resolver (one that checks `faces` or uses `getRenderableFallbackForFace`), since applying it to bold/italic would route a face the substitute does not provide. - -## What the fields mean - -- `substituteFamily` - the open family to render in place of the requested one. -- `policyAction` - what a renderer should do, not a quality claim. Use `verdict` for fidelity. 
-- `verdict` - the measured fidelity. Examples: `metric_safe`, `near_metric`, `cell_width_only`, `visual_only`. -- `lineBreakSafe` - true when advances preserve line breaks: `metric_safe`, `near_metric`, or monospace `cell_width_only`. -- `faces` - reviewed face coverage for this evidence row. If any face is `true`, respect it as face-scoped coverage (a row can be Regular-only). If all faces are `false`, the row is **not** face-scoped (e.g. a category fallback whose physical font does have faces) and the face-aware helpers treat it as renderable for any face. -- `evidenceId` - the stable id for the reviewed evidence row; look the full row up in `SUBSTITUTION_EVIDENCE`. -- `generic` - the logical font's broad CSS category (`serif`, `sans-serif`, or `monospace`), for a last-resort generic `font-family` keyword when no named substitute renders. Also present on the known (non-`unknown`) decision kinds. -- `glyphExceptions` - named glyph-level divergences that qualify this fallback (e.g. one codepoint reflows), or omitted when none. A family lookup carries all of the row's; a face lookup (`getRenderableFallbackForFace`) carries only that face's, so Cambria Regular shows none while Bold Italic shows its grave-accent exception. - -`cell_width_only` keeps monospace advances stable, but glyph shapes can still differ. A `substitute` can still have a lower-fidelity `verdict` when one face or glyph is qualified. The verdict is the fidelity signal. - -## Face-aware routing (Regular-only substitutes) - -Some substitutes provide only some faces - e.g. Baskerville Old Face -> Bacasime Antique is Regular-only. The family-level helpers above answer "which family", and every result carries `faces`, so a resolver must route per-face. 
The face-aware helpers do it for you: - -```ts -import { getRenderableFallbackForFace } from "@docfonts/fallbacks"; -const opts = { canRenderFamily: (family) => bundledFamilies.has(family) }; - -getRenderableFallbackForFace("Baskerville Old Face", "regular", opts)?.substituteFamily; // "Bacasime Antique" -getRenderableFallbackForFace("Baskerville Old Face", "bold", opts); // null (Regular-only) +map[normalizeFamilyName("Times New Roman")]; ``` -`getFallbackDecisionForFace(family, face, options)` reports the reason - `face_missing` when the substitute exists but lacks that face. A covered face carries its OWN verdict, not the family's worst-face rollup (e.g. `Cambria` regular is `metric_safe` even though the family rolls up to `visual_only`). +Some fallbacks are face-scoped. Use `getRenderableFallbackForFace`, or respect the returned `faces` field before applying a fallback to bold or italic text. -The full structured rows are exported as `SUBSTITUTION_EVIDENCE` for richer reporting (faces, per-face verdicts, glyph exceptions). +## Fidelity fields -## Local tools +- `verdict` - measured fidelity, such as `metric_safe`, `near_metric`, `cell_width_only`, or `visual_only`. +- `lineBreakSafe` - true when advances preserve line breaks. +- `glyphExceptions` - named glyphs that can reflow. +- `generic` - CSS generic family for last-resort fallback. +- `evidenceId` - stable id for the reviewed evidence row. -These maintainer tools use ignored `.cache` files and are not shipped in the package. - -`bun run acquire` downloads open-font candidates into `.cache/sources`. Sources come in two shapes: release archives (zip or tar.gz) and pinned source trees. Set `DOCFONTS_SOURCE_CACHE` to use another cache directory, or pass `--source google-fonts` to acquire one source. - -`bun run compare` checks a private reference font against acquired OTF/TTF candidates and prints a ranked Latin advance-width table. It writes no fonts, paths, or results to the tree. 
- -```sh -bun run --cwd packages/fallbacks compare -- \ - --reference /path/to/reference.ttf \ - --family "Bookman Old Style" \ - --source tex-gyre-bonum -``` - -- `--reference` (required) - path to the font to measure against. -- `--family` - a label shown in the report header. -- `--source` - restrict to one or more acquired source ids (repeat the flag or comma-separate). Defaults to every acquired source. -- `--model` - `latin` by default. Use `monospace` for mono references so matching cells report `cell_width_only`, not `metric_safe`. - -The comparison is a lead finder, not an automatic verdict. For proportional Latin fonts, tier, mean, and max use a text-carrying subset while outlier counts and worst glyphs still use the full Latin sample. +`SUBSTITUTION_EVIDENCE` exposes the full reviewed rows for richer reporting. ## Provenance -The data comes from reviewed docfonts evidence. Measurements are produced against licensed originals, but this package distributes no proprietary binaries or raw proprietary metrics. - -Built by the team behind SuperDoc. Standalone and neutral. +Measurements are produced against licensed originals. This package distributes no proprietary binaries, raw proprietary metrics, or font files. 
diff --git a/packages/fallbacks/package.json b/packages/fallbacks/package.json index f38938e..6b0fca2 100644 --- a/packages/fallbacks/package.json +++ b/packages/fallbacks/package.json @@ -36,8 +36,6 @@ }, "scripts": { "gen:data": "bun run scripts/generate-data.ts", - "acquire": "bun run scripts/acquire.ts", - "compare": "bun run scripts/compare.ts", "build": "tsc -p tsconfig.build.json", "prepack": "bun run build" }, diff --git a/packages/fallbacks/src/types.ts b/packages/fallbacks/src/types.ts index d2407af..aa1150d 100644 --- a/packages/fallbacks/src/types.ts +++ b/packages/fallbacks/src/types.ts @@ -33,7 +33,7 @@ export type FaceSlot = "regular" | "bold" | "italic" | "boldItalic"; */ export type CssGeneric = "serif" | "sans-serif" | "monospace"; -/** Advance-width divergence vs the proprietary oracle, as fractions (0 = identical advances). */ +/** Advance-width divergence vs the licensed reference font, as fractions (0 = identical advances). */ export interface AdvanceDelta { meanDelta: number; /** the worst-case delta, not the mean, is what gates line-break fidelity. */ diff --git a/tools/corpus/README.md b/tools/corpus/README.md new file mode 100644 index 0000000..8c9e4ff --- /dev/null +++ b/tools/corpus/README.md @@ -0,0 +1,38 @@ +# Corpus Tools + +Local tools for finding open-font fallback candidates. + +They download fonts into an ignored cache, compare a licensed local reference against that cache, and print ranked leads. They do not publish fallback decisions. + +## Commands + +```sh +bun run corpus:acquire +bun run corpus:compare -- --reference /path/to/reference.ttf --family "Verdana" +``` + +## Acquire + +```sh +bun run corpus:acquire -- --source google-fonts +``` + +Without `--source`, all configured sources are acquired. Use `DOCFONTS_SOURCE_CACHE` to choose a cache directory. The default is `.cache/corpus`. 
+ +## Compare + +```sh +bun run corpus:compare -- \ + --reference /path/to/reference.ttf \ + --family "Lucida Console" \ + --source dejavu,noto-sans-mono \ + --model monospace +``` + +- `--reference` is required. +- `--family` is a report label. +- `--source` limits the acquired sources to compare. Without it, every acquired source is used. +- `--model latin` is the default. Proportional Latin ranking uses text-carrying codepoints for tier, mean, and max while still reporting full Latin outliers. +- `--model monospace` reports matching mono cells as `cell_width_only`, not `metric_safe`. + +Comparison output is a lead finder. A public fallback row still needs review, provenance, face-scope checks, and visual sanity. diff --git a/packages/fallbacks/acquire.test.ts b/tools/corpus/acquire.test.ts similarity index 98% rename from packages/fallbacks/acquire.test.ts rename to tools/corpus/acquire.test.ts index 288c1bd..5d47cec 100644 --- a/packages/fallbacks/acquire.test.ts +++ b/tools/corpus/acquire.test.ts @@ -5,7 +5,7 @@ import { collectGitHubTreeFonts, type GitHubTreeEntry, SOURCE_RELEASES, -} from "./scripts/acquire"; +} from "./acquire"; const joined = (...parts: string[]) => parts.join(""); @@ -213,10 +213,7 @@ describe("source acquisition catalog", () => { }); test("does not include private paths or measurement environment details", () => { - const script = readFileSync( - join(import.meta.dir, "scripts", "acquire.ts"), - "utf8", - ); + const script = readFileSync(join(import.meta.dir, "acquire.ts"), "utf8"); for (const needle of [ joined("/", "Users", "/"), joined("/", "Applications", "/"), diff --git a/packages/fallbacks/scripts/acquire.ts b/tools/corpus/acquire.ts similarity index 99% rename from packages/fallbacks/scripts/acquire.ts rename to tools/corpus/acquire.ts index 14c5412..a6ff5c5 100644 --- a/packages/fallbacks/scripts/acquire.ts +++ b/tools/corpus/acquire.ts @@ -551,8 +551,8 @@ interface GitHubTreeSnapshot extends BaseSnapshot { type SourceSnapshot = 
ArchiveSnapshot | GitHubTreeSnapshot; -const PKG_DIR = join(import.meta.dir, ".."); -const DEFAULT_CACHE_DIR = join(PKG_DIR, ".cache", "sources"); +const REPO_DIR = join(import.meta.dir, "..", ".."); +const DEFAULT_CACHE_DIR = join(REPO_DIR, ".cache", "corpus"); const FONT_EXTENSIONS = [".otf", ".ttf", ".otc", ".ttc", ".woff2", ".woff"]; const sha256 = (bytes: Uint8Array): string => @@ -920,7 +920,7 @@ async function main(): Promise { const outPath = join(cacheDir, "source-snapshot.json"); writeFileSync( outPath, - `${JSON.stringify({ generatedBy: "scripts/acquire.ts", snapshots }, null, 2)}\n`, + `${JSON.stringify({ generatedBy: "tools/corpus/acquire.ts", snapshots }, null, 2)}\n`, ); console.log(`wrote ${outPath}`); } diff --git a/packages/fallbacks/compare.test.ts b/tools/corpus/compare.test.ts similarity index 99% rename from packages/fallbacks/compare.test.ts rename to tools/corpus/compare.test.ts index e67c4c8..f6a6eb8 100644 --- a/packages/fallbacks/compare.test.ts +++ b/tools/corpus/compare.test.ts @@ -15,7 +15,7 @@ import { type SnapshotSource, sampleMetrics, scoreAdvances, -} from "./scripts/compare"; +} from "./compare"; // --- Synthetic SFNT builder ------------------------------------------------- // diff --git a/packages/fallbacks/scripts/compare.ts b/tools/corpus/compare.ts similarity index 98% rename from packages/fallbacks/scripts/compare.ts rename to tools/corpus/compare.ts index dd9bf7e..59a6c07 100644 --- a/packages/fallbacks/scripts/compare.ts +++ b/tools/corpus/compare.ts @@ -6,8 +6,8 @@ import { execFileSync } from "node:child_process"; import { existsSync, readFileSync } from "node:fs"; import { basename, join } from "node:path"; -const PKG_DIR = join(import.meta.dir, ".."); -const DEFAULT_CACHE_DIR = join(PKG_DIR, ".cache", "sources"); +const REPO_DIR = join(import.meta.dir, "..", ".."); +const DEFAULT_CACHE_DIR = join(REPO_DIR, ".cache", "corpus"); const SNAPSHOT_FILE = "source-snapshot.json"; const RAW_SFNT_EXTENSIONS = [".otf", ".ttf"]; 
@@ -583,12 +583,12 @@ function readArchiveMember( function loadSnapshot(cacheDir: string): SnapshotSource[] { if (!existsSync(cacheDir)) throw new Error( - `source cache not found at ${cacheDir}. Run \`bun run acquire\` first.`, + `source cache not found at ${cacheDir}. Run \`bun run corpus:acquire\` first.`, ); const snapshotPath = join(cacheDir, SNAPSHOT_FILE); if (!existsSync(snapshotPath)) throw new Error( - `${SNAPSHOT_FILE} not found in ${cacheDir}. Run \`bun run acquire\` first.`, + `${SNAPSHOT_FILE} not found in ${cacheDir}. Run \`bun run corpus:acquire\` first.`, ); const parsed = JSON.parse(readFileSync(snapshotPath, "utf8")) as { snapshots?: SnapshotSource[]; @@ -602,7 +602,7 @@ function loadSnapshot(cacheDir: string): SnapshotSource[] { /** * Collect the candidate fonts for one source from the cache. GitHub tree sources read each snapshot file * entry directly; archive sources list and extract font members from the cached release archive. Throws - * when an expected cache file is absent so the caller can point the user back at `bun run acquire`. + * when an expected cache file is absent so the caller can point the user back at `bun run corpus:acquire`. */ export function collectCandidates( source: SnapshotSource, @@ -616,7 +616,7 @@ export function collectCandidates( const filePath = join(cacheDir, entry.path); if (!existsSync(filePath)) throw new Error( - `candidate file missing for ${source.sourceId}: ${filePath}. Run \`bun run acquire\` first.`, + `candidate file missing for ${source.sourceId}: ${filePath}. Run \`bun run corpus:acquire\` first.`, ); return { file: entry.name, bytes: readFileSync(filePath) }; }); @@ -629,7 +629,7 @@ export function collectCandidates( ); if (!existsSync(archivePath)) throw new Error( - `candidate archive missing for ${source.sourceId}: ${archivePath}. Run \`bun run acquire\` first.`, + `candidate archive missing for ${source.sourceId}: ${archivePath}. 
Run \`bun run corpus:acquire\` first.`, ); const members = listFontMembers(archivePath, format); if (members.length === 0) diff --git a/tsconfig.json b/tsconfig.json index 2e8aaec..f82275f 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -13,6 +13,6 @@ "@docfonts/fallbacks": ["./packages/fallbacks/src/index.ts"] } }, - "include": ["packages/fallbacks/**/*.ts"], + "include": ["packages/fallbacks/**/*.ts", "tools/**/*.ts"], "exclude": ["node_modules", "dist", "**/dist"] } From 2a68da0425aa13cae8cbc1b3b5b54a41ca0175ce Mon Sep 17 00:00:00 2001 From: Caio Pizzol Date: Mon, 8 Jun 2026 17:57:31 -0300 Subject: [PATCH 3/4] refactor: split corpus compare tool --- tools/corpus/compare.ts | 842 ++++-------------------------------- tools/corpus/src/cache.ts | 171 ++++++++ tools/corpus/src/font.ts | 203 +++++++++ tools/corpus/src/report.ts | 83 ++++ tools/corpus/src/samples.ts | 70 +++ tools/corpus/src/score.ts | 157 +++++++ tools/corpus/src/tiers.ts | 41 ++ 7 files changed, 809 insertions(+), 758 deletions(-) create mode 100644 tools/corpus/src/cache.ts create mode 100644 tools/corpus/src/font.ts create mode 100644 tools/corpus/src/report.ts create mode 100644 tools/corpus/src/samples.ts create mode 100644 tools/corpus/src/score.ts create mode 100644 tools/corpus/src/tiers.ts diff --git a/tools/corpus/compare.ts b/tools/corpus/compare.ts index 59a6c07..c5a4b30 100644 --- a/tools/corpus/compare.ts +++ b/tools/corpus/compare.ts @@ -1,656 +1,46 @@ /** - * Local maintainer tool: compare a private reference font with acquired open-font archives. + * Local maintainer tool: compare a licensed reference font with acquired open-font archives. * Reads ignored cache files, prints to stdout, and writes nothing to the tree. 
*/ -import { execFileSync } from "node:child_process"; import { existsSync, readFileSync } from "node:fs"; import { basename, join } from "node:path"; +import { + archiveFormatOf, + collectCandidates, + loadSnapshot, + requireArchiveTool, + type SnapshotSource, +} from "./src/cache"; +import { parseFont, sampleMetrics } from "./src/font"; +import { renderReport } from "./src/report"; +import { LATIN_SAMPLE, LATIN_TEXT_SAMPLE } from "./src/samples"; +import { type CompareScore, scoreAdvances } from "./src/score"; +import type { CompareModel } from "./src/tiers"; + +export { + archiveFormatOf, + collectCandidates, + loadSnapshot, + requireArchiveTool, + type SnapshotSource, +} from "./src/cache"; +export { type FontMetrics, parseFont, sampleMetrics } from "./src/font"; +export { renderReport } from "./src/report"; +export { LATIN_SAMPLE, LATIN_TEXT_SAMPLE } from "./src/samples"; +export { + type CompareScore, + type GlyphDelta, + type ScoreOptions, + scoreAdvances, +} from "./src/score"; +export { + type CompareModel, + type CompareTier, + classifyTier, +} from "./src/tiers"; const REPO_DIR = join(import.meta.dir, "..", ".."); const DEFAULT_CACHE_DIR = join(REPO_DIR, ".cache", "corpus"); -const SNAPSHOT_FILE = "source-snapshot.json"; -const RAW_SFNT_EXTENSIONS = [".otf", ".ttf"]; - -// --- Latin sample ----------------------------------------------------------- - -/** Inclusive codepoint range helper for building the sample. */ -function codepointRange(start: number, end: number): number[] { - const out: number[] = []; - for (let cp = start; cp <= end; cp++) out.push(cp); - return out; -} - -/** - * Fixed Latin sample for advance comparison: every printable ASCII codepoint (U+0020 space through - * U+007E tilde), Latin-1 letters with diacritics, and common punctuation/symbols a document is likely - * to use. Named and tested so the metric is reproducible. Stored as numeric codepoints, sorted and - * unique. 
- */ -export const LATIN_SAMPLE: readonly number[] = (() => { - const latin1 = codepointRange(0x00a0, 0x00ff).filter((cp) => cp !== 0x00ad); - const generalPunctuation = [ - 0x2013, 0x2014, 0x2018, 0x2019, 0x201c, 0x201d, 0x2020, 0x2021, 0x2022, - 0x2026, 0x2030, 0x2039, 0x203a, 0x20ac, 0x2122, - ]; - const all = [...codepointRange(0x20, 0x7e), ...latin1, ...generalPunctuation]; - return [...new Set(all)].sort((a, b) => a - b); -})(); - -const TEXT_PUNCTUATION = new Set([ - 0x20, // space - 0x21, // ! - 0x22, // " - 0x23, // # - 0x26, // & - 0x27, // ' - 0x28, // ( - 0x29, // ) - 0x2c, // , - 0x2d, // - - 0x2e, // . - 0x2f, // / - 0x3a, // : - 0x3b, // ; - 0x3f, // ? - 0x40, // @ - 0x5b, // [ - 0x5d, // ] - 0x7b, // { - 0x7d, // } - 0x00a0, // no-break space - 0x2013, // en dash - 0x2014, // em dash - 0x2018, // left single quote - 0x2019, // right single quote - 0x201c, // left double quote - 0x201d, // right double quote - 0x2026, // ellipsis -]); - -const EXCLUDED_TEXT_LETTERS = new Set([ - 0x00b5, // micro sign: Unicode treats it as a letter, but it behaves like a symbol here. -]); - -function isTextLetterOrDigit(codepoint: number): boolean { - if (EXCLUDED_TEXT_LETTERS.has(codepoint)) return false; - return /^[\p{L}\p{N}]$/u.test(String.fromCodePoint(codepoint)); -} - -/** - * Text-carrying Latin sample used to rank proportional-font candidates. The full sample still reports - * outliers, but rare symbols should not hide a strong body-text lead. - */ -export const LATIN_TEXT_SAMPLE: readonly number[] = LATIN_SAMPLE.filter( - (cp) => TEXT_PUNCTUATION.has(cp) || isTextLetterOrDigit(cp), -); - -// --- Tiers ------------------------------------------------------------------ - -/** - * Advance-fidelity tier. Thresholds mirror the package's verdict language (see `src/types.ts`): - * metric_safe is the DIRECT band, near_metric the LIKELY band, everything else visual_only. 
- * cell_width_only is the monospace model's verdict for a matching cell: it proves line width, not - * glyph-shape fidelity. - */ -export type CompareTier = - | "metric_safe" - | "near_metric" - | "cell_width_only" - | "visual_only"; - -/** - * Classification model. `latin` is the default proportional comparison. `monospace` treats a matching - * advance as proof of cell width only, since every glyph in a monospace cell shares one advance. - */ -export type CompareModel = "latin" | "monospace"; - -const TIER_RANK: Record = { - metric_safe: 0, - near_metric: 1, - cell_width_only: 2, - visual_only: 3, -}; - -/** - * Classify a (mean, max) advance-delta pair into a fidelity tier. Deltas are fractions of the em. Under - * the monospace model a matching cell only vouches for line width, so the metric bands collapse to - * cell_width_only while non-matching candidates stay visual_only. - */ -export function classifyTier( - meanDelta: number, - maxDelta: number, - model: CompareModel = "latin", -): CompareTier { - let tier: CompareTier = "visual_only"; - if (meanDelta <= 0.005 && maxDelta <= 0.01) tier = "metric_safe"; - else if (meanDelta <= 0.01 && maxDelta <= 0.025) tier = "near_metric"; - if (model === "monospace" && tier !== "visual_only") return "cell_width_only"; - return tier; -} - -// --- SFNT parsing ----------------------------------------------------------- - -const REQUIRED_TABLES = ["head", "maxp", "hhea", "hmtx", "cmap"] as const; - -/** A parsed font's em size plus a normalized advance lookup over its Unicode `cmap`. */ -export interface FontMetrics { - unitsPerEm: number; - /** Advance width of a codepoint as a fraction of the em, or undefined when the font does not map it. 
*/ - normalizedAdvance(codepoint: number): number | undefined; -} - -function tagAt(view: DataView, offset: number): string { - return String.fromCharCode( - view.getUint8(offset), - view.getUint8(offset + 1), - view.getUint8(offset + 2), - view.getUint8(offset + 3), - ); -} - -/** Resolve a codepoint to a glyph id within one `cmap` subtable, for the formats we support (4, 6, 12). */ -function makeCmapLookup( - view: DataView, - subOffset: number, -): (codepoint: number) => number | undefined { - const format = view.getUint16(subOffset); - - if (format === 4) { - const segX2 = view.getUint16(subOffset + 6); - const segCount = segX2 / 2; - const endOffset = subOffset + 14; - const startOffset = endOffset + segX2 + 2; // skip reservedPad - const deltaOffset = startOffset + segX2; - const rangeOffsetBase = deltaOffset + segX2; - return (cp) => { - if (cp > 0xffff) return undefined; - for (let i = 0; i < segCount; i++) { - const end = view.getUint16(endOffset + i * 2); - if (cp > end) continue; - const start = view.getUint16(startOffset + i * 2); - if (cp < start) return undefined; - const delta = view.getInt16(deltaOffset + i * 2); - const rangeOffset = view.getUint16(rangeOffsetBase + i * 2); - if (rangeOffset === 0) { - const gid = (cp + delta) & 0xffff; - return gid === 0 ? undefined : gid; - } - const glyphOffset = - rangeOffsetBase + i * 2 + rangeOffset + (cp - start) * 2; - const raw = view.getUint16(glyphOffset); - if (raw === 0) return undefined; - const gid = (raw + delta) & 0xffff; - return gid === 0 ? undefined : gid; - } - return undefined; - }; - } - - if (format === 6) { - const firstCode = view.getUint16(subOffset + 6); - const entryCount = view.getUint16(subOffset + 8); - return (cp) => { - if (cp < firstCode || cp >= firstCode + entryCount) return undefined; - const gid = view.getUint16(subOffset + 10 + (cp - firstCode) * 2); - return gid === 0 ? 
undefined : gid; - }; - } - - if (format === 12) { - const numGroups = view.getUint32(subOffset + 12); - const groupsOffset = subOffset + 16; - return (cp) => { - let lo = 0; - let hi = numGroups - 1; - while (lo <= hi) { - const mid = (lo + hi) >> 1; - const g = groupsOffset + mid * 12; - const start = view.getUint32(g); - const end = view.getUint32(g + 4); - if (cp < start) hi = mid - 1; - else if (cp > end) lo = mid + 1; - else { - const gid = view.getUint32(g + 8) + (cp - start); - return gid === 0 ? undefined : gid; - } - } - return undefined; - }; - } - - throw new Error(`unsupported cmap subtable format: ${format}`); -} - -/** Pick the best Unicode `cmap` subtable and return its glyph lookup. */ -function readCmap( - view: DataView, - cmapOffset: number, -): (codepoint: number) => number | undefined { - const numSubtables = view.getUint16(cmapOffset + 2); - const candidates: { score: number; offset: number }[] = []; - for (let i = 0; i < numSubtables; i++) { - const recordOffset = cmapOffset + 4 + i * 8; - const platformId = view.getUint16(recordOffset); - const encodingId = view.getUint16(recordOffset + 2); - const score = cmapPreference(platformId, encodingId); - // Skip non-Unicode subtables (Macintosh, Windows symbol, ...): their codepoints are not Unicode, - // so reading Latin advances through them would be wrong. We never fall back to one. - if (score === null) continue; - candidates.push({ - score, - offset: cmapOffset + view.getUint32(recordOffset + 4), - }); - } - candidates.sort((a, b) => b.score - a.score); - - for (const candidate of candidates) { - const format = view.getUint16(candidate.offset); - if (format === 4 || format === 6 || format === 12) - return makeCmapLookup(view, candidate.offset); - } - throw new Error("unsupported font: no readable Unicode cmap subtable"); -} - -/** Rank Unicode `cmap` subtables (full Unicode first, then BMP); null for non-Unicode subtables. 
*/ -function cmapPreference(platformId: number, encodingId: number): number | null { - if (platformId === 3 && encodingId === 10) return 4; // Windows Unicode UCS-4 - if (platformId === 0 && (encodingId === 4 || encodingId === 6)) return 3; // Unicode full - if (platformId === 3 && encodingId === 1) return 2; // Windows Unicode BMP - if (platformId === 0) return 1; // Unicode BMP and earlier - return null; // Macintosh, Windows symbol, and anything else: not a Unicode cmap -} - -/** - * Parse just enough of an SFNT font (TrueType or CFF/OTF) to read normalized advance widths by - * codepoint. Throws an explicit error when the container is a collection or a required table is missing. - */ -export function parseFont(bytes: Uint8Array): FontMetrics { - const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength); - if (bytes.byteLength < 12) - throw new Error("unsupported font: file is too small to be an SFNT"); - - const sfntVersion = view.getUint32(0); - if (sfntVersion === 0x74746366) - throw new Error("unsupported font: TrueType/OpenType collections (ttcf)"); - const isSfnt = - sfntVersion === 0x00010000 || // TrueType outlines - sfntVersion === 0x4f54544f || // 'OTTO' - CFF outlines - sfntVersion === 0x74727565; // 'true' - if (!isSfnt) - throw new Error( - `unsupported font: not an SFNT (sfntVersion 0x${sfntVersion.toString(16)})`, - ); - - const numTables = view.getUint16(4); - const tables = new Map(); - for (let i = 0; i < numTables; i++) { - const recordOffset = 12 + i * 16; - tables.set(tagAt(view, recordOffset), view.getUint32(recordOffset + 8)); - } - - const missing = REQUIRED_TABLES.filter((tag) => !tables.has(tag)); - if (missing.length > 0) - throw new Error( - `unsupported font: missing required table(s): ${missing.join(", ")}`, - ); - - const headOffset = tables.get("head") as number; - const unitsPerEm = view.getUint16(headOffset + 18); - if (unitsPerEm === 0) - throw new Error("unsupported font: head.unitsPerEm is zero"); - - const 
numberOfHMetrics = view.getUint16((tables.get("hhea") as number) + 34); - if (numberOfHMetrics === 0) - throw new Error("unsupported font: hhea.numberOfHMetrics is zero"); - - const hmtxOffset = tables.get("hmtx") as number; - const advanceOfGlyph = (glyphId: number): number => { - const index = glyphId < numberOfHMetrics ? glyphId : numberOfHMetrics - 1; - return view.getUint16(hmtxOffset + index * 4); - }; - - const lookup = readCmap(view, tables.get("cmap") as number); - - return { - unitsPerEm, - normalizedAdvance(codepoint: number): number | undefined { - const glyphId = lookup(codepoint); - if (glyphId === undefined) return undefined; - return advanceOfGlyph(glyphId) / unitsPerEm; - }, - }; -} - -// --- Scoring ---------------------------------------------------------------- - -/** One codepoint whose advance diverges, for the "worst glyphs" column. */ -export interface GlyphDelta { - codepoint: number; - delta: number; -} - -/** The advance-parity score of one candidate font against the reference, over a fixed sample. */ -export interface CompareScore { - /** codepoints in the tier sample that both fonts map. */ - compared: number; - /** tier sample size. */ - total: number; - /** tier sample codepoints not mapped by both fonts. */ - missing: number; - meanDelta: number; - maxDelta: number; - /** shared report-sample codepoints whose advance delta exceeds the metric_safe max threshold. */ - over1Percent: number; - /** shared report-sample codepoints whose advance delta exceeds the near_metric max threshold. */ - over2_5Percent: number; - tier: CompareTier; - worstGlyphs: GlyphDelta[]; -} - -export interface ScoreOptions { - /** Sample used for outlier reporting and worst-glyph display. */ - reportSample?: readonly number[]; - /** Sample used for tier classification and mean/max columns. Defaults to `reportSample`. 
*/ - tierSample?: readonly number[]; - worstCount?: number; - model?: CompareModel; -} - -interface MeasuredDeltas { - compared: number; - total: number; - missing: number; - meanDelta: number; - maxDelta: number; - over1Percent: number; - over2_5Percent: number; - worstGlyphs: GlyphDelta[]; -} - -function measureDeltas( - reference: ReadonlyMap, - candidate: ReadonlyMap, - sample: readonly number[], - worstCount: number, -): MeasuredDeltas { - const deltas: GlyphDelta[] = []; - let sum = 0; - let max = 0; - let over1Percent = 0; - let over2_5Percent = 0; - for (const cp of sample) { - const a = reference.get(cp); - const b = candidate.get(cp); - if (a === undefined || b === undefined) continue; - const delta = Math.abs(a - b); - deltas.push({ codepoint: cp, delta }); - sum += delta; - if (delta > max) max = delta; - if (delta > 0.01) over1Percent++; - if (delta > 0.025) over2_5Percent++; - } - - const compared = deltas.length; - const meanDelta = compared === 0 ? Number.NaN : sum / compared; - const maxDelta = compared === 0 ? Number.NaN : max; - const worstGlyphs = [...deltas] - .sort((x, y) => y.delta - x.delta) - .slice(0, worstCount) - .filter((g) => g.delta > 0); - - return { - compared, - total: sample.length, - missing: sample.length - compared, - meanDelta, - maxDelta, - over1Percent, - over2_5Percent, - worstGlyphs, - }; -} - -function normalizeScoreOptions( - optionsOrSample: ScoreOptions | readonly number[] | undefined, - worstCount: number | undefined, - model: CompareModel | undefined, -): Required { - if (!optionsOrSample || Array.isArray(optionsOrSample)) { - const reportSample = optionsOrSample ?? LATIN_SAMPLE; - return { - reportSample, - tierSample: reportSample, - worstCount: worstCount ?? 3, - model: model ?? "latin", - }; - } - - const options = optionsOrSample as ScoreOptions; - const reportSample = options.reportSample ?? LATIN_SAMPLE; - return { - reportSample, - tierSample: options.tierSample ?? 
reportSample, - worstCount: options.worstCount ?? 3, - model: options.model ?? "latin", - }; -} - -/** - * Score one candidate against the reference. The tier can use a narrower text sample while the report - * still surfaces full-sample outliers. Both inputs are normalized advance maps (codepoint -> - * advance/unitsPerEm); only codepoints present in both are compared. - */ -export function scoreAdvances( - reference: ReadonlyMap, - candidate: ReadonlyMap, - optionsOrSample?: ScoreOptions | readonly number[], - worstCount?: number, - model?: CompareModel, -): CompareScore { - const options = normalizeScoreOptions(optionsOrSample, worstCount, model); - const report = measureDeltas( - reference, - candidate, - options.reportSample, - options.worstCount, - ); - const tierMetrics = - options.tierSample === options.reportSample - ? report - : measureDeltas(reference, candidate, options.tierSample, 0); - return { - compared: tierMetrics.compared, - total: tierMetrics.total, - missing: tierMetrics.missing, - meanDelta: tierMetrics.meanDelta, - maxDelta: tierMetrics.maxDelta, - over1Percent: report.over1Percent, - over2_5Percent: report.over2_5Percent, - tier: - tierMetrics.compared === 0 - ? "visual_only" - : classifyTier( - tierMetrics.meanDelta, - tierMetrics.maxDelta, - options.model, - ), - worstGlyphs: report.worstGlyphs, - }; -} - -/** Build a font's normalized-advance map over the sample (only codepoints it maps are included). */ -export function sampleMetrics( - font: FontMetrics, - sample: readonly number[] = LATIN_SAMPLE, -): Map { - const map = new Map(); - for (const cp of sample) { - const advance = font.normalizedAdvance(cp); - if (advance !== undefined) map.set(cp, advance); - } - return map; -} - -// --- Source cache + candidates --------------------------------------------- - -/** One snapshot file entry: a font member by path, with its display name. 
*/ -interface SnapshotFile { - name: string; - path: string; -} - -type ArchiveFormat = "zip" | "tar.gz"; - -/** - * A source as recorded in `source-snapshot.json`. Archive sources extract their candidate fonts from a - * cached release archive; GitHub tree sources read each `files[].path` directly from the cache. `kind` is - * optional so older snapshots (archive-only) still load and default to archive behavior. - */ -export interface SnapshotSource { - sourceId: string; - family: string; - targetFamilies: string[]; - kind?: "archive" | "github-tree"; - archiveFormat?: ArchiveFormat; - files?: SnapshotFile[]; -} - -/** A candidate font ready to score: its display name and raw bytes. */ -export interface CandidateFile { - file: string; - bytes: Uint8Array; -} - -const archiveFormatOf = (source: SnapshotSource): ArchiveFormat => - source.archiveFormat ?? "zip"; - -const archiveExtensions: Record = { - zip: "zip", - "tar.gz": "tar.gz", -}; - -function requireArchiveTool(format: ArchiveFormat): void { - const tool = format === "tar.gz" ? "tar" : "unzip"; - const probe = format === "tar.gz" ? "--version" : "-v"; - try { - execFileSync(tool, [probe], { stdio: "ignore" }); - } catch { - throw new Error(`\`${tool}\` is required on PATH.`); - } -} - -function isFontFile(path: string): boolean { - return RAW_SFNT_EXTENSIONS.some((ext) => path.toLowerCase().endsWith(ext)); -} - -/** Font members inside a source archive, by their in-archive path. */ -function listFontMembers(archivePath: string, format: ArchiveFormat): string[] { - const out = - format === "tar.gz" - ? execFileSync("tar", ["-tzf", archivePath], { encoding: "utf8" }) - : execFileSync("unzip", ["-Z1", archivePath], { encoding: "utf8" }); - return out - .split("\n") - .map((line) => line.trim()) - .filter(Boolean) - .filter(isFontFile); -} - -// `unzip -p` matches its member argument as a glob, so members with literal glob -// metacharacters (e.g. 
variable-font names like `NotoSans-Italic[wdth,wght].ttf`) -// must be escaped to extract by exact name. -const escapeArchiveMember = (name: string): string => - name.replace(/[\\*?[\]]/g, "\\$&"); - -function readArchiveMember( - archivePath: string, - member: string, - format: ArchiveFormat, -): Uint8Array { - const opts = { maxBuffer: 256 * 1024 * 1024 }; - return new Uint8Array( - format === "tar.gz" - ? execFileSync("tar", ["-xzOf", archivePath, "--", member], opts) - : execFileSync( - "unzip", - ["-p", archivePath, escapeArchiveMember(member)], - opts, - ), - ); -} - -/** Load the acquire snapshot, failing explicitly when the cache or snapshot is absent. */ -function loadSnapshot(cacheDir: string): SnapshotSource[] { - if (!existsSync(cacheDir)) - throw new Error( - `source cache not found at ${cacheDir}. Run \`bun run corpus:acquire\` first.`, - ); - const snapshotPath = join(cacheDir, SNAPSHOT_FILE); - if (!existsSync(snapshotPath)) - throw new Error( - `${SNAPSHOT_FILE} not found in ${cacheDir}. Run \`bun run corpus:acquire\` first.`, - ); - const parsed = JSON.parse(readFileSync(snapshotPath, "utf8")) as { - snapshots?: SnapshotSource[]; - }; - const snapshots = parsed.snapshots ?? []; - if (snapshots.length === 0) - throw new Error(`${SNAPSHOT_FILE} lists no acquired sources.`); - return snapshots; -} - -/** - * Collect the candidate fonts for one source from the cache. GitHub tree sources read each snapshot file - * entry directly; archive sources list and extract font members from the cached release archive. Throws - * when an expected cache file is absent so the caller can point the user back at `bun run corpus:acquire`. - */ -export function collectCandidates( - source: SnapshotSource, - cacheDir: string, -): CandidateFile[] { - if (source.kind === "github-tree") { - const files = source.files ?? 
[]; - if (files.length === 0) - throw new Error(`no candidate files listed for ${source.sourceId}`); - return files.map((entry) => { - const filePath = join(cacheDir, entry.path); - if (!existsSync(filePath)) - throw new Error( - `candidate file missing for ${source.sourceId}: ${filePath}. Run \`bun run corpus:acquire\` first.`, - ); - return { file: entry.name, bytes: readFileSync(filePath) }; - }); - } - - const format = archiveFormatOf(source); - const archivePath = join( - cacheDir, - `${source.sourceId}.${archiveExtensions[format]}`, - ); - if (!existsSync(archivePath)) - throw new Error( - `candidate archive missing for ${source.sourceId}: ${archivePath}. Run \`bun run corpus:acquire\` first.`, - ); - const members = listFontMembers(archivePath, format); - if (members.length === 0) - throw new Error(`no candidate font files in ${archivePath}`); - - const basenameCounts = new Map(); - for (const member of members) { - const file = basename(member); - basenameCounts.set(file, (basenameCounts.get(file) ?? 0) + 1); - } - const duplicateBasenames = new Set( - [...basenameCounts].filter(([, count]) => count > 1).map(([file]) => file), - ); - - return members.map((member) => ({ - file: displayNameForMember(member, duplicateBasenames), - bytes: readArchiveMember(archivePath, member, format), - })); -} - -// --- CLI -------------------------------------------------------------------- interface CompareRow { sourceId: string; @@ -721,87 +111,53 @@ export function parseArgs(argv: string[]): ParsedArgs { return args; } -function formatCodepoint(cp: number): string { - return `U+${cp.toString(16).toUpperCase().padStart(4, "0")}`; -} +function selectSources( + snapshot: SnapshotSource[], + requestedIds: string[], +): SnapshotSource[] { + if (requestedIds.length === 0) return snapshot; -function formatDelta(value: number): string { - return Number.isNaN(value) ? 
"n/a" : value.toFixed(4); -} - -function formatWorst(worst: GlyphDelta[]): string { - if (worst.length === 0) return "-"; - return worst - .map((g) => `${formatCodepoint(g.codepoint)} ${g.delta.toFixed(4)}`) - .join("; "); + const byId = new Map(snapshot.map((source) => [source.sourceId, source])); + const unknown = requestedIds.filter((id) => !byId.has(id)); + if (unknown.length > 0) + throw new Error( + `source(s) not in cache: ${unknown.join(", ")}. Acquired: ${[...byId.keys()].join(", ")}`, + ); + return requestedIds.map((id) => byId.get(id) as SnapshotSource); } -interface RenderOptions { - limit?: number | null; +function scoreSources( + reference: ReadonlyMap, + selected: SnapshotSource[], + cacheDir: string, + model: CompareModel, +): { rows: CompareRow[]; skipped: number } { + const rows: CompareRow[] = []; + let skipped = 0; + for (const source of selected) { + for (const candidate of collectCandidates(source, cacheDir)) { + try { + const font = parseFont(candidate.bytes); + const score = scoreAdvances(reference, sampleMetrics(font), { + reportSample: LATIN_SAMPLE, + tierSample: model === "latin" ? LATIN_TEXT_SAMPLE : LATIN_SAMPLE, + model, + }); + rows.push({ sourceId: source.sourceId, file: candidate.file, score }); + } catch { + skipped++; + } + } + } + return { rows, skipped }; } -/** Render the ranked table. Returned as a string so it can be tested without capturing stdout. */ -export function renderReport( - rows: CompareRow[], - options: RenderOptions = {}, -): string { - const ranked = [...rows].sort((a, b) => { - const tierDiff = TIER_RANK[a.score.tier] - TIER_RANK[b.score.tier]; - if (tierDiff !== 0) return tierDiff; - const aMean = Number.isNaN(a.score.meanDelta) - ? Infinity - : a.score.meanDelta; - const bMean = Number.isNaN(b.score.meanDelta) - ? Infinity - : b.score.meanDelta; - return aMean - bMean; - }); - - const visible = - options.limit === null ? 
ranked : ranked.slice(0, options.limit); - - const header = [ - "source", - "file", - "mean", - "max", - "tier", - "coverage", - "missing", - "over1", - "over2.5", - "worst", - ]; - const body = visible.map((row) => [ - row.sourceId, - row.file, - formatDelta(row.score.meanDelta), - formatDelta(row.score.maxDelta), - row.score.tier, - `${row.score.compared}/${row.score.total}`, - String(row.score.missing), - String(row.score.over1Percent), - String(row.score.over2_5Percent), - formatWorst(row.score.worstGlyphs), - ]); - - const widths = header.map((h, col) => - Math.max(h.length, ...body.map((r) => r[col].length)), +function requireArchiveTools(selected: SnapshotSource[]): void { + const archiveSources = selected.filter( + (source) => source.kind !== "github-tree", ); - const line = (cells: string[]) => - cells - .map((cell, col) => cell.padEnd(widths[col])) - .join(" ") - .trimEnd(); - return [line(header), ...body.map(line)].join("\n"); -} - -function displayNameForMember( - member: string, - duplicateBasenames: Set, -): string { - const file = basename(member); - return duplicateBasenames.has(file) ? member : file; + for (const format of new Set(archiveSources.map(archiveFormatOf))) + requireArchiveTool(format); } function main(): void { @@ -815,46 +171,16 @@ function main(): void { throw new Error(`reference font not found: ${args.reference}`); const cacheDir = process.env.DOCFONTS_SOURCE_CACHE ?? DEFAULT_CACHE_DIR; - const snapshot = loadSnapshot(cacheDir); - const byId = new Map(snapshot.map((source) => [source.sourceId, source])); - - let selected: SnapshotSource[]; - if (args.sources.length > 0) { - const unknown = args.sources.filter((id) => !byId.has(id)); - if (unknown.length > 0) - throw new Error( - `source(s) not in cache: ${unknown.join(", ")}. 
Acquired: ${[...byId.keys()].join(", ")}`, - ); - selected = args.sources.map((id) => byId.get(id) as SnapshotSource); - } else { - selected = snapshot; - } - - const archiveSources = selected.filter( - (source) => source.kind !== "github-tree", - ); - for (const format of new Set(archiveSources.map(archiveFormatOf))) - requireArchiveTool(format); + const selected = selectSources(loadSnapshot(cacheDir), args.sources); + requireArchiveTools(selected); const reference = sampleMetrics(parseFont(readFileSync(args.reference))); - - const rows: CompareRow[] = []; - let skipped = 0; - for (const source of selected) { - for (const candidate of collectCandidates(source, cacheDir)) { - try { - const font = parseFont(candidate.bytes); - const score = scoreAdvances(reference, sampleMetrics(font), { - reportSample: LATIN_SAMPLE, - tierSample: args.model === "latin" ? LATIN_TEXT_SAMPLE : LATIN_SAMPLE, - model: args.model, - }); - rows.push({ sourceId: source.sourceId, file: candidate.file, score }); - } catch { - skipped++; - } - } - } + const { rows, skipped } = scoreSources( + reference, + selected, + cacheDir, + args.model, + ); const label = args.family ?? "(family not specified)"; const shown = diff --git a/tools/corpus/src/cache.ts b/tools/corpus/src/cache.ts new file mode 100644 index 0000000..053d2bc --- /dev/null +++ b/tools/corpus/src/cache.ts @@ -0,0 +1,171 @@ +import { execFileSync } from "node:child_process"; +import { existsSync, readFileSync } from "node:fs"; +import { basename, join } from "node:path"; + +const RAW_SFNT_EXTENSIONS = [".otf", ".ttf"]; +const SNAPSHOT_FILE = "source-snapshot.json"; + +/** One snapshot file entry: a font member by path, with its display name. */ +interface SnapshotFile { + name: string; + path: string; +} + +export type ArchiveFormat = "zip" | "tar.gz"; + +/** + * A source as recorded in `source-snapshot.json`. 
Archive sources extract their candidate fonts from a + * cached release archive; GitHub tree sources read each `files[].path` directly from the cache. `kind` is + * optional so older snapshots (archive-only) still load and default to archive behavior. + */ +export interface SnapshotSource { + sourceId: string; + family: string; + targetFamilies: string[]; + kind?: "archive" | "github-tree"; + archiveFormat?: ArchiveFormat; + files?: SnapshotFile[]; +} + +/** A candidate font ready to score: its display name and raw bytes. */ +export interface CandidateFile { + file: string; + bytes: Uint8Array; +} + +export const archiveFormatOf = (source: SnapshotSource): ArchiveFormat => + source.archiveFormat ?? "zip"; + +const archiveExtensions: Record = { + zip: "zip", + "tar.gz": "tar.gz", +}; + +export function requireArchiveTool(format: ArchiveFormat): void { + const tool = format === "tar.gz" ? "tar" : "unzip"; + const probe = format === "tar.gz" ? "--version" : "-v"; + try { + execFileSync(tool, [probe], { stdio: "ignore" }); + } catch { + throw new Error(`\`${tool}\` is required on PATH.`); + } +} + +function isFontFile(path: string): boolean { + return RAW_SFNT_EXTENSIONS.some((ext) => path.toLowerCase().endsWith(ext)); +} + +/** Font members inside a source archive, by their in-archive path. */ +function listFontMembers(archivePath: string, format: ArchiveFormat): string[] { + const out = + format === "tar.gz" + ? execFileSync("tar", ["-tzf", archivePath], { encoding: "utf8" }) + : execFileSync("unzip", ["-Z1", archivePath], { encoding: "utf8" }); + return out + .split("\n") + .map((line) => line.trim()) + .filter(Boolean) + .filter(isFontFile); +} + +// `unzip -p` matches its member argument as a glob, so members with literal glob +// metacharacters (e.g. variable-font names like `NotoSans-Italic[wdth,wght].ttf`) +// must be escaped to extract by exact name. 
+const escapeArchiveMember = (name: string): string => + name.replace(/[\\*?[\]]/g, "\\$&"); + +function readArchiveMember( + archivePath: string, + member: string, + format: ArchiveFormat, +): Uint8Array { + const opts = { maxBuffer: 256 * 1024 * 1024 }; + return new Uint8Array( + format === "tar.gz" + ? execFileSync("tar", ["-xzOf", archivePath, "--", member], opts) + : execFileSync( + "unzip", + ["-p", archivePath, escapeArchiveMember(member)], + opts, + ), + ); +} + +/** Load the acquire snapshot, failing explicitly when the cache or snapshot is absent. */ +export function loadSnapshot(cacheDir: string): SnapshotSource[] { + if (!existsSync(cacheDir)) + throw new Error( + `source cache not found at ${cacheDir}. Run \`bun run corpus:acquire\` first.`, + ); + const snapshotPath = join(cacheDir, SNAPSHOT_FILE); + if (!existsSync(snapshotPath)) + throw new Error( + `${SNAPSHOT_FILE} not found in ${cacheDir}. Run \`bun run corpus:acquire\` first.`, + ); + const parsed = JSON.parse(readFileSync(snapshotPath, "utf8")) as { + snapshots?: SnapshotSource[]; + }; + const snapshots = parsed.snapshots ?? []; + if (snapshots.length === 0) + throw new Error(`${SNAPSHOT_FILE} lists no acquired sources.`); + return snapshots; +} + +/** + * Collect the candidate fonts for one source from the cache. GitHub tree sources read each snapshot file + * entry directly; archive sources list and extract font members from the cached release archive. Throws + * when an expected cache file is absent so the caller can point the user back at `bun run corpus:acquire`. + */ +export function collectCandidates( + source: SnapshotSource, + cacheDir: string, +): CandidateFile[] { + if (source.kind === "github-tree") { + const files = source.files ?? 
[]; + if (files.length === 0) + throw new Error(`no candidate files listed for ${source.sourceId}`); + return files.map((entry) => { + const filePath = join(cacheDir, entry.path); + if (!existsSync(filePath)) + throw new Error( + `candidate file missing for ${source.sourceId}: ${filePath}. Run \`bun run corpus:acquire\` first.`, + ); + return { file: entry.name, bytes: readFileSync(filePath) }; + }); + } + + const format = archiveFormatOf(source); + const archivePath = join( + cacheDir, + `${source.sourceId}.${archiveExtensions[format]}`, + ); + if (!existsSync(archivePath)) + throw new Error( + `candidate archive missing for ${source.sourceId}: ${archivePath}. Run \`bun run corpus:acquire\` first.`, + ); + const members = listFontMembers(archivePath, format); + if (members.length === 0) + throw new Error(`no candidate font files in ${archivePath}`); + + const basenameCounts = new Map(); + for (const member of members) { + const file = basename(member); + basenameCounts.set(file, (basenameCounts.get(file) ?? 0) + 1); + } + const duplicateBasenames = new Set( + [...basenameCounts].filter(([, count]) => count > 1).map(([file]) => file), + ); + + return members.map((member) => ({ + file: displayNameForMember(member, duplicateBasenames), + bytes: readArchiveMember(archivePath, member, format), + })); +} + +function displayNameForMember( + member: string, + duplicateBasenames: Set, +): string { + const file = basename(member); + return duplicateBasenames.has(file) ? member : file; +} diff --git a/tools/corpus/src/font.ts b/tools/corpus/src/font.ts new file mode 100644 index 0000000..05da3d0 --- /dev/null +++ b/tools/corpus/src/font.ts @@ -0,0 +1,203 @@ +import { LATIN_SAMPLE } from "./samples"; + +const REQUIRED_TABLES = ["head", "maxp", "hhea", "hmtx", "cmap"] as const; + +/** A parsed font's em size plus a normalized advance lookup over its Unicode `cmap`. 
*/ +export interface FontMetrics { + unitsPerEm: number; + /** Advance width of a codepoint as a fraction of the em, or undefined when the font does not map it. */ + normalizedAdvance(codepoint: number): number | undefined; +} + +function tagAt(view: DataView, offset: number): string { + return String.fromCharCode( + view.getUint8(offset), + view.getUint8(offset + 1), + view.getUint8(offset + 2), + view.getUint8(offset + 3), + ); +} + +/** Resolve a codepoint to a glyph id within one `cmap` subtable, for the formats we support (4, 6, 12). */ +function makeCmapLookup( + view: DataView, + subOffset: number, +): (codepoint: number) => number | undefined { + const format = view.getUint16(subOffset); + + if (format === 4) { + const segX2 = view.getUint16(subOffset + 6); + const segCount = segX2 / 2; + const endOffset = subOffset + 14; + const startOffset = endOffset + segX2 + 2; // skip reservedPad + const deltaOffset = startOffset + segX2; + const rangeOffsetBase = deltaOffset + segX2; + return (cp) => { + if (cp > 0xffff) return undefined; + for (let i = 0; i < segCount; i++) { + const end = view.getUint16(endOffset + i * 2); + if (cp > end) continue; + const start = view.getUint16(startOffset + i * 2); + if (cp < start) return undefined; + const delta = view.getInt16(deltaOffset + i * 2); + const rangeOffset = view.getUint16(rangeOffsetBase + i * 2); + if (rangeOffset === 0) { + const gid = (cp + delta) & 0xffff; + return gid === 0 ? undefined : gid; + } + const glyphOffset = + rangeOffsetBase + i * 2 + rangeOffset + (cp - start) * 2; + const raw = view.getUint16(glyphOffset); + if (raw === 0) return undefined; + const gid = (raw + delta) & 0xffff; + return gid === 0 ? 
undefined : gid; + } + return undefined; + }; + } + + if (format === 6) { + const firstCode = view.getUint16(subOffset + 6); + const entryCount = view.getUint16(subOffset + 8); + return (cp) => { + if (cp < firstCode || cp >= firstCode + entryCount) return undefined; + const gid = view.getUint16(subOffset + 10 + (cp - firstCode) * 2); + return gid === 0 ? undefined : gid; + }; + } + + if (format === 12) { + const numGroups = view.getUint32(subOffset + 12); + const groupsOffset = subOffset + 16; + return (cp) => { + let lo = 0; + let hi = numGroups - 1; + while (lo <= hi) { + const mid = (lo + hi) >> 1; + const g = groupsOffset + mid * 12; + const start = view.getUint32(g); + const end = view.getUint32(g + 4); + if (cp < start) hi = mid - 1; + else if (cp > end) lo = mid + 1; + else { + const gid = view.getUint32(g + 8) + (cp - start); + return gid === 0 ? undefined : gid; + } + } + return undefined; + }; + } + + throw new Error(`unsupported cmap subtable format: ${format}`); +} + +/** Pick the best Unicode `cmap` subtable and return its glyph lookup. 
*/ +function readCmap( + view: DataView, + cmapOffset: number, +): (codepoint: number) => number | undefined { + const numSubtables = view.getUint16(cmapOffset + 2); + const candidates: { score: number; offset: number }[] = []; + for (let i = 0; i < numSubtables; i++) { + const recordOffset = cmapOffset + 4 + i * 8; + const platformId = view.getUint16(recordOffset); + const encodingId = view.getUint16(recordOffset + 2); + const score = cmapPreference(platformId, encodingId); + if (score === null) continue; + candidates.push({ + score, + offset: cmapOffset + view.getUint32(recordOffset + 4), + }); + } + candidates.sort((a, b) => b.score - a.score); + + for (const candidate of candidates) { + const format = view.getUint16(candidate.offset); + if (format === 4 || format === 6 || format === 12) + return makeCmapLookup(view, candidate.offset); + } + throw new Error("unsupported font: no readable Unicode cmap subtable"); +} + +/** Rank Unicode `cmap` subtables (full Unicode first, then BMP); null for non-Unicode subtables. */ +function cmapPreference(platformId: number, encodingId: number): number | null { + if (platformId === 3 && encodingId === 10) return 4; // Windows Unicode UCS-4 + if (platformId === 0 && (encodingId === 4 || encodingId === 6)) return 3; // Unicode full + if (platformId === 3 && encodingId === 1) return 2; // Windows Unicode BMP + if (platformId === 0) return 1; // Unicode BMP and earlier + return null; // Macintosh, Windows symbol, and anything else: not a Unicode cmap +} + +/** + * Parse just enough of an SFNT font (TrueType or CFF/OTF) to read normalized advance widths by + * codepoint. Throws an explicit error when the container is a collection or a required table is missing. 
+ */ +export function parseFont(bytes: Uint8Array): FontMetrics { + const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength); + if (bytes.byteLength < 12) + throw new Error("unsupported font: file is too small to be an SFNT"); + + const sfntVersion = view.getUint32(0); + if (sfntVersion === 0x74746366) + throw new Error("unsupported font: TrueType/OpenType collections (ttcf)"); + const isSfnt = + sfntVersion === 0x00010000 || + sfntVersion === 0x4f54544f || + sfntVersion === 0x74727565; + if (!isSfnt) + throw new Error( + `unsupported font: not an SFNT (sfntVersion 0x${sfntVersion.toString(16)})`, + ); + + const numTables = view.getUint16(4); + const tables = new Map(); + for (let i = 0; i < numTables; i++) { + const recordOffset = 12 + i * 16; + tables.set(tagAt(view, recordOffset), view.getUint32(recordOffset + 8)); + } + + const missing = REQUIRED_TABLES.filter((tag) => !tables.has(tag)); + if (missing.length > 0) + throw new Error( + `unsupported font: missing required table(s): ${missing.join(", ")}`, + ); + + const headOffset = tables.get("head") as number; + const unitsPerEm = view.getUint16(headOffset + 18); + if (unitsPerEm === 0) + throw new Error("unsupported font: head.unitsPerEm is zero"); + + const numberOfHMetrics = view.getUint16((tables.get("hhea") as number) + 34); + if (numberOfHMetrics === 0) + throw new Error("unsupported font: hhea.numberOfHMetrics is zero"); + + const hmtxOffset = tables.get("hmtx") as number; + const advanceOfGlyph = (glyphId: number): number => { + const index = glyphId < numberOfHMetrics ? 
glyphId : numberOfHMetrics - 1; + return view.getUint16(hmtxOffset + index * 4); + }; + + const lookup = readCmap(view, tables.get("cmap") as number); + + return { + unitsPerEm, + normalizedAdvance(codepoint: number): number | undefined { + const glyphId = lookup(codepoint); + if (glyphId === undefined) return undefined; + return advanceOfGlyph(glyphId) / unitsPerEm; + }, + }; +} + +/** Build a font's normalized-advance map over the sample (only codepoints it maps are included). */ +export function sampleMetrics( + font: FontMetrics, + sample: readonly number[] = LATIN_SAMPLE, +): Map { + const map = new Map(); + for (const cp of sample) { + const advance = font.normalizedAdvance(cp); + if (advance !== undefined) map.set(cp, advance); + } + return map; +} diff --git a/tools/corpus/src/report.ts b/tools/corpus/src/report.ts new file mode 100644 index 0000000..6fa75c8 --- /dev/null +++ b/tools/corpus/src/report.ts @@ -0,0 +1,83 @@ +import type { CompareScore, GlyphDelta } from "./score"; +import { TIER_RANK } from "./tiers"; + +interface CompareRow { + sourceId: string; + file: string; + score: CompareScore; +} + +interface RenderOptions { + limit?: number | null; +} + +function formatCodepoint(cp: number): string { + return `U+${cp.toString(16).toUpperCase().padStart(4, "0")}`; +} + +function formatDelta(value: number): string { + return Number.isNaN(value) ? "n/a" : value.toFixed(4); +} + +function formatWorst(worst: GlyphDelta[]): string { + if (worst.length === 0) return "-"; + return worst + .map((g) => `${formatCodepoint(g.codepoint)} ${g.delta.toFixed(4)}`) + .join("; "); +} + +/** Render the ranked table. Returned as a string so it can be tested without capturing stdout. 
*/ +export function renderReport( + rows: CompareRow[], + options: RenderOptions = {}, +): string { + const ranked = [...rows].sort((a, b) => { + const tierDiff = TIER_RANK[a.score.tier] - TIER_RANK[b.score.tier]; + if (tierDiff !== 0) return tierDiff; + const aMean = Number.isNaN(a.score.meanDelta) + ? Infinity + : a.score.meanDelta; + const bMean = Number.isNaN(b.score.meanDelta) + ? Infinity + : b.score.meanDelta; + return aMean - bMean; + }); + + const visible = + options.limit === null ? ranked : ranked.slice(0, options.limit); + + const header = [ + "source", + "file", + "mean", + "max", + "tier", + "coverage", + "missing", + "over1", + "over2.5", + "worst", + ]; + const body = visible.map((row) => [ + row.sourceId, + row.file, + formatDelta(row.score.meanDelta), + formatDelta(row.score.maxDelta), + row.score.tier, + `${row.score.compared}/${row.score.total}`, + String(row.score.missing), + String(row.score.over1Percent), + String(row.score.over2_5Percent), + formatWorst(row.score.worstGlyphs), + ]); + + const widths = header.map((h, col) => + Math.max(h.length, ...body.map((r) => r[col].length)), + ); + const line = (cells: string[]) => + cells + .map((cell, col) => cell.padEnd(widths[col])) + .join(" ") + .trimEnd(); + return [line(header), ...body.map(line)].join("\n"); +} diff --git a/tools/corpus/src/samples.ts b/tools/corpus/src/samples.ts new file mode 100644 index 0000000..bc1418e --- /dev/null +++ b/tools/corpus/src/samples.ts @@ -0,0 +1,70 @@ +/** Inclusive codepoint range helper for building the sample. */ +function codepointRange(start: number, end: number): number[] { + const out: number[] = []; + for (let cp = start; cp <= end; cp++) out.push(cp); + return out; +} + +/** + * Fixed Latin sample for advance comparison: every printable ASCII codepoint (U+0020 space through + * U+007E tilde), Latin-1 letters with diacritics, and common punctuation/symbols a document is likely + * to use. Named and tested so the metric is reproducible. 
Stored as numeric codepoints, sorted and + * unique. + */ +export const LATIN_SAMPLE: readonly number[] = (() => { + const latin1 = codepointRange(0x00a0, 0x00ff).filter((cp) => cp !== 0x00ad); + const generalPunctuation = [ + 0x2013, 0x2014, 0x2018, 0x2019, 0x201c, 0x201d, 0x2020, 0x2021, 0x2022, + 0x2026, 0x2030, 0x2039, 0x203a, 0x20ac, 0x2122, + ]; + const all = [...codepointRange(0x20, 0x7e), ...latin1, ...generalPunctuation]; + return [...new Set(all)].sort((a, b) => a - b); +})(); + +const TEXT_PUNCTUATION = new Set([ + 0x20, // space + 0x21, // ! + 0x22, // " + 0x23, // # + 0x26, // & + 0x27, // ' + 0x28, // ( + 0x29, // ) + 0x2c, // , + 0x2d, // - + 0x2e, // . + 0x2f, // / + 0x3a, // : + 0x3b, // ; + 0x3f, // ? + 0x40, // @ + 0x5b, // [ + 0x5d, // ] + 0x7b, // { + 0x7d, // } + 0x00a0, // no-break space + 0x2013, // en dash + 0x2014, // em dash codepoint + 0x2018, // left single quote + 0x2019, // right single quote + 0x201c, // left double quote + 0x201d, // right double quote + 0x2026, // ellipsis +]); + +const EXCLUDED_TEXT_LETTERS = new Set([ + 0x00b5, // micro sign: Unicode treats it as a letter, but it behaves like a symbol here. +]); + +function isTextLetterOrDigit(codepoint: number): boolean { + if (EXCLUDED_TEXT_LETTERS.has(codepoint)) return false; + return /^[\p{L}\p{N}]$/u.test(String.fromCodePoint(codepoint)); +} + +/** + * Text-carrying Latin sample used to rank proportional-font candidates. The full sample still reports + * outliers, but rare symbols should not hide a strong body-text lead. 
+ */ +export const LATIN_TEXT_SAMPLE: readonly number[] = LATIN_SAMPLE.filter( + (cp) => TEXT_PUNCTUATION.has(cp) || isTextLetterOrDigit(cp), +); diff --git a/tools/corpus/src/score.ts b/tools/corpus/src/score.ts new file mode 100644 index 0000000..48790f6 --- /dev/null +++ b/tools/corpus/src/score.ts @@ -0,0 +1,157 @@ +import { LATIN_SAMPLE } from "./samples"; +import { type CompareModel, type CompareTier, classifyTier } from "./tiers"; + +/** One codepoint whose advance diverges, for the "worst glyphs" column. */ +export interface GlyphDelta { + codepoint: number; + delta: number; +} + +/** The advance-parity score of one candidate font against the reference, over a fixed sample. */ +export interface CompareScore { + /** codepoints in the tier sample that both fonts map. */ + compared: number; + /** tier sample size. */ + total: number; + /** tier sample codepoints not mapped by both fonts. */ + missing: number; + meanDelta: number; + maxDelta: number; + /** shared report-sample codepoints whose advance delta exceeds the metric_safe max threshold. */ + over1Percent: number; + /** shared report-sample codepoints whose advance delta exceeds the near_metric max threshold. */ + over2_5Percent: number; + tier: CompareTier; + worstGlyphs: GlyphDelta[]; +} + +export interface ScoreOptions { + /** Sample used for outlier reporting and worst-glyph display. */ + reportSample?: readonly number[]; + /** Sample used for tier classification and mean/max columns. Defaults to `reportSample`. 
*/ + tierSample?: readonly number[]; + worstCount?: number; + model?: CompareModel; +} + +interface MeasuredDeltas { + compared: number; + total: number; + missing: number; + meanDelta: number; + maxDelta: number; + over1Percent: number; + over2_5Percent: number; + worstGlyphs: GlyphDelta[]; +} + +function measureDeltas( + reference: ReadonlyMap, + candidate: ReadonlyMap, + sample: readonly number[], + worstCount: number, +): MeasuredDeltas { + const deltas: GlyphDelta[] = []; + let sum = 0; + let max = 0; + let over1Percent = 0; + let over2_5Percent = 0; + for (const cp of sample) { + const a = reference.get(cp); + const b = candidate.get(cp); + if (a === undefined || b === undefined) continue; + const delta = Math.abs(a - b); + deltas.push({ codepoint: cp, delta }); + sum += delta; + if (delta > max) max = delta; + if (delta > 0.01) over1Percent++; + if (delta > 0.025) over2_5Percent++; + } + + const compared = deltas.length; + const meanDelta = compared === 0 ? Number.NaN : sum / compared; + const maxDelta = compared === 0 ? Number.NaN : max; + const worstGlyphs = [...deltas] + .sort((x, y) => y.delta - x.delta) + .slice(0, worstCount) + .filter((g) => g.delta > 0); + + return { + compared, + total: sample.length, + missing: sample.length - compared, + meanDelta, + maxDelta, + over1Percent, + over2_5Percent, + worstGlyphs, + }; +} + +function normalizeScoreOptions( + optionsOrSample: ScoreOptions | readonly number[] | undefined, + worstCount: number | undefined, + model: CompareModel | undefined, +): Required { + if (!optionsOrSample || Array.isArray(optionsOrSample)) { + const reportSample = optionsOrSample ?? LATIN_SAMPLE; + return { + reportSample, + tierSample: reportSample, + worstCount: worstCount ?? 3, + model: model ?? "latin", + }; + } + + const options = optionsOrSample as ScoreOptions; + const reportSample = options.reportSample ?? LATIN_SAMPLE; + return { + reportSample, + tierSample: options.tierSample ?? 
reportSample, + worstCount: options.worstCount ?? 3, + model: options.model ?? "latin", + }; +} + +/** + * Score one candidate against the reference. The tier can use a narrower text sample while the report + * still surfaces full-sample outliers. Both inputs are normalized advance maps (codepoint -> + * advance/unitsPerEm); only codepoints present in both are compared. + */ +export function scoreAdvances( + reference: ReadonlyMap, + candidate: ReadonlyMap, + optionsOrSample?: ScoreOptions | readonly number[], + worstCount?: number, + model?: CompareModel, +): CompareScore { + const options = normalizeScoreOptions(optionsOrSample, worstCount, model); + const report = measureDeltas( + reference, + candidate, + options.reportSample, + options.worstCount, + ); + const tierMetrics = + options.tierSample === options.reportSample + ? report + : measureDeltas(reference, candidate, options.tierSample, 0); + return { + compared: tierMetrics.compared, + total: tierMetrics.total, + missing: tierMetrics.missing, + meanDelta: tierMetrics.meanDelta, + maxDelta: tierMetrics.maxDelta, + over1Percent: report.over1Percent, + over2_5Percent: report.over2_5Percent, + tier: + tierMetrics.compared === 0 + ? "visual_only" + : classifyTier( + tierMetrics.meanDelta, + tierMetrics.maxDelta, + options.model, + ), + worstGlyphs: report.worstGlyphs, + }; +} diff --git a/tools/corpus/src/tiers.ts b/tools/corpus/src/tiers.ts new file mode 100644 index 0000000..a8d83e2 --- /dev/null +++ b/tools/corpus/src/tiers.ts @@ -0,0 +1,41 @@ +/** + * Advance-fidelity tier. Thresholds mirror the package's verdict language (see `src/types.ts`): + * metric_safe is the DIRECT band, near_metric the LIKELY band, everything else visual_only. + * cell_width_only is the monospace model's verdict for a matching cell: it proves line width, not + * glyph-shape fidelity. + */ +export type CompareTier = + | "metric_safe" + | "near_metric" + | "cell_width_only" + | "visual_only"; + +/** + * Classification model. 
`latin` is the default proportional comparison. `monospace` treats a matching + * advance as proof of cell width only, since every glyph in a monospace cell shares one advance. + */ +export type CompareModel = "latin" | "monospace"; + +export const TIER_RANK: Record = { + metric_safe: 0, + near_metric: 1, + cell_width_only: 2, + visual_only: 3, +}; + +/** + * Classify a (mean, max) advance-delta pair into a fidelity tier. Deltas are fractions of the em. Under + * the monospace model a matching cell only vouches for line width, so the metric bands collapse to + * cell_width_only while non-matching candidates stay visual_only. + */ +export function classifyTier( + meanDelta: number, + maxDelta: number, + model: CompareModel = "latin", +): CompareTier { + let tier: CompareTier = "visual_only"; + if (meanDelta <= 0.005 && maxDelta <= 0.01) tier = "metric_safe"; + else if (meanDelta <= 0.01 && maxDelta <= 0.025) tier = "near_metric"; + if (model === "monospace" && tier !== "visual_only") return "cell_width_only"; + return tier; +} From 0461f70b2df4041e8a6ea4ecb02431ec09d654fa Mon Sep 17 00:00:00 2001 From: Caio Pizzol Date: Mon, 8 Jun 2026 18:00:21 -0300 Subject: [PATCH 4/4] ci: skip fallbacks release without package changes --- .github/workflows/release.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4f27535..1e778db 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -25,17 +25,37 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Check package changes + id: package_changes + run: | + latest_tag="$(git describe --tags --match 'v*' --abbrev=0 2>/dev/null || true)" + if [ -z "$latest_tag" ]; then + echo "changed=true" >> "$GITHUB_OUTPUT" + exit 0 + fi + + if git diff --quiet "$latest_tag"..HEAD -- packages/fallbacks; then + echo "changed=false" >> "$GITHUB_OUTPUT" + echo "No packages/fallbacks changes since $latest_tag." 
+ else + echo "changed=true" >> "$GITHUB_OUTPUT" + fi - uses: oven-sh/setup-bun@v2 + if: steps.package_changes.outputs.changed == 'true' with: bun-version: 1.3.12 - run: bun install --frozen-lockfile + if: steps.package_changes.outputs.changed == 'true' - run: bun run build + if: steps.package_changes.outputs.changed == 'true' - name: Clear Bun install tree before npm publish + if: steps.package_changes.outputs.changed == 'true' run: rm -rf node_modules packages/*/node_modules - env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} NPM_TOKEN: ${{ secrets.NPM_TOKEN }} ANTHROPIC_API_KEY_RELEASE_NOTES: ${{ secrets.ANTHROPIC_API_KEY_RELEASE_NOTES }} + if: steps.package_changes.outputs.changed == 'true' run: > npx --yes --package semantic-release@24