diff --git a/packages/fallbacks/README.md b/packages/fallbacks/README.md index c108728..28f88d4 100644 --- a/packages/fallbacks/README.md +++ b/packages/fallbacks/README.md @@ -111,7 +111,7 @@ The full structured rows are exported as `SUBSTITUTION_EVIDENCE` for richer repo These maintainer tools use ignored `.cache` files and are not shipped in the package. -`bun run acquire` downloads open-font candidates into `.cache/sources`. Sources come in two shapes: zip archives and pinned source trees. Set `DOCFONTS_SOURCE_CACHE` to use another cache directory, or pass `--source google-fonts` to acquire one source. +`bun run acquire` downloads open-font candidates into `.cache/sources`. Sources come in two shapes: release archives (zip or tar.gz) and pinned source trees. Set `DOCFONTS_SOURCE_CACHE` to use another cache directory, or pass `--source google-fonts` to acquire one source. `bun run compare` checks a private reference font against acquired OTF/TTF candidates and prints a ranked Latin advance-width table. It writes no fonts, paths, or results to the tree. 
diff --git a/packages/fallbacks/acquire.test.ts b/packages/fallbacks/acquire.test.ts index 06b6ab8..288c1bd 100644 --- a/packages/fallbacks/acquire.test.ts +++ b/packages/fallbacks/acquire.test.ts @@ -22,6 +22,8 @@ describe("source acquisition catalog", () => { "AGPL-3.0-FE", "Apache-2.0", "UFL-1.0", + "GPL-2.0-FE", + "Bitstream-Vera-DejaVu", ]); for (const source of SOURCE_RELEASES) { expect(source.sourceId).toMatch(/^[a-z0-9]+(-[a-z0-9]+)*$/); @@ -38,6 +40,8 @@ describe("source acquisition catalog", () => { expect(allowedLicenses.has(source.licenseFamily)).toBe(true); expect(source.downloadUrl.startsWith("https://")).toBe(true); expect(source.expectedFiles.length).toBeGreaterThan(0); + if (source.archiveFormat !== undefined) + expect(["zip", "tar.gz"]).toContain(source.archiveFormat); } } }); @@ -111,6 +115,69 @@ describe("source acquisition catalog", () => { ]); }); + test("carries the priority Liberation, Selawik, and DejaVu sources", () => { + const byId = new Map( + SOURCE_RELEASES.map((source) => [source.sourceId, source]), + ); + for (const id of [ + "liberation-fonts", + "liberation-sans-narrow", + "selawik", + "dejavu", + ]) + expect(byId.has(id)).toBe(true); + }); + + test("declares tar.gz sources and defaults the rest to zip", () => { + const byId = new Map( + SOURCE_RELEASES.map((source) => [source.sourceId, source]), + ); + const archiveFormatOf = (id: string) => { + const source = byId.get(id); + if (!source || source.kind === "github-tree") return undefined; + return source.archiveFormat ?? "zip"; + }; + + expect(archiveFormatOf("liberation-fonts")).toBe("tar.gz"); + expect(archiveFormatOf("liberation-sans-narrow")).toBe("tar.gz"); + expect(archiveFormatOf("selawik")).toBe("zip"); + expect(archiveFormatOf("dejavu")).toBe("zip"); + // Pre-existing sources never set the field, so they keep extracting as zip. 
+ expect(archiveFormatOf("urw-base35")).toBe("zip"); + expect(byId.get("urw-base35")).not.toHaveProperty("archiveFormat"); + }); + + test("anchors every Selawik 1.01 TTF, WOFF, and WOFF2 member", () => { + const source = SOURCE_RELEASES.find( + (candidate) => candidate.sourceId === "selawik", + ); + expect(source?.kind !== "github-tree").toBe(true); + if (!source || source.kind === "github-tree") return; + + // Selawik 1.01 ships five weights, each as .ttf, .woff, and .woff2. + const weights = ["selawk", "selawkb", "selawkl", "selawksb", "selawksl"]; + const expected = weights.flatMap((stem) => [ + `${stem}.ttf`, + `${stem}.woff`, + `${stem}.woff2`, + ]); + expect([...source.expectedFiles].sort()).toEqual([...expected].sort()); + }); + + test("uses the GPL font exception and DejaVu license families", () => { + const byId = new Map( + SOURCE_RELEASES.map((source) => [source.sourceId, source]), + ); + const narrow = byId.get("liberation-sans-narrow"); + const dejavu = byId.get("dejavu"); + expect(narrow?.kind !== "github-tree" && narrow?.licenseFamily).toBe( + "GPL-2.0-FE", + ); + expect(dejavu?.kind !== "github-tree" && dejavu?.licenseFamily).toBe( + "Bitstream-Vera-DejaVu", + ); + }); + test("spans more than one project and license family", () => { expect( new Set(SOURCE_RELEASES.map((source) => source.project)).size, diff --git a/packages/fallbacks/compare.test.ts b/packages/fallbacks/compare.test.ts index 930484f..a08a832 100644 --- a/packages/fallbacks/compare.test.ts +++ b/packages/fallbacks/compare.test.ts @@ -1,4 +1,5 @@ import { describe, expect, test } from "bun:test"; +import { execFileSync } from "node:child_process"; import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; @@ -465,3 +466,54 @@ describe("collectCandidates (GitHub tree sources)", () => { } }); }); + +describe("collectCandidates (archive sources)", () => { + test("reads tar.gz archive sources from the 
acquire snapshot format", () => { + const cacheDir = mkdtempSync(join(tmpdir(), "docfonts-tar-source-")); + try { + const sourceId = "tar-example"; + const root = join(cacheDir, "archive-root"); + mkdirSync(join(root, "fonts"), { recursive: true }); + writeFileSync( + join(root, "fonts", "Example-Regular.ttf"), + syntheticFont(), + ); + writeFileSync( + join(root, "fonts", "Example.woff"), + new Uint8Array([1, 2]), + ); + execFileSync( + "tar", + [ + "-czf", + join(cacheDir, `${sourceId}.tar.gz`), + "-C", + root, + "fonts/Example-Regular.ttf", + "fonts/Example.woff", + ], + { stdio: "ignore" }, + ); + + const source: SnapshotSource = { + sourceId, + family: "Example", + targetFamilies: ["Some Proprietary"], + kind: "archive", + archiveFormat: "tar.gz", + }; + + const candidates = collectCandidates(source, cacheDir); + expect(candidates.map((candidate) => candidate.file)).toEqual([ + "Example-Regular.ttf", + ]); + const score = scoreAdvances( + sampleMetrics(parseFont(syntheticFont())), + sampleMetrics(parseFont(candidates[0].bytes)), + ); + expect(score.tier).toBe("metric_safe"); + } finally { + rmSync(cacheDir, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/fallbacks/scripts/acquire.ts b/packages/fallbacks/scripts/acquire.ts index 9a8d509..14c5412 100644 --- a/packages/fallbacks/scripts/acquire.ts +++ b/packages/fallbacks/scripts/acquire.ts @@ -33,7 +33,12 @@ export type LicenseFamily = | "OFL-1.1" | "AGPL-3.0-FE" | "Apache-2.0" - | "UFL-1.0"; + | "UFL-1.0" + | "GPL-2.0-FE" + | "Bitstream-Vera-DejaVu"; + +/** Container format an archive source ships in. Absent is treated as "zip". */ +export type ArchiveFormat = "zip" | "tar.gz"; /** Fields shared by every acquisition source, regardless of how it is fetched. */ interface BaseSource { @@ -48,10 +53,12 @@ interface LicensedSource extends BaseSource { licenseUrl: string; } -/** A source delivered as a single zip archive containing many font members. 
*/ +/** A source delivered as a single archive containing many font members. */ export interface ArchiveSource extends LicensedSource { /** Optional for archive sources: an absent `kind` is treated as "archive". */ kind?: "archive"; + /** Archive container format. Absent is treated as "zip". */ + archiveFormat?: ArchiveFormat; downloadUrl: string; expectedFiles: string[]; } @@ -382,6 +389,114 @@ export const SOURCE_RELEASES: SourceRelease[] = [ targetFamilies: ["Consolas", "Courier New"], }, + { + sourceId: "liberation-fonts", + family: "Liberation", + project: "Liberation", + licenseFamily: "OFL-1.1", + archiveFormat: "tar.gz", + downloadUrl: + "https://github.com/liberationfonts/liberation-fonts/files/7261482/liberation-fonts-ttf-2.1.5.tar.gz", + licenseUrl: + "https://raw.githubusercontent.com/liberationfonts/liberation-fonts/2.1.5/LICENSE", + expectedFiles: [ + "LiberationMono-Regular.ttf", + "LiberationMono-Bold.ttf", + "LiberationMono-Italic.ttf", + "LiberationMono-BoldItalic.ttf", + "LiberationSans-Regular.ttf", + "LiberationSans-Bold.ttf", + "LiberationSans-Italic.ttf", + "LiberationSans-BoldItalic.ttf", + "LiberationSerif-Regular.ttf", + "LiberationSerif-Bold.ttf", + "LiberationSerif-Italic.ttf", + "LiberationSerif-BoldItalic.ttf", + ], + targetFamilies: ["Arial", "Helvetica", "Times New Roman", "Courier New"], + }, + { + sourceId: "liberation-sans-narrow", + family: "Liberation Sans Narrow", + project: "Liberation", + licenseFamily: "GPL-2.0-FE", + archiveFormat: "tar.gz", + downloadUrl: + "https://github.com/liberationfonts/liberation-sans-narrow/files/2579431/liberation-narrow-fonts-ttf-1.07.6.tar.gz", + licenseUrl: + "https://raw.githubusercontent.com/liberationfonts/liberation-sans-narrow/1.07.6/License.txt", + expectedFiles: [ + "LiberationSansNarrow-Regular.ttf", + "LiberationSansNarrow-Bold.ttf", + "LiberationSansNarrow-Italic.ttf", + "LiberationSansNarrow-BoldItalic.ttf", + ], + targetFamilies: ["Arial Narrow"], + }, + { + sourceId: "selawik", + 
family: "Selawik", + project: "Selawik", + licenseFamily: "OFL-1.1", + downloadUrl: + "https://github.com/microsoft/Selawik/releases/download/1.01/Selawik_Release.zip", + licenseUrl: + "https://raw.githubusercontent.com/microsoft/Selawik/1.01/LICENSE.txt", + expectedFiles: [ + "selawk.ttf", + "selawk.woff", + "selawk.woff2", + "selawkb.ttf", + "selawkb.woff", + "selawkb.woff2", + "selawkl.ttf", + "selawkl.woff", + "selawkl.woff2", + "selawksb.ttf", + "selawksb.woff", + "selawksb.woff2", + "selawksl.ttf", + "selawksl.woff", + "selawksl.woff2", + ], + targetFamilies: ["Segoe UI"], + }, + { + sourceId: "dejavu", + family: "DejaVu", + project: "DejaVu", + licenseFamily: "Bitstream-Vera-DejaVu", + downloadUrl: + "https://github.com/dejavu-fonts/dejavu-fonts/releases/download/version_2_37/dejavu-fonts-ttf-2.37.zip", + licenseUrl: + "https://raw.githubusercontent.com/dejavu-fonts/dejavu-fonts/version_2_37/LICENSE", + expectedFiles: [ + "DejaVuMathTeXGyre.ttf", + "DejaVuSans.ttf", + "DejaVuSans-Bold.ttf", + "DejaVuSans-BoldOblique.ttf", + "DejaVuSans-ExtraLight.ttf", + "DejaVuSans-Oblique.ttf", + "DejaVuSansCondensed.ttf", + "DejaVuSansCondensed-Bold.ttf", + "DejaVuSansCondensed-BoldOblique.ttf", + "DejaVuSansCondensed-Oblique.ttf", + "DejaVuSansMono.ttf", + "DejaVuSansMono-Bold.ttf", + "DejaVuSansMono-BoldOblique.ttf", + "DejaVuSansMono-Oblique.ttf", + "DejaVuSerif.ttf", + "DejaVuSerif-Bold.ttf", + "DejaVuSerif-BoldItalic.ttf", + "DejaVuSerif-Italic.ttf", + "DejaVuSerifCondensed.ttf", + "DejaVuSerifCondensed-Bold.ttf", + "DejaVuSerifCondensed-BoldItalic.ttf", + "DejaVuSerifCondensed-Italic.ttf", + ], + targetFamilies: ["Verdana", "Tahoma"], + }, + { kind: "github-tree", sourceId: "google-fonts", @@ -420,6 +535,7 @@ interface BaseSnapshot { interface ArchiveSnapshot extends BaseSnapshot { kind: "archive"; + archiveFormat: ArchiveFormat; licenseFamily: string; licenseUrl: string; licenseSha256: string; @@ -554,16 +670,30 @@ async function mapLimit( return results; } 
-function requireUnzip(): void { +const archiveFormatOf = (source: ArchiveSource): ArchiveFormat => + source.archiveFormat ?? "zip"; + +const archiveExtensions: Record<ArchiveFormat, string> = { + zip: "zip", + "tar.gz": "tar.gz", +}; + +function requireArchiveTool(format: ArchiveFormat): void { + const tool = format === "tar.gz" ? "tar" : "unzip"; + const probe = format === "tar.gz" ? "--version" : "-v"; try { - execFileSync("unzip", ["-v"], { stdio: "ignore" }); + execFileSync(tool, [probe], { stdio: "ignore" }); } catch { - throw new Error("`unzip` is required on PATH."); + throw new Error(`\`${tool}\` is required on PATH.`); } } -function listArchive(zipPath: string): string[] { - return execFileSync("unzip", ["-Z1", zipPath], { encoding: "utf8" }) +function listArchive(archivePath: string, format: ArchiveFormat): string[] { + const out = + format === "tar.gz" + ? execFileSync("tar", ["-tzf", archivePath], { encoding: "utf8" }) + : execFileSync("unzip", ["-Z1", archivePath], { encoding: "utf8" }); + return out .split("\n") .map((line) => line.trim()) .filter(Boolean); @@ -575,11 +705,22 @@ function listArchive(zipPath: string): string[] { const escapeArchiveMember = (name: string): string => name.replace(/[\\*?[\]]/g, "\\$&"); -function readArchiveMember(zipPath: string, name: string): Uint8Array { +function readArchiveMember( + archivePath: string, + name: string, + format: ArchiveFormat, +): Uint8Array { + const opts = { maxBuffer: 256 * 1024 * 1024 }; + // tar takes the member as a path after `--`; the tar.gz sources use plain ASCII + // member names, so the glob escaping that `unzip -p` needs does not apply here. return new Uint8Array( - execFileSync("unzip", ["-p", zipPath, escapeArchiveMember(name)], { - maxBuffer: 256 * 1024 * 1024, - }), + format === "tar.gz" + ? 
execFileSync("tar", ["-xzOf", archivePath, "--", name], opts) + : execFileSync( + "unzip", + ["-p", archivePath, escapeArchiveMember(name)], + opts, + ), ); } @@ -587,11 +728,15 @@ async function acquireArchive( source: ArchiveSource, cacheDir: string, ): Promise { + const format = archiveFormatOf(source); const archive = await fetchBytes(source.downloadUrl); - const zipPath = join(cacheDir, `${source.sourceId}.zip`); - writeFileSync(zipPath, archive); + const archivePath = join( + cacheDir, + `${source.sourceId}.${archiveExtensions[format]}`, + ); + writeFileSync(archivePath, archive); - const members = listArchive(zipPath).filter(isFontFile); + const members = listArchive(archivePath, format).filter(isFontFile); if (members.length === 0) throw new Error(`${source.sourceId}: archive has no font files`); @@ -599,7 +744,7 @@ async function acquireArchive( .map((member) => ({ name: basename(member), path: member, - sha256: sha256(readArchiveMember(zipPath, member)), + sha256: sha256(readArchiveMember(archivePath, member, format)), })) .sort((a, b) => a.path.localeCompare(b.path)); @@ -615,6 +760,7 @@ async function acquireArchive( return { kind: "archive", + archiveFormat: format, sourceId: source.sourceId, family: source.family, project: source.project, @@ -751,7 +897,11 @@ async function main(): Promise { return source; }); - if (sources.some((source) => source.kind !== "github-tree")) requireUnzip(); + const archiveSources = sources.filter( + (source): source is ArchiveSource => source.kind !== "github-tree", + ); + for (const format of new Set(archiveSources.map(archiveFormatOf))) + requireArchiveTool(format); const cacheDir = process.env.DOCFONTS_SOURCE_CACHE ?? 
DEFAULT_CACHE_DIR; mkdirSync(cacheDir, { recursive: true }); diff --git a/packages/fallbacks/scripts/compare.ts b/packages/fallbacks/scripts/compare.ts index 185a233..ceca031 100644 --- a/packages/fallbacks/scripts/compare.ts +++ b/packages/fallbacks/scripts/compare.ts @@ -347,16 +347,19 @@ interface SnapshotFile { path: string; } +type ArchiveFormat = "zip" | "tar.gz"; + /** * A source as recorded in `source-snapshot.json`. Archive sources extract their candidate fonts from a - * cached zip; GitHub tree sources read each `files[].path` directly from the cache. `kind` is optional so - * older snapshots (archive-only) still load and default to archive behavior. + * cached release archive; GitHub tree sources read each `files[].path` directly from the cache. `kind` is + * optional so older snapshots (archive-only) still load and default to archive behavior. */ export interface SnapshotSource { sourceId: string; family: string; targetFamilies: string[]; kind?: "archive" | "github-tree"; + archiveFormat?: ArchiveFormat; files?: SnapshotFile[]; } @@ -366,11 +369,21 @@ export interface CandidateFile { bytes: Uint8Array; } -function requireUnzip(): void { +const archiveFormatOf = (source: SnapshotSource): ArchiveFormat => + source.archiveFormat ?? "zip"; + +const archiveExtensions: Record<ArchiveFormat, string> = { + zip: "zip", + "tar.gz": "tar.gz", +}; + +function requireArchiveTool(format: ArchiveFormat): void { + const tool = format === "tar.gz" ? "tar" : "unzip"; + const probe = format === "tar.gz" ? "--version" : "-v"; try { - execFileSync("unzip", ["-v"], { stdio: "ignore" }); + execFileSync(tool, [probe], { stdio: "ignore" }); } catch { - throw new Error("`unzip` is required on PATH."); + throw new Error(`\`${tool}\` is required on PATH.`); } } @@ -379,8 +392,12 @@ function isFontFile(path: string): boolean { } /** Font members inside a source archive, by their in-archive path. 
*/ -function listFontMembers(zipPath: string): string[] { - return execFileSync("unzip", ["-Z1", zipPath], { encoding: "utf8" }) +function listFontMembers(archivePath: string, format: ArchiveFormat): string[] { + const out = + format === "tar.gz" + ? execFileSync("tar", ["-tzf", archivePath], { encoding: "utf8" }) + : execFileSync("unzip", ["-Z1", archivePath], { encoding: "utf8" }); + return out .split("\n") .map((line) => line.trim()) .filter(Boolean) @@ -393,11 +410,20 @@ function listFontMembers(zipPath: string): string[] { const escapeArchiveMember = (name: string): string => name.replace(/[\\*?[\]]/g, "\\$&"); -function readArchiveMember(zipPath: string, member: string): Uint8Array { +function readArchiveMember( + archivePath: string, + member: string, + format: ArchiveFormat, +): Uint8Array { + const opts = { maxBuffer: 256 * 1024 * 1024 }; return new Uint8Array( - execFileSync("unzip", ["-p", zipPath, escapeArchiveMember(member)], { - maxBuffer: 256 * 1024 * 1024, - }), + format === "tar.gz" + ? execFileSync("tar", ["-xzOf", archivePath, "--", member], opts) + : execFileSync( + "unzip", + ["-p", archivePath, escapeArchiveMember(member)], + opts, + ), ); } @@ -423,8 +449,8 @@ function loadSnapshot(cacheDir: string): SnapshotSource[] { /** * Collect the candidate fonts for one source from the cache. GitHub tree sources read each snapshot file - * entry directly; archive sources list and extract font members from the cached zip. Throws when an - * expected cache file is absent so the caller can point the user back at `bun run acquire`. + * entry directly; archive sources list and extract font members from the cached release archive. Throws + * when an expected cache file is absent so the caller can point the user back at `bun run acquire`. 
*/ export function collectCandidates( source: SnapshotSource, @@ -444,14 +470,18 @@ export function collectCandidates( }); } - const zipPath = join(cacheDir, `${source.sourceId}.zip`); - if (!existsSync(zipPath)) + const format = archiveFormatOf(source); + const archivePath = join( + cacheDir, + `${source.sourceId}.${archiveExtensions[format]}`, + ); + if (!existsSync(archivePath)) throw new Error( - `candidate archive missing for ${source.sourceId}: ${zipPath}. Run \`bun run acquire\` first.`, + `candidate archive missing for ${source.sourceId}: ${archivePath}. Run \`bun run acquire\` first.`, ); - const members = listFontMembers(zipPath); + const members = listFontMembers(archivePath, format); if (members.length === 0) - throw new Error(`no candidate font files in ${zipPath}`); + throw new Error(`no candidate font files in ${archivePath}`); const basenameCounts = new Map(); for (const member of members) { @@ -464,7 +494,7 @@ export function collectCandidates( return members.map((member) => ({ file: displayNameForMember(member, duplicateBasenames), - bytes: readArchiveMember(zipPath, member), + bytes: readArchiveMember(archivePath, member, format), })); } @@ -639,8 +669,11 @@ function main(): void { selected = snapshot; } - // Only archive sources need `unzip`; a GitHub-tree-only run does not. - if (selected.some((source) => source.kind !== "github-tree")) requireUnzip(); + const archiveSources = selected.filter( + (source) => source.kind !== "github-tree", + ); + for (const format of new Set(archiveSources.map(archiveFormatOf))) + requireArchiveTool(format); const reference = sampleMetrics(parseFont(readFileSync(args.reference)));