From 01a4b76dae17cd351328c9ad1d441f9e3c263f76 Mon Sep 17 00:00:00 2001 From: Malloo <26630797+MA2153@users.noreply.github.com> Date: Mon, 29 Jun 2026 12:29:16 +0300 Subject: [PATCH 01/11] feat(loader): add subtree operator to taxonomy where filters --- packages/core/src/index.ts | 1 + packages/core/src/loader.ts | 76 +++++++++++++++- packages/core/src/query.ts | 3 +- .../loader-taxonomy-subtree-filter.test.ts | 87 +++++++++++++++++++ 4 files changed, 162 insertions(+), 5 deletions(-) create mode 100644 packages/core/tests/unit/loader-taxonomy-subtree-filter.test.ts diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index f0d29f75e..e44e90152 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -131,6 +131,7 @@ export type { TranslationSummary, TranslationsResult, WhereRange, + WhereSubtree, WhereValue, } from "./query.js"; diff --git a/packages/core/src/loader.ts b/packages/core/src/loader.ts index afab5970a..ea1ce3471 100644 --- a/packages/core/src/loader.ts +++ b/packages/core/src/loader.ts @@ -543,9 +543,16 @@ function buildCursorCondition( return sql`(${sql.ref(primary.field)} > ${orderValue} OR (${sql.ref(primary.field)} = ${orderValue} AND ${sql.ref(idField)} > ${cursorId}))`; } -/** Type guard: is the where value a range object (not a string or array)? */ +/** Type guard: is the where value a taxonomy subtree operator? */ +function isWhereSubtree(value: WhereValue): value is WhereSubtree { + return value !== null && typeof value === "object" && !Array.isArray(value) && "subtree" in value; +} + +/** Type guard: is the where value a range object (not a string, array, or subtree)? 
*/ function isWhereRange(value: WhereValue): value is WhereRange { - return value !== null && typeof value === "object" && !Array.isArray(value); + return ( + value !== null && typeof value === "object" && !Array.isArray(value) && !("subtree" in value) + ); } /** @@ -598,9 +605,20 @@ export interface WhereRange { } /** - * A where clause value: exact match, multi-value match, or range comparison. + * Match a hierarchical taxonomy term and all of its descendants. `subtree` is + * one or more root slugs; descendants are resolved in SQL from the term + * hierarchy, so the matched set is independent of how many descendants exist + * (no per-slug bound parameters). + */ +export interface WhereSubtree { + subtree: string | string[]; +} + +/** + * A where clause value: exact match, multi-value match, range comparison, or + * taxonomy subtree (term-or-descendants) match. */ -export type WhereValue = string | string[] | WhereRange; +export type WhereValue = string | string[] | WhereRange | WhereSubtree; /** * Fields shared by every collection filter, independent of pagination mode. @@ -803,6 +821,7 @@ export function emdashLoader(): LiveLoader 0 + ? sql`${sql.join( + subtreeFilters.map( + (f) => sql`AND EXISTS ( + SELECT 1 FROM content_taxonomies ct + WHERE ct.collection = ${type} + AND ct.entry_id = ${sql.ref(tableName)}.id + AND ct.taxonomy_id IN ( + WITH RECURSIVE sub(grp) AS ( + SELECT COALESCE(translation_group, id) FROM taxonomies + WHERE name = ${f.name} + AND slug IN (${sql.join(f.roots.map((s) => sql`${s}`))}) + UNION + SELECT COALESCE(c.translation_group, c.id) FROM taxonomies c + JOIN sub ON c.parent_id = sub.grp + ) + SELECT grp FROM sub + ) + )`, + ), + sql` `, + )}` + : sql``; + // `_emdash_content_bylines.byline_id` stores the byline's // translation_group (migration 040), so a credit spans every // locale variant of the byline and we match the group directly. 
@@ -929,6 +996,7 @@ export function emdashLoader(): LiveLoader; -export type { WhereRange, WhereValue }; +export type { WhereRange, WhereValue, WhereSubtree }; /** * Fields shared by every collection query, independent of pagination mode. diff --git a/packages/core/tests/unit/loader-taxonomy-subtree-filter.test.ts b/packages/core/tests/unit/loader-taxonomy-subtree-filter.test.ts new file mode 100644 index 000000000..2aab98551 --- /dev/null +++ b/packages/core/tests/unit/loader-taxonomy-subtree-filter.test.ts @@ -0,0 +1,87 @@ +import type { Kysely } from "kysely"; +import { it, expect, beforeEach, afterEach } from "vitest"; + +import { handleContentCreate } from "../../src/api/index.js"; +import type { Database } from "../../src/database/types.js"; +import { emdashLoader } from "../../src/loader.js"; +import { runWithContext } from "../../src/request-context.js"; +import { + describeEachDialect, + setupForDialectWithCollections, + teardownForDialect, + type DialectName, + type DialectTestContext, +} from "../utils/test-db.js"; + +describeEachDialect("Loader taxonomy subtree filter", (dialectName: DialectName) => { + let ctx: DialectTestContext; + let db: Kysely; + let termSeq = 0; + + beforeEach(async () => { + ctx = await setupForDialectWithCollections(dialectName); + db = ctx.db; + termSeq = 0; + }); + + afterEach(async () => { + await teardownForDialect(ctx); + }); + + async function createPost(title: string) { + const result = await handleContentCreate(db, "post", { data: { title }, status: "published" }); + if (!result.success) throw new Error("Failed to create post"); + return result.data!.item; + } + + // parentId is the parent's translation_group (== parent id for untranslated terms). + async function term(name: string, slug: string, parentId?: string) { + const id = `tax_${name}_${slug}_${termSeq++}`; + await db + .insertInto("taxonomies" as never) + .values({ + id, + name, + slug, + label: slug, + translation_group: id, + parent_id: parentId ?? 
null, + } as never) + .execute(); + return id; + } + + async function tag(contentId: string, taxonomyId: string) { + await db + .insertInto("content_taxonomies" as never) + .values({ collection: "post", entry_id: contentId, taxonomy_id: taxonomyId } as never) + .execute(); + } + + function load(where: Record) { + const loader = emdashLoader(); + return runWithContext({ editMode: false, db }, () => + loader.loadCollection!({ filter: { type: "post", where: where as never } }), + ); + } + + it("matches a term and its descendants (single root)", async () => { + const region = await term("category", "region"); + const north = await term("category", "north", region); + const city = await term("category", "city", north); + + const rootPost = await createPost("Tagged at root"); + const leafPost = await createPost("Tagged at leaf"); + const outsidePost = await createPost("Outside subtree"); + const other = await term("category", "south", region); + + await tag(rootPost.id, north); + await tag(leafPost.id, city); + await tag(outsidePost.id, other); + + const result = await load({ category: { subtree: "north" } }); + + const titles = result.entries.map((e) => e.data.title).toSorted(); + expect(titles).toEqual(["Tagged at leaf", "Tagged at root"]); + }); +}); From f2f9ecfcd94fc8835f65978c897b3e1f578c19dc Mon Sep 17 00:00:00 2001 From: Malloo <26630797+MA2153@users.noreply.github.com> Date: Mon, 29 Jun 2026 12:36:13 +0300 Subject: [PATCH 02/11] fix(loader): short-circuit empty subtree roots to avoid invalid SQL Co-Authored-By: Claude Opus 4.8 --- packages/core/src/loader.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/core/src/loader.ts b/packages/core/src/loader.ts index ea1ce3471..45c3f196c 100644 --- a/packages/core/src/loader.ts +++ b/packages/core/src/loader.ts @@ -881,7 +881,8 @@ export function emdashLoader(): LiveLoader f.slugs.length === 0) + taxonomyFilters.some((f) => f.slugs.length === 0) || + subtreeFilters.some((f) => f.roots.length 
=== 0) ) { return { entries: [], cacheHint: { tags: [type] } }; } From 15f4961eabf2bd0908e640f5e7a86f37030fb44f Mon Sep 17 00:00:00 2001 From: Malloo <26630797+MA2153@users.noreply.github.com> Date: Mon, 29 Jun 2026 12:40:15 +0300 Subject: [PATCH 03/11] test(loader): cover subtree filter overflow, multi-root, locale, pagination Co-Authored-By: Claude Sonnet 4.6 --- .../loader-taxonomy-subtree-filter.test.ts | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/packages/core/tests/unit/loader-taxonomy-subtree-filter.test.ts b/packages/core/tests/unit/loader-taxonomy-subtree-filter.test.ts index 2aab98551..4b72ecf40 100644 --- a/packages/core/tests/unit/loader-taxonomy-subtree-filter.test.ts +++ b/packages/core/tests/unit/loader-taxonomy-subtree-filter.test.ts @@ -84,4 +84,131 @@ describeEachDialect("Loader taxonomy subtree filter", (dialectName: DialectName) const titles = result.entries.map((e) => e.data.title).toSorted(); expect(titles).toEqual(["Tagged at leaf", "Tagged at root"]); }); + + it("matches the union of several roots", async () => { + const region = await term("category", "region"); + const north = await term("category", "north", region); + const south = await term("category", "south", region); + const east = await term("category", "east", region); + + const np = await createPost("north"); + const sp = await createPost("south"); + const ep = await createPost("east"); + await tag(np.id, north); + await tag(sp.id, south); + await tag(ep.id, east); + + const result = await load({ category: { subtree: ["north", "south"] } }); + const titles = result.entries.map((e) => e.data.title).toSorted(); + expect(titles).toEqual(["north", "south"]); + }); + + it("matches a subtree with more than 100 descendants (no bind-param overflow)", async () => { + const region = await term("category", "region"); + const leaves: string[] = []; + for (let i = 0; i < 150; i++) { + leaves.push(await term("category", `leaf-${i}`, region)); + } + const post = await 
createPost("deep"); + await tag(post.id, leaves[120]!); // tagged under one deep leaf + + // Selecting the root must match via the descendant without enumerating + // 150 slugs as bound parameters. + const result = await load({ category: { subtree: "region" } }); + expect(result.entries.map((e) => e.data.title)).toEqual(["deep"]); + }); + + it("combines a subtree filter with an exact filter across two taxonomies", async () => { + const region = await term("category", "region"); + const north = await term("category", "north", region); + const featured = await term("tag", "featured"); + + const both = await createPost("north + featured"); + const northOnly = await createPost("north only"); + await tag(both.id, north); + await tag(both.id, featured); + await tag(northOnly.id, north); + + const result = await load({ category: { subtree: "region" }, tag: ["featured"] }); + expect(result.entries.map((e) => e.data.title)).toEqual(["north + featured"]); + }); + + it("an empty subtree roots array matches nothing", async () => { + const region = await term("category", "region"); + const post = await createPost("anything"); + await tag(post.id, region); + + const result = await load({ category: { subtree: [] } }); + expect(result.entries).toHaveLength(0); + }); + + it("matches descendants tagged in a different locale (match is by group)", async () => { + // Parent + child share a group across locales: the "en" child and its + // "de" translation share translation_group; content tagged by group + // matches regardless of the term row's locale. 
+ const region = await term("category", "region"); + const childGroup = `grp_child_${termSeq}`; + const childEn = `tax_en_child_${termSeq++}`; + const childDe = `tax_de_child_${termSeq++}`; + await db + .insertInto("taxonomies" as never) + .values({ + id: childEn, + name: "category", + slug: "child-en", + label: "child", + translation_group: childGroup, + parent_id: region, + locale: "en", + } as never) + .execute(); + await db + .insertInto("taxonomies" as never) + .values({ + id: childDe, + name: "category", + slug: "child-de", + label: "child", + translation_group: childGroup, + parent_id: region, + locale: "de", + } as never) + .execute(); + + const post = await createPost("tagged by group"); + await tag(post.id, childGroup); // content_taxonomies stores the group + + const result = await load({ category: { subtree: "region" } }); + expect(result.entries.map((e) => e.data.title)).toEqual(["tagged by group"]); + }); + + it("paginates a subtree filter with limit and cursor", async () => { + const region = await term("category", "region"); + const north = await term("category", "north", region); + for (let i = 0; i < 3; i++) { + const p = await createPost(`p${i}`); + await tag(p.id, north); + } + + const loader = emdashLoader(); + const first = await runWithContext({ editMode: false, db }, () => + loader.loadCollection!({ + filter: { type: "post", where: { category: { subtree: "region" } } as never, limit: 2 }, + }), + ); + expect(first.entries).toHaveLength(2); + expect(first.nextCursor).toBeTruthy(); + + const second = await runWithContext({ editMode: false, db }, () => + loader.loadCollection!({ + filter: { + type: "post", + where: { category: { subtree: "region" } } as never, + limit: 2, + cursor: first.nextCursor, + }, + }), + ); + expect(second.entries).toHaveLength(1); + }); }); From f6a3c2cad528237e8a8dc1f6dad13f18e93135ea Mon Sep 17 00:00:00 2001 From: Malloo <26630797+MA2153@users.noreply.github.com> Date: Mon, 29 Jun 2026 12:49:10 +0300 Subject: [PATCH 
04/11] test(loader): strengthen subtree overflow guard past SQLite bind limit Co-Authored-By: Claude Opus 4.8 --- .../loader-taxonomy-subtree-filter.test.ts | 51 +++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/packages/core/tests/unit/loader-taxonomy-subtree-filter.test.ts b/packages/core/tests/unit/loader-taxonomy-subtree-filter.test.ts index 4b72ecf40..db3769f60 100644 --- a/packages/core/tests/unit/loader-taxonomy-subtree-filter.test.ts +++ b/packages/core/tests/unit/loader-taxonomy-subtree-filter.test.ts @@ -103,19 +103,52 @@ describeEachDialect("Loader taxonomy subtree filter", (dialectName: DialectName) expect(titles).toEqual(["north", "south"]); }); - it("matches a subtree with more than 100 descendants (no bind-param overflow)", async () => { + it("matches a subtree with more than 999 descendants (would overflow SQLite bind limit if slugs were enumerated)", async () => { const region = await term("category", "region"); - const leaves: string[] = []; - for (let i = 0; i < 150; i++) { - leaves.push(await term("category", `leaf-${i}`, region)); + + // Build 1001 direct children of region. Insert in chunks of 100 rows + // (~600 bind params per statement) to stay safely under SQLite's 999-variable + // limit during insertion while still exceeding it for any naive slug-enumeration + // regression in the filter itself. 
+ const TOTAL = 1001; + const CHUNK_SIZE = 100; + const rows: { + id: string; + name: string; + slug: string; + label: string; + translation_group: string; + parent_id: string; + }[] = []; + for (let i = 0; i < TOTAL; i++) { + const id = `tax_category_leaf-${i}_${termSeq++}`; + rows.push({ + id, + name: "category", + slug: `leaf-${i}`, + label: `leaf-${i}`, + translation_group: id, + parent_id: region, + }); + } + for (let start = 0; start < rows.length; start += CHUNK_SIZE) { + await db + .insertInto("taxonomies" as never) + .values(rows.slice(start, start + CHUNK_SIZE) as never) + .execute(); } - const post = await createPost("deep"); - await tag(post.id, leaves[120]!); // tagged under one deep leaf - // Selecting the root must match via the descendant without enumerating - // 150 slugs as bound parameters. + // Tag under the last leaf — one match regardless of which leaf is chosen. + const post = await createPost("matched via a descendant, not by enumerating slugs"); + await tag(post.id, rows[TOTAL - 1]!.id); + + // The recursive CTE binds only the single root slug. If this regressed to + // enumerating 1001 descendant slugs as bound params, SQLite would reject the + // query and this test would fail — which is the guard we want. 
const result = await load({ category: { subtree: "region" } }); - expect(result.entries.map((e) => e.data.title)).toEqual(["deep"]); + expect(result.entries.map((e) => e.data.title)).toEqual([ + "matched via a descendant, not by enumerating slugs", + ]); }); it("combines a subtree filter with an exact filter across two taxonomies", async () => { From 507b7a83885819594b274abdab92217e5f933d77 Mon Sep 17 00:00:00 2001 From: Malloo <26630797+MA2153@users.noreply.github.com> Date: Mon, 29 Jun 2026 12:53:25 +0300 Subject: [PATCH 05/11] feat(taxonomy): add countEntriesForSubtrees distinct rollup Co-Authored-By: Claude Opus 4.8 --- .../src/database/repositories/taxonomy.ts | 34 ++++++++- .../taxonomy-subtree-counts.test.ts | 75 +++++++++++++++++++ 2 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts diff --git a/packages/core/src/database/repositories/taxonomy.ts b/packages/core/src/database/repositories/taxonomy.ts index de6edaa5d..ef7d26770 100644 --- a/packages/core/src/database/repositories/taxonomy.ts +++ b/packages/core/src/database/repositories/taxonomy.ts @@ -1,4 +1,4 @@ -import type { Kysely, Selectable } from "kysely"; +import { sql, type Kysely, type Selectable } from "kysely"; import { ulid } from "ulidx"; import { invalidateTaxonomyObjectCache } from "../../object-cache/index.js"; @@ -475,6 +475,38 @@ export class TaxonomyRepository { return counts; } + /** + * Rolled-up usage counts for every term in a taxonomy: each term's count is + * the number of DISTINCT entries tagged anywhere in its subtree (itself plus + * all descendants). 
A single transitive-closure walk of `parent_id` (a + * translation_group after migration 045) produces (ancestor, descendant) + * pairs; joining to `content_taxonomies` and counting distinct entries per + * ancestor gives counts that exactly match what a `{ subtree }` where filter + * returns — an entry tagged at multiple levels is counted once. + * + * Returns a Map from translation_group to distinct-entry count. Counts are + * global across collections, mirroring `countEntriesForTerms`. + */ + async countEntriesForSubtrees(taxonomyName: string): Promise> { + const result = await sql<{ grp: string; count: number | string }>` + WITH RECURSIVE closure(ancestor, descendant) AS ( + SELECT COALESCE(translation_group, id), COALESCE(translation_group, id) + FROM taxonomies WHERE name = ${taxonomyName} + UNION + SELECT closure.ancestor, COALESCE(c.translation_group, c.id) + FROM taxonomies c JOIN closure ON c.parent_id = closure.descendant + ) + SELECT closure.ancestor AS grp, COUNT(DISTINCT ct.entry_id) AS count + FROM closure + JOIN content_taxonomies ct ON ct.taxonomy_id = closure.descendant + GROUP BY closure.ancestor + `.execute(this.db); + + const counts = new Map(); + for (const row of result.rows) counts.set(row.grp, Number(row.count ?? 
0)); + return counts; + } + private rowToTaxonomy(row: Selectable): Taxonomy { return { id: row.id, diff --git a/packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts b/packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts new file mode 100644 index 000000000..9f82fce93 --- /dev/null +++ b/packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts @@ -0,0 +1,75 @@ +import type { Kysely } from "kysely"; +import { it, expect, beforeEach, afterEach } from "vitest"; + +import { handleContentCreate } from "../../../src/api/index.js"; +import { TaxonomyRepository } from "../../../src/database/repositories/taxonomy.js"; +import type { Database } from "../../../src/database/types.js"; +import { + describeEachDialect, + setupForDialectWithCollections, + teardownForDialect, + type DialectName, + type DialectTestContext, +} from "../../utils/test-db.js"; + +describeEachDialect("Taxonomy subtree counts", (dialectName: DialectName) => { + let ctx: DialectTestContext; + let db: Kysely; + let seq = 0; + + beforeEach(async () => { + ctx = await setupForDialectWithCollections(dialectName); + db = ctx.db; + seq = 0; + }); + afterEach(async () => { + await teardownForDialect(ctx); + }); + + async function term(slug: string, parentId?: string) { + const id = `tax_category_${slug}_${seq++}`; + await db + .insertInto("taxonomies" as never) + .values({ + id, + name: "category", + slug, + label: slug, + translation_group: id, + parent_id: parentId ?? 
null, + } as never) + .execute(); + return id; + } + async function post(title: string) { + const r = await handleContentCreate(db, "post", { data: { title }, status: "published" }); + if (!r.success) throw new Error("create failed"); + return r.data!.item; + } + async function tag(contentId: string, group: string) { + await db + .insertInto("content_taxonomies" as never) + .values({ collection: "post", entry_id: contentId, taxonomy_id: group } as never) + .execute(); + } + + it("rolls descendant counts up to ancestors as DISTINCT entries", async () => { + const region = await term("region"); + const north = await term("north", region); + const city = await term("city", north); + + const a = await post("a"); + const b = await post("b"); + await tag(a.id, city); + await tag(b.id, north); + // Entry tagged at BOTH a parent and its child must count once at the root. + await tag(a.id, north); + + const repo = new TaxonomyRepository(db); + const counts = await repo.countEntriesForSubtrees("category"); + + expect(counts.get(region)).toBe(2); // a + b, distinct (not 3) + expect(counts.get(north)).toBe(2); // a (via city + direct) + b, distinct + expect(counts.get(city)).toBe(1); // a + }); +}); From 6e6b14fe7cde094d817542a5b345fd7164e44949 Mon Sep 17 00:00:00 2001 From: Malloo <26630797+MA2153@users.noreply.github.com> Date: Mon, 29 Jun 2026 13:03:01 +0300 Subject: [PATCH 06/11] feat(taxonomy): add rollup option to getTaxonomyTerms Co-Authored-By: Claude Opus 4.8 --- packages/core/src/taxonomies/index.ts | 29 +++++++++++++++++-- .../taxonomy-subtree-counts.test.ts | 22 ++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/packages/core/src/taxonomies/index.ts b/packages/core/src/taxonomies/index.ts index 66b333999..8ded902e9 100644 --- a/packages/core/src/taxonomies/index.ts +++ b/packages/core/src/taxonomies/index.ts @@ -11,6 +11,7 @@ * the right per-locale term. 
*/ +import { TaxonomyRepository } from "../database/repositories/taxonomy.js"; import { resolveLocale, resolveLocaleChain } from "../i18n/resolve.js"; import { getDb } from "../loader.js"; import { @@ -26,6 +27,12 @@ import type { TaxonomyDef, TaxonomyTerm, TaxonomyTermRow } from "./types.js"; export interface TaxonomyQueryOptions { locale?: string; + /** + * Roll usage counts up the hierarchy: each term's `count` becomes the + * number of distinct entries tagged anywhere in its subtree. Default is + * exact-term counts. Only meaningful for hierarchical taxonomies. + */ + rollup?: boolean; } /** @@ -120,10 +127,11 @@ export async function getTaxonomyTerms( options: TaxonomyQueryOptions = {}, ): Promise { const locale = resolveLocale(options.locale); - return requestCached(`taxonomy-terms:${taxonomyName}:${locale ?? "*"}`, () => + const rollup = options.rollup ? "1" : "0"; + return requestCached(`taxonomy-terms:${taxonomyName}:${locale ?? "*"}:r${rollup}`, () => cachedQuery({ namespace: CacheNamespace.TAXONOMIES, - key: `terms:${taxonomyName}:${locale ?? "*"}`, + key: `terms:${taxonomyName}:${locale ?? "*"}:r${rollup}`, load: () => loadTaxonomyTerms(taxonomyName, locale, options), }), ); @@ -150,7 +158,9 @@ async function loadTaxonomyTerms( // Counts are keyed by translation_group (what the pivot stores) and are // locale-independent, so the aggregate is shared across every taxonomy // rendered in this request (Categories + Tags widgets, etc.). - const counts = await getTaxonomyTermCounts(); + const counts = options.rollup + ? await getTaxonomySubtreeCounts(taxonomyName) + : await getTaxonomyTermCounts(); const flatTerms: TaxonomyTermRow[] = rows.map((row) => ({ id: row.id, @@ -178,6 +188,19 @@ async function loadTaxonomyTerms( })); } +/** + * Subtree (rolled-up, distinct-entry) usage counts for one taxonomy, keyed by + * translation_group. Request-cached per taxonomy. 
Unlike the flat + * `getTaxonomyTermCounts` aggregate (shared across all taxonomies), the rollup + * is hierarchy-specific so it is keyed by name. + */ +function getTaxonomySubtreeCounts(taxonomyName: string): Promise> { + return requestCached(`taxonomy-subtree-counts:${taxonomyName}`, async () => { + const db = await getDb(); + return new TaxonomyRepository(db).countEntriesForSubtrees(taxonomyName); + }); +} + /** * Per-translation-group usage counts across all taxonomies, in one aggregate * scan of `content_taxonomies`. Counts are locale-independent (the pivot stores diff --git a/packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts b/packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts index 9f82fce93..d17552170 100644 --- a/packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts +++ b/packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts @@ -72,4 +72,26 @@ describeEachDialect("Taxonomy subtree counts", (dialectName: DialectName) => { expect(counts.get(north)).toBe(2); // a (via city + direct) + b, distinct expect(counts.get(city)).toBe(1); // a }); + + it("getTaxonomyTerms({ rollup }) returns subtree counts on the tree", async () => { + const { getTaxonomyTerms } = await import("../../../src/taxonomies/index.js"); + const { runWithContext } = await import("../../../src/request-context.js"); + + const region = await term("region"); + const north = await term("north", region); + const a = await post("a"); + const b = await post("b"); + await tag(a.id, north); + await tag(b.id, region); + + const tree = await runWithContext({ editMode: false, db }, () => + getTaxonomyTerms("category", { rollup: true }), + ); + const root = tree.find((t) => t.slug === "region"); + expect(root?.count).toBe(2); // a (under north) + b (direct) + + const flat = await runWithContext({ editMode: false, db }, () => getTaxonomyTerms("category")); + const flatRoot = flat.find((t) => t.slug === "region"); + 
expect(flatRoot?.count).toBe(1); // exact-term only + }); }); From ce90d302a70f28146ce77faf4bf43fe86b865a10 Mon Sep 17 00:00:00 2001 From: Malloo <26630797+MA2153@users.noreply.github.com> Date: Mon, 29 Jun 2026 13:22:47 +0300 Subject: [PATCH 07/11] feat(taxonomy): expose rollup counts via handleTermList and terms route Co-Authored-By: Claude Opus 4.8 --- packages/core/src/api/handlers/taxonomies.ts | 7 ++++--- packages/core/src/api/schemas/common.ts | 11 +++++++++++ .../routes/api/taxonomies/[name]/terms/index.ts | 9 ++++++--- .../taxonomies/taxonomy-subtree-counts.test.ts | 14 ++++++++++++++ 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/packages/core/src/api/handlers/taxonomies.ts b/packages/core/src/api/handlers/taxonomies.ts index 068240730..b1a5f98ab 100644 --- a/packages/core/src/api/handlers/taxonomies.ts +++ b/packages/core/src/api/handlers/taxonomies.ts @@ -369,7 +369,7 @@ export async function handleTaxonomyDefTranslations( export async function handleTermList( db: Kysely, taxonomyName: string, - options: { locale?: string } = {}, + options: { locale?: string; rollup?: boolean } = {}, ): Promise> { try { // Definitions are per-locale but terms aren't bound to the def's locale — @@ -383,8 +383,9 @@ export async function handleTermList( // Batch count entries per term in a single query (replaces N+1 pattern). // content_taxonomies.taxonomy_id stores the translation_group, so we // look up by group and map back to each term's id. - const groups = terms.map((t) => t.translationGroup ?? t.id); - const countsByGroup = await repo.countEntriesForTerms(groups); + const countsByGroup = options.rollup + ? await repo.countEntriesForSubtrees(taxonomyName) + : await repo.countEntriesForTerms(terms.map((t) => t.translationGroup ?? 
t.id)); const termData: TermWithCount[] = terms.map((term) => ({ id: term.id, diff --git a/packages/core/src/api/schemas/common.ts b/packages/core/src/api/schemas/common.ts index 292655381..602f86567 100644 --- a/packages/core/src/api/schemas/common.ts +++ b/packages/core/src/api/schemas/common.ts @@ -66,6 +66,17 @@ export const localeFilterQuery = z }) .meta({ id: "LocaleFilterQuery" }); +/** Query params for the terms list endpoint: locale filter + optional rollup subtree counts. */ +export const termListQuery = z + .object({ + locale: z.string().min(1).optional(), + rollup: z + .enum(["1", "true"]) + .optional() + .transform((v) => v != null), + }) + .meta({ id: "TermListQuery" }); + // --------------------------------------------------------------------------- // OpenAPI: Shared response schemas // --------------------------------------------------------------------------- diff --git a/packages/core/src/astro/routes/api/taxonomies/[name]/terms/index.ts b/packages/core/src/astro/routes/api/taxonomies/[name]/terms/index.ts index 26f3940a6..3d7225119 100644 --- a/packages/core/src/astro/routes/api/taxonomies/[name]/terms/index.ts +++ b/packages/core/src/astro/routes/api/taxonomies/[name]/terms/index.ts @@ -11,7 +11,7 @@ import { requirePerm } from "#api/authorize.js"; import { apiError, handleError, requireDb, unwrapResult } from "#api/error.js"; import { handleTermCreate, handleTermList } from "#api/handlers/taxonomies.js"; import { isParseError, parseBody, parseQuery } from "#api/parse.js"; -import { createTermBody, localeFilterQuery } from "#api/schemas.js"; +import { createTermBody, termListQuery } from "#api/schemas.js"; export const prerender = false; @@ -29,11 +29,14 @@ export const GET: APIRoute = async ({ params, request, locals }) => { const denied = requirePerm(user, "taxonomies:read"); if (denied) return denied; - const query = parseQuery(new URL(request.url), localeFilterQuery); + const query = parseQuery(new URL(request.url), termListQuery); if 
(isParseError(query)) return query; try { - const result = await handleTermList(emdash.db, name, { locale: query.locale }); + const result = await handleTermList(emdash.db, name, { + locale: query.locale, + rollup: query.rollup, + }); return unwrapResult(result); } catch (error) { return handleError(error, "Failed to list terms", "TERM_LIST_ERROR"); diff --git a/packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts b/packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts index d17552170..54530a088 100644 --- a/packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts +++ b/packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts @@ -94,4 +94,18 @@ describeEachDialect("Taxonomy subtree counts", (dialectName: DialectName) => { const flatRoot = flat.find((t) => t.slug === "region"); expect(flatRoot?.count).toBe(1); // exact-term only }); + + it("handleTermList({ rollup }) rolls counts up the tree", async () => { + const { handleTermList } = await import("../../../src/api/handlers/taxonomies.js"); + + const region = await term("region"); + const north = await term("north", region); + const a = await post("a"); + await tag(a.id, north); + + const res = await handleTermList(db, "category", { rollup: true }); + if (!res.success) throw new Error("handleTermList failed"); + const root = res.data.terms.find((t) => t.slug === "region"); + expect(root?.count).toBe(1); // rolled up from the descendant + }); }); From 88458b839f1922566c0339ed53ab0c3daaf78326 Mon Sep 17 00:00:00 2001 From: Malloo <26630797+MA2153@users.noreply.github.com> Date: Mon, 29 Jun 2026 13:52:40 +0300 Subject: [PATCH 08/11] chore: changeset for taxonomy subtree filter Co-Authored-By: Claude Opus 4.8 --- .changeset/taxonomy-subtree-filter.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/taxonomy-subtree-filter.md diff --git a/.changeset/taxonomy-subtree-filter.md b/.changeset/taxonomy-subtree-filter.md new 
file mode 100644 index 000000000..4832cedea --- /dev/null +++ b/.changeset/taxonomy-subtree-filter.md @@ -0,0 +1,5 @@ +--- +"emdash": minor +--- + +Adds a `subtree` operator to collection `where` taxonomy filters (`where: { category: { subtree: "news" } }`) that matches a term and all its descendants. Descendants are resolved in the database, so selecting a deep parent category no longer hits SQL bind-parameter limits. Also adds an opt-in `rollup` option to `getTaxonomyTerms` (and the admin terms endpoint via `?rollup=1`) for subtree-aware usage counts that count each entry once. From da8bc61ef58aa1cc7b4305ce1c2703ae1854fc2e Mon Sep 17 00:00:00 2001 From: Malloo <26630797+MA2153@users.noreply.github.com> Date: Mon, 29 Jun 2026 14:09:17 +0300 Subject: [PATCH 09/11] fix(taxonomy): coerce ?rollup=false/0 instead of rejecting with 400 Co-Authored-By: Claude Opus 4.8 --- packages/core/src/api/schemas/common.ts | 4 +-- packages/core/tests/unit/api/schemas.test.ts | 31 ++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/packages/core/src/api/schemas/common.ts b/packages/core/src/api/schemas/common.ts index 602f86567..ad90c369f 100644 --- a/packages/core/src/api/schemas/common.ts +++ b/packages/core/src/api/schemas/common.ts @@ -71,9 +71,9 @@ export const termListQuery = z .object({ locale: z.string().min(1).optional(), rollup: z - .enum(["1", "true"]) + .enum(["1", "true", "0", "false"]) .optional() - .transform((v) => v != null), + .transform((v) => v === "1" || v === "true"), }) .meta({ id: "TermListQuery" }); diff --git a/packages/core/tests/unit/api/schemas.test.ts b/packages/core/tests/unit/api/schemas.test.ts index 13be8052b..21e4e0315 100644 --- a/packages/core/tests/unit/api/schemas.test.ts +++ b/packages/core/tests/unit/api/schemas.test.ts @@ -1,5 +1,6 @@ import { describe, it, expect } from "vitest"; +import { termListQuery } from "../../../src/api/schemas/common.js"; import { contentCreateBody, contentUpdateBody, @@ -246,3 
+247,33 @@ describe("mediaUploadUrlBody schema factory", () => { expect(errorMessage).toContain("71MB"); }); }); + +describe("termListQuery schema — rollup coercion", () => { + it("coerces '1' to true", () => { + expect(termListQuery.parse({ rollup: "1" }).rollup).toBe(true); + }); + + it("coerces 'true' to true", () => { + expect(termListQuery.parse({ rollup: "true" }).rollup).toBe(true); + }); + + it("coerces '0' to false", () => { + expect(termListQuery.parse({ rollup: "0" }).rollup).toBe(false); + }); + + it("coerces 'false' to false", () => { + expect(termListQuery.parse({ rollup: "false" }).rollup).toBe(false); + }); + + it("defaults to false when rollup is absent", () => { + expect(termListQuery.parse({}).rollup).toBe(false); + }); + + it("still parses locale", () => { + expect(termListQuery.parse({ locale: "de" }).locale).toBe("de"); + }); + + it("rejects arbitrary string values", () => { + expect(() => termListQuery.parse({ rollup: "yes" })).toThrow(); + }); +}); From d9a1211358f0ad2b96f1e5a11cedd5962bcc1c71 Mon Sep 17 00:00:00 2001 From: Malloo <26630797+MA2153@users.noreply.github.com> Date: Mon, 29 Jun 2026 14:50:15 +0300 Subject: [PATCH 10/11] fix(taxonomy): coerce flat term counts to number for Postgres Postgres COUNT() returns bigint as a string, so getTaxonomyTermCounts returned "1" instead of 1 under the pg driver, failing the rollup test's exact-count assertion. Coerce with Number(), matching countEntriesForSubtrees. Also document the new `subtree` where-operator in the loader/query docstrings (review suggestion). 
Co-Authored-By: Claude Opus 4.8 --- packages/core/src/loader.ts | 1 + packages/core/src/query.ts | 1 + packages/core/src/taxonomies/index.ts | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/core/src/loader.ts b/packages/core/src/loader.ts index 45c3f196c..b07eee96b 100644 --- a/packages/core/src/loader.ts +++ b/packages/core/src/loader.ts @@ -645,6 +645,7 @@ export interface CollectionFilterBase { * @example { byline: '01HXYZ...' } - entries credited to a byline (any position) * @example { series: 'main' } - exact match on a content field * @example { published_at: { gte: '2024-01-01', lt: '2025-01-01' } } - date range + * @example { category: { subtree: 'news' } } - match a term and all descendants */ where?: Record; /** diff --git a/packages/core/src/query.ts b/packages/core/src/query.ts index 2c557e189..27854aa36 100644 --- a/packages/core/src/query.ts +++ b/packages/core/src/query.ts @@ -124,6 +124,7 @@ export interface CollectionFilterBase { * @example { byline: ['01HXYZ...', '01HABC...'] } - Credited to any of these bylines (OR) * @example { series: 'main' } - Exact match on a content field * @example { published_at: { gte: '2024-01-01', lt: '2025-01-01' } } - Date range + * @example { category: { subtree: 'news' } } - Match a term and all its descendants */ where?: Record; /** diff --git a/packages/core/src/taxonomies/index.ts b/packages/core/src/taxonomies/index.ts index 8ded902e9..0897d74ec 100644 --- a/packages/core/src/taxonomies/index.ts +++ b/packages/core/src/taxonomies/index.ts @@ -217,7 +217,9 @@ function getTaxonomyTermCounts(): Promise> { .groupBy("taxonomy_id") .execute(); const counts = new Map(); - for (const row of countsResult) counts.set(row.taxonomy_id, row.count); + // Postgres `COUNT()` returns bigint as a string; coerce so callers always + // get a number (matches `countEntriesForSubtrees`). + for (const row of countsResult) counts.set(row.taxonomy_id, Number(row.count ?? 
0)); return counts; }); } From 371dcce0334d167ff31648225da9b8496cd10f64 Mon Sep 17 00:00:00 2001 From: Malloo <26630797+MA2153@users.noreply.github.com> Date: Tue, 30 Jun 2026 16:05:44 +0300 Subject: [PATCH 11/11] Fix rollup counts query --- .../src/database/repositories/taxonomy.ts | 90 ++++++++++++++----- .../taxonomy-subtree-counts.test.ts | 32 +++++++ 2 files changed, 102 insertions(+), 20 deletions(-) diff --git a/packages/core/src/database/repositories/taxonomy.ts b/packages/core/src/database/repositories/taxonomy.ts index ef7d26770..cd837d00a 100644 --- a/packages/core/src/database/repositories/taxonomy.ts +++ b/packages/core/src/database/repositories/taxonomy.ts @@ -1,4 +1,4 @@ -import { sql, type Kysely, type Selectable } from "kysely"; +import { type Kysely, type Selectable } from "kysely"; import { ulid } from "ulidx"; import { invalidateTaxonomyObjectCache } from "../../object-cache/index.js"; @@ -478,32 +478,82 @@ export class TaxonomyRepository { /** * Rolled-up usage counts for every term in a taxonomy: each term's count is * the number of DISTINCT entries tagged anywhere in its subtree (itself plus - * all descendants). A single transitive-closure walk of `parent_id` (a - * translation_group after migration 045) produces (ancestor, descendant) - * pairs; joining to `content_taxonomies` and counting distinct entries per - * ancestor gives counts that exactly match what a `{ subtree }` where filter - * returns — an entry tagged at multiple levels is counted once. + * all descendants). Counts exactly match what a `{ subtree }` where + * filter returns — an entry tagged at multiple levels is counted once. * * Returns a Map from translation_group to distinct-entry count. Counts are * global across collections, mirroring `countEntriesForTerms`. + * + * Implementation reads the data it needs exactly once: one scan of this + * taxonomy's `parent_id` edges (`O(terms)`) and one scan of the assignment + * pivot for those terms (`O(assignments)`).
The rollup is a single in-memory + * fold up each term's ancestor chain. The earlier recursive-CTE formulation + * materialised the full ancestor×descendant closure and re-read the pivot + * once per ancestor level — `O(Σ depth × assignments)` rows read, hundreds of + * thousands of D1 rows for a few-thousand-term tree (and D1 bills per row + * read). Distinct semantics are preserved by unioning entry ids into each + * ancestor (a sum of child counts would double-count an entry tagged under + * two descendants of a shared ancestor). */ async countEntriesForSubtrees(taxonomyName: string): Promise> { - const result = await sql<{ grp: string; count: number | string }>` - WITH RECURSIVE closure(ancestor, descendant) AS ( - SELECT COALESCE(translation_group, id), COALESCE(translation_group, id) - FROM taxonomies WHERE name = ${taxonomyName} - UNION - SELECT closure.ancestor, COALESCE(c.translation_group, c.id) - FROM taxonomies c JOIN closure ON c.parent_id = closure.descendant - ) - SELECT closure.ancestor AS grp, COUNT(DISTINCT ct.entry_id) AS count - FROM closure - JOIN content_taxonomies ct ON ct.taxonomy_id = closure.descendant - GROUP BY closure.ancestor - `.execute(this.db); + // parent_id stores the parent's translation_group (migration 045), so the + // edge map is group -> parent group. Locale siblings share a group and an + // identical parent edge; first writer wins. + const termRows = await this.db + .selectFrom("taxonomies") + .select(["id", "parent_id", "translation_group"]) + .where("name", "=", taxonomyName) + .execute(); + if (termRows.length === 0) return new Map(); + + const parentOf = new Map(); + for (const row of termRows) { + const group = row.translation_group ?? row.id; + if (!parentOf.has(group)) parentOf.set(group, row.parent_id); + } + + // Self + ancestors for a group, memoised. The `seen` guard and the + // `parentOf.has` check bail on cycles or dangling parent refs in bad data. 
+ const ancestorCache = new Map(); + const ancestorsOf = (group: string): string[] => { + const cached = ancestorCache.get(group); + if (cached) return cached; + const chain: string[] = []; + const seen = new Set(); + let current: string | null | undefined = group; + while (current && parentOf.has(current) && !seen.has(current)) { + chain.push(current); + seen.add(current); + current = parentOf.get(current) ?? null; + } + ancestorCache.set(group, chain); + return chain; + }; + + // Union each assignment's entry id into every ancestor's distinct set, so + // each ancestor accumulates the DISTINCT entries across its whole subtree. + const { chunks, SQL_BATCH_SIZE } = await import("../../utils/chunks.js"); + const entriesByGroup = new Map>(); + for (const chunk of chunks([...parentOf.keys()], SQL_BATCH_SIZE)) { + const rows = await this.db + .selectFrom("content_taxonomies") + .select(["taxonomy_id", "entry_id"]) + .where("taxonomy_id", "in", chunk) + .execute(); + for (const row of rows) { + for (const ancestor of ancestorsOf(row.taxonomy_id)) { + let set = entriesByGroup.get(ancestor); + if (!set) { + set = new Set(); + entriesByGroup.set(ancestor, set); + } + set.add(row.entry_id); + } + } + } const counts = new Map(); - for (const row of result.rows) counts.set(row.grp, Number(row.count ?? 
0)); + for (const [group, entries] of entriesByGroup) counts.set(group, entries.size); return counts; } diff --git a/packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts b/packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts index 54530a088..4cf9ccd1f 100644 --- a/packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts +++ b/packages/core/tests/integration/taxonomies/taxonomy-subtree-counts.test.ts @@ -73,6 +73,38 @@ describeEachDialect("Taxonomy subtree counts", (dialectName: DialectName) => { expect(counts.get(city)).toBe(1); // a }); + it("counts an entry once at a shared ancestor across sibling subtrees", async () => { + // region -> {north -> city, south}. An entry tagged under both `city` + // (deep in north) and `south` (a sibling subtree) must count ONCE at the + // shared `region` ancestor. A rollup that sums child counts would report 2. + const region = await term("region"); + const north = await term("north", region); + const city = await term("city", north); + const south = await term("south", region); + + const a = await post("a"); + await tag(a.id, city); + await tag(a.id, south); + + const repo = new TaxonomyRepository(db); + const counts = await repo.countEntriesForSubtrees("category"); + + expect(counts.get(region)).toBe(1); // distinct across both subtrees + expect(counts.get(north)).toBe(1); + expect(counts.get(city)).toBe(1); + expect(counts.get(south)).toBe(1); + }); + + it("returns no entry for terms whose subtree has no assignments", async () => { + const region = await term("region"); + await term("north", region); + + const repo = new TaxonomyRepository(db); + const counts = await repo.countEntriesForSubtrees("category"); + + expect(counts.size).toBe(0); + }); + it("getTaxonomyTerms({ rollup }) returns subtree counts on the tree", async () => { const { getTaxonomyTerms } = await import("../../../src/taxonomies/index.js"); const { runWithContext } = await 
import("../../../src/request-context.js");