Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/taxonomy-subtree-filter.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"emdash": minor
---

Adds a `subtree` operator to collection `where` taxonomy filters (`where: { category: { subtree: "news" } }`) that matches a term and all its descendants. Descendants are resolved in the database, so selecting a deep parent category no longer hits SQL bind-parameter limits. Also adds an opt-in `rollup` option to `getTaxonomyTerms` (and the admin terms endpoint via `?rollup=1`) for subtree-aware usage counts that count each entry once.
7 changes: 4 additions & 3 deletions packages/core/src/api/handlers/taxonomies.ts
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ export async function handleTaxonomyDefTranslations(
export async function handleTermList(
db: Kysely<Database>,
taxonomyName: string,
options: { locale?: string } = {},
options: { locale?: string; rollup?: boolean } = {},
): Promise<ApiResult<TermListResponse>> {
try {
// Definitions are per-locale but terms aren't bound to the def's locale —
Expand All @@ -383,8 +383,9 @@ export async function handleTermList(
// Batch count entries per term in a single query (replaces N+1 pattern).
// content_taxonomies.taxonomy_id stores the translation_group, so we
// look up by group and map back to each term's id.
const groups = terms.map((t) => t.translationGroup ?? t.id);
const countsByGroup = await repo.countEntriesForTerms(groups);
const countsByGroup = options.rollup
? await repo.countEntriesForSubtrees(taxonomyName)
: await repo.countEntriesForTerms(terms.map((t) => t.translationGroup ?? t.id));

const termData: TermWithCount[] = terms.map((term) => ({
id: term.id,
Expand Down
11 changes: 11 additions & 0 deletions packages/core/src/api/schemas/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,17 @@ export const localeFilterQuery = z
})
.meta({ id: "LocaleFilterQuery" });

/**
 * Query-string schema for the terms list endpoint.
 *
 * - `locale`: optional, non-empty locale filter.
 * - `rollup`: optional flag. Only the literals "1", "true", "0" and "false"
 *   are accepted. The parsed value is always a boolean: `true` for "1" or
 *   "true", `false` for "0", "false", or when the parameter is omitted.
 */
export const termListQuery = z
	.object({
		locale: z.string().min(1).optional(),
		rollup: z
			.enum(["1", "true", "0", "false"])
			.optional()
			.transform((flag) => {
				switch (flag) {
					case "1":
					case "true":
						return true;
					default:
						// "0", "false" and an absent parameter all disable rollup.
						return false;
				}
			}),
	})
	.meta({ id: "TermListQuery" });

// ---------------------------------------------------------------------------
// OpenAPI: Shared response schemas
// ---------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { requirePerm } from "#api/authorize.js";
import { apiError, handleError, requireDb, unwrapResult } from "#api/error.js";
import { handleTermCreate, handleTermList } from "#api/handlers/taxonomies.js";
import { isParseError, parseBody, parseQuery } from "#api/parse.js";
import { createTermBody, localeFilterQuery } from "#api/schemas.js";
import { createTermBody, termListQuery } from "#api/schemas.js";

export const prerender = false;

Expand All @@ -29,11 +29,14 @@ export const GET: APIRoute = async ({ params, request, locals }) => {
const denied = requirePerm(user, "taxonomies:read");
if (denied) return denied;

const query = parseQuery(new URL(request.url), localeFilterQuery);
const query = parseQuery(new URL(request.url), termListQuery);
if (isParseError(query)) return query;

try {
const result = await handleTermList(emdash.db, name, { locale: query.locale });
const result = await handleTermList(emdash.db, name, {
locale: query.locale,
rollup: query.rollup,
});
return unwrapResult(result);
} catch (error) {
return handleError(error, "Failed to list terms", "TERM_LIST_ERROR");
Expand Down
84 changes: 83 additions & 1 deletion packages/core/src/database/repositories/taxonomy.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { Kysely, Selectable } from "kysely";
import { type Kysely, type Selectable } from "kysely";
import { ulid } from "ulidx";

import { invalidateTaxonomyObjectCache } from "../../object-cache/index.js";
Expand Down Expand Up @@ -475,6 +475,88 @@ export class TaxonomyRepository {
return counts;
}

/**
 * Computes subtree-aware ("rolled-up") usage counts for all terms of one taxonomy.
 *
 * A term's count is the number of DISTINCT entries assigned to the term itself
 * or to any term beneath it in the hierarchy. An entry assigned at several
 * levels of the same branch contributes 1 to each ancestor, not one per
 * assignment.
 *
 * Data access is two passes: one read of the taxonomy's term rows to build the
 * child -> parent edges, then a batched read of `content_taxonomies` for those
 * terms. The rest is an in-memory fold that adds each assignment's entry id to
 * the set of every term on the path from the assigned term up to its root.
 * Sets (rather than summed child counts) keep the result distinct: a sum would
 * count an entry twice when it sits under two different descendants of the
 * same ancestor.
 *
 * The assignment query is not restricted by collection, so counts span all
 * collections.
 *
 * @param taxonomyName - Value matched against `taxonomies.name`.
 * @returns Map keyed by translation group (the row id when a term has no
 *   translation group) holding the distinct-entry count. Groups with no
 *   entries anywhere in their subtree are absent from the map.
 */
async countEntriesForSubtrees(taxonomyName: string): Promise<Map<string, number>> {
	const taxonomyRows = await this.db
		.selectFrom("taxonomies")
		.select(["id", "parent_id", "translation_group"])
		.where("name", "=", taxonomyName)
		.execute();
	if (taxonomyRows.length === 0) return new Map();

	// Edge map: group -> parent group. `parent_id` holds the parent's
	// translation_group, and locale siblings share one group, so only the
	// first row seen for a group contributes its edge.
	const parentByGroup = new Map<string, string | null>();
	for (const { id, parent_id: parentGroup, translation_group: translationGroup } of taxonomyRows) {
		const key = translationGroup ?? id;
		if (parentByGroup.has(key)) continue;
		parentByGroup.set(key, parentGroup);
	}

	// Path from a group up to its root (the group itself first), cached per
	// starting group. The walk stops at an unknown group (dangling parent
	// reference) or at a group already visited (cycle).
	const lineageCache = new Map<string, string[]>();
	const lineageOf = (start: string): string[] => {
		const hit = lineageCache.get(start);
		if (hit) return hit;

		const lineage: string[] = [];
		const visited = new Set<string>();
		for (
			let node: string | null | undefined = start;
			node && parentByGroup.has(node) && !visited.has(node);
			node = parentByGroup.get(node) ?? null
		) {
			lineage.push(node);
			visited.add(node);
		}

		lineageCache.set(start, lineage);
		return lineage;
	};

	// Fold every assignment into the entry-id set of each group on its lineage.
	const { chunks, SQL_BATCH_SIZE } = await import("../../utils/chunks.js");
	const entryIdsByGroup = new Map<string, Set<string>>();
	const allGroups = [...parentByGroup.keys()];
	for (const batch of chunks(allGroups, SQL_BATCH_SIZE)) {
		const assignments = await this.db
			.selectFrom("content_taxonomies")
			.select(["taxonomy_id", "entry_id"])
			.where("taxonomy_id", "in", batch)
			.execute();
		for (const { taxonomy_id: termGroup, entry_id: entryId } of assignments) {
			for (const ancestor of lineageOf(termGroup)) {
				const bucket = entryIdsByGroup.get(ancestor);
				if (bucket) {
					bucket.add(entryId);
				} else {
					entryIdsByGroup.set(ancestor, new Set<string>([entryId]));
				}
			}
		}
	}

	// Collapse each distinct-entry set to its size.
	const totals = new Map<string, number>();
	entryIdsByGroup.forEach((ids, group) => {
		totals.set(group, ids.size);
	});
	return totals;
}

private rowToTaxonomy(row: Selectable<TaxonomyTable>): Taxonomy {
return {
id: row.id,
Expand Down
1 change: 1 addition & 0 deletions packages/core/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ export type {
TranslationSummary,
TranslationsResult,
WhereRange,
WhereSubtree,
WhereValue,
} from "./query.js";

Expand Down
80 changes: 75 additions & 5 deletions packages/core/src/loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -543,9 +543,16 @@ function buildCursorCondition(
return sql`(${sql.ref(primary.field)} > ${orderValue} OR (${sql.ref(primary.field)} = ${orderValue} AND ${sql.ref(idField)} > ${cursorId}))`;
}

/** Type guard: is the where value a range object (not a string or array)? */
/**
 * Type guard for the taxonomy subtree operator.
 *
 * Returns true only for a non-null, non-array object that has a `subtree`
 * key; strings, arrays and objects without that key are rejected.
 */
function isWhereSubtree(value: WhereValue): value is WhereSubtree {
	// `typeof null` is "object", so null is excluded explicitly.
	if (typeof value !== "object" || value === null) return false;
	if (Array.isArray(value)) return false;
	return "subtree" in value;
}

/** Type guard: is the where value a range object (not a string, array, or subtree)? */
function isWhereRange(value: WhereValue): value is WhereRange {
return value !== null && typeof value === "object" && !Array.isArray(value);
return (
value !== null && typeof value === "object" && !Array.isArray(value) && !("subtree" in value)
);
}

/**
Expand Down Expand Up @@ -598,9 +605,20 @@ export interface WhereRange {
}

/**
* A where clause value: exact match, multi-value match, or range comparison.
* Match a hierarchical taxonomy term and all of its descendants. `subtree` is
* one or more root slugs; descendants are resolved in SQL from the term
* hierarchy, so the matched set is independent of how many descendants exist
* (no per-slug bound parameters).
*/
export interface WhereSubtree {
/** A single root term slug, or a list of root slugs. */
subtree: string | string[];
}

/**
* A where clause value: exact match, multi-value match, range comparison, or
* taxonomy subtree (term-or-descendants) match.
*/
export type WhereValue = string | string[] | WhereRange;
export type WhereValue = string | string[] | WhereRange | WhereSubtree;

/**
* Fields shared by every collection filter, independent of pagination mode.
Expand All @@ -627,6 +645,7 @@ export interface CollectionFilterBase {
* @example { byline: '01HXYZ...' } - entries credited to a byline (any position)
* @example { series: 'main' } - exact match on a content field
* @example { published_at: { gte: '2024-01-01', lt: '2025-01-01' } } - date range
* @example { category: { subtree: 'news' } } - match a term and all descendants
*/
where?: Record<string, WhereValue>;
/**
Expand Down Expand Up @@ -803,6 +822,7 @@ export function emdashLoader(): LiveLoader<EntryData, EntryFilter, CollectionFil
// keeps its own `name` and emits its own `EXISTS` clause rather
// than pooling slugs into one `IN`.
const taxonomyFilters: { name: string; slugs: string[] }[] = [];
const subtreeFilters: { name: string; roots: string[] }[] = [];
// A byline filter matches entries credited to any of the given
// byline translation groups via the `_emdash_content_bylines`
// junction table. `null` means no byline filter; an empty
Expand All @@ -817,6 +837,12 @@ export function emdashLoader(): LiveLoader<EntryData, EntryFilter, CollectionFil
for (const [key, value] of Object.entries(where)) {
if (value == null) continue;
if (key === "byline") {
if (isWhereSubtree(value)) {
console.warn(
`[emdash] where filter: subtree operator is not supported on "byline", ignored`,
);
continue;
}
if (isWhereRange(value)) {
console.warn(
`[emdash] where filter: range operators are not supported on "byline", ignored`,
Expand All @@ -826,6 +852,11 @@ export function emdashLoader(): LiveLoader<EntryData, EntryFilter, CollectionFil
const groups = Array.isArray(value) ? value : [value];
bylineFilter = { groups };
} else if (taxNames.has(key)) {
if (isWhereSubtree(value)) {
const roots = Array.isArray(value.subtree) ? value.subtree : [value.subtree];
subtreeFilters.push({ name: key, roots });
continue;
}
if (isWhereRange(value)) {
console.warn(
`[emdash] where filter: range operators are not supported on taxonomy "${key}", ignored`,
Expand All @@ -835,6 +866,12 @@ export function emdashLoader(): LiveLoader<EntryData, EntryFilter, CollectionFil
const slugs = Array.isArray(value) ? value : [value];
taxonomyFilters.push({ name: key, slugs });
} else {
if (isWhereSubtree(value)) {
console.warn(
`[emdash] where filter: subtree operator is only valid on taxonomy keys, "${key}" ignored`,
);
continue;
}
fieldFilters[key] = value;
}
}
Expand All @@ -845,7 +882,8 @@ export function emdashLoader(): LiveLoader<EntryData, EntryFilter, CollectionFil
// SQL on both dialects).
if (
(bylineFilter && bylineFilter.groups.length === 0) ||
taxonomyFilters.some((f) => f.slugs.length === 0)
taxonomyFilters.some((f) => f.slugs.length === 0) ||
subtreeFilters.some((f) => f.roots.length === 0)
) {
return { entries: [], cacheHint: { tags: [type] } };
}
Expand Down Expand Up @@ -890,6 +928,37 @@ export function emdashLoader(): LiveLoader<EntryData, EntryFilter, CollectionFil
)}`
: sql``;

// Subtree filters: match content_taxonomies.taxonomy_id (already a
// translation_group) against the set of groups in each root's subtree,
// resolved by a recursive walk of taxonomies.parent_id (also a
// translation_group after migration 045). Only the root slugs are bound,
// so the parameter count is independent of subtree size — this is what
// avoids D1's 100-bind-parameter overflow.
const subtreeCond =
subtreeFilters.length > 0
? sql`${sql.join(
subtreeFilters.map(
(f) => sql`AND EXISTS (
SELECT 1 FROM content_taxonomies ct
WHERE ct.collection = ${type}
AND ct.entry_id = ${sql.ref(tableName)}.id
AND ct.taxonomy_id IN (
WITH RECURSIVE sub(grp) AS (
SELECT COALESCE(translation_group, id) FROM taxonomies
WHERE name = ${f.name}
AND slug IN (${sql.join(f.roots.map((s) => sql`${s}`))})
UNION
SELECT COALESCE(c.translation_group, c.id) FROM taxonomies c
JOIN sub ON c.parent_id = sub.grp
)
SELECT grp FROM sub
)
)`,
),
sql` `,
)}`
: sql``;

// `_emdash_content_bylines.byline_id` stores the byline's
// translation_group (migration 040), so a credit spans every
// locale variant of the byline and we match the group directly.
Expand Down Expand Up @@ -929,6 +998,7 @@ export function emdashLoader(): LiveLoader<EntryData, EntryFilter, CollectionFil
${localeFilter}
${cursorCond}
${taxonomyCond}
${subtreeCond}
${bylineCond}
${fieldCondsSQL ? sql`AND ${fieldCondsSQL}` : sql``}
${orderByClause}
Expand Down
4 changes: 3 additions & 1 deletion packages/core/src/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import {
FOLDED_TERMS,
type WhereRange,
type WhereValue,
type WhereSubtree,
} from "./loader.js";
import {
cachedQuery,
Expand Down Expand Up @@ -94,7 +95,7 @@ export type SortDirection = "asc" | "desc";
*/
export type OrderBySpec = Record<string, SortDirection>;

export type { WhereRange, WhereValue };
export type { WhereRange, WhereValue, WhereSubtree };

/**
* Fields shared by every collection query, independent of pagination mode.
Expand Down Expand Up @@ -123,6 +124,7 @@ export interface CollectionFilterBase {
* @example { byline: ['01HXYZ...', '01HABC...'] } - Credited to any of these bylines (OR)
* @example { series: 'main' } - Exact match on a content field
* @example { published_at: { gte: '2024-01-01', lt: '2025-01-01' } } - Date range
* @example { category: { subtree: 'news' } } - Match a term and all its descendants
*/
where?: Record<string, WhereValue>;
/**
Expand Down
Loading
Loading