From 953b6eb0448ec92f522ee316d5c0e1295875f79b Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Wed, 9 Jul 2025 15:02:39 +0200 Subject: [PATCH 1/4] Add distinct operator --- packages/db/package.json | 2 +- packages/db/src/query/builder/index.ts | 21 + packages/db/src/query/compiler/index.ts | 9 +- packages/db/src/query/ir.ts | 1 + packages/db/tests/query/distinct.test.ts | 547 +++++++++++++++++++++++ 5 files changed, 577 insertions(+), 3 deletions(-) create mode 100644 packages/db/tests/query/distinct.test.ts diff --git a/packages/db/package.json b/packages/db/package.json index 69a269f4e..848fc2f9a 100644 --- a/packages/db/package.json +++ b/packages/db/package.json @@ -3,7 +3,7 @@ "description": "A reactive client store for building super fast apps on sync", "version": "0.0.21", "dependencies": { - "@electric-sql/d2mini": "^0.1.6", + "@electric-sql/d2mini": "link:/Users/kevin/Documents/Electric/development/d2ts/packages/d2mini", "@standard-schema/spec": "^1.0.0" }, "devDependencies": { diff --git a/packages/db/src/query/builder/index.ts b/packages/db/src/query/builder/index.ts index 9b62c874c..81c0df48a 100644 --- a/packages/db/src/query/builder/index.ts +++ b/packages/db/src/query/builder/index.ts @@ -480,6 +480,27 @@ export class BaseQueryBuilder { }) as any } + /** + * Specify that the query should return distinct rows. + * Deduplicates rows based on the selected columns. 
+ * @returns A QueryBuilder with distinct enabled + * + * @example + * ```ts + * // Get countries our users are from + * query + * .from({ users: usersCollection }) + * .select(({ users }) => ({ country: users.country })) + * .distinct() + * ``` + */ + distinct(): QueryBuilder { + return new BaseQueryBuilder({ + ...this.query, + distinct: true, + }) as any + } + // Helper methods private _getCurrentAliases(): Array { const aliases: Array = [] diff --git a/packages/db/src/query/compiler/index.ts b/packages/db/src/query/compiler/index.ts index 6eb70c525..44faabe47 100644 --- a/packages/db/src/query/compiler/index.ts +++ b/packages/db/src/query/compiler/index.ts @@ -1,4 +1,4 @@ -import { filter, map } from "@electric-sql/d2mini" +import { distinct, filter, map } from "@electric-sql/d2mini" import { compileExpression } from "./evaluators.js" import { processJoins } from "./joins.js" import { processGroupBy } from "./group-by.js" @@ -99,7 +99,7 @@ export function compileQuery( } // Process the SELECT clause early - always create __select_results - // This eliminates duplication and allows for future DISTINCT implementation + // This eliminates duplication and allows for DISTINCT implementation if (query.fnSelect) { // Handle functional select - apply the function to transform the row pipeline = pipeline.pipe( @@ -190,6 +190,11 @@ export function compileQuery( } } + // Process the DISTINCT clause if it exists + if (query.distinct) { + pipeline = pipeline.pipe(distinct(([_key, row]) => row.__select_results)) + } + // Process orderBy parameter if it exists if (query.orderBy && query.orderBy.length > 0) { const orderedPipeline = processOrderBy( diff --git a/packages/db/src/query/ir.ts b/packages/db/src/query/ir.ts index a795cd188..3c079a80d 100644 --- a/packages/db/src/query/ir.ts +++ b/packages/db/src/query/ir.ts @@ -15,6 +15,7 @@ export interface QueryIR { orderBy?: OrderBy limit?: Limit offset?: Offset + distinct?: true // Functional variants fnSelect?: (row: NamespacedRow) => any diff
--git a/packages/db/tests/query/distinct.test.ts b/packages/db/tests/query/distinct.test.ts new file mode 100644 index 000000000..805f0f9c0 --- /dev/null +++ b/packages/db/tests/query/distinct.test.ts @@ -0,0 +1,547 @@ +import { beforeEach, describe, expect, test } from "vitest" +import { concat, createLiveQueryCollection } from "../../src/query/index.js" +import { createCollection } from "../../src/collection.js" +import { mockSyncCollectionOptions } from "../utls.js" + +// Sample data types for comprehensive DISTINCT testing +type User = { + id: number + name: string + email: string + department: string + role: string + salary: number + active: boolean + country: string + city: string +} + +// Sample user data with duplicates for testing DISTINCT +const sampleUsers: Array = [ + { + id: 1, + name: `John Doe`, + email: `john@example.com`, + department: `Engineering`, + role: `Developer`, + salary: 75000, + active: true, + country: `USA`, + city: `New York`, + }, + { + id: 2, + name: `Jane Smith`, + email: `jane@example.com`, + department: `Engineering`, + role: `Developer`, + salary: 80000, + active: true, + country: `USA`, + city: `San Francisco`, + }, + { + id: 3, + name: `Bob Johnson`, + email: `bob@example.com`, + department: `Marketing`, + role: `Manager`, + salary: 90000, + active: true, + country: `Canada`, + city: `Toronto`, + }, + { + id: 4, + name: `Alice Brown`, + email: `alice@example.com`, + department: `Engineering`, + role: `Developer`, + salary: 75000, + active: false, + country: `USA`, + city: `New York`, + }, + { + id: 5, + name: `Charlie Wilson`, + email: `charlie@example.com`, + department: `Sales`, + role: `Representative`, + salary: 60000, + active: true, + country: `USA`, + city: `Chicago`, + }, + { + id: 6, + name: `Diana Davis`, + email: `diana@example.com`, + department: `Engineering`, + role: `Developer`, + salary: 75000, + active: true, + country: `UK`, + city: `London`, + }, + { + id: 7, + name: `Eve Miller`, + email: `eve@example.com`, 
+ department: `Marketing`, + role: `Manager`, + salary: 90000, + active: true, + country: `Canada`, + city: `Toronto`, + }, + { + id: 8, + name: `Frank Garcia`, + email: `frank@example.com`, + department: `Engineering`, + role: `Developer`, + salary: 75000, + active: true, + country: `USA`, + city: `New York`, + }, +] + +function createUsersCollection() { + return createCollection( + mockSyncCollectionOptions({ + id: `test-users`, + getKey: (user) => user.id, + initialData: sampleUsers, + }) + ) +} + +describe(`Query DISTINCT Execution`, () => { + describe(`Basic Usage`, () => { + let usersCollection: ReturnType + + beforeEach(() => { + usersCollection = createUsersCollection() + }) + + test(`distinct on a single column`, () => { + const distinctCountries = createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ users: usersCollection }) + .select(({ users }) => ({ country: users.country })) + .distinct(), + }) + + expect(distinctCountries.size).toBe(3) // USA, Canada, UK + + const countries = Array.from(distinctCountries.values()).map( + (user) => user.country + ) + expect(countries).toContain(`USA`) + expect(countries).toContain(`Canada`) + expect(countries).toContain(`UK`) + expect(countries.length).toBe(3) + }) + + test(`distinct on multiple columns`, () => { + const distinctRoleSalary = createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ users: usersCollection }) + .select(({ users }) => ({ + role: users.role, + salary: users.salary, + })) + .distinct(), + }) + + // Expected unique combinations: + // Developer-75000 (John, Alice, Diana, Frank) + // Developer-80000 (Jane) + // Manager-90000 (Bob, Eve) + // Representative-60000 (Charlie) + expect(distinctRoleSalary.size).toBe(4) + + const combinations = Array.from(distinctRoleSalary.values()).map( + (user) => `${user.role}-${user.salary}` + ) + expect(combinations).toContain(`Developer-75000`) + expect(combinations).toContain(`Developer-80000`) + 
expect(combinations).toContain(`Manager-90000`) + expect(combinations).toContain(`Representative-60000`) + expect(combinations.length).toBe(4) + }) + + test(`distinct without select`, () => { + const distinctUsers = createLiveQueryCollection({ + startSync: true, + query: (q) => q.from({ users: usersCollection }).distinct(), + }) + + expect(distinctUsers.size).toBe(8) + expect(Array.from(distinctUsers.values())).toEqual(sampleUsers) + }) + }) + + describe(`With Computed Values`, () => { + let usersCollection: ReturnType + + beforeEach(() => { + usersCollection = createUsersCollection() + }) + + test(`distinct on computed salary ranges`, () => { + const distinctSalaryRanges = createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ users: usersCollection }) + .fn.select(({ users }) => ({ + salary_range: + users.salary > 80000 + ? `High` + : users.salary < 70000 + ? `Medium` + : `Low`, + })) + .distinct(), + }) + + expect(distinctSalaryRanges.size).toBe(3) // High, Medium, Low + + const ranges = Array.from(distinctSalaryRanges.values()).map( + (user) => user.salary_range + ) + expect(ranges).toContain(`High`) + expect(ranges).toContain(`Medium`) + expect(ranges).toContain(`Low`) + }) + + test(`distinct on computed string value`, () => { + const distinctFullNames = createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ users: usersCollection }) + .select(({ users }) => ({ + full_name: concat(users.department, ` - `, users.role), + })) + .distinct(), + }) + + expect(distinctFullNames.size).toBe(3) // All unique combinations of department and role + + const fullNames = Array.from(distinctFullNames.values()).map( + (user) => user.full_name + ) + + expect(fullNames).toContain(`Engineering - Developer`) + expect(fullNames).toContain(`Marketing - Manager`) + expect(fullNames).toContain(`Sales - Representative`) + }) + }) + + describe(`Live Updates`, () => { + let usersCollection: ReturnType + + beforeEach(() => { + usersCollection = 
createUsersCollection() + }) + + test(`live updates when inserting new users`, () => { + const distinctDepartments = createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ users: usersCollection }) + .select(({ users }) => ({ department: users.department })) + .distinct(), + }) + + expect(distinctDepartments.size).toBe(3) // Engineering, Marketing, Sales + + // Insert new user with existing department + const newUser1: User = { + id: 9, + name: `Grace Lee`, + email: `grace@example.com`, + department: `Engineering`, + role: `Developer`, + salary: 75000, + active: true, + country: `USA`, + city: `Boston`, + } + + usersCollection.utils.begin() + usersCollection.utils.write({ type: `insert`, value: newUser1 }) + usersCollection.utils.commit() + + // Should still have 3 departments (Engineering already exists) + expect(distinctDepartments.size).toBe(3) + + // Insert new user with new department + const newUser2: User = { + id: 10, + name: `Henry Chen`, + email: `henry@example.com`, + department: `HR`, + role: `Manager`, + salary: 85000, + active: true, + country: `USA`, + city: `Seattle`, + } + + usersCollection.utils.begin() + usersCollection.utils.write({ type: `insert`, value: newUser2 }) + usersCollection.utils.commit() + + // Should now have 4 departments + expect(distinctDepartments.size).toBe(4) + + const departments = Array.from(distinctDepartments.values()).map( + (user) => user.department + ) + expect(departments).toContain(`HR`) + }) + + test(`live updates when updating existing users`, () => { + const distinctCountries = createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ users: usersCollection }) + .select(({ users }) => ({ country: users.country })) + .distinct(), + }) + + expect(distinctCountries.size).toBe(3) // USA, Canada, UK + + // Update user 1 to change country from USA to Germany + const updatedUser = { + ...sampleUsers.find((u) => u.id === 1)!, + country: `Germany`, + } + + 
usersCollection.utils.begin() + usersCollection.utils.write({ type: `update`, value: updatedUser }) + usersCollection.utils.commit() + + // Should now have 4 countries (Germany is new, and USA remains because other users are still from the USA) + expect(distinctCountries.size).toBe(4) + + const countries = Array.from(distinctCountries.values()).map( + (user) => user.country + ) + expect(countries).toContain(`Germany`) + + // Modify user 1 to Canada such that Germany no longer occurs + // and the distinct count is back to 3 + const updatedUserAgain = { + ...sampleUsers.find((u) => u.id === 1)!, + country: `Canada`, + } + + usersCollection.utils.begin() + usersCollection.utils.write({ type: `update`, value: updatedUserAgain }) + usersCollection.utils.commit() + + // Should now have 3 countries + expect(distinctCountries.size).toBe(3) + + const finalCountries = Array.from(distinctCountries.values()).map( + (user) => user.country + ) + expect(finalCountries).not.toContain(`Germany`) + expect(finalCountries).toContain(`Canada`) + expect(finalCountries).toContain(`USA`) + expect(finalCountries).toContain(`UK`) + }) + + test(`live updates when deleting users`, () => { + const distinctCities = createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ users: usersCollection }) + .select(({ users }) => ({ city: users.city })) + .distinct(), + }) + + expect(distinctCities.size).toBe(5) // New York, San Francisco, Toronto, Chicago, London + + // Delete all users from New York + const newYorkUsers = sampleUsers.filter((u) => u.city === `New York`) + for (const user of newYorkUsers) { + usersCollection.utils.begin() + usersCollection.utils.write({ type: `delete`, value: user }) + usersCollection.utils.commit() + } + + // Should now have 4 cities (New York removed) + expect(distinctCities.size).toBe(4) + + const cities = Array.from(distinctCities.values()).map( + (user) => user.city + ) + expect(cities).not.toContain(`New York`) + }) + }) + + describe(`Edge Cases
and Complex Scenarios`, () => { + let usersCollection: ReturnType + + beforeEach(() => { + usersCollection = createUsersCollection() + }) + + test(`distinct with null values`, () => { + // Add a user with null department + const userWithNullDept: User = { + id: 11, + name: `Null User`, + email: `null@example.com`, + department: null as any, + role: `Developer`, + salary: 70000, + active: true, + country: `USA`, + city: `Austin`, + } + + usersCollection.utils.begin() + usersCollection.utils.write({ type: `insert`, value: userWithNullDept }) + usersCollection.utils.commit() + + const distinctDepartments = createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ users: usersCollection }) + .select(({ users }) => ({ department: users.department })) + .distinct(), + }) + + expect(distinctDepartments.size).toBe(4) // Engineering, Marketing, Sales, null + + const departments = Array.from(distinctDepartments.values()).map( + (user) => user.department + ) + expect(departments).toContain(null) + }) + + test(`empty collection handling`, () => { + const emptyCollection = createCollection( + mockSyncCollectionOptions({ + id: `empty-users`, + getKey: (user) => user.id, + initialData: [], + }) + ) + + const emptyDistinct = createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ users: emptyCollection }) + .select(({ users }) => ({ department: users.department })) + .distinct(), + }) + + expect(emptyDistinct.size).toBe(0) + + // Add data to empty collection + const newUser: User = { + id: 1, + name: `Test User`, + email: `test@example.com`, + department: `Test`, + role: `Tester`, + salary: 50000, + active: true, + country: `Test`, + city: `Test City`, + } + + emptyCollection.utils.begin() + emptyCollection.utils.write({ type: `insert`, value: newUser }) + emptyCollection.utils.commit() + + expect(emptyDistinct.size).toBe(1) + const department = emptyDistinct.get(1) + expect(department?.department).toBe(`Test`) + }) + + test(`distinct with 
boolean values`, () => { + const distinctActiveStatus = createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ users: usersCollection }) + .select(({ users }) => ({ active: users.active })) + .distinct(), + }) + + expect(distinctActiveStatus.size).toBe(2) // true, false + + const statuses = Array.from(distinctActiveStatus.values()).map( + (user) => user.active + ) + expect(statuses).toContain(true) + expect(statuses).toContain(false) + }) + + test(`distinct with ordered results based on non-selected column`, () => { + const distinctOrderedData = createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ users: usersCollection }) + .select(({ users }) => ({ + role: users.role, + })) + .distinct() + .orderBy(({ users }) => users.salary, `desc`), + }) + + // In general, the combination of distinct with orderBy where orderBy uses a non-selected column is non-deterministic + // However, in this case each role has a salary range and those salary ranges don't overlap so it is deterministic + // So we expect the results to be: manager, developer, representative + const distinctOrderedRoles = distinctOrderedData.toArray.map( + (r) => r.role + ) + expect(distinctOrderedRoles).toEqual([ + `Manager`, + `Developer`, + `Representative`, + ]) + }) + + test(`distinct with functional select`, () => { + const distinctComputed = createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ users: usersCollection }) + .fn.select((row) => ({ + salary_tier: row.users.salary >= 80000 ? 
`Senior` : `Junior`, + })) + .distinct(), + }) + + expect(distinctComputed.size).toBe(2) + + const locations = Array.from(distinctComputed.values()).map( + (user) => user.salary_tier + ) + expect(locations).toContain(`Senior`) + expect(locations).toContain(`Junior`) + }) + }) +}) From 2cd2e9f916c7e55c8c3ab10b47956363563d5715 Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Wed, 9 Jul 2025 15:03:35 +0200 Subject: [PATCH 2/4] changeset --- .changeset/slick-ghosts-shine.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/slick-ghosts-shine.md diff --git a/.changeset/slick-ghosts-shine.md b/.changeset/slick-ghosts-shine.md new file mode 100644 index 000000000..9728dc6f5 --- /dev/null +++ b/.changeset/slick-ghosts-shine.md @@ -0,0 +1,5 @@ +--- +"@tanstack/db": patch +--- + +New distinct operator for queries. From c94e6064222cf56a2e427dc60b4a66ddd7c4a358 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Wed, 9 Jul 2025 16:43:24 +0100 Subject: [PATCH 3/4] bump d2mini --- packages/db/package.json | 2 +- pnpm-lock.yaml | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/db/package.json b/packages/db/package.json index 848fc2f9a..4c8cedb0e 100644 --- a/packages/db/package.json +++ b/packages/db/package.json @@ -3,7 +3,7 @@ "description": "A reactive client store for building super fast apps on sync", "version": "0.0.21", "dependencies": { - "@electric-sql/d2mini": "link:/Users/kevin/Documents/Electric/development/d2ts/packages/d2mini", + "@electric-sql/d2mini": "^0.1.7", "@standard-schema/spec": "^1.0.0" }, "devDependencies": { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2fa32447e..dc16765f0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -211,8 +211,8 @@ importers: packages/db: dependencies: '@electric-sql/d2mini': - specifier: ^0.1.6 - version: 0.1.6 + specifier: ^0.1.7 + version: 0.1.7 '@standard-schema/spec': specifier: ^1.0.0 version: 1.0.0 @@ -622,8 +622,8 @@ packages: '@electric-sql/client@1.0.0': 
resolution: {integrity: sha512-kGiVbBIlMqc/CeJpWZuLjxNkm0836NWxeMtIWH2w5IUK8pUL13hyxg3ZkR7+FlTGhpKuZRiCP5nPOH9D6wbhPw==} - '@electric-sql/d2mini@0.1.6': - resolution: {integrity: sha512-FLzsK4kAtD3upU0MDj3JQx3jPwgFfwmPJ69kheMPeMexoPO8aTIr/nhWL7GSnCLFB+BDSPOFUeffDhSomLRNXA==} + '@electric-sql/d2mini@0.1.7': + resolution: {integrity: sha512-gcXZKkMmgGdNB6AQl0S9jJIbKZdlgafbA3u/a9TVltIgXE1VPuBN7j6tf25RfxX/5oi6o6ca9KHluJQwg40oLg==} '@emnapi/core@1.4.4': resolution: {integrity: sha512-A9CnAbC6ARNMKcIcrQwq6HeHCjpcBZ5wSx4U01WXCqEKlrzB9F9315WDNHkrs2xbx7YjjSxbUYxuN6EQzpcY2g==} @@ -7512,7 +7512,7 @@ snapshots: optionalDependencies: '@rollup/rollup-darwin-arm64': 4.44.2 - '@electric-sql/d2mini@0.1.6': + '@electric-sql/d2mini@0.1.7': dependencies: fractional-indexing: 3.2.0 murmurhash-js: 1.0.0 From d24f71e22382edefa0e2272660ed21f841a43fa7 Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Tue, 15 Jul 2025 13:35:33 +0200 Subject: [PATCH 4/4] Require SELECT clause for DISTINCT operator. --- packages/db/src/query/compiler/index.ts | 4 ++++ packages/db/tests/query/distinct.test.ts | 15 +++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/packages/db/src/query/compiler/index.ts b/packages/db/src/query/compiler/index.ts index 44faabe47..86158632d 100644 --- a/packages/db/src/query/compiler/index.ts +++ b/packages/db/src/query/compiler/index.ts @@ -98,6 +98,10 @@ export function compileQuery( } } + if (query.distinct && !query.fnSelect && !query.select) { + throw new Error(`DISTINCT requires a SELECT clause.`) + } + // Process the SELECT clause early - always create __select_results // This eliminates duplication and allows for DISTINCT implementation if (query.fnSelect) { diff --git a/packages/db/tests/query/distinct.test.ts b/packages/db/tests/query/distinct.test.ts index 805f0f9c0..9f94e95c4 100644 --- a/packages/db/tests/query/distinct.test.ts +++ b/packages/db/tests/query/distinct.test.ts @@ -177,14 +177,13 @@ describe(`Query DISTINCT Execution`, () => 
{ expect(combinations.length).toBe(4) }) - test(`distinct without select`, () => { - const distinctUsers = createLiveQueryCollection({ - startSync: true, - query: (q) => q.from({ users: usersCollection }).distinct(), - }) - - expect(distinctUsers.size).toBe(8) - expect(Array.from(distinctUsers.values())).toEqual(sampleUsers) + test(`distinct without select should throw`, () => { + expect(() => + createLiveQueryCollection({ + startSync: true, + query: (q) => q.from({ users: usersCollection }).distinct(), + }) + ).toThrow(`DISTINCT requires a SELECT clause.`) }) })