From ee31c96beb3fff95e95fc483ac1f9c83896eeb60 Mon Sep 17 00:00:00 2001 From: Alex Lubbock Date: Fri, 8 May 2026 22:09:30 +0100 Subject: [PATCH 1/3] perf: replace JS map/reduce with aggregation in updateDatasetSizeAndFiles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous implementation fetched all OrigDatablock documents for a dataset into memory — including full dataFileList arrays — then summed size and file count in JavaScript. This sent unbounded data across the wire on every origdatablock create/update/delete. Replaced with a single $group aggregation that computes both totals server-side (PERF-002). --- .../origdatablocks.controller.ts | 19 +++---------------- src/origdatablocks/origdatablocks.service.ts | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/src/origdatablocks/origdatablocks.controller.ts b/src/origdatablocks/origdatablocks.controller.ts index 33d99e5dd..7bfefce7d 100644 --- a/src/origdatablocks/origdatablocks.controller.ts +++ b/src/origdatablocks/origdatablocks.controller.ts @@ -40,7 +40,6 @@ import { IOrigDatablockFields } from "./interfaces/origdatablocks.interface"; import { plainToInstance } from "class-transformer"; import { validate, ValidationError } from "class-validator"; import { DatasetsService } from "src/datasets/datasets.service"; -import { PartialUpdateDatasetDto } from "src/datasets/dto/update-dataset.dto"; import { filterDescription, filterExample, parseDate } from "src/common/utils"; import { JWTUser } from "src/auth/interfaces/jwt-user.interface"; import { DatasetClass } from "src/datasets/schemas/dataset.schema"; @@ -213,22 +212,10 @@ export class OrigDatablocksController { } async updateDatasetSizeAndFiles(pid: string) { - // updates datasets size - const parsedFilters: IFilters = - { where: { datasetId: pid } }; - const datasetOrigdatablocks = - await this.origDatablocksService.findAll(parsedFilters); - - const updateDatasetDto: PartialUpdateDatasetDto = { - size: datasetOrigdatablocks - .map((od) => od.size) - .reduce((ps, a) => ps + a, 0), - numberOfFiles: datasetOrigdatablocks - .map((od) => od.dataFileList.length) - .reduce((ps, a) => ps + a, 0), - }; + const { size, numberOfFiles } = + await this.origDatablocksService.aggregateSizeAndFileCount(pid); - await this.datasetsService.findByIdAndUpdate(pid, updateDatasetDto); + await this.datasetsService.findByIdAndUpdate(pid, { size, numberOfFiles }); } @UseGuards(PoliciesGuard) diff --git a/src/origdatablocks/origdatablocks.service.ts b/src/origdatablocks/origdatablocks.service.ts index 9d0e793f4..7a68f3a70 100644 --- a/src/origdatablocks/origdatablocks.service.ts +++ b/src/origdatablocks/origdatablocks.service.ts @@ -382,4 +382,22 @@ export class OrigDatablocksService { .exec(); return { count }; } + + async aggregateSizeAndFileCount( + datasetId: string, + ): Promise<{ size: number; numberOfFiles: number }> { + const [result] = await this.origDatablockModel + .aggregate<{ size: number; numberOfFiles: number }>([ + { $match: { datasetId } }, + { + $group: { + _id: null, + size: { $sum: "$size" }, + numberOfFiles: { $sum: { $size: "$dataFileList" } }, + }, + }, + ]) + .exec(); + return result ?? { size: 0, numberOfFiles: 0 }; + } } From 3e71910d3d707953b428a7d0318bc5e71ad3b242 Mon Sep 17 00:00:00 2001 From: Alex Lubbock Date: Fri, 8 May 2026 22:34:08 +0100 Subject: [PATCH 2/3] test: add tests for aggregateSizeAndFileCount and updateDatasetSizeAndFiles - Service: three cases covering normal totals, empty dataset, and correct $match datasetId propagation - Controller: two cases verifying the aggregation result flows through to datasetsService.findByIdAndUpdate (including zero-total case) - Fix: destructure only {size, numberOfFiles} from the $group result so callers don't receive the internal _id:null field --- .../origdatablocks.controller.spec.ts | 69 ++++++++++++++++++- .../origdatablocks.service.spec.ts | 41 +++++++++++ src/origdatablocks/origdatablocks.service.ts | 4 +- 3 files changed, 111 insertions(+), 3 deletions(-) diff --git a/src/origdatablocks/origdatablocks.controller.spec.ts b/src/origdatablocks/origdatablocks.controller.spec.ts index 96004b284..104b9971a 100644 --- a/src/origdatablocks/origdatablocks.controller.spec.ts +++ b/src/origdatablocks/origdatablocks.controller.spec.ts @@ -10,9 +10,12 @@ import { Request } from "express"; class OrigDatablocksServiceMock { findOne = jest.fn(); findByIdAndUpdate = jest.fn(); + aggregateSizeAndFileCount = jest.fn(); } -class DatasetsServiceMock {} +class DatasetsServiceMock { + findByIdAndUpdate = jest.fn(); +} class CaslAbilityFactoryMock {} @@ -36,7 +39,7 @@ describe("OrigDatablocksController", () => { OrigDatablocksService, ); - // Mock internal methods + // Mock internal methods to isolate the handler under test controller["updateDatasetSizeAndFiles"] = jest.fn(); }); @@ -160,4 +163,66 @@ describe("OrigDatablocksController", () => { }); }); }); + + describe("updateDatasetSizeAndFiles", () => { + let controllerWithRealMethod: OrigDatablocksController; + let realOrigDatablocksService: OrigDatablocksServiceMock; + let realDatasetsService: DatasetsServiceMock; + + beforeEach(async () => { + const module: TestingModule = await Test.createTestingModule({ + controllers: [OrigDatablocksController], + imports: [ConfigModule], + providers: [ + { + provide: OrigDatablocksService, + useClass: OrigDatablocksServiceMock, + }, + { provide: DatasetsService, useClass: DatasetsServiceMock }, + { provide: CaslAbilityFactory, useClass: CaslAbilityFactoryMock }, + ], + }).compile(); + + controllerWithRealMethod = module.get( + OrigDatablocksController, + ); + realOrigDatablocksService = module.get( + OrigDatablocksService, + ) as unknown as OrigDatablocksServiceMock; + realDatasetsService = module.get( + DatasetsService, + ) as unknown as DatasetsServiceMock; + }); + + it("should use aggregateSizeAndFileCount and update the dataset", async () => { + realOrigDatablocksService.aggregateSizeAndFileCount.mockResolvedValue({ + size: 2000, + numberOfFiles: 5, + }); + + await controllerWithRealMethod["updateDatasetSizeAndFiles"]("testPid"); + + expect( + realOrigDatablocksService.aggregateSizeAndFileCount, + ).toHaveBeenCalledWith("testPid"); + expect(realDatasetsService.findByIdAndUpdate).toHaveBeenCalledWith( + "testPid", + { size: 2000, numberOfFiles: 5 }, + ); + }); + + it("should propagate zero totals when no origdatablocks exist", async () => { + realOrigDatablocksService.aggregateSizeAndFileCount.mockResolvedValue({ + size: 0, + numberOfFiles: 0, + }); + + await controllerWithRealMethod["updateDatasetSizeAndFiles"]("emptyPid"); + + expect(realDatasetsService.findByIdAndUpdate).toHaveBeenCalledWith( + "emptyPid", + { size: 0, numberOfFiles: 0 }, + ); + }); + }); }); diff --git a/src/origdatablocks/origdatablocks.service.spec.ts b/src/origdatablocks/origdatablocks.service.spec.ts index 7d5ca7999..5aff25fef 100644 --- a/src/origdatablocks/origdatablocks.service.spec.ts +++ b/src/origdatablocks/origdatablocks.service.spec.ts @@ -47,6 +47,7 @@ describe("OrigdatablocksService", () => { find: jest.fn(), create: jest.fn(), exec: jest.fn(), + aggregate: jest.fn(), }, }, ], @@ -61,4 +62,44 @@ describe("OrigdatablocksService", () => { it("should be defined", () => { expect(service).toBeDefined(); }); + + describe("aggregateSizeAndFileCount", () => { + it("should return summed size and file count from origdatablocks", async () => { + (model.aggregate as jest.Mock).mockReturnValue({ + exec: jest + .fn() + .mockResolvedValue([{ _id: null, size: 5000, numberOfFiles: 3 }]), + }); + + const result = await service.aggregateSizeAndFileCount("testPid"); + + expect(result).toEqual({ size: 5000, numberOfFiles: 3 }); + }); + + it("should return zeros when no origdatablocks exist for the dataset", async () => { + (model.aggregate as jest.Mock).mockReturnValue({ + exec: jest.fn().mockResolvedValue([]), + }); + + const result = await service.aggregateSizeAndFileCount("emptyPid"); + + expect(result).toEqual({ size: 0, numberOfFiles: 0 }); + }); + + it("should match on the given datasetId", async () => { + (model.aggregate as jest.Mock).mockReturnValue({ + exec: jest + .fn() + .mockResolvedValue([{ _id: null, size: 0, numberOfFiles: 0 }]), + }); + + await service.aggregateSizeAndFileCount("ds123"); + + expect(model.aggregate).toHaveBeenCalledWith( + expect.arrayContaining([ + expect.objectContaining({ $match: { datasetId: "ds123" } }), + ]), + ); + }); + }); }); diff --git a/src/origdatablocks/origdatablocks.service.ts b/src/origdatablocks/origdatablocks.service.ts index 7a68f3a70..be793a098 100644 --- a/src/origdatablocks/origdatablocks.service.ts +++ b/src/origdatablocks/origdatablocks.service.ts @@ -398,6 +398,8 @@ export class OrigDatablocksService { }, ]) .exec(); - return result ?? { size: 0, numberOfFiles: 0 }; + return result + ? { size: result.size, numberOfFiles: result.numberOfFiles } + : { size: 0, numberOfFiles: 0 }; } } From 027c4254c4754bbb923b3c25081abd313ebfa024 Mon Sep 17 00:00:00 2001 From: Alex Lubbock Date: Fri, 8 May 2026 23:03:20 +0100 Subject: [PATCH 3/3] refactor(origdatablocks): address Sourcery-AI review suggestions - Wrap dataFileList in $ifNull in aggregateSizeAndFileCount so that documents with a missing or null field do not cause the $size operator to error - Collapse controller spec to a single module setup; move the updateDatasetSizeAndFiles stub into the update describe block so the helper's own tests can use the real implementation directly, removing the duplicate beforeEach and extra controller instance --- .../origdatablocks.controller.spec.ts | 62 +++++++------------ src/origdatablocks/origdatablocks.service.ts | 4 +- 2 files changed, 24 insertions(+), 42 deletions(-) diff --git a/src/origdatablocks/origdatablocks.controller.spec.ts b/src/origdatablocks/origdatablocks.controller.spec.ts index 104b9971a..7a86a117e 100644 --- a/src/origdatablocks/origdatablocks.controller.spec.ts +++ b/src/origdatablocks/origdatablocks.controller.spec.ts @@ -22,6 +22,7 @@ class CaslAbilityFactoryMock {} describe("OrigDatablocksController", () => { let controller: OrigDatablocksController; let origDatablocksService: OrigDatablocksServiceMock; + let datasetsService: DatasetsServiceMock; beforeEach(async () => { const module: TestingModule = await Test.createTestingModule({ @@ -37,10 +38,10 @@ describe("OrigDatablocksController", () => { controller = module.get(OrigDatablocksController); origDatablocksService = module.get( OrigDatablocksService, - ); - - // Mock internal methods to isolate the handler under test - controller["updateDatasetSizeAndFiles"] = jest.fn(); + ) as unknown as OrigDatablocksServiceMock; + datasetsService = module.get( + DatasetsService, + ) as unknown as DatasetsServiceMock; }); it("should be defined", () => { @@ -58,6 +59,11 @@ describe("OrigDatablocksController", () => { datasetId: "ds1", }; + beforeEach(() => { + // Isolate the update handler from the side-effect helper + controller["updateDatasetSizeAndFiles"] = jest.fn(); + }); + it("should throw NotFoundException if datablock not found before update", async () => { origDatablocksService.findOne.mockResolvedValue(null); @@ -165,61 +171,35 @@ describe("OrigDatablocksController", () => { }); describe("updateDatasetSizeAndFiles", () => { - let controllerWithRealMethod: OrigDatablocksController; - let realOrigDatablocksService: OrigDatablocksServiceMock; - let realDatasetsService: DatasetsServiceMock; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - controllers: [OrigDatablocksController], - imports: [ConfigModule], - providers: [ - { - provide: OrigDatablocksService, - useClass: OrigDatablocksServiceMock, - }, - { provide: DatasetsService, useClass: DatasetsServiceMock }, - { provide: CaslAbilityFactory, useClass: CaslAbilityFactoryMock }, - ], - }).compile(); - - controllerWithRealMethod = module.get( - OrigDatablocksController, - ); - realOrigDatablocksService = module.get( - OrigDatablocksService, - ) as unknown as OrigDatablocksServiceMock; - realDatasetsService = module.get( - DatasetsService, - ) as unknown as DatasetsServiceMock; - }); - it("should use aggregateSizeAndFileCount and update the dataset", async () => { - realOrigDatablocksService.aggregateSizeAndFileCount.mockResolvedValue({ + origDatablocksService.aggregateSizeAndFileCount.mockResolvedValue({ size: 2000, numberOfFiles: 5, }); - await controllerWithRealMethod["updateDatasetSizeAndFiles"]("testPid"); + await controller["updateDatasetSizeAndFiles"]("testPid"); expect( - realOrigDatablocksService.aggregateSizeAndFileCount, + origDatablocksService.aggregateSizeAndFileCount, ).toHaveBeenCalledWith("testPid"); - expect(realDatasetsService.findByIdAndUpdate).toHaveBeenCalledWith( + expect(datasetsService.findByIdAndUpdate).toHaveBeenCalledWith( "testPid", - { size: 2000, numberOfFiles: 5 }, + { + size: 2000, + numberOfFiles: 5, + }, ); }); it("should propagate zero totals when no origdatablocks exist", async () => { - realOrigDatablocksService.aggregateSizeAndFileCount.mockResolvedValue({ + origDatablocksService.aggregateSizeAndFileCount.mockResolvedValue({ size: 0, numberOfFiles: 0, }); - await controllerWithRealMethod["updateDatasetSizeAndFiles"]("emptyPid"); + await controller["updateDatasetSizeAndFiles"]("emptyPid"); - expect(realDatasetsService.findByIdAndUpdate).toHaveBeenCalledWith( + expect(datasetsService.findByIdAndUpdate).toHaveBeenCalledWith( "emptyPid", { size: 0, numberOfFiles: 0 }, ); diff --git a/src/origdatablocks/origdatablocks.service.ts b/src/origdatablocks/origdatablocks.service.ts index be793a098..93fa40012 100644 --- a/src/origdatablocks/origdatablocks.service.ts +++ b/src/origdatablocks/origdatablocks.service.ts @@ -393,7 +393,9 @@ export class OrigDatablocksService { $group: { _id: null, size: { $sum: "$size" }, - numberOfFiles: { $sum: { $size: "$dataFileList" } }, + numberOfFiles: { + $sum: { $size: { $ifNull: ["$dataFileList", []] } }, + }, }, }, ])