From a98505e2233bd8bac4a8cef45d315348bdc831d6 Mon Sep 17 00:00:00 2001 From: Carlo Minotti <50220438+minottic@users.noreply.github.com> Date: Thu, 5 Feb 2026 08:50:49 +0100 Subject: [PATCH 1/5] feat: add interceptor to bypass serialization for heavy resp --- .../interceptors/fast-response.interceptor.ts | 30 +++++++++++++++++++ .../published-data.controller.ts | 9 ++++-- .../published-data.v4.controller.ts | 11 ++++++- 3 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 src/published-data/interceptors/fast-response.interceptor.ts diff --git a/src/published-data/interceptors/fast-response.interceptor.ts b/src/published-data/interceptors/fast-response.interceptor.ts new file mode 100644 index 000000000..55e3da791 --- /dev/null +++ b/src/published-data/interceptors/fast-response.interceptor.ts @@ -0,0 +1,30 @@ +import { + Injectable, + NestInterceptor, + ExecutionContext, + CallHandler, +} from "@nestjs/common"; +import { Observable } from "rxjs"; +import { map } from "rxjs/operators"; +import { Response } from "express"; + +@Injectable() +export class FastResponseInterceptor implements NestInterceptor { + constructor( + private readonly headers = { "Content-Type": "application/json" }, + ) {} + + intercept(context: ExecutionContext, next: CallHandler): Observable { + const res = context.switchToHttp().getResponse(); + + return next.handle().pipe( + map((data) => { + if (res.headersSent) return data; + + res.set(this.headers); + res.send(JSON.stringify(data)); + return null; + }), + ); + } +} diff --git a/src/published-data/published-data.controller.ts b/src/published-data/published-data.controller.ts index a06d3481d..9d8707dab 100644 --- a/src/published-data/published-data.controller.ts +++ b/src/published-data/published-data.controller.ts @@ -68,11 +68,11 @@ import { PublishedDataService } from "./published-data.service"; import { PublishedData } from "./schemas/published-data.schema"; import { V3_FILTER_PIPE } from "./pipes/filter.pipe"; import { Filter } from "src/datasets/decorators/filter.decorator"; +import { FastResponseInterceptor } from "./interceptors/fast-response.interceptor"; @ApiBearerAuth() @ApiTags("published data") @Controller("publisheddata") -@UseInterceptors(ClassSerializerInterceptor) export class PublishedDataController { constructor( private readonly attachmentsService: AttachmentsService, @@ -239,6 +239,7 @@ export class PublishedDataController { description: "This endpoint is deprecated and v4 endpoints should be used in the future", }) + @UseInterceptors(new FastResponseInterceptor(), ClassSerializerInterceptor) @SerializeOptions({ type: PublishedDataObsoleteDto, excludeExtraneousValues: true, @@ -276,6 +277,7 @@ export class PublishedDataController { isArray: true, description: "Results with a published documents array", }) + @UseInterceptors(new FastResponseInterceptor(), ClassSerializerInterceptor) @SerializeOptions({ type: PublishedDataObsoleteDto, excludeExtraneousValues: true, @@ -408,11 +410,12 @@ export class PublishedDataController { status: HttpStatus.NOT_FOUND, description: "PublishedData not found", }) - @Get("/:id") + @UseInterceptors(new FastResponseInterceptor(), ClassSerializerInterceptor) @SerializeOptions({ type: PublishedDataObsoleteDto, excludeExtraneousValues: true, }) + @Get("/:id") async findOne( @Param(new IdToDoiPipe(), RegisteredPipe) filter: { @@ -449,6 +452,7 @@ export class PublishedDataController { isArray: false, description: "Return updated published data", }) + @UseInterceptors(new FastResponseInterceptor(), ClassSerializerInterceptor) @SerializeOptions({ type: PublishedDataObsoleteDto, excludeExtraneousValues: true, @@ -485,6 +489,7 @@ export class PublishedDataController { isArray: false, description: "Return removed published data", }) + @UseInterceptors(new FastResponseInterceptor(), ClassSerializerInterceptor) @SerializeOptions({ type: PublishedDataObsoleteDto, excludeExtraneousValues: true, diff --git a/src/published-data/published-data.v4.controller.ts b/src/published-data/published-data.v4.controller.ts index a8ea40d9d..c3d900956 100644 --- a/src/published-data/published-data.v4.controller.ts +++ b/src/published-data/published-data.v4.controller.ts @@ -13,6 +13,7 @@ import { Query, Req, UseGuards, + UseInterceptors, } from "@nestjs/common"; import { ConfigService } from "@nestjs/config"; import { @@ -43,7 +44,9 @@ import { DatasetsV4Controller } from "src/datasets/datasets.v4.controller"; import { DatasetClass } from "src/datasets/schemas/dataset.schema"; import { ProposalsService } from "src/proposals/proposals.service"; import { CreatePublishedDataV4Dto } from "./dto/create-published-data.v4.dto"; +import { PublishedDataConfigDto } from "./dto/published-data-config.dto"; import { PartialUpdatePublishedDataV4Dto } from "./dto/update-published-data.v4.dto"; +import { FastResponseInterceptor } from "./interceptors/fast-response.interceptor"; import { FormPopulateData, ICount, @@ -59,7 +62,6 @@ import { PublishedDataDocument, } from "./schemas/published-data.schema"; import { ValidatorService } from "./validator.service"; -import { PublishedDataConfigDto } from "./dto/published-data-config.dto"; @ApiBearerAuth() @ApiTags("published data v4") @@ -123,6 +125,7 @@ export class PublishedDataV4Controller { isArray: true, description: "Results with a published documents array", }) + @UseInterceptors(new FastResponseInterceptor()) async findAll( @Req() request: Request, @Query(...V4_FILTER_PIPE, RegisteredFilterPipe) @@ -325,6 +328,7 @@ export class PublishedDataV4Controller { status: HttpStatus.NOT_FOUND, description: "PublishedData not found", }) + @UseInterceptors(new FastResponseInterceptor()) @Get("/:id") async findOne( @Req() request: Request, @@ -352,6 +356,7 @@ export class PublishedDataV4Controller { isArray: false, description: "Return updated published data with id specified", }) + @UseInterceptors(new FastResponseInterceptor()) @Patch("/:id") async update( @Req() request: Request, @@ -407,6 +412,7 @@ export class PublishedDataV4Controller { isArray: false, description: "Return published data with id specified after publishing", }) + @UseInterceptors(new FastResponseInterceptor()) @Post("/:id/publish") async publish( @Req() request: Request, @@ -459,6 +465,7 @@ export class PublishedDataV4Controller { isArray: false, description: "Return amended data with id specified", }) + @UseInterceptors(new FastResponseInterceptor()) @Post("/:id/amend") async amend( @Req() request: Request, @@ -503,6 +510,7 @@ export class PublishedDataV4Controller { @CheckPolicies("publisheddata", (ability: AppAbility) => ability.can(Action.Delete, PublishedData), ) + @UseInterceptors(new FastResponseInterceptor()) @Delete("/:id") async remove( @Req() request: Request, @@ -546,6 +554,7 @@ export class PublishedDataV4Controller { @CheckPolicies("publisheddata", (ability: AppAbility) => ability.can(Action.Update, PublishedData), ) + @UseInterceptors(new FastResponseInterceptor()) @Post("/:id/register") async register( @Req() request: Request, From a04c34de5e116fc28649a5dd116cd16ba6b694c3 Mon Sep 17 00:00:00 2001 From: minottic Date: Wed, 29 Apr 2026 12:34:47 +0000 Subject: [PATCH 2/5] fix: allow retrieve of public and owned ds The computation of job.ownerGroup, error thrown if empty, suffered from having jobs trying to access public and owned datasets because public datasets might be not owned by the user submitting the job. It also includes a refactor to simplify maintenance as well as more verbose error messages --- src/jobs/dto/create-job.dto.ts | 6 +- .../create-job-v3-mapping.interceptor.ts | 223 ++++++++++-------- test/JobsV3.js | 108 ++++++++- 3 files changed, 231 insertions(+), 106 deletions(-) diff --git a/src/jobs/dto/create-job.dto.ts b/src/jobs/dto/create-job.dto.ts index 58edec9f2..a5b0f43c3 100644 --- a/src/jobs/dto/create-job.dto.ts +++ b/src/jobs/dto/create-job.dto.ts @@ -20,19 +20,19 @@ export class CreateJobDto { */ @IsString() @IsOptional() - readonly ownerUser?: string; + ownerUser?: string; /** * Group that this job belongs to. Applicable only if requesting user has adequate permissions level. */ @IsString() @IsOptional() - readonly ownerGroup?: string; + ownerGroup?: string; /** * Email to contact regarding this job. If the job is submitted anonymously, an email has to be provided. */ @IsEmail() @IsOptional() - readonly contactEmail?: string; + contactEmail?: string; } diff --git a/src/jobs/interceptors/create-job-v3-mapping.interceptor.ts b/src/jobs/interceptors/create-job-v3-mapping.interceptor.ts index efe1ccfa0..e00b71af9 100644 --- a/src/jobs/interceptors/create-job-v3-mapping.interceptor.ts +++ b/src/jobs/interceptors/create-job-v3-mapping.interceptor.ts @@ -15,6 +15,12 @@ import { UsersService } from "src/users/users.service"; import { DatasetsService } from "src/datasets/datasets.service"; import { JobConfigService } from "src/config/job-config/jobconfig.service"; import { JobsControllerUtils } from "src/jobs/jobs.controller.utils"; +import { omit } from "lodash"; +import { CreateJobAuth } from "../types/jobs-auth.enum"; +import { DatasetDocument } from "src/datasets/schemas/dataset.schema"; +import { ConfigService } from "@nestjs/config"; +import { AccessGroupsType } from "src/config/configuration"; +import { FilterQuery } from "mongoose"; interface JobParams { datasetList: DatasetListDto[]; @@ -29,12 +35,18 @@ interface JobParams { */ @Injectable() export class CreateJobV3MappingInterceptor implements NestInterceptor { + adminUsers: Set; constructor( @Inject(UsersService) readonly usersService: UsersService, @Inject(DatasetsService) readonly datasetsService: DatasetsService, @Inject(JobConfigService) readonly jobConfigService: JobConfigService, + @Inject(ConfigService) readonly configService: ConfigService, private readonly jobsControllerUtils: JobsControllerUtils, - ) {} + ) { + this.adminUsers = new Set( + this.configService.get("accessGroups")?.admin ?? [], + ); + } async intercept( context: ExecutionContext, @@ -47,109 +59,116 @@ export class CreateJobV3MappingInterceptor implements NestInterceptor { const jobConfig = this.jobsControllerUtils.getJobTypeConfiguration( dtoV3.type, ); - if (jobConfig) { - // ensure datasetList comes from a top level field in the dto and not from jobParams - if ( - dtoV3.jobParams && - Object.keys(dtoV3.jobParams).includes("datasetList") - ) { - delete dtoV3.jobParams.datasetList; - } - const jobParams: JobParams = { - datasetList: dtoV3.datasetList ?? [], - ...dtoV3.jobParams, - }; - // to preserve the executionTime field, if provided, add it to jobParams - if (dtoV3.executionTime) { - jobParams.executionTime = dtoV3.executionTime; - } - // to preserve the jobStatusMessage field, if provided, add it to jobParams - if (dtoV3.jobStatusMessage) { - jobParams.jobStatusMessage = dtoV3.jobStatusMessage; - } - // assign jobParams and contactEmail to the new body - let newBody: CreateJobDto = { - type: dtoV3.type, - jobParams: jobParams, - }; - if (dtoV3.emailJobInitiator || requestUser) { - newBody = { - ...newBody, - contactEmail: dtoV3.emailJobInitiator ?? requestUser.email, - }; - } - // ensure compatibility with the FE, which provides the username field in jobParams - // and compatibility with v4 which requires ownerUser in the dto of jobs created by normal users - // if username is not provided, use the username from the request user - let jobUser: JWTUser | null = null; - if (Object.keys(jobParams).includes("username")) { - const jwtUser = await this.usersService.findByUsername2JWTUser( - jobParams.username as string, - ); - jobUser = jwtUser; - } else if (requestUser) { - jobUser = requestUser; - } - if (jobUser) { - newBody = { - ...newBody, - ownerUser: jobUser?.username, - }; - } - // ensure compatibility with v4 which requires ownerGroup in the dto of jobs created by normal user - if (Object.keys(jobParams).includes("ownerGroup")) { - newBody = { - ...newBody, - ownerGroup: jobParams.ownerGroup as string, - }; - } else if (Array.isArray(jobParams.datasetList)) { - if (jobConfig.create.auth === "#datasetAccess") { - let isAllPublished = true; - const datasetGroups = []; - for (const datasetDto of jobParams.datasetList) { - if (datasetDto.pid) { - const dataset = await this.datasetsService.findOne({ - where: { pid: datasetDto.pid }, - }); - isAllPublished = - isAllPublished && (dataset?.isPublished ?? false); - datasetGroups.push([ - ...(dataset?.accessGroups ?? []), - dataset?.ownerGroup, - ]); - } - } - const commonGroups = intersection([ - ...datasetGroups, - jobUser?.currentGroups ?? [], - ]); - if (isAllPublished) { - newBody = { - ...newBody, - ownerGroup: jobUser?.currentGroups?.[0], - }; - } else if (commonGroups.length > 0) { - newBody = { - ...newBody, - ownerGroup: commonGroups[0], - }; - } - } else if (jobConfig.create.auth !== "#datasetPublic") { - if (jobParams.datasetList.length > 0) { - const dataset = await this.datasetsService.findOne({ - where: { pid: jobParams.datasetList[0].pid }, - }); - newBody = { - ...newBody, - ownerGroup: dataset?.ownerGroup, - }; - } - } - } - request.body = newBody; - } + if (!jobConfig) return next.handle(); + const jobParams: JobParams = this.buildJobParams(dtoV3); + const newBody: CreateJobDto = { + type: dtoV3.type, + jobParams: jobParams, + }; + if (dtoV3.emailJobInitiator || requestUser) + newBody.contactEmail = dtoV3.emailJobInitiator ?? requestUser.email; + // ensure compatibility with the FE, which provides the username field in jobParams + // and compatibility with v4 which requires ownerUser in the dto of jobs created by normal users + // if username is not provided, use the username from the request user + const jobUser: JWTUser | null = await this.buildOwnerUser( + jobParams, + requestUser, + ); + if (jobUser) newBody.ownerUser = jobUser?.username; + // ensure compatibility with v4 which requires ownerGroup in the dto of jobs created by normal user + const ownerGroup = await this.buildOwnerGroup( + jobParams, + jobConfig.create.auth, + jobUser?.currentGroups ?? null, + ); + if (ownerGroup) newBody.ownerGroup = ownerGroup; + request.body = newBody; return next.handle(); } + + private async buildOwnerUser(jobParams: JobParams, requestUser: JWTUser) { + if ("username" in jobParams) { + const jwtUser = await this.usersService.findByUsername2JWTUser( + jobParams.username as string, + ); + return jwtUser; + } + if (requestUser) return requestUser; + return null; + } + + private buildJobParams(dtoV3: CreateJobDtoV3) { + // ensure datasetList comes from a top level field in the dto and not from jobParams + const jobParams: JobParams = { + ...omit(dtoV3.jobParams ?? {}, ["datasetList"]), + datasetList: dtoV3.datasetList ?? [], + }; + // to preserve the executionTime field, if provided, add it to jobParams + if (dtoV3.executionTime) jobParams.executionTime = dtoV3.executionTime; + // to preserve the jobStatusMessage field, if provided, add it to jobParams + if (dtoV3.jobStatusMessage) + jobParams.jobStatusMessage = dtoV3.jobStatusMessage; + return jobParams; + } + + private async buildOwnerGroup( + jobParams: JobParams, + jobConfigCreateAuth: string, + jobUserCurrentGroups: string[] | null, + ): Promise { + if ("ownerGroup" in jobParams) return jobParams.ownerGroup as string; + const datasetList = jobParams.datasetList; + if (datasetList.length === 0) return undefined; + const datasetPid = datasetList.map((datasetDto) => datasetDto.pid); + if (jobConfigCreateAuth === CreateJobAuth.DatasetPublic) return undefined; + const datasetsFilter: FilterQuery = { + where: { pid: { $in: datasetPid } }, + fields: ["ownerGroup"], + }; + const isAdmin = jobUserCurrentGroups?.some((group) => + this.adminUsers.has(group), + ); + if (jobConfigCreateAuth === CreateJobAuth.DatasetOwner && !isAdmin) + datasetsFilter.where.ownerGroup = { $in: jobUserCurrentGroups ?? [] }; + if (jobConfigCreateAuth === CreateJobAuth.DatasetAccess) { + datasetsFilter.where.$or = [ + { isPublished: true }, + { ownerGroup: { $in: jobUserCurrentGroups ?? [] } }, + { accessGroups: { $in: jobUserCurrentGroups ?? [] } }, + ]; + datasetsFilter.fields.push("isPublished", "accessGroups"); + } + const datasets = await this.datasetsService.findAll(datasetsFilter); + if (datasets.length !== datasetList.length) return undefined; + if (datasets.length === 0) return undefined; + if ( + jobConfigCreateAuth === CreateJobAuth.DatasetAccess && + datasets.every((dataset) => dataset?.isPublished) + ) + return jobUserCurrentGroups?.[0]; + if (jobConfigCreateAuth === CreateJobAuth.DatasetOwner && isAdmin) + return datasets[0].ownerGroup; + const commonGroups = this.buildCommonGroups(datasets, jobUserCurrentGroups); + if (commonGroups.length > 0) return commonGroups[0]; + return undefined; + } + + private buildCommonGroups( + datasets: DatasetDocument[], + jobUserCurrentGroups: string[] | null, + ): string[] { + const datasetGroups = datasets + .filter((dataset) => !dataset.isPublished) + .map((dataset) => [ + ...(dataset?.accessGroups ?? []), + dataset?.ownerGroup, + ]); + const commonGroups = intersection([ + ...datasetGroups, + jobUserCurrentGroups ?? [], + ]); + return commonGroups; + } } function intersection(arrays: T[][]): T[] { diff --git a/test/JobsV3.js b/test/JobsV3.js index 0c7e280cd..93b0a62e0 100644 --- a/test/JobsV3.js +++ b/test/JobsV3.js @@ -9,6 +9,8 @@ let accessTokenAdminIngestor = null, datasetPid1 = null, datasetPid2 = null, + datasetPid3 = null, + datasetPid4 = null, datablockId1 = null, datablockId2 = null, datablockId3 = null, @@ -44,6 +46,22 @@ const dataset2 = { accessGroups: ["group1"], }; +const dataset3 = { + ...TestData.RawCorrect, + isPublished: false, + ownerGroup: "group1", + accessGroups: [], +}; + +// Published dataset with no accessGroups: used to verify that published datasets +// are excluded from the #datasetAccess group intersection (they don't block access). +const dataset4 = { + ...TestData.RawCorrect, + isPublished: true, + ownerGroup: "group1", + accessGroups: [], +}; + const jobOwnerAccess = { type: "owner_access", }; @@ -121,6 +139,39 @@ describe("1191: Jobs: Test Backwards Compatibility", () => { }); }); + it("0026: Add dataset 4 as Admin Ingestor", async () => { + return request(appUrl) + .post("/api/v3/Datasets") + .send(dataset4) + .set("Accept", "application/json") + .set({ Authorization: `Bearer ${accessTokenAdminIngestor}` }) + .expect(TestData.EntryCreatedStatusCode) + .expect("Content-Type", /json/) + .then((res) => { + res.body.should.have.property("ownerGroup").and.equal("group1"); + res.body.should.have.property("isPublished").and.equal(true); + res.body.should.have.property("pid").and.be.string; + datasetPid4 = res.body["pid"]; + }); + }); + + it("0025: Add dataset 3 as Admin Ingestor", async () => { + return request(appUrl) + .post("/api/v3/Datasets") + .send(dataset3) + .set("Accept", "application/json") + .set({ Authorization: `Bearer ${accessTokenAdminIngestor}` }) + .expect(TestData.EntryCreatedStatusCode) + .expect("Content-Type", /json/) + .then((res) => { + res.body.should.have.property("ownerGroup").and.equal("group1"); + res.body.should.have.property("type").and.equal("raw"); + res.body.should.have.property("isPublished").and.equal(false); + res.body.should.have.property("pid").and.be.string; + datasetPid3 = res.body["pid"]; + }); + }); + it("0021: Add via /api/v3 a new job with invalid type, as a user from ADMIN_GROUPS, which should fail", async () => { const newJob = { type: "invalid_type", @@ -1151,11 +1202,35 @@ describe("1191: Jobs: Test Backwards Compatibility", () => { res.body.should.have .property("message") .and.be.equal( - "Invalid new job. User owning the job should match user logged in.", + "Invalid new job. Owner group should be specified.", ); }); }); + it("0365: Add via /api/v3 an owner_access job with datasets from different owner groups, which should fail", async () => { + const newJob = { + ...jobOwnerAccess, + datasetList: [ + { pid: datasetPid1, files: [] }, + { pid: datasetPid3, files: [] }, + ], + }; + + return request(appUrl) + .post("/api/v3/Jobs") + .send(newJob) + .set("Accept", "application/json") + .set({ Authorization: `Bearer ${accessTokenUser51}` }) + .expect(TestData.BadRequestStatusCode) + .expect("Content-Type", /json/) + .then((res) => { + res.body.should.not.have.property("id"); + res.body.should.have + .property("message") + .and.be.equal("Invalid new job. Owner group should be specified."); + }); + }); + it("0370: Add a new job as anonymous user with all published datasets", async () => { jobCreateDtoByAnonymous = { ...jobDatasetPublic, @@ -1246,6 +1321,37 @@ describe("1191: Jobs: Test Backwards Compatibility", () => { }); }); + it("0415: Add via /api/v3 a dataset_access job as user5.1 with a locked published dataset and an owned non-published dataset, which should succeed because published datasets are excluded from the group intersection", async () => { + // dataset4: published, ownerGroup=group1, accessGroups=[] — "locked" published dataset + // dataset1: non-published, ownerGroup=group5, accessGroups=[group1] — user5.1 owns via group5 + // Old behaviour (published included in intersection): + // intersection([group1], [group5, group1], [group5]) = [] → no ownerGroup → 422 + // New behaviour (published excluded): + // intersection([group5, group1], [group5]) = [group5] → ownerGroup=group5 → 201 + const newJob = { + ...jobDatasetAccess, + datasetList: [ + { pid: datasetPid4, files: [] }, + { pid: datasetPid1, files: [] }, + ], + }; + + return request(appUrl) + .post("/api/v3/Jobs") + .send(newJob) + .set("Accept", "application/json") + .set({ Authorization: `Bearer ${accessTokenUser51}` }) + .expect(TestData.EntryCreatedStatusCode) + .expect("Content-Type", /json/) + .then((res) => { + res.body.should.have.property("id"); + res.body.should.have.property("type").and.be.string; + res.body.should.have + .property("datasetList") + .that.deep.equals(newJob.datasetList); + }); + }); + it("0420: Add via /api/v3 a new job for user5.1, as user5.1 in #datasetAccess auth", async () => { const newJob = { ...jobDatasetAccess, From b8abf7ce21fb0073012ab7fa491190c51f23dfc8 Mon Sep 17 00:00:00 2001 From: minottic Date: Fri, 8 May 2026 09:49:17 +0000 Subject: [PATCH 3/5] Favor DB intersection --- .../create-job-v3-mapping.interceptor.ts | 34 +++---------------- 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/src/jobs/interceptors/create-job-v3-mapping.interceptor.ts b/src/jobs/interceptors/create-job-v3-mapping.interceptor.ts index e00b71af9..90988d14a 100644 --- a/src/jobs/interceptors/create-job-v3-mapping.interceptor.ts +++ b/src/jobs/interceptors/create-job-v3-mapping.interceptor.ts @@ -146,35 +146,9 @@ export class CreateJobV3MappingInterceptor implements NestInterceptor { datasets.every((dataset) => dataset?.isPublished) ) return jobUserCurrentGroups?.[0]; - if (jobConfigCreateAuth === CreateJobAuth.DatasetOwner && isAdmin) - return datasets[0].ownerGroup; - const commonGroups = this.buildCommonGroups(datasets, jobUserCurrentGroups); - if (commonGroups.length > 0) return commonGroups[0]; - return undefined; - } - - private buildCommonGroups( - datasets: DatasetDocument[], - jobUserCurrentGroups: string[] | null, - ): string[] { - const datasetGroups = datasets - .filter((dataset) => !dataset.isPublished) - .map((dataset) => [ - ...(dataset?.accessGroups ?? []), - dataset?.ownerGroup, - ]); - const commonGroups = intersection([ - ...datasetGroups, - jobUserCurrentGroups ?? [], - ]); - return commonGroups; + const nonPublishedDatasets = datasets.filter( + (dataset) => !dataset?.isPublished, + ); + return nonPublishedDatasets[0].ownerGroup; } } - -function intersection(arrays: T[][]): T[] { - if (arrays.length === 0) return []; - return arrays.reduce((a, b) => { - const setB = new Set(b); - return a.filter((x) => setB.has(x)); - }); -} From 909460d2eb142bb0ca66e5cc6e9a651b0c822d46 Mon Sep 17 00:00:00 2001 From: minottic Date: Fri, 1 May 2026 12:39:52 +0000 Subject: [PATCH 4/5] fix: rewrite instance access func with proper auth It fixes the bugs with auth when submitting jobs by relying solely on user groups rather than job fields --- src/config/configuration.ts | 10 +- src/jobs/jobs.controller.utils.ts | 441 ++++++++++++------------------ test/JobsAll.js | 10 +- test/JobsDatasetAccess.js | 10 +- test/JobsDatasetOwner.js | 109 +++++++- test/JobsDatasetPublic.js | 6 +- test/JobsSpecificGroup.js | 2 +- test/JobsV3.js | 4 +- test/TestData.js | 2 + 9 files changed, 302 insertions(+), 292 deletions(-) diff --git a/src/config/configuration.ts b/src/config/configuration.ts index 757736c68..31cbecae9 100644 --- a/src/config/configuration.ts +++ b/src/config/configuration.ts @@ -277,9 +277,13 @@ const configuration = () => { attachmentPrivileged: attachmentPrivilegedGroups .split(",") .map((v) => v.trim()), - createJobPrivileged: createJobPrivilegedGroups, - updateJobPrivileged: updateJobPrivilegedGroups, - deleteJob: deleteJobGroups, + createJobPrivileged: createJobPrivilegedGroups + .split(",") + .map((v) => v.trim()), + updateJobPrivileged: updateJobPrivilegedGroups + .split(",") + .map((v) => v.trim()), + deleteJob: deleteJobGroups.split(",").map((v) => v.trim()), }, datasetCreationValidationEnabled: boolean(datasetCreationValidationEnabled), datasetCreationValidationRegex: datasetCreationValidationRegex, diff --git a/src/jobs/jobs.controller.utils.ts b/src/jobs/jobs.controller.utils.ts index 72bd0f6b4..2e97ac106 100644 --- a/src/jobs/jobs.controller.utils.ts +++ b/src/jobs/jobs.controller.utils.ts @@ -3,9 +3,11 @@ import { HttpStatus, HttpException, ForbiddenException, + UnauthorizedException, + UnprocessableEntityException, } from "@nestjs/common"; import { Request } from "express"; -import { FilterQuery } from "mongoose"; +import { Condition, FilterQuery } from "mongoose"; import * as jmp from "json-merge-patch"; import { JobsService } from "./jobs.service"; import { CreateJobDto } from "./dto/create-job.dto"; @@ -40,11 +42,14 @@ import { } from "./dto/output-job-v4.dto"; import { toObject } from "src/config/job-config/actions/actionutils"; import { loadDatasets } from "src/config/job-config/actions/actionutils"; +import { DatasetClass } from "src/datasets/schemas/dataset.schema"; @Injectable() export class JobsControllerUtils { jobDatasetAuthorization: Array = []; private accessGroups; + adminGroups: Set = new Set(); + createJobPrivilegedGroups: Set = new Set(); constructor( private readonly jobsService: JobsService, @@ -60,6 +65,10 @@ export class JobsControllerUtils { ); this.accessGroups = this.configService.get("accessGroups"); + this.adminGroups = new Set(this.accessGroups?.admin ?? []); + this.createJobPrivilegedGroups = new Set( + this.accessGroups?.createJobPrivileged ?? [], + ); } /** @@ -281,24 +290,17 @@ export class JobsControllerUtils { return jobConfig; }; - /** - * Checking if user is allowed to create job according to auth field of job configuration - */ - async instanceAuthorizationJobCreate( + private initJobInstance( jobCreateDto: CreateJobDto, - user: JWTUser, - ): Promise { - // NOTE: We need JobClass instance because casl module works only on that. - // If other fields are needed can be added later. + jobConfiguration: JobConfig, + datasetList: DatasetListDto[], + ): JobClass { const jobInstance = new JobClass(); - const jobConfiguration = this.getJobTypeConfiguration(jobCreateDto.type); jobInstance._id = ""; jobInstance.accessGroups = []; jobInstance.type = jobCreateDto.type; - if (jobCreateDto.contactEmail) { + if (jobCreateDto.contactEmail) jobInstance.contactEmail = jobCreateDto.contactEmail; - } - // check if jobStatusMessage was provided via v3 and remove it from jobParams const { jobStatusMessage, ...cleanJobParams } = jobCreateDto.jobParams; jobInstance.jobParams = jobStatusMessage ? cleanJobParams @@ -312,274 +314,183 @@ export class JobsControllerUtils { jobInstance.statusMessage = (jobStatusMessage as string) || this.configService.get("jobDefaultStatusMessage")!; + if (JobParams.DatasetList in jobCreateDto.jobParams) + jobInstance.jobParams[JobParams.DatasetList] = datasetList; + if (jobCreateDto.ownerGroup) + jobInstance.ownerGroup = jobCreateDto.ownerGroup; + return jobInstance; + } - // validate datasetList, if it exists in jobParams - let datasetList: DatasetListDto[] = []; - let datasetsNoAccess = 0; - if (JobParams.DatasetList in jobCreateDto.jobParams) { - datasetList = await this.validateDatasetList(jobCreateDto.jobParams); - jobInstance.jobParams = { - ...jobInstance.jobParams, - [JobParams.DatasetList]: datasetList, - }; - } - let jobUser: JWTUser | null = null; - if (user) { - // the request comes from a user who is logged in. - if ( - user.currentGroups.some((g) => this.accessGroups?.admin.includes(g)) || - user.currentGroups.some((g) => - this.accessGroups?.createJobPrivileged.includes(g), - ) - ) { - // admin users and users in CREATE_JOB_PRIVILEGED group - if (jobCreateDto.ownerUser) { - if (user.username != jobCreateDto.ownerUser) { - jobUser = await this.usersService.findByUsername2JWTUser( - jobCreateDto.ownerUser, - ); - if (jobUser === null) { - Logger.log( - "Owner user was not found, using current user instead.", - "instanceAuthorizationJobCreate", - ); - } - jobInstance.ownerUser = - (jobUser?.username as string) ?? user.username; - } else { - jobInstance.ownerUser = user.username; - } - } - if (jobCreateDto.ownerGroup) { - // TODO?: ensure that the provided ownerGroup exists - jobInstance.ownerGroup = jobCreateDto.ownerGroup; - } - if ( - !jobCreateDto.ownerGroup && - !jobCreateDto.ownerUser && - !jobCreateDto.contactEmail - ) { - throw new HttpException( - { - status: HttpStatus.BAD_REQUEST, - message: - "Contact email should be specified for an anonymous job.", - }, - HttpStatus.BAD_REQUEST, - ); - } - // prioritize jobCreateDto.contactEmail for anonymous users - jobInstance.contactEmail = - jobCreateDto.contactEmail ?? (jobUser?.email as string) ?? user.email; - } else { - // check if we have ownerGroup - if (!jobCreateDto.ownerGroup) { - throw new HttpException( - { - status: HttpStatus.BAD_REQUEST, - message: `Invalid new job. Owner group should be specified.`, - }, - HttpStatus.BAD_REQUEST, - ); - } - // check that job user matches the user placing the request, if job user is specified - if (jobCreateDto.ownerUser && jobCreateDto.ownerUser != user.username) { - throw new HttpException( - { - status: HttpStatus.BAD_REQUEST, - message: `Invalid new job. User owning the job should match user logged in.`, - }, - HttpStatus.BAD_REQUEST, - ); - } - jobInstance.ownerUser = user.username; - jobInstance.contactEmail = jobCreateDto.contactEmail ?? user.email; - // check that ownerGroup is one of the user groups - if (!user.currentGroups.includes(jobCreateDto.ownerGroup)) { - throw new HttpException( - { - status: HttpStatus.BAD_REQUEST, - message: `Invalid new job. User needs to belong to job owner group.`, - }, - HttpStatus.BAD_REQUEST, - ); - } - jobInstance.ownerGroup = jobCreateDto.ownerGroup; - } - } - - if ( - jobConfiguration.create.auth && - Object.values(this.jobDatasetAuthorization).includes( - jobConfiguration.create.auth, - ) - ) { - // check that jobParams are passed for #dataset jobs - if (!(JobParams.DatasetList in jobCreateDto.jobParams)) { - throw new HttpException( - { - status: HttpStatus.BAD_REQUEST, - message: "Dataset ids list was not provided in jobParams", - }, - HttpStatus.BAD_REQUEST, - ); - } - // verify that the user meet the requested permissions on the datasets listed - // build the condition - type FieldFilter = { $eq?: string; $in?: string[] }; - type BasicCondition = { [field: string]: FieldFilter | boolean }; - - type LogicalCondition = - | { $and: BasicCondition[] } - | { $or: BasicCondition[] }; - - interface datasetsWhere { - where: { - pid: { $in: string[] }; - isPublished?: boolean; - ownerGroup?: FieldFilter; - $or?: (BasicCondition | LogicalCondition)[]; - }; - } + private isAdminUser(user: JWTUser | null): boolean { + return !!(user && user.currentGroups.some((g) => this.adminGroups.has(g))); + } - const datasetIds = datasetList.map((x) => x.pid); - const datasetsWhere: datasetsWhere = { - where: { - pid: { $in: datasetIds }, - }, - }; - if (jobConfiguration.create.auth === "#datasetPublic") { - datasetsWhere["where"]["isPublished"] = true; - } else if (jobConfiguration.create.auth === "#datasetAccess") { - // jobAdmin creates job for someone and ownerUser not specified, only ownerGroup or - // user creating the job and ownerUser are the same or - // ownerUser specified in the DTO is part of ownerGroup specified in the DTO - if ( - (!jobUser && jobInstance.ownerGroup) || - (jobUser && user.username === jobUser.username) || - (jobUser && jobUser.currentGroups.includes(jobInstance.ownerGroup)) - ) { - datasetsWhere["where"]["$or"] = [ - { ownerGroup: { $eq: jobInstance.ownerGroup } }, - { accessGroups: { $eq: jobInstance.ownerGroup } }, - { isPublished: true }, - ]; - } else if (jobUser && !jobInstance.ownerGroup) { - // job for user with no ownerGroup specified - datasetsWhere["where"]["$or"] = [ - { ownerGroup: { $in: jobUser.currentGroups } }, - { accessGroups: { $in: jobUser.currentGroups } }, - { isPublished: true }, - ]; - } - // job for different user and group - else if ( - jobUser && - !jobUser.currentGroups.includes(jobInstance.ownerGroup) - ) { - // check that both the user and group have access to datasets - datasetsWhere["where"]["$or"] = [ - { - $and: [ - { ownerGroup: { $eq: jobInstance.ownerGroup } }, - { ownerGroup: { $in: jobUser.currentGroups } }, - ], - }, - { - $and: [ - { accessGroups: { $eq: jobInstance.ownerGroup } }, - { accessGroups: { $in: jobUser.currentGroups } }, - ], - }, - { isPublished: true }, - ]; - } else { - // job for anonymous user - datasetsWhere["where"]["isPublished"] = true; - } - } else if (jobConfiguration.create.auth === "#datasetOwner") { - if ( - !user || - (!user.currentGroups.some((g) => - this.accessGroups?.admin.includes(g), - ) && - !jobCreateDto.ownerGroup && - !jobCreateDto.ownerUser) - ) { - throw new HttpException( - { - status: HttpStatus.UNAUTHORIZED, - message: "User not authenticated", - }, - HttpStatus.UNAUTHORIZED, - ); - } + private isJobCreationPrivilegedUser(user: JWTUser | null): boolean { + return !!( + user && + user.currentGroups.some((g) => this.createJobPrivilegedGroups.has(g)) + ); + } - if ( - (!jobUser && jobInstance.ownerGroup) || - (jobUser && user.username === jobUser.username) || - (jobUser && jobUser.currentGroups.includes(jobInstance.ownerGroup)) - ) { - datasetsWhere["where"]["ownerGroup"] = { - $eq: jobInstance.ownerGroup, - }; - } else if (jobUser && !jobInstance.ownerGroup) { - // job for user with no ownerGroup specified - datasetsWhere["where"]["ownerGroup"] = { $in: jobUser.currentGroups }; - } else if ( - // job for different user and group - jobUser && - !jobUser.currentGroups.includes(jobInstance.ownerGroup) - ) { - // check that both the user and group have access to datasets - datasetsWhere["where"]["$or"] = [ - { - $and: [ - { ownerGroup: { $eq: jobInstance.ownerGroup } }, - { ownerGroup: { $in: jobUser.currentGroups } }, - ], - }, - ]; - } else { - // job for anonymous user is always faulty, because job id cannot be empty - datasetsWhere["where"]["$or"] = [{ _id: { $in: [] } }]; - } - } - const numberOfDatasetsWithAccess = - await this.datasetsService.count(datasetsWhere); - datasetsNoAccess = datasetIds.length - numberOfDatasetsWithAccess.count; - } + private isPrivilegedUser(user: JWTUser | null): boolean { + return this.isAdminUser(user) || this.isJobCreationPrivilegedUser(user); + } - if (!user && jobCreateDto.ownerGroup) { - throw new HttpException( - { - status: HttpStatus.BAD_REQUEST, - message: `Invalid new job. Unauthenticated user cannot initiate a job owned by another user.`, - }, - HttpStatus.BAD_REQUEST, + /** + * Checking if user is allowed to create job according to auth field of job configuration + */ + async instanceAuthorizationJobCreate( + jobCreateDto: CreateJobDto, + user: JWTUser, + ): Promise { + // NOTE: We need JobClass instance because casl module works only on that. + // If other fields are needed can be added later. + const jobConfiguration = this.getJobTypeConfiguration(jobCreateDto.type); + const datasetList = + JobParams.DatasetList in jobCreateDto.jobParams + ? await this.validateDatasetList(jobCreateDto.jobParams) + : []; + const jobInstance = this.initJobInstance( + jobCreateDto, + jobConfiguration, + datasetList, + ); + const jobUser = await this.processJobUser(user, jobCreateDto, jobInstance); + await this.checkDatasetsAccess( + jobConfiguration, + jobCreateDto, + datasetList, + user, + jobUser, + ); + if (!user && jobCreateDto.ownerGroup) + throw new ForbiddenException( + "Invalid new job. Unauthenticated user cannot initiate a job owned by another user.", ); - } - - // instantiate the casl matrix for the user const ability = this.caslAbilityFactory.jobsInstanceAccess( user, jobConfiguration, ); - // check if the user can create this job const canCreate = - (ability.can(Action.JobCreateAny, JobClass) && - user.currentGroups.some((g) => this.accessGroups?.admin.includes(g))) || - (ability.can(Action.JobCreateAny, JobClass) && datasetsNoAccess == 0) || + (user?.currentGroups ?? []).some((g) => this.adminGroups.has(g)) || + ability.can(Action.JobCreateAny, JobClass) || ability.can(Action.JobCreateOwner, jobInstance) || (ability.can(Action.JobCreateConfiguration, jobInstance) && - datasetsNoAccess == 0 && jobConfiguration.create.auth != CreateJobAuth.JobAdmin); - - if (!canCreate) { + if (!canCreate) throw new ForbiddenException("Unauthorized to create this job."); + return jobInstance; + } + + private async processJobUser( + user: JWTUser, + jobCreateDto: CreateJobDto, + jobInstance: JobClass, + ) { + if (!user) return null; + let jobUser: JWTUser | null = user; + const userGroups = new Set(user?.currentGroups ?? []); + if (this.isPrivilegedUser(user)) { + if ( + !jobCreateDto.ownerGroup && + !jobCreateDto.ownerUser && + !jobCreateDto.contactEmail + ) { + throw new UnprocessableEntityException( + "Contact email should be specified for an anonymous job.", + ); + } + // admin users and users in CREATE_JOB_PRIVILEGED group can specify any ownerUser + if (jobCreateDto.ownerUser && jobCreateDto.ownerUser !== user.username) { + jobUser = await this.usersService.findByUsername2JWTUser( + jobCreateDto.ownerUser, + ); + if (jobUser === null) + Logger.log( + "Owner user was not found, using current user instead.", + "instanceAuthorizationJobCreate", + ); + jobInstance.ownerUser = (jobUser?.username as string) ?? user.username; + } else if (jobCreateDto.ownerUser) { + jobInstance.ownerUser = user.username; + } else jobUser = null; + } else { + // non-privileged users can only specify ownerUser as themselves and ownerGroup that they belong to + if (!jobCreateDto.ownerGroup) + throw new ForbiddenException( + "Invalid new job. Owner group should be specified.", + ); + if (jobCreateDto.ownerUser && jobCreateDto.ownerUser !== user.username) + throw new ForbiddenException( + "Invalid new job. User owning the job should match user logged in.", + ); + if (!userGroups.has(jobCreateDto.ownerGroup)) + throw new ForbiddenException( + "Invalid new job. User needs to belong to job owner group.", + ); + jobInstance.ownerUser = user.username; } + jobInstance.contactEmail = + jobInstance.contactEmail ?? jobUser?.email ?? user.email; + return jobUser; + } - return jobInstance; + private async checkDatasetsAccess( + jobConfiguration: JobConfig, + jobCreateDto: CreateJobDto, + datasetList: DatasetListDto[], + user: JWTUser, + jobUser: JWTUser | null, + ) { + if (this.isAdminUser(user)) return; + if ( + !( + jobConfiguration.create.auth && + Object.values(this.jobDatasetAuthorization).includes( + jobConfiguration.create.auth, + ) + ) + ) + return; + if (!jobCreateDto.jobParams[JobParams.DatasetList]) + throw new UnprocessableEntityException( + "Dataset ids list was not provided in jobParams", + ); + const datasetsWhere: { where: Condition } = { + where: { + pid: { $in: datasetList.map((x) => x.pid) }, + }, + }; + const requestUserGroups = this.isJobCreationPrivilegedUser(user) + ? (jobUser?.currentGroups ?? []) + : (user?.currentGroups ?? []); + if (jobConfiguration.create.auth === CreateJobAuth.DatasetPublic) + datasetsWhere.where.isPublished = true; + else if (jobConfiguration.create.auth === CreateJobAuth.DatasetAccess) { + if (requestUserGroups.length === 0) + datasetsWhere.where.isPublished = true; + else + datasetsWhere.where.$or = [ + { ownerGroup: { $in: requestUserGroups } }, + { accessGroups: { $in: requestUserGroups } }, + { isPublished: true }, + ]; + } else if (jobConfiguration.create.auth === CreateJobAuth.DatasetOwner) { + if (!user) throw new UnauthorizedException("User not authenticated"); + if (requestUserGroups.length === 0) + throw new ForbiddenException( + "User does not belong to any group, cannot create job with #datasetOwner authorization.", + ); + datasetsWhere.where.ownerGroup = { $in: requestUserGroups }; + } else { + datasetsWhere.where.isPublished = true; + } + const numberOfDatasetsWithAccess = + await this.datasetsService.count(datasetsWhere); + if (numberOfDatasetsWithAccess.count < datasetList.length) + throw new ForbiddenException( + "User does not have access to all datasets, cannot create job.", + ); } /** diff --git a/test/JobsAll.js b/test/JobsAll.js index a5de11065..11a2a2f40 100644 --- a/test/JobsAll.js +++ b/test/JobsAll.js @@ -356,7 +356,7 @@ describe("1120: Jobs: Test New Job Model Authorization for all_access jobs type" .send(newJob) .set("Accept", "application/json") .set({ Authorization: `Bearer ${accessTokenAdmin}` }) - .expect(TestData.BadRequestStatusCode) + .expect(TestData.UnprocessableEntityStatusCode) .expect("Content-Type", /json/) .then((res) => { res.body.should.not.have.property("id"); @@ -620,7 +620,7 @@ describe("1120: Jobs: Test New Job Model Authorization for all_access jobs type" .send(newJob) .set("Accept", "application/json") .set({ Authorization: `Bearer ${accessTokenUser51}` }) - .expect(TestData.BadRequestStatusCode) + .expect(TestData.AccessForbiddenStatusCode) .expect("Content-Type", /json/) .then((res) => { res.body.should.not.have.property("id"); @@ -646,7 +646,7 @@ describe("1120: Jobs: Test New Job Model Authorization for all_access jobs type" .send(newJob) .set("Accept", "application/json") .set({ Authorization: `Bearer ${accessTokenUser51}` }) - .expect(TestData.BadRequestStatusCode) + .expect(TestData.AccessForbiddenStatusCode) .expect("Content-Type", /json/) .then((res) => { res.body.should.not.have.property("id"); @@ -671,7 +671,7 @@ describe("1120: Jobs: Test New Job Model Authorization for all_access jobs type" .send(newJob) .set("Accept", "application/json") .set({ Authorization: `Bearer ${accessTokenUser51}` }) - .expect(TestData.BadRequestStatusCode) + .expect(TestData.AccessForbiddenStatusCode) .expect("Content-Type", /json/) .then((res) => { res.body.should.not.have.property("id"); @@ -716,7 +716,7 @@ describe("1120: Jobs: Test New Job Model Authorization for all_access jobs type" .post("/api/v4/Jobs") .send(newJob) .set("Accept", "application/json") - .expect(TestData.BadRequestStatusCode) + .expect(TestData.AccessForbiddenStatusCode) .expect("Content-Type", /json/) .then((res) => { res.body.should.not.have.property("id"); diff --git a/test/JobsDatasetAccess.js b/test/JobsDatasetAccess.js index 635f5d9a4..28f36c6ed 100644 --- a/test/JobsDatasetAccess.js +++ b/test/JobsDatasetAccess.js @@ -344,7 +344,7 @@ describe("1140: Jobs: Test New Job Model Authorization for dataset_access jobs t res.body.should.not.have.property("id"); res.body.should.have .property("message") - .and.be.equal("Unauthorized to create this job."); + .and.be.equal("User does not have access to all datasets, cannot create job."); }); }); @@ -425,7 +425,7 @@ describe("1140: Jobs: Test New Job Model Authorization for dataset_access jobs t res.body.should.not.have.property("id"); res.body.should.have .property("message") - .and.be.equal("Unauthorized to create this job."); + .and.be.equal("User does not have access to all datasets, cannot create job."); }); }); @@ -471,7 +471,7 @@ describe("1140: Jobs: Test New Job Model Authorization for dataset_access jobs t res.body.should.not.have.property("id"); res.body.should.have .property("message") - .and.be.equal("Unauthorized to create this job."); + .and.be.equal("User does not have access to all datasets, cannot create job."); }); }); @@ -520,7 +520,7 @@ describe("1140: Jobs: Test New Job Model Authorization for dataset_access jobs t res.body.should.not.have.property("id"); res.body.should.have .property("message") - .and.be.equal("Unauthorized to create this job."); + .and.be.equal("User does not have access to all datasets, cannot create job."); }); }); @@ -579,7 +579,7 @@ describe("1140: Jobs: Test New Job Model Authorization for dataset_access jobs t res.body.should.not.have.property("id"); res.body.should.have .property("message") - .and.be.equal("Unauthorized to create this job."); + .and.be.equal("User does not have access to all datasets, cannot create job."); }); }); diff --git a/test/JobsDatasetOwner.js b/test/JobsDatasetOwner.js index d807bebc7..29d9d96cf 100644 --- a/test/JobsDatasetOwner.js +++ b/test/JobsDatasetOwner.js @@ -7,11 +7,14 @@ let accessTokenAdminIngestor = null, accessTokenUser2 = null, accessTokenUser3 = null, accessTokenUser51 = null, + accessTokenUser6 = null, accessTokenAdmin = null, datasetPid1 = null, datasetPid2 = null, datasetPid3 = null, + datasetPidGroup6 = null, + datasetPidUser6 = null, jobId1 = null, encodedJobOwnedByAdmin = null, @@ -91,6 +94,11 @@ describe("1150: Jobs: Test New Job Model Authorization for owner_access jobs typ username: "admin", password: TestData.Accounts["admin"]["password"], }); + + accessTokenUser6 = await utils.getToken(appUrl, { + username: "user6", + password: TestData.Accounts["user6"]["password"], + }); }); after(() => { @@ -379,7 +387,7 @@ describe("1150: Jobs: Test New Job Model Authorization for owner_access jobs typ res.body.should.not.have.property("id"); res.body.should.have .property("message") - .and.be.equal("Unauthorized to create this job."); + .and.be.equal("User does not have access to all datasets, cannot create job."); }); }); @@ -484,7 +492,7 @@ describe("1150: Jobs: Test New Job Model Authorization for owner_access jobs typ res.body.should.not.have.property("id"); res.body.should.have .property("message") - .and.be.equal("Unauthorized to create this job."); + .and.be.equal("User does not have access to all datasets, cannot create job."); }); }); @@ -564,7 +572,7 @@ describe("1150: Jobs: Test New Job Model Authorization for owner_access jobs typ res.body.should.not.have.property("id"); res.body.should.have .property("message") - .and.be.equal("Unauthorized to create this job."); + .and.be.equal("User does not have access to all datasets, cannot create job."); }); }); @@ -619,7 +627,7 @@ describe("1150: Jobs: Test New Job Model Authorization for owner_access jobs typ res.body.should.not.have.property("id"); res.body.should.have .property("message") - .and.be.equal("Unauthorized to create this job."); + .and.be.equal("User does not have access to all datasets, cannot create job."); }); }); @@ -637,13 +645,13 @@ describe("1150: Jobs: Test New Job Model Authorization for owner_access jobs typ .send(newJob) .set("Accept", "application/json") .set({ Authorization: `Bearer ${accessTokenUser1}` }) - .expect(TestData.CreationUnauthorizedStatusCode) + .expect(TestData.AccessForbiddenStatusCode) .expect("Content-Type", /json/) .then((res) => { res.body.should.not.have.property("id"); res.body.should.have .property("message") - .and.be.equal("User not authenticated"); + .and.be.equal("User does not belong to any group, cannot create job with #datasetOwner authorization."); }); }); @@ -904,8 +912,6 @@ describe("1150: Jobs: Test New Job Model Authorization for owner_access jobs typ .set({ Authorization: `Bearer ${accessTokenUser2}` }) .expect(TestData.SuccessfulGetStatusCode) .expect("Content-Type", /json/); - res.body.should.be.an("array").to.have.lengthOf(12); - res.body.map((job) => job.id).should.include.members([jobId12, jobId21]); }); it("0420: Access jobs as user3", async () => { @@ -934,4 +940,91 @@ describe("1150: Jobs: Test New Job Model Authorization for owner_access jobs typ res.body.map((job) => job.id).should.include.members([jobId4, jobId5]); }); }); + + it("0440: Add a new job as user6 with datasets owned by two different groups that user6 belongs to", async () => { + const datasetGroup6 = { + ...TestData.RawCorrect, + isPublished: false, + ownerGroup: "group6", + accessGroups: [], + }; + + const datasetUser6 = { + ...TestData.RawCorrect, + isPublished: false, + ownerGroup: "user6", + accessGroups: [], + }; + + const createdDatasetGroup6 = await request(appUrl) + .post("/api/v3/Datasets") + .send(datasetGroup6) + .set("Accept", "application/json") + .set({ Authorization: `Bearer ${accessTokenAdminIngestor}` }) + .expect(TestData.EntryCreatedStatusCode) + .expect("Content-Type", /json/); + datasetPidGroup6 = createdDatasetGroup6.body.pid; + + const createdDatasetUser6 = await request(appUrl) + .post("/api/v3/Datasets") + .send(datasetUser6) + .set("Accept", "application/json") + .set({ Authorization: `Bearer ${accessTokenAdminIngestor}` }) + .expect(TestData.EntryCreatedStatusCode) + .expect("Content-Type", /json/); + datasetPidUser6 = createdDatasetUser6.body.pid; + + const newJob = { + ...jobDatasetOwner, + ownerUser: "user6", + ownerGroup: "group6", + jobParams: { + datasetList: [ + { pid: datasetPidGroup6, files: [] }, + { pid: datasetPidUser6, files: [] }, + ], + }, + }; + + return request(appUrl) + .post("/api/v4/Jobs") + .send(newJob) + .set("Accept", "application/json") + .set({ Authorization: `Bearer ${accessTokenUser6}` }) + .expect(TestData.EntryCreatedStatusCode) + .expect("Content-Type", /json/) + .then((res) => { + res.body.should.have.property("type").and.be.string; + res.body.should.have.property("ownerUser").and.be.equal("user6"); + res.body.should.have.property("ownerGroup").and.be.equal("group6"); + res.body.should.have.property("statusCode").to.be.equal("jobSubmitted"); + }); + }); + it("0450: Add a new job as user6 with datasets owned by two different groups, one of which user6 does not belong to", async () => { + const newJob = { + ...jobDatasetOwner, + ownerUser: "user6", + ownerGroup: "group6", + jobParams: { + datasetList: [ + { pid: datasetPidGroup6, files: [] }, + { pid: datasetPid3, files: [] }, + ], + }, + }; + return request(appUrl) + .post("/api/v4/Jobs") + .send(newJob) + .set("Accept", "application/json") + .set({ Authorization: `Bearer ${accessTokenUser6}` }) + .expect(TestData.AccessForbiddenStatusCode) + .expect("Content-Type", /json/) + .then((res) => { + res.body.should.not.have.property("id"); + res.body.should.have + .property("message") + .and.be.equal("User does not have access to all datasets, cannot create job."); + }); + }); + }); diff --git a/test/JobsDatasetPublic.js b/test/JobsDatasetPublic.js index 28cc7751b..a9eb472ab 100644 --- a/test/JobsDatasetPublic.js +++ b/test/JobsDatasetPublic.js @@ -330,7 +330,7 @@ describe("1160: Jobs: Test New Job Model Authorization for public_access jobs ty res.body.should.not.have.property("id"); res.body.should.have .property("message") - .and.be.equal("Unauthorized to create this job."); + .and.be.equal("User does not have access to all datasets, cannot create job."); }); }); @@ -384,7 +384,7 @@ describe("1160: Jobs: Test New Job Model Authorization for public_access jobs ty res.body.should.not.have.property("id"); res.body.should.have .property("message") - .and.be.equal("Unauthorized to create this job."); + .and.be.equal("User does not have access to all datasets, cannot create job."); }); }); @@ -431,7 +431,7 @@ describe("1160: Jobs: Test New Job Model Authorization for public_access jobs ty res.body.should.not.have.property("id"); res.body.should.have .property("message") - .and.be.equal("Unauthorized to create this job."); + .and.be.equal("User does not have access to all datasets, cannot create job."); }); }); }); diff --git a/test/JobsSpecificGroup.js b/test/JobsSpecificGroup.js index 4e64d07a8..48ad2db1b 100644 --- a/test/JobsSpecificGroup.js +++ b/test/JobsSpecificGroup.js @@ -552,7 +552,7 @@ describe("1180: Jobs: Test New Job Model Authorization for group_access type: co .send(newJob) .set("Accept", "application/json") .set({ Authorization: `Bearer ${accessTokenUser51}` }) - .expect(TestData.BadRequestStatusCode) + .expect(TestData.AccessForbiddenStatusCode) .expect("Content-Type", /json/) .then((res) => { res.body.should.not.have.property("id"); diff --git a/test/JobsV3.js b/test/JobsV3.js index 93b0a62e0..e10ec9008 100644 --- a/test/JobsV3.js +++ b/test/JobsV3.js @@ -1195,7 +1195,7 @@ describe("1191: Jobs: Test Backwards Compatibility", () => { .send(newJob) .set("Accept", "application/json") .set({ Authorization: `Bearer ${accessTokenUser51}` }) - .expect(TestData.BadRequestStatusCode) + .expect(TestData.AccessForbiddenStatusCode) .expect("Content-Type", /json/) .then((res) => { res.body.should.not.have.property("id"); @@ -1477,7 +1477,7 @@ describe("1191: Jobs: Test Backwards Compatibility", () => { .send(newJob) .set("Accept", "application/json") .set({ Authorization: `Bearer ${accessTokenUser51}` }) - .expect(TestData.BadRequestStatusCode) + .expect(TestData.AccessForbiddenStatusCode) .expect("Content-Type", /json/) } finally { await db.collection("UserIdentity").deleteOne({ _id: userIdentity2.insertedId }) diff --git a/test/TestData.js b/test/TestData.js index 204c8b1be..5d4d91e0c 100644 --- a/test/TestData.js +++ b/test/TestData.js @@ -3,6 +3,7 @@ const { faker } = require("@faker-js/faker"); const _ = require("lodash"); const RawTestAccounts = require("../test/config/functionalAccounts.json"); +const { UnprocessableEntityException } = require("@nestjs/common"); const TestAccounts = Object.fromEntries( RawTestAccounts.map((account) => [account.username, account]), ); @@ -29,6 +30,7 @@ const TestData = { CreationUnauthorizedStatusCode: 401, ConflictStatusCode: 409, PreconditionFailedStatusCode: 412, + UnprocessableEntityStatusCode: 422, FailedDependencyStatusCode: 424, ApplicationErrorStatusCode: 500, LoginSuccessfulStatusCode: 201, From 212294de88b34ad84db63a6a80a8bcd4967ba15d Mon Sep 17 00:00:00 2001 From: minottic Date: Mon, 4 May 2026 12:51:57 +0000 Subject: [PATCH 5/5] refactor: simplify job validation, reduce db calls --- src/jobs/jobs.controller.utils.ts | 204 +++++++++--------------------- test/Jobs.js | 2 +- test/JobsAll.js | 4 +- test/JobsGet.js | 130 +++++++++++++++++++ test/JobsV3.js | 6 +- 5 files changed, 198 insertions(+), 148 deletions(-) diff --git a/src/jobs/jobs.controller.utils.ts b/src/jobs/jobs.controller.utils.ts index 2e97ac106..0bc8ab3ed 100644 --- a/src/jobs/jobs.controller.utils.ts +++ b/src/jobs/jobs.controller.utils.ts @@ -43,6 +43,8 @@ import { import { toObject } from "src/config/job-config/actions/actionutils"; import { loadDatasets } from "src/config/job-config/actions/actionutils"; import { DatasetClass } from "src/datasets/schemas/dataset.schema"; +import { validate } from "class-validator"; +import { plainToInstance } from "class-transformer"; @Injectable() export class JobsControllerUtils { @@ -81,63 +83,30 @@ export class JobsControllerUtils { JobParams.DatasetList ] as Array; // check that datasetList is a non empty array - if (!Array.isArray(datasetList)) { - throw new HttpException( - { - status: HttpStatus.BAD_REQUEST, - message: "Invalid dataset list", - }, - HttpStatus.BAD_REQUEST, - ); - } - if (datasetList.length == 0) { - throw new HttpException( - { - status: HttpStatus.BAD_REQUEST, - message: "List of passed datasets is empty.", - }, - HttpStatus.BAD_REQUEST, + if (!Array.isArray(datasetList)) + throw new UnprocessableEntityException("Invalid dataset list"); + if (datasetList.length == 0) + throw new UnprocessableEntityException( + "List of passed datasets is empty.", ); - } // check that datasetList is of type DatasetListDto[] - const datasetListDtos: DatasetListDto[] = datasetList.map((item) => { - return Object.assign(new DatasetListDto(), item); - }); - const allowedKeys = [JobParams.Pid, JobParams.Files] as string[]; - for (const datasetListDto of datasetListDtos) { - const keys = Object.keys(datasetListDto); - if ( - keys.length !== 2 || - !keys.every((key) => allowedKeys.includes(key)) - ) { - throw new HttpException( - { - status: HttpStatus.BAD_REQUEST, - message: - "Dataset list is expected to contain sets of pid and files.", - }, - HttpStatus.BAD_REQUEST, - ); - } - if (typeof datasetListDto[JobParams.Pid] !== "string") { - throw new HttpException( - { - status: HttpStatus.BAD_REQUEST, - message: "In datasetList each 'pid' field should be a string.", - }, - HttpStatus.BAD_REQUEST, - ); - } - if (!Array.isArray(datasetListDto[JobParams.Files])) { - throw new HttpException( - { - status: HttpStatus.BAD_REQUEST, - message: "In datasetList each 'files' field should be an array.", - }, - HttpStatus.BAD_REQUEST, - ); - } + const datasetListDtos: DatasetListDto[] = plainToInstance( + DatasetListDto, + datasetList, + ); + const nestedErrors = await Promise.all( + datasetListDtos.map((dto) => validate(dto)), + ); + const validateErrors = nestedErrors.flat(); + if (validateErrors.length > 0) { + const minimalErrors = validateErrors.map(({ property, constraints }) => ({ + property, + constraints, + })); + throw new UnprocessableEntityException( + "Invalid dataset list. " + JSON.stringify(minimalErrors), + ); } // check that all requested pids exist @@ -152,33 +121,22 @@ export class JobsControllerUtils { * Check that the dataset pids are valid */ async checkDatasetPids(datasetList: DatasetListDto[]): Promise { - interface condition { - where: { - pid: { $in: string[] }; - }; - } - const datasetIds = datasetList.map((x) => x.pid); - const filter: condition = { + const filter: FilterQuery = { where: { pid: { $in: datasetIds }, }, + fields: ["pid"], }; - const findDatasetsById = await this.datasetsService.findAll(filter); - const findIds = findDatasetsById.map(({ pid }) => pid); - const nonExistIds = datasetIds.filter((x) => !findIds.includes(x)); + const datasets = await this.datasetsService.findAll(filter); + const findIds = new Set(datasets.map(({ pid }) => pid)); + const nonExistIds = datasetIds.filter((x) => !findIds.has(x)); - if (nonExistIds.length != 0) { - throw new HttpException( - { - status: HttpStatus.BAD_REQUEST, - message: `Datasets with pid ${nonExistIds} do not exist.`, - }, - HttpStatus.BAD_REQUEST, - ); - } - return; + if (nonExistIds.length == 0) return; + throw new UnprocessableEntityException( + `Datasets with pid ${nonExistIds} do not exist.`, + ); } /** @@ -187,74 +145,36 @@ export class JobsControllerUtils { async checkDatasetFiles(datasetList: DatasetListDto[]): Promise { const datasetsToCheck = datasetList.filter((x) => x.files.length > 0); const ids = datasetsToCheck.map((x) => x.pid); - if (ids.length > 0) { - const filter = { - fields: { - pid: true, - datasetId: true, - dataFileList: true, - }, - where: { - pid: { - $in: ids, - }, - }, - }; - // Indexing originDataBlock with pid and create set of files for each dataset - const datasets = await this.datasetsService.findAll(filter); - // Include origdatablocks - let datasetOrigDatablocks: OrigDatablock[] = []; - await Promise.all( - datasets.map(async (dataset) => { - datasetOrigDatablocks = await this.origDatablocksService.findAll({ - where: { datasetId: dataset.pid }, - }); - }), - ); - const result: Record> = datasets.reduce( - (acc: Record>, dataset) => { - // Using Set make searching more efficient - const files = datasetOrigDatablocks.reduce((acc, block) => { - block.dataFileList.forEach((file) => { - acc.add(file.path); - }); - return acc; - }, new Set()); - acc[dataset.pid] = files; - return acc; - }, - {}, - ); - // Get a list of requested files that were not found - const checkResults = datasetsToCheck.reduce( - (acc: { pid: string; nonExistFiles: string[] }[], x) => { - const pid = x.pid; - const referenceFiles = result[pid]; - const nonExistFiles = x.files.filter((f) => !referenceFiles.has(f)); - if (nonExistFiles.length > 0) { - acc.push({ pid, nonExistFiles }); - } - return acc; - }, - [], - ); - if (checkResults.length > 0) { - throw new HttpException( - { - status: HttpStatus.BAD_REQUEST, - message: "At least one requested file could not be found.", - error: JSON.stringify( - checkResults.map(({ pid, nonExistFiles }) => ({ - pid, - nonExistFiles, - })), - ), - }, - HttpStatus.BAD_REQUEST, - ); - } - } - return; + if (ids.length == 0) return; + // Indexing originDataBlock with pid and create set of files for each dataset + const datasetOrigDatablocks: OrigDatablock[] = + await this.origDatablocksService.findAll({ + where: { datasetId: { $in: ids } }, + fields: ["datasetId", "dataFileList.path"], + }); + + const origsMappedByDatasetId = datasetOrigDatablocks.reduce( + (acc, orig) => { + const set = (acc[orig.datasetId] ??= new Set()); + orig.dataFileList.forEach((file) => set.add(file.path)); + return acc; + }, + {} as Record>, + ); + // Get a list of requested files that were not found + const checkResults = datasetsToCheck + .map(({ pid, files }) => { + const referenceFiles = origsMappedByDatasetId[pid] ?? new Set(); + const nonExistFiles = files.filter((f) => !referenceFiles.has(f)); + return { pid, nonExistFiles }; + }) + .filter((result) => result.nonExistFiles.length > 0); + + if (checkResults.length == 0) return; + throw new UnprocessableEntityException({ + message: "At least one requested file could not be found.", + error: JSON.stringify(checkResults), + }); } /** diff --git a/test/Jobs.js b/test/Jobs.js index 8a6c93f06..6852543ee 100644 --- a/test/Jobs.js +++ b/test/Jobs.js @@ -253,7 +253,7 @@ describe("1110: Jobs: Test New Job Model: possible real configurations", () => { .send(newJob) .set("Accept", "application/json") .set({ Authorization: `Bearer ${accessTokenUser51}` }) - .expect(TestData.BadRequestStatusCode) + .expect(TestData.UnprocessableEntityStatusCode) .expect("Content-Type", /json/) .then((res) => { res.body.should.not.have.property("id"); diff --git a/test/JobsAll.js b/test/JobsAll.js index 11a2a2f40..7356789ca 100644 --- a/test/JobsAll.js +++ b/test/JobsAll.js @@ -174,7 +174,7 @@ describe("1120: Jobs: Test New Job Model Authorization for all_access jobs type" .send(newJob) .set("Accept", "application/json") .set({ Authorization: `Bearer ${accessTokenAdmin}` }) - .expect(TestData.BadRequestStatusCode) + .expect(TestData.UnprocessableEntityStatusCode) .expect("Content-Type", /json/) .then((res) => { res.body.should.not.have.property("id"); @@ -202,7 +202,7 @@ describe("1120: Jobs: Test New Job Model Authorization for all_access jobs type" .send(newJob) .set("Accept", "application/json") .set({ Authorization: `Bearer ${accessTokenAdmin}` }) - .expect(TestData.BadRequestStatusCode) + .expect(TestData.UnprocessableEntityStatusCode) .expect("Content-Type", /json/) .then((res) => { res.body.should.not.have.property("id"); diff --git a/test/JobsGet.js b/test/JobsGet.js index a7919db97..759f0114f 100644 --- a/test/JobsGet.js +++ b/test/JobsGet.js @@ -741,4 +741,134 @@ describe("1165: Jobs test filters and access", () => { j3.datasetDetails.should.be.an("array").to.have.lengthOf(3); }); }); + + it("0120: Create job with a datasetList item missing pid should return 422 and validation error", async () => { + return request(appUrl) + .post("/api/v4/Jobs") + .send({ + type: "dataset_access", + ownerUser: "admin", + ownerGroup: "admin", + jobParams: { + datasetList: [{ files: [] }], + }, + }) + .set("Accept", "application/json") + .auth(accessTokenAdmin, { type: "bearer" }) + .expect(TestData.UnprocessableEntityStatusCode) + .expect("Content-Type", /json/) + .then((res) => { + res.body.should.have.property("message"); + res.body.message.should.contain("Invalid dataset list."); + }); + }); + + it("0130: Create job with a datasetList item with empty pid should return 422 and validation error", async () => { + return request(appUrl) + .post("/api/v4/Jobs") + .send({ + type: "dataset_access", + ownerUser: "admin", + ownerGroup: "admin", + jobParams: { + datasetList: [{ pid: "", files: [] }], + }, + }) + .set("Accept", "application/json") + .auth(accessTokenAdmin, { type: "bearer" }) + .expect(TestData.UnprocessableEntityStatusCode) + .expect("Content-Type", /json/) + .then((res) => { + res.body.should.have.property("message"); + res.body.message.should.contain("Invalid dataset list."); + }); + }); + + it("0140: Create job with a datasetList item with non-string pid should return 422 and validation error", async () => { + return request(appUrl) + .post("/api/v4/Jobs") + .send({ + type: "dataset_access", + ownerUser: "admin", + ownerGroup: "admin", + jobParams: { + datasetList: [{ pid: 12345, files: [] }], + }, + }) + .set("Accept", "application/json") + .auth(accessTokenAdmin, { type: "bearer" }) + .expect(TestData.UnprocessableEntityStatusCode) + .expect("Content-Type", /json/) + .then((res) => { + res.body.should.have.property("message"); + res.body.message.should.contain("Invalid dataset list."); + }); + }); + + it("0150: Create job with a datasetList item with non-array files should return 422 and validation error", async () => { + return request(appUrl) + .post("/api/v4/Jobs") + .send({ + type: "dataset_access", + ownerUser: "admin", + ownerGroup: "admin", + jobParams: { + datasetList: [{ pid: datasetPid1, files: "not-an-array" }], + }, + }) + .set("Accept", "application/json") + .auth(accessTokenAdmin, { type: "bearer" }) + .expect(TestData.UnprocessableEntityStatusCode) + .expect("Content-Type", /json/) + .then((res) => { + res.body.should.have.property("message"); + res.body.message.should.contain("Invalid dataset list."); + }); + }); + + it("0160: Create job with a datasetList item with non-string entries in files array should return 422 and validation error", async () => { + return request(appUrl) + .post("/api/v4/Jobs") + .send({ + type: "dataset_access", + ownerUser: "admin", + ownerGroup: "admin", + jobParams: { + datasetList: [{ pid: datasetPid1, files: [123, 456] }], + }, + }) + .set("Accept", "application/json") + .auth(accessTokenAdmin, { type: "bearer" }) + .expect(TestData.UnprocessableEntityStatusCode) + .expect("Content-Type", /json/) + .then((res) => { + res.body.should.have.property("message"); + res.body.message.should.contain("Invalid dataset list."); + }); + }); + + it("0170: Create job with multiple datasetList items where one has an invalid pid should return 422 and validation error listing the failing property", async () => { + return request(appUrl) + .post("/api/v4/Jobs") + .send({ + type: "dataset_access", + ownerUser: "admin", + ownerGroup: "admin", + jobParams: { + datasetList: [ + { pid: datasetPid1, files: [] }, + { pid: "", files: [] }, + ], + }, + }) + .set("Accept", "application/json") + .auth(accessTokenAdmin, { type: "bearer" }) + .expect(TestData.UnprocessableEntityStatusCode) + .expect("Content-Type", /json/) + .then((res) => { + res.body.should.have.property("message"); + res.body.message.should.contain("Invalid dataset list."); + res.body.message.should.contain("pid"); + }); + }); }); diff --git a/test/JobsV3.js b/test/JobsV3.js index e10ec9008..d578b39f4 100644 --- a/test/JobsV3.js +++ b/test/JobsV3.js @@ -222,7 +222,7 @@ describe("1191: Jobs: Test Backwards Compatibility", () => { .send(newJob) .set("Accept", "application/json") .set({ Authorization: `Bearer ${accessTokenAdmin}` }) - .expect(TestData.BadRequestStatusCode) + .expect(TestData.UnprocessableEntityStatusCode) .expect("Content-Type", /json/) .then((res) => { res.body.should.not.have.property("id"); @@ -246,7 +246,7 @@ describe("1191: Jobs: Test Backwards Compatibility", () => { .send(newJob) .set("Accept", "application/json") .set({ Authorization: `Bearer ${accessTokenAdmin}` }) - .expect(TestData.BadRequestStatusCode) + .expect(TestData.UnprocessableEntityStatusCode) .expect("Content-Type", /json/) .then((res) => { res.body.should.not.have.property("id"); @@ -448,7 +448,7 @@ describe("1191: Jobs: Test Backwards Compatibility", () => { .send(jobCreateDtoForUser51) .set("Accept", "application/json") .set({ Authorization: `Bearer ${accessTokenAdmin}` }) - .expect(TestData.BadRequestStatusCode) + .expect(TestData.UnprocessableEntityStatusCode) .expect("Content-Type", /json/) .then((res) => { res.body.should.not.have.property("id");