From 52b13aed6c7a634a066909da046a8a600922c186 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Thu, 16 Apr 2026 10:53:40 +0200
Subject: [PATCH 01/29] refactor(metadatakeys):  merge duplicate metadata keys
 across datasets into a single metadata key with combined userGroups

---
 src/datasets/datasets.service.ts              |  17 +--
 src/instruments/instruments.service.ts        |  18 ++-
 .../dto/create-metadata-key.dto.ts            |   8 --
 .../dto/output-metadata-key.dto.ts            |  10 +-
 src/metadata-keys/metadatakeys.service.ts     | 129 +++++++++++-------
 .../schemas/metadatakey.schema.ts             |  22 ++-
 src/proposals/proposals.service.ts            |  23 ++--
 src/samples/samples.service.ts                |  24 ++--
 8 files changed, 148 insertions(+), 103 deletions(-)
diff --git a/src/datasets/datasets.service.ts b/src/datasets/datasets.service.ts
index 7ca344075..696315bd1 100644
--- a/src/datasets/datasets.service.ts
+++ b/src/datasets/datasets.service.ts
@@ -102,10 +102,10 @@ export class DatasetsService {
     doc: UpdateQuery<DatasetDocument>,
   ): MetadataSourceDoc {
     const source: MetadataSourceDoc = {
-      sourceType: "dataset",
-      sourceId: doc.pid,
-      ownerGroup: doc.owner,
-      accessGroups: doc.accessGroups || [],
+      sourceType: this.datasetModel.collection.name,
+      userGroups: Array.from(
+        new Set([doc.ownerGroup, ...(doc.accessGroups ?? [])].filter(Boolean)),
+      ),
       isPublished: doc.isPublished || false,
       metadata: doc.scientificMetadata ?? {},
     };
@@ -501,6 +501,7 @@ export class DatasetsService {
     }
 
     await this.metadataKeysService.replaceManyFromSource(
+      this.createMetadataKeysInstance(existingDataset),
       this.createMetadataKeysInstance(updatedDataset),
     );
     // we were able to find the dataset and update it
@@ -551,6 +552,7 @@ export class DatasetsService {
     }
 
     await this.metadataKeysService.replaceManyFromSource(
+      this.createMetadataKeysInstance(existingDataset),
       this.createMetadataKeysInstance(patchedDataset),
     );
     // we were able to find the dataset and update it
@@ -580,10 +582,9 @@ export class DatasetsService {
     }
 
     // delete metadata keys associated with this dataset
-    await this.metadataKeysService.deleteMany({
-      sourceId: id,
-      sourceType: "dataset",
-    });
+    await this.metadataKeysService.deleteMany(
+      this.createMetadataKeysInstance(deletedDataset),
+    );
 
     return deletedDataset;
   }
diff --git a/src/instruments/instruments.service.ts b/src/instruments/instruments.service.ts
index 921927dcc..6c0b430e6 100644
--- a/src/instruments/instruments.service.ts
+++ b/src/instruments/instruments.service.ts
@@ -32,10 +32,10 @@ export class InstrumentsService {
     doc: UpdateQuery<InstrumentDocument>,
   ): MetadataSourceDoc {
     const source: MetadataSourceDoc = {
-      sourceType: "instrument",
-      sourceId: doc.pid,
-      ownerGroup: doc.ownerGroup,
-      accessGroups: doc.accessGroups || [],
+      sourceType: this.instrumentModel.collection.name,
+      userGroups: Array.from(
+        new Set([doc.ownerGroup, ...(doc.accessGroups ?? [])].filter(Boolean)),
+      ),
       isPublished: doc.isPublished || false,
       metadata: doc.customMetadata ?? {},
     };
@@ -96,6 +96,15 @@ export class InstrumentsService {
     updateInstrumentDto: PartialUpdateInstrumentDto,
   ): Promise<Instrument | null> {
     const username = (this.request.user as JWTUser).username;
+    const existingInstrument = await this.instrumentModel
+      .findOne(filter)
+      .exec();
+
+    if (!existingInstrument) {
+      throw new NotFoundException(
+        `Instrument not found with filter: ${JSON.stringify(filter)}`,
+      );
+    }
 
     const updatedInstrument = this.instrumentModel
       .findOneAndUpdate(
@@ -117,6 +126,7 @@ export class InstrumentsService {
     }
 
     await this.metadataKeysService.replaceManyFromSource(
+      this.createMetadataKeysInstance(existingInstrument),
       this.createMetadataKeysInstance(updatedInstrument),
     );
 
diff --git a/src/metadata-keys/dto/create-metadata-key.dto.ts b/src/metadata-keys/dto/create-metadata-key.dto.ts
index 4e5ca5b2f..cd71b7365 100644
--- a/src/metadata-keys/dto/create-metadata-key.dto.ts
+++ b/src/metadata-keys/dto/create-metadata-key.dto.ts
@@ -11,12 +11,4 @@ export class CreateMetadataKeyDto extends UpdateMetadataKeyDto {
   })
   @IsString()
   sourceType: string;
-
-  @ApiProperty({
-    type: String,
-    required: true,
-    description: "Unique identifier of the source item this key is linked to.",
-  })
-  @IsString()
-  sourceId: string;
 }
diff --git a/src/metadata-keys/dto/output-metadata-key.dto.ts b/src/metadata-keys/dto/output-metadata-key.dto.ts
index ad561ddc3..bf242f8c6 100644
--- a/src/metadata-keys/dto/output-metadata-key.dto.ts
+++ b/src/metadata-keys/dto/output-metadata-key.dto.ts
@@ -3,6 +3,7 @@ import {
   ArrayNotEmpty,
   IsArray,
   IsBoolean,
+  IsNumber,
   IsOptional,
   IsString,
 } from "class-validator";
@@ -56,12 +57,13 @@ export class OutputMetadataKeyDto extends QueryableClass {
   sourceType: string;
 
   @ApiProperty({
-    type: String,
+    type: Number,
     required: true,
-    description: "Unique identifier of the source item this key is linked to.",
+    description:
+      "Tracks how many sources are using this metadata key. Managed internally.",
   })
-  @IsString()
-  sourceId: string;
+  @IsNumber()
+  usageCount: number;
 
   @ApiProperty({
     type: Boolean,
diff --git a/src/metadata-keys/metadatakeys.service.ts b/src/metadata-keys/metadatakeys.service.ts
index fc1b06076..054c0fe7d 100644
--- a/src/metadata-keys/metadatakeys.service.ts
+++ b/src/metadata-keys/metadatakeys.service.ts
@@ -4,14 +4,7 @@ import {
   MetadataKeyClass,
   MetadataKeyDocument,
 } from "./schemas/metadatakey.schema";
-import {
-  DeleteResult,
-  FilterQuery,
-  HydratedDocument,
-  Model,
-  PipelineStage,
-  QueryOptions,
-} from "mongoose";
+import { FilterQuery, Model, PipelineStage, QueryOptions } from "mongoose";
 import { isEmpty } from "lodash";
 import {
   addCreatedByFields,
@@ -24,10 +17,8 @@ type ScientificMetadataEntry = {
 };
 
 export type MetadataSourceDoc = {
-  sourceId: string;
   sourceType: string;
-  ownerGroup: string;
-  accessGroups: string[];
+  userGroups: string[];
   isPublished: boolean;
   metadata: Record<string, unknown>;
 };
@@ -58,6 +49,7 @@ export class MetadataKeysService {
         },
       },
     ];
+
     if (!isEmpty(fieldsProjection)) {
       const projection = parsePipelineProjection(fieldsProjection);
       pipeline.push({ $project: projection });
@@ -79,59 +71,96 @@ export class MetadataKeysService {
     return data;
   }
 
-  async deleteMany(
-    filter: FilterQuery<MetadataKeyDocument>,
-  ): Promise<DeleteResult> {
-    const result = await this.metadataKeyModel.deleteMany(filter).exec();
+  async deleteMany(doc: MetadataSourceDoc): Promise<void> {
+    if (isEmpty(doc.metadata)) {
+      return;
+    }
+    const metadataKeys = Object.keys(doc.metadata);
+    const ids = metadataKeys.map((key) => `${doc.sourceType}_${key}`);
+    const filter = { id: { $in: ids } };
+
+    const decrementUsageOp = {
+      updateMany: { filter, update: { $inc: { usageCount: -1 } } },
+    };
+    const deleteUnusedOp = {
+      deleteMany: { filter: { ...filter, usageCount: { $lte: 0 } } },
+    };
+
+    await this.metadataKeyModel.bulkWrite([decrementUsageOp, deleteUnusedOp]);
 
     Logger.log(
-      `MetadataKeys deleted: ${result.deletedCount ?? 0} With filter:`,
-      { filter },
+      `Removed or decremented MetadataKeys usageCount from source ${doc.sourceType} for key(s): ${metadataKeys.join(", ")}`,
     );
-
-    return result;
   }
 
-  async insertManyFromSource(
-    doc: MetadataSourceDoc,
-  ): Promise<HydratedDocument<MetadataKeyDocument>[] | void> {
+  async insertManyFromSource(doc: MetadataSourceDoc): Promise<void> {
     if (isEmpty(doc.metadata)) {
       return;
     }
-    const userGroups = Array.from(
-      new Set([doc.ownerGroup, ...(doc.accessGroups ?? [])].filter(Boolean)),
-    ) as string[];
-
-    const isPublished = doc.isPublished;
-
-    const metadata = doc.metadata ?? {};
-
-    const docs = Object.entries(metadata).map(([key, entry]) => {
-      const createMetadataKeyDto = {
-        _id: `${doc.sourceType}_${doc.sourceId}_${key}`,
-        id: `${doc.sourceType}_${doc.sourceId}_${key}`,
-        sourceType: doc.sourceType,
-        sourceId: doc.sourceId,
-        key,
-        userGroups,
-        isPublished,
-        humanReadableName: (entry as ScientificMetadataEntry).human_name ?? "",
+
+    const metadata = doc.metadata;
+    const userGroups = doc.userGroups;
+
+    const upsertOps = Object.entries(metadata).map(([key, entry]) => {
+      const id = `${doc.sourceType}_${key}`;
+      const humanReadableName =
+        (entry as ScientificMetadataEntry).human_name ?? "";
+      return {
+        updateOne: {
+          filter: { id },
+          update: {
+            $setOnInsert: addCreatedByFields(
+              {
+                _id: id as MetadataKeyDocument["_id"],
+                id,
+                key,
+                sourceType: doc.sourceType,
+                humanReadableName,
+              },
+              "system",
+            ),
+            ...(doc.isPublished && { $set: { isPublished: true } }),
+            $addToSet: { userGroups: { $each: userGroups } },
+            $inc: { usageCount: 1 },
+          },
+          upsert: true,
+        },
       };
-      return addCreatedByFields(createMetadataKeyDto, "system");
     });
 
+    await this.metadataKeyModel.bulkWrite(upsertOps);
+
     Logger.log(
-      `Created ${docs.length} MetadataKeys from source ${doc.sourceType} with ID ${doc.sourceId}`,
+      `Created or incremented MetadataKeys usageCount from source ${doc.sourceType} for key(s): ${Object.keys(metadata).join(", ")}`,
     );
-
-    return await this.metadataKeyModel.insertMany(docs);
   }
 
-  async replaceManyFromSource(doc: MetadataSourceDoc): Promise<void> {
-    await this.deleteMany({
-      sourceId: doc.sourceId,
-      sourceType: doc.sourceType,
-    });
-    await this.insertManyFromSource(doc);
+  async replaceManyFromSource(
+    oldDoc: MetadataSourceDoc,
+    newDoc: MetadataSourceDoc,
+  ): Promise<void> {
+    const oldKeys = Object.keys(oldDoc.metadata ?? {});
+    const newKeys = Object.keys(newDoc.metadata ?? {});
+
+    const addedKeys = newKeys.filter((k) => !oldKeys.includes(k));
+    const removedKeys = oldKeys.filter((k) => !newKeys.includes(k));
+
+    if (removedKeys.length > 0) {
+      await this.deleteMany({
+        ...oldDoc,
+        metadata: Object.fromEntries(
+          removedKeys.map((k) => [k, oldDoc.metadata[k]]),
+        ),
+      });
+    }
+
+    if (addedKeys.length > 0) {
+      await this.insertManyFromSource({
+        ...newDoc,
+        metadata: Object.fromEntries(
+          addedKeys.map((k) => [k, newDoc.metadata[k]]),
+        ),
+      });
+    }
   }
 }
diff --git a/src/metadata-keys/schemas/metadatakey.schema.ts b/src/metadata-keys/schemas/metadatakey.schema.ts
index b4c6b6d39..d49b74ee8 100644
--- a/src/metadata-keys/schemas/metadatakey.schema.ts
+++ b/src/metadata-keys/schemas/metadatakey.schema.ts
@@ -75,25 +75,21 @@ export class MetadataKeyClass extends QueryableClass {
   })
   sourceType: string;
 
-  @ApiProperty({
-    type: String,
-    required: true,
-    description: "Unique identifier of the source item this key is linked to.",
-  })
-  @Prop({
-    type: String,
-    required: true,
-    index: true,
-  })
-  sourceId: string;
-
   @ApiProperty({
     type: Boolean,
     required: true,
     description: "Flag is true when data are made publicly available.",
   })
-  @Prop({ type: Boolean, required: true })
+  @Prop({ type: Boolean, required: true, default: false })
   isPublished: boolean;
+
+  @ApiProperty({
+    type: Number,
+    description:
+      "Tracks how many sources are using this metadata key. Managed internally.",
+  })
+  @Prop({ type: Number, default: 0 })
+  usageCount: number;
 }
 
 export const MetadataKeySchema = SchemaFactory.createForClass(MetadataKeyClass);
diff --git a/src/proposals/proposals.service.ts b/src/proposals/proposals.service.ts
index 76c80323f..01ae66b07 100644
--- a/src/proposals/proposals.service.ts
+++ b/src/proposals/proposals.service.ts
@@ -179,10 +179,10 @@ export class ProposalsService {
     doc: UpdateQuery<ProposalDocument>,
   ): MetadataSourceDoc {
     const source: MetadataSourceDoc = {
-      sourceType: "proposal",
-      sourceId: doc.proposalId,
-      ownerGroup: doc.ownerGroup,
-      accessGroups: doc.accessGroups || [],
+      sourceType: this.proposalModel.collection.name,
+      userGroups: Array.from(
+        new Set([doc.ownerGroup, ...(doc.accessGroups ?? [])].filter(Boolean)),
+      ),
       isPublished: doc.isPublished || false,
       metadata: doc.metadata ?? {},
     };
@@ -286,6 +286,13 @@ export class ProposalsService {
     updateProposalDto: PartialUpdateProposalDto,
   ): Promise<ProposalClass | null> {
     const username = (this.request.user as JWTUser).username;
+    const existingProposal = await this.proposalModel.findOne(filter).exec();
+
+    if (!existingProposal) {
+      throw new NotFoundException(
+        `Proposal not found with filter: ${JSON.stringify(filter)}`,
+      );
+    }
 
     const updatedProposal = await this.proposalModel
       .findOneAndUpdate(
@@ -309,6 +316,7 @@ export class ProposalsService {
     }
 
     await this.metadataKeysService.replaceManyFromSource(
+      this.createMetadataKeysInstance(existingProposal),
       this.createMetadataKeysInstance(updatedProposal),
     );
 
@@ -326,10 +334,9 @@ export class ProposalsService {
       );
     }
 
-    this.metadataKeysService.deleteMany({
-      sourceType: "proposal",
-      sourceId: deletedProposal.proposalId,
-    });
+    await this.metadataKeysService.deleteMany(
+      this.createMetadataKeysInstance(deletedProposal),
+    );
 
     return deletedProposal;
   }
diff --git a/src/samples/samples.service.ts b/src/samples/samples.service.ts
index 36cc88548..91e25cdf9 100644
--- a/src/samples/samples.service.ts
+++ b/src/samples/samples.service.ts
@@ -38,10 +38,10 @@ export class SamplesService {
     doc: UpdateQuery<SampleDocument>,
   ): MetadataSourceDoc {
     const source: MetadataSourceDoc = {
-      sourceType: "sample",
-      sourceId: doc.sampleId,
-      ownerGroup: doc.ownerGroup,
-      accessGroups: doc.accessGroups || [],
+      sourceType: this.sampleModel.collection.name,
+      userGroups: Array.from(
+        new Set([doc.ownerGroup, ...(doc.accessGroups ?? [])].filter(Boolean)),
+      ),
       isPublished: doc.isPublished || false,
       metadata: doc.sampleCharacteristics ?? {},
     };
@@ -177,6 +177,14 @@ export class SamplesService {
     updateSampleDto: PartialUpdateSampleDto,
   ): Promise<OutputSampleDto | null> {
     const username = (this.request.user as JWTUser).username;
+    const existingSample = await this.sampleModel.findOne(filter).exec();
+
+    if (!existingSample) {
+      throw new NotFoundException(
+        `Sample not found with filter: ${JSON.stringify(filter)}`,
+      );
+    }
+
     const updateData = addUpdatedByField(updateSampleDto, username);
 
     const updateDataMongoose = {
@@ -200,6 +208,7 @@ export class SamplesService {
     }
 
     await this.metadataKeysService.replaceManyFromSource(
+      this.createMetadataKeysInstance(existingSample),
       this.createMetadataKeysInstance(updatedSample),
     );
 
@@ -217,10 +226,9 @@ export class SamplesService {
       );
     }
 
-    this.metadataKeysService.deleteMany({
-      sourceType: "sample",
-      sourceId: deletedSample.sampleId,
-    });
+    this.metadataKeysService.deleteMany(
+      this.createMetadataKeysInstance(deletedSample),
+    );
     return deletedSample;
   }
 }

From 90cc22f6345d75f499ca057ce112fddebd0eae9e Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Mon, 20 Apr 2026 20:40:49 +0200
Subject: [PATCH 02/29] refactor(metadatakeys): merge duplicate keys across
 datasets and track group references

---
 docs/developer-guide/metadatakeys-module.md   | 165 +++-
 ...aset-scientificMetadata-to-metadatakeys.md | 311 ++++++++
 ...aset-scientificMetadata-to-metadatakeys.js | 313 ++++++++
 .../metadatakeys.service.spec.ts              | 534 ++++++++++---
 src/metadata-keys/metadatakeys.service.ts     | 279 +++++--
 .../schemas/metadatakey.schema.ts             |  18 +-
 .../types/metadatakeys-filter-content.ts      |   2 +-
 test/MetadataKeys.js                          | 706 ++++++++++++++----
 test/TestData.js                              |   7 +-
 9 files changed, 2018 insertions(+), 317 deletions(-)
 create mode 100644 docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
 create mode 100644 migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js

diff --git a/docs/developer-guide/metadatakeys-module.md b/docs/developer-guide/metadatakeys-module.md
index f41dbe293..9440a1c08 100644
--- a/docs/developer-guide/metadatakeys-module.md
+++ b/docs/developer-guide/metadatakeys-module.md
@@ -17,74 +17,179 @@ The previous implementation in the Datasets service lacked a permission-based fi
 - Stability: Crashes occurred when retrieval limits were missing or improperly configured.
 - Risks: Users could see metadata keys they did not have permissions to access.
 
+---
+
 ## Module Architecture
 
 This module consists of a dedicated Controller and Service layer that implements a robust permission-aware logic.
 
 ### MetadataKeysController
 
-Provides the API interface for searching keys. Allowed filters can be found in `src/metadata-keys/metadatakeys.service.ts` and exmaple can be find in `src/metadata-keys/types/metadatakeys-filter-content.ts`
+Provides the API interface for searching metadata keys.
 
-- `Endpoint`: GET /metadatakeys (replaces /datasets/metadataKeys)
-- `Method`: findAll
-- `Endpoint Access`: Endpoint can be Accessed by any users
+- **Endpoint**: `GET /metadatakeys` (replaces `GET /datasets/metadataKeys`)
+- **Method**: `findAll`
+- **Access**: Any authenticated user (permission filtering is applied server-side)
+- Allowed filter fields: see `src/metadata-keys/types/metadatakeys-lookup.ts`
+- Filter examples: see `src/metadata-keys/types/metadatakeys-filter-content.ts`
+
+---
 
 ### MetadataKeysService
 
-This handles the business logic and talks to the database. It is divided into user-facing search logic and internal data synchronization.
+Handles business logic and database access. Split into two concerns:
+
+#### 1. User-facing search — `findAll`
+
+Applies CASL permission filters before querying:
+
+| User type            | Visible keys                                             |
+| -------------------- | -------------------------------------------------------- |
+| Admin                | All keys in the system                                   |
+| Authenticated user   | Keys where they belong to `ownerGroup` or `accessGroups` |
+| Unauthenticated user | Keys marked `isPublished: true`                          |
+
+Results default to 100 per page if no limit is provided.
+
+#### 2. Internal synchronization
+
+These methods are called internally when source documents are created, updated, or deleted. They are never called directly from the controller.
+
+##### `insertManyFromSource(doc)`
+
+Called when a dataset is **created** or **gains new metadata keys**.
+
+For each key in `scientificMetadata`:
+
+- Upserts a `MetadataKey` document identified by `${sourceType}_${key}_${humanReadableName}`
+- Increments `usageCount` (total datasets referencing this key)
+- Increments per-group reference counts in `userGroupCounts`
+- Adds new groups to the `userGroups` query array via `$addToSet`
+- Sets `isPublished: true` if the source dataset is published (never unsets inline — the cronjob handles the `true → false` transition)
+
+##### `deleteMany(doc)`
 
-#### Permission Layer (Applies to findAll only):
+Called when a dataset is **deleted** or **loses metadata keys**.
 
-When a user searches for keys, the service uses accessibleBy to automatically append access filters based on CASL permissions:
+Runs three sequential steps:
 
-- `Admins`: Can search and get all metadata keys in the system.
-- `Authenticated Users`: Can only get keys where they are part of the ownerGroup or accessGroups.
-- `Unauthenticated Users`: Can only get keys that are marked as isPublished.
+1. Decrements `usageCount` and per-group counts in `userGroupCounts`
+2. Recomputes the `userGroups` array from the updated counts — drops any group whose count reached zero
+3. Deletes `MetadataKey` documents where `usageCount <= 0`
+   `usageCount` is the authoritative deletion signal. A dataset with no `userGroups` and `isPublished: false` would be invisible to both `userGroupCounts` and `isPublished` checks, so neither alone can substitute for it.
 
-#### Service Methods:
+##### `replaceManyFromSource(oldDoc, newDoc)`
 
-- `findAll`: The only public-facing method. It applies the permission layer and then uses a database aggregation pipeline to find and return the specific keys requested by the user. Every search is limited to 100 results by default, if limit is not provided.
-- `insertManyFromSource`: An internal method that takes an original document (like a Dataset), extracts fields from **scientificMetadata**, **metadata**, and **customMetadata**, and creates new records in the Metadata Keys collection.
-- `deleteMany`: Removes metadata key entries associated with a source document when that document is deleted from the system.
-- `replaceManyFromSource`: Triggered when a source document (e.g., a Dataset or Proposal) is updated. It calls `deleteMany` and `insertManyFromSource` sequentially.
+Called when a dataset is **updated**. Diffs the old and new `scientificMetadata` to produce three disjoint key sets:
 
-## Usage Example
+| Set     | Keys             | Action                                                               |
+| ------- | ---------------- | -------------------------------------------------------------------- |
+| Added   | Only in `newDoc` | `insertManyFromSource`                                               |
+| Removed | Only in `oldDoc` | `deleteMany`                                                         |
+| Shared  | In both          | `updateSharedKeys` (group / isPublished / humanReadableName changes) |
 
-To list all metadata keys associated with a dataset, the user must provide the sourceType and sourceId. If the fields array is provided, only those specific fields will be returned:
+The three sets are disjoint by `_id` so they run in parallel via `Promise.all`.
+
+For shared keys, three things are handled independently:
+
+- **userGroups changed** — added groups are incremented, removed groups are decremented, then `userGroups` array is recomputed from the updated counts
+- **isPublished flipped true** — sets `isPublished: true` inline; `false` is left to the cronjob
+- **humanReadableName changed** — since `humanReadableName` is part of `_id`, this is treated as a delete of the old document + insert of a new one
+
+---
+
+## Schema
+
+Each `MetadataKey` document has the following key fields:
+
+| Field               | Type                  | Description                                                                              |
+| ------------------- | --------------------- | ---------------------------------------------------------------------------------------- |
+| `_id`               | `string`              | Composite key: `${sourceType}_${key}_${humanReadableName}`                               |
+| `key`               | `string`              | The raw metadata key name                                                                |
+| `humanReadableName` | `string`              | Human-readable label from `human_name`, empty string if absent                           |
+| `sourceType`        | `string`              | Source collection: `Dataset`, `Proposal`, `Sample`, etc.                                 |
+| `userGroups`        | `string[]`            | Groups that can see this key — kept in sync with `userGroupCounts` for query performance |
+| `userGroupCounts`   | `Map<string, number>` | Per-group reference counts — source of truth for safe group removal                      |
+| `usageCount`        | `number`              | Total datasets referencing this key — authoritative deletion signal                      |
+| `isPublished`       | `boolean`             | True if any contributing dataset is published                                            |
+
+`userGroups` and `userGroupCounts` are intentionally redundant. `userGroupCounts` owns the truth and enables safe atomic decrements. `userGroups` is a denormalized array kept for query performance — MongoDB's multikey index on `userGroups` makes `{ userGroups: { $in: [...] } }` efficient in a way that querying Map keys directly is not.
+
+---
+
+## Filter Examples
+
+List metadata keys visible to the current user for a given source type:
 
 ```json
 {
   "where": {
-    "sourceType": "dataset",
-    "sourceId": "datasetId"
+    "sourceType": "Dataset"
   },
-  "fields": ["humanreadableName", "key"],
+  "fields": ["key", "humanReadableName"],
   "limits": {
     "limit": 10,
     "skip": 0,
     "sort": {
-      "createdAt": "asc | desc"
+      "createdAt": "desc"
     }
   }
 }
 ```
 
-To retrieve a specific metadata key, use the following filter:
+Find a specific key by name:
+
+```json
+{
+  "where": {
+    "sourceType": "Dataset",
+    "key": "temperature"
+  },
+  "limits": {
+    "limit": 1,
+    "skip": 0
+  }
+}
+```
+
+Partial search on `key`:
 
 ```json
 {
   "where": {
-    "sourceType": "dataset",
-    "sourceId": "datasetId",
-    "key": "metadata_key_name"
+    "sourceType": "Dataset",
+    "key": { "$regex": "temp", "$options": "i" }
   },
-  "fields": ["key"],
   "limits": {
     "limit": 10,
-    "skip": 0,
-    "sort": {
-      "createdAt": "asc | desc"
-    }
+    "skip": 0
   }
 }
 ```
+
+Partial search on `humanReadableName`:
+
+```json
+{
+  "where": {
+    "sourceType": "Dataset",
+    "humanReadableName": { "$regex": "temp", "$options": "i" }
+  },
+  "limits": {
+    "limit": 10,
+    "skip": 0
+  }
+}
+```
+
+---
+
+## Initial Migration
+
+The `MetadataKeys` collection is populated by a migration script that must be run manually before the service is deployed for the first time.
+
+See: `migrations/20260417145401-sync-dataset-scientificMetadata-to-metadatakeys.js`
+
+Documentation: `migrations/20260417145401-sync-dataset-scientificMetadata-to-metadatakeys.md`
+
+> ⚠️ The application will start normally without the migration, but the MetadataKeys service will return empty results until it is run.
diff --git a/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md b/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
new file mode 100644
index 000000000..b966d6388
--- /dev/null
+++ b/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
@@ -0,0 +1,311 @@
+# 20260417145401 — Sync Dataset scientificMetadata to MetadataKeys
+
+## What this migration does
+
+Rebuilds the `MetadataKeys` collection from scratch by scanning all `Dataset` documents and extracting every key found in `scientificMetadata`.
+
+Each unique `(sourceType, key, humanReadableName)` combination becomes one `MetadataKey` document. If the same key appears across multiple datasets, their `userGroups` and counts are merged into a single entry.
+
+---
+
+## Why it exists
+
+The `MetadataKeys` collection powers metadata key search and access control. This is the initial population of the collection — it must be run once before the service can operate.
+
+Each `MetadataKey` document tracks:
+
+- `userGroupCounts: Map<string, number>` — how many datasets per group reference this key, enabling safe atomic group removal when a dataset is updated or deleted
+- `usageCount: number` — total datasets referencing this key regardless of groups, used as the authoritative deletion signal
+
+---
+
+## Source data shape
+
+```js
+// Dataset document
+{
+  ...,
+  _id: "uuid-A",
+  ownerGroup: "group-1",       // mandatory
+  accessGroups: ["group-2"],   // optional
+  isPublished: true,
+  scientificMetadata: {
+    temperature: { value: 100, unit: "C", human_name: "Temperature" },
+    pressure:    { value: 1,   unit: "bar" },   // no human_name
+  }
+}
+```
+
+---
+
+## Output shape
+
+```js
+// MetadataKey document
+{
+  _id: "Dataset_temperature_Temperature",  // ${sourceType}_${key}_${humanReadableName}
+  key: "temperature",
+  humanReadableName: "Temperature",
+  sourceType: "Dataset",
+  isPublished: true,
+  usageCount: 2,
+  userGroups: ["group-1", "group-2"],
+  userGroupCounts: { "group-1": 2, "group-2": 1 },
+  createdBy: "migration",
+  createdAt: ISODate("...")
+}
+```
+
+---
+
+## Pipeline walkthrough
+
+The pipeline uses two datasets as a running example throughout:
+
+|           | ownerGroup | accessGroups | isPublished | scientificMetadata keys |
+| --------- | ---------- | ------------ | ----------- | ----------------------- |
+| Dataset A | group-1    | [group-2]    | true        | temperature, pressure   |
+| Dataset B | group-1    | []           | false       | temperature             |
+
+---
+
+### Stage 1 — Filter
+
+Keep only documents where `scientificMetadata` exists and is an object.
+
+```
+Input:  all Dataset documents
+Output: documents where scientificMetadata exists
+```
+
+---
+
+### Stage 2 — Flatten scientificMetadata
+
+Expose `_id` as `datasetId` and convert `scientificMetadata` from an object to an array of `{k, v}` pairs using `$objectToArray`.
+
+```js
+// Input
+{
+  _id: "uuid-A",
+  ownerGroup: "group-1",
+  accessGroups: ["group-2"],
+  isPublished: true,
+  scientificMetadata: {
+    temperature: { value: 100, unit: "C", human_name: "Temperature" },
+    pressure:    { value: 1, unit: "bar" },
+  }
+}
+
+// Output
+{
+  datasetId: "uuid-A",
+  ownerGroup: "group-1",
+  accessGroups: ["group-2"],
+  isPublished: true,
+  metaArr: [
+    { k: "temperature", v: { value: 100, unit: "C", human_name: "Temperature" } },
+    { k: "pressure",    v: { value: 1, unit: "bar" } },
+  ]
+}
+```
+
+---
+
+### Stage 3 — Unwind metaArr
+
+One document per `(dataset, metadata key)`.
+
+```
+Input:  1 document (Dataset A) with metaArr of length 2
+Output: 2 documents
+
+{ datasetId: "uuid-A", ..., metaArr: { k: "temperature", v: { human_name: "Temperature", ... } } }
+{ datasetId: "uuid-A", ..., metaArr: { k: "pressure",    v: { ... } } }
+```
+
+---
+
+### Stage 4 — Shape each (dataset, key) document
+
+Extract `key`, `humanReadableName`, and compute `userGroups` as the union of `ownerGroup` and `accessGroups`.
+
+```js
+// Input (one of the two documents from stage 3)
+{ datasetId: "uuid-A", ownerGroup: "group-1", accessGroups: ["group-2"], isPublished: true, metaArr: { k: "temperature", v: { human_name: "Temperature" } } }
+
+// Output (all documents after both datasets are processed)
+{ datasetId: "uuid-A", key: "temperature", isPublished: true,  humanReadableName: "Temperature", userGroups: ["group-1", "group-2"] }
+{ datasetId: "uuid-A", key: "pressure",    isPublished: true,  humanReadableName: "",            userGroups: ["group-1", "group-2"] }
+{ datasetId: "uuid-B", key: "temperature", isPublished: false, humanReadableName: "Temperature", userGroups: ["group-1"] }
+```
+
+> `ownerGroup` is mandatory field so no null fallback is needed. `accessGroups` is optional so it falls back to `[]`.
+
+---
+
+### Stage 5 — Unwind userGroups
+
+One document per `(dataset, key, group)`. This is the pivot that makes per-group counting possible.
+
+`preserveNullAndEmptyArrays: true` keeps datasets with no groups so `usageCount` stays accurate.
+
+```
+Input
+{ datasetId: "uuid-A", key: "temperature", humanReadableName: "Temperature", isPublished: true, userGroups: ["group-1", "group-2"] }
+
+Output
+{ datasetId: "uuid-A", key: "temperature", humanReadableName: "Temperature", isPublished: true,  userGroups: "group-1" }
+{ datasetId: "uuid-A", key: "temperature", humanReadableName: "Temperature", isPublished: true,  userGroups: "group-2" }
+{ datasetId: "uuid-A", key: "pressure",    humanReadableName: "",            isPublished: true,  userGroups: "group-1" }
+{ datasetId: "uuid-A", key: "pressure",    humanReadableName: "",            isPublished: true,  userGroups: "group-2" }
+{ datasetId: "uuid-B", key: "temperature", humanReadableName: "Temperature", isPublished: false, userGroups: "group-1" }
+```
+
+---
+
+### Stage 6 — Filter null userGroups
+
+Drop documents where `userGroups` is `null`. This only occurs when `preserveNullAndEmptyArrays` retains a document from a dataset that had no groups at all.
+
+```
+Input:  stream from stage 5, some may have userGroups: null
+Output: only documents where userGroups is a real group name
+```
+
+---
+
+### Stage 7 — Group by (metaKeyId, group)
+
+Each bucket is one unique `(key, humanReadableName, group)` combination.
+`groupCount` = how many datasets with this group reference this key.
+
+```
+Input (5 documents from stage 6)
+{ datasetId: "uuid-A", key: "temperature", humanReadableName: "Temperature", isPublished: true,  userGroups: "group-1" }
+{ datasetId: "uuid-A", key: "temperature", humanReadableName: "Temperature", isPublished: true,  userGroups: "group-2" }
+{ datasetId: "uuid-A", key: "pressure",    humanReadableName: "",            isPublished: true,  userGroups: "group-1" }
+{ datasetId: "uuid-A", key: "pressure",    humanReadableName: "",            isPublished: true,  userGroups: "group-2" }
+{ datasetId: "uuid-B", key: "temperature", humanReadableName: "Temperature", isPublished: false, userGroups: "group-1" }
+
+Output (4 buckets — one per unique metaKeyId + group)
+{ _id: { metaKeyId: "Dataset_temperature_Temperature", group: "group-1" }, key: "temperature", humanReadableName: "Temperature", isPublished: true,  groupCount: 2 }
+{ _id: { metaKeyId: "Dataset_temperature_Temperature", group: "group-2" }, key: "temperature", humanReadableName: "Temperature", isPublished: true,  groupCount: 1 }
+{ _id: { metaKeyId: "Dataset_pressure_",               group: "group-1" }, key: "pressure",    humanReadableName: "",            isPublished: true,  groupCount: 1 }
+{ _id: { metaKeyId: "Dataset_pressure_",               group: "group-2" }, key: "pressure",    humanReadableName: "",            isPublished: true,  groupCount: 1 }
+```
+
+> `isPublished: $max` means the field is `true` if **any** contributing dataset is published.
+
+---
+
+### Stage 8 — Group by metaKeyId
+
+Reassemble one document per metadata key by collecting the per-group buckets from stage 7.
+
+```
+Input (4 buckets from stage 7)
+{ _id: { metaKeyId: "Dataset_temperature_Temperature", group: "group-1" }, groupCount: 2, ... }
+{ _id: { metaKeyId: "Dataset_temperature_Temperature", group: "group-2" }, groupCount: 1, ... }
+{ _id: { metaKeyId: "Dataset_pressure_",               group: "group-1" }, groupCount: 1, ... }
+{ _id: { metaKeyId: "Dataset_pressure_",               group: "group-2" }, groupCount: 1, ... }
+
+Output (2 documents — one per unique key)
+{
+  _id: "Dataset_temperature_Temperature",
+  key: "temperature", humanReadableName: "Temperature", isPublished: true,
+  userGroups: ["group-1", "group-2"],
+  userGroupCountsArr: [{ k: "group-1", v: 2 }, { k: "group-2", v: 1 }],
+  datasetIdSets: [["uuid-A", "uuid-B"], ["uuid-A"]]
+}
+{
+  _id: "Dataset_pressure_",
+  key: "pressure", humanReadableName: "", isPublished: true,
+  userGroups: ["group-1", "group-2"],
+  userGroupCountsArr: [{ k: "group-1", v: 1 }, { k: "group-2", v: 1 }],
+  datasetIdSets: [["uuid-A"], ["uuid-A"]]
+}
+```
+
+---
+
+### Stage 9 — Final projection
+
+- `userGroupCounts`: converts `[{k, v}]` array to a plain object (Mongoose stores this as a `Map`)
+- `usageCount`: unions all per-group `datasetId` sets and counts distinct IDs
+
+```
+// usageCount for temperature:
+// datasetIdSets = [["uuid-A", "uuid-B"], ["uuid-A"]]
+// union         = ["uuid-A", "uuid-B"]
+// count         = 2
+```
+
+```js
+// Output (final document written to MetadataKeys)
+{
+  _id: "Dataset_temperature_Temperature",
+  key: "temperature",
+  sourceType: "Dataset",
+  humanReadableName: "Temperature",
+  isPublished: true,
+  userGroups: ["group-1", "group-2"],
+  userGroupCounts: { "group-1": 2, "group-2": 1 },
+  usageCount: 2,
+  createdBy: "migration",
+  createdAt: ISODate("...")
+}
+```
+
+---
+
+### Stage 10 — Merge into MetadataKeys
+
+- `whenNotMatched: insert` — new documents are inserted as-is
+- `whenMatched` — additively merges counts when two `SOURCE_COLLECTIONS` produce the same `_id`
+
+> This is not triggered today since `sourceType` is part of `_id`, making collisions between collections impossible. It is kept correct for when `Proposal`, `Sample`, or other collections are added as sources.
+
+```js
+// whenMatched userGroupCounts example
+// existing: { "group-1": 3, "group-2": 1 }
+// incoming: { "group-1": 2, "group-3": 5 }
+// result:   { "group-1": 5, "group-2": 1, "group-3": 5 }
+```
+
+---
+
+## Running the migration
+
+```bash
+# Run manually in production — ideally during low-traffic hours.
+# The migration is slow but non-blocking: the app continues to serve
+# requests while it runs. However, MetadataKeys will be unavailable
+# for the duration since the collection is wiped at the start.
+npm run migrate:db:up
+
+# verify
+db.MetadataKeys.countDocuments({ userGroupCounts: { $exists: true } })
+# should equal
+db.MetadataKeys.countDocuments()
+```
+
+> ⚠️ **Do not interrupt.** The migration wipes `MetadataKeys` at the start with `deleteMany`. If interrupted, re-run `migrate:db:up` — the wipe ensures a clean slate on retry.
+
+---
+
+## Rollback
+
+```bash
+npm run migrate:db:down
+```
+
+Wipes the entire `MetadataKeys` collection. The collection will be repopulated on the next `migrate:db:up`.
+
+Verify the rollback succeeded by checking the migration status:
+
+```bash
+npm run migrate:db:status
+```
+
+If the migration shows as `pending` it means the rollback was successful and the migration has not been run yet.
diff --git a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
new file mode 100644
index 000000000..904b48886
--- /dev/null
+++ b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
@@ -0,0 +1,313 @@
+const SOURCE_COLLECTIONS = ["Dataset"];
+
+function buildPipeline(sourceType) {
+  return [
+    // -------------------------------------------------------------------------
+    // Stage 1: Only process documents that have scientificMetadata
+    // -------------------------------------------------------------------------
+    {
+      $match: {
+        scientificMetadata: { $exists: true, $type: "object" },
+      },
+    },
+
+    // -------------------------------------------------------------------------
+    // Stage 2: Flatten scientificMetadata into an array of key/value pairs.
+    // Preserve _id as datasetId so we can count unique datasets later.
+    // -------------------------------------------------------------------------
+    {
+      $project: {
+        datasetId: "$_id",
+        ownerGroup: 1,
+        accessGroups: 1,
+        isPublished: 1,
+        metaArr: { $objectToArray: "$scientificMetadata" },
+      },
+    },
+
+    // -------------------------------------------------------------------------
+    // Stage 3: One document per (dataset, metadata key)
+    // -------------------------------------------------------------------------
+    { $unwind: "$metaArr" },
+
+    // -------------------------------------------------------------------------
+    // Stage 4: Shape each (dataset, key) document.
+    // userGroups is the union of ownerGroup + accessGroups for this dataset.
+    // -------------------------------------------------------------------------
+    {
+      $project: {
+        datasetId: 1,
+        key: "$metaArr.k",
+        isPublished: 1,
+        humanReadableName: { $ifNull: ["$metaArr.v.human_name", ""] },
+        userGroups: {
+          $setUnion: [["$ownerGroup"], { $ifNull: ["$accessGroups", []] }],
+        },
+      },
+    },
+
+    // -------------------------------------------------------------------------
+    // Stage 5: One document per (dataset, key, group).
+    // This is the pivot that makes per-group counting possible.
+    // preserveNullAndEmptyArrays keeps datasets with no groups so
+    // usageCount stays accurate even for group-less datasets.
+    // -------------------------------------------------------------------------
+    {
+      $unwind: {
+        path: "$userGroups",
+        preserveNullAndEmptyArrays: true,
+      },
+    },
+
+    // -------------------------------------------------------------------------
+    // Stage 6: Filter out the empty-string sentinel that comes from ownerGroup
+    // being null/missing. Real group names are never empty strings.
+    // -------------------------------------------------------------------------
+    {
+      $match: {
+        userGroups: { $nin: [null, ""] },
+      },
+    },
+
+    // -------------------------------------------------------------------------
+    // Stage 7: Group by (metaKeyId, group).
+    // groupCount = how many datasets with this group use this key.
+    // datasetIds = set of distinct dataset IDs (for accurate usageCount).
+    // -------------------------------------------------------------------------
+    {
+      $group: {
+        _id: {
+          metaKeyId: {
+            $concat: [`${sourceType}_`, "$key", "_", "$humanReadableName"],
+          },
+          group: "$userGroups",
+        },
+        key: { $first: "$key" },
+        humanReadableName: { $first: "$humanReadableName" },
+        isPublished: { $max: "$isPublished" },
+        groupCount: { $sum: 1 },
+        datasetIds: { $addToSet: "$datasetId" },
+      },
+    },
+
+    // -------------------------------------------------------------------------
+    // Stage 8: Group by metaKeyId to reassemble one document per metadata key.
+    // userGroupCountsArr will become the userGroupCounts Map.
+    // datasetIdSets is a list of per-group dataset ID sets — merged in the
+    // next stage to compute total unique dataset count (usageCount).
+    // -------------------------------------------------------------------------
+    {
+      $group: {
+        _id: "$_id.metaKeyId",
+        key: { $first: "$key" },
+        humanReadableName: { $first: "$humanReadableName" },
+        isPublished: { $max: "$isPublished" },
+        userGroups: { $push: "$_id.group" },
+        userGroupCountsArr: {
+          $push: { k: "$_id.group", v: "$groupCount" },
+        },
+        datasetIdSets: { $push: "$datasetIds" },
+      },
+    },
+
+    // -------------------------------------------------------------------------
+    // Stage 9: Final projection.
+    // userGroupCounts: [{k,v}] array → plain object (stored as Map in Mongoose).
+    // usageCount: union all per-group datasetId sets, count distinct IDs.
+    // -------------------------------------------------------------------------
+    {
+      $project: {
+        _id: 1,
+        key: 1,
+        sourceType: { $literal: sourceType },
+        humanReadableName: 1,
+        isPublished: 1,
+        userGroups: 1,
+        userGroupCounts: { $arrayToObject: "$userGroupCountsArr" },
+        usageCount: {
+          $size: {
+            $reduce: {
+              input: "$datasetIdSets",
+              initialValue: [],
+              in: { $setUnion: ["$$value", "$$this"] },
+            },
+          },
+        },
+        createdBy: { $literal: "migration" },
+        createdAt: { $toDate: "$$NOW" },
+      },
+    },
+
+    // -------------------------------------------------------------------------
+    // Stage 10: Merge into MetadataKeys.
+    // whenMatched handles the (future) case where multiple SOURCE_COLLECTIONS
+    // produce the same _id. Not possible today since sourceType is part of
+    // the _id, but kept correct for when more collections are added.
+    // -------------------------------------------------------------------------
+    {
+      $merge: {
+        into: "MetadataKeys",
+        on: "_id",
+        whenMatched: [
+          {
+            $replaceWith: {
+              $mergeObjects: [
+                "$$new",
+                {
+                  // Preserve original audit fields
+                  createdAt: { $ifNull: ["$createdAt", "$$new.createdAt"] },
+                  createdBy: { $ifNull: ["$createdBy", "$$new.createdBy"] },
+                  updatedBy: { $literal: "migration" },
+                  updatedAt: { $toDate: "$$NOW" },
+
+                  // Merge group arrays
+                  userGroups: {
+                    $setUnion: ["$userGroups", "$$new.userGroups"],
+                  },
+
+                  // Additively merge userGroupCounts — sum counts per group
+                  // across both the existing and incoming documents.
+                  userGroupCounts: {
+                    $arrayToObject: {
+                      $map: {
+                        input: {
+                          $setUnion: [
+                            {
+                              $map: {
+                                input: { $objectToArray: "$userGroupCounts" },
+                                as: "e",
+                                in: "$$e.k",
+                              },
+                            },
+                            {
+                              $map: {
+                                input: {
+                                  $objectToArray: "$$new.userGroupCounts",
+                                },
+                                as: "e",
+                                in: "$$e.k",
+                              },
+                            },
+                          ],
+                        },
+                        as: "group",
+                        in: {
+                          k: "$$group",
+                          v: {
+                            $add: [
+                              {
+                                $ifNull: [
+                                  {
+                                    $getField: {
+                                      field: "$$group",
+                                      input: "$userGroupCounts",
+                                    },
+                                  },
+                                  0,
+                                ],
+                              },
+                              {
+                                $ifNull: [
+                                  {
+                                    $getField: {
+                                      field: "$$group",
+                                      input: "$$new.userGroupCounts",
+                                    },
+                                  },
+                                  0,
+                                ],
+                              },
+                            ],
+                          },
+                        },
+                      },
+                    },
+                  },
+
+                  // Any source being published makes the key published
+                  isPublished: { $or: ["$isPublished", "$$new.isPublished"] },
+
+                  // Add incoming dataset count to existing
+                  usageCount: { $add: ["$usageCount", "$$new.usageCount"] },
+                },
+              ],
+            },
+          },
+        ],
+        whenNotMatched: "insert",
+      },
+    },
+  ];
+}
+
+module.exports = {
+  async up(db) {
+    const start = Date.now();
+    const elapsed = () => `${((Date.now() - start) / 1000).toFixed(1)}s`;
+
+    // Wipe MetadataKeys collection first to ensure a clean state
+    const deleted = await db.collection("MetadataKeys").deleteMany({});
+    console.log(
+      `[${elapsed()}] Cleared ${deleted.deletedCount} existing MetadataKeys`,
+    );
+
+    for (const collection of SOURCE_COLLECTIONS) {
+      const total = await db.collection(collection).countDocuments({
+        scientificMetadata: { $exists: true, $type: "object" },
+      });
+
+      if (total === 0) {
+        console.log(
+          `[${elapsed()}] No documents with scientificMetadata in ${collection}, skipping...`,
+        );
+        continue;
+      }
+
+      console.log(
+        `[${elapsed()}] Processing ${total.toLocaleString()} documents from ${collection}...`,
+      );
+
+      const timer = setInterval(
+        () =>
+          console.log(
+            `[${elapsed()}] ${collection} migration still running...`,
+          ),
+        5000,
+      );
+
+      try {
+        await db
+          .collection(collection)
+          .aggregate(buildPipeline(collection), {
+            allowDiskUse: true,
+            maxTimeMS: 0,
+          })
+          .toArray();
+      } finally {
+        clearInterval(timer);
+      }
+
+      console.log(`[${elapsed()}] ✅ ${collection} done`);
+    }
+
+    const result = await db.collection("MetadataKeys").countDocuments();
+    console.log(
+      `[${elapsed()}] Migration completed — Total MetadataKeys: ${result.toLocaleString()}`,
+    );
+  },
+
+  async down(db) {
+    const start = Date.now();
+    const elapsed = () => `${((Date.now() - start) / 1000).toFixed(1)}s`;
+
+    const total = await db.collection("MetadataKeys").countDocuments();
+    console.log(
+      `[${elapsed()}] Deleting ${total.toLocaleString()} MetadataKeys...`,
+    );
+
+    const deleted = await db.collection("MetadataKeys").deleteMany({});
+    console.log(
+      `[${elapsed()}] Rollback completed — Deleted ${deleted.deletedCount} MetadataKeys`,
+    );
+  },
+};
diff --git a/src/metadata-keys/metadatakeys.service.spec.ts b/src/metadata-keys/metadatakeys.service.spec.ts
index 152b96727..7f66ca359 100644
--- a/src/metadata-keys/metadatakeys.service.spec.ts
+++ b/src/metadata-keys/metadatakeys.service.spec.ts
@@ -1,141 +1,505 @@
-import { Logger } from "@nestjs/common";
 import { getModelToken } from "@nestjs/mongoose";
 import { Test, TestingModule } from "@nestjs/testing";
 import { MetadataKeysService, MetadataSourceDoc } from "./metadatakeys.service";
 import { MetadataKeyClass } from "./schemas/metadatakey.schema";
 
-class MetadataKeyModelMock {
-  aggregate = jest.fn().mockReturnValue({
-    exec: jest.fn().mockResolvedValue([{ key: "k1" }]),
-  });
-  deleteMany = jest.fn().mockReturnValue({
-    exec: jest.fn().mockResolvedValue({ deletedCount: 2 }),
-  });
-  insertMany = jest.fn().mockReturnValue([{ _id: "id1" }]);
-}
+const modelMock = {
+  aggregate: jest
+    .fn()
+    .mockReturnValue({ exec: jest.fn().mockResolvedValue([]) }),
+  bulkWrite: jest.fn().mockResolvedValue({}),
+  updateMany: jest.fn().mockResolvedValue({}),
+  deleteMany: jest.fn().mockResolvedValue({}),
+};
+
+const BASE_DOC: MetadataSourceDoc = {
+  sourceType: "Dataset",
+  userGroups: ["group-1", "group-2"],
+  isPublished: false,
+  metadata: {
+    temperature: { human_name: "Temperature" },
+    pressure: {},
+  },
+};
 
 describe("MetadataKeysService", () => {
   let service: MetadataKeysService;
-  let model: MetadataKeyModelMock;
 
   beforeEach(async () => {
+    jest.clearAllMocks();
+
     const module: TestingModule = await Test.createTestingModule({
       providers: [
         MetadataKeysService,
         {
           provide: getModelToken(MetadataKeyClass.name),
-          useClass: MetadataKeyModelMock,
+          useValue: modelMock,
         },
       ],
     }).compile();
 
     service = await module.resolve(MetadataKeysService);
-    model = module.get(
-      getModelToken(MetadataKeyClass.name),
-    ) as unknown as MetadataKeyModelMock;
   });
 
   it("should be defined", () => {
     expect(service).toBeDefined();
   });
 
-  it("findAll builds aggregation pipeline and executes it", async () => {
-    const filter = {
-      where: { sourceType: "dataset" },
-      fields: ["key"],
-      limits: { limit: 10, skip: 0, sort: { createdAt: "asc" } },
-    };
+  // -------------------------------------------------------------------------
+  // findAll
+  // -------------------------------------------------------------------------
 
-    const accessFilter = { userGroups: { $in: ["ess"] } };
+  describe("findAll", () => {
+    it("builds a pipeline with match, project, sort, skip, limit", async () => {
+      const accessFilter = { userGroups: { $in: ["group-1"] } };
+      const filter = {
+        where: { sourceType: "Dataset" },
+        fields: ["key", "humanReadableName"],
+        limits: { limit: 10, skip: 5, sort: { createdAt: "asc" } },
+      };
 
-    const res = await service.findAll(filter, accessFilter);
+      await service.findAll(filter, accessFilter);
 
-    expect(model.aggregate).toHaveBeenCalledTimes(1);
+      const [pipeline] = modelMock.aggregate.mock.calls[0];
+      expect(pipeline[0]).toEqual({
+        $match: { $and: [accessFilter, filter.where] },
+      });
+      expect(pipeline).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({ $project: expect.any(Object) }),
+          expect.objectContaining({ $sort: expect.any(Object) }),
+          expect.objectContaining({ $skip: 5 }),
+          expect.objectContaining({ $limit: 10 }),
+        ]),
+      );
+    });
+
+    it("applies default limits when none provided", async () => {
+      await service.findAll({}, {});
 
-    const pipeline = model.aggregate.mock.calls[0][0];
-    expect(pipeline[0]).toEqual({
-      $match: { $and: [accessFilter, filter.where] },
+      const [pipeline] = modelMock.aggregate.mock.calls[0];
+      expect(pipeline).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({ $skip: 0 }),
+          expect.objectContaining({ $limit: 100 }),
+        ]),
+      );
     });
 
-    expect(res).toEqual([{ key: "k1" }]);
+    it("omits $project stage when no fields specified", async () => {
+      await service.findAll({}, {});
+
+      const [pipeline] = modelMock.aggregate.mock.calls[0];
+      const hasProject = pipeline.some((s: object) => "$project" in s);
+      expect(hasProject).toBe(false);
+    });
+
+    it("omits $sort stage when no sort specified", async () => {
+      await service.findAll({ limits: { limit: 10, skip: 0 } }, {});
+
+      const [pipeline] = modelMock.aggregate.mock.calls[0];
+      const hasSort = pipeline.some((s: object) => "$sort" in s);
+      expect(hasSort).toBe(false);
+    });
+
+    it("returns aggregation results", async () => {
+      const mockData = [{ key: "temperature" }, { key: "pressure" }];
+      modelMock.aggregate.mockReturnValueOnce({
+        exec: jest.fn().mockResolvedValue(mockData),
+      });
+
+      const result = await service.findAll({}, {});
+
+      expect(result).toEqual(mockData);
+    });
   });
 
-  it("deleteMany deletes and logs result", async () => {
-    const logSpy = jest.spyOn(Logger, "log").mockImplementation();
+  // -------------------------------------------------------------------------
+  // insertManyFromSource
+  // -------------------------------------------------------------------------
 
-    const filter = { sourceType: "dataset", sourceId: "sid" };
-    const result = await service.deleteMany(filter);
+  describe("insertManyFromSource", () => {
+    it("does nothing when metadata is empty", async () => {
+      await service.insertManyFromSource({ ...BASE_DOC, metadata: {} });
+
+      expect(modelMock.bulkWrite).not.toHaveBeenCalled();
+    });
+
+    it("calls bulkWrite with one updateOne per metadata key", async () => {
+      await service.insertManyFromSource(BASE_DOC);
+
+      expect(modelMock.bulkWrite).toHaveBeenCalledTimes(1);
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      expect(ops).toHaveLength(2);
+      expect(ops.every((op: object) => "updateOne" in op)).toBe(true);
+    });
 
-    expect(model.deleteMany).toHaveBeenCalledWith(filter);
-    expect(result.deletedCount).toBe(2);
-    expect(logSpy).toHaveBeenCalled();
+    it("builds correct _id from sourceType + key + humanReadableName", async () => {
+      await service.insertManyFromSource(BASE_DOC);
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      const ids = ops.map(
+        (op: { updateOne: { filter: { _id: string } } }) =>
+          op.updateOne.filter._id,
+      );
+
+      expect(ids).toContain("Dataset_temperature_Temperature");
+      expect(ids).toContain("Dataset_pressure_"); // no human_name
+    });
+
+    it("sets upsert: true on every operation", async () => {
+      await service.insertManyFromSource(BASE_DOC);
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      expect(
+        ops.every(
+          (op: { updateOne: { upsert: boolean } }) =>
+            op.updateOne.upsert === true,
+        ),
+      ).toBe(true);
+    });
+
+    it("increments usageCount and per-group counts", async () => {
+      await service.insertManyFromSource(BASE_DOC);
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      const { $inc } = ops[0].updateOne.update;
+
+      expect($inc.usageCount).toBe(1);
+      expect($inc["userGroupCounts.group-1"]).toBe(1);
+      expect($inc["userGroupCounts.group-2"]).toBe(1);
+    });
+
+    it("adds userGroups via $addToSet", async () => {
+      await service.insertManyFromSource(BASE_DOC);
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      const { $addToSet } = ops[0].updateOne.update;
+
+      expect($addToSet.userGroups.$each).toEqual(["group-1", "group-2"]);
+    });
+
+    it("sets isPublished when dataset is published", async () => {
+      await service.insertManyFromSource({ ...BASE_DOC, isPublished: true });
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      expect(ops[0].updateOne.update.$set.isPublished).toBe(true);
+    });
+
+    it("does not set isPublished when dataset is not published", async () => {
+      await service.insertManyFromSource({ ...BASE_DOC, isPublished: false });
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      expect(ops[0].updateOne.update.$set.isPublished).toBeUndefined();
+    });
+
+    it("uses $setOnInsert for immutable fields", async () => {
+      await service.insertManyFromSource(BASE_DOC);
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      const { $setOnInsert } = ops[0].updateOne.update;
+
+      expect($setOnInsert).toMatchObject({
+        key: expect.any(String),
+        sourceType: "Dataset",
+        humanReadableName: expect.any(String),
+      });
+    });
   });
 
-  it("insertManyFromSource does nothing when metadata is empty", async () => {
-    const doc: MetadataSourceDoc = {
-      sourceId: "sid",
-      sourceType: "dataset",
-      ownerGroup: "ess",
-      accessGroups: ["ess"],
-      isPublished: false,
-      metadata: {},
-    };
+  // -------------------------------------------------------------------------
+  // deleteMany
+  // -------------------------------------------------------------------------
+
+  describe("deleteMany", () => {
+    it("does nothing when metadata is empty", async () => {
+      await service.deleteMany({ ...BASE_DOC, metadata: {} });
+
+      expect(modelMock.updateMany).not.toHaveBeenCalled();
+      expect(modelMock.deleteMany).not.toHaveBeenCalled();
+    });
+
+    it("runs three operations in order: decrement → recompute → delete", async () => {
+      const callOrder: string[] = [];
+      modelMock.updateMany.mockImplementation(() => {
+        callOrder.push("updateMany");
+        return Promise.resolve({});
+      });
+      modelMock.deleteMany.mockImplementation(() => {
+        callOrder.push("deleteMany");
+        return Promise.resolve({});
+      });
+
+      await service.deleteMany(BASE_DOC);
+
+      expect(callOrder).toEqual(["updateMany", "updateMany", "deleteMany"]);
+    });
+
+    it("decrements usageCount and per-group counts", async () => {
+      await service.deleteMany(BASE_DOC);
+
+      const [, firstUpdate] = modelMock.updateMany.mock.calls[0];
+      expect(firstUpdate.$inc.usageCount).toBe(-1);
+      expect(firstUpdate.$inc["userGroupCounts.group-1"]).toBe(-1);
+      expect(firstUpdate.$inc["userGroupCounts.group-2"]).toBe(-1);
+    });
 
-    const res = await service.insertManyFromSource(doc);
+    it("targets correct _ids based on metadata keys and humanReadableName", async () => {
+      await service.deleteMany(BASE_DOC);
 
-    expect(res).toBeUndefined();
-    expect(model.insertMany).not.toHaveBeenCalled();
+      const [firstFilter] = modelMock.updateMany.mock.calls[0];
+      expect(firstFilter._id.$in).toContain("Dataset_temperature_Temperature");
+      expect(firstFilter._id.$in).toContain("Dataset_pressure_");
+    });
+
+    it("deletes documents where usageCount <= 0", async () => {
+      await service.deleteMany(BASE_DOC);
+
+      const [deleteFilter] = modelMock.deleteMany.mock.calls[0];
+      expect(deleteFilter.usageCount).toEqual({ $lte: 0 });
+    });
+
+    it("recompute stage uses $set with $objectToArray on userGroupCounts", async () => {
+      await service.deleteMany(BASE_DOC);
+
+      // second updateMany call is the RECOMPUTE_USER_GROUPS_STAGE
+      const [, recomputeStage] = modelMock.updateMany.mock.calls[1];
+      expect(recomputeStage[0].$set.userGroups).toBeDefined();
+    });
   });
 
-  it("insertManyFromSource creates metadata keys", async () => {
-    const doc: MetadataSourceDoc = {
-      sourceId: "sid",
-      sourceType: "dataset",
-      ownerGroup: "ess",
-      accessGroups: ["swap", "ess"],
-      isPublished: false,
-      metadata: {
-        key1: { human_name: "Key One" },
-        key2: {},
-      },
-    };
+  // -------------------------------------------------------------------------
+  // replaceManyFromSource
+  // -------------------------------------------------------------------------
+
+  describe("replaceManyFromSource", () => {
+    it("calls insertManyFromSource for keys only in newDoc", async () => {
+      const insertSpy = jest
+        .spyOn(service, "insertManyFromSource")
+        .mockResolvedValue();
+
+      const oldDoc = {
+        ...BASE_DOC,
+        metadata: { temperature: { human_name: "Temperature" } },
+      };
+      const newDoc = {
+        ...BASE_DOC,
+        metadata: {
+          temperature: { human_name: "Temperature" },
+          wavelength: {},
+        },
+      };
+
+      await service.replaceManyFromSource(oldDoc, newDoc);
+
+      expect(insertSpy).toHaveBeenCalledWith(
+        expect.objectContaining({ metadata: { wavelength: {} } }),
+      );
+    });
 
-    const res = await service.insertManyFromSource(doc);
+    it("calls deleteMany for keys only in oldDoc", async () => {
+      const deleteSpy = jest.spyOn(service, "deleteMany").mockResolvedValue();
 
-    expect(model.insertMany).toHaveBeenCalledTimes(1);
+      const oldDoc = {
+        ...BASE_DOC,
+        metadata: { temperature: { human_name: "Temperature" }, pressure: {} },
+      };
+      const newDoc = {
+        ...BASE_DOC,
+        metadata: { temperature: { human_name: "Temperature" } },
+      };
 
-    const insertedDocs = model.insertMany.mock.calls[0][0];
-    expect(insertedDocs).toHaveLength(2);
+      await service.replaceManyFromSource(oldDoc, newDoc);
 
-    expect(insertedDocs[0]).toMatchObject({
-      key: "key1",
-      humanReadableName: "Key One",
-      sourceType: "dataset",
-      sourceId: "sid",
+      expect(deleteSpy).toHaveBeenCalledWith(
+        expect.objectContaining({ metadata: { pressure: {} } }),
+      );
     });
 
-    expect(res).toEqual([{ _id: "id1" }]);
+    it("does not call deleteMany or insertManyFromSource for shared unchanged keys", async () => {
+      const deleteSpy = jest.spyOn(service, "deleteMany").mockResolvedValue();
+      const insertSpy = jest
+        .spyOn(service, "insertManyFromSource")
+        .mockResolvedValue();
+
+      const doc = {
+        ...BASE_DOC,
+        metadata: { temperature: { human_name: "Temperature" } },
+      };
+
+      await service.replaceManyFromSource(doc, doc);
+
+      expect(deleteSpy).not.toHaveBeenCalled();
+      expect(insertSpy).not.toHaveBeenCalled();
+    });
+
+    it("handles all three buckets simultaneously", async () => {
+      const deleteSpy = jest.spyOn(service, "deleteMany").mockResolvedValue();
+      const insertSpy = jest
+        .spyOn(service, "insertManyFromSource")
+        .mockResolvedValue();
+
+      const oldDoc = {
+        ...BASE_DOC,
+        metadata: { temperature: { human_name: "Temperature" }, pressure: {} },
+      };
+      const newDoc = {
+        ...BASE_DOC,
+        metadata: {
+          temperature: { human_name: "Temperature" },
+          wavelength: {},
+        },
+      };
+
+      await service.replaceManyFromSource(oldDoc, newDoc);
+
+      // pressure removed → deleteMany
+      expect(deleteSpy).toHaveBeenCalledWith(
+        expect.objectContaining({ metadata: { pressure: {} } }),
+      );
+      // wavelength added → insertManyFromSource
+      expect(insertSpy).toHaveBeenCalledWith(
+        expect.objectContaining({ metadata: { wavelength: {} } }),
+      );
+    });
+
+    it("does nothing when both docs have empty metadata", async () => {
+      const deleteSpy = jest.spyOn(service, "deleteMany").mockResolvedValue();
+      const insertSpy = jest
+        .spyOn(service, "insertManyFromSource")
+        .mockResolvedValue();
+
+      const empty = { ...BASE_DOC, metadata: {} };
+      await service.replaceManyFromSource(empty, empty);
+
+      expect(deleteSpy).not.toHaveBeenCalled();
+      expect(insertSpy).not.toHaveBeenCalled();
+    });
   });
 
-  it("replaceManyFromSource deletes then inserts", async () => {
-    const doc: MetadataSourceDoc = {
-      sourceId: "sid",
-      sourceType: "dataset",
-      ownerGroup: "ess",
-      accessGroups: ["swap"],
-      isPublished: false,
-      metadata: { key1: {} },
-    };
+  // -------------------------------------------------------------------------
+  // updateSharedKeys (tested via replaceManyFromSource)
+  // -------------------------------------------------------------------------
+
+  describe("updateSharedKeys (via replaceManyFromSource)", () => {
+    it("increments added groups and decrements removed groups", async () => {
+      const oldDoc: MetadataSourceDoc = {
+        ...BASE_DOC,
+        userGroups: ["group-1"],
+        metadata: { temperature: { human_name: "Temperature" } },
+      };
+      const newDoc: MetadataSourceDoc = {
+        ...BASE_DOC,
+        userGroups: ["group-2"],
+        metadata: { temperature: { human_name: "Temperature" } },
+      };
+
+      await service.replaceManyFromSource(oldDoc, newDoc);
+
+      const [, update] = modelMock.updateMany.mock.calls[0];
+      expect(update.$inc["userGroupCounts.group-2"]).toBe(1);
+      expect(update.$inc["userGroupCounts.group-1"]).toBe(-1);
+    });
+
+    it("runs RECOMPUTE_USER_GROUPS_STAGE when groups are removed", async () => {
+      const oldDoc: MetadataSourceDoc = {
+        ...BASE_DOC,
+        userGroups: ["group-1", "group-2"],
+        metadata: { temperature: { human_name: "Temperature" } },
+      };
+      const newDoc: MetadataSourceDoc = {
+        ...BASE_DOC,
+        userGroups: ["group-1"],
+        metadata: { temperature: { human_name: "Temperature" } },
+      };
+
+      await service.replaceManyFromSource(oldDoc, newDoc);
+
+      // two updateMany calls: one for the $inc, one for RECOMPUTE
+      expect(modelMock.updateMany).toHaveBeenCalledTimes(2);
+    });
+
+    it("does not run RECOMPUTE_USER_GROUPS_STAGE when only groups are added", async () => {
+      const oldDoc: MetadataSourceDoc = {
+        ...BASE_DOC,
+        userGroups: ["group-1"],
+        metadata: { temperature: { human_name: "Temperature" } },
+      };
+      const newDoc: MetadataSourceDoc = {
+        ...BASE_DOC,
+        userGroups: ["group-1", "group-2"],
+        metadata: { temperature: { human_name: "Temperature" } },
+      };
+
+      await service.replaceManyFromSource(oldDoc, newDoc);
+
+      // only one updateMany — no RECOMPUTE needed when nothing is removed
+      expect(modelMock.updateMany).toHaveBeenCalledTimes(1);
+    });
+
+    it("sets isPublished when it flips from false to true", async () => {
+      const oldDoc: MetadataSourceDoc = {
+        ...BASE_DOC,
+        isPublished: false,
+        metadata: { temperature: { human_name: "Temperature" } },
+      };
+      const newDoc: MetadataSourceDoc = {
+        ...BASE_DOC,
+        isPublished: true,
+        metadata: { temperature: { human_name: "Temperature" } },
+      };
+
+      await service.replaceManyFromSource(oldDoc, newDoc);
+
+      const [, update] = modelMock.updateMany.mock.calls[0];
+      expect(update.$set.isPublished).toBe(true);
+    });
+
+    it("does not touch isPublished when it flips from true to false", async () => {
+      const oldDoc: MetadataSourceDoc = {
+        ...BASE_DOC,
+        isPublished: true,
+        metadata: { temperature: { human_name: "Temperature" } },
+      };
+      const newDoc: MetadataSourceDoc = {
+        ...BASE_DOC,
+        isPublished: false,
+        metadata: { temperature: { human_name: "Temperature" } },
+      };
+
+      await service.replaceManyFromSource(oldDoc, newDoc);
+
+      // no group changes and no publishedFlippedOn → updateMany not called at all
+      expect(modelMock.updateMany).not.toHaveBeenCalled();
+    });
+
+    it("treats humanReadableName change as delete + insert", async () => {
+      const deleteSpy = jest.spyOn(service, "deleteMany").mockResolvedValue();
+      const insertSpy = jest
+        .spyOn(service, "insertManyFromSource")
+        .mockResolvedValue();
 
-    const deleteSpy = jest.spyOn(service, "deleteMany");
-    const insertSpy = jest.spyOn(service, "insertManyFromSource");
+      const oldDoc: MetadataSourceDoc = {
+        ...BASE_DOC,
+        metadata: { temperature: { human_name: "Temperature" } },
+      };
+      const newDoc: MetadataSourceDoc = {
+        ...BASE_DOC,
+        metadata: { temperature: { human_name: "Temp (renamed)" } },
+      };
 
-    await service.replaceManyFromSource(doc);
+      await service.replaceManyFromSource(oldDoc, newDoc);
 
-    expect(deleteSpy).toHaveBeenCalledWith({
-      sourceId: "sid",
-      sourceType: "dataset",
+      expect(deleteSpy).toHaveBeenCalledWith(
+        expect.objectContaining({
+          metadata: { temperature: { human_name: "Temperature" } },
+        }),
+      );
+      expect(insertSpy).toHaveBeenCalledWith(
+        expect.objectContaining({
+          metadata: { temperature: { human_name: "Temp (renamed)" } },
+        }),
+      );
     });
-    expect(insertSpy).toHaveBeenCalledWith(doc);
   });
 });
diff --git a/src/metadata-keys/metadatakeys.service.ts b/src/metadata-keys/metadatakeys.service.ts
index 054c0fe7d..0a2b84a63 100644
--- a/src/metadata-keys/metadatakeys.service.ts
+++ b/src/metadata-keys/metadatakeys.service.ts
@@ -23,6 +23,27 @@ export type MetadataSourceDoc = {
   metadata: Record<string, unknown>;
 };
 
+// Recomputes the userGroups string array from userGroupCounts after a decrement.
+// Retains only group names whose reference count is still above zero.
+const RECOMPUTE_USER_GROUPS_STAGE = [
+  {
+    $set: {
+      userGroups: {
+        $map: {
+          input: {
+            $filter: {
+              input: { $objectToArray: "$userGroupCounts" },
+              cond: { $gt: ["$$this.v", 0] },
+            },
+          },
+          as: "entry",
+          in: "$$entry.k",
+        },
+      },
+    },
+  },
+];
+
 @Injectable({ scope: Scope.REQUEST })
 export class MetadataKeysService {
   constructor(
@@ -71,70 +92,116 @@ export class MetadataKeysService {
     return data;
   }
 
-  async deleteMany(doc: MetadataSourceDoc): Promise<void> {
-    if (isEmpty(doc.metadata)) {
-      return;
-    }
-    const metadataKeys = Object.keys(doc.metadata);
-    const ids = metadataKeys.map((key) => `${doc.sourceType}_${key}`);
-    const filter = { id: { $in: ids } };
-
-    const decrementUsageOp = {
-      updateMany: { filter, update: { $inc: { usageCount: -1 } } },
-    };
-    const deleteUnusedOp = {
-      deleteMany: { filter: { ...filter, usageCount: { $lte: 0 } } },
-    };
-
-    await this.metadataKeyModel.bulkWrite([decrementUsageOp, deleteUnusedOp]);
-
-    Logger.log(
-      `Removed or decremented MetadataKeys usageCount from source ${doc.sourceType} for key(s): ${metadataKeys.join(", ")}`,
-    );
-  }
-
+  /**
+   * Called when a dataset is created or gains new metadata keys.
+   *
+   * For each key:
+   * - Creates a new MetadataKey entry if none exists (upsert)
+   * - Increments usageCount and per-group reference counts
+   * - Adds new groups to the userGroups query array
+   * - Sets isPublished if the source dataset is published (never unsets inline)
+   * - Updates humanReadableName to the latest value
+   */
   async insertManyFromSource(doc: MetadataSourceDoc): Promise<void> {
-    if (isEmpty(doc.metadata)) {
-      return;
-    }
+    if (isEmpty(doc.metadata)) return;
 
-    const metadata = doc.metadata;
-    const userGroups = doc.userGroups;
+    const { sourceType, userGroups, isPublished, metadata } = doc;
 
-    const upsertOps = Object.entries(metadata).map(([key, entry]) => {
-      const id = `${doc.sourceType}_${key}`;
+    const ops = Object.entries(metadata).map(([key, entry]) => {
       const humanReadableName =
         (entry as ScientificMetadataEntry).human_name ?? "";
+      const id = this.buildId(sourceType, key, humanReadableName);
+
       return {
         updateOne: {
-          filter: { id },
+          filter: { _id: id },
           update: {
             $setOnInsert: addCreatedByFields(
               {
-                _id: id as MetadataKeyDocument["_id"],
-                id,
+                _id: id as unknown as MetadataKeyDocument["_id"],
                 key,
-                sourceType: doc.sourceType,
+                sourceType,
                 humanReadableName,
               },
               "system",
             ),
-            ...(doc.isPublished && { $set: { isPublished: true } }),
+            $set: {
+              // Only ever set to true inline. The false case is handled
+              // by the reconciliation cronjob since it transitions rarely.
+              ...(isPublished && { isPublished: true }),
+            },
             $addToSet: { userGroups: { $each: userGroups } },
-            $inc: { usageCount: 1 },
+            $inc: {
+              usageCount: 1,
+              ...this.groupCountDeltas(userGroups, 1),
+            },
           },
           upsert: true,
         },
       };
     });
 
-    await this.metadataKeyModel.bulkWrite(upsertOps);
+    await this.metadataKeyModel.bulkWrite(ops);
 
     Logger.log(
-      `Created or incremented MetadataKeys usageCount from source ${doc.sourceType} for key(s): ${Object.keys(metadata).join(", ")}`,
+      `Upserted MetadataKeys for ${sourceType}: ${Object.keys(metadata).join(", ")}`,
     );
   }
 
+  /**
+   * Called when a dataset is deleted or loses metadata keys.
+   *
+   * Three-step process to ensure groups are safely removed:
+   *   1. Decrement usageCount and per-group reference counts
+   *   2. Recompute userGroups array from the updated counts
+   *      (drops any group whose count reached zero)
+   *   3. Delete entries no longer referenced by any dataset
+   *
+   * usageCount is the authoritative deletion signal because a dataset
+   * with no userGroups and isPublished: false would be invisible to
+   * both userGroupCounts and isPublished checks.
+   */
+  async deleteMany(doc: MetadataSourceDoc): Promise<void> {
+    if (isEmpty(doc.metadata)) return;
+
+    const { sourceType, userGroups, metadata } = doc;
+    const keys = Object.keys(metadata);
+    const ids = keys.map((key) => {
+      const humanReadableName =
+        (metadata[key] as ScientificMetadataEntry)?.human_name ?? "";
+      return this.buildId(sourceType, key, humanReadableName);
+    });
+    const filter = { _id: { $in: ids } };
+
+    await this.metadataKeyModel.updateMany(filter, {
+      $inc: {
+        usageCount: -1,
+        ...this.groupCountDeltas(userGroups, -1),
+      },
+    });
+
+    await this.metadataKeyModel.updateMany(filter, RECOMPUTE_USER_GROUPS_STAGE);
+
+    await this.metadataKeyModel.deleteMany({
+      ...filter,
+      usageCount: { $lte: 0 },
+    });
+
+    Logger.log(
+      `Decremented or deleted MetadataKeys for ${sourceType}: ${keys.join(", ")}`,
+    );
+  }
+  /**
+   * Called when a dataset is updated.
+   *
+   * Diffs the old and new metadata to produce three disjoint key sets:
+   *   - added:   only in newDoc → insertManyFromSource
+   *   - removed: only in oldDoc → deleteMany
+   *   - shared:  in both       → updateSharedKeys (handles group / isPublished
+   *                              / humanReadableName changes)
+   *
+   * The three sets are disjoint by _id so Promise.all is safe.
+   */
   async replaceManyFromSource(
     oldDoc: MetadataSourceDoc,
     newDoc: MetadataSourceDoc,
@@ -144,23 +211,135 @@ export class MetadataKeysService {
 
     const addedKeys = newKeys.filter((k) => !oldKeys.includes(k));
     const removedKeys = oldKeys.filter((k) => !newKeys.includes(k));
+    const sharedKeys = newKeys.filter((k) => oldKeys.includes(k));
+
+    await Promise.all([
+      removedKeys.length > 0 &&
+        this.deleteMany({
+          ...oldDoc,
+          metadata: Object.fromEntries(
+            removedKeys.map((k) => [k, oldDoc.metadata[k]]),
+          ),
+        }),
+
+      addedKeys.length > 0 &&
+        this.insertManyFromSource({
+          ...newDoc,
+          metadata: Object.fromEntries(
+            addedKeys.map((k) => [k, newDoc.metadata[k]]),
+          ),
+        }),
+
+      sharedKeys.length > 0 &&
+        this.updateSharedKeys(sharedKeys, oldDoc, newDoc),
+    ]);
+  }
+
+  private buildId(
+    sourceType: string,
+    key: string,
+    humanReadableName: string,
+  ): string {
+    return `${sourceType}_${key}_${humanReadableName}`;
+  }
 
-    if (removedKeys.length > 0) {
-      await this.deleteMany({
-        ...oldDoc,
-        metadata: Object.fromEntries(
-          removedKeys.map((k) => [k, oldDoc.metadata[k]]),
-        ),
+  /**
+   * Builds $inc paths for per-group reference counts.
+   *
+   * groupCountDeltas(["groupA", "groupB"], 1)
+   *   => { "userGroupCounts.groupA": 1, "userGroupCounts.groupB": 1 }
+   *
+   * MongoDB's dot-notation $inc creates the key if missing and increments
+   * if it already exists, making this safe for both first-time and
+   * subsequent upserts.
+   */
+  private groupCountDeltas(
+    groups: string[],
+    delta: 1 | -1,
+  ): Record<string, number> {
+    return Object.fromEntries(
+      groups.map((g) => [`userGroupCounts.${g}`, delta]),
+    );
+  }
+
+  /**
+   * Handles keys present in both the old and new dataset version.
+   *
+   * Checks three things that may have changed independently:
+   *   1. userGroups — increments added groups, decrements removed groups,
+   *      then recomputes the userGroups array from the updated counts
+   *   2. isPublished — only sets true inline; false is left to the cronjob
+   *   3. humanReadableName — updated per-key where it differs
+   */
+  private async updateSharedKeys(
+    sharedKeys: string[],
+    oldDoc: MetadataSourceDoc,
+    newDoc: MetadataSourceDoc,
+  ): Promise<void> {
+    const { sourceType } = newDoc;
+    const ids = sharedKeys.map((k) => {
+      const humanReadableName =
+        (newDoc.metadata[k] as ScientificMetadataEntry)?.human_name ?? "";
+      return this.buildId(sourceType, k, humanReadableName);
+    });
+    const filter = { _id: { $in: ids } };
+
+    const addedGroups = newDoc.userGroups.filter(
+      (g) => !oldDoc.userGroups.includes(g),
+    );
+    const removedGroups = oldDoc.userGroups.filter(
+      (g) => !newDoc.userGroups.includes(g),
+    );
+    const publishedFlippedOn = !oldDoc.isPublished && newDoc.isPublished;
+    const hasGroupChanges = addedGroups.length > 0 || removedGroups.length > 0;
+
+    if (hasGroupChanges || publishedFlippedOn) {
+      await this.metadataKeyModel.updateMany(filter, {
+        ...(addedGroups.length > 0 && {
+          $addToSet: { userGroups: { $each: addedGroups } },
+        }),
+        $inc: {
+          ...(addedGroups.length > 0 && this.groupCountDeltas(addedGroups, 1)),
+          ...(removedGroups.length > 0 &&
+            this.groupCountDeltas(removedGroups, -1)),
+        },
+        ...(publishedFlippedOn && { $set: { isPublished: true } }),
       });
+
+      // Recompute userGroups to drop groups whose count reached zero
+      if (removedGroups.length > 0) {
+        await this.metadataKeyModel.updateMany(
+          filter,
+          RECOMPUTE_USER_GROUPS_STAGE,
+        );
+      }
     }
 
-    if (addedKeys.length > 0) {
-      await this.insertManyFromSource({
-        ...newDoc,
-        metadata: Object.fromEntries(
-          addedKeys.map((k) => [k, newDoc.metadata[k]]),
-        ),
-      });
+    // humanReadableName is part of _id, so a change means a different document.
+    // Treat it as delete the old entry + insert the new one.
+    const humanNameChangedKeys = sharedKeys.filter((k) => {
+      const oldName =
+        (oldDoc.metadata[k] as ScientificMetadataEntry)?.human_name ?? "";
+      const newName =
+        (newDoc.metadata[k] as ScientificMetadataEntry)?.human_name ?? "";
+      return oldName !== newName;
+    });
+
+    if (humanNameChangedKeys.length > 0) {
+      await Promise.all([
+        this.deleteMany({
+          ...oldDoc,
+          metadata: Object.fromEntries(
+            humanNameChangedKeys.map((k) => [k, oldDoc.metadata[k]]),
+          ),
+        }),
+        this.insertManyFromSource({
+          ...newDoc,
+          metadata: Object.fromEntries(
+            humanNameChangedKeys.map((k) => [k, newDoc.metadata[k]]),
+          ),
+        }),
+      ]);
     }
   }
 }
diff --git a/src/metadata-keys/schemas/metadatakey.schema.ts b/src/metadata-keys/schemas/metadatakey.schema.ts
index d49b74ee8..bbf602d32 100644
--- a/src/metadata-keys/schemas/metadatakey.schema.ts
+++ b/src/metadata-keys/schemas/metadatakey.schema.ts
@@ -90,10 +90,26 @@ export class MetadataKeyClass extends QueryableClass {
   })
   @Prop({ type: Number, default: 0 })
   usageCount: number;
+
+  @ApiProperty({
+    type: Object,
+    description:
+      "Tracks how many datasets per user group reference this metadata key. " +
+      "Used to safely remove groups from userGroups when the last dataset " +
+      "contributing that group is deleted or updated. " +
+      "e.g. { 'groupA': 3, 'groupB': 1 } means 3 datasets with groupA and 1 with groupB use this key.",
+  })
+  @Prop({ type: Map, of: Number, default: {} })
+  userGroupCounts: Map<string, number>;
 }
 
 export const MetadataKeySchema = SchemaFactory.createForClass(MetadataKeyClass);
 
-MetadataKeySchema.index({ sourceType: 1, sourceId: 1 });
+MetadataKeySchema.index({ sourceType: 1, isPublished: 1, key: 1 });
+MetadataKeySchema.index({
+  sourceType: 1,
+  isPublished: 1,
+  humanReadableName: 1,
+});
 MetadataKeySchema.index({ sourceType: 1, userGroups: 1, key: 1 });
 MetadataKeySchema.index({ sourceType: 1, userGroups: 1, humanReadableName: 1 });
diff --git a/src/metadata-keys/types/metadatakeys-filter-content.ts b/src/metadata-keys/types/metadatakeys-filter-content.ts
index b7a38afab..e9804e0b5 100644
--- a/src/metadata-keys/types/metadatakeys-filter-content.ts
+++ b/src/metadata-keys/types/metadatakeys-filter-content.ts
@@ -9,8 +9,8 @@ const FILTERS: Record<"limits" | "fields" | "where" | "include", object> = {
     type: "object",
     example: {
       sourceType: "dataset",
-      sourceId: "datasetId",
       key: "metadata_key_name",
+      humanReadableName: "Metadata Key Name",
     },
   },
   include: {},
diff --git a/test/MetadataKeys.js b/test/MetadataKeys.js
index c62e117f0..08b6f1e2a 100644
--- a/test/MetadataKeys.js
+++ b/test/MetadataKeys.js
@@ -2,173 +2,583 @@
 const utils = require("./LoginUtils");
 const { TestData } = require("./TestData");
 
-let accessTokenAdminIngestor = null;
+let accessTokenAdmin = null;
+let accessTokenArchiveManager = null;
 let accessTokenUser1 = null;
-let datasetIdPrivate = null;
-let datasetIdUser1 = null;
 
-describe("MetadataKeys v4 ACL", () => {
-  before(async () => {
-    db.collection("MetadataKeys").deleteMany({});
+let pidGroup1 = null; // ownerGroup: group1, not published
+let pidGroup2 = null; // ownerGroup: group2, not published
+let pidPublished = null; // ownerGroup: group2, isPublished: true
+
+// ---------------------------------------------------------------------------
+// Dataset fixtures — controlled ownerGroup for predictable access control
+// ---------------------------------------------------------------------------
+const datasetGroup1 = {
+  ...TestData.DatasetWithScientificMetadataV4,
+  ownerGroup: "group1",
+  accessGroups: [],
+  isPublished: false,
+};
+
+const datasetGroup2 = {
+  ...TestData.DatasetWithScientificMetadataV4,
+  ownerGroup: "group2",
+  accessGroups: ["group2"],
+  isPublished: false,
+  // extra key exclusive to group2 so access control tests are unambiguous
+  scientificMetadata: {
+    ...TestData.DatasetWithScientificMetadataV4.scientificMetadata,
+    group2_only_key: { value: "exclusive to group2" },
+  },
+};
+
+const datasetPublished = {
+  ...TestData.DatasetWithScientificMetadataV4,
+  ownerGroup: "group2",
+  accessGroups: ["group2"],
+  isPublished: true,
+};
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+async function getMetadataKeys(filter, token) {
+  const req = request(appUrl)
+    .get(
+      `/api/v4/metadatakeys?filter=${encodeURIComponent(JSON.stringify(filter))}`,
+    )
+    .set("Accept", "application/json");
+
+  if (token) {
+    req.set({ Authorization: `Bearer ${token}` });
+  }
+
+  return req
+    .expect(TestData.SuccessfulGetStatusCode)
+    .expect("Content-Type", /json/);
+}
+
+async function createDataset(dataset) {
+  const res = await request(appUrl)
+    .post("/api/v3/Datasets")
+    .send(dataset)
+    .set("Accept", "application/json")
+    .set({ Authorization: `Bearer ${accessTokenAdmin}` })
+    .expect(TestData.EntryCreatedStatusCode);
+  return res.body.pid;
+}
+
+async function patchDataset(pid, payload) {
+  return request(appUrl)
+    .patch("/api/v4/datasets/" + encodeURIComponent(pid))
+    .send(payload)
+    .set("Accept", "application/json")
+    .set({ Authorization: `Bearer ${accessTokenAdmin}` })
+    .expect(TestData.SuccessfulGetStatusCode);
+}
 
-    accessTokenAdminIngestor = await utils.getToken(appUrl, {
+async function deleteDataset(pid) {
+  return request(appUrl)
+    .delete("/api/v3/Datasets/" + encodeURIComponent(pid))
+    .set("Accept", "application/json")
+    .set({ Authorization: `Bearer ${accessTokenArchiveManager}` })
+    .expect(TestData.SuccessfulDeleteStatusCode);
+}
+
+describe("2000: MetadataKeys: Access Control and Search", () => {
+  before(async () => {
+    accessTokenAdmin = await utils.getToken(appUrl, {
       username: "adminIngestor",
       password: TestData.Accounts["adminIngestor"]["password"],
     });
 
+    accessTokenArchiveManager = await utils.getToken(appUrl, {
+      username: "archiveManager",
+      password: TestData.Accounts["archiveManager"]["password"],
+    });
+
     accessTokenUser1 = await utils.getToken(appUrl, {
       username: "user1",
       password: TestData.Accounts["user1"]["password"],
     });
+
+    pidGroup1 = await createDataset(datasetGroup1);
+    pidGroup2 = await createDataset(datasetGroup2);
+    pidPublished = await createDataset(datasetPublished);
   });
 
-  it("0000: create a private dataset v3 has scientific metadata for admin", async () => {
-    return request(appUrl)
-      .post("/api/v3/Datasets")
-      .send(TestData.RawCorrectRandom)
-      .set("Accept", "application/json")
-      .set({ Authorization: `Bearer ${accessTokenAdminIngestor}` })
-      .expect("Content-Type", /json/)
-      .expect(TestData.EntryCreatedStatusCode)
-      .then((res) => {
-        res.body.should.have
-          .property("datasetName")
-          .and.equal(TestData.RawCorrectRandom.datasetName);
-
-        datasetIdPrivate = encodeURIComponent(res.body["pid"]);
-      });
-  });
-
-  it("0001: create a public dataset v4 has scientific metadata for unauthenticated user", async () => {
-    const publicDataset = { ...TestData.RawCorrectV4, isPublished: true };
-    return request(appUrl)
-      .post("/api/v4/Datasets")
-      .send(publicDataset)
-      .set("Accept", "application/json")
-      .set({ Authorization: `Bearer ${accessTokenAdminIngestor}` })
-      .expect("Content-Type", /json/)
-      .expect(TestData.EntryCreatedStatusCode)
-      .then((res) => {
-        res.body.should.have
-          .property("datasetName")
-          .and.equal(publicDataset.datasetName);
-      });
-  });
-
-  it("0002: create a private dataset v4 has scientific metadata for user1", async () => {
-    const user1Dataset = { ...TestData.RawCorrectV4, accessGroups: ["group1"] };
-    return request(appUrl)
-      .post("/api/v4/Datasets")
-      .send(user1Dataset)
-      .set("Accept", "application/json")
-      .set({ Authorization: `Bearer ${accessTokenAdminIngestor}` })
-      .expect("Content-Type", /json/)
-      .expect(TestData.EntryCreatedStatusCode)
-      .then((res) => {
-        res.body.should.have
-          .property("datasetName")
-          .and.equal(user1Dataset.datasetName);
-
-        datasetIdUser1 = encodeURIComponent(res.body["pid"]);
-      });
-  });
-
-  it("0010: should allow admin to list all metadata keys", async () => {
-    const filter = {
-      limits: { limit: 10, skip: 0, sort: { createdAt: "desc" } },
-    };
+  after(async () => {
+    for (const pid of [pidGroup1, pidGroup2, pidPublished]) {
+      if (pid) await deleteDataset(pid);
+    }
+  });
+
+  // -------------------------------------------------------------------------
+  // Basic access
+  // -------------------------------------------------------------------------
+
+  it("0100: admin can fetch metadata keys and gets results from all groups", async () => {
+    const res = await getMetadataKeys(
+      { where: { sourceType: "Dataset" } },
+      accessTokenAdmin,
+    );
+
+    res.body.should.be.an("array").and.have.length.greaterThan(0);
+
+    const allGroups = res.body.flatMap((k) => k.userGroups);
+    allGroups.should.include("group1");
+    allGroups.should.include("group2");
+  });
+
+  it("0110: user1 can fetch metadata keys and only sees group1 keys", async () => {
+    const res = await getMetadataKeys(
+      { where: { sourceType: "Dataset" } },
+      accessTokenUser1,
+    );
+
+    res.body.should.be.an("array").and.have.length.greaterThan(0);
+    res.body.forEach((k) => {
+      k.userGroups.should.include("group1");
+    });
+  });
+
+  it("0120: user1 cannot see keys exclusive to group2", async () => {
+    const res = await getMetadataKeys(
+      { where: { sourceType: "Dataset", key: "group2_only_key" } },
+      accessTokenUser1,
+    );
+
+    res.body.should.be.an("array").and.have.lengthOf(0);
+  });
+
+  it("0130: unauthenticated user can only see published metadata keys", async () => {
+    const res = await getMetadataKeys(
+      { where: { sourceType: "Dataset" } },
+      null,
+    );
+
+    res.body.should.be.an("array");
+    res.body.forEach((k) => {
+      k.isPublished.should.equal(true);
+    });
+  });
+
+  it("0140: unauthenticated user cannot see unpublished keys", async () => {
+    const res = await getMetadataKeys(
+      { where: { sourceType: "Dataset", isPublished: false } },
+      null,
+    );
+
+    res.body.should.be.an("array").and.have.lengthOf(0);
+  });
+
+  // -------------------------------------------------------------------------
+  // Search — key
+  // -------------------------------------------------------------------------
+
+  it("0200: admin can search keys by exact key name", async () => {
+    const res = await getMetadataKeys(
+      { where: { sourceType: "Dataset", key: "with_number" } },
+      accessTokenAdmin,
+    );
+
+    res.body.should.be.an("array");
+    res.body.forEach((k) => k.key.should.equal("with_number"));
+  });
+
+  it("0210: admin can search keys by partial key name using $regex", async () => {
+    const res = await getMetadataKeys(
+      {
+        where: {
+          sourceType: "Dataset",
+          key: { $regex: "with_", $options: "i" },
+        },
+      },
+      accessTokenAdmin,
+    );
+
+    res.body.should.be.an("array").and.have.length.greaterThan(0);
+    res.body.forEach((k) => k.key.should.match(/with_/i));
+  });
+
+  it("0220: regex search returns no results for non-matching pattern", async () => {
+    const res = await getMetadataKeys(
+      {
+        where: {
+          sourceType: "Dataset",
+          key: { $regex: "nonexistent_xyz", $options: "i" },
+        },
+      },
+      accessTokenAdmin,
+    );
+
+    res.body.should.be.an("array").and.have.lengthOf(0);
+  });
+
+  it("0230: user1 regex search only returns keys from accessible groups", async () => {
+    const res = await getMetadataKeys(
+      {
+        where: {
+          sourceType: "Dataset",
+          key: { $regex: "with_", $options: "i" },
+        },
+      },
+      accessTokenUser1,
+    );
+
+    res.body.should.be.an("array");
+    res.body.forEach((k) => k.userGroups.should.include("group1"));
+  });
+
+  // -------------------------------------------------------------------------
+  // Search — humanReadableName
+  // -------------------------------------------------------------------------
+
+  it("0240: admin can search by humanReadableName using $regex", async () => {
+    const res = await getMetadataKeys(
+      {
+        where: {
+          sourceType: "Dataset",
+          humanReadableName: { $regex: "pressure", $options: "i" },
+        },
+      },
+      accessTokenAdmin,
+    );
+
+    res.body.should.be.an("array").and.have.length.greaterThan(0);
+    res.body.forEach((k) => k.humanReadableName.should.match(/pressure/i));
+  });
+
+  it("0250: humanReadableName regex search returns empty for no match", async () => {
+    const res = await getMetadataKeys(
+      {
+        where: {
+          sourceType: "Dataset",
+          humanReadableName: { $regex: "nonexistent_xyz", $options: "i" },
+        },
+      },
+      accessTokenAdmin,
+    );
+
+    res.body.should.be.an("array").and.have.lengthOf(0);
+  });
+
+  it("0260: keys without human_name have empty humanReadableName", async () => {
+    const res = await getMetadataKeys(
+      { where: { sourceType: "Dataset", key: "with_key_value" } },
+      accessTokenAdmin,
+    );
+
+    res.body.should.be.an("array").and.have.length.greaterThan(0);
+    res.body[0].humanReadableName.should.equal("");
+  });
+
+  // -------------------------------------------------------------------------
+  // Pagination
+  // -------------------------------------------------------------------------
+
+  it("0300: limit is respected", async () => {
+    const res = await getMetadataKeys(
+      { where: { sourceType: "Dataset" }, limits: { limit: 2, skip: 0 } },
+      accessTokenAdmin,
+    );
+
+    res.body.should.be.an("array").and.have.length.at.most(2);
+  });
+
+  it("0310: skip is respected", async () => {
+    const resAll = await getMetadataKeys(
+      { where: { sourceType: "Dataset" }, limits: { limit: 100, skip: 0 } },
+      accessTokenAdmin,
+    );
+    const resSkipped = await getMetadataKeys(
+      { where: { sourceType: "Dataset" }, limits: { limit: 100, skip: 1 } },
+      accessTokenAdmin,
+    );
+
+    resSkipped.body.length.should.equal(resAll.body.length - 1);
+  });
 
-    return request(appUrl)
-      .get(
-        `/api/v4/metadatakeys?filter=${encodeURIComponent(JSON.stringify(filter))}`,
-      )
-      .set("Accept", "application/json")
-      .set({ Authorization: `Bearer ${accessTokenAdminIngestor}` })
-      .expect(TestData.SuccessfulGetStatusCode)
-      .expect("Content-Type", /json/)
-      .then((res) => {
-        res.body.should.be.an("array").that.satisfies((arr) => {
-          const values = arr.map((item) => item.isPublished);
-          return values.includes(true) && values.includes(false);
-        });
-      });
-  });
-
-  it("0020: should allow unauthenticated user to list only published metadata keys", async () => {
-    const filter = {
-      where: { sourceType: "dataset" },
-      limits: { limit: 10, skip: 0, sort: { createdAt: "desc" } },
+  // -------------------------------------------------------------------------
+  // Fields projection
+  // -------------------------------------------------------------------------
+
+  it("0400: fields projection returns only requested fields", async () => {
+    const res = await getMetadataKeys(
+      {
+        where: { sourceType: "Dataset" },
+        fields: ["key", "sourceType"],
+        limits: { limit: 5, skip: 0 },
+      },
+      accessTokenAdmin,
+    );
+
+    res.body.should.be.an("array").and.have.length.greaterThan(0);
+    res.body.forEach((k) => {
+      k.should.have.property("key");
+      k.should.have.property("sourceType");
+      k.should.not.have.property("userGroups");
+      k.should.not.have.property("userGroupCounts");
+      k.should.not.have.property("usageCount");
+    });
+  });
+
+  // -------------------------------------------------------------------------
+  // Response shape
+  // -------------------------------------------------------------------------
+
+  it("0500: response documents have expected shape", async () => {
+    const res = await getMetadataKeys(
+      { where: { sourceType: "Dataset" }, limits: { limit: 1, skip: 0 } },
+      accessTokenAdmin,
+    );
+
+    res.body.should.be.an("array").and.have.length.greaterThan(0);
+
+    const doc = res.body[0];
+    doc.should.have.property("key").and.be.a("string");
+    doc.should.have.property("sourceType").and.equal("Dataset");
+    doc.should.have.property("humanReadableName").and.be.a("string");
+    doc.should.have.property("userGroups").and.be.an("array");
+    doc.should.have.property("isPublished").and.be.a("boolean");
+    doc.should.have.property("usageCount").and.be.a("number");
+  });
+
+  it("0510: usageCount reflects number of datasets using the key", async () => {
+    // with_number exists in all 3 datasets created in before()
+    const res = await getMetadataKeys(
+      { where: { sourceType: "Dataset", key: "with_number" } },
+      accessTokenAdmin,
+    );
+
+    res.body.should.be.an("array").and.have.length.greaterThan(0);
+    res.body[0].usageCount.should.equal(3);
+  });
+
+  // -------------------------------------------------------------------------
+  // Mutation tests
+  // -------------------------------------------------------------------------
+
+  it("0600: updating human_name renames the MetadataKey (delete old + insert new)", async () => {
+    const pid = await createDataset({
+      ...TestData.DatasetWithScientificMetadataV4,
+      ownerGroup: "group1",
+      accessGroups: [],
+      isPublished: false,
+      scientificMetadata: {
+        rename_test_key: { value: 1, human_name: "Original Name" },
+      },
+    });
+
+    // Old key exists
+    const before = await getMetadataKeys(
+      {
+        where: {
+          sourceType: "Dataset",
+          key: "rename_test_key",
+          humanReadableName: "Original Name",
+        },
+      },
+      accessTokenAdmin,
+    );
+    before.body.should.have.lengthOf(1);
+
+    await patchDataset(pid, {
+      scientificMetadata: {
+        rename_test_key: { value: 1, human_name: "Renamed Name" },
+      },
+    });
+
+    // Old key gone
+    const afterOld = await getMetadataKeys(
+      {
+        where: {
+          sourceType: "Dataset",
+          key: "rename_test_key",
+          humanReadableName: "Original Name",
+        },
+      },
+      accessTokenAdmin,
+    );
+    afterOld.body.should.have.lengthOf(0);
+
+    // New key exists
+    const afterNew = await getMetadataKeys(
+      {
+        where: {
+          sourceType: "Dataset",
+          key: "rename_test_key",
+          humanReadableName: "Renamed Name",
+        },
+      },
+      accessTokenAdmin,
+    );
+    afterNew.body.should.have.lengthOf(1);
+    afterNew.body[0].usageCount.should.equal(1);
+
+    await deleteDataset(pid);
+  });
+
+  it("0610: removing a key from scientificMetadata decrements usageCount", async () => {
+    const pidA = await createDataset({
+      ...TestData.DatasetWithScientificMetadataV4,
+      ownerGroup: "group1",
+      accessGroups: [],
+      isPublished: false,
+      scientificMetadata: { shared_key: { value: 1 } },
+    });
+
+    const pidB = await createDataset({
+      ...TestData.DatasetWithScientificMetadataV4,
+      ownerGroup: "group1",
+      accessGroups: [],
+      isPublished: false,
+      scientificMetadata: { shared_key: { value: 2 } },
+    });
+
+    const filterKey = { where: { sourceType: "Dataset", key: "shared_key" } };
+
+    // usageCount starts at 2
+    const before = await getMetadataKeys(filterKey, accessTokenAdmin);
+    before.body.should.have.lengthOf(1);
+    before.body[0].usageCount.should.equal(2);
+
+    // Remove key from datasetA
+    await patchDataset(pidA, { scientificMetadata: {} });
+
+    // usageCount drops to 1, key still exists
+    const after = await getMetadataKeys(filterKey, accessTokenAdmin);
+    after.body.should.have.lengthOf(1);
+    after.body[0].usageCount.should.equal(1);
+
+    await deleteDataset(pidA);
+    await deleteDataset(pidB);
+  });
+
+  it("0620: removing a key from the last dataset deletes the MetadataKey entirely", async () => {
+    const pid = await createDataset({
+      ...TestData.DatasetWithScientificMetadataV4,
+      ownerGroup: "group1",
+      accessGroups: [],
+      isPublished: false,
+      scientificMetadata: { sole_key: { value: 42 } },
+    });
+
+    const filterKey = { where: { sourceType: "Dataset", key: "sole_key" } };
+
+    const before = await getMetadataKeys(filterKey, accessTokenAdmin);
+    before.body.should.have.lengthOf(1);
+    before.body[0].usageCount.should.equal(1);
+
+    await patchDataset(pid, { scientificMetadata: {} });
+
+    const after = await getMetadataKeys(filterKey, accessTokenAdmin);
+    after.body.should.have.lengthOf(0);
+
+    await deleteDataset(pid);
+  });
+
+  it("0630: adding a new key to scientificMetadata creates a new MetadataKey", async () => {
+    const pid = await createDataset({
+      ...TestData.DatasetWithScientificMetadataV4,
+      ownerGroup: "group1",
+      accessGroups: [],
+      isPublished: false,
+      scientificMetadata: { original_key: { value: 1 } },
+    });
+
+    const filterNewKey = {
+      where: { sourceType: "Dataset", key: "brand_new_key" },
     };
 
-    return request(appUrl)
-      .get(
-        `/api/v4/metadatakeys?filter=${encodeURIComponent(JSON.stringify(filter))}`,
-      )
-      .set("Accept", "application/json")
-      .expect(TestData.SuccessfulGetStatusCode)
-      .expect("Content-Type", /json/)
-      .then((res) => {
-        res.body.should.be.an("array").that.satisfies((arr) => {
-          return arr.every((item) => item.isPublished === true);
-        });
-      });
-  });
-
-  it("0030: should allow authenticated user to list metadata keys they have access", async () => {
-    const filter = { limits: { limit: 1, skip: 0 } };
-
-    return request(appUrl)
-      .get(
-        `/api/v4/metadatakeys?filter=${encodeURIComponent(JSON.stringify(filter))}`,
-      )
-      .set("Accept", "application/json")
-      .set({ Authorization: `Bearer ${accessTokenUser1}` })
-      .expect(TestData.SuccessfulGetStatusCode)
-      .expect("Content-Type", /json/)
-      .then((res) => {
-        res.body.should.be.an("array").that.satisfies((arr) => {
-          return arr.every((item) => {
-            return (
-              item.isPublished === true || item.userGroups.includes("group1")
-            );
-          });
-        });
-      });
-  });
-
-  it("0040: should return empty array when user queries keys they don't have access to", async () => {
-    const filter = {
-      where: { sourceType: "dataset", sourceId: datasetIdPrivate },
-      limits: { limit: 10, skip: 0 },
+    // Key does not exist yet
+    const before = await getMetadataKeys(filterNewKey, accessTokenAdmin);
+    before.body.should.have.lengthOf(0);
+
+    await patchDataset(pid, {
+      scientificMetadata: {
+        original_key: { value: 1 },
+        brand_new_key: { value: 99, human_name: "Brand New Key" },
+      },
+    });
+
+    const after = await getMetadataKeys(filterNewKey, accessTokenAdmin);
+    after.body.should.have.lengthOf(1);
+    after.body[0].key.should.equal("brand_new_key");
+    after.body[0].humanReadableName.should.equal("Brand New Key");
+    after.body[0].usageCount.should.equal(1);
+
+    await deleteDataset(pid);
+  });
+
+  it("0640: changing ownerGroup updates userGroups — old group removed, new group added", async () => {
+    const pid = await createDataset({
+      ...TestData.DatasetWithScientificMetadataV4,
+      ownerGroup: "group1",
+      accessGroups: [],
+      isPublished: false,
+      scientificMetadata: { group_change_key: { value: 1 } },
+    });
+
+    const filterKey = {
+      where: { sourceType: "Dataset", key: "group_change_key" },
     };
 
-    return request(appUrl)
-      .get(
-        `/api/v4/metadatakeys?filter=${encodeURIComponent(JSON.stringify(filter))}`,
-      )
-      .set("Accept", "application/json")
-      .set({ Authorization: `Bearer ${accessTokenUser1}` })
-      .expect(TestData.SuccessfulGetStatusCode)
-      .expect("Content-Type", /json/)
-      .then((res) => {
-        res.body.should.be.an("array").and.have.length(0);
-      });
-  });
-
-  it("0040: should return metadatakeys with correct access for user1", async () => {
-    const filter = {
-      where: { sourceType: "dataset", sourceId: `${datasetIdUser1}` },
-      limits: { limit: 10, skip: 0 },
+    const before = await getMetadataKeys(filterKey, accessTokenAdmin);
+    before.body.should.have.lengthOf(1);
+    before.body[0].userGroups.should.include("group1");
+    before.body[0].userGroups.should.not.include("group2");
+
+    await patchDataset(pid, { ownerGroup: "group2" });
+
+    const after = await getMetadataKeys(filterKey, accessTokenAdmin);
+    after.body.should.have.lengthOf(1);
+    after.body[0].userGroups.should.include("group2");
+    after.body[0].userGroups.should.not.include("group1");
+
+    // user1 (group1) can no longer see this key
+    const user1Res = await getMetadataKeys(filterKey, accessTokenUser1);
+    user1Res.body.should.have.lengthOf(0);
+
+    await deleteDataset(pid);
+  });
+
+  it("0650: deleting a dataset decrements usageCount — deletes MetadataKey when it reaches 0", async () => {
+    const pidA = await createDataset({
+      ...TestData.DatasetWithScientificMetadataV4,
+      ownerGroup: "group1",
+      accessGroups: [],
+      isPublished: false,
+      scientificMetadata: { delete_test_key: { value: 1 } },
+    });
+
+    const pidB = await createDataset({
+      ...TestData.DatasetWithScientificMetadataV4,
+      ownerGroup: "group1",
+      accessGroups: [],
+      isPublished: false,
+      scientificMetadata: { delete_test_key: { value: 2 } },
+    });
+
+    const filterKey = {
+      where: { sourceType: "Dataset", key: "delete_test_key" },
     };
 
-    return request(appUrl)
-      .get(`/api/v4/metadatakeys?filter=${JSON.stringify(filter)}`)
-      .set("Accept", "application/json")
-      .set({ Authorization: `Bearer ${accessTokenUser1}` })
-      .expect(TestData.SuccessfulGetStatusCode)
-      .expect("Content-Type", /json/)
-      .then((res) => {
-        res.body.should.be.an("array").and.have.length.of.at.least(1);
-      });
+    const before = await getMetadataKeys(filterKey, accessTokenAdmin);
+    before.body.should.have.lengthOf(1);
+    before.body[0].usageCount.should.equal(2);
+
+    // Delete first dataset — usageCount drops to 1, key still exists
+    await deleteDataset(pidA);
+
+    const afterFirst = await getMetadataKeys(filterKey, accessTokenAdmin);
+    afterFirst.body.should.have.lengthOf(1);
+    afterFirst.body[0].usageCount.should.equal(1);
+
+    // Delete second dataset — usageCount hits 0, MetadataKey removed
+    await deleteDataset(pidB);
+
+    const afterSecond = await getMetadataKeys(filterKey, accessTokenAdmin);
+    afterSecond.body.should.have.lengthOf(0);
   });
 });
diff --git a/test/TestData.js b/test/TestData.js
index a3343722f..a994b82b4 100644
--- a/test/TestData.js
+++ b/test/TestData.js
@@ -1436,10 +1436,12 @@ const TestData = {
         unit: "mbar l/s/cm^2",
         valueSI: 100000,
         unitSI: "kg / s^3",
+        human_name: "Pressure SI",
       },
       with_number: {
         value: 111,
         unit: "",
+        human_name: "Sample Number",
       },
       with_string: {
         value: "222",
@@ -1458,17 +1460,18 @@ const TestData = {
     owner: faker.internet.username(),
     size: faker.number.int({ min: 0, max: 100000000 }),
     contactEmail: faker.internet.email(),
+    principalInvestigator: faker.internet.username(),
     scientificMetadata: {
       with_key_value: "some text",
       with_unit_and_value_si: {
         value: 100,
         unit: "mbar l/s/cm^2",
-        valueSI: 100000,
-        unitSI: "kg / s^3",
+        human_name: "Pressure SI",
       },
       with_number: {
         value: 111,
         unit: "",
+        human_name: "Sample Number",
       },
       with_string: {
         value: "222",

From 7daa0c19409e6be61b44a2056ce907dad455f188 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Mon, 20 Apr 2026 21:24:49 +0200
Subject: [PATCH 03/29] trim migration script comment and fix failing tests

---
 ...aset-scientificMetadata-to-metadatakeys.md |  4 ++--
 ...aset-scientificMetadata-to-metadatakeys.js | 23 -------------------
 src/datasets/datasets.service.spec.ts         | 17 ++++++++------
 src/metadata-keys/metadatakeys.service.ts     |  1 +
 test/MetadataKeys.js                          |  7 ++++--
 test/TestData.js                              |  1 -
 6 files changed, 18 insertions(+), 35 deletions(-)

diff --git a/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md b/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
index b966d6388..7ebd87bde 100644
--- a/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
+++ b/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
@@ -58,9 +58,9 @@ Each `MetadataKey` document tracks:
 
 ---
 
-## Pipeline walkthrough
+## Migration Pipeline walkthrough
 
-The pipeline uses two datasets as a running example throughout:
+The migration pipeline uses two datasets as a running example throughout:
 
 |           | ownerGroup | accessGroups | isPublished | scientificMetadata keys |
 | --------- | ---------- | ------------ | ----------- | ----------------------- |
diff --git a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
index 904b48886..a63e55002 100644
--- a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
+++ b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
@@ -2,19 +2,15 @@ const SOURCE_COLLECTIONS = ["Dataset"];
 
 function buildPipeline(sourceType) {
   return [
-    // -------------------------------------------------------------------------
     // Stage 1: Only process documents that have scientificMetadata
-    // -------------------------------------------------------------------------
     {
       $match: {
         scientificMetadata: { $exists: true, $type: "object" },
       },
     },
 
-    // -------------------------------------------------------------------------
     // Stage 2: Flatten scientificMetadata into an array of key/value pairs.
     // Preserve _id as datasetId so we can count unique datasets later.
-    // -------------------------------------------------------------------------
     {
       $project: {
         datasetId: "$_id",
@@ -25,15 +21,11 @@ function buildPipeline(sourceType) {
       },
     },
 
-    // -------------------------------------------------------------------------
     // Stage 3: One document per (dataset, metadata key)
-    // -------------------------------------------------------------------------
     { $unwind: "$metaArr" },
 
-    // -------------------------------------------------------------------------
     // Stage 4: Shape each (dataset, key) document.
     // userGroups is the union of ownerGroup + accessGroups for this dataset.
-    // -------------------------------------------------------------------------
     {
       $project: {
         datasetId: 1,
@@ -46,12 +38,7 @@ function buildPipeline(sourceType) {
       },
     },
 
-    // -------------------------------------------------------------------------
     // Stage 5: One document per (dataset, key, group).
-    // This is the pivot that makes per-group counting possible.
-    // preserveNullAndEmptyArrays keeps datasets with no groups so
-    // usageCount stays accurate even for group-less datasets.
-    // -------------------------------------------------------------------------
     {
       $unwind: {
         path: "$userGroups",
@@ -59,21 +46,17 @@ function buildPipeline(sourceType) {
       },
     },
 
-    // -------------------------------------------------------------------------
     // Stage 6: Filter out the empty-string sentinel that comes from ownerGroup
     // being null/missing. Real group names are never empty strings.
-    // -------------------------------------------------------------------------
     {
       $match: {
         userGroups: { $nin: [null, ""] },
       },
     },
 
-    // -------------------------------------------------------------------------
     // Stage 7: Group by (metaKeyId, group).
     // groupCount = how many datasets with this group use this key.
     // datasetIds = set of distinct dataset IDs (for accurate usageCount).
-    // -------------------------------------------------------------------------
     {
       $group: {
         _id: {
@@ -90,12 +73,10 @@ function buildPipeline(sourceType) {
       },
     },
 
-    // -------------------------------------------------------------------------
     // Stage 8: Group by metaKeyId to reassemble one document per metadata key.
     // userGroupCountsArr will become the userGroupCounts Map.
     // datasetIdSets is a list of per-group dataset ID sets — merged in the
     // next stage to compute total unique dataset count (usageCount).
-    // -------------------------------------------------------------------------
     {
       $group: {
         _id: "$_id.metaKeyId",
@@ -110,11 +91,9 @@ function buildPipeline(sourceType) {
       },
     },
 
-    // -------------------------------------------------------------------------
     // Stage 9: Final projection.
     // userGroupCounts: [{k,v}] array → plain object (stored as Map in Mongoose).
     // usageCount: union all per-group datasetId sets, count distinct IDs.
-    // -------------------------------------------------------------------------
     {
       $project: {
         _id: 1,
@@ -138,12 +117,10 @@ function buildPipeline(sourceType) {
       },
     },
 
-    // -------------------------------------------------------------------------
     // Stage 10: Merge into MetadataKeys.
     // whenMatched handles the (future) case where multiple SOURCE_COLLECTIONS
     // produce the same _id. Not possible today since sourceType is part of
     // the _id, but kept correct for when more collections are added.
-    // -------------------------------------------------------------------------
     {
       $merge: {
         into: "MetadataKeys",
diff --git a/src/datasets/datasets.service.spec.ts b/src/datasets/datasets.service.spec.ts
index d5cfef7ab..9370f4580 100644
--- a/src/datasets/datasets.service.spec.ts
+++ b/src/datasets/datasets.service.spec.ts
@@ -96,6 +96,15 @@ const mockDataset: DatasetClass = {
   dataQualityMetrics: 1,
 };
 
+const mockDatasetModel = function (data: DatasetClass) {
+  return {
+    ...data,
+    save: jest.fn().mockResolvedValue(data),
+    toObject: jest.fn().mockReturnValue(data),
+  };
+};
+mockDatasetModel.collection = { name: "Dataset" };
+
 describe("DatasetsService", () => {
   let service: DatasetsService;
   // eslint-disable-next-line @typescript-eslint/no-unused-vars
@@ -107,13 +116,7 @@ describe("DatasetsService", () => {
         ConfigService,
         {
           provide: getModelToken("DatasetClass"),
-          useValue: function (data: DatasetClass) {
-            return {
-              ...data,
-              save: jest.fn().mockResolvedValue(data),
-              toObject: jest.fn().mockReturnValue(data),
-            };
-          },
+          useValue: mockDatasetModel,
         },
         DatasetsService,
         DatasetsAccessService,
diff --git a/src/metadata-keys/metadatakeys.service.ts b/src/metadata-keys/metadatakeys.service.ts
index 0a2b84a63..f7289a613 100644
--- a/src/metadata-keys/metadatakeys.service.ts
+++ b/src/metadata-keys/metadatakeys.service.ts
@@ -119,6 +119,7 @@ export class MetadataKeysService {
             $setOnInsert: addCreatedByFields(
               {
                 _id: id as unknown as MetadataKeyDocument["_id"],
+                id,
                 key,
                 sourceType,
                 humanReadableName,
diff --git a/test/MetadataKeys.js b/test/MetadataKeys.js
index 08b6f1e2a..e8e7ef0f7 100644
--- a/test/MetadataKeys.js
+++ b/test/MetadataKeys.js
@@ -60,7 +60,7 @@ async function getMetadataKeys(filter, token) {
 
 async function createDataset(dataset) {
   const res = await request(appUrl)
-    .post("/api/v3/Datasets")
+    .post("/api/v4/datasets")
     .send(dataset)
     .set("Accept", "application/json")
     .set({ Authorization: `Bearer ${accessTokenAdmin}` })
@@ -79,7 +79,7 @@ async function patchDataset(pid, payload) {
 
 async function deleteDataset(pid) {
   return request(appUrl)
-    .delete("/api/v3/Datasets/" + encodeURIComponent(pid))
+    .delete("/api/v4/datasets/" + encodeURIComponent(pid))
     .set("Accept", "application/json")
     .set({ Authorization: `Bearer ${accessTokenArchiveManager}` })
     .expect(TestData.SuccessfulDeleteStatusCode);
@@ -87,6 +87,9 @@ async function deleteDataset(pid) {
 
 describe("2000: MetadataKeys: Access Control and Search", () => {
   before(async () => {
+    await db.collection("Dataset").deleteMany({});
+    await db.collection("MetadataKeys").deleteMany({});
+
     accessTokenAdmin = await utils.getToken(appUrl, {
       username: "adminIngestor",
       password: TestData.Accounts["adminIngestor"]["password"],
diff --git a/test/TestData.js b/test/TestData.js
index a994b82b4..1df4f5404 100644
--- a/test/TestData.js
+++ b/test/TestData.js
@@ -1460,7 +1460,6 @@ const TestData = {
     owner: faker.internet.username(),
     size: faker.number.int({ min: 0, max: 100000000 }),
     contactEmail: faker.internet.email(),
-    principalInvestigator: faker.internet.username(),
     scientificMetadata: {
       with_key_value: "some text",
       with_unit_and_value_si: {

From af3fef79c628e20408fd64d2e58617f1efea64ec Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Mon, 20 Apr 2026 21:37:09 +0200
Subject: [PATCH 04/29] fix typo and api test

---
 ...5401-sync-dataset-scientificMetadata-to-metadatakeys.md | 2 +-
 test/DatasetV4.js                                          | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md b/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
index 7ebd87bde..710a24744 100644
--- a/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
+++ b/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
@@ -140,7 +140,7 @@ Extract `key`, `humanReadableName`, and compute `userGroups` as the union of `ow
 { datasetId: "uuid-B", key: "temperature", isPublished: false, humanReadableName: "Temperature", userGroups: ["group-1"] }
 ```
 
-> `ownerGroup` is mandatory field so no null fallback is needed. `accessGroups` is optional so it falls back to `[]`.
+> `ownerGroup` is a mandatory field, so no null fallback is needed. `accessGroups` is optional so it falls back to `[]`.
 
 ---
 
diff --git a/test/DatasetV4.js b/test/DatasetV4.js
index 6755a54cb..de1316b42 100644
--- a/test/DatasetV4.js
+++ b/test/DatasetV4.js
@@ -1331,6 +1331,7 @@ describe("2500: Datasets v4 tests", () => {
             unit: "mg",
             valueSI: 0.0006,
             unitSI: "kg",
+            human_name: "Pressure SI",
           });
           res.body.scientificMetadata.with_number.should.deep.eq({
             value: 111,
@@ -1348,6 +1349,7 @@ describe("2500: Datasets v4 tests", () => {
           with_unit_and_value_si: {
             value: -2,
             unit: "km",
+            human_name: "new human name",
           },
         },
       };
@@ -1369,6 +1371,7 @@ describe("2500: Datasets v4 tests", () => {
             unit: "km",
             valueSI: -2000,
             unitSI: "m",
+            human_name: "new human name",
           });
         });
     });
@@ -1381,6 +1384,7 @@ describe("2500: Datasets v4 tests", () => {
             unit: "cm",
             valueSI: null,
             unitSI: null,
+            human_name: "new human name",
           },
           with_number: null,
         },
@@ -1407,6 +1411,7 @@ describe("2500: Datasets v4 tests", () => {
             unit: "cm",
             valueSI: -0.02,
             unitSI: "m",
+            human_name: "new human name",
           });
           res.body.scientificMetadata.should.not.have.property("with_number");
         });
@@ -1590,6 +1595,7 @@ describe("2500: Datasets v4 tests", () => {
             unit: "cm",
             valueSI: 555,
             unitSI: "cmcm",
+            human_name: "new human name",
           },
         },
       };
@@ -1610,6 +1616,7 @@ describe("2500: Datasets v4 tests", () => {
             unit: "cm",
             valueSI: 0.22,
             unitSI: "m",
+            human_name: "new human name",
           });
         });
     });

From 8320494449e9de3c4c1c033474127bb03e3ce5db Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Mon, 20 Apr 2026 21:59:04 +0200
Subject: [PATCH 05/29] set ownerGroup as  required with nonEmpty value

---
 src/common/dto/ownable.dto.ts        | 3 ++-
 src/common/schemas/ownable.schema.ts | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/common/dto/ownable.dto.ts b/src/common/dto/ownable.dto.ts
index b3a09e95f..c585e6072 100644
--- a/src/common/dto/ownable.dto.ts
+++ b/src/common/dto/ownable.dto.ts
@@ -1,5 +1,5 @@
 import { ApiProperty } from "@nestjs/swagger";
-import { IsOptional, IsString } from "class-validator";
+import { IsNotEmpty, IsOptional, IsString } from "class-validator";
 
 export class OwnableDto {
   @ApiProperty({
@@ -8,6 +8,7 @@ export class OwnableDto {
     description: "Name of the group owning this item.",
   })
   @IsString()
+  @IsNotEmpty()
   readonly ownerGroup: string;
 
   @ApiProperty({
diff --git a/src/common/schemas/ownable.schema.ts b/src/common/schemas/ownable.schema.ts
index 137c9b5f0..f12254b28 100644
--- a/src/common/schemas/ownable.schema.ts
+++ b/src/common/schemas/ownable.schema.ts
@@ -11,6 +11,7 @@ export class OwnableClass extends QueryableClass {
   @Prop({
     type: String,
     index: true,
+    required: true,
   })
   ownerGroup: string;
 

From 386ccf0ed11a9458c5c84845716b39023cde19e1 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Mon, 20 Apr 2026 21:59:19 +0200
Subject: [PATCH 06/29] update doc

---
 ...145401-sync-dataset-scientificMetadata-to-metadatakeys.js | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
index a63e55002..c3822810b 100644
--- a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
+++ b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
@@ -46,8 +46,9 @@ function buildPipeline(sourceType) {
       },
     },
 
-    // Stage 6: Filter out the empty-string sentinel that comes from ownerGroup
-    // being null/missing. Real group names are never empty strings.
+    // Stage 6: Filter out null and empty-string groups.
+    // Note: datasets with no valid groups are excluded from usageCount.
+    // This is acceptable since ownerGroup is a required field in practice.
     {
       $match: {
         userGroups: { $nin: [null, ""] },

From 34e0a24df583dd95dfbad0854d42437ce4ec623e Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Mon, 20 Apr 2026 21:59:26 +0200
Subject: [PATCH 07/29] fix api test

---
 test/DatasetV4.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/DatasetV4.js b/test/DatasetV4.js
index de1316b42..d373c5e73 100644
--- a/test/DatasetV4.js
+++ b/test/DatasetV4.js
@@ -1331,7 +1331,7 @@ describe("2500: Datasets v4 tests", () => {
             unit: "mg",
             valueSI: 0.0006,
             unitSI: "kg",
-            human_name: "Pressure SI",
+            human_name: "Sample Number",
           });
           res.body.scientificMetadata.with_number.should.deep.eq({
             value: 111,

From 74851db68970af8b8ce8163f36c19adecaf41bf9 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Mon, 20 Apr 2026 22:27:49 +0200
Subject: [PATCH 08/29] fix api test

---
 src/common/schemas/ownable.schema.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/common/schemas/ownable.schema.ts b/src/common/schemas/ownable.schema.ts
index f12254b28..137c9b5f0 100644
--- a/src/common/schemas/ownable.schema.ts
+++ b/src/common/schemas/ownable.schema.ts
@@ -11,7 +11,6 @@ export class OwnableClass extends QueryableClass {
   @Prop({
     type: String,
     index: true,
-    required: true,
   })
   ownerGroup: string;
 

From e66ebacfe433e3b691048bad539bfbfc41dbfd81 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Tue, 21 Apr 2026 09:18:17 +0200
Subject: [PATCH 09/29] fix api test

---
 test/DatasetV4.js | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/DatasetV4.js b/test/DatasetV4.js
index d373c5e73..0ed7b694c 100644
--- a/test/DatasetV4.js
+++ b/test/DatasetV4.js
@@ -1309,6 +1309,7 @@ describe("2500: Datasets v4 tests", () => {
           with_unit_and_value_si: {
             value: 600,
             unit: "mg",
+            human_name: "Sample Number",
           },
         },
       };

From 125c50714e33e7ad1d2b33dbf48541397a57e16f Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Tue, 21 Apr 2026 09:49:42 +0200
Subject: [PATCH 10/29] fix api test

---
 test/DatasetV4.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/DatasetV4.js b/test/DatasetV4.js
index 0ed7b694c..bf8f6b57a 100644
--- a/test/DatasetV4.js
+++ b/test/DatasetV4.js
@@ -1309,7 +1309,6 @@ describe("2500: Datasets v4 tests", () => {
           with_unit_and_value_si: {
             value: 600,
             unit: "mg",
-            human_name: "Sample Number",
           },
         },
       };
@@ -1332,11 +1331,12 @@ describe("2500: Datasets v4 tests", () => {
             unit: "mg",
             valueSI: 0.0006,
             unitSI: "kg",
-            human_name: "Sample Number",
+            human_name: "Pressure SI",
           });
           res.body.scientificMetadata.with_number.should.deep.eq({
             value: 111,
             unit: "",
+            human_name: "Sample Number",
           });
           res.body.datasetlifecycle.should.have
             .property("storageLocation")

From 9a41f76fbd9470d64848a5bbac1717f646cf49f7 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Thu, 23 Apr 2026 16:07:10 +0200
Subject: [PATCH 11/29] address sourcer comment

Co-authored-by: Copilot <copilot@github.com>
---
 src/metadata-keys/metadatakeys.service.ts | 39 +++++++++++++++--------
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/src/metadata-keys/metadatakeys.service.ts b/src/metadata-keys/metadatakeys.service.ts
index f7289a613..26844b547 100644
--- a/src/metadata-keys/metadatakeys.service.ts
+++ b/src/metadata-keys/metadatakeys.service.ts
@@ -278,12 +278,21 @@ export class MetadataKeysService {
     newDoc: MetadataSourceDoc,
   ): Promise<void> {
     const { sourceType } = newDoc;
-    const ids = sharedKeys.map((k) => {
-      const humanReadableName =
+    const humanNameChangedKeys: string[] = [];
+    const humanNameUnchangedKeys: string[] = [];
+
+    for (const k of sharedKeys) {
+      const oldHumanName =
+        (oldDoc.metadata[k] as ScientificMetadataEntry)?.human_name ?? "";
+      const newHumanName =
         (newDoc.metadata[k] as ScientificMetadataEntry)?.human_name ?? "";
-      return this.buildId(sourceType, k, humanReadableName);
-    });
-    const filter = { _id: { $in: ids } };
+
+      if (oldHumanName === newHumanName) {
+        humanNameUnchangedKeys.push(k);
+      } else {
+        humanNameChangedKeys.push(k);
+      }
+    }
 
     const addedGroups = newDoc.userGroups.filter(
       (g) => !oldDoc.userGroups.includes(g),
@@ -294,7 +303,17 @@ export class MetadataKeysService {
     const publishedFlippedOn = !oldDoc.isPublished && newDoc.isPublished;
     const hasGroupChanges = addedGroups.length > 0 || removedGroups.length > 0;
 
-    if (hasGroupChanges || publishedFlippedOn) {
+    if (
+      humanNameUnchangedKeys.length > 0 &&
+      (hasGroupChanges || publishedFlippedOn)
+    ) {
+      const ids = humanNameUnchangedKeys.map((k) => {
+        const humanReadableName =
+          (newDoc.metadata[k] as ScientificMetadataEntry)?.human_name ?? "";
+        return this.buildId(sourceType, k, humanReadableName);
+      });
+      const filter = { _id: { $in: ids } };
+
       await this.metadataKeyModel.updateMany(filter, {
         ...(addedGroups.length > 0 && {
           $addToSet: { userGroups: { $each: addedGroups } },
@@ -318,14 +337,6 @@ export class MetadataKeysService {
 
     // humanReadableName is part of _id, so a change means a different document.
     // Treat it as delete the old entry + insert the new one.
-    const humanNameChangedKeys = sharedKeys.filter((k) => {
-      const oldName =
-        (oldDoc.metadata[k] as ScientificMetadataEntry)?.human_name ?? "";
-      const newName =
-        (newDoc.metadata[k] as ScientificMetadataEntry)?.human_name ?? "";
-      return oldName !== newName;
-    });
-
     if (humanNameChangedKeys.length > 0) {
       await Promise.all([
         this.deleteMany({

From 22e8a2f8247152c44467c8b0232fa88897a3070b Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Tue, 28 Apr 2026 10:49:31 +0200
Subject: [PATCH 12/29] remove unncessary request import

---
 test/AttachmentV4.js | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/AttachmentV4.js b/test/AttachmentV4.js
index 99cc09106..ec9fb73f6 100644
--- a/test/AttachmentV4.js
+++ b/test/AttachmentV4.js
@@ -2,7 +2,6 @@
 const utils = require("./LoginUtils");
 const { TestData } = require("./TestData");
 const { v4: uuidv4 } = require("uuid");
-const request = require("supertest");
 
 let accessTokenAdminIngestor = null,
   accessTokenUser1 = null,

From 352437df7e54c8aa490ad18ba101075728507621 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Tue, 28 Apr 2026 14:26:39 +0200
Subject: [PATCH 13/29] add batch for scientificMetadata sync migration

---
 ...aset-scientificMetadata-to-metadatakeys.md | 29 +++-----
 ...aset-scientificMetadata-to-metadatakeys.js | 72 +++++++++++--------
 2 files changed, 52 insertions(+), 49 deletions(-)

diff --git a/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md b/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
index 710a24744..ffe4215fc 100644
--- a/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
+++ b/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
@@ -69,18 +69,7 @@ The migration pipeline uses two datasets as a running example throughout:
 
 ---
 
-### Stage 1 — Filter
-
-Keep only documents where `scientificMetadata` exists and is an object.
-
-```
-Input:  all Dataset documents
-Output: documents where scientificMetadata exists
-```
-
----
-
-### Stage 2 — Flatten scientificMetadata
+### Stage 1 — Flatten scientificMetadata
 
 Expose `_id` as `datasetId` and convert `scientificMetadata` from an object to an array of `{k, v}` pairs using `$objectToArray`.
 
@@ -112,7 +101,7 @@ Expose `_id` as `datasetId` and convert `scientificMetadata` from an object to a
 
 ---
 
-### Stage 3 — Unwind metaArr
+### Stage 2 — Unwind metaArr
 
 One document per `(dataset, metadata key)`.
 
@@ -126,7 +115,7 @@ Output: 2 documents
 
 ---
 
-### Stage 4 — Shape each (dataset, key) document
+### Stage 3 — Shape each (dataset, key) document
 
 Extract `key`, `humanReadableName`, and compute `userGroups` as the union of `ownerGroup` and `accessGroups`.
 
@@ -144,7 +133,7 @@ Extract `key`, `humanReadableName`, and compute `userGroups` as the union of `ow
 
 ---
 
-### Stage 5 — Unwind userGroups
+### Stage 4 — Unwind userGroups
 
 One document per `(dataset, key, group)`. This is the pivot that makes per-group counting possible.
 
@@ -164,7 +153,7 @@ Output
 
 ---
 
-### Stage 6 — Filter null userGroups
+### Stage 5 — Filter null userGroups
 
 Drop documents where `userGroups` is `null`. This only occurs when `preserveNullAndEmptyArrays` retains a document from a dataset that had no groups at all.
 
@@ -175,7 +164,7 @@ Output: only documents where userGroups is a real group name
 
 ---
 
-### Stage 7 — Group by (metaKeyId, group)
+### Stage 6 — Group by (metaKeyId, group)
 
 Each bucket is one unique `(key, humanReadableName, group)` combination.
 `groupCount` = how many datasets with this group reference this key.
@@ -199,7 +188,7 @@ Output (4 buckets — one per unique metaKeyId + group)
 
 ---
 
-### Stage 8 — Group by metaKeyId
+### Stage 7 — Group by metaKeyId
 
 Reassemble one document per metadata key by collecting the per-group buckets from stage 7.
 
@@ -229,7 +218,7 @@ Output (2 documents — one per unique key)
 
 ---
 
-### Stage 9 — Final projection
+### Stage 8 — Final projection
 
 - `userGroupCounts`: converts `[{k, v}]` array to a plain object (Mongoose stores this as a `Map`)
 - `usageCount`: unions all per-group `datasetId` sets and counts distinct IDs
@@ -259,7 +248,7 @@ Output (2 documents — one per unique key)
 
 ---
 
-### Stage 10 — Merge into MetadataKeys
+### Stage 9 — Merge into MetadataKeys
 
 - `whenNotMatched: insert` — new documents are inserted as-is
 - `whenMatched` — additively merges counts when two `SOURCE_COLLECTIONS` produce the same `_id`
diff --git a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
index c3822810b..799c6b031 100644
--- a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
+++ b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
@@ -2,14 +2,7 @@ const SOURCE_COLLECTIONS = ["Dataset"];
 
 function buildPipeline(sourceType) {
   return [
-    // Stage 1: Only process documents that have scientificMetadata
-    {
-      $match: {
-        scientificMetadata: { $exists: true, $type: "object" },
-      },
-    },
-
-    // Stage 2: Flatten scientificMetadata into an array of key/value pairs.
+    // Stage 1: Flatten scientificMetadata into an array of key/value pairs.
     // Preserve _id as datasetId so we can count unique datasets later.
     {
       $project: {
@@ -21,10 +14,10 @@ function buildPipeline(sourceType) {
       },
     },
 
-    // Stage 3: One document per (dataset, metadata key)
+    // Stage 2: One document per (dataset, metadata key)
     { $unwind: "$metaArr" },
 
-    // Stage 4: Shape each (dataset, key) document.
+    // Stage 3: Shape each (dataset, key) document.
     // userGroups is the union of ownerGroup + accessGroups for this dataset.
     {
       $project: {
@@ -38,7 +31,7 @@ function buildPipeline(sourceType) {
       },
     },
 
-    // Stage 5: One document per (dataset, key, group).
+    // Stage 4: One document per (dataset, key, group).
     {
       $unwind: {
         path: "$userGroups",
@@ -46,7 +39,7 @@ function buildPipeline(sourceType) {
       },
     },
 
-    // Stage 6: Filter out null and empty-string groups.
+    // Stage 5: Filter out null and empty-string groups.
     // Note: datasets with no valid groups are excluded from usageCount.
     // This is acceptable since ownerGroup is a required field in practice.
     {
@@ -55,7 +48,7 @@ function buildPipeline(sourceType) {
       },
     },
 
-    // Stage 7: Group by (metaKeyId, group).
+    // Stage 6: Group by (metaKeyId, group).
     // groupCount = how many datasets with this group use this key.
     // datasetIds = set of distinct dataset IDs (for accurate usageCount).
     {
@@ -74,7 +67,7 @@ function buildPipeline(sourceType) {
       },
     },
 
-    // Stage 8: Group by metaKeyId to reassemble one document per metadata key.
+    // Stage 7: Group by metaKeyId to reassemble one document per metadata key.
     // userGroupCountsArr will become the userGroupCounts Map.
     // datasetIdSets is a list of per-group dataset ID sets — merged in the
     // next stage to compute total unique dataset count (usageCount).
@@ -92,7 +85,7 @@ function buildPipeline(sourceType) {
       },
     },
 
-    // Stage 9: Final projection.
+    // Stage 8: Final projection.
     // userGroupCounts: [{k,v}] array → plain object (stored as Map in Mongoose).
     // usageCount: union all per-group datasetId sets, count distinct IDs.
     {
@@ -220,6 +213,7 @@ function buildPipeline(sourceType) {
 
 module.exports = {
   async up(db) {
+    const BATCH_SIZE = 10000;
     const start = Date.now();
     const elapsed = () => `${((Date.now() - start) / 1000).toFixed(1)}s`;
 
@@ -245,24 +239,44 @@ module.exports = {
         `[${elapsed()}] Processing ${total.toLocaleString()} documents from ${collection}...`,
       );
 
-      const timer = setInterval(
-        () =>
-          console.log(
-            `[${elapsed()}] ${collection} migration still running...`,
-          ),
-        5000,
-      );
+      let lastId = null;
+      let processed = 0;
+
+      while (true) {
+        const match = {
+          scientificMetadata: { $exists: true, $type: "object" },
+          ...(lastId && { _id: { $gt: lastId } }),
+        };
+
+        const batch = await db
+          .collection(collection)
+          .find(match)
+          .sort({ _id: 1 })
+          .limit(BATCH_SIZE)
+          .project({ _id: 1 })
+          .toArray();
+
+        if (batch.length === 0) break;
+
+        const batchIds = batch.map((d) => d._id);
 
-      try {
         await db
           .collection(collection)
-          .aggregate(buildPipeline(collection), {
-            allowDiskUse: true,
-            maxTimeMS: 0,
-          })
+          .aggregate(
+            [
+              { $match: { _id: { $in: batchIds } } },
+              ...buildPipeline(collection).slice(1),
+            ],
+            { allowDiskUse: true, maxTimeMS: 0 },
+          )
           .toArray();
-      } finally {
-        clearInterval(timer);
+
+        lastId = batch[batch.length - 1]._id;
+        processed += batch.length;
+
+        console.log(
+          `[${elapsed()}] ${collection}: ${processed.toLocaleString()}/${total.toLocaleString()}`,
+        );
       }
 
       console.log(`[${elapsed()}] ✅ ${collection} done`);

From 40a0c6d363f4cf23b9ac36b2868b7a71d1fe1984 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Tue, 28 Apr 2026 14:27:07 +0200
Subject: [PATCH 14/29] address comment

---
 src/metadata-keys/metadatakeys.service.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/metadata-keys/metadatakeys.service.ts b/src/metadata-keys/metadatakeys.service.ts
index 26844b547..1b6e097c8 100644
--- a/src/metadata-keys/metadatakeys.service.ts
+++ b/src/metadata-keys/metadatakeys.service.ts
@@ -196,8 +196,8 @@ export class MetadataKeysService {
    * Called when a dataset is updated.
    *
    * Diffs the old and new metadata to produce three disjoint key sets:
-   *   - added:   only in newDoc → insertManyFromSource
-   *   - removed: only in oldDoc → deleteMany
+   *   - added:   only in newDoc(after change) → insertManyFromSource
+   *   - removed: only in oldDoc(before change) → deleteMany
    *   - shared:  in both       → updateSharedKeys (handles group / isPublished
    *                              / humanReadableName changes)
    *

From b5446eddbef2cd5602c1a00cd01cea7cca7794f1 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Tue, 28 Apr 2026 15:20:00 +0200
Subject: [PATCH 15/29] minor fix

---
 ...420145401-sync-dataset-scientificMetadata-to-metadatakeys.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
index 799c6b031..bd3a92ede 100644
--- a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
+++ b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
@@ -111,7 +111,7 @@ function buildPipeline(sourceType) {
       },
     },
 
-    // Stage 10: Merge into MetadataKeys.
+    // Stage 9: Merge into MetadataKeys.
     // whenMatched handles the (future) case where multiple SOURCE_COLLECTIONS
     // produce the same _id. Not possible today since sourceType is part of
     // the _id, but kept correct for when more collections are added.

From d51d0699737f88116d52208334f90d42dc261004 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Wed, 29 Apr 2026 11:13:54 +0200
Subject: [PATCH 16/29] Revert "set ownerGroup as  required with nonEmpty
 value"

This reverts commit 8320494449e9de3c4c1c033474127bb03e3ce5db.
---
 src/common/dto/ownable.dto.ts | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/common/dto/ownable.dto.ts b/src/common/dto/ownable.dto.ts
index c585e6072..b3a09e95f 100644
--- a/src/common/dto/ownable.dto.ts
+++ b/src/common/dto/ownable.dto.ts
@@ -1,5 +1,5 @@
 import { ApiProperty } from "@nestjs/swagger";
-import { IsNotEmpty, IsOptional, IsString } from "class-validator";
+import { IsOptional, IsString } from "class-validator";
 
 export class OwnableDto {
   @ApiProperty({
@@ -8,7 +8,6 @@ export class OwnableDto {
     description: "Name of the group owning this item.",
   })
   @IsString()
-  @IsNotEmpty()
   readonly ownerGroup: string;
 
   @ApiProperty({

From ff1a457204f659ba1cb6df46c89e973d5ce4c28b Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Wed, 29 Apr 2026 14:55:54 +0200
Subject: [PATCH 17/29] revert existingDataset in findByIdAndUpdate

---
 src/datasets/datasets.service.ts | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/datasets/datasets.service.ts b/src/datasets/datasets.service.ts
index 53443083f..876c9c6d7 100644
--- a/src/datasets/datasets.service.ts
+++ b/src/datasets/datasets.service.ts
@@ -520,6 +520,13 @@ export class DatasetsService {
       | PartialUpdateDatasetWithHistoryDto,
     unmodifiedSince?: Date,
   ): Promise<DatasetDocument | null> {
+    const existingDataset = await this.datasetModel.findOne({ pid: id }).exec();
+    // check if we were able to find the dataset
+    if (!existingDataset) {
+      // no luck. we need to create a new dataset
+      throw new NotFoundException(`Dataset #${id} not found`);
+    }
+
     const username = (this.request.user as JWTUser).username;
 
     // NOTE: When doing findByIdAndUpdate in mongoose it does reset the subdocuments to default values if no value is provided

From 43ea1142f5c0ce2f678497fc80cb4597cee8268c Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Wed, 29 Apr 2026 14:56:36 +0200
Subject: [PATCH 18/29] remove unnecessary comment

---
 src/datasets/datasets.service.ts | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/datasets/datasets.service.ts b/src/datasets/datasets.service.ts
index 876c9c6d7..4748d3140 100644
--- a/src/datasets/datasets.service.ts
+++ b/src/datasets/datasets.service.ts
@@ -521,9 +521,7 @@ export class DatasetsService {
     unmodifiedSince?: Date,
   ): Promise<DatasetDocument | null> {
     const existingDataset = await this.datasetModel.findOne({ pid: id }).exec();
-    // check if we were able to find the dataset
     if (!existingDataset) {
-      // no luck. we need to create a new dataset
       throw new NotFoundException(`Dataset #${id} not found`);
     }
 

From 921d11b840061053c9ffa5de96233348e512d696 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Thu, 30 Apr 2026 10:34:21 +0200
Subject: [PATCH 19/29] add back existingDataset & modify unit test accordingly

---
 src/datasets/datasets.service.spec.ts | 9 ++++++---
 src/datasets/datasets.service.ts      | 6 +++---
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/datasets/datasets.service.spec.ts b/src/datasets/datasets.service.spec.ts
index 745b77545..c8d087a53 100644
--- a/src/datasets/datasets.service.spec.ts
+++ b/src/datasets/datasets.service.spec.ts
@@ -178,11 +178,11 @@ describe("DatasetsService", () => {
     ).toBe("Already Encoded");
   });
 
-  it("should throw NotFoundException if no document is found for update and no unmodifiedSince is provided", async () => {
+  it("should throw NotFoundException if no document is found", async () => {
     const updateDto = { datasetName: "Updated Name" };
-    model.findOneAndUpdate = jest
+    model.findOne = jest
       .fn()
-      .mockReturnValue({ exec: jest.fn().mockReturnValue(null) });
+      .mockReturnValue({ exec: jest.fn().mockResolvedValue(null) });
     await expect(
       service.findByIdAndUpdate("testId", updateDto),
     ).rejects.toThrow(NotFoundException);
@@ -191,6 +191,9 @@ describe("DatasetsService", () => {
   it("should throw PreconditionedFailed if no patched dataset is returned (indicating a concurrent modification)", async () => {
     const updateDto = { datasetName: "Updated Name" };
     const unmodifiedSince = new Date("2021-11-11T12:29:02.083Z");
+    model.findOne = jest
+      .fn()
+      .mockReturnValue({ exec: jest.fn().mockResolvedValue(mockDataset) });
     model.findOneAndUpdate = jest
       .fn()
       .mockReturnValue({ exec: jest.fn().mockReturnValue(null) });
diff --git a/src/datasets/datasets.service.ts b/src/datasets/datasets.service.ts
index 4748d3140..1603eac94 100644
--- a/src/datasets/datasets.service.ts
+++ b/src/datasets/datasets.service.ts
@@ -520,13 +520,13 @@ export class DatasetsService {
       | PartialUpdateDatasetWithHistoryDto,
     unmodifiedSince?: Date,
   ): Promise<DatasetDocument | null> {
+    const username = (this.request.user as JWTUser).username;
+
     const existingDataset = await this.datasetModel.findOne({ pid: id }).exec();
     if (!existingDataset) {
       throw new NotFoundException(`Dataset #${id} not found`);
     }
 
-    const username = (this.request.user as JWTUser).username;
-
     // NOTE: When doing findByIdAndUpdate in mongoose it does reset the subdocuments to default values if no value is provided
     // https://stackoverflow.com/questions/57324321/mongoose-overwriting-data-in-mongodb-with-default-values-in-subdocuments
     let queryFilter: FilterQuery<DatasetDocument> = { pid: id };
@@ -545,7 +545,7 @@ export class DatasetsService {
     // check if we were able to find the dataset (matching the precondition, if supplied) and update it
     if (!patchedDataset) {
       if (!unmodifiedSince) {
-        throw new NotFoundException(`Dataset #${id} not found`);
+        throw new NotFoundException(`Dataset #${id} failed to update.`);
       }
       throw new PreconditionFailedException(
         `Dataset #${id} has been modified on the server since ${unmodifiedSince.toUTCString()}.`,

From d90c3cdeadb4d48cdc4e0aecf7fc4ab4d56d75f4 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Thu, 30 Apr 2026 14:29:18 +0200
Subject: [PATCH 20/29] fix migration pipe

---
 ...420145401-sync-dataset-scientificMetadata-to-metadatakeys.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
index bd3a92ede..b41877970 100644
--- a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
+++ b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
@@ -265,7 +265,7 @@ module.exports = {
           .aggregate(
             [
               { $match: { _id: { $in: batchIds } } },
-              ...buildPipeline(collection).slice(1),
+              ...buildPipeline(collection),
             ],
             { allowDiskUse: true, maxTimeMS: 0 },
           )

From 1aee8bcfe28b423ec80aa74e8b1e38d0ee8ddca1 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Thu, 30 Apr 2026 15:35:22 +0200
Subject: [PATCH 21/29] include MAX_USER_GROUPS_PER_METADATA_KEY logic for
 metdataKeys migration script

---
 ...aset-scientificMetadata-to-metadatakeys.md |  11 ++
 ...aset-scientificMetadata-to-metadatakeys.js | 119 +++++++++++-------
 2 files changed, 82 insertions(+), 48 deletions(-)

diff --git a/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md b/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
index ffe4215fc..c753c9cc1 100644
--- a/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
+++ b/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
@@ -255,6 +255,17 @@ Output (2 documents — one per unique key)
 
 > This is not triggered today since `sourceType` is part of `_id`, making collisions between collections impossible. It is kept correct for when `Proposal`, `Sample`, or other collections are added as sources.
 
+**Group cap behaviour**
+
+When merging, if the existing document's `usageCount` exceeds `MAX_USER_GROUPS_PER_METADATA_KEY`:
+
+- `userGroups` is frozen — no new groups are added
+- `userGroupCounts` is frozen — no new counts are merged
+- `isPublished` is forced to `true` — key remains publicly discoverable
+- `usageCount` continues to increment regardless
+
+This prevents unbounded document growth while ensuring heavily-used keys remain visible to all users via `isPublished: true`.
+
 ```js
 // whenMatched userGroupCounts example
 // existing: { "group-1": 3, "group-2": 1 }
diff --git a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
index b41877970..e6ed37bbf 100644
--- a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
+++ b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
@@ -1,4 +1,6 @@
 const SOURCE_COLLECTIONS = ["Dataset"];
+const BATCH_SIZE = 10000;
+const MAX_USER_GROUPS_PER_METADATA_KEY = 1000;
 
 function buildPipeline(sourceType) {
   return [
@@ -131,73 +133,95 @@ function buildPipeline(sourceType) {
                   updatedBy: { $literal: "migration" },
                   updatedAt: { $toDate: "$$NOW" },
 
-                  // Merge group arrays
+                  isPublished: {
+                    $cond: {
+                      if: {
+                        $gt: ["$usageCount", MAX_USER_GROUPS_PER_METADATA_KEY],
+                      },
+                      then: true,
+                      else: { $or: ["$isPublished", "$$new.isPublished"] },
+                    },
+                  },
+
                   userGroups: {
-                    $setUnion: ["$userGroups", "$$new.userGroups"],
+                    $cond: {
+                      if: {
+                        $gt: ["$usageCount", MAX_USER_GROUPS_PER_METADATA_KEY],
+                      },
+                      then: "$userGroups",
+                      else: { $setUnion: ["$userGroups", "$$new.userGroups"] },
+                    },
                   },
 
                   // Additively merge userGroupCounts — sum counts per group
                   // across both the existing and incoming documents.
                   userGroupCounts: {
-                    $arrayToObject: {
-                      $map: {
-                        input: {
-                          $setUnion: [
-                            {
-                              $map: {
-                                input: { $objectToArray: "$userGroupCounts" },
-                                as: "e",
-                                in: "$$e.k",
-                              },
-                            },
-                            {
-                              $map: {
-                                input: {
-                                  $objectToArray: "$$new.userGroupCounts",
+                    $cond: {
+                      if: {
+                        $gt: ["$usageCount", MAX_USER_GROUPS_PER_METADATA_KEY],
+                      },
+                      then: "$userGroupCounts",
+                      else: {
+                        $arrayToObject: {
+                          $map: {
+                            input: {
+                              $setUnion: [
+                                {
+                                  $map: {
+                                    input: {
+                                      $objectToArray: "$userGroupCounts",
+                                    },
+                                    as: "e",
+                                    in: "$$e.k",
+                                  },
                                 },
-                                as: "e",
-                                in: "$$e.k",
-                              },
+                                {
+                                  $map: {
+                                    input: {
+                                      $objectToArray: "$$new.userGroupCounts",
+                                    },
+                                    as: "e",
+                                    in: "$$e.k",
+                                  },
+                                },
+                              ],
                             },
-                          ],
-                        },
-                        as: "group",
-                        in: {
-                          k: "$$group",
-                          v: {
-                            $add: [
-                              {
-                                $ifNull: [
+                            as: "group",
+                            in: {
+                              k: "$$group",
+                              v: {
+                                $add: [
                                   {
-                                    $getField: {
-                                      field: "$$group",
-                                      input: "$userGroupCounts",
-                                    },
+                                    $ifNull: [
+                                      {
+                                        $getField: {
+                                          field: "$$group",
+                                          input: "$userGroupCounts",
+                                        },
+                                      },
+                                      0,
+                                    ],
                                   },
-                                  0,
-                                ],
-                              },
-                              {
-                                $ifNull: [
                                   {
-                                    $getField: {
-                                      field: "$$group",
-                                      input: "$$new.userGroupCounts",
-                                    },
+                                    $ifNull: [
+                                      {
+                                        $getField: {
+                                          field: "$$group",
+                                          input: "$$new.userGroupCounts",
+                                        },
+                                      },
+                                      0,
+                                    ],
                                   },
-                                  0,
                                 ],
                               },
-                            ],
+                            },
                           },
                         },
                       },
                     },
                   },
 
-                  // Any source being published makes the key published
-                  isPublished: { $or: ["$isPublished", "$$new.isPublished"] },
-
                   // Add incoming dataset count to existing
                   usageCount: { $add: ["$usageCount", "$$new.usageCount"] },
                 },
@@ -213,7 +237,6 @@ function buildPipeline(sourceType) {
 
 module.exports = {
   async up(db) {
-    const BATCH_SIZE = 10000;
     const start = Date.now();
     const elapsed = () => `${((Date.now() - start) / 1000).toFixed(1)}s`;
 

From c6905fa1677923560ad0d28b0f8cf7e488068115 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Mon, 4 May 2026 17:05:58 +0200
Subject: [PATCH 22/29] update seed migration script

---
 ...aset-scientificMetadata-to-metadatakeys.js | 144 +++++++++---------
 1 file changed, 72 insertions(+), 72 deletions(-)

diff --git a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
index e6ed37bbf..712ca4328 100644
--- a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
+++ b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
@@ -1,6 +1,5 @@
 const SOURCE_COLLECTIONS = ["Dataset"];
 const BATCH_SIZE = 10000;
-const MAX_USER_GROUPS_PER_METADATA_KEY = 1000;
 
 function buildPipeline(sourceType) {
   return [
@@ -87,12 +86,26 @@ function buildPipeline(sourceType) {
       },
     },
 
+    {
+      $addFields: {
+        metaKeyId: "$_id", // metaKeyId is needed for the $merge stage for deduplication, but will be removed in the final updateMany
+        generatedId: {
+          $function: {
+            body: "function() { return UUID().toString().replace('UUID(\"', '').replace('\")', ''); }",
+            args: [],
+            lang: "js",
+          },
+        },
+      },
+    },
+
     // Stage 8: Final projection.
     // userGroupCounts: [{k,v}] array → plain object (stored as Map in Mongoose).
     // usageCount: union all per-group datasetId sets, count distinct IDs.
     {
       $project: {
-        _id: 1,
+        _id: "$generatedId",
+        metaKeyId: 1,
         key: 1,
         sourceType: { $literal: sourceType },
         humanReadableName: 1,
@@ -120,7 +133,7 @@ function buildPipeline(sourceType) {
     {
       $merge: {
         into: "MetadataKeys",
-        on: "_id",
+        on: "metaKeyId",
         whenMatched: [
           {
             $replaceWith: {
@@ -128,94 +141,71 @@ function buildPipeline(sourceType) {
                 "$$new",
                 {
                   // Preserve original audit fields
+                  _id: "$_id",
                   createdAt: { $ifNull: ["$createdAt", "$$new.createdAt"] },
                   createdBy: { $ifNull: ["$createdBy", "$$new.createdBy"] },
                   updatedBy: { $literal: "migration" },
                   updatedAt: { $toDate: "$$NOW" },
-
-                  isPublished: {
-                    $cond: {
-                      if: {
-                        $gt: ["$usageCount", MAX_USER_GROUPS_PER_METADATA_KEY],
-                      },
-                      then: true,
-                      else: { $or: ["$isPublished", "$$new.isPublished"] },
-                    },
-                  },
-
+                  isPublished: { $or: ["$isPublished", "$$new.isPublished"] },
                   userGroups: {
-                    $cond: {
-                      if: {
-                        $gt: ["$usageCount", MAX_USER_GROUPS_PER_METADATA_KEY],
-                      },
-                      then: "$userGroups",
-                      else: { $setUnion: ["$userGroups", "$$new.userGroups"] },
-                    },
+                    $setUnion: ["$userGroups", "$$new.userGroups"],
                   },
 
                   // Additively merge userGroupCounts — sum counts per group
                   // across both the existing and incoming documents.
                   userGroupCounts: {
-                    $cond: {
-                      if: {
-                        $gt: ["$usageCount", MAX_USER_GROUPS_PER_METADATA_KEY],
-                      },
-                      then: "$userGroupCounts",
-                      else: {
-                        $arrayToObject: {
-                          $map: {
-                            input: {
-                              $setUnion: [
-                                {
-                                  $map: {
-                                    input: {
-                                      $objectToArray: "$userGroupCounts",
-                                    },
-                                    as: "e",
-                                    in: "$$e.k",
-                                  },
+                    $arrayToObject: {
+                      $map: {
+                        input: {
+                          $setUnion: [
+                            {
+                              $map: {
+                                input: {
+                                  $objectToArray: "$userGroupCounts",
                                 },
-                                {
-                                  $map: {
-                                    input: {
-                                      $objectToArray: "$$new.userGroupCounts",
-                                    },
-                                    as: "e",
-                                    in: "$$e.k",
-                                  },
+                                as: "e",
+                                in: "$$e.k",
+                              },
+                            },
+                            {
+                              $map: {
+                                input: {
+                                  $objectToArray: "$$new.userGroupCounts",
                                 },
-                              ],
+                                as: "e",
+                                in: "$$e.k",
+                              },
                             },
-                            as: "group",
-                            in: {
-                              k: "$$group",
-                              v: {
-                                $add: [
+                          ],
+                        },
+                        as: "group",
+                        in: {
+                          k: "$$group",
+                          v: {
+                            $add: [
+                              {
+                                $ifNull: [
                                   {
-                                    $ifNull: [
-                                      {
-                                        $getField: {
-                                          field: "$$group",
-                                          input: "$userGroupCounts",
-                                        },
-                                      },
-                                      0,
-                                    ],
+                                    $getField: {
+                                      field: "$$group",
+                                      input: "$userGroupCounts",
+                                    },
                                   },
+                                  0,
+                                ],
+                              },
+                              {
+                                $ifNull: [
                                   {
-                                    $ifNull: [
-                                      {
-                                        $getField: {
-                                          field: "$$group",
-                                          input: "$$new.userGroupCounts",
-                                        },
-                                      },
-                                      0,
-                                    ],
+                                    $getField: {
+                                      field: "$$group",
+                                      input: "$$new.userGroupCounts",
+                                    },
                                   },
+                                  0,
                                 ],
                               },
-                            },
+                            ],
                           },
                         },
                       },
@@ -242,6 +232,11 @@ module.exports = {
 
     // Wipe MetadataKeys collection first to ensure a clean state
     const deleted = await db.collection("MetadataKeys").deleteMany({});
+
+    await db
+      .collection("MetadataKeys")
+      .createIndex({ metaKeyId: 1 }, { unique: true });
+
     console.log(
       `[${elapsed()}] Cleared ${deleted.deletedCount} existing MetadataKeys`,
     );
@@ -305,6 +300,11 @@ module.exports = {
       console.log(`[${elapsed()}] ✅ ${collection} done`);
     }
 
+    await db.collection("MetadataKeys").dropIndex("metaKeyId_1");
+    await db
+      .collection("MetadataKeys")
+      .updateMany({}, [{ $set: { id: "$_id" } }, { $unset: ["metaKeyId"] }]);
+
     const result = await db.collection("MetadataKeys").countDocuments();
     console.log(
       `[${elapsed()}] Migration completed — Total MetadataKeys: ${result.toLocaleString()}`,

From 0aeb977643029f75fe48189fce689e506f1eee8f Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Mon, 4 May 2026 17:17:02 +0200
Subject: [PATCH 23/29] Replace concat _id with compound filter (sourceType +
 key + humanReadableName)

---
 .../metadatakeys.service.spec.ts              | 104 ++++++++----------
 src/metadata-keys/metadatakeys.service.ts     |  92 ++++++++--------
 2 files changed, 92 insertions(+), 104 deletions(-)

diff --git a/src/metadata-keys/metadatakeys.service.spec.ts b/src/metadata-keys/metadatakeys.service.spec.ts
index 7f66ca359..e2ac659ab 100644
--- a/src/metadata-keys/metadatakeys.service.spec.ts
+++ b/src/metadata-keys/metadatakeys.service.spec.ts
@@ -7,7 +7,7 @@ const modelMock = {
   aggregate: jest
     .fn()
     .mockReturnValue({ exec: jest.fn().mockResolvedValue([]) }),
-  bulkWrite: jest.fn().mockResolvedValue({}),
+  findOneAndUpdate: jest.fn().mockResolvedValue({}),
   updateMany: jest.fn().mockResolvedValue({}),
   deleteMany: jest.fn().mockResolvedValue({}),
 };
@@ -122,89 +122,71 @@ describe("MetadataKeysService", () => {
     it("does nothing when metadata is empty", async () => {
       await service.insertManyFromSource({ ...BASE_DOC, metadata: {} });
 
-      expect(modelMock.bulkWrite).not.toHaveBeenCalled();
+      expect(modelMock.findOneAndUpdate).not.toHaveBeenCalled();
     });
 
-    it("calls bulkWrite with one updateOne per metadata key", async () => {
+    it("calls findOneAndUpdate once per metadata key", async () => {
       await service.insertManyFromSource(BASE_DOC);
-
-      expect(modelMock.bulkWrite).toHaveBeenCalledTimes(1);
-      const [ops] = modelMock.bulkWrite.mock.calls[0];
-      expect(ops).toHaveLength(2);
-      expect(ops.every((op: object) => "updateOne" in op)).toBe(true);
-    });
-
-    it("builds correct _id from sourceType + key + humanReadableName", async () => {
-      await service.insertManyFromSource(BASE_DOC);
-
-      const [ops] = modelMock.bulkWrite.mock.calls[0];
-      const ids = ops.map(
-        (op: { updateOne: { filter: { _id: string } } }) =>
-          op.updateOne.filter._id,
-      );
-
-      expect(ids).toContain("Dataset_temperature_Temperature");
-      expect(ids).toContain("Dataset_pressure_"); // no human_name
+      expect(modelMock.findOneAndUpdate).toHaveBeenCalledTimes(2);
     });
 
-    it("sets upsert: true on every operation", async () => {
+    it("builds correct filter from sourceType + key + humanReadableName", async () => {
       await service.insertManyFromSource(BASE_DOC);
-
-      const [ops] = modelMock.bulkWrite.mock.calls[0];
-      expect(
-        ops.every(
-          (op: { updateOne: { upsert: boolean } }) =>
-            op.updateOne.upsert === true,
-        ),
-      ).toBe(true);
+      const calls = modelMock.findOneAndUpdate.mock.calls;
+      const filters = calls.map((call: unknown[]) => call[0]);
+      expect(filters).toContainEqual({
+        sourceType: "Dataset",
+        key: "temperature",
+        humanReadableName: "Temperature",
+      });
+      expect(filters).toContainEqual({
+        sourceType: "Dataset",
+        key: "pressure",
+        humanReadableName: "",
+      });
     });
 
     it("increments usageCount and per-group counts", async () => {
       await service.insertManyFromSource(BASE_DOC);
-
-      const [ops] = modelMock.bulkWrite.mock.calls[0];
-      const { $inc } = ops[0].updateOne.update;
-
-      expect($inc.usageCount).toBe(1);
-      expect($inc["userGroupCounts.group-1"]).toBe(1);
-      expect($inc["userGroupCounts.group-2"]).toBe(1);
+      const [, update] = modelMock.findOneAndUpdate.mock.calls[0];
+      expect(update.$inc.usageCount).toBe(1);
+      expect(update.$inc["userGroupCounts.group-1"]).toBe(1);
+      expect(update.$inc["userGroupCounts.group-2"]).toBe(1);
     });
 
     it("adds userGroups via $addToSet", async () => {
       await service.insertManyFromSource(BASE_DOC);
-
-      const [ops] = modelMock.bulkWrite.mock.calls[0];
-      const { $addToSet } = ops[0].updateOne.update;
-
-      expect($addToSet.userGroups.$each).toEqual(["group-1", "group-2"]);
+      const [, update] = modelMock.findOneAndUpdate.mock.calls[0];
+      expect(update.$addToSet.userGroups.$each).toEqual(["group-1", "group-2"]);
     });
 
     it("sets isPublished when dataset is published", async () => {
       await service.insertManyFromSource({ ...BASE_DOC, isPublished: true });
-
-      const [ops] = modelMock.bulkWrite.mock.calls[0];
-      expect(ops[0].updateOne.update.$set.isPublished).toBe(true);
+      const [, update] = modelMock.findOneAndUpdate.mock.calls[0];
+      expect(update.$set.isPublished).toBe(true);
     });
 
     it("does not set isPublished when dataset is not published", async () => {
       await service.insertManyFromSource({ ...BASE_DOC, isPublished: false });
-
-      const [ops] = modelMock.bulkWrite.mock.calls[0];
-      expect(ops[0].updateOne.update.$set.isPublished).toBeUndefined();
+      const [, update] = modelMock.findOneAndUpdate.mock.calls[0];
+      expect(update.$set.isPublished).toBeUndefined();
     });
 
     it("uses $setOnInsert for immutable fields", async () => {
       await service.insertManyFromSource(BASE_DOC);
-
-      const [ops] = modelMock.bulkWrite.mock.calls[0];
-      const { $setOnInsert } = ops[0].updateOne.update;
-
-      expect($setOnInsert).toMatchObject({
+      const [, update] = modelMock.findOneAndUpdate.mock.calls[0];
+      expect(update.$setOnInsert).toMatchObject({
         key: expect.any(String),
         sourceType: "Dataset",
         humanReadableName: expect.any(String),
       });
     });
+
+    it("sets upsert: true", async () => {
+      await service.insertManyFromSource(BASE_DOC);
+      const [, , options] = modelMock.findOneAndUpdate.mock.calls[0];
+      expect(options.upsert).toBe(true);
+    });
   });
 
   // -------------------------------------------------------------------------
@@ -244,19 +226,27 @@ describe("MetadataKeysService", () => {
       expect(firstUpdate.$inc["userGroupCounts.group-2"]).toBe(-1);
     });
 
-    it("targets correct _ids based on metadata keys and humanReadableName", async () => {
+    it("targets correct filter based on metadata keys and humanReadableName", async () => {
       await service.deleteMany(BASE_DOC);
 
       const [firstFilter] = modelMock.updateMany.mock.calls[0];
-      expect(firstFilter._id.$in).toContain("Dataset_temperature_Temperature");
-      expect(firstFilter._id.$in).toContain("Dataset_pressure_");
+      expect(firstFilter.$or).toContainEqual({
+        sourceType: "Dataset",
+        key: "temperature",
+        humanReadableName: "Temperature",
+      });
+      expect(firstFilter.$or).toContainEqual({
+        sourceType: "Dataset",
+        key: "pressure",
+        humanReadableName: "",
+      });
     });
 
     it("deletes documents where usageCount <= 0", async () => {
       await service.deleteMany(BASE_DOC);
 
       const [deleteFilter] = modelMock.deleteMany.mock.calls[0];
-      expect(deleteFilter.usageCount).toEqual({ $lte: 0 });
+      expect(deleteFilter.$and[1].usageCount).toEqual({ $lte: 0 });
     });
 
     it("recompute stage uses $set with $objectToArray on userGroupCounts", async () => {
diff --git a/src/metadata-keys/metadatakeys.service.ts b/src/metadata-keys/metadatakeys.service.ts
index 1b6e097c8..bcb1978ab 100644
--- a/src/metadata-keys/metadatakeys.service.ts
+++ b/src/metadata-keys/metadatakeys.service.ts
@@ -28,6 +28,14 @@ export type MetadataSourceDoc = {
 const RECOMPUTE_USER_GROUPS_STAGE = [
   {
     $set: {
+      userGroupCounts: {
+        $arrayToObject: {
+          $filter: {
+            input: { $objectToArray: "$userGroupCounts" },
+            cond: { $gt: ["$$this.v", 0] },
+          },
+        },
+      },
       userGroups: {
         $map: {
           input: {
@@ -110,39 +118,28 @@ export class MetadataKeysService {
     const ops = Object.entries(metadata).map(([key, entry]) => {
       const humanReadableName =
         (entry as ScientificMetadataEntry).human_name ?? "";
-      const id = this.buildId(sourceType, key, humanReadableName);
-
-      return {
-        updateOne: {
-          filter: { _id: id },
-          update: {
-            $setOnInsert: addCreatedByFields(
-              {
-                _id: id as unknown as MetadataKeyDocument["_id"],
-                id,
-                key,
-                sourceType,
-                humanReadableName,
-              },
-              "system",
-            ),
-            $set: {
-              // Only ever set to true inline. The false case is handled
-              // by the reconciliation cronjob since it transitions rarely.
-              ...(isPublished && { isPublished: true }),
-            },
-            $addToSet: { userGroups: { $each: userGroups } },
-            $inc: {
-              usageCount: 1,
-              ...this.groupCountDeltas(userGroups, 1),
-            },
+
+      return this.metadataKeyModel.findOneAndUpdate(
+        { sourceType, key, humanReadableName },
+        {
+          $setOnInsert: addCreatedByFields(
+            { key, sourceType, humanReadableName },
+            "system",
+          ),
+          $set: {
+            ...(isPublished && { isPublished: true }),
+          },
+          $addToSet: { userGroups: { $each: userGroups } },
+          $inc: {
+            usageCount: 1,
+            ...this.groupCountDeltas(userGroups, 1),
           },
-          upsert: true,
         },
-      };
+        { upsert: true },
+      );
     });
 
-    await this.metadataKeyModel.bulkWrite(ops);
+    await Promise.all(ops);
 
     Logger.log(
       `Upserted MetadataKeys for ${sourceType}: ${Object.keys(metadata).join(", ")}`,
@@ -158,34 +155,34 @@ export class MetadataKeysService {
    *      (drops any group whose count reached zero)
    *   3. Delete entries no longer referenced by any dataset
    *
-   * usageCount is the authoritative deletion signal because a dataset
-   * with no userGroups and isPublished: false would be invisible to
-   * both userGroupCounts and isPublished checks.
+
    */
   async deleteMany(doc: MetadataSourceDoc): Promise<void> {
     if (isEmpty(doc.metadata)) return;
 
     const { sourceType, userGroups, metadata } = doc;
     const keys = Object.keys(metadata);
-    const ids = keys.map((key) => {
+    const filters = keys.map((key) => {
       const humanReadableName =
         (metadata[key] as ScientificMetadataEntry)?.human_name ?? "";
-      return this.buildId(sourceType, key, humanReadableName);
+      return this.buildFilter(sourceType, key, humanReadableName);
     });
-    const filter = { _id: { $in: ids } };
+    const queryFilter = { $or: filters };
 
-    await this.metadataKeyModel.updateMany(filter, {
+    await this.metadataKeyModel.updateMany(queryFilter, {
       $inc: {
         usageCount: -1,
         ...this.groupCountDeltas(userGroups, -1),
       },
     });
 
-    await this.metadataKeyModel.updateMany(filter, RECOMPUTE_USER_GROUPS_STAGE);
+    await this.metadataKeyModel.updateMany(
+      queryFilter,
+      RECOMPUTE_USER_GROUPS_STAGE,
+    );
 
     await this.metadataKeyModel.deleteMany({
-      ...filter,
-      usageCount: { $lte: 0 },
+      $and: [queryFilter, { usageCount: { $lte: 0 } }],
     });
 
     Logger.log(
@@ -236,12 +233,12 @@ export class MetadataKeysService {
     ]);
   }
 
-  private buildId(
+  private buildFilter(
     sourceType: string,
     key: string,
     humanReadableName: string,
-  ): string {
-    return `${sourceType}_${key}_${humanReadableName}`;
+  ): FilterQuery<MetadataKeyDocument> {
+    return { sourceType, key, humanReadableName };
   }
 
   /**
@@ -307,14 +304,15 @@ export class MetadataKeysService {
       humanNameUnchangedKeys.length > 0 &&
       (hasGroupChanges || publishedFlippedOn)
     ) {
-      const ids = humanNameUnchangedKeys.map((k) => {
+      const filters = humanNameUnchangedKeys.map((k) => {
         const humanReadableName =
           (newDoc.metadata[k] as ScientificMetadataEntry)?.human_name ?? "";
-        return this.buildId(sourceType, k, humanReadableName);
+        return this.buildFilter(sourceType, k, humanReadableName);
       });
-      const filter = { _id: { $in: ids } };
 
-      await this.metadataKeyModel.updateMany(filter, {
+      const queryFilter = { $or: filters };
+
+      await this.metadataKeyModel.updateMany(queryFilter, {
         ...(addedGroups.length > 0 && {
           $addToSet: { userGroups: { $each: addedGroups } },
         }),
@@ -329,7 +327,7 @@ export class MetadataKeysService {
       // Recompute userGroups to drop groups whose count reached zero
       if (removedGroups.length > 0) {
         await this.metadataKeyModel.updateMany(
-          filter,
+          queryFilter,
           RECOMPUTE_USER_GROUPS_STAGE,
         );
       }

From 3b10fdc4d0b42a2bfd2312fcf120e2b5970ceae7 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Mon, 11 May 2026 12:39:28 +0200
Subject: [PATCH 24/29] improve migration script  and documentation

---
 ...aset-scientificMetadata-to-metadatakeys.md | 458 +++++++++++++-----
 ...aset-scientificMetadata-to-metadatakeys.js | 120 +----
 2 files changed, 331 insertions(+), 247 deletions(-)

diff --git a/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md b/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
index c753c9cc1..250483db1 100644
--- a/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
+++ b/docs/developer-guide/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.md
@@ -38,12 +38,13 @@ Each `MetadataKey` document tracks:
 
 ---
 
-## Output shape
+## MetadataKey shape
 
 ```js
 // MetadataKey document
 {
-  _id: "Dataset_temperature_Temperature",  // ${sourceType}_${key}_${humanReadableName}
+  _id: "550e8400-e29b-41d4-a716-446655440000",
+  id:"550e8400-e29b-41d4-a716-446655440000",
   key: "temperature",
   humanReadableName: "Temperature",
   sourceType: "Dataset",
@@ -60,189 +61,365 @@ Each `MetadataKey` document tracks:
 
 ## Migration Pipeline walkthrough
 
-The migration pipeline uses two datasets as a running example throughout:
-
-|           | ownerGroup | accessGroups | isPublished | scientificMetadata keys |
-| --------- | ---------- | ------------ | ----------- | ----------------------- |
-| Dataset A | group-1    | [group-2]    | true        | temperature, pressure   |
-| Dataset B | group-1    | []           | false       | temperature             |
+It builds a MetadataKeys collection by extracting and aggregating scientific metadata keys from datasets. Each document in MetadataKeys represents one unique metadata key, enriched with access group membership, usage counts, and publication status.
 
 ---
 
-### Stage 1 — Flatten scientificMetadata
+### Stage 1 — Flatten scientificMetadata into an array
+
+```js
+{
+  $project: {
+    datasetId: "$_id",
+    ownerGroup: 1,
+    accessGroups: 1,
+    isPublished: 1,
+    metaArr: { $objectToArray: "$scientificMetadata" },
+  },
+}
+```
+
+**What it does:** Converts the scientificMetadata object into an array of {k, v} pairs so it can be unwound in the next stage. Preserves \_id as datasetId for later use in usage counting.
 
-Expose `_id` as `datasetId` and convert `scientificMetadata` from an object to an array of `{k, v}` pairs using `$objectToArray`.
+**Input**
 
 ```js
-// Input
 {
-  _id: "uuid-A",
-  ownerGroup: "group-1",
-  accessGroups: ["group-2"],
-  isPublished: true,
-  scientificMetadata: {
-    temperature: { value: 100, unit: "C", human_name: "Temperature" },
-    pressure:    { value: 1, unit: "bar" },
+  "_id": "ds1",
+  "ownerGroup": "groupA",
+  "accessGroups": ["groupB"],
+  "isPublished": false,
+  "scientificMetadata": {
+    "temperature": { "human_name": "Temperature", "value": 100 },
+    "pressure": { "human_name": "Pressure", "value": 200 }
   }
 }
+```
 
-// Output
+**Output**
+
+```js
 {
-  datasetId: "uuid-A",
-  ownerGroup: "group-1",
-  accessGroups: ["group-2"],
-  isPublished: true,
-  metaArr: [
-    { k: "temperature", v: { value: 100, unit: "C", human_name: "Temperature" } },
-    { k: "pressure",    v: { value: 1, unit: "bar" } },
+  "datasetId": "ds1",
+  "ownerGroup": "groupA",
+  "accessGroups": ["groupB"],
+  "isPublished": false,
+  "metaArr": [
+    { "k": "temperature", "v": { "human_name": "Temperature", "value": 100 } },
+    { "k": "pressure",    "v": { "human_name": "Pressure",    "value": 200 } }
   ]
 }
 ```
 
 ---
 
-### Stage 2 — Unwind metaArr
+### Stage 2 — One document per metadata key
 
-One document per `(dataset, metadata key)`.
+```js
+{
+  $unwind: "$metaArr";
+}
+```
+
+**What it does:** Produces one document per metadata key entry. A dataset with N metadata keys becomes N documents.
 
+**Input (from stage1)**
+
+```js
+{
+  "datasetId": "ds1",
+  "metaArr": [
+    { "k": "temperature", "v": { "human_name": "Temperature" } },
+    { "k": "pressure",    "v": { "human_name": "Pressure" } }
+  ]
+}
 ```
-Input:  1 document (Dataset A) with metaArr of length 2
-Output: 2 documents
 
-{ datasetId: "uuid-A", ..., metaArr: { k: "temperature", v: { human_name: "Temperature", ... } } }
-{ datasetId: "uuid-A", ..., metaArr: { k: "pressure",    v: { ... } } }
+**Output**
+
+```js
+{ "datasetId": "ds1", "metaArr": { "k": "temperature", "v": { "human_name": "Temperature" } } }
+{ "datasetId": "ds1", "metaArr": { "k": "pressure",    "v": { "human_name": "Pressure" } } }
 ```
 
 ---
 
-### Stage 3 — Shape each (dataset, key) document
-
-Extract `key`, `humanReadableName`, and compute `userGroups` as the union of `ownerGroup` and `accessGroups`.
+### Stage 3 — Shape each document (datasetId+key) with HRM and userGroups
 
 ```js
-// Input (one of the two documents from stage 3)
-{ datasetId: "uuid-A", ownerGroup: "group-1", accessGroups: ["group-2"], isPublished: true, metaArr: { k: "temperature", v: { human_name: "Temperature" } } }
+{
+  $project: {
+    datasetId: 1,
+    key: "$metaArr.k",
+    isPublished: 1,
+    humanReadableName: { $ifNull: ["$metaArr.v.human_name", ""] },
+    userGroups: {
+      $setUnion: [["$ownerGroup"], { $ifNull: ["$accessGroups", []] }],
+    },
+  },
+}
+```
 
-// Output (all documents after both datasets are processed)
-{ datasetId: "uuid-A", key: "temperature", isPublished: true,  humanReadableName: "Temperature", userGroups: ["group-1", "group-2"] }
-{ datasetId: "uuid-A", key: "pressure",    isPublished: true,  humanReadableName: "",            userGroups: ["group-1", "group-2"] }
-{ datasetId: "uuid-B", key: "temperature", isPublished: false, humanReadableName: "Temperature", userGroups: ["group-1"] }
+**What it does:** Extracts the key name and human-readable name. Computes userGroups as the union of ownerGroup and accessGroups — every group that has access to this dataset.
+
+**Input (from stage2)**
+
+```js
+{
+  "datasetId": "ds1",
+  "ownerGroup": "groupA",
+  "accessGroups": ["groupB"],
+  "isPublished": false,
+  "metaArr": { "k": "temperature", "v": { "human_name": "Temperature" } }
+}
 ```
 
-> `ownerGroup` is a mandatory field, so no null fallback is needed. `accessGroups` is optional so it falls back to `[]`.
+**output**
+
+```js
+{
+  "datasetId": "ds1",
+  "key": "temperature",
+  "humanReadableName": "Temperature",
+  "isPublished": false,
+  "userGroups": ["groupA", "groupB"]
+}
+```
 
 ---
 
-### Stage 4 — Unwind userGroups
+### Stage 4 — One document per (dataset+key+group)
+
+```js
+{
+  $unwind: {
+    path: "$userGroups",
+  },
+}
+```
 
-One document per `(dataset, key, group)`. This is the pivot that makes per-group counting possible.
+**What it does:** Split userGroups so each group gets its own document. This allows grouping by (key, group) in Stage 6.
 
-`preserveNullAndEmptyArrays: true` keeps datasets with no groups so `usageCount` stays accurate.
+**Input (from stage3)**
 
+```js
+{
+  "datasetId": "ds1",
+  "key": "temperature",
+  "userGroups": ["groupA", "groupB"]
+}
 ```
-Input
-{ datasetId: "uuid-A", key: "temperature", humanReadableName: "Temperature", isPublished: true, userGroups: ["group-1", "group-2"] }
 
-Output
-{ datasetId: "uuid-A", key: "temperature", humanReadableName: "Temperature", isPublished: true,  userGroups: "group-1" }
-{ datasetId: "uuid-A", key: "temperature", humanReadableName: "Temperature", isPublished: true,  userGroups: "group-2" }
-{ datasetId: "uuid-A", key: "pressure",    humanReadableName: "",            isPublished: true,  userGroups: "group-1" }
-{ datasetId: "uuid-A", key: "pressure",    humanReadableName: "",            isPublished: true,  userGroups: "group-2" }
-{ datasetId: "uuid-B", key: "temperature", humanReadableName: "Temperature", isPublished: false, userGroups: "group-1" }
+**output**
+
+```js
+{ "datasetId": "ds1", "key": "temperature", "userGroups": "groupA" }
+{ "datasetId": "ds1", "key": "temperature", "userGroups": "groupB" }
 ```
 
 ---
 
-### Stage 5 — Filter null userGroups
+### Stage 5 — Group by (metaKeyId, group)
 
-Drop documents where `userGroups` is `null`. This only occurs when `preserveNullAndEmptyArrays` retains a document from a dataset that had no groups at all.
+```js
+{
+  $group: {
+    _id: {
+      metaKeyId: { $concat: [`${sourceType}_`, "$key", "_", "$humanReadableName"] },
+      group: "$userGroups",
+    },
+    key: { $first: "$key" },
+    humanReadableName: { $first: "$humanReadableName" },
+    isPublished: { $max: "$isPublished" },
+    groupCount: { $sum: 1 },
+    datasetIds: { $addToSet: "$datasetId" },
+  },
+}
+```
 
+**What it does:** Groups by (metadata key, group) pair. Computes:
+
+- `metaKeyId` is a stable, deterministic identifier derived from `${sourceType}_${key}_${humanReadableName}` used as the merge key in Stage 9 to prevent duplicate documents across pipeline runs.
+- `groupCount` indicates how many datasets with this group use this key
+- `datasetIds` includes distinct dataset IDs for this group, used later to count unique datasets across all groups without double-counting
+
+**Input (from stage4)**
+
+```js
+{ "datasetId": "ds1", "key": "temperature", "humanReadableName": "Temperature", "userGroups": "groupA", "isPublished": false }
+{ "datasetId": "ds2", "key": "temperature", "humanReadableName": "Temperature", "userGroups": "groupA", "isPublished": true }
+{ "datasetId": "ds1", "key": "temperature", "humanReadableName": "Temperature", "userGroups": "groupB", "isPublished": false }
 ```
-Input:  stream from stage 5, some may have userGroups: null
-Output: only documents where userGroups is a real group name
+
+**Output**
+
+```js
+{
+  "_id": { "metaKeyId": "dataset_temperature_Temperature", "group": "groupA" },
+  "key": "temperature",
+  "humanReadableName": "Temperature",
+  "isPublished": true,
+  "groupCount": 2,
+  "datasetIds": ["ds1", "ds2"]
+}
+{
+  "_id": { "metaKeyId": "dataset_temperature_Temperature", "group": "groupB" },
+  "key": "temperature",
+  "humanReadableName": "Temperature",
+  "isPublished": false,
+  "groupCount": 1,
+  "datasetIds": ["ds1"]
+}
 ```
 
 ---
 
-### Stage 6 — Group by (metaKeyId, group)
-
-Each bucket is one unique `(key, humanReadableName, group)` combination.
-`groupCount` = how many datasets with this group reference this key.
+### Stage 6 — Group by metaKeyId
 
+```js
+{
+  $group: {
+    _id: "$_id.metaKeyId",
+    key: { $first: "$key" },
+    humanReadableName: { $first: "$humanReadableName" },
+    isPublished: { $max: "$isPublished" },
+    userGroups: { $push: "$_id.group" },
+    userGroupCountsArr: { $push: { k: "$_id.group", v: "$groupCount" } },
+    datasetIdSets: { $push: "$datasetIds" },
+  },
+}
 ```
-Input (5 documents from stage 6)
-{ datasetId: "uuid-A", key: "temperature", humanReadableName: "Temperature", isPublished: true,  userGroups: "group-1" }
-{ datasetId: "uuid-A", key: "temperature", humanReadableName: "Temperature", isPublished: true,  userGroups: "group-2" }
-{ datasetId: "uuid-A", key: "pressure",    humanReadableName: "",            isPublished: true,  userGroups: "group-1" }
-{ datasetId: "uuid-A", key: "pressure",    humanReadableName: "",            isPublished: true,  userGroups: "group-2" }
-{ datasetId: "uuid-B", key: "temperature", humanReadableName: "Temperature", isPublished: false, userGroups: "group-1" }
 
-Output (4 buckets — one per unique metaKeyId + group)
-{ _id: { metaKeyId: "Dataset_temperature_Temperature", group: "group-1" }, key: "temperature", humanReadableName: "Temperature", isPublished: true,  groupCount: 2 }
-{ _id: { metaKeyId: "Dataset_temperature_Temperature", group: "group-2" }, key: "temperature", humanReadableName: "Temperature", isPublished: true,  groupCount: 1 }
-{ _id: { metaKeyId: "Dataset_pressure_",               group: "group-1" }, key: "pressure",    humanReadableName: "",            isPublished: true,  groupCount: 1 }
-{ _id: { metaKeyId: "Dataset_pressure_",               group: "group-2" }, key: "pressure",    humanReadableName: "",            isPublished: true,  groupCount: 1 }
+**What it does:** Reassembles one document per metadata key by collecting all per-group data. datasetIdSets is a list of per-group dataset ID sets — merged in Stage 8 to compute total unique dataset count.
+
+**Input (from stage5)**
+
+```js
+{ "_id": { "metaKeyId": "dataset_temperature_Temperature", "group": "groupA" }, "groupCount": 2, "datasetIds": ["ds1", "ds2"] }
+{ "_id": { "metaKeyId": "dataset_temperature_Temperature", "group": "groupB" }, "groupCount": 1, "datasetIds": ["ds1"] }
 ```
 
-> `isPublished: $max` means the field is `true` if **any** contributing dataset is published.
+**Output**
+
+```js
+{
+  "_id": "dataset_temperature_Temperature",
+  "key": "temperature",
+  "humanReadableName": "Temperature",
+  "isPublished": true,
+  "userGroups": ["groupA", "groupB"],
+  "userGroupCountsArr": [
+    { "k": "groupA", "v": 2 },
+    { "k": "groupB", "v": 1 }
+  ],
+  "datasetIdSets": [["ds1", "ds2"], ["ds1"]]
+}
+```
 
 ---
 
-### Stage 7 — Group by metaKeyId
+### Stage 7 — Add generated UUID
 
-Reassemble one document per metadata key by collecting the per-group buckets from stage 7.
+```js
+{
+  $addFields: {
+    metaKeyId: "$_id",
+    generatedId: {
+      $function: {
+        body: "function() { return UUID().toString().replace('UUID(\"', '').replace('\")', ''); }",
+        args: [],
+        lang: "js",
+      },
+    },
+  },
+}
+```
 
+**What it does:** Saves `_id` which at this point is `${sourceType}_${key}_${humanReadableName}` as metaKeyId before it gets replaced. Generates a UUID for \_id to support future document splitting when a document approaches MongoDB's 16MB size limit, while metaKeyId remains the stable merge key for Stage 8.
+
+**Input (from stage7)**
+
+```json
+{ "_id": "dataset_temperature_Temperature", ... }
 ```
-Input (4 buckets from stage 7)
-{ _id: { metaKeyId: "Dataset_temperature_Temperature", group: "group-1" }, groupCount: 2, ... }
-{ _id: { metaKeyId: "Dataset_temperature_Temperature", group: "group-2" }, groupCount: 1, ... }
-{ _id: { metaKeyId: "Dataset_pressure_",               group: "group-1" }, groupCount: 1, ... }
-{ _id: { metaKeyId: "Dataset_pressure_",               group: "group-2" }, groupCount: 1, ... }
 
-Output (2 documents — one per unique key)
-{
-  _id: "Dataset_temperature_Temperature",
-  key: "temperature", humanReadableName: "Temperature", isPublished: true,
-  userGroups: ["group-1", "group-2"],
-  userGroupCountsArr: [{ k: "group-1", v: 2 }, { k: "group-2", v: 1 }],
-  datasetIdSets: [["uuid-A", "uuid-B"], ["uuid-A"]]
-}
+**Output**
+
+```json
 {
-  _id: "Dataset_pressure_",
-  key: "pressure", humanReadableName: "", isPublished: true,
-  userGroups: ["group-1", "group-2"],
-  userGroupCountsArr: [{ k: "group-1", v: 1 }, { k: "group-2", v: 1 }],
-  datasetIdSets: [["uuid-A"], ["uuid-A"]]
+  "_id": "dataset_temperature_Temperature",
+  "metaKeyId": "dataset_temperature_Temperature",
+  "generatedId": "550e8400-e29b-41d4-a716-446655440000",
+  ...
 }
 ```
 
 ---
 
-### Stage 8 — Final projection
+### Stage 8 — Project final document shape
+
+```js
+{
+  $project: {
+    _id: "$generatedId",
+    metaKeyId: 1,
+    key: 1,
+    sourceType: { $literal: sourceType },
+    humanReadableName: 1,
+    isPublished: 1,
+    userGroups: 1,
+    userGroupCounts: { $arrayToObject: "$userGroupCountsArr" },
+    usageCount: {
+      $size: {
+        $reduce: {
+          input: "$datasetIdSets",
+          initialValue: [],
+          in: { $setUnion: ["$$value", "$$this"] },
+        },
+      },
+    },
+    createdBy: { $literal: "migration" },
+    createdAt: { $toDate: "$$NOW" },
+  },
+}
+```
+
+**What it does:** Produces the final document shape for MetadataKeys. Converts userGroupCountsArr to an object map. Computes usageCount by merging all per-group datasetIdSets into a single set and counting — this avoids double-counting datasets that belong to multiple groups.
 
-- `userGroupCounts`: converts `[{k, v}]` array to a plain object (Mongoose stores this as a `Map`)
-- `usageCount`: unions all per-group `datasetId` sets and counts distinct IDs
+**Why set union for usageCount:**
 
+```js
+datasetIds: ["ds1", "ds2"]; //groupA
+datasetIds: ["ds1"]; // groupB
+union: ["ds1", "ds2"]; // usageCount = 2  (not 3)
 ```
-// usageCount for temperature:
-// datasetIdSets = [["uuid-A", "uuid-B"], ["uuid-A"]]
-// union         = ["uuid-A", "uuid-B"]
-// count         = 2
+
+**Input (from stage7)**
+
+```js
+{
+  "generatedId": "550e8400-e29b-41d4-a716-446655440000",
+  "metaKeyId": "dataset_temperature_Temperature",
+  "userGroupCountsArr": [{ "k": "groupA", "v": 2 }, { "k": "groupB", "v": 1 }],
+  "datasetIdSets": [["ds1", "ds2"], ["ds1"]]
+}
 ```
 
+**Output**
+
 ```js
-// Output (final document written to MetadataKeys)
 {
-  _id: "Dataset_temperature_Temperature",
-  key: "temperature",
-  sourceType: "Dataset",
-  humanReadableName: "Temperature",
-  isPublished: true,
-  userGroups: ["group-1", "group-2"],
-  userGroupCounts: { "group-1": 2, "group-2": 1 },
-  usageCount: 2,
-  createdBy: "migration",
-  createdAt: ISODate("...")
+  "_id": "550e8400-e29b-41d4-a716-446655440000",
+  "metaKeyId": "dataset_temperature_Temperature",
+  "key": "temperature",
+  "sourceType": "dataset",
+  "humanReadableName": "Temperature",
+  "isPublished": true,
+  "userGroups": ["groupA", "groupB"],
+  "userGroupCounts": { "groupA": 2, "groupB": 1 },
+  "usageCount": 2,
+  "createdBy": "migration",
+  "createdAt": "2026-05-07T00:00:00.000Z"
 }
 ```
 
@@ -250,31 +427,54 @@ Output (2 documents — one per unique key)
 
 ### Stage 9 — Merge into MetadataKeys
 
-- `whenNotMatched: insert` — new documents are inserted as-is
-- `whenMatched` — additively merges counts when two `SOURCE_COLLECTIONS` produce the same `_id`
-
-> This is not triggered today since `sourceType` is part of `_id`, making collisions between collections impossible. It is kept correct for when `Proposal`, `Sample`, or other collections are added as sources.
+```js
+{
+  $merge: {
+    into: "MetadataKeys",
+    on: "metaKeyId",
+    whenMatched: [
+      {
+        $replaceWith: {
+          $mergeObjects: [
+            "$$new",
+            { _id: "$_id" }
+          ]
+        }
+      }
+    ],
+    whenNotMatched: "insert",
+  },
+}
+```
 
-**Group cap behaviour**
+**What it does:** Upserts each document into `MetadataKeys` using `metaKeyId` as the match key. On match, replaces the existing document entirely with the incoming one, preserving only `_id` since MongoDB does not allow changing it.
 
-When merging, if the existing document's `usageCount` exceeds `MAX_USER_GROUPS_PER_METADATA_KEY`:
+**Input (from Stage 9)**
 
-- `userGroups` is frozen — no new groups are added
-- `userGroupCounts` is frozen — no new counts are merged
-- `isPublished` is forced to `true` — key remains publicly discoverable
-- `usageCount` continues to increment regardless
+```json
+{
+  "_id": "550e8400-e29b-41d4-a716-446655440000",
+  "metaKeyId": "dataset_temperature_Temperature",
+  "userGroups": ["groupA", "groupB"],
+  "userGroupCounts": { "groupA": 2, "groupB": 1 },
+  "usageCount": 2,
+  "isPublished": true
+}
+```
 
-This prevents unbounded document growth while ensuring heavily-used keys remain visible to all users via `isPublished: true`.
+**Output (inserted or replaced):**
 
-```js
-// whenMatched userGroupCounts example
-// existing: { "group-1": 3, "group-2": 1 }
-// incoming: { "group-1": 2, "group-3": 5 }
-// result:   { "group-1": 5, "group-2": 1, "group-3": 5 }
+```json
+{
+  "_id": "550e8400-e29b-41d4-a716-446655440000",
+  "metaKeyId": "dataset_temperature_Temperature",
+  "userGroups": ["groupA", "groupB"],
+  "userGroupCounts": { "groupA": 2, "groupB": 1 },
+  "usageCount": 2,
+  "isPublished": true
+}
 ```
 
----
-
 ## Running the migration
 
 ```bash
diff --git a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
index 712ca4328..a42626c7c 100644
--- a/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
+++ b/migrations/20260420145401-sync-dataset-scientificMetadata-to-metadatakeys.js
@@ -3,8 +3,6 @@ const BATCH_SIZE = 10000;
 
 function buildPipeline(sourceType) {
   return [
-    // Stage 1: Flatten scientificMetadata into an array of key/value pairs.
-    // Preserve _id as datasetId so we can count unique datasets later.
     {
       $project: {
         datasetId: "$_id",
@@ -14,12 +12,7 @@ function buildPipeline(sourceType) {
         metaArr: { $objectToArray: "$scientificMetadata" },
       },
     },
-
-    // Stage 2: One document per (dataset, metadata key)
     { $unwind: "$metaArr" },
-
-    // Stage 3: Shape each (dataset, key) document.
-    // userGroups is the union of ownerGroup + accessGroups for this dataset.
     {
       $project: {
         datasetId: 1,
@@ -31,27 +24,11 @@ function buildPipeline(sourceType) {
         },
       },
     },
-
-    // Stage 4: One document per (dataset, key, group).
     {
       $unwind: {
         path: "$userGroups",
-        preserveNullAndEmptyArrays: true,
-      },
-    },
-
-    // Stage 5: Filter out null and empty-string groups.
-    // Note: datasets with no valid groups are excluded from usageCount.
-    // This is acceptable since ownerGroup is a required field in practice.
-    {
-      $match: {
-        userGroups: { $nin: [null, ""] },
       },
     },
-
-    // Stage 6: Group by (metaKeyId, group).
-    // groupCount = how many datasets with this group use this key.
-    // datasetIds = set of distinct dataset IDs (for accurate usageCount).
     {
       $group: {
         _id: {
@@ -67,11 +44,6 @@ function buildPipeline(sourceType) {
         datasetIds: { $addToSet: "$datasetId" },
       },
     },
-
-    // Stage 7: Group by metaKeyId to reassemble one document per metadata key.
-    // userGroupCountsArr will become the userGroupCounts Map.
-    // datasetIdSets is a list of per-group dataset ID sets — merged in the
-    // next stage to compute total unique dataset count (usageCount).
     {
       $group: {
         _id: "$_id.metaKeyId",
@@ -85,10 +57,9 @@ function buildPipeline(sourceType) {
         datasetIdSets: { $push: "$datasetIds" },
       },
     },
-
     {
       $addFields: {
-        metaKeyId: "$_id", // metaKeyId is needed for the $merge stage for deduplication, but will be removed in the final updateMany
+        metaKeyId: "$_id",
         generatedId: {
           $function: {
             body: "function() { return UUID().toString().replace('UUID(\"', '').replace('\")', ''); }",
@@ -98,10 +69,6 @@ function buildPipeline(sourceType) {
         },
       },
     },
-
-    // Stage 8: Final projection.
-    // userGroupCounts: [{k,v}] array → plain object (stored as Map in Mongoose).
-    // usageCount: union all per-group datasetId sets, count distinct IDs.
     {
       $project: {
         _id: "$generatedId",
@@ -125,11 +92,6 @@ function buildPipeline(sourceType) {
         createdAt: { $toDate: "$$NOW" },
       },
     },
-
-    // Stage 9: Merge into MetadataKeys.
-    // whenMatched handles the (future) case where multiple SOURCE_COLLECTIONS
-    // produce the same _id. Not possible today since sourceType is part of
-    // the _id, but kept correct for when more collections are added.
     {
       $merge: {
         into: "MetadataKeys",
@@ -137,85 +99,7 @@ function buildPipeline(sourceType) {
         whenMatched: [
           {
             $replaceWith: {
-              $mergeObjects: [
-                "$$new",
-                {
-                  // Preserve original audit fields
-                  _id: "$_id",
-                  createdAt: { $ifNull: ["$createdAt", "$$new.createdAt"] },
-                  createdBy: { $ifNull: ["$createdBy", "$$new.createdBy"] },
-                  updatedBy: { $literal: "migration" },
-                  updatedAt: { $toDate: "$$NOW" },
-                  isPublished: { $or: ["$isPublished", "$$new.isPublished"] },
-                  userGroups: {
-                    $setUnion: ["$userGroups", "$$new.userGroups"],
-                  },
-
-                  // Additively merge userGroupCounts — sum counts per group
-                  // across both the existing and incoming documents.
-                  userGroupCounts: {
-                    $arrayToObject: {
-                      $map: {
-                        input: {
-                          $setUnion: [
-                            {
-                              $map: {
-                                input: {
-                                  $objectToArray: "$userGroupCounts",
-                                },
-                                as: "e",
-                                in: "$$e.k",
-                              },
-                            },
-                            {
-                              $map: {
-                                input: {
-                                  $objectToArray: "$$new.userGroupCounts",
-                                },
-                                as: "e",
-                                in: "$$e.k",
-                              },
-                            },
-                          ],
-                        },
-                        as: "group",
-                        in: {
-                          k: "$$group",
-                          v: {
-                            $add: [
-                              {
-                                $ifNull: [
-                                  {
-                                    $getField: {
-                                      field: "$$group",
-                                      input: "$userGroupCounts",
-                                    },
-                                  },
-                                  0,
-                                ],
-                              },
-                              {
-                                $ifNull: [
-                                  {
-                                    $getField: {
-                                      field: "$$group",
-                                      input: "$$new.userGroupCounts",
-                                    },
-                                  },
-                                  0,
-                                ],
-                              },
-                            ],
-                          },
-                        },
-                      },
-                    },
-                  },
-
-                  // Add incoming dataset count to existing
-                  usageCount: { $add: ["$usageCount", "$$new.usageCount"] },
-                },
-              ],
+              $mergeObjects: ["$$new", { _id: "$_id" }],
             },
           },
         ],

From 32f95400b1fda14b2d4e9c1fc6dfca8052895ec2 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Mon, 11 May 2026 13:41:02 +0200
Subject: [PATCH 25/29] improve metadataKeys doc update logic and add unit
 tests based on comments

---
 .../metadatakeys.service.spec.ts              | 357 ++++++------------
 src/metadata-keys/metadatakeys.service.ts     | 299 ++++-----------
 2 files changed, 185 insertions(+), 471 deletions(-)

diff --git a/src/metadata-keys/metadatakeys.service.spec.ts b/src/metadata-keys/metadatakeys.service.spec.ts
index e2ac659ab..fe959638e 100644
--- a/src/metadata-keys/metadatakeys.service.spec.ts
+++ b/src/metadata-keys/metadatakeys.service.spec.ts
@@ -1,3 +1,4 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
 import { getModelToken } from "@nestjs/mongoose";
 import { Test, TestingModule } from "@nestjs/testing";
 import { MetadataKeysService, MetadataSourceDoc } from "./metadatakeys.service";
@@ -7,7 +8,7 @@ const modelMock = {
   aggregate: jest
     .fn()
     .mockReturnValue({ exec: jest.fn().mockResolvedValue([]) }),
-  findOneAndUpdate: jest.fn().mockResolvedValue({}),
+  bulkWrite: jest.fn().mockResolvedValue({}),
   updateMany: jest.fn().mockResolvedValue({}),
   deleteMany: jest.fn().mockResolvedValue({}),
 };
@@ -86,6 +87,44 @@ describe("MetadataKeysService", () => {
       );
     });
 
+    it("applies default sort when no limits provided", async () => {
+      await service.findAll({}, {});
+
+      const [pipeline] = modelMock.aggregate.mock.calls[0];
+      expect(pipeline).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({ $sort: { createdAt: -1 } }),
+        ]),
+      );
+    });
+
+    it("applies default sort when limits provided without sort", async () => {
+      await service.findAll({ limits: { limit: 10, skip: 0 } }, {});
+
+      const [pipeline] = modelMock.aggregate.mock.calls[0];
+      expect(pipeline).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({ $sort: { createdAt: -1 } }),
+        ]),
+      );
+    });
+
+    it("uses provided sort when specified", async () => {
+      await service.findAll(
+        { limits: { limit: 10, skip: 0, sort: { key: "asc" } } },
+        {},
+      );
+
+      const [pipeline] = modelMock.aggregate.mock.calls[0];
+      expect(pipeline).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({
+            $sort: expect.objectContaining({ key: 1 }),
+          }),
+        ]),
+      );
+    });
+
     it("omits $project stage when no fields specified", async () => {
       await service.findAll({}, {});
 
@@ -94,12 +133,15 @@ describe("MetadataKeysService", () => {
       expect(hasProject).toBe(false);
     });
 
-    it("omits $sort stage when no sort specified", async () => {
-      await service.findAll({ limits: { limit: 10, skip: 0 } }, {});
+    it("sort stage comes before skip and limit", async () => {
+      await service.findAll({}, {});
 
       const [pipeline] = modelMock.aggregate.mock.calls[0];
-      const hasSort = pipeline.some((s: object) => "$sort" in s);
-      expect(hasSort).toBe(false);
+      const sortIndex = pipeline.findIndex((s: object) => "$sort" in s);
+      const skipIndex = pipeline.findIndex((s: object) => "$skip" in s);
+      const limitIndex = pipeline.findIndex((s: object) => "$limit" in s);
+      expect(sortIndex).toBeLessThan(skipIndex);
+      expect(sortIndex).toBeLessThan(limitIndex);
     });
 
     it("returns aggregation results", async () => {
@@ -122,18 +164,21 @@ describe("MetadataKeysService", () => {
     it("does nothing when metadata is empty", async () => {
       await service.insertManyFromSource({ ...BASE_DOC, metadata: {} });
 
-      expect(modelMock.findOneAndUpdate).not.toHaveBeenCalled();
+      expect(modelMock.bulkWrite).not.toHaveBeenCalled();
     });
 
-    it("calls findOneAndUpdate once per metadata key", async () => {
+    it("calls bulkWrite with one op per metadata key", async () => {
       await service.insertManyFromSource(BASE_DOC);
-      expect(modelMock.findOneAndUpdate).toHaveBeenCalledTimes(2);
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      expect(ops).toHaveLength(2);
     });
 
     it("builds correct filter from sourceType + key + humanReadableName", async () => {
       await service.insertManyFromSource(BASE_DOC);
-      const calls = modelMock.findOneAndUpdate.mock.calls;
-      const filters = calls.map((call: unknown[]) => call[0]);
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      const filters = ops.map((op: any) => op.updateOne.filter);
       expect(filters).toContainEqual({
         sourceType: "Dataset",
         key: "temperature",
@@ -148,7 +193,9 @@ describe("MetadataKeysService", () => {
 
     it("increments usageCount and per-group counts", async () => {
       await service.insertManyFromSource(BASE_DOC);
-      const [, update] = modelMock.findOneAndUpdate.mock.calls[0];
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      const { update } = ops[0].updateOne;
       expect(update.$inc.usageCount).toBe(1);
       expect(update.$inc["userGroupCounts.group-1"]).toBe(1);
       expect(update.$inc["userGroupCounts.group-2"]).toBe(1);
@@ -156,25 +203,33 @@ describe("MetadataKeysService", () => {
 
     it("adds userGroups via $addToSet", async () => {
       await service.insertManyFromSource(BASE_DOC);
-      const [, update] = modelMock.findOneAndUpdate.mock.calls[0];
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      const { update } = ops[0].updateOne;
       expect(update.$addToSet.userGroups.$each).toEqual(["group-1", "group-2"]);
     });
 
     it("sets isPublished when dataset is published", async () => {
       await service.insertManyFromSource({ ...BASE_DOC, isPublished: true });
-      const [, update] = modelMock.findOneAndUpdate.mock.calls[0];
-      expect(update.$set.isPublished).toBe(true);
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      const { update } = ops[0].updateOne;
+      expect(update.$max.isPublished).toBe(true);
     });
 
     it("does not set isPublished when dataset is not published", async () => {
       await service.insertManyFromSource({ ...BASE_DOC, isPublished: false });
-      const [, update] = modelMock.findOneAndUpdate.mock.calls[0];
-      expect(update.$set.isPublished).toBeUndefined();
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      const { update } = ops[0].updateOne;
+      expect(update.$max?.isPublished).toBeFalsy();
     });
 
     it("uses $setOnInsert for immutable fields", async () => {
       await service.insertManyFromSource(BASE_DOC);
-      const [, update] = modelMock.findOneAndUpdate.mock.calls[0];
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      const { update } = ops[0].updateOne;
       expect(update.$setOnInsert).toMatchObject({
         key: expect.any(String),
         sourceType: "Dataset",
@@ -184,8 +239,9 @@ describe("MetadataKeysService", () => {
 
     it("sets upsert: true", async () => {
       await service.insertManyFromSource(BASE_DOC);
-      const [, , options] = modelMock.findOneAndUpdate.mock.calls[0];
-      expect(options.upsert).toBe(true);
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      expect(ops[0].updateOne.upsert).toBe(true);
     });
   });
 
@@ -197,12 +253,17 @@ describe("MetadataKeysService", () => {
     it("does nothing when metadata is empty", async () => {
       await service.deleteMany({ ...BASE_DOC, metadata: {} });
 
+      expect(modelMock.bulkWrite).not.toHaveBeenCalled();
       expect(modelMock.updateMany).not.toHaveBeenCalled();
       expect(modelMock.deleteMany).not.toHaveBeenCalled();
     });
 
-    it("runs three operations in order: decrement → recompute → delete", async () => {
+    it("runs operations in order: decrement → recompute → delete", async () => {
       const callOrder: string[] = [];
+      modelMock.bulkWrite.mockImplementation(() => {
+        callOrder.push("bulkWrite");
+        return Promise.resolve({});
+      });
       modelMock.updateMany.mockImplementation(() => {
         callOrder.push("updateMany");
         return Promise.resolve({});
@@ -214,16 +275,17 @@ describe("MetadataKeysService", () => {
 
       await service.deleteMany(BASE_DOC);
 
-      expect(callOrder).toEqual(["updateMany", "updateMany", "deleteMany"]);
+      expect(callOrder).toEqual(["bulkWrite", "updateMany", "deleteMany"]);
     });
 
     it("decrements usageCount and per-group counts", async () => {
       await service.deleteMany(BASE_DOC);
 
-      const [, firstUpdate] = modelMock.updateMany.mock.calls[0];
-      expect(firstUpdate.$inc.usageCount).toBe(-1);
-      expect(firstUpdate.$inc["userGroupCounts.group-1"]).toBe(-1);
-      expect(firstUpdate.$inc["userGroupCounts.group-2"]).toBe(-1);
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      const { update } = ops[0].updateOne;
+      expect(update.$inc.usageCount).toBe(-1);
+      expect(update.$inc["userGroupCounts.group-1"]).toBe(-1);
+      expect(update.$inc["userGroupCounts.group-2"]).toBe(-1);
     });
 
     it("targets correct filter based on metadata keys and humanReadableName", async () => {
@@ -249,12 +311,18 @@ describe("MetadataKeysService", () => {
       expect(deleteFilter.$and[1].usageCount).toEqual({ $lte: 0 });
     });
 
-    it("recompute stage uses $set with $objectToArray on userGroupCounts", async () => {
+    it("recompute stage uses $set with userGroups defined", async () => {
       await service.deleteMany(BASE_DOC);
 
-      // second updateMany call is the RECOMPUTE_USER_GROUPS_STAGE
-      const [, recomputeStage] = modelMock.updateMany.mock.calls[1];
-      expect(recomputeStage[0].$set.userGroups).toBeDefined();
+      const [, recomputeStage] = modelMock.updateMany.mock.calls[0];
+      expect(recomputeStage[1].$set.userGroups).toBeDefined();
+    });
+
+    it("does not set upsert on decrement ops", async () => {
+      await service.deleteMany(BASE_DOC);
+
+      const [ops] = modelMock.bulkWrite.mock.calls[0];
+      expect(ops[0].updateOne.upsert).toBe(false);
     });
   });
 
@@ -263,233 +331,50 @@ describe("MetadataKeysService", () => {
   // -------------------------------------------------------------------------
 
   describe("replaceManyFromSource", () => {
-    it("calls insertManyFromSource for keys only in newDoc", async () => {
-      const insertSpy = jest
-        .spyOn(service, "insertManyFromSource")
-        .mockResolvedValue();
-
-      const oldDoc = {
-        ...BASE_DOC,
-        metadata: { temperature: { human_name: "Temperature" } },
-      };
-      const newDoc = {
-        ...BASE_DOC,
-        metadata: {
-          temperature: { human_name: "Temperature" },
-          wavelength: {},
-        },
-      };
-
-      await service.replaceManyFromSource(oldDoc, newDoc);
-
-      expect(insertSpy).toHaveBeenCalledWith(
-        expect.objectContaining({ metadata: { wavelength: {} } }),
-      );
-    });
-
-    it("calls deleteMany for keys only in oldDoc", async () => {
-      const deleteSpy = jest.spyOn(service, "deleteMany").mockResolvedValue();
-
-      const oldDoc = {
-        ...BASE_DOC,
-        metadata: { temperature: { human_name: "Temperature" }, pressure: {} },
-      };
-      const newDoc = {
-        ...BASE_DOC,
-        metadata: { temperature: { human_name: "Temperature" } },
-      };
-
-      await service.replaceManyFromSource(oldDoc, newDoc);
-
-      expect(deleteSpy).toHaveBeenCalledWith(
-        expect.objectContaining({ metadata: { pressure: {} } }),
-      );
-    });
-
-    it("does not call deleteMany or insertManyFromSource for shared unchanged keys", async () => {
+    it("calls deleteMany with oldDoc then insertManyFromSource with newDoc", async () => {
       const deleteSpy = jest.spyOn(service, "deleteMany").mockResolvedValue();
       const insertSpy = jest
         .spyOn(service, "insertManyFromSource")
         .mockResolvedValue();
 
-      const doc = {
-        ...BASE_DOC,
-        metadata: { temperature: { human_name: "Temperature" } },
-      };
-
-      await service.replaceManyFromSource(doc, doc);
+      await service.replaceManyFromSource(BASE_DOC, BASE_DOC);
 
-      expect(deleteSpy).not.toHaveBeenCalled();
-      expect(insertSpy).not.toHaveBeenCalled();
+      expect(deleteSpy).toHaveBeenCalledWith(BASE_DOC);
+      expect(insertSpy).toHaveBeenCalledWith(BASE_DOC);
     });
 
-    it("handles all three buckets simultaneously", async () => {
-      const deleteSpy = jest.spyOn(service, "deleteMany").mockResolvedValue();
-      const insertSpy = jest
-        .spyOn(service, "insertManyFromSource")
-        .mockResolvedValue();
-
-      const oldDoc = {
-        ...BASE_DOC,
-        metadata: { temperature: { human_name: "Temperature" }, pressure: {} },
-      };
-      const newDoc = {
-        ...BASE_DOC,
-        metadata: {
-          temperature: { human_name: "Temperature" },
-          wavelength: {},
-        },
-      };
-
-      await service.replaceManyFromSource(oldDoc, newDoc);
-
-      // pressure removed → deleteMany
-      expect(deleteSpy).toHaveBeenCalledWith(
-        expect.objectContaining({ metadata: { pressure: {} } }),
-      );
-      // wavelength added → insertManyFromSource
-      expect(insertSpy).toHaveBeenCalledWith(
-        expect.objectContaining({ metadata: { wavelength: {} } }),
-      );
-    });
-
-    it("does nothing when both docs have empty metadata", async () => {
-      const deleteSpy = jest.spyOn(service, "deleteMany").mockResolvedValue();
-      const insertSpy = jest
+    it("calls deleteMany before insertManyFromSource", async () => {
+      const callOrder: string[] = [];
+      jest.spyOn(service, "deleteMany").mockImplementation(async () => {
+        callOrder.push("deleteMany");
+      });
+      jest
         .spyOn(service, "insertManyFromSource")
-        .mockResolvedValue();
-
-      const empty = { ...BASE_DOC, metadata: {} };
-      await service.replaceManyFromSource(empty, empty);
-
-      expect(deleteSpy).not.toHaveBeenCalled();
-      expect(insertSpy).not.toHaveBeenCalled();
-    });
-  });
-
-  // -------------------------------------------------------------------------
-  // updateSharedKeys (tested via replaceManyFromSource)
-  // -------------------------------------------------------------------------
-
-  describe("updateSharedKeys (via replaceManyFromSource)", () => {
-    it("increments added groups and decrements removed groups", async () => {
-      const oldDoc: MetadataSourceDoc = {
-        ...BASE_DOC,
-        userGroups: ["group-1"],
-        metadata: { temperature: { human_name: "Temperature" } },
-      };
-      const newDoc: MetadataSourceDoc = {
-        ...BASE_DOC,
-        userGroups: ["group-2"],
-        metadata: { temperature: { human_name: "Temperature" } },
-      };
-
-      await service.replaceManyFromSource(oldDoc, newDoc);
-
-      const [, update] = modelMock.updateMany.mock.calls[0];
-      expect(update.$inc["userGroupCounts.group-2"]).toBe(1);
-      expect(update.$inc["userGroupCounts.group-1"]).toBe(-1);
-    });
-
-    it("runs RECOMPUTE_USER_GROUPS_STAGE when groups are removed", async () => {
-      const oldDoc: MetadataSourceDoc = {
-        ...BASE_DOC,
-        userGroups: ["group-1", "group-2"],
-        metadata: { temperature: { human_name: "Temperature" } },
-      };
-      const newDoc: MetadataSourceDoc = {
-        ...BASE_DOC,
-        userGroups: ["group-1"],
-        metadata: { temperature: { human_name: "Temperature" } },
-      };
-
-      await service.replaceManyFromSource(oldDoc, newDoc);
-
-      // two updateMany calls: one for the $inc, one for RECOMPUTE
-      expect(modelMock.updateMany).toHaveBeenCalledTimes(2);
-    });
-
-    it("does not run RECOMPUTE_USER_GROUPS_STAGE when only groups are added", async () => {
-      const oldDoc: MetadataSourceDoc = {
-        ...BASE_DOC,
-        userGroups: ["group-1"],
-        metadata: { temperature: { human_name: "Temperature" } },
-      };
-      const newDoc: MetadataSourceDoc = {
-        ...BASE_DOC,
-        userGroups: ["group-1", "group-2"],
-        metadata: { temperature: { human_name: "Temperature" } },
-      };
-
-      await service.replaceManyFromSource(oldDoc, newDoc);
-
-      // only one updateMany — no RECOMPUTE needed when nothing is removed
-      expect(modelMock.updateMany).toHaveBeenCalledTimes(1);
-    });
-
-    it("sets isPublished when it flips from false to true", async () => {
-      const oldDoc: MetadataSourceDoc = {
-        ...BASE_DOC,
-        isPublished: false,
-        metadata: { temperature: { human_name: "Temperature" } },
-      };
-      const newDoc: MetadataSourceDoc = {
-        ...BASE_DOC,
-        isPublished: true,
-        metadata: { temperature: { human_name: "Temperature" } },
-      };
+        .mockImplementation(async () => {
+          callOrder.push("insertManyFromSource");
+        });
 
-      await service.replaceManyFromSource(oldDoc, newDoc);
+      await service.replaceManyFromSource(BASE_DOC, BASE_DOC);
 
-      const [, update] = modelMock.updateMany.mock.calls[0];
-      expect(update.$set.isPublished).toBe(true);
+      expect(callOrder).toEqual(["deleteMany", "insertManyFromSource"]);
     });
 
-    it("does not touch isPublished when it flips from true to false", async () => {
-      const oldDoc: MetadataSourceDoc = {
-        ...BASE_DOC,
-        isPublished: true,
-        metadata: { temperature: { human_name: "Temperature" } },
-      };
-      const newDoc: MetadataSourceDoc = {
-        ...BASE_DOC,
-        isPublished: false,
-        metadata: { temperature: { human_name: "Temperature" } },
-      };
-
-      await service.replaceManyFromSource(oldDoc, newDoc);
-
-      // no group changes and no publishedFlippedOn → updateMany not called at all
-      expect(modelMock.updateMany).not.toHaveBeenCalled();
-    });
-
-    it("treats humanReadableName change as delete + insert", async () => {
-      const deleteSpy = jest.spyOn(service, "deleteMany").mockResolvedValue();
-      const insertSpy = jest
-        .spyOn(service, "insertManyFromSource")
-        .mockResolvedValue();
-
-      const oldDoc: MetadataSourceDoc = {
+    it("net usageCount is zero for unchanged keys", async () => {
+      const doc = {
         ...BASE_DOC,
         metadata: { temperature: { human_name: "Temperature" } },
       };
-      const newDoc: MetadataSourceDoc = {
-        ...BASE_DOC,
-        metadata: { temperature: { human_name: "Temp (renamed)" } },
-      };
 
-      await service.replaceManyFromSource(oldDoc, newDoc);
+      await service.replaceManyFromSource(doc, doc);
 
-      expect(deleteSpy).toHaveBeenCalledWith(
-        expect.objectContaining({
-          metadata: { temperature: { human_name: "Temperature" } },
-        }),
+      const allOps = modelMock.bulkWrite.mock.calls.flatMap(
+        ([ops]: any) => ops,
       );
-      expect(insertSpy).toHaveBeenCalledWith(
-        expect.objectContaining({
-          metadata: { temperature: { human_name: "Temp (renamed)" } },
-        }),
+      const totalUsageCountDelta = allOps.reduce(
+        (sum: number, op: any) => sum + op.updateOne.update.$inc.usageCount,
+        0,
       );
+      expect(totalUsageCountDelta).toBe(0);
     });
   });
 });
diff --git a/src/metadata-keys/metadatakeys.service.ts b/src/metadata-keys/metadatakeys.service.ts
index bcb1978ab..fedbd0963 100644
--- a/src/metadata-keys/metadatakeys.service.ts
+++ b/src/metadata-keys/metadatakeys.service.ts
@@ -36,14 +36,13 @@ const RECOMPUTE_USER_GROUPS_STAGE = [
           },
         },
       },
+    },
+  },
+  {
+    $set: {
       userGroups: {
         $map: {
-          input: {
-            $filter: {
-              input: { $objectToArray: "$userGroupCounts" },
-              cond: { $gt: ["$$this.v", 0] },
-            },
-          },
+          input: { $objectToArray: "$userGroupCounts" },
           as: "entry",
           in: "$$entry.k",
         },
@@ -65,10 +64,11 @@ export class MetadataKeysService {
   ): Promise<MetadataKeyClass[]> {
     const whereFilter: FilterQuery<MetadataKeyDocument> = filter.where ?? {};
     const fieldsProjection: string[] = filter.fields ?? {};
-    const limits: QueryOptions<MetadataKeyDocument> = filter.limits ?? {
-      limit: 100,
-      skip: 0,
-      sort: { createdAt: "desc" },
+
+    const limits: QueryOptions<MetadataKeyDocument> = {
+      limit: filter.limits?.limit ?? 100,
+      skip: filter.limits?.skip ?? 0,
+      sort: filter.limits?.sort ?? { createdAt: "desc" },
     };
 
     const pipeline: PipelineStage[] = [
@@ -100,157 +100,79 @@ export class MetadataKeysService {
     return data;
   }
 
-  /**
-   * Called when a dataset is created or gains new metadata keys.
-   *
-   * For each key:
-   * - Creates a new MetadataKey entry if none exists (upsert)
-   * - Increments usageCount and per-group reference counts
-   * - Adds new groups to the userGroups query array
-   * - Sets isPublished if the source dataset is published (never unsets inline)
-   * - Updates humanReadableName to the latest value
-   */
   async insertManyFromSource(doc: MetadataSourceDoc): Promise<void> {
-    if (isEmpty(doc.metadata)) return;
-
-    const { sourceType, userGroups, isPublished, metadata } = doc;
-
-    const ops = Object.entries(metadata).map(([key, entry]) => {
-      const humanReadableName =
-        (entry as ScientificMetadataEntry).human_name ?? "";
-
-      return this.metadataKeyModel.findOneAndUpdate(
-        { sourceType, key, humanReadableName },
-        {
-          $setOnInsert: addCreatedByFields(
-            { key, sourceType, humanReadableName },
-            "system",
-          ),
-          $set: {
-            ...(isPublished && { isPublished: true }),
-          },
-          $addToSet: { userGroups: { $each: userGroups } },
-          $inc: {
-            usageCount: 1,
-            ...this.groupCountDeltas(userGroups, 1),
-          },
-        },
-        { upsert: true },
-      );
-    });
-
-    await Promise.all(ops);
+    await this.adjustCounts(doc, 1);
+  }
 
-    Logger.log(
-      `Upserted MetadataKeys for ${sourceType}: ${Object.keys(metadata).join(", ")}`,
-    );
+  async deleteMany(doc: MetadataSourceDoc): Promise<void> {
+    await this.adjustCounts(doc, -1);
   }
 
-  /**
-   * Called when a dataset is deleted or loses metadata keys.
-   *
-   * Three-step process to ensure groups are safely removed:
-   *   1. Decrement usageCount and per-group reference counts
-   *   2. Recompute userGroups array from the updated counts
-   *      (drops any group whose count reached zero)
-   *   3. Delete entries no longer referenced by any dataset
-   *
+  async replaceManyFromSource(
+    oldDoc: MetadataSourceDoc,
+    newDoc: MetadataSourceDoc,
+  ): Promise<void> {
+    await this.deleteMany(oldDoc);
+    await this.insertManyFromSource(newDoc);
+  }
 
-   */
-  async deleteMany(doc: MetadataSourceDoc): Promise<void> {
+  private async adjustCounts(
+    doc: MetadataSourceDoc,
+    delta: 1 | -1,
+  ): Promise<void> {
     if (isEmpty(doc.metadata)) return;
 
-    const { sourceType, userGroups, metadata } = doc;
-    const keys = Object.keys(metadata);
-    const filters = keys.map((key) => {
+    const { sourceType, userGroups, isPublished, metadata } = doc;
+
+    const filters = Object.entries(metadata).map(([key, entry]) => {
       const humanReadableName =
-        (metadata[key] as ScientificMetadataEntry)?.human_name ?? "";
-      return this.buildFilter(sourceType, key, humanReadableName);
+        (entry as ScientificMetadataEntry).human_name ?? "";
+      return { sourceType, key, humanReadableName };
     });
+
     const queryFilter = { $or: filters };
 
-    await this.metadataKeyModel.updateMany(queryFilter, {
-      $inc: {
-        usageCount: -1,
-        ...this.groupCountDeltas(userGroups, -1),
+    const ops = filters.map(({ sourceType, key, humanReadableName }) => ({
+      updateOne: {
+        filter: { sourceType, key, humanReadableName },
+        update: {
+          $inc: {
+            usageCount: delta,
+            ...this.groupCountDeltas(userGroups, delta),
+          },
+          ...(delta === 1 && {
+            $max: { isPublished },
+            $addToSet: { userGroups: { $each: userGroups } },
+            $setOnInsert: addCreatedByFields(
+              { key, sourceType, humanReadableName },
+              "system",
+            ),
+          }),
+        },
+        upsert: delta === 1,
       },
-    });
+    }));
 
-    await this.metadataKeyModel.updateMany(
-      queryFilter,
-      RECOMPUTE_USER_GROUPS_STAGE,
-    );
+    await this.metadataKeyModel.bulkWrite(ops);
 
-    await this.metadataKeyModel.deleteMany({
-      $and: [queryFilter, { usageCount: { $lte: 0 } }],
-    });
+    if (delta === -1) {
+      // UpdateMany is necessary here because the bulkWrite above only decrements userGroupCounts
+      // but cannot remove zero-count groups from userGroups in the same operation.
+      await this.metadataKeyModel.updateMany(
+        queryFilter,
+        RECOMPUTE_USER_GROUPS_STAGE,
+      );
+
+      await this.metadataKeyModel.deleteMany({
+        $and: [queryFilter, { usageCount: { $lte: 0 } }],
+      });
+    }
 
     Logger.log(
-      `Decremented or deleted MetadataKeys for ${sourceType}: ${keys.join(", ")}`,
+      `${delta === 1 ? "Upserted" : "Decremented or deleted"} MetadataKeys for ${sourceType}: ${Object.keys(metadata).join(", ")}`,
     );
   }
-  /**
-   * Called when a dataset is updated.
-   *
-   * Diffs the old and new metadata to produce three disjoint key sets:
-   *   - added:   only in newDoc(after change) → insertManyFromSource
-   *   - removed: only in oldDoc(before change) → deleteMany
-   *   - shared:  in both       → updateSharedKeys (handles group / isPublished
-   *                              / humanReadableName changes)
-   *
-   * The three sets are disjoint by _id so Promise.all is safe.
-   */
-  async replaceManyFromSource(
-    oldDoc: MetadataSourceDoc,
-    newDoc: MetadataSourceDoc,
-  ): Promise<void> {
-    const oldKeys = Object.keys(oldDoc.metadata ?? {});
-    const newKeys = Object.keys(newDoc.metadata ?? {});
-
-    const addedKeys = newKeys.filter((k) => !oldKeys.includes(k));
-    const removedKeys = oldKeys.filter((k) => !newKeys.includes(k));
-    const sharedKeys = newKeys.filter((k) => oldKeys.includes(k));
-
-    await Promise.all([
-      removedKeys.length > 0 &&
-        this.deleteMany({
-          ...oldDoc,
-          metadata: Object.fromEntries(
-            removedKeys.map((k) => [k, oldDoc.metadata[k]]),
-          ),
-        }),
-
-      addedKeys.length > 0 &&
-        this.insertManyFromSource({
-          ...newDoc,
-          metadata: Object.fromEntries(
-            addedKeys.map((k) => [k, newDoc.metadata[k]]),
-          ),
-        }),
-
-      sharedKeys.length > 0 &&
-        this.updateSharedKeys(sharedKeys, oldDoc, newDoc),
-    ]);
-  }
-
-  private buildFilter(
-    sourceType: string,
-    key: string,
-    humanReadableName: string,
-  ): FilterQuery<MetadataKeyDocument> {
-    return { sourceType, key, humanReadableName };
-  }
 
-  /**
-   * Builds $inc paths for per-group reference counts.
-   *
-   * groupCountDeltas(["groupA", "groupB"], 1)
-   *   => { "userGroupCounts.groupA": 1, "userGroupCounts.groupB": 1 }
-   *
-   * MongoDB's dot-notation $inc creates the key if missing and increments
-   * if it already exists, making this safe for both first-time and
-   * subsequent upserts.
-   */
   private groupCountDeltas(
     groups: string[],
     delta: 1 | -1,
@@ -259,97 +181,4 @@ export class MetadataKeysService {
       groups.map((g) => [`userGroupCounts.${g}`, delta]),
     );
   }
-
-  /**
-   * Handles keys present in both the old and new dataset version.
-   *
-   * Checks three things that may have changed independently:
-   *   1. userGroups — increments added groups, decrements removed groups,
-   *      then recomputes the userGroups array from the updated counts
-   *   2. isPublished — only sets true inline; false is left to the cronjob
-   *   3. humanReadableName — updated per-key where it differs
-   */
-  private async updateSharedKeys(
-    sharedKeys: string[],
-    oldDoc: MetadataSourceDoc,
-    newDoc: MetadataSourceDoc,
-  ): Promise<void> {
-    const { sourceType } = newDoc;
-    const humanNameChangedKeys: string[] = [];
-    const humanNameUnchangedKeys: string[] = [];
-
-    for (const k of sharedKeys) {
-      const oldHumanName =
-        (oldDoc.metadata[k] as ScientificMetadataEntry)?.human_name ?? "";
-      const newHumanName =
-        (newDoc.metadata[k] as ScientificMetadataEntry)?.human_name ?? "";
-
-      if (oldHumanName === newHumanName) {
-        humanNameUnchangedKeys.push(k);
-      } else {
-        humanNameChangedKeys.push(k);
-      }
-    }
-
-    const addedGroups = newDoc.userGroups.filter(
-      (g) => !oldDoc.userGroups.includes(g),
-    );
-    const removedGroups = oldDoc.userGroups.filter(
-      (g) => !newDoc.userGroups.includes(g),
-    );
-    const publishedFlippedOn = !oldDoc.isPublished && newDoc.isPublished;
-    const hasGroupChanges = addedGroups.length > 0 || removedGroups.length > 0;
-
-    if (
-      humanNameUnchangedKeys.length > 0 &&
-      (hasGroupChanges || publishedFlippedOn)
-    ) {
-      const filters = humanNameUnchangedKeys.map((k) => {
-        const humanReadableName =
-          (newDoc.metadata[k] as ScientificMetadataEntry)?.human_name ?? "";
-        return this.buildFilter(sourceType, k, humanReadableName);
-      });
-
-      const queryFilter = { $or: filters };
-
-      await this.metadataKeyModel.updateMany(queryFilter, {
-        ...(addedGroups.length > 0 && {
-          $addToSet: { userGroups: { $each: addedGroups } },
-        }),
-        $inc: {
-          ...(addedGroups.length > 0 && this.groupCountDeltas(addedGroups, 1)),
-          ...(removedGroups.length > 0 &&
-            this.groupCountDeltas(removedGroups, -1)),
-        },
-        ...(publishedFlippedOn && { $set: { isPublished: true } }),
-      });
-
-      // Recompute userGroups to drop groups whose count reached zero
-      if (removedGroups.length > 0) {
-        await this.metadataKeyModel.updateMany(
-          queryFilter,
-          RECOMPUTE_USER_GROUPS_STAGE,
-        );
-      }
-    }
-
-    // humanReadableName is part of _id, so a change means a different document.
-    // Treat it as delete the old entry + insert the new one.
-    if (humanNameChangedKeys.length > 0) {
-      await Promise.all([
-        this.deleteMany({
-          ...oldDoc,
-          metadata: Object.fromEntries(
-            humanNameChangedKeys.map((k) => [k, oldDoc.metadata[k]]),
-          ),
-        }),
-        this.insertManyFromSource({
-          ...newDoc,
-          metadata: Object.fromEntries(
-            humanNameChangedKeys.map((k) => [k, newDoc.metadata[k]]),
-          ),
-        }),
-      ]);
-    }
-  }
 }

From 665f3925fecc934c43f671b9960b78a6025e5d48 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Mon, 11 May 2026 14:05:59 +0200
Subject: [PATCH 26/29] set createdBy, updatedBy on insert and updatedAt on
 every upsert in MetadataKeys

---
 src/metadata-keys/metadatakeys.service.spec.ts | 12 ------------
 src/metadata-keys/metadatakeys.service.ts      |  8 ++++----
 2 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/src/metadata-keys/metadatakeys.service.spec.ts b/src/metadata-keys/metadatakeys.service.spec.ts
index fe959638e..812be350a 100644
--- a/src/metadata-keys/metadatakeys.service.spec.ts
+++ b/src/metadata-keys/metadatakeys.service.spec.ts
@@ -225,18 +225,6 @@ describe("MetadataKeysService", () => {
       expect(update.$max?.isPublished).toBeFalsy();
     });
 
-    it("uses $setOnInsert for immutable fields", async () => {
-      await service.insertManyFromSource(BASE_DOC);
-
-      const [ops] = modelMock.bulkWrite.mock.calls[0];
-      const { update } = ops[0].updateOne;
-      expect(update.$setOnInsert).toMatchObject({
-        key: expect.any(String),
-        sourceType: "Dataset",
-        humanReadableName: expect.any(String),
-      });
-    });
-
     it("sets upsert: true", async () => {
       await service.insertManyFromSource(BASE_DOC);
 
diff --git a/src/metadata-keys/metadatakeys.service.ts b/src/metadata-keys/metadatakeys.service.ts
index fedbd0963..ce2da4f5d 100644
--- a/src/metadata-keys/metadatakeys.service.ts
+++ b/src/metadata-keys/metadatakeys.service.ts
@@ -136,6 +136,9 @@ export class MetadataKeysService {
       updateOne: {
         filter: { sourceType, key, humanReadableName },
         update: {
+          $set: {
+            updatedAt: new Date(),
+          },
           $inc: {
             usageCount: delta,
             ...this.groupCountDeltas(userGroups, delta),
@@ -143,10 +146,7 @@ export class MetadataKeysService {
           ...(delta === 1 && {
             $max: { isPublished },
             $addToSet: { userGroups: { $each: userGroups } },
-            $setOnInsert: addCreatedByFields(
-              { key, sourceType, humanReadableName },
-              "system",
-            ),
+            $setOnInsert: addCreatedByFields({}, "system"),
           }),
         },
         upsert: delta === 1,

From fb5dfb07e7792c9e978d11b180d96997541631f2 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Mon, 11 May 2026 14:14:09 +0200
Subject: [PATCH 27/29] fix minor typescript import errors

---
 src/datasets/datasets.service.ts | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/datasets/datasets.service.ts b/src/datasets/datasets.service.ts
index 1603eac94..48f75d2f4 100644
--- a/src/datasets/datasets.service.ts
+++ b/src/datasets/datasets.service.ts
@@ -67,13 +67,14 @@ import {
   MetadataSourceDoc,
 } from "src/metadata-keys/metadatakeys.service";
 import { OpensearchService } from "src/opensearch/opensearch.service";
-import type { IndexSettings } from "@opensearch-project/opensearch/api/_types/indices._common";
-import type { TypeMapping } from "@opensearch-project/opensearch/api/_types/_common.mapping";
-import { BulkStats } from "@opensearch-project/opensearch/lib/Helpers";
+
 import { DatasetOpenSearchDto } from "src/opensearch/dto/dataset-opensearch.dto";
 import { plainToInstance } from "class-transformer";
 import { DATASET_OPENSEARCH_PROJECTION } from "../opensearch/utils/dataset-opensearch.utils";
 import { withOCCFilter } from "./utils/occ-util";
+import { BulkStats } from "@opensearch-project/opensearch/lib/Helpers.js";
+import { IndexSettings } from "@opensearch-project/opensearch/api/_types/indices._common.js";
+import { TypeMapping } from "@opensearch-project/opensearch/api/_types/_common.mapping.js";
 
 @Injectable({ scope: Scope.REQUEST })
 export class DatasetsService {

From 6ff971d3e64d6de51bf4ff7c92b5f7af4322f567 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Mon, 11 May 2026 14:14:54 +0200
Subject: [PATCH 28/29] reorder imports

---
 src/datasets/datasets.service.ts | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/datasets/datasets.service.ts b/src/datasets/datasets.service.ts
index 48f75d2f4..bd2a32204 100644
--- a/src/datasets/datasets.service.ts
+++ b/src/datasets/datasets.service.ts
@@ -67,15 +67,13 @@ import {
   MetadataSourceDoc,
 } from "src/metadata-keys/metadatakeys.service";
 import { OpensearchService } from "src/opensearch/opensearch.service";
-
+import { BulkStats } from "@opensearch-project/opensearch/lib/Helpers.js";
+import { IndexSettings } from "@opensearch-project/opensearch/api/_types/indices._common.js";
+import { TypeMapping } from "@opensearch-project/opensearch/api/_types/_common.mapping.js";
 import { DatasetOpenSearchDto } from "src/opensearch/dto/dataset-opensearch.dto";
 import { plainToInstance } from "class-transformer";
 import { DATASET_OPENSEARCH_PROJECTION } from "../opensearch/utils/dataset-opensearch.utils";
 import { withOCCFilter } from "./utils/occ-util";
-import { BulkStats } from "@opensearch-project/opensearch/lib/Helpers.js";
-import { IndexSettings } from "@opensearch-project/opensearch/api/_types/indices._common.js";
-import { TypeMapping } from "@opensearch-project/opensearch/api/_types/_common.mapping.js";
-
 @Injectable({ scope: Scope.REQUEST })
 export class DatasetsService {
   private readonly osDefaultIndex: string;

From 1d4f99ad2f7448f0f0eddcf0df42b0fc3570ef46 Mon Sep 17 00:00:00 2001
From: junjiequan <a331998513@gmail.com>
Date: Mon, 11 May 2026 15:07:18 +0200
Subject: [PATCH 29/29] add shared createMetadataKeysInstance function to
 reduce duplication

---
 src/common/utils.ts                    | 32 +++++++++++++++++
 src/datasets/datasets.service.ts       | 50 +++++++++++++-------------
 src/instruments/instruments.service.ts | 43 ++++++++++------------
 src/proposals/proposals.service.ts     | 48 ++++++++++---------------
 src/samples/samples.service.ts         | 39 +++++++++-----------
 5 files changed, 112 insertions(+), 100 deletions(-)

diff --git a/src/common/utils.ts b/src/common/utils.ts
index 8c6b67ea0..90eb96f75 100644
--- a/src/common/utils.ts
+++ b/src/common/utils.ts
@@ -12,6 +12,7 @@ import {
 import { ScientificRelation } from "./scientific-relation.enum";
 import { DatasetType } from "src/datasets/types/dataset-type.enum";
 import { isPlainObject, mapValues, omit, pickBy, some } from "lodash";
+import { MetadataSourceDoc } from "src/metadata-keys/metadatakeys.service";
 
 // add Å to mathjs accepted units as equivalent to angstrom
 const isAlphaOriginal = Unit.isValidAlpha;
@@ -1343,3 +1344,34 @@ export function parseDate(dateString?: string): Date | undefined {
   const parsedDate = new Date(dateString);
   return isNaN(parsedDate.getTime()) ? undefined : parsedDate;
 }
+
+export function createMetadataKeysInstance(
+  sourceType: string,
+  doc: {
+    ownerGroup?: string;
+    accessGroups?: string[];
+    isPublished?: boolean;
+    scientificMetadata?: Record<string, unknown>;
+    metadata?: Record<string, unknown>;
+    customMetadata?: Record<string, unknown>;
+    sampleCharacteristics?: Record<string, unknown>;
+  },
+): MetadataSourceDoc {
+  return {
+    sourceType,
+    userGroups: Array.from(
+      new Set(
+        [doc.ownerGroup, ...(doc.accessGroups ?? [])].filter(
+          Boolean,
+        ) as string[],
+      ),
+    ),
+    isPublished: doc.isPublished ?? false,
+    metadata:
+      doc.scientificMetadata ??
+      doc.metadata ??
+      doc.customMetadata ??
+      doc.sampleCharacteristics ??
+      {},
+  };
+}
diff --git a/src/datasets/datasets.service.ts b/src/datasets/datasets.service.ts
index bd2a32204..2b44783e4 100644
--- a/src/datasets/datasets.service.ts
+++ b/src/datasets/datasets.service.ts
@@ -36,6 +36,7 @@ import {
   parsePipelineProjection,
   parsePipelineSort,
   decodeMetadataKeyStrings,
+  createMetadataKeysInstance,
 } from "src/common/utils";
 import { DatasetsAccessService } from "./datasets-access.service";
 import { CreateDatasetDto } from "./dto/create-dataset.dto";
@@ -62,10 +63,7 @@ import {
   DatasetLookupKeysEnum,
 } from "./types/dataset-lookup";
 import { ProposalsService } from "src/proposals/proposals.service";
-import {
-  MetadataKeysService,
-  MetadataSourceDoc,
-} from "src/metadata-keys/metadatakeys.service";
+import { MetadataKeysService } from "src/metadata-keys/metadatakeys.service";
 import { OpensearchService } from "src/opensearch/opensearch.service";
 import { BulkStats } from "@opensearch-project/opensearch/lib/Helpers.js";
 import { IndexSettings } from "@opensearch-project/opensearch/api/_types/indices._common.js";
@@ -99,20 +97,6 @@ export class DatasetsService {
       this.configService.get<number>("opensearch.dataSyncBatchSize") || 1000;
   }
 
-  private createMetadataKeysInstance(
-    doc: UpdateQuery<DatasetDocument>,
-  ): MetadataSourceDoc {
-    const source: MetadataSourceDoc = {
-      sourceType: this.datasetModel.collection.name,
-      userGroups: Array.from(
-        new Set([doc.ownerGroup, ...(doc.accessGroups ?? [])].filter(Boolean)),
-      ),
-      isPublished: doc.isPublished || false,
-      metadata: doc.scientificMetadata ?? {},
-    };
-    return source;
-  }
-
   addLookupFields(
     pipeline: PipelineStage[],
     datasetLookupFields?: (DatasetLookupKeysEnum | IDatasetRelation)[],
@@ -217,7 +201,10 @@ export class DatasetsService {
     }
 
     this.metadataKeysService.insertManyFromSource(
-      this.createMetadataKeysInstance(savedDataset),
+      createMetadataKeysInstance(
+        this.datasetModel.collection.name,
+        savedDataset,
+      ),
     );
 
     return savedDataset;
@@ -502,8 +489,14 @@ export class DatasetsService {
     }
 
     await this.metadataKeysService.replaceManyFromSource(
-      this.createMetadataKeysInstance(existingDataset),
-      this.createMetadataKeysInstance(updatedDataset),
+      createMetadataKeysInstance(
+        this.datasetModel.collection.name,
+        existingDataset,
+      ),
+      createMetadataKeysInstance(
+        this.datasetModel.collection.name,
+        updatedDataset,
+      ),
     );
     // we were able to find the dataset and update it
     return updatedDataset;
@@ -560,8 +553,14 @@ export class DatasetsService {
     }
 
     await this.metadataKeysService.replaceManyFromSource(
-      this.createMetadataKeysInstance(existingDataset),
-      this.createMetadataKeysInstance(patchedDataset),
+      createMetadataKeysInstance(
+        this.datasetModel.collection.name,
+        existingDataset,
+      ),
+      createMetadataKeysInstance(
+        this.datasetModel.collection.name,
+        patchedDataset,
+      ),
     );
     // we were able to find the dataset and update it
     return patchedDataset;
@@ -591,7 +590,10 @@ export class DatasetsService {
 
     // delete metadata keys associated with this dataset
     await this.metadataKeysService.deleteMany(
-      this.createMetadataKeysInstance(deletedDataset),
+      createMetadataKeysInstance(
+        this.datasetModel.collection.name,
+        deletedDataset,
+      ),
     );
 
     return deletedDataset;
diff --git a/src/instruments/instruments.service.ts b/src/instruments/instruments.service.ts
index c6568c52b..8b410e9cb 100644
--- a/src/instruments/instruments.service.ts
+++ b/src/instruments/instruments.service.ts
@@ -6,13 +6,14 @@ import {
   PreconditionFailedException,
 } from "@nestjs/common";
 import { InjectModel } from "@nestjs/mongoose";
-import { FilterQuery, Model, UpdateQuery } from "mongoose";
+import { FilterQuery, Model } from "mongoose";
 import { IFilters } from "src/common/interfaces/common.interface";
 import { CountApiResponse } from "src/common/types";
 import {
   parseLimitFilters,
   addCreatedByFields,
   addUpdatedByField,
+  createMetadataKeysInstance,
 } from "src/common/utils";
 import { CreateInstrumentDto } from "./dto/create-instrument.dto";
 import { PartialUpdateInstrumentDto } from "./dto/update-instrument.dto";
@@ -20,10 +21,7 @@ import { Instrument, InstrumentDocument } from "./schemas/instrument.schema";
 import { JWTUser } from "src/auth/interfaces/jwt-user.interface";
 import { REQUEST } from "@nestjs/core";
 import { Request } from "express";
-import {
-  MetadataKeysService,
-  MetadataSourceDoc,
-} from "src/metadata-keys/metadatakeys.service";
+import { MetadataKeysService } from "src/metadata-keys/metadatakeys.service";
 import { withOCCFilter } from "src/datasets/utils/occ-util";
 
 @Injectable({ scope: Scope.REQUEST })
@@ -35,29 +33,17 @@ export class InstrumentsService {
     @Inject(REQUEST) private request: Request,
   ) {}
 
-  private createMetadataKeysInstance(
-    doc: UpdateQuery<InstrumentDocument>,
-  ): MetadataSourceDoc {
-    const source: MetadataSourceDoc = {
-      sourceType: this.instrumentModel.collection.name,
-      userGroups: Array.from(
-        new Set([doc.ownerGroup, ...(doc.accessGroups ?? [])].filter(Boolean)),
-      ),
-      isPublished: doc.isPublished || false,
-      metadata: doc.customMetadata ?? {},
-    };
-    return source;
-  }
-
   async create(createInstrumentDto: CreateInstrumentDto): Promise<Instrument> {
     const username = (this.request.user as JWTUser).username;
     const createdInstrument = new this.instrumentModel(
       addCreatedByFields<CreateInstrumentDto>(createInstrumentDto, username),
     );
     const savedInstrument = await createdInstrument.save();
-
     await this.metadataKeysService.insertManyFromSource(
-      this.createMetadataKeysInstance(savedInstrument),
+      createMetadataKeysInstance(this.instrumentModel.collection.name, {
+        ...savedInstrument.toObject(),
+        isPublished: true,
+      }),
     );
 
     return savedInstrument;
@@ -141,8 +127,14 @@ export class InstrumentsService {
     }
 
     await this.metadataKeysService.replaceManyFromSource(
-      this.createMetadataKeysInstance(existingInstrument),
-      this.createMetadataKeysInstance(updatedInstrument),
+      createMetadataKeysInstance(this.instrumentModel.collection.name, {
+        ...existingInstrument.toObject(),
+        isPublished: true,
+      }),
+      createMetadataKeysInstance(this.instrumentModel.collection.name, {
+        ...updatedInstrument.toObject(),
+        isPublished: true,
+      }),
     );
 
     return updatedInstrument;
@@ -160,7 +152,10 @@ export class InstrumentsService {
     }
 
     await this.metadataKeysService.deleteMany(
-      this.createMetadataKeysInstance(deletedInstrument),
+      createMetadataKeysInstance(
+        this.instrumentModel.collection.name,
+        deletedInstrument,
+      ),
     );
 
     return deletedInstrument;
diff --git a/src/proposals/proposals.service.ts b/src/proposals/proposals.service.ts
index ba55d30b9..6dcdf02bd 100644
--- a/src/proposals/proposals.service.ts
+++ b/src/proposals/proposals.service.ts
@@ -9,13 +9,7 @@ import {
 import { REQUEST } from "@nestjs/core";
 import { Request } from "express";
 import { InjectModel } from "@nestjs/mongoose";
-import {
-  FilterQuery,
-  Model,
-  PipelineStage,
-  QueryOptions,
-  UpdateQuery,
-} from "mongoose";
+import { FilterQuery, Model, PipelineStage, QueryOptions } from "mongoose";
 import { IFacets, IFilters } from "src/common/interfaces/common.interface";
 import {
   createFullfacetPipeline,
@@ -26,6 +20,7 @@ import {
   parsePipelineSort,
   addCreatedByFields,
   addUpdatedByField,
+  createMetadataKeysInstance,
 } from "src/common/utils";
 import { isEmpty } from "lodash";
 import {
@@ -43,10 +38,7 @@ import { IProposalFields } from "./interfaces/proposal-filters.interface";
 import { ProposalClass, ProposalDocument } from "./schemas/proposal.schema";
 import { JWTUser } from "src/auth/interfaces/jwt-user.interface";
 import { CreateMeasurementPeriodDto } from "./dto/create-measurement-period.dto";
-import {
-  MetadataKeysService,
-  MetadataSourceDoc,
-} from "src/metadata-keys/metadatakeys.service";
+import { MetadataKeysService } from "src/metadata-keys/metadatakeys.service";
 import { withOCCFilter } from "src/datasets/utils/occ-util";
 
 @Injectable({ scope: Scope.REQUEST })
@@ -177,20 +169,6 @@ export class ProposalsService {
     }
   }
 
-  private createMetadataKeysInstance(
-    doc: UpdateQuery<ProposalDocument>,
-  ): MetadataSourceDoc {
-    const source: MetadataSourceDoc = {
-      sourceType: this.proposalModel.collection.name,
-      userGroups: Array.from(
-        new Set([doc.ownerGroup, ...(doc.accessGroups ?? [])].filter(Boolean)),
-      ),
-      isPublished: doc.isPublished || false,
-      metadata: doc.metadata ?? {},
-    };
-    return source;
-  }
-
   async create(createProposalDto: CreateProposalDto): Promise<ProposalClass> {
     const username = (this.request.user as JWTUser).username;
     if (createProposalDto.MeasurementPeriodList) {
@@ -208,7 +186,10 @@ export class ProposalsService {
     const savedProposal = await createdProposal.save();
 
     this.metadataKeysService.insertManyFromSource(
-      this.createMetadataKeysInstance(savedProposal),
+      createMetadataKeysInstance(
+        this.proposalModel.collection.name,
+        savedProposal,
+      ),
     );
     return savedProposal;
   }
@@ -326,8 +307,14 @@ export class ProposalsService {
     }
 
     await this.metadataKeysService.replaceManyFromSource(
-      this.createMetadataKeysInstance(existingProposal),
-      this.createMetadataKeysInstance(updatedProposal),
+      createMetadataKeysInstance(
+        this.proposalModel.collection.name,
+        existingProposal,
+      ),
+      createMetadataKeysInstance(
+        this.proposalModel.collection.name,
+        updatedProposal,
+      ),
     );
 
     return updatedProposal;
@@ -345,7 +332,10 @@ export class ProposalsService {
     }
 
     await this.metadataKeysService.deleteMany(
-      this.createMetadataKeysInstance(deletedProposal),
+      createMetadataKeysInstance(
+        this.proposalModel.collection.name,
+        deletedProposal,
+      ),
     );
 
     return deletedProposal;
diff --git a/src/samples/samples.service.ts b/src/samples/samples.service.ts
index 9f3771431..4bc6d6580 100644
--- a/src/samples/samples.service.ts
+++ b/src/samples/samples.service.ts
@@ -9,7 +9,7 @@ import { ConfigService } from "@nestjs/config";
 import { REQUEST } from "@nestjs/core";
 import { Request } from "express";
 import { InjectModel } from "@nestjs/mongoose";
-import { FilterQuery, Model, QueryOptions, UpdateQuery } from "mongoose";
+import { FilterQuery, Model, QueryOptions } from "mongoose";
 import { JWTUser } from "src/auth/interfaces/jwt-user.interface";
 import { IFilters } from "src/common/interfaces/common.interface";
 import {
@@ -19,6 +19,7 @@ import {
   extractMetadataKeys,
   parseLimitFilters,
   decodeMetadataKeyStrings,
+  createMetadataKeysInstance,
 } from "src/common/utils";
 import { CreateSampleDto } from "./dto/create-sample.dto";
 import { PartialUpdateSampleDto } from "./dto/update-sample.dto";
@@ -26,10 +27,7 @@ import { ISampleFields } from "./interfaces/sample-filters.interface";
 import { SampleClass, SampleDocument } from "./schemas/sample.schema";
 import { CountApiResponse } from "src/common/types";
 import { OutputSampleDto } from "./dto/output-sample.dto";
-import {
-  MetadataKeysService,
-  MetadataSourceDoc,
-} from "src/metadata-keys/metadatakeys.service";
+import { MetadataKeysService } from "src/metadata-keys/metadatakeys.service";
 import { withOCCFilter } from "src/datasets/utils/occ-util";
 
 @Injectable({ scope: Scope.REQUEST })
@@ -41,20 +39,6 @@ export class SamplesService {
     @Inject(REQUEST) private request: Request,
   ) {}
 
-  private createMetadataKeysInstance(
-    doc: UpdateQuery<SampleDocument>,
-  ): MetadataSourceDoc {
-    const source: MetadataSourceDoc = {
-      sourceType: this.sampleModel.collection.name,
-      userGroups: Array.from(
-        new Set([doc.ownerGroup, ...(doc.accessGroups ?? [])].filter(Boolean)),
-      ),
-      isPublished: doc.isPublished || false,
-      metadata: doc.sampleCharacteristics ?? {},
-    };
-    return source;
-  }
-
   async create(createSampleDto: CreateSampleDto): Promise<SampleClass> {
     const username = (this.request.user as JWTUser).username;
     const createdSample = new this.sampleModel(
@@ -63,7 +47,7 @@ export class SamplesService {
     const savedSample = await createdSample.save();
 
     this.metadataKeysService.insertManyFromSource(
-      this.createMetadataKeysInstance(savedSample),
+      createMetadataKeysInstance(this.sampleModel.collection.name, savedSample),
     );
 
     return savedSample;
@@ -223,8 +207,14 @@ export class SamplesService {
     }
 
     await this.metadataKeysService.replaceManyFromSource(
-      this.createMetadataKeysInstance(existingSample),
-      this.createMetadataKeysInstance(updatedSample),
+      createMetadataKeysInstance(
+        this.sampleModel.collection.name,
+        existingSample,
+      ),
+      createMetadataKeysInstance(
+        this.sampleModel.collection.name,
+        updatedSample,
+      ),
     );
 
     return updatedSample;
@@ -242,7 +232,10 @@ export class SamplesService {
     }
 
     this.metadataKeysService.deleteMany(
-      this.createMetadataKeysInstance(deletedSample),
+      createMetadataKeysInstance(
+        this.sampleModel.collection.name,
+        deletedSample,
+      ),
     );
     return deletedSample;
   }