From c367217e7d6355091692072bf4d8cabee75f80b4 Mon Sep 17 00:00:00 2001 From: kburke <209327+kburke@users.noreply.github.com> Date: Wed, 22 Jan 2025 15:00:23 -0500 Subject: [PATCH 1/3] Initial changes for https://github.com/hubmapconsortium/entity-api/issues/781 to exclude fields which occur in Collections.datasets, Dataset.direct_ancestors, and Sample.direct_ancestor. --- src/app.py | 7 ------- src/dev_entity_worker.py | 9 --------- src/schema/provenance_schema.yaml | 15 +++++++++++++++ src/schema/schema_manager.py | 17 +++++++++++++++-- 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/app.py b/src/app.py index f545b975..0016849e 100644 --- a/src/app.py +++ b/src/app.py @@ -797,13 +797,6 @@ def get_entity_by_id(id): # Response with the dict if public_entity and not user_in_hubmap_read_group(request): final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result) - if normalized_entity_type == 'Collection': - for i, dataset in enumerate(final_result.get('datasets', [])): - if _get_entity_visibility(normalized_entity_type='Dataset', entity_dict=dataset) != DataVisibilityEnum.PUBLIC or user_in_hubmap_read_group(request): - # If the dataset is non-public, or if the user has read-group access, there is no need to remove fields, continue to the next dataset - continue - dataset_excluded_fields = schema_manager.get_fields_to_exclude('Dataset') - final_result.get('datasets')[i] = schema_manager.exclude_properties_from_response(dataset_excluded_fields, dataset) return jsonify(final_result) """ diff --git a/src/dev_entity_worker.py b/src/dev_entity_worker.py index 38b83976..e4427179 100644 --- a/src/dev_entity_worker.py +++ b/src/dev_entity_worker.py @@ -329,15 +329,6 @@ def _get_entity_by_id_for_auth_level(self, entity_id:Annotated[str, 32], valid_u #if public_entity and not user_in_hubmap_read_group(request): if public_entity and not user_authorized: final_result = self.schemaMgr.exclude_properties_from_response(fields_to_exclude, final_result) - if normalized_entity_type == 'Collection': - for i, dataset in enumerate(final_result.get('datasets', [])): - if self._get_entity_visibility( entity_dict=dataset) != DataVisibilityEnum.PUBLIC \ - or user_authorized: # or user_in_hubmap_read_group(request): - # If the dataset is public, or if the user has read-group access, there is - # no need to remove fields, continue to the next dataset - continue - dataset_excluded_fields = self.schemaMgr.get_fields_to_exclude('Dataset') - final_result.get('datasets')[i] = self.schemaMgr.exclude_properties_from_response(dataset_excluded_fields, dataset) return final_result ''' diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 3903cb4d..e1795ba5 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -192,6 +192,10 @@ shared_entity_properties: &shared_entity_properties ENTITIES: ############################################# Collection ############################################# Collection: + excluded_properties_from_public_response: + - datasets: + - lab_dataset_id + - submission_id # Collection can not be derivation source but not target derivation: source: false @@ -304,6 +308,12 @@ ENTITIES: - lab_dataset_id - metadata: - lab_id + - direct_ancestors: + - lab_tissue_sample_id + - submission_id + - lab_dataset_id + - metadata: + - lab_id derivation: source: true target: true @@ -870,6 +880,11 @@ ENTITIES: excluded_properties_from_public_response: - lab_tissue_sample_id - submission_id + - direct_ancestor: + - lab_donor_id + - submission_id + - label + - lab_tissue_sample_id properties: <<: *shared_properties <<: *shared_entity_properties diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 14d482fe..ffd9e609 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -306,13 +306,26 @@ def delete_nested_field(data, nested_path): if isinstance(value, list): for nested_field in value: if isinstance(nested_field, dict): - delete_nested_field(data[key], nested_field) + if isinstance(data[key], list): + for item in data[key]: + delete_nested_field(item, nested_field) + else: + delete_nested_field(data[key], nested_field) + elif isinstance(data[key], list): + for item in data[key]: + if nested_field in item: + del item[nested_field] elif nested_field in data[key]: del data[key][nested_field] elif isinstance(value, dict): delete_nested_field(data[key], value) elif nested_path in data: - del data[nested_path] + if isinstance(data[nested_path], list): + for item in data[nested_path]: + if nested_path in item: + del item[nested_path] + else: + del data[nested_path] for field in excluded_fields: delete_nested_field(output_dict, field) From 139ff5187091f40030c996f4fcdc3dbecbf85913 Mon Sep 17 00:00:00 2001 From: kburke <209327+kburke@users.noreply.github.com> Date: Wed, 22 Jan 2025 17:16:41 -0500 Subject: [PATCH 2/3] Code review requests for excluding nested fields. Missed content plus comments for clarification. --- src/schema/provenance_schema.yaml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index e1795ba5..a7f0a0ca 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -196,6 +196,8 @@ ENTITIES: - datasets: - lab_dataset_id - submission_id + - metadata: + - lab_id # Collection can not be derivation source but not target derivation: source: false @@ -309,9 +311,12 @@ ENTITIES: - metadata: - lab_id - direct_ancestors: + # Sample ancestors of a Dataset must have these fields removed - lab_tissue_sample_id - submission_id + # Dataset ancestors of a Dataset must have these fields removed - lab_dataset_id + # Both Sample and Dataset ancestors of a Dataset must have these fields removed - metadata: - lab_id derivation: @@ -880,11 +885,18 @@ ENTITIES: excluded_properties_from_public_response: - lab_tissue_sample_id - submission_id + - metadata: + - lab_id - direct_ancestor: + # Donor ancestors of a Sample must have these fields removed - lab_donor_id - - submission_id - label + # Sample ancestors of a Sample must have these fields removed - lab_tissue_sample_id + - metadata: + - lab_id + # Both Sample and Donor ancestors of a Sample must have these fields removed + - submission_id properties: <<: *shared_properties <<: *shared_entity_properties From 9c3e602c0eeb68a4c2ec647a9a1ea9298b822775 Mon Sep 17 00:00:00 2001 From: kburke <209327+kburke@users.noreply.github.com> Date: Thu, 23 Jan 2025 08:38:44 -0500 Subject: [PATCH 3/3] Remove reference to submission_id in Collection.datasets YAML --- src/schema/provenance_schema.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index a7f0a0ca..aeb94969 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -195,7 +195,6 @@ ENTITIES: excluded_properties_from_public_response: - datasets: - lab_dataset_id - - submission_id - metadata: - lab_id # Collection can not be derivation source but not target