diff --git a/VERSION b/VERSION index da6b0a8f..35d16fb1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.5.6 +2.5.7 diff --git a/src/app.py b/src/app.py index 98d8a4f8..777043a2 100644 --- a/src/app.py +++ b/src/app.py @@ -3988,7 +3988,7 @@ def paired_dataset(id): - type: str - description: the uuid for the parent multi assay dataset datasets - - type: dict + - type: list - description: the datasets to be created. Only difference between these and normal datasets are the field "dataset_link_abs_dir" Returns @@ -4042,7 +4042,7 @@ def multiple_components(): bad_request_error(f"The dataset with uuid {direct_ancestor_uuid} already has component children dataset(s)") # validate that there is at least one component dataset if len(json_data_dict.get('datasets')) < 1: - bad_request_error(f"'datasets' field must contain at leawst 1 dataset.") + bad_request_error(f"'datasets' field must contain at least 1 dataset.") # Validate all datasets using existing schema with triggers and validators for dataset in json_data_dict.get('datasets'): @@ -5330,12 +5330,13 @@ def delete_cache(id): entity_dict = query_target_entity(id, get_internal_token()) entity_uuid = entity_dict['uuid'] - # If the target entity is Sample (`direct_ancestor`) or Dataset/Publication (`direct_ancestors`) - # Delete the cache of all the direct descendants (children) - child_uuids = schema_neo4j_queries.get_children(neo4j_driver_instance, entity_uuid , 'uuid') + # Delete the cache of all the descendants + descendant_uuids = schema_neo4j_queries.get_descendants(neo4j_driver_instance, entity_uuid , 'uuid') # If the target entity is Collection, delete the cache for each of its associated - # Datasets and Publications (via [:IN_COLLECTION] relationship) as well as just Publications (via [:USES_DATA] relationship) + # Datasets and Publications (via [:IN_COLLECTION]) as well as just Publications (via [:USES_DATA]) + # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. + # Still keep it in the code until further decision - Zhou collection_dataset_uuids = schema_neo4j_queries.get_collection_associated_datasets(neo4j_driver_instance, entity_uuid , 'uuid') # If the target entity is Upload, delete the cache for each of its associated Datasets (via [:IN_UPLOAD] relationship) @@ -5347,7 +5348,7 @@ def delete_cache(id): upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid) # We only use uuid in the cache key acorss all the cache types - uuids_list = [entity_uuid] + child_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids + uuids_list = [entity_uuid] + descendant_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids # It's possible no linked collection or upload if collection_dict: diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index e3819092..36b17581 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -786,6 +786,9 @@ ENTITIES: type: string indexed: true description: 'A DOI pointing to an Organ Mapping Antibody Panel relevant to this publication' + + # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. + # Still keep it in the code until further decision - Zhou associated_collection: type: json_string # dict generated: true @@ -802,6 +805,7 @@ ENTITIES: description: "The uuid of the associated collection for a given publication" after_create_trigger: link_publication_to_associated_collection after_update_trigger: link_publication_to_associated_collection + assigned_to_group_name: null # This assigned_to_group_name is Dataset specific, Publication doesn't have it ingest_task: null # This ingest_task is Dataset specific, Publication doesn't have it new_associated_multi_assay_uuid: null # Dataset-only attribute of Multi-Assay Dataset relationships diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 7fc9eeb4..3d1d9ca7 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -737,6 +737,8 @@ def link_entity_to_direct_ancestors(neo4j_driver, entity_uuid, direct_ancestor_u the uuid of the associated collection """ def link_publication_to_associated_collection(neo4j_driver, entity_uuid, associated_collection_uuid): + # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. + # Still keep it in the code until further decision - Zhou try: with neo4j_driver.session() as session: tx = session.begin_transaction() @@ -1008,6 +1010,8 @@ def get_next_revision_uuids(neo4j_driver, uuid): def get_collection_associated_datasets(neo4j_driver, uuid, property_key = None): results = [] + # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. + # Still keep it in the code until further decision - Zhou if property_key: query = (f"MATCH (e:Entity)-[:IN_COLLECTION|:USES_DATA]->(c:Collection) " f"WHERE c.uuid = '{uuid}' " @@ -1099,6 +1103,8 @@ def get_dataset_collections(neo4j_driver, uuid, property_key = None): def get_publication_associated_collection(neo4j_driver, uuid): result = {} + # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. + # Still keep it in the code until further decision - Zhou query = (f"MATCH (p:Publication)-[:USES_DATA]->(c:Collection) " f"WHERE p.uuid = '{uuid}' " f"RETURN c as {record_field_name}") @@ -1802,6 +1808,8 @@ def _delete_activity_node_and_linkages_tx(tx, uuid): The uuid to target publication """ def _delete_publication_associated_collection_linkages_tx(tx, uuid): + # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. + # Still keep it in the code until further decision - Zhou query = (f"MATCH (p:Publication)-[r:USES_DATA]->(c:Collection) " f"WHERE p.uuid = '{uuid}' " f"DELETE r") diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 4a78d36d..e1ca669f 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1658,18 +1658,19 @@ def sync_component_dataset_status(property_key, normalized_type, user_token, exi if 'status' not in existing_data_dict: raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") status = existing_data_dict['status'] - children_uuids_list = schema_neo4j_queries.get_children(schema_manager.get_neo4j_driver_instance(), uuid, property_key='uuid') - status_body = {"status": status} - for child_uuid in children_uuids_list: - creation_action = schema_neo4j_queries.get_entity_creation_action_activity(schema_manager.get_neo4j_driver_instance(), child_uuid) - if creation_action == 'Multi-Assay Split': - url = schema_manager.get_entity_api_url() + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT + '/' + child_uuid - request_headers = { - 'Authorization': f'Bearer {user_token}' - } - request_headers[SchemaConstants.HUBMAP_APP_HEADER] = SchemaConstants.INGEST_API_APP - request_headers[SchemaConstants.INTERNAL_TRIGGER] = SchemaConstants.COMPONENT_DATASET - response = requests.put(url=url, headers=request_headers, json=status_body) + if status.lower() != "published": + children_uuids_list = schema_neo4j_queries.get_children(schema_manager.get_neo4j_driver_instance(), uuid, property_key='uuid') + status_body = {"status": status} + for child_uuid in children_uuids_list: + creation_action = schema_neo4j_queries.get_entity_creation_action_activity(schema_manager.get_neo4j_driver_instance(), child_uuid) + if creation_action == 'Multi-Assay Split': + url = schema_manager.get_entity_api_url() + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT + '/' + child_uuid + request_headers = { + 'Authorization': f'Bearer {user_token}' + } + request_headers[SchemaConstants.HUBMAP_APP_HEADER] = SchemaConstants.INGEST_API_APP + request_headers[SchemaConstants.INTERNAL_TRIGGER] = SchemaConstants.COMPONENT_DATASET + response = requests.put(url=url, headers=request_headers, json=status_body) ####################################################################################################