diff --git a/src/app.py b/src/app.py index 9080c2a3..499fd20b 100644 --- a/src/app.py +++ b/src/app.py @@ -1464,9 +1464,8 @@ def update_entity(id): if ('direct_ancestor_uuids' in json_data_dict) and (json_data_dict['direct_ancestor_uuids']): has_direct_ancestor_uuids = True - # `direct_ancestor_uuids` is required for updating a Dataset. - # Verify all of the direct ancestor UUIDs exist in the Neo4j graph. - # Form an error response if an Exception is raised. + # Verify all of the provided direct ancestor UUIDs exist + # Form an error response if an Exception is raised try: app_neo4j_queries.uuids_all_exist(neo4j_driver=neo4j_driver_instance , uuids=json_data_dict['direct_ancestor_uuids']) @@ -1491,27 +1490,10 @@ def update_entity(id): if ('dataset_uuids_to_link' in json_data_dict) and (json_data_dict['dataset_uuids_to_link']): has_dataset_uuids_to_link = True - # Check existence of those datasets to be linked - # If one of the datasets to be linked appears to be already linked, - # neo4j query won't create the new linkage due to the use of `MERGE` - for dataset_uuid in json_data_dict['dataset_uuids_to_link']: - dataset_dict = query_target_entity(dataset_uuid, user_token) - # Also make sure it's a Dataset (or publication 2/17/23) - if dataset_dict['entity_type'] not in ['Dataset', 'Publication']: - bad_request_error(f"The uuid: {dataset_uuid} is not a Dataset or Publication, cannot be linked to this Upload") - has_dataset_uuids_to_unlink = False if ('dataset_uuids_to_unlink' in json_data_dict) and (json_data_dict['dataset_uuids_to_unlink']): has_dataset_uuids_to_unlink = True - # Check existence of those datasets to be unlinked - # If one of the datasets to be unlinked appears to be not linked at all, - # the neo4j cypher will simply skip it because it won't match the "MATCH" clause - # So no need to tell the end users that this dataset is not linked - # Let alone checking the entity type to ensure it's a Dataset - for dataset_uuid in 
json_data_dict['dataset_uuids_to_unlink']: - dataset_dict = query_target_entity(dataset_uuid, user_token) - # Generate 'before_update_trigger' data and update the entity details in Neo4j merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index ac3fb441..2576307d 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -686,7 +686,7 @@ def get_dataset_revision_number(neo4j_driver, uuid): ---------- neo4j_driver : neo4j.Driver object The neo4j database connection pool -uuid : str +dataset_uuid : str The uuid of the target entity: Dataset """ def get_associated_organs_from_dataset(neo4j_driver, dataset_uuid): @@ -708,6 +708,17 @@ def get_associated_organs_from_dataset(neo4j_driver, dataset_uuid): return results + +""" +Retrieve the list of uuids for samples associated with a given dataset + +Parameters +---------- +neo4j_driver : neo4j.Driver object + The neo4j database connection pool +dataset_uuid : str + The uuid of the target entity: Dataset +""" def get_associated_samples_from_dataset(neo4j_driver, dataset_uuid): results = [] @@ -727,6 +738,17 @@ def get_associated_samples_from_dataset(neo4j_driver, dataset_uuid): return results + +""" +Retrieve the list of uuids for donors associated with a given dataset + +Parameters +---------- +neo4j_driver : neo4j.Driver object + The neo4j database connection pool +dataset_uuid : str + The uuid of the target entity: Dataset +""" def get_associated_donors_from_dataset(neo4j_driver, dataset_uuid): results = [] @@ -1072,14 +1094,29 @@ def get_tuplets(neo4j_driver, uuid, status, prop_key): results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name]) return results -# Verify all UUIDs in a list are found as Neo4j node identifiers. 
-# Return True if all list entries are found, and raise an exception if one or more -# entries are not found when expected to be in the Neo4j graph. + +""" +Verify all UUIDs in a list are found as Neo4j node identifiers. +Raise an exception if any of them is missing. + +Parameters +---------- +neo4j_driver : neo4j.Driver object + The neo4j database connection pool +uuids : list + The uuids list + +Returns +------- +bool + True if all list entries are found, and raise an exception if one or more + entries are not found when expected to be in the Neo4j graph. +""" def uuids_all_exist(neo4j_driver, uuids:list): expected_match_count = len(uuids) record_field_name = 'match_count' - query = (f"MATCH(e: Entity) WHERE e.uuid IN {uuids} RETURN COUNT(e) AS {record_field_name}") + query = (f"MATCH (e:Entity) WHERE e.uuid IN {uuids} RETURN COUNT(e) AS {record_field_name}") with neo4j_driver.session() as session: record = session.read_transaction( schema_neo4j_queries.execute_readonly_tx @@ -1096,28 +1133,32 @@ def uuids_all_exist(neo4j_driver, uuids:list): f" exist as node identifiers in the Neo4j graph.") +""" +Get the entities from the neo4j database with the given uuids. + +Parameters +---------- +uuids : Union[str, Iterable] + The uuid(s) of the entities to get. +fields : Union[dict, Iterable, None], optional + The fields to return for each entity. If None, all fields are returned. + If a dict, the keys are the database fields to return and the values are the names to return them as. + If an iterable, the fields to return. Defaults to None. + +Returns +------- +Optional[List[neo4j.Record]]: + The entity records with the given uuids, or None if no entities were found. + The specified fields are returned for each entity. +Raises +------ +ValueError + If fields is not a dict, an iterable, or None. +""" def get_entities_by_uuid(neo4j_driver, uuids: Union[str, Iterable], fields: Union[dict, Iterable, None] = None) -> Optional[list]: - """Get the entities from the neo4j database with the given uuids. 
- Parameters - ---------- - uuids : Union[str, Iterable] - The uuid(s) of the entities to get. - fields : Union[dict, Iterable, None], optional - The fields to return for each entity. If None, all fields are returned. - If a dict, the keys are the database fields to return and the values are the names to return them as. - If an iterable, the fields to return. Defaults to None. - Returns - ------- - Optional[List[neo4j.Record]]: - The entity records with the given uuids, or None if no datasets were found. - The specified fields are returned for each entity. - Raises - ------ - ValueError - If fields is not a dict, an iterable, or None. - """ + if isinstance(uuids, str): uuids = [uuids] if not isinstance(uuids, list): diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 3ff507e6..11f37911 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -1248,6 +1248,7 @@ ENTITIES: type: list before_property_update_validators: - validate_no_duplicates_in_list + - validate_ids_exist_and_datasets generated: true # Disallow user input from request json when being created indexed: false transient: true @@ -1260,6 +1261,7 @@ ENTITIES: type: list before_property_update_validators: - validate_no_duplicates_in_list + - validate_ids_exist_and_datasets generated: true # Disallow user input from request json when being created indexed: false transient: true diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 9d6e28d4..80b68be5 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -1408,6 +1408,39 @@ def get_upload_datasets(neo4j_driver, uuid, property_key = None): return results +""" +Given a list of uuids, get the ones that are found and are Datasets, for validation purposes + +Parameters +---------- +neo4j_driver : neo4j.Driver object + The neo4j database connection pool +uuids : list + The list of uuids from user input + +Returns +------- +list 
+    The subset of the given uuids that match existing Dataset nodes
+
+"""
+def get_found_dataset_uuids(neo4j_driver, uuids):
+    record_field_name = 'uuids'
+    query = (f"MATCH (e:Dataset) "
+             f"WHERE e.uuid IN {uuids} "
+             f"RETURN COLLECT(e.uuid) AS {record_field_name}")
+    # NOTE(review): uuids (user input) is interpolated into the Cypher text; switch to query parameters if execute_readonly_tx can pass them
+    logger.info("======get_found_dataset_uuids() query======")
+    logger.debug(query)
+
+    with neo4j_driver.session() as session:
+        record = session.read_transaction(execute_readonly_tx, query)
+
+    uuids_list = record[record_field_name]
+
+    return uuids_list
+
+
+
 """
 Get count of published Dataset in the provenance hierarchy for a given Sample/Donor

diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py
index 8e2d4255..c5710f57 100644
--- a/src/schema/schema_validators.py
+++ b/src/schema/schema_validators.py
@@ -148,6 +148,33 @@ def validate_no_duplicates_in_list(property_key, normalized_entity_type, request
     if len(set(target_list)) != len(target_list):
         raise ValueError(f"The {property_key} field must only contain unique items")

+
+"""
+Validate all the provided uuids exist and all are Datasets when updating the target Upload
+Raises ValueError listing every provided uuid that is either not found or not a Dataset
+Parameters
+----------
+property_key : str
+    The target property key
+normalized_entity_type : str
+    Upload
+request: Flask request object
+    The instance of Flask request passed in from application request
+existing_data_dict : dict
+    A dictionary that contains all existing entity properties
+new_data_dict : dict
+    The json data in request body, already after the regular validations
+"""
+def validate_ids_exist_and_datasets(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
+    neo4j_driver_instance = schema_manager.get_neo4j_driver_instance()
+    all_uuids_list = new_data_dict[property_key]
+    qualified_uuids_list = schema_neo4j_queries.get_found_dataset_uuids(neo4j_driver_instance, all_uuids_list)
+    unqualified_uuids_list = [item for item in all_uuids_list if item not in qualified_uuids_list]
+    # NOTE(review): the check removed from update_entity() also accepted 'Publication'; confirm Publication nodes carry the :Dataset label
+    if unqualified_uuids_list:
+        raise ValueError(f"The following {len(unqualified_uuids_list)} uuids are either not found or not Dataset type: {str(unqualified_uuids_list)}.")
+
+
 """
 Validate that a given dataset is not a component of a multi-assay split parent dataset fore allowing status to be
 updated. If a component dataset needs to be updated, update it via its parent multi-assay dataset