Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 2 additions & 20 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -1464,9 +1464,8 @@ def update_entity(id):
if ('direct_ancestor_uuids' in json_data_dict) and (json_data_dict['direct_ancestor_uuids']):
has_direct_ancestor_uuids = True

# `direct_ancestor_uuids` is required for updating a Dataset.
# Verify all of the direct ancestor UUIDs exist in the Neo4j graph.
# Form an error response if an Exception is raised.
# Verify all of the provided direct ancestor UUIDs exist
# Form an error response if an Exception is raised
try:
app_neo4j_queries.uuids_all_exist(neo4j_driver=neo4j_driver_instance
, uuids=json_data_dict['direct_ancestor_uuids'])
Expand All @@ -1491,27 +1490,10 @@ def update_entity(id):
if ('dataset_uuids_to_link' in json_data_dict) and (json_data_dict['dataset_uuids_to_link']):
has_dataset_uuids_to_link = True

# Check existence of those datasets to be linked
# If one of the datasets to be linked appears to be already linked,
# neo4j query won't create the new linkage due to the use of `MERGE`
for dataset_uuid in json_data_dict['dataset_uuids_to_link']:
dataset_dict = query_target_entity(dataset_uuid, user_token)
# Also make sure it's a Dataset (or publication 2/17/23)
if dataset_dict['entity_type'] not in ['Dataset', 'Publication']:
bad_request_error(f"The uuid: {dataset_uuid} is not a Dataset or Publication, cannot be linked to this Upload")

has_dataset_uuids_to_unlink = False
if ('dataset_uuids_to_unlink' in json_data_dict) and (json_data_dict['dataset_uuids_to_unlink']):
has_dataset_uuids_to_unlink = True

# Check existence of those datasets to be unlinked
# If one of the datasets to be unlinked appears to be not linked at all,
# the neo4j cypher will simply skip it because it won't match the "MATCH" clause
# So no need to tell the end users that this dataset is not linked
# Let alone checking the entity type to ensure it's a Dataset
for dataset_uuid in json_data_dict['dataset_uuids_to_unlink']:
dataset_dict = query_target_entity(dataset_uuid, user_token)

# Generate 'before_update_trigger' data and update the entity details in Neo4j
merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict)

Expand Down
89 changes: 65 additions & 24 deletions src/app_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,7 +686,7 @@ def get_dataset_revision_number(neo4j_driver, uuid):
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
uuid : str
dataset_uuid : str
The uuid of the target entity: Dataset
"""
def get_associated_organs_from_dataset(neo4j_driver, dataset_uuid):
Expand All @@ -708,6 +708,17 @@ def get_associated_organs_from_dataset(neo4j_driver, dataset_uuid):

return results


"""
Retrieve the list of uuids for samples associated with a given dataset

Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
dataset_uuid : str
The uuid of the target entity: Dataset
"""
def get_associated_samples_from_dataset(neo4j_driver, dataset_uuid):
results = []

Expand All @@ -727,6 +738,17 @@ def get_associated_samples_from_dataset(neo4j_driver, dataset_uuid):

return results


"""
Retrieve the list of uuids for donors associated with a given dataset

Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
dataset_uuid : str
The uuid of the target entity: Dataset
"""
def get_associated_donors_from_dataset(neo4j_driver, dataset_uuid):
results = []

Expand Down Expand Up @@ -1072,14 +1094,29 @@ def get_tuplets(neo4j_driver, uuid, status, prop_key):
results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name])
return results

# Verify all UUIDs in a list are found as Neo4j node identifiers.
# Return True if all list entries are found, and raise an exception if one or more
# entries are not found when expected to be in the Neo4j graph.

"""
Verify all UUIDs in a list are found as Neo4j node identifiers.
Return True when every UUID is found; raise an exception otherwise.

Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
uuids : list
The uuids list

Returns
-------
bool
True if all list entries are found, and raise an exception if one or more
entries are not found when expected to be in the Neo4j graph.
"""
def uuids_all_exist(neo4j_driver, uuids:list):
expected_match_count = len(uuids)

record_field_name = 'match_count'
query = (f"MATCH(e: Entity) WHERE e.uuid IN {uuids} RETURN COUNT(e) AS {record_field_name}")
query = (f"MATCH (e:Entity) WHERE e.uuid IN {uuids} RETURN COUNT(e) AS {record_field_name}")

with neo4j_driver.session() as session:
record = session.read_transaction( schema_neo4j_queries.execute_readonly_tx
Expand All @@ -1096,28 +1133,32 @@ def uuids_all_exist(neo4j_driver, uuids:list):
f" exist as node identifiers in the Neo4j graph.")


"""
Get the entities from the neo4j database with the given uuids.

Parameters
----------
uuids : Union[str, Iterable]
The uuid(s) of the entities to get.
fields : Union[dict, Iterable, None], optional
The fields to return for each entity. If None, all fields are returned.
If a dict, the keys are the database fields to return and the values are the names to return them as.
If an iterable, the fields to return. Defaults to None.

Returns
-------
Optional[List[neo4j.Record]]:
The entity records with the given uuids, or None if no entities were found.
The specified fields are returned for each entity.
Raises
------
ValueError
If fields is not a dict, an iterable, or None.
"""
def get_entities_by_uuid(neo4j_driver,
uuids: Union[str, Iterable],
fields: Union[dict, Iterable, None] = None) -> Optional[list]:
"""Get the entities from the neo4j database with the given uuids.
Parameters
----------
uuids : Union[str, Iterable]
The uuid(s) of the entities to get.
fields : Union[dict, Iterable, None], optional
The fields to return for each entity. If None, all fields are returned.
If a dict, the keys are the database fields to return and the values are the names to return them as.
If an iterable, the fields to return. Defaults to None.
Returns
-------
Optional[List[neo4j.Record]]:
The entity records with the given uuids, or None if no datasets were found.
The specified fields are returned for each entity.
Raises
------
ValueError
If fields is not a dict, an iterable, or None.
"""

if isinstance(uuids, str):
uuids = [uuids]
if not isinstance(uuids, list):
Expand Down
2 changes: 2 additions & 0 deletions src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1248,6 +1248,7 @@ ENTITIES:
type: list
before_property_update_validators:
- validate_no_duplicates_in_list
- validate_ids_exist_and_datasets
generated: true # Disallow user input from request json when being created
indexed: false
transient: true
Expand All @@ -1260,6 +1261,7 @@ ENTITIES:
type: list
before_property_update_validators:
- validate_no_duplicates_in_list
- validate_ids_exist_and_datasets
generated: true # Disallow user input from request json when being created
indexed: false
transient: true
Expand Down
33 changes: 33 additions & 0 deletions src/schema/schema_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -1408,6 +1408,39 @@ def get_upload_datasets(neo4j_driver, uuid, property_key = None):
return results


"""
Given a list of uuids, get the qualified ones (those that are found and are of Dataset type) for validation purposes

Parameters
----------
neo4j_driver : neo4j.Driver object
The neo4j database connection pool
uuids : list
The list of uuids from user input

Returns
-------
list
A list of uuids that are found and Dataset type

"""
def get_found_dataset_uuids(neo4j_driver, uuids):
    # Purpose: return the subset of `uuids` that match a node labeled `Dataset` in Neo4j.
    # `neo4j_driver` supplies the session; `uuids` is the list of uuids to check.
    # The result is the list produced by Cypher's COLLECT(e.uuid).
    #
    # `record_field_name`, `logger` and `execute_readonly_tx` are not defined in this
    # function; presumably they are module-level names in this file - confirm.
    #
    # NOTE(review): `uuids` is interpolated into the Cypher text through the Python list
    # repr instead of being passed as a query parameter. Because the list originates from
    # user input, consider switching to a parameterized query (`$uuids`) if the
    # transaction helper can accept parameters.
    query = (
        f"MATCH (e:Dataset) "
        f"WHERE e.uuid IN {uuids} "
        f"RETURN COLLECT(e.uuid) AS {record_field_name}")

    # The banner names this function so log lines can be traced back to their source
    logger.info("======get_found_dataset_uuids() query======")
    logger.debug(query)

    with neo4j_driver.session() as session:
        record = session.read_transaction(execute_readonly_tx, query)

    # COLLECT() aggregates every matched uuid into one list under `record_field_name`
    uuids_list = record[record_field_name]

    return uuids_list


"""
Get count of published Dataset in the provenance hierarchy for a given Sample/Donor

Expand Down
27 changes: 27 additions & 0 deletions src/schema/schema_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,33 @@ def validate_no_duplicates_in_list(property_key, normalized_entity_type, request
if len(set(target_list)) != len(target_list):
raise ValueError(f"The {property_key} field must only contain unique items")


"""
Validate all the provided uuids exist and all are Datasets when updating the target Upload

Parameters
----------
property_key : str
The target property key
normalized_entity_type : str
Upload
request: Flask request object
The instance of Flask request passed in from application request
existing_data_dict : dict
A dictionary that contains all existing entity properties
new_data_dict : dict
The json data in request body, already after the regular validations
"""
def validate_ids_exist_and_datasets(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict):
    # Purpose: reject the update when any uuid supplied under `property_key` is not
    # returned by the Dataset lookup, i.e. it is either not found or not a Dataset.
    # Raises ValueError listing the offending uuids; returns None otherwise.
    driver = schema_manager.get_neo4j_driver_instance()
    requested_uuids = new_data_dict[property_key]

    # Ask the query layer which of the requested uuids qualify
    accepted_uuids = schema_neo4j_queries.get_found_dataset_uuids(driver, requested_uuids)

    # Anything requested but not returned by the lookup is unqualified
    rejected_uuids = []
    for candidate_uuid in requested_uuids:
        if candidate_uuid not in accepted_uuids:
            rejected_uuids.append(candidate_uuid)

    # Nothing rejected: validation passes silently
    if not rejected_uuids:
        return

    raise ValueError(f"The following {len(rejected_uuids)} uuids are either not found or not Dataset type: {str(rejected_uuids)}.")


"""
Validate that a given dataset is not a component of a multi-assay split parent dataset before allowing status to be
updated. If a component dataset needs to be updated, update it via its parent multi-assay dataset
Expand Down