From 8b522f6ea3c49aa644a7ebf3f8ee428f7675b21c Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Tue, 23 Sep 2025 18:09:52 -0400 Subject: [PATCH 01/17] Introduce request to trigger methods --- src/app.py | 44 ++++-- src/schema/schema_manager.py | 14 +- src/schema/schema_triggers.py | 278 ++++++++++++++++++++++++++-------- 3 files changed, 253 insertions(+), 83 deletions(-) diff --git a/src/app.py b/src/app.py index 499fd20b..2374fcbb 100644 --- a/src/app.py +++ b/src/app.py @@ -1196,7 +1196,7 @@ def create_entity(entity_type): # If the preceding "additional validations" did not raise an error, # generate 'before_create_trigger' data and create the entity details in Neo4j - merged_dict = create_entity_details(request, normalized_entity_type, user_token, json_data_dict) + merged_dict = create_entity_details(request, normalized_entity_type, user_token, json_data_dict) # For Donor: link to parent Lab node # For Sample: link to existing direct ancestor @@ -1451,7 +1451,7 @@ def update_entity(id): bad_request_error(f"The uuid: {direct_ancestor_uuid} is not a Donor neither a Sample, cannot be used as the direct ancestor of this Sample") # Generate 'before_update_triiger' data and update the entity details in Neo4j - merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) + merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) # Handle linkages update via `after_update_trigger` methods if has_direct_ancestor_uuid: @@ -1480,7 +1480,7 @@ def update_entity(id): associated_collection_dict = query_target_entity(json_data_dict['associated_collection_uuid'], user_token) # Generate 'before_update_trigger' data and update the entity details in Neo4j - merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) + merged_updated_dict = update_entity_details(request, normalized_entity_type, 
user_token, json_data_dict, entity_dict) # Handle linkages update via `after_update_trigger` methods if has_direct_ancestor_uuids or has_associated_collection_uuid or has_updated_status: @@ -1495,20 +1495,20 @@ has_dataset_uuids_to_unlink = True # Generate 'before_update_trigger' data and update the entity details in Neo4j - merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) + merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) # Handle linkages update via `after_update_trigger` methods if has_dataset_uuids_to_link or has_dataset_uuids_to_unlink or has_updated_status: after_update(normalized_entity_type, user_token, merged_updated_dict) elif schema_manager.entity_type_instanceof(normalized_entity_type, 'Collection'): # Generate 'before_update_trigger' data and update the entity details in Neo4j - merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) + merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) # Handle linkages update via `after_update_trigger` methods after_update(normalized_entity_type, user_token, merged_updated_dict) else: # Generate 'before_update_trigger' data and update the entity details in Neo4j - merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) + merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) # Remove the cached entities if Memcached is being used # DO NOT update the cache with new entity dict because the returned dict from PUT (some properties maybe skipped) @@ -3081,7 +3081,7 @@ def retract_dataset(id): bad_request_error(e) # No need to call after_update() afterwards because retraction 
doesn't call any after_update_trigger methods - merged_updated_dict = update_entity_details(request, normalized_entity_type, token, json_data_dict, entity_dict) + merged_updated_dict = update_entity_details(request, normalized_entity_type, token, json_data_dict, entity_dict) complete_dict = schema_manager.get_complete_entity_result(token, merged_updated_dict) @@ -4598,6 +4598,8 @@ def _suppress_reindex() -> bool: The incoming request normalized_entity_type : str One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus groups token json_data_dict: dict @@ -4608,7 +4610,7 @@ def _suppress_reindex() -> bool: dict A dict of all the newly created entity detials """ -def create_entity_details(request, normalized_entity_type, user_token, json_data_dict): +def create_entity_details(request, normalized_entity_type, user_token, json_data_dict): # Get user info based on request user_info_dict = schema_manager.get_user_info(request) @@ -4659,6 +4661,7 @@ def create_entity_details(request, normalized_entity_type, user_token, json_data # Use {} since no existing dict generated_before_create_trigger_data_dict = schema_manager.generate_triggered_data( trigger_type=TriggerTypeEnum.BEFORE_CREATE , normalized_class=normalized_entity_type + , request=request , user_token=user_token , existing_data_dict={} , new_data_dict=new_data_dict) @@ -4747,6 +4750,8 @@ def create_entity_details(request, normalized_entity_type, user_token, json_data The incoming request normalized_entity_type : str Must be "Sample" in this case +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus groups token json_data_dict: dict @@ -4759,7 +4764,7 @@ def create_entity_details(request, normalized_entity_type, user_token, json_data list A list of 
all the newly generated ids via uuid-api """ -def create_multiple_samples_details(request, normalized_entity_type, user_token, json_data_dict, count): +def create_multiple_samples_details(request, normalized_entity_type, user_token, json_data_dict, count): # Get user info based on request user_info_dict = schema_manager.get_user_info(request) @@ -4817,6 +4822,7 @@ def create_multiple_samples_details(request, normalized_entity_type, user_token, # Use {} since no existing dict generated_before_create_trigger_data_dict = schema_manager.generate_triggered_data( trigger_type=TriggerTypeEnum.BEFORE_CREATE , normalized_class=normalized_entity_type + , request=request , user_token=user_token , existing_data_dict={} , new_data_dict=new_data_dict) @@ -4897,6 +4903,8 @@ def create_multiple_samples_details(request, normalized_entity_type, user_token, The incoming request normalized_entity_type : str Must be "Dataset" in this case +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus groups token json_data_dict_list: list @@ -4909,7 +4917,7 @@ def create_multiple_samples_details(request, normalized_entity_type, user_token, list A list of all the newly created datasets with generated fields represented as dictionaries """ -def create_multiple_component_details(request, normalized_entity_type, user_token, json_data_dict_list, creation_action): +def create_multiple_component_details(request, normalized_entity_type, user_token, json_data_dict_list, creation_action): # Get user info based on request user_info_dict = schema_manager.get_user_info(request) direct_ancestor = json_data_dict_list[0].get('direct_ancestor_uuids')[0] @@ -4946,6 +4954,7 @@ def create_multiple_component_details(request, normalized_entity_toke # Use {} since no existing dict generated_before_create_trigger_data_dict = schema_manager.generate_triggered_data( trigger_type=TriggerTypeEnum.BEFORE_CREATE , 
normalized_class=normalized_entity_type + , request=request , user_token=user_token , existing_data_dict={} , new_data_dict=new_data_dict) @@ -5013,19 +5022,22 @@ def create_multiple_component_details(request, normalized_entity_toke ---------- normalized_entity_type : str One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus groups token merged_data_dict: dict The merged dict that contains the entity dict newly created and information from user request json that are not stored in Neo4j """ -def after_create(normalized_entity_type, user_token, merged_data_dict): +def after_create(normalized_entity_type, request, user_token, merged_data_dict): try: # 'after_create_trigger' and 'after_update_trigger' don't generate property values # It just returns the empty dict, no need to assign value # Use {} since no new dict schema_manager.generate_triggered_data( trigger_type=TriggerTypeEnum.AFTER_CREATE , normalized_class=normalized_entity_type + , request=request , user_token=user_token , existing_data_dict=merged_data_dict , new_data_dict={}) @@ -5048,6 +5060,8 @@ def after_create(normalized_entity_type, user_token, merged_data_dict): The incoming request normalized_entity_type : str One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus groups token json_data_dict: dict @@ -5060,7 +5074,7 @@ def after_create(normalized_entity_type, user_token, merged_data_dict): dict A dict of all the updated entity detials """ -def update_entity_details(request, normalized_entity_type, user_token, json_data_dict, existing_entity_dict): +def update_entity_details(request, normalized_entity_type, user_token, 
json_data_dict, existing_entity_dict): # Get user info based on request user_info_dict = schema_manager.get_user_info(request) @@ -5070,6 +5084,7 @@ def update_entity_details(request, normalized_entity_type, user_token, json_data try: generated_before_update_trigger_data_dict = schema_manager.generate_triggered_data( trigger_type=TriggerTypeEnum.BEFORE_UPDATE , normalized_class=normalized_entity_type + , request=request , user_token=user_token , existing_data_dict=existing_entity_dict , new_data_dict=new_data_dict) @@ -5132,18 +5147,21 @@ def update_entity_details(request, normalized_entity_type, user_token, json_data ---------- normalized_entity_type : str One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus groups token entity_dict: dict The entity dict newly updated """ -def after_update(normalized_entity_type, user_token, entity_dict): +def after_update(normalized_entity_type, request, user_token, entity_dict): try: # 'after_create_trigger' and 'after_update_trigger' don't generate property values # It just returns the empty dict, no need to assign value # Use {} sicne no new dict schema_manager.generate_triggered_data( trigger_type=TriggerTypeEnum.AFTER_UPDATE , normalized_class=normalized_entity_type + , request=request , user_token=user_token , existing_data_dict=entity_dict , new_data_dict={}) diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index faa583d2..951c66b3 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -358,6 +358,8 @@ def delete_nested_field(data, nested_path): One of the trigger types: on_create_trigger, on_update_trigger, on_read_trigger normalized_class : str One of the types defined in the schema yaml: Activity, Collection, Donor, Sample, Dataset +request: Flask request object + The instance of Flask request 
passed in from application request user_token: str The user's globus nexus token, 'on_read_trigger' doesn't really need this existing_data_dict : dict @@ -372,7 +374,7 @@ def delete_nested_field(data, nested_path): dict A dictionary of trigger event methods generated data """ -def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, user_token, existing_data_dict +def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, request, user_token, existing_data_dict , new_data_dict, properties_to_skip = []): global _schema @@ -422,7 +424,7 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, use # because the property value is already set and stored in neo4j # Normally it's building linkages between entity nodes # Use {} since no incoming new_data_dict - trigger_method_to_call(key, normalized_class, user_token, existing_data_dict, {}) + trigger_method_to_call(key, normalized_class, request, user_token, existing_data_dict, {}) except Exception: msg = f"Failed to call the {trigger_type.value} method: {trigger_method_name}" # Log the full stack trace, prepend a line with our message @@ -456,9 +458,9 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, use #within this dictionary and return it so it can be saved in the scope of this loop and #passed to other 'updated_peripherally' triggers if 'updated_peripherally' in properties[key] and properties[key]['updated_peripherally']: - trigger_generated_data_dict = trigger_method_to_call(key, normalized_class, user_token, existing_data_dict, new_data_dict, trigger_generated_data_dict) + trigger_generated_data_dict = trigger_method_to_call(key, normalized_class, request, user_token, existing_data_dict, new_data_dict, trigger_generated_data_dict) else: - target_key, target_value = trigger_method_to_call(key, normalized_class, user_token, existing_data_dict, new_data_dict) + target_key, target_value = trigger_method_to_call(key, 
normalized_class, request, user_token, existing_data_dict, new_data_dict) trigger_generated_data_dict[target_key] = target_value # Meanwhile, set the original property as None if target_key is different @@ -503,9 +505,9 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, use # within this dictionary and return it so it can be saved in the scope of this loop and # passed to other 'updated_peripherally' triggers if 'updated_peripherally' in properties[key] and properties[key]['updated_peripherally']: - trigger_generated_data_dict = trigger_method_to_call(key, normalized_class, user_token, existing_data_dict, new_data_dict, trigger_generated_data_dict) + trigger_generated_data_dict = trigger_method_to_call(key, normalized_class, request, user_token, existing_data_dict, new_data_dict, trigger_generated_data_dict) else: - target_key, target_value = trigger_method_to_call(key, normalized_class, user_token, existing_data_dict, new_data_dict) + target_key, target_value = trigger_method_to_call(key, normalized_class, request, user_token, existing_data_dict, new_data_dict) trigger_generated_data_dict[target_key] = target_value # Meanwhile, set the original property as None if target_key is different diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index c5d83de1..b96febcf 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -34,6 +34,8 @@ The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Activity, Collection, Donor, Sample, Dataset, Upload, Publication +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -46,12 +48,13 @@ str: The target property key str: The neo4j TIMESTAMP() function as string """ -def set_timestamp(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def 
set_timestamp(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): # Use the neo4j TIMESTAMP() function during entity creation # Will be proessed in app_neo4j_queries._build_properties_map() # and schema_neo4j_queries._build_properties_map() return property_key, 'TIMESTAMP()' + """ Trigger event method of setting the entity type of a given entity @@ -61,6 +64,8 @@ def set_timestamp(property_key, normalized_type, user_token, existing_data_dict, The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Activity, Collection, Donor, Sample, Dataset, Upload, Publication +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -73,7 +78,7 @@ def set_timestamp(property_key, normalized_type, user_token, existing_data_dict, str: The target property key str: The string of normalized entity type """ -def set_entity_type(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def set_entity_type(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): return property_key, normalized_type @@ -86,6 +91,8 @@ def set_entity_type(property_key, normalized_type, user_token, existing_data_dic The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Activity, Collection, Donor, Sample, Dataset, Upload, Publication +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -98,12 +105,13 @@ def set_entity_type(property_key, normalized_type, user_token, existing_data_dic str: The target property key str: The 'sub' string """ -def set_user_sub(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def set_user_sub(property_key, 
normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'sub' not in new_data_dict: raise KeyError("Missing 'sub' key in 'new_data_dict' during calling 'set_user_sub()' trigger method.") return property_key, new_data_dict['sub'] + """ Trigger event method of getting user email @@ -113,6 +121,8 @@ def set_user_sub(property_key, normalized_type, user_token, existing_data_dict, The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Activity, Collection, Donor, Sample, Dataset, Upload, Publication +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -125,12 +135,13 @@ def set_user_sub(property_key, normalized_type, user_token, existing_data_dict, str: The target property key str: The 'email' string """ -def set_user_email(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def set_user_email(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'email' not in new_data_dict: raise KeyError("Missing 'email' key in 'new_data_dict' during calling 'set_user_email()' trigger method.") return property_key, new_data_dict['email'] + """ Trigger event method of getting user name @@ -140,6 +151,8 @@ def set_user_email(property_key, normalized_type, user_token, existing_data_dict The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Activity, Collection, Donor, Sample, Dataset, Upload, Publication +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -152,12 +165,13 @@ def set_user_email(property_key, normalized_type, user_token, existing_data_dict str: The target property key str: The 'name' string """ -def 
set_user_displayname(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def set_user_displayname(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'name' not in new_data_dict: raise KeyError("Missing 'name' key in 'new_data_dict' during calling 'set_user_displayname()' trigger method.") return property_key, new_data_dict['name'] + """ Trigger event method of getting uuid, hubmap_id for a new entity to be created @@ -167,6 +181,8 @@ def set_user_displayname(property_key, normalized_type, user_token, existing_dat The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Activity, Collection, Donor, Sample, Dataset, Upload, Publication +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -179,12 +195,13 @@ def set_user_displayname(property_key, normalized_type, user_token, existing_dat str: The target property key str: The uuid created via uuid-api """ -def set_uuid(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def set_uuid(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in new_data_dict: raise KeyError("Missing 'uuid' key in 'new_data_dict' during calling 'set_uuid()' trigger method.") return property_key, new_data_dict['uuid'] + """ Trigger event method of getting uuid, hubmap_id for a new entity to be created @@ -194,6 +211,8 @@ def set_uuid(property_key, normalized_type, user_token, existing_data_dict, new_ The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Activity, Collection, Donor, Sample, Dataset, Upload, Publication +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token 
existing_data_dict : dict @@ -206,7 +225,7 @@ def set_uuid(property_key, normalized_type, user_token, existing_data_dict, new_ str: The target property key str: The hubmap_id created via uuid-api """ -def set_hubmap_id(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def set_hubmap_id(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'hubmap_id' not in new_data_dict: raise KeyError("Missing 'hubmap_id' key in 'new_data_dict' during calling 'set_hubmap_id()' trigger method.") @@ -227,6 +246,8 @@ def set_hubmap_id(property_key, normalized_type, user_token, existing_data_dict, The target property key of the value to be generated normalized_type : str One of the entity types defined in the schema yaml: Donor, Sample, Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -239,7 +260,7 @@ def set_hubmap_id(property_key, normalized_type, user_token, existing_data_dict, str: The target property key str: The data access level string """ -def set_data_access_level(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def set_data_access_level(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in new_data_dict: raise KeyError("Missing 'uuid' key in 'new_data_dict' during calling 'set_data_access_level()' trigger method.") @@ -282,6 +303,8 @@ def set_data_access_level(property_key, normalized_type, user_token, existing_da The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Donor, Sample, Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -294,7 +317,7 @@ def set_data_access_level(property_key, normalized_type, 
user_token, existing_da str: The target property key str: The group uuid """ -def set_group_uuid(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def set_group_uuid(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): group_uuid = None # Look for membership in a single "data provider" group and sets to that. @@ -332,6 +355,7 @@ def set_group_uuid(property_key, normalized_type, user_token, existing_data_dict return property_key, group_uuid + """ Trigger event method of setting the group_name @@ -341,6 +365,8 @@ def set_group_uuid(property_key, normalized_type, user_token, existing_data_dict The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Donor, Sample, Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -353,7 +379,7 @@ def set_group_uuid(property_key, normalized_type, user_token, existing_data_dict str: The target property key str: The group name """ -def set_group_name(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def set_group_name(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): group_name = None # If `group_uuid` is not already set, looks for membership in a single "data provider" group and sets to that. 
@@ -392,6 +418,8 @@ def set_group_name(property_key, normalized_type, user_token, existing_data_dict The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Donor, Sample +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -404,7 +432,7 @@ def set_group_name(property_key, normalized_type, user_token, existing_data_dict str: The target property key str: The submission_id """ -def set_submission_id(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def set_submission_id(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'submission_id' not in new_data_dict: raise KeyError("Missing 'submission_id' key in 'new_data_dict' during calling 'set_submission_id()' trigger method.") @@ -442,6 +470,8 @@ def set_submission_id(property_key, normalized_type, user_token, existing_data_d The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Donor, Sample +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -454,8 +484,8 @@ def set_submission_id(property_key, normalized_type, user_token, existing_data_d str: The target property key list: The file info dicts in a list """ -def commit_image_files(property_key, normalized_type, user_token, existing_data_dict, new_data_dict, generated_dict): - return _commit_files('image_files', property_key, normalized_type, user_token, existing_data_dict, new_data_dict, generated_dict) +def commit_image_files(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict, generated_dict): + return _commit_files('image_files', property_key, normalized_type, request, user_token, existing_data_dict, 
new_data_dict, generated_dict) """ @@ -473,6 +503,8 @@ def commit_image_files(property_key, normalized_type, user_token, existing_data_ The target property key normalized_type : str One of the types defined in the schema yaml: Donor, Sample +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -489,8 +521,8 @@ def commit_image_files(property_key, normalized_type, user_token, existing_data_ str: The target property key list: The file info dicts in a list """ -def delete_image_files(property_key, normalized_type, user_token, existing_data_dict, new_data_dict, generated_dict): - return _delete_files('image_files', property_key, normalized_type, user_token, existing_data_dict, new_data_dict, generated_dict) +def delete_image_files(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict, generated_dict): + return _delete_files('image_files', property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict, generated_dict) """ @@ -502,6 +534,8 @@ def delete_image_files(property_key, normalized_type, user_token, existing_data_ The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Donor, Sample +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -514,7 +548,7 @@ def delete_image_files(property_key, normalized_type, user_token, existing_data_ str: The target property key list: The file info dicts (with updated descriptions) in a list """ -def update_file_descriptions(property_key, normalized_type, user_token, existing_data_dict, new_data_dict, generated_dict): +def update_file_descriptions(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict, generated_dict): if property_key not in 
new_data_dict: raise KeyError(f"Missing '{property_key}' key in 'new_data_dict' during calling 'update_file_descriptions()' trigger method.") @@ -562,7 +596,25 @@ def update_file_descriptions(property_key, normalized_type, user_token, existing ## Trigger methods shared by Dataset, Upload, and Publication - DO NOT RENAME #################################################################################################### -def set_status_history(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +""" +Trigger event method of tracking status change events + +Parameters +---------- +property_key : str + The target property key of the value to be generated +normalized_type : str + One of the types defined in the schema yaml: Activity, Collection, Donor, Sample, Dataset, Upload, Publication +request: Flask request object + The instance of Flask request passed in from application request +user_token: str + The user's globus nexus token +existing_data_dict : dict + A dictionary that contains all existing entity properties +new_data_dict : dict + A merged dictionary that contains all possible input data to be used +""" +def set_status_history(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): new_status_history = [] status_entry = {} @@ -592,7 +644,6 @@ def set_status_history(property_key, normalized_type, user_token, existing_data_ - #################################################################################################### ## Trigger methods specific to Collection - DO NOT RENAME #################################################################################################### @@ -606,6 +657,8 @@ def set_status_history(property_key, normalized_type, user_token, existing_data_ The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Collection +request: Flask request object + The instance of Flask request passed in from application 
request user_token: str The user's globus nexus token existing_data_dict : dict @@ -618,7 +671,7 @@ def set_status_history(property_key, normalized_type, user_token, existing_data_ str: The target property key list: A list of associated dataset dicts with all the normalized information """ -def get_collection_datasets(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_collection_datasets(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_collection_datasets()' trigger method.") @@ -644,6 +697,8 @@ def get_collection_datasets(property_key, normalized_type, user_token, existing_ The target property key normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -656,10 +711,11 @@ def get_collection_datasets(property_key, normalized_type, user_token, existing_ str: The target property key str: Initial status of "New" """ -def set_dataset_status_new(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def set_dataset_status_new(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): # Always 'New' on dataset creation return property_key, 'New' + """ Trigger event method of getting a list of collections for this new Dataset @@ -669,6 +725,8 @@ def set_dataset_status_new(property_key, normalized_type, user_token, existing_d The target property key normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -681,7 +739,7 @@ def 
set_dataset_status_new(property_key, normalized_type, user_token, existing_d str: The target property key list: A list of associated collections with all the normalized information """ -def get_dataset_collections(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_dataset_collections(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_dataset_collections()' trigger method.") @@ -693,6 +751,7 @@ def get_dataset_collections(property_key, normalized_type, user_token, existing_ # as well as the ones defined as `exposed: false` in the yaml schema return property_key, schema_manager.normalize_entities_list_for_response(collections_list) + """ Trigger event method of getting the associated collection for this publication @@ -702,6 +761,8 @@ def get_dataset_collections(property_key, normalized_type, user_token, existing_ The target property key normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -714,7 +775,7 @@ def get_dataset_collections(property_key, normalized_type, user_token, existing_ str: The target property key dict: A dictionary representation of the associated collection with all the normalized information """ -def get_publication_associated_collection(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_publication_associated_collection(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_publication_associated_collection()' trigger method.") @@ -726,6 +787,7 @@ def 
get_publication_associated_collection(property_key, normalized_type, user_to # as well as the ones defined as `exposed: false` in the yaml schema return property_key, schema_manager.normalize_entity_result_for_response(collection_dict) + """ Trigger event method of getting the associated Upload for this Dataset @@ -735,6 +797,8 @@ def get_publication_associated_collection(property_key, normalized_type, user_to The target property key normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -747,7 +811,7 @@ def get_publication_associated_collection(property_key, normalized_type, user_to str: The target property key dict: A dict of associated Upload detail with all the normalized information """ -def get_dataset_upload(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_dataset_upload(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): return_dict = None if 'uuid' not in existing_data_dict: @@ -771,6 +835,8 @@ def get_dataset_upload(property_key, normalized_type, user_token, existing_data_ The target property key normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -778,7 +844,7 @@ def get_dataset_upload(property_key, normalized_type, user_token, existing_data_ new_data_dict : dict A merged dictionary that contains all possible input data to be used """ -def link_dataset_to_direct_ancestors(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def link_dataset_to_direct_ancestors(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in 
existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") @@ -802,6 +868,7 @@ def link_dataset_to_direct_ancestors(property_key, normalized_type, user_token, # No need to log raise + """ Trigger event method for creating or recreating linkages between this new Collection and the Datasets it contains @@ -811,6 +878,8 @@ def link_dataset_to_direct_ancestors(property_key, normalized_type, user_token, The target property key normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -818,7 +887,7 @@ def link_dataset_to_direct_ancestors(property_key, normalized_type, user_token, new_data_dict : dict A merged dictionary that contains all possible input data to be used """ -def link_collection_to_datasets(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def link_collection_to_datasets(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_collection_to_datasets()' trigger method.") @@ -842,6 +911,7 @@ def link_collection_to_datasets(property_key, normalized_type, user_token, exist # No need to log raise + """ Trigger event method of getting a list of direct ancestors for a given dataset or publication @@ -851,6 +921,8 @@ def link_collection_to_datasets(property_key, normalized_type, user_token, exist The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Dataset/Publication +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -863,7 
+935,7 @@ def link_collection_to_datasets(property_key, normalized_type, user_token, exist str: The target property key list: A list of associated direct ancestors with all the normalized information """ -def get_dataset_direct_ancestors(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_dataset_direct_ancestors(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_dataset_direct_ancestors()' trigger method.") @@ -885,6 +957,8 @@ def get_dataset_direct_ancestors(property_key, normalized_type, user_token, exis The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -897,7 +971,7 @@ def get_dataset_direct_ancestors(property_key, normalized_type, user_token, exis str: The target property key str: The relative directory path """ -def get_local_directory_rel_path(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_local_directory_rel_path(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_local_directory_rel_path()' trigger method.") @@ -934,6 +1008,8 @@ def get_local_directory_rel_path(property_key, normalized_type, user_token, exis The target property key normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -941,7 +1017,7 @@ def 
get_local_directory_rel_path(property_key, normalized_type, user_token, exis new_data_dict : dict A merged dictionary that contains all possible input data to be used """ -def link_to_previous_revision(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def link_to_previous_revision(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): try: if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_to_previous_revision()' trigger method.") @@ -973,6 +1049,7 @@ def link_to_previous_revision(property_key, normalized_type, user_token, existin except Exception as e: raise KeyError(e) + """ Given a string which contains multiple items, each separated by the substring specified by the 'separator' argument, and possibly also ending with 'separator', @@ -1009,6 +1086,7 @@ def _make_phrase_from_separator_delineated_str(separated_phrase:str, separator:s descriptions = separated_phrase.rsplit(separator) return new_separator.join(descriptions) + """ Given a string of metadata for a Donor which was returned from Neo4j, and a list of desired attribute names to extract from that metadata, return a dictionary containing lower-case version of each attribute found. @@ -1052,6 +1130,7 @@ def _get_attributes_from_donor_metadata(neo4j_donor_metadata: str, attribute_key pass return donor_grouping_concepts_dict + """ Given a age, race, and sex metadata for a Donor which was returned from Neo4j, generate an appropriate and consistent string phrase. 
@@ -1087,6 +1166,7 @@ def _get_age_age_units_race_sex_phrase(age:str=None, age_units:str='units', race else: return f"{age}-{age_units}-old {race} {sex}" + """ Trigger event method of auto generating the dataset title @@ -1096,6 +1176,8 @@ def _get_age_age_units_race_sex_phrase(age:str=None, age_units:str='units', race The target property key normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1108,7 +1190,7 @@ def _get_age_age_units_race_sex_phrase(age:str=None, age_units:str='units', race str: The target property key str: The generated dataset title """ -def get_dataset_title(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_dataset_title(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): MAX_ENTITY_LIST_LENGTH = 5 @@ -1285,6 +1367,8 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d The target property key normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1297,7 +1381,7 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d str: The target property key str: The uuid string of previous revision entity or None if not found """ -def get_previous_revision_uuid(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_previous_revision_uuid(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_previous_revision_uuid()' trigger method.") @@ -1317,6 +1401,8 @@ def 
get_previous_revision_uuid(property_key, normalized_type, user_token, existi The target property key normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1329,9 +1415,7 @@ def get_previous_revision_uuid(property_key, normalized_type, user_token, existi str: The target property key str: A list of the uuid strings of previous revision entity or an empty list if not found """ - - -def get_previous_revision_uuids(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_previous_revision_uuids(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError( "Missing 'uuid' key in 'existing_data_dict' during calling 'get_previous_revision_uuid()' trigger method.") @@ -1353,6 +1437,8 @@ def get_previous_revision_uuids(property_key, normalized_type, user_token, exist The target property key normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1365,7 +1451,7 @@ def get_previous_revision_uuids(property_key, normalized_type, user_token, exist str: The target property key str: The uuid string of next version entity or None if not found """ -def get_next_revision_uuid(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_next_revision_uuid(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_next_revision_uuid()' trigger method.") @@ -1376,7 +1462,30 @@ def get_next_revision_uuid(property_key, 
normalized_type, user_token, existing_d return property_key, next_revision_uuid -def get_creation_action_activity(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +""" +Trigger event method of generating `creation_action` + +Parameters +---------- +property_key : str + The target property key of the value to be generated +normalized_type : str + One of the types defined in the schema yaml: Activity, Collection, Donor, Sample, Dataset, Upload, Publication +request: Flask request object + The instance of Flask request passed in from application request +user_token: str + The user's globus nexus token +existing_data_dict : dict + A dictionary that contains all existing entity properties +new_data_dict : dict + A merged dictionary that contains all possible input data to be used + +Returns +------- +str: The target property key +str: The `creation_action` as string +""" +def get_creation_action_activity(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_creation_action_activity()' trigger method.") @@ -1399,6 +1508,8 @@ def get_creation_action_activity(property_key, normalized_type, user_token, exis The target property key normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1411,9 +1522,7 @@ def get_creation_action_activity(property_key, normalized_type, user_token, exis str: The target property key str: The list of uuid strings of next version entity or empty string if not found """ - - -def get_next_revision_uuids(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_next_revision_uuids(property_key, normalized_type, request, user_token, existing_data_dict, 
new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError( "Missing 'uuid' key in 'existing_data_dict' during calling 'get_next_revision_uuid()' trigger method.") @@ -1443,6 +1552,8 @@ def get_next_revision_uuids(property_key, normalized_type, user_token, existing_ The property key for which the original trigger method is defined normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1456,7 +1567,7 @@ def get_next_revision_uuids(property_key, normalized_type, user_token, existing_ ------- dict: The updated generated dict """ -def commit_thumbnail_file(property_key, normalized_type, user_token, existing_data_dict, new_data_dict, generated_dict): +def commit_thumbnail_file(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict, generated_dict): # The name of the property where the file information is stored target_property_key = 'thumbnail_file' @@ -1515,7 +1626,6 @@ def commit_thumbnail_file(property_key, normalized_type, user_token, existing_da raise - """ Trigger event method for removing the thumbnail file from a dataset during update @@ -1528,6 +1638,8 @@ def commit_thumbnail_file(property_key, normalized_type, user_token, existing_da The property key for which the original trigger method is defined normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1541,7 +1653,7 @@ def commit_thumbnail_file(property_key, normalized_type, user_token, existing_da ------- dict: The updated generated dict """ -def delete_thumbnail_file(property_key, normalized_type, user_token, existing_data_dict, new_data_dict, generated_dict): +def 
delete_thumbnail_file(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict, generated_dict): # The name of the property where the file information is stored target_property_key = 'thumbnail_file' @@ -1619,6 +1731,8 @@ def delete_thumbnail_file(property_key, normalized_type, user_token, existing_da The target property key normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1626,13 +1740,12 @@ def delete_thumbnail_file(property_key, normalized_type, user_token, existing_da new_data_dict : dict A merged dictionary that contains all possible input data to be used """ - -def update_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def update_status(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): # execute set_status_history - set_status_history(property_key, normalized_type, user_token, existing_data_dict, new_data_dict) + set_status_history(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict) #execute sync_component_dataset_status - sync_component_dataset_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict) + sync_component_dataset_status(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict) """ @@ -1644,6 +1757,8 @@ def update_status(property_key, normalized_type, user_token, existing_data_dict, The target property key normalized_type : str One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1651,7 +1766,7 @@ def update_status(property_key, normalized_type, user_token, existing_data_dict, 
new_data_dict : dict A merged dictionary that contains all possible input data to be used """ -def sync_component_dataset_status(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def sync_component_dataset_status(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") uuid = existing_data_dict['uuid'] @@ -1687,6 +1802,8 @@ def sync_component_dataset_status(property_key, normalized_type, user_token, exi The target property key normalized_type : str One of the types defined in the schema yaml: Donor +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1694,7 +1811,7 @@ def sync_component_dataset_status(property_key, normalized_type, user_token, exi new_data_dict : dict A merged dictionary that contains all possible input data to be used """ -def link_donor_to_lab(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def link_donor_to_lab(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_donor_to_lab()' trigger method.") @@ -1755,6 +1872,8 @@ def link_donor_to_lab(property_key, normalized_type, user_token, existing_data_d The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Sample +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1767,8 +1886,8 @@ def link_donor_to_lab(property_key, normalized_type, user_token, existing_data_d str: The target property key list: 
The file info dicts in a list """ -def commit_metadata_files(property_key, normalized_type, user_token, existing_data_dict, new_data_dict, generated_dict): - return _commit_files('metadata_files', property_key, normalized_type, user_token, existing_data_dict, new_data_dict, generated_dict) +def commit_metadata_files(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict, generated_dict): + return _commit_files('metadata_files', property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict, generated_dict) """ @@ -1786,6 +1905,8 @@ def commit_metadata_files(property_key, normalized_type, user_token, existing_da The target property key normalized_type : str One of the types defined in the schema yaml: Sample +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1802,8 +1923,8 @@ def commit_metadata_files(property_key, normalized_type, user_token, existing_da str: The target property key list: The file info dicts in a list """ -def delete_metadata_files(property_key, normalized_type, user_token, existing_data_dict, new_data_dict, generated_dict): - return _delete_files('metadata_files', property_key, normalized_type, user_token, existing_data_dict, new_data_dict, generated_dict) +def delete_metadata_files(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict, generated_dict): + return _delete_files('metadata_files', property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict, generated_dict) """ @@ -1815,6 +1936,8 @@ def delete_metadata_files(property_key, normalized_type, user_token, existing_da The target property key normalized_type : str One of the types defined in the schema yaml: Sample +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token 
existing_data_dict : dict @@ -1822,7 +1945,7 @@ def delete_metadata_files(property_key, normalized_type, user_token, existing_da new_data_dict : dict A merged dictionary that contains all possible input data to be used """ -def link_sample_to_direct_ancestor(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def link_sample_to_direct_ancestor(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_sample_to_direct_ancestor()' trigger method.") @@ -1859,6 +1982,8 @@ def link_sample_to_direct_ancestor(property_key, normalized_type, user_token, ex The target property key normalized_type : str One of the types defined in the schema yaml: Publication +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1866,7 +1991,7 @@ def link_sample_to_direct_ancestor(property_key, normalized_type, user_token, ex new_data_dict : dict A merged dictionary that contains all possible input data to be used """ -def link_publication_to_associated_collection(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def link_publication_to_associated_collection(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_publication_to_associated_collection()' trigger method.") @@ -1887,6 +2012,7 @@ def link_publication_to_associated_collection(property_key, normalized_type, use # No need to log raise + """ Trigger event method of getting the parent of a Sample, which is a Donor @@ -1896,6 +2022,8 @@ def link_publication_to_associated_collection(property_key, normalized_type, use The target property key of the value to be generated 
normalized_type : str One of the types defined in the schema yaml: Sample +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1908,7 +2036,7 @@ def link_publication_to_associated_collection(property_key, normalized_type, use str: The target property key dict: The direct ancestor entity (either another Sample or a Donor) with all the normalized information """ -def get_sample_direct_ancestor(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_sample_direct_ancestor(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_sample_direct_ancestor()' trigger method.") @@ -1935,6 +2063,8 @@ def get_sample_direct_ancestor(property_key, normalized_type, user_token, existi The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Publication +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1946,12 +2076,13 @@ def get_sample_direct_ancestor(property_key, normalized_type, user_token, existi ------- str: The date part YYYY-MM-DD of ISO 8601 """ -def set_publication_date(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def set_publication_date(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): # We only store the date part 'YYYY-MM-DD', base on the ISO 8601 format, it's fine if the user entered the time part date_obj = datetime.fromisoformat(new_data_dict[property_key]) return property_key, date_obj.date().isoformat() + """ Trigger event method setting the dataset_type immutable property for a Publication. 
@@ -1961,6 +2092,8 @@ def set_publication_date(property_key, normalized_type, user_token, existing_dat The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Publication +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -1973,7 +2106,7 @@ def set_publication_date(property_key, normalized_type, user_token, existing_dat str: The target property key str: Immutable dataset_type of "Publication" """ -def set_publication_dataset_type(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def set_publication_dataset_type(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): # Count upon the dataset_type generated: true property in provenance_schema.yaml to assure the # request does not contain a value which will be overwritten. return property_key, 'Publication' @@ -1991,6 +2124,8 @@ def set_publication_dataset_type(property_key, normalized_type, user_token, exis The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Upload +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -2003,7 +2138,7 @@ def set_publication_dataset_type(property_key, normalized_type, user_token, exis str: The target property key str: The "New" status """ -def set_upload_status_new(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def set_upload_status_new(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): return property_key, 'New' @@ -2015,6 +2150,8 @@ def set_upload_status_new(property_key, normalized_type, user_token, existing_da The target property key normalized_type : str One of the types defined in the 
schema yaml: Upload +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -2022,7 +2159,7 @@ def set_upload_status_new(property_key, normalized_type, user_token, existing_da new_data_dict : dict A merged dictionary that contains all possible input data to be used """ -def link_upload_to_lab(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def link_upload_to_lab(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_upload_to_lab()' trigger method.") @@ -2046,6 +2183,7 @@ def link_upload_to_lab(property_key, normalized_type, user_token, existing_data_ # No need to log raise + """ Trigger event method of building linkages between this Submission and the given datasets @@ -2055,6 +2193,8 @@ def link_upload_to_lab(property_key, normalized_type, user_token, existing_data_ The target property key normalized_type : str One of the types defined in the schema yaml: Upload +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -2062,7 +2202,7 @@ def link_upload_to_lab(property_key, normalized_type, user_token, existing_data_ new_data_dict : dict A merged dictionary that contains all possible input data to be used """ -def link_datasets_to_upload(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def link_datasets_to_upload(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_datasets_to_upload()' trigger method.") @@ -2094,6 +2234,8 @@ def link_datasets_to_upload(property_key, 
normalized_type, user_token, existing_ The target property key normalized_type : str One of the types defined in the schema yaml: Upload +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -2101,7 +2243,7 @@ def link_datasets_to_upload(property_key, normalized_type, user_token, existing_ new_data_dict : dict A merged dictionary that contains all possible input data to be used """ -def unlink_datasets_from_upload(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def unlink_datasets_from_upload(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'unlink_datasets_from_upload()' trigger method.") @@ -2133,6 +2275,8 @@ def unlink_datasets_from_upload(property_key, normalized_type, user_token, exist The target property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Upload +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -2144,7 +2288,7 @@ def unlink_datasets_from_upload(property_key, normalized_type, user_token, exist str: The target property key list: A list of associated dataset dicts with all the normalized information """ -def get_upload_datasets(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def get_upload_datasets(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'get_upload_datasets()' trigger method.") @@ -2177,6 +2321,8 @@ def get_upload_datasets(property_key, normalized_type, user_token, existing_data The target 
property key of the value to be generated normalized_type : str One of the types defined in the schema yaml: Activity +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -2189,7 +2335,7 @@ def get_upload_datasets(property_key, normalized_type, user_token, existing_data str: The target property key str: The creation_action string """ -def set_activity_creation_action(property_key, normalized_type, user_token, existing_data_dict, new_data_dict): +def set_activity_creation_action(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): if 'normalized_entity_type' not in new_data_dict: raise KeyError("Missing 'normalized_entity_type' key in 'existing_data_dict' during calling 'set_activity_creation_action()' trigger method.") if new_data_dict and new_data_dict.get('creation_action'): @@ -2234,6 +2380,8 @@ def set_activity_creation_action(property_key, normalized_type, user_token, exis The property key for which the original trigger method is defined normalized_type : str One of the types defined in the schema yaml: Donor, Sample +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -2247,7 +2395,7 @@ def set_activity_creation_action(property_key, normalized_type, user_token, exis ------- dict: The updated generated dict """ -def _commit_files(target_property_key, property_key, normalized_type, user_token, existing_data_dict, new_data_dict, generated_dict): +def _commit_files(target_property_key, property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict, generated_dict): # Do nothing if no files to add are provided (missing or empty property) # For image files the property name is "image_files_to_add" # For metadata files the property name is "metadata_files_to_add" @@ -2347,6 
+2495,8 @@ def _commit_files(target_property_key, property_key, normalized_type, user_token The property key for which the original trigger method is defined normalized_type : str One of the types defined in the schema yaml: Donor, Sample +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token existing_data_dict : dict @@ -2360,7 +2510,7 @@ def _commit_files(target_property_key, property_key, normalized_type, user_token ------- dict: The updated generated dict """ -def _delete_files(target_property_key, property_key, normalized_type, user_token, existing_data_dict, new_data_dict, generated_dict): +def _delete_files(target_property_key, property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict, generated_dict): #do nothing if no files to delete are provided in the field specified by property_key if (not property_key in new_data_dict) or (not new_data_dict[property_key]): return generated_dict From f0a687c1f903b861b44da1876b036c6b6b4cbd6d Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Tue, 23 Sep 2025 18:39:01 -0400 Subject: [PATCH 02/17] Check reindex query string --- src/app.py | 118 +++++----- src/schema/schema_manager.py | 412 ++++++++++++++++++---------------- src/schema/schema_triggers.py | 91 ++++---- 3 files changed, 320 insertions(+), 301 deletions(-) diff --git a/src/app.py b/src/app.py index 2374fcbb..115a60dc 100644 --- a/src/app.py +++ b/src/app.py @@ -508,7 +508,7 @@ def get_ancestor_organs(id): # Skip executing the trigger method to get Sample.direct_ancestor properties_to_skip = ['direct_ancestor'] - complete_entities_list = schema_manager.get_complete_entities_list(token, organs, properties_to_skip) + complete_entities_list = schema_manager.get_complete_entities_list(request, token, organs, properties_to_skip) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) @@ 
-613,7 +613,7 @@ def get_entity_visibility(id): # Otherwise re-generate on the fly. To verify if a Collection is public, it is # necessary to have its Datasets, which are populated as triggered data, so # pull back the complete entity - complete_dict = schema_manager.get_complete_entity_result(token, entity_dict) + complete_dict = schema_manager.get_complete_entity_result(request, token, entity_dict) # Determine if the entity is publicly visible base on its data, only. entity_scope = _get_entity_visibility(normalized_entity_type=normalized_entity_type, entity_dict=complete_dict) @@ -673,7 +673,8 @@ def get_provenance_metadata_by_id_for_auth_level(id): # Get the generated complete entity result from cache if exists # Otherwise re-generate on the fly - complete_dict = schema_manager.get_complete_entity_result(token=token + complete_dict = schema_manager.get_complete_entity_result(request=request + , token=token , entity_dict=dataset_dict) # Determine if the entity is publicly visible base on its data, only. @@ -777,7 +778,7 @@ def get_entity_by_id(id): # Get the generated complete entity result from cache if exists # Otherwise re-generate on the fly - complete_dict = schema_manager.get_complete_entity_result(token, entity_dict) + complete_dict = schema_manager.get_complete_entity_result(request, token, entity_dict) # Determine if the entity is publicly visible base on its data, only. 
# To verify if a Collection is public, it is necessary to have its Datasets, which @@ -1038,7 +1039,7 @@ def get_entities_by_type(entity_type): # Get back a list of entity dicts for the given entity type entities_list = app_neo4j_queries.get_entities_by_type(neo4j_driver_instance, normalized_entity_type) - complete_entities_list = schema_manager.get_complete_entities_list(token, entities_list, generated_properties_to_skip) + complete_entities_list = schema_manager.get_complete_entities_list(request, token, entities_list, generated_properties_to_skip) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) @@ -1119,7 +1120,7 @@ def create_entity(entity_type): # # Check if re-indexing is to be suppressed after entity creation. try: - supress_reindex = _suppress_reindex() + supress_reindex = schema_manager.suppress_reindex(request) except Exception as e: bad_request_error(e) @@ -1196,7 +1197,7 @@ def create_entity(entity_type): # If the preceding "additional validations" did not raise an error, # generate 'before_create_trigger' data and create the entity details in Neo4j - merged_dict = create_entity_details(request, normalized_entity_type, request, user_token, json_data_dict) + merged_dict = create_entity_details(request, normalized_entity_type, user_token, json_data_dict) # For Donor: link to parent Lab node # For Sample: link to existing direct ancestor @@ -1239,7 +1240,7 @@ def create_entity(entity_type): properties_to_skip = [] # Generate the filtered or complete entity dict to send back - complete_dict = schema_manager.get_complete_entity_result(user_token, merged_dict, properties_to_skip) + complete_dict = schema_manager.get_complete_entity_result(request, user_token, merged_dict, properties_to_skip) # Will also filter the result based on schema normalized_complete_dict = schema_manager.normalize_entity_result_for_response(complete_dict) @@ -1428,7 +1429,7 @@ def update_entity(id): # # Check 
if re-indexing is to be suppressed after entity creation. try: - suppress_reindex = _suppress_reindex() + suppress_reindex = schema_manager.suppress_reindex(request) except Exception as e: bad_request_error(e) @@ -1451,7 +1452,7 @@ def update_entity(id): bad_request_error(f"The uuid: {direct_ancestor_uuid} is not a Donor neither a Sample, cannot be used as the direct ancestor of this Sample") # Generate 'before_update_triiger' data and update the entity details in Neo4j - merged_updated_dict = update_entity_details(request, normalized_entity_type, request, user_token, json_data_dict, entity_dict) + merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) # Handle linkages update via `after_update_trigger` methods if has_direct_ancestor_uuid: @@ -1480,7 +1481,7 @@ def update_entity(id): associated_collection_dict = query_target_entity(json_data_dict['associated_collection_uuid'], user_token) # Generate 'before_update_trigger' data and update the entity details in Neo4j - merged_updated_dict = update_entity_details(request, normalized_entity_type, request, user_token, json_data_dict, entity_dict) + merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) # Handle linkages update via `after_update_trigger` methods if has_direct_ancestor_uuids or has_associated_collection_uuid or has_updated_status: @@ -1495,20 +1496,20 @@ def update_entity(id): has_dataset_uuids_to_unlink = True # Generate 'before_update_trigger' data and update the entity details in Neo4j - merged_updated_dict = update_entity_details(request, normalized_entity_type, request, user_token, json_data_dict, entity_dict) + merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) # Handle linkages update via `after_update_trigger` methods if has_dataset_uuids_to_link or has_dataset_uuids_to_unlink or has_updated_status: 
after_update(normalized_entity_type, user_token, merged_updated_dict) elif schema_manager.entity_type_instanceof(normalized_entity_type, 'Collection'): # Generate 'before_update_trigger' data and update the entity details in Neo4j - merged_updated_dict = update_entity_details(request, normalized_entity_type, request, user_token, json_data_dict, entity_dict) + merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) # Handle linkages update via `after_update_trigger` methods after_update(normalized_entity_type, user_token, merged_updated_dict) else: # Generate 'before_update_trigger' data and update the entity details in Neo4j - merged_updated_dict = update_entity_details(request, normalized_entity_type, request, user_token, json_data_dict, entity_dict) + merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) # Remove the cached entities if Memcached is being used # DO NOT update the cache with new entity dict because the returned dict from PUT (some properties maybe skipped) @@ -1634,7 +1635,7 @@ def get_ancestors(id): 'previous_revision_uuid' ] - complete_entities_list = schema_manager.get_complete_entities_list(token, ancestors_list, properties_to_skip) + complete_entities_list = schema_manager.get_complete_entities_list(request, token, ancestors_list, properties_to_skip) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) @@ -1719,7 +1720,7 @@ def get_descendants(id): 'previous_revision_uuid' ] - complete_entities_list = schema_manager.get_complete_entities_list(user_token, descendants_list, properties_to_skip) + complete_entities_list = schema_manager.get_complete_entities_list(request, user_token, descendants_list, properties_to_skip) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) @@ 
-1842,7 +1843,7 @@ def get_parents(id): 'previous_revision_uuid' ] - complete_entities_list = schema_manager.get_complete_entities_list(token, parents_list, properties_to_skip) + complete_entities_list = schema_manager.get_complete_entities_list(request, token, parents_list, properties_to_skip) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) @@ -1926,7 +1927,7 @@ def get_children(id): 'previous_revision_uuid' ] - complete_entities_list = schema_manager.get_complete_entities_list(user_token, children_list, properties_to_skip) + complete_entities_list = schema_manager.get_complete_entities_list(request, user_token, children_list, properties_to_skip) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) @@ -2043,7 +2044,7 @@ def get_siblings(id): 'local_directory_rel_path' ] - complete_entities_list = schema_manager.get_complete_entities_list(token, sibling_list, properties_to_skip) + complete_entities_list = schema_manager.get_complete_entities_list(request, token, sibling_list, properties_to_skip) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) filtered_final_result = [] @@ -2158,7 +2159,7 @@ def get_tuplets(id): 'local_directory_rel_path' ] - complete_entities_list = schema_manager.get_complete_entities_list(token, tuplet_list, properties_to_skip) + complete_entities_list = schema_manager.get_complete_entities_list(request, token, tuplet_list, properties_to_skip) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) filtered_final_result = [] @@ -2230,7 +2231,7 @@ def get_previous_revisions(id): 'direct_ancestors' ] - complete_entities_list = schema_manager.get_complete_entities_list(user_token, descendants_list, properties_to_skip) + complete_entities_list = 
schema_manager.get_complete_entities_list(request, user_token, descendants_list, properties_to_skip) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) @@ -2294,7 +2295,7 @@ def get_next_revisions(id): 'direct_ancestors' ] - complete_entities_list = schema_manager.get_complete_entities_list(user_token, descendants_list, properties_to_skip) + complete_entities_list = schema_manager.get_complete_entities_list(request, user_token, descendants_list, properties_to_skip) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) @@ -2386,7 +2387,7 @@ def get_collections(id): 'previous_revision_uuid' ] - complete_entities_list = schema_manager.get_complete_entities_list(token, collection_list, properties_to_skip) + complete_entities_list = schema_manager.get_complete_entities_list(request, token, collection_list, properties_to_skip) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) @@ -2493,7 +2494,7 @@ def get_uploads(id): 'previous_revision_uuid' ] - complete_entities_list = schema_manager.get_complete_entities_list(token, uploads_list, properties_to_skip) + complete_entities_list = schema_manager.get_complete_entities_list(request, token, uploads_list, properties_to_skip) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) @@ -2844,7 +2845,7 @@ def get_dataset_latest_revision(id): ] # On entity retrieval, the 'on_read_trigger' doesn't really need a token - complete_dict = schema_manager.get_complete_entity_result(token, latest_revision_dict, properties_to_skip) + complete_dict = schema_manager.get_complete_entity_result(request, token, latest_revision_dict, properties_to_skip) # Also normalize the result based on schema final_result = 
schema_manager.normalize_entity_result_for_response(complete_dict) @@ -2979,7 +2980,7 @@ def get_dataset_revision_number(id): # # if property_key is None: # for revision in sorted_revisions_list_merged: -# complete_revision_list = schema_manager.get_complete_entities_list(token, revision, properties_to_skip) +# complete_revision_list = schema_manager.get_complete_entities_list(request, token, revision, properties_to_skip) # normal = schema_manager.normalize_entities_list_for_response(complete_revision_list) # normalized_revisions_list.append(normal) # else: @@ -3081,9 +3082,9 @@ def retract_dataset(id): bad_request_error(e) # No need to call after_update() afterwards because retraction doesn't call any after_update_trigger methods - merged_updated_dict = update_entity_details(request, normalized_entity_type, request, token, json_data_dict, entity_dict) + merged_updated_dict = update_entity_details(request, normalized_entity_type, token, json_data_dict, entity_dict) - complete_dict = schema_manager.get_complete_entity_result(token, merged_updated_dict) + complete_dict = schema_manager.get_complete_entity_result(request, token, merged_updated_dict) # Will also filter the result based on schema normalized_complete_dict = schema_manager.normalize_entity_result_for_response(complete_dict) @@ -3159,7 +3160,7 @@ def get_revisions_list(id): 'upload', 'title' ] - complete_revisions_list = schema_manager.get_complete_entities_list(token, sorted_revisions_list, properties_to_skip) + complete_revisions_list = schema_manager.get_complete_entities_list(request, token, sorted_revisions_list, properties_to_skip) normalized_revisions_list = schema_manager.normalize_entities_list_for_response(complete_revisions_list) fields_to_exclude = schema_manager.get_fields_to_exclude(normalized_entity_type) # Only check the very last revision (the first revision dict since normalized_revisions_list is already sorted DESC) @@ -3242,7 +3243,7 @@ def get_associated_organs_from_dataset(id): if 
len(associated_organs) < 1: not_found_error("the dataset does not have any associated organs") - complete_entities_list = schema_manager.get_complete_entities_list(token, associated_organs) + complete_entities_list = schema_manager.get_complete_entities_list(request, token, associated_organs) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) @@ -3302,7 +3303,7 @@ def get_associated_samples_from_dataset(id): if len(associated_samples) < 1: not_found_error("the dataset does not have any associated samples") - complete_entities_list = schema_manager.get_complete_entities_list(token, associated_samples) + complete_entities_list = schema_manager.get_complete_entities_list(request, token, associated_samples) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) @@ -3362,7 +3363,7 @@ def get_associated_donors_from_dataset(id): if len(associated_donors) < 1: not_found_error("the dataset does not have any associated donors") - complete_entities_list = schema_manager.get_complete_entities_list(token, associated_donors) + complete_entities_list = schema_manager.get_complete_entities_list(request, token, associated_donors) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(complete_entities_list) @@ -4064,7 +4065,7 @@ def multiple_components(): # # Check if re-indexing is to be suppressed after entity creation. 
try: - suppress_reindex = _suppress_reindex() + suppress_reindex = schema_manager.suppress_reindex(request) except Exception as e: bad_request_error(e) @@ -4096,7 +4097,7 @@ def multiple_components(): # Remove dataset_link_abs_dir once more before entity creation dataset_link_abs_dir = dataset.pop('dataset_link_abs_dir', None) # Generate the filtered or complete entity dict to send back - complete_dict = schema_manager.get_complete_entity_result(user_token, dataset, properties_to_skip) + complete_dict = schema_manager.get_complete_entity_result(request, user_token, dataset, properties_to_skip) # Will also filter the result based on schema normalized_complete_dict = schema_manager.normalize_entity_result_for_response(complete_dict) @@ -4558,7 +4559,8 @@ def _get_dataset_associated_metadata(dataset_dict, dataset_visibility, valid_use # Use the internal token to query the target entity to assure it is returned. This way public # entities can be accessed even if valid_user_token is None. internal_token = auth_helper_instance.getProcessSecret() - complete_entities_list = schema_manager.get_complete_entities_list( token=internal_token + complete_entities_list = schema_manager.get_complete_entities_list( request=request + , token=internal_token , entities_list=associated_entities) # Final result after normalization final_result = schema_manager.normalize_entities_list_for_response(entities_list=complete_entities_list) @@ -4576,30 +4578,16 @@ def _get_dataset_associated_metadata(dataset_dict, dataset_visibility, valid_use return final_result -# Use the Flask request.args MultiDict to see if 'reindex' is a URL parameter passed in with the -# request and if it indicates reindexing should be supressed. Default to reindexing in all other cases. 
-def _suppress_reindex() -> bool: - if 'reindex' not in request.args: - return False - reindex_str = request.args.get('reindex').lower() - if reindex_str == 'false': - return True - elif reindex_str == 'true': - return False - raise Exception(f"The value of the 'reindex' parameter must be True or False (case-insensitive)." - f" '{request.args.get('reindex')}' is not recognized.") """ Generate 'before_create_triiger' data and create the entity details in Neo4j Parameters ---------- -request : flask.Request object - The incoming request -normalized_entity_type : str - One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication request: Flask request object The instance of Flask request passed in from application request +normalized_entity_type : str + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication user_token: str The user's globus groups token json_data_dict: dict @@ -4610,7 +4598,7 @@ def _suppress_reindex() -> bool: dict A dict of all the newly created entity detials """ -def create_entity_details(request, normalized_entity_type, request, user_token, json_data_dict): +def create_entity_details(request, normalized_entity_type, user_token, json_data_dict): # Get user info based on request user_info_dict = schema_manager.get_user_info(request) @@ -4746,12 +4734,10 @@ def create_entity_details(request, normalized_entity_type, request, user_token, Parameters ---------- -request : flask.Request object - The incoming request -normalized_entity_type : str - Must be "Sample" in this case request: Flask request object The instance of Flask request passed in from application request +normalized_entity_type : str + Must be "Sample" in this case user_token: str The user's globus groups token json_data_dict: dict @@ -4764,7 +4750,7 @@ def create_entity_details(request, normalized_entity_type, request, user_token, list A list of all the newly generated ids via uuid-api """ -def 
create_multiple_samples_details(request, normalized_entity_type, request, user_token, json_data_dict, count): +def create_multiple_samples_details(request, normalized_entity_type, user_token, json_data_dict, count): # Get user info based on request user_info_dict = schema_manager.get_user_info(request) @@ -4899,12 +4885,10 @@ def create_multiple_samples_details(request, normalized_entity_type, request, us Parameters ---------- -request : flask.Request object - The incoming request -normalized_entity_type : str - Must be "Dataset" in this case request: Flask request object The instance of Flask request passed in from application request +normalized_entity_type : str + Must be "Dataset" in this case user_token: str The user's globus groups token json_data_dict_list: list @@ -4917,7 +4901,7 @@ def create_multiple_samples_details(request, normalized_entity_type, request, us list A list of all the newly created datasets with generated fields represented as dictionaries """ -def create_multiple_component_details(request, normalized_entity_type, request, user_token, json_data_dict_list, creation_action): +def create_multiple_component_details(request, normalized_entity_type, user_token, json_data_dict_list, creation_action): # Get user info based on request user_info_dict = schema_manager.get_user_info(request) direct_ancestor = json_data_dict_list[0].get('direct_ancestor_uuids')[0] @@ -5056,12 +5040,10 @@ def after_create(normalized_entity_type, request, user_token, merged_data_dict): Parameters ---------- -request : flask.Request object - The incoming request -normalized_entity_type : str - One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication request: Flask request object The instance of Flask request passed in from application request +normalized_entity_type : str + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication user_token: str The user's globus groups token 
json_data_dict: dict @@ -5074,7 +5056,7 @@ def after_create(normalized_entity_type, request, user_token, merged_data_dict): dict A dict of all the updated entity detials """ -def update_entity_details(request, normalized_entity_type, request, user_token, json_data_dict, existing_entity_dict): +def update_entity_details(request, normalized_entity_type, user_token, json_data_dict, existing_entity_dict): # Get user info based on request user_info_dict = schema_manager.get_user_info(request) @@ -5530,7 +5512,7 @@ def _get_metadata_by_id(entity_id:str=None, metadata_scope:MetadataScopeEnum=Met # Get the entity result of the indexable dictionary from cache if exists, otherwise regenerate and cache metadata_dict = schema_manager.get_index_metadata(token, entity_dict) \ if metadata_scope==MetadataScopeEnum.INDEX \ - else schema_manager.get_complete_entity_result(token, entity_dict) + else schema_manager.get_complete_entity_result(request, token, entity_dict) # Determine if the entity is publicly visible base on its data, only. 
# To verify if a Collection is public, it is necessary to have its Datasets, which diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 951c66b3..16ceca56 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -616,6 +616,8 @@ def remove_transient_and_none_values(merged_dict, normalized_entity_type): Parameters ---------- +request: Flask request object + The instance of Flask request passed in from application request token: str Either the user's globus nexus token or the internal token entity_dict : dict @@ -628,7 +630,7 @@ def remove_transient_and_none_values(merged_dict, normalized_entity_type): dict A dictionary of complete entity with all the generated 'on_read_trigger' data """ -def get_complete_entity_result(token, entity_dict, properties_to_skip = []): +def get_complete_entity_result(request, token, entity_dict, properties_to_skip = []): global _memcached_client global _memcached_prefix @@ -658,6 +660,7 @@ def get_complete_entity_result(token, entity_dict, properties_to_skip = []): # Pass {} since no new_data_dict for 'on_read_trigger' generated_on_read_trigger_data_dict = generate_triggered_data( trigger_type=TriggerTypeEnum.ON_READ , normalized_class=entity_type + , request=request , user_token=token , existing_data_dict=entity_dict , new_data_dict={} @@ -717,120 +720,14 @@ def get_index_metadata(token, entity_dict, properties_to_skip=[]): ,properties_to_skip=properties_to_skip) return metadata_dict -""" -Generate the entity metadata by reading Neo4j data and appropriate triggers based upon the scope of -metadata requested e.g. complete data for a another service, indexing data for an OpenSearch document, etc. 
- -Parameters ----------- -token: str - Either the user's globus nexus token or the internal token -entity_dict : dict - The entity dict based on neo4j record -metadata_scope: - A recognized scope from the SchemaConstants, controlling the triggers which are fired and elements - from Neo4j which are retained. -properties_to_skip : list - Any properties to skip running triggers - -Returns -------- -dict - A dictionary of metadata appropriate for the metadata_scope argument value. -""" -def _get_metadata_result(token, entity_dict, metadata_scope:MetadataScopeEnum, properties_to_skip=[]): - global _memcached_client - global _memcached_prefix - - complete_entity = {} - - # In case entity_dict is None or - # an incorrectly created entity that doesn't have the `entity_type` property - if entity_dict and ('entity_type' in entity_dict) and ('uuid' in entity_dict): - entity_uuid = entity_dict['uuid'] - entity_type = entity_dict['entity_type'] - cache_result = None - - # Need both client and prefix when fetching the cache - # Do NOT fetch cache if properties_to_skip is specified - if _memcached_client and _memcached_prefix and (not properties_to_skip): - cache_key = f'{_memcached_prefix}_complete_index_{entity_uuid}' - cache_result = _memcached_client.get(cache_key) - - # Use the cached data if found and still valid - # Otherwise, calculate and add to cache - if cache_result is None: - if _memcached_client and _memcached_prefix: - logger.info( - f'Cache of complete entity of {entity_type} {entity_uuid} not found or expired at time {datetime.now()}') - - if metadata_scope == MetadataScopeEnum.COMPLETE: - # No error handling here since if a 'on_read_trigger' method fails, - # the property value will be the error message - # Pass {} since no new_data_dict for 'on_read_trigger' - #generated_on_read_trigger_data_dict = generate_triggered_data('on_read_trigger', entity_type, token, - # entity_dict, {}, properties_to_skip) - generated_on_read_trigger_data_dict = 
generate_triggered_data( trigger_type=TriggerTypeEnum.ON_READ - , normalized_class=entity_type - , user_token=token - , existing_data_dict=entity_dict - , new_data_dict={} - , properties_to_skip=properties_to_skip) - - # Merge the entity info and the generated on read data into one dictionary - complete_entity_dict = {**entity_dict, **generated_on_read_trigger_data_dict} - - # Remove properties of None value - metadata_dict = remove_none_values(complete_entity_dict) - elif metadata_scope == MetadataScopeEnum.INDEX: - # No error handling here since if a 'on_index_trigger' method fails, - # the property value will be the error message - # Pass {} since no new_data_dict for 'on_index_trigger' - generated_on_index_trigger_data_dict = generate_triggered_data( trigger_type=TriggerTypeEnum.ON_INDEX - , normalized_class=entity_type - , user_token=token - , existing_data_dict=entity_dict - , new_data_dict={} - , properties_to_skip=properties_to_skip) - - # Merge the entity info and the generated on read data into one dictionary - complete_entity_dict = {**entity_dict, **generated_on_index_trigger_data_dict} - - # Remove properties of None value - metadata_dict = remove_none_values(complete_entity_dict) - else: - # Merge the entity info and the generated on read data into one dictionary - metadata_dict = {**entity_dict} - - # Need both client and prefix when creating the cache - # Do NOT cache when properties_to_skip is specified - if _memcached_client and _memcached_prefix and (not properties_to_skip): - logger.info(f'Creating complete entity cache of {entity_type} {entity_uuid} at time {datetime.now()}') - - cache_key = f'{_memcached_prefix}_complete_index_{entity_uuid}' - _memcached_client.set(cache_key, metadata_dict, expire=SchemaConstants.MEMCACHED_TTL) - - logger.debug( - f"Following is the complete {entity_type} cache created at time {datetime.now()} using key {cache_key}:") - logger.debug(metadata_dict) - else: - logger.info(f'Using complete entity cache of 
{entity_type} {entity_uuid} at time {datetime.now()}') - logger.debug(cache_result) - - metadata_dict = cache_result - else: - # Just return the original entity_dict otherwise - metadata_dict = entity_dict - - # One final return - return metadata_dict - """ Generate the complete entity records as well as result filtering for response Parameters ---------- +request: Flask request object + The instance of Flask request passed in from application request token: str Either the user's globus nexus token or the internal token entities_list : list @@ -843,13 +740,13 @@ def _get_metadata_result(token, entity_dict, metadata_scope:MetadataScopeEnum, p list A list a complete entity dictionaries with all the normalized information """ -def get_complete_entities_list(token, entities_list, properties_to_skip = []): +def get_complete_entities_list(request, token, entities_list, properties_to_skip = []): complete_entities_list = [] # Use a pool of threads to execute the time-consuming iteration asynchronously to avoid timeout - Zhou 2/6/2025 with concurrent.futures.ThreadPoolExecutor() as executor: - helper_func = lambda args: get_complete_entity_result(args[0], args[1], args[2]) - args_list = [(token, entity_dict, properties_to_skip) for entity_dict in entities_list] + helper_func = lambda args: get_complete_entity_result(args[0], args[1], args[2], args[3]) + args_list = [(request, token, entity_dict, properties_to_skip) for entity_dict in entities_list] # The order of donors/organs/samples lists are not gurenteed with using `executor.submit()` # `executor.map()` maintains the same order of results as the original submitted tasks @@ -969,84 +866,6 @@ def normalize_document_result_for_response(entity_dict, properties_to_exclude=[] , metadata_scope=MetadataScopeEnum.INDEX , properties_to_skip=properties_to_exclude) -""" -Normalize the entity result by filtering the properties to those appropriate for the -scope of metadata requested e.g. 
complete data for a another service, indexing data for an OpenSearch document, etc. - -Properties that are not defined in the yaml schema and properties marked as `exposed: false` in the yaml schema are -removed. Properties are also filter based upon the metadata_scope argument e.g. properties lacking `indexed: true` -marking in the yaml schema are removed when `metadata_scope` has a value of `MetadataScopeEnum.INDEX`. - -Parameters ----------- -entity_dict : dict - Either a neo4j node converted dict or metadata dict generated from get_index_metadata() -metadata_scope: - A recognized scope from the SchemaConstants, controlling the triggers which are fired and elements - from Neo4j which are retained. Default is MetadataScopeEnum.INDEX. -properties_to_exclude : list - Any additional properties to exclude from the response - -Returns -------- -dict - An entity metadata dictionary with keys that are all normalized appropriately for the metadata_scope argument value. -""" -def _normalize_metadata(entity_dict, metadata_scope:MetadataScopeEnum, properties_to_skip=[]): - global _schema - - # When the entity_dict is unavailable or the entity was incorrectly created, do not - # try to normalize. 
- if not entity_dict or 'entity_type' not in entity_dict: - return {} - - normalized_metadata = {} - - normalized_entity_type = entity_dict['entity_type'] - properties = _schema['ENTITIES'][normalized_entity_type]['properties'] - - for key in entity_dict: - # Only return the properties defined in the schema yaml - # Exclude additional schema yaml properties, if specified - if key not in properties: - # Skip Neo4j entity properties not found in the schema yaml - continue - if key in properties_to_skip: - # Skip properties if directed by the calling function - continue - if entity_dict[key] is None: - # Do not include properties in the metadata if they are empty - continue - if 'exposed' in properties[key] and \ - properties[key]['exposed'] is False: - # Do not include properties in the metadata if they are not exposed - continue - if metadata_scope is MetadataScopeEnum.INDEX and \ - 'indexed' in properties[key] and \ - properties[key]['indexed'] is False: - # Do not include properties in metadata for indexing if they are not True i.e. 
False or non-boolean - continue - # Only run convert_str_literal() on string representation of Python dict and list with removing control characters - # No convertion for string representation of Python string, meaning that can still contain control characters - if entity_dict[key] and (properties[key]['type'] in ['list', 'json_string']): - logger.info( - f"Executing convert_str_literal() on {normalized_entity_type}.{key} of uuid: {entity_dict['uuid']}") - - # Safely evaluate a string containing a Python dict or list literal - # Only convert to Python list/dict when the string literal is not empty - # instead of returning the json-as-string or array-as-string - # convert_str_literal() also removes those control chars to avoid SyntaxError - entity_dict[key] = convert_str_literal(entity_dict[key]) - - # Add the target key with correct value of data type to the normalized_entity dict - normalized_metadata[key] = entity_dict[key] - - # After possible modification to entity_dict[key] prior to assigning to normalized_metadata[key], remove - # the normalized_metadata entry for the key if it is an empty string, dictionary, or list. - if (isinstance(normalized_metadata[key], (str, dict, list)) and (not normalized_metadata[key])): - normalized_metadata.pop(key) - - return normalized_metadata """ Normalize the given list of complete entity results by removing properties that are not defined in the yaml schema @@ -2310,8 +2129,221 @@ def get_organ_types(): return _organ_types +""" +Use the Flask request.args MultiDict to see if 'reindex' is a URL parameter passed in with the +request and if it indicates reindexing should be supressed. Default to reindexing in all other cases. 
+ +Parameters +---------- +request: Flask request object + The instance of Flask request passed in from application request + +Returns +------- +bool +""" +def suppress_reindex(request) -> bool: + if 'reindex' not in request.args: + return False + reindex_str = request.args.get('reindex').lower() + if reindex_str == 'false': + return True + elif reindex_str == 'true': + return False + raise Exception(f"The value of the 'reindex' parameter must be True or False (case-insensitive)." + f" '{request.args.get('reindex')}' is not recognized.") + + #################################################################################################### ## Internal functions #################################################################################################### +""" +Generate the entity metadata by reading Neo4j data and appropriate triggers based upon the scope of +metadata requested e.g. complete data for another service, indexing data for an OpenSearch document, etc. + +Parameters +---------- +token: str + Either the user's globus nexus token or the internal token +entity_dict : dict + The entity dict based on neo4j record +metadata_scope: + A recognized scope from the SchemaConstants, controlling the triggers which are fired and elements + from Neo4j which are retained. +properties_to_skip : list + Any properties to skip running triggers + +Returns +------- +dict + A dictionary of metadata appropriate for the metadata_scope argument value. 
+""" +def _get_metadata_result(token, entity_dict, metadata_scope:MetadataScopeEnum, properties_to_skip=[]): + global _memcached_client + global _memcached_prefix + + complete_entity = {} + + # In case entity_dict is None or + # an incorrectly created entity that doesn't have the `entity_type` property + if entity_dict and ('entity_type' in entity_dict) and ('uuid' in entity_dict): + entity_uuid = entity_dict['uuid'] + entity_type = entity_dict['entity_type'] + cache_result = None + + # Need both client and prefix when fetching the cache + # Do NOT fetch cache if properties_to_skip is specified + if _memcached_client and _memcached_prefix and (not properties_to_skip): + cache_key = f'{_memcached_prefix}_complete_index_{entity_uuid}' + cache_result = _memcached_client.get(cache_key) + + # Use the cached data if found and still valid + # Otherwise, calculate and add to cache + if cache_result is None: + if _memcached_client and _memcached_prefix: + logger.info( + f'Cache of complete entity of {entity_type} {entity_uuid} not found or expired at time {datetime.now()}') + + if metadata_scope == MetadataScopeEnum.COMPLETE: + # No error handling here since if a 'on_read_trigger' method fails, + # the property value will be the error message + # Pass {} since no new_data_dict for 'on_read_trigger' + #generated_on_read_trigger_data_dict = generate_triggered_data('on_read_trigger', entity_type, token, + # entity_dict, {}, properties_to_skip) + generated_on_read_trigger_data_dict = generate_triggered_data( trigger_type=TriggerTypeEnum.ON_READ + , normalized_class=entity_type + , user_token=token + , existing_data_dict=entity_dict + , new_data_dict={} + , properties_to_skip=properties_to_skip) + + # Merge the entity info and the generated on read data into one dictionary + complete_entity_dict = {**entity_dict, **generated_on_read_trigger_data_dict} + + # Remove properties of None value + metadata_dict = remove_none_values(complete_entity_dict) + elif metadata_scope == 
MetadataScopeEnum.INDEX: + # No error handling here since if a 'on_index_trigger' method fails, + # the property value will be the error message + # Pass {} since no new_data_dict for 'on_index_trigger' + generated_on_index_trigger_data_dict = generate_triggered_data( trigger_type=TriggerTypeEnum.ON_INDEX + , normalized_class=entity_type + , user_token=token + , existing_data_dict=entity_dict + , new_data_dict={} + , properties_to_skip=properties_to_skip) + + # Merge the entity info and the generated on read data into one dictionary + complete_entity_dict = {**entity_dict, **generated_on_index_trigger_data_dict} + + # Remove properties of None value + metadata_dict = remove_none_values(complete_entity_dict) + else: + # Merge the entity info and the generated on read data into one dictionary + metadata_dict = {**entity_dict} + + # Need both client and prefix when creating the cache + # Do NOT cache when properties_to_skip is specified + if _memcached_client and _memcached_prefix and (not properties_to_skip): + logger.info(f'Creating complete entity cache of {entity_type} {entity_uuid} at time {datetime.now()}') + + cache_key = f'{_memcached_prefix}_complete_index_{entity_uuid}' + _memcached_client.set(cache_key, metadata_dict, expire=SchemaConstants.MEMCACHED_TTL) + + logger.debug( + f"Following is the complete {entity_type} cache created at time {datetime.now()} using key {cache_key}:") + logger.debug(metadata_dict) + else: + logger.info(f'Using complete entity cache of {entity_type} {entity_uuid} at time {datetime.now()}') + logger.debug(cache_result) + + metadata_dict = cache_result + else: + # Just return the original entity_dict otherwise + metadata_dict = entity_dict + + # One final return + return metadata_dict + + +""" +Normalize the entity result by filtering the properties to those appropriate for the +scope of metadata requested e.g. complete data for a another service, indexing data for an OpenSearch document, etc. 
+ +Properties that are not defined in the yaml schema and properties marked as `exposed: false` in the yaml schema are +removed. Properties are also filtered based upon the metadata_scope argument e.g. properties lacking `indexed: true` +marking in the yaml schema are removed when `metadata_scope` has a value of `MetadataScopeEnum.INDEX`. + +Parameters +---------- +entity_dict : dict + Either a neo4j node converted dict or metadata dict generated from get_index_metadata() +metadata_scope: + A recognized scope from the SchemaConstants, controlling the triggers which are fired and elements + from Neo4j which are retained. Default is MetadataScopeEnum.INDEX. +properties_to_exclude : list + Any additional properties to exclude from the response + +Returns +------- +dict + An entity metadata dictionary with keys that are all normalized appropriately for the metadata_scope argument value. +""" +def _normalize_metadata(entity_dict, metadata_scope:MetadataScopeEnum, properties_to_skip=[]): + global _schema + + # When the entity_dict is unavailable or the entity was incorrectly created, do not + # try to normalize. 
+ if not entity_dict or 'entity_type' not in entity_dict: + return {} + + normalized_metadata = {} + + normalized_entity_type = entity_dict['entity_type'] + properties = _schema['ENTITIES'][normalized_entity_type]['properties'] + + for key in entity_dict: + # Only return the properties defined in the schema yaml + # Exclude additional schema yaml properties, if specified + if key not in properties: + # Skip Neo4j entity properties not found in the schema yaml + continue + if key in properties_to_skip: + # Skip properties if directed by the calling function + continue + if entity_dict[key] is None: + # Do not include properties in the metadata if they are empty + continue + if 'exposed' in properties[key] and \ + properties[key]['exposed'] is False: + # Do not include properties in the metadata if they are not exposed + continue + if metadata_scope is MetadataScopeEnum.INDEX and \ + 'indexed' in properties[key] and \ + properties[key]['indexed'] is False: + # Do not include properties in metadata for indexing if they are not True i.e. 
False or non-boolean + continue + # Only run convert_str_literal() on string representation of Python dict and list with removing control characters + # No conversion for string representation of Python string, meaning that can still contain control characters + if entity_dict[key] and (properties[key]['type'] in ['list', 'json_string']): + logger.info( + f"Executing convert_str_literal() on {normalized_entity_type}.{key} of uuid: {entity_dict['uuid']}") + + # Safely evaluate a string containing a Python dict or list literal + # Only convert to Python list/dict when the string literal is not empty + # instead of returning the json-as-string or array-as-string + # convert_str_literal() also removes those control chars to avoid SyntaxError + entity_dict[key] = convert_str_literal(entity_dict[key]) + + # Add the target key with correct value of data type to the normalized_entity dict + normalized_metadata[key] = entity_dict[key] + + # After possible modification to entity_dict[key] prior to assigning to normalized_metadata[key], remove + # the normalized_metadata entry for the key if it is an empty string, dictionary, or list. 
+ if (isinstance(normalized_metadata[key], (str, dict, list)) and (not normalized_metadata[key])): + normalized_metadata.pop(key) + + return normalized_metadata + diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index b96febcf..236caf8d 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1741,51 +1741,9 @@ def delete_thumbnail_file(property_key, normalized_type, request, user_token, ex A merged dictionary that contains all possible input data to be used """ def update_status(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): - # execute set_status_history set_status_history(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict) + _sync_component_dataset_status(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict) - #execute sync_component_dataset_status - sync_component_dataset_status(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict) - - -""" -Function that changes the status of component datasets when their parent multi-assay dataset's status changes - -Parameters ----------- -property_key : str - The target property key -normalized_type : str - One of the types defined in the schema yaml: Dataset -request: Flask request object - The instance of Flask request passed in from application request -user_token: str - The user's globus nexus token -existing_data_dict : dict - A dictionary that contains all existing entity properties -new_data_dict : dict - A merged dictionary that contains all possible input data to be used -""" -def sync_component_dataset_status(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): - if 'uuid' not in existing_data_dict: - raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") - uuid = existing_data_dict['uuid'] - if 'status' not in 
existing_data_dict: - raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") - status = existing_data_dict['status'] - if status.lower() != "published": - children_uuids_list = schema_neo4j_queries.get_children(schema_manager.get_neo4j_driver_instance(), uuid, property_key='uuid') - status_body = {"status": status} - for child_uuid in children_uuids_list: - creation_action = schema_neo4j_queries.get_entity_creation_action_activity(schema_manager.get_neo4j_driver_instance(), child_uuid) - if creation_action == 'Multi-Assay Split': - url = schema_manager.get_entity_api_url() + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT + '/' + child_uuid - request_headers = { - 'Authorization': f'Bearer {user_token}' - } - request_headers[SchemaConstants.HUBMAP_APP_HEADER] = SchemaConstants.INGEST_API_APP - request_headers[SchemaConstants.INTERNAL_TRIGGER] = SchemaConstants.COMPONENT_DATASET - response = requests.put(url=url, headers=request_headers, json=status_body) #################################################################################################### @@ -2572,3 +2530,50 @@ def _delete_files(target_property_key, property_key, normalized_type, request, u return generated_dict + +""" +Function that syncs the status of component datasets when their parent multi-assay dataset's status changes + +Parameters +---------- +property_key : str + The target property key +normalized_type : str + One of the types defined in the schema yaml: Dataset +request: Flask request object + The instance of Flask request passed in from application request +user_token: str + The user's globus nexus token +existing_data_dict : dict + A dictionary that contains all existing entity properties +new_data_dict : dict + A merged dictionary that contains all possible input data to be used +""" +def _sync_component_dataset_status(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): + if 
'uuid' not in existing_data_dict: + raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") + uuid = existing_data_dict['uuid'] + if 'status' not in existing_data_dict: + raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") + status = existing_data_dict['status'] + if status.lower() != "published": + children_uuids_list = schema_neo4j_queries.get_children(schema_manager.get_neo4j_driver_instance(), uuid, property_key='uuid') + status_body = {"status": status} + for child_uuid in children_uuids_list: + creation_action = schema_neo4j_queries.get_entity_creation_action_activity(schema_manager.get_neo4j_driver_instance(), child_uuid) + if creation_action == 'Multi-Assay Split': + url = schema_manager.get_entity_api_url() + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT + '/' + child_uuid + + # When the parent dataset status update disables reindex via query string '?reindex=false' + # We'll also disable the reindex call to search-api upon each subsequent child dataset update + if schema_manager.suppress_reindex(request): + url += '?reindex=false' + + request_headers = { + 'Authorization': f'Bearer {user_token}' + } + request_headers[SchemaConstants.HUBMAP_APP_HEADER] = SchemaConstants.INGEST_API_APP + request_headers[SchemaConstants.INTERNAL_TRIGGER] = SchemaConstants.COMPONENT_DATASET + response = requests.put(url=url, headers=request_headers, json=status_body) + + From 874a381bab946eb13746aef6959b73cab3300987 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Tue, 23 Sep 2025 19:34:37 -0400 Subject: [PATCH 03/17] Fix missing arg and add log info --- src/app.py | 8 ++++---- src/schema/schema_triggers.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/app.py b/src/app.py index 115a60dc..99a1bb7f 100644 --- a/src/app.py +++ b/src/app.py @@ -1456,7 +1456,7 @@ def update_entity(id): # Handle 
linkages update via `after_update_trigger` methods if has_direct_ancestor_uuid: - after_update(normalized_entity_type, user_token, merged_updated_dict) + after_update(normalized_entity_type, request, user_token, merged_updated_dict) # 2/17/23 - Adding direct ancestor checks to publication as well as dataset. elif normalized_entity_type in ['Dataset', 'Publication']: # A bit more validation if `direct_ancestor_uuids` provided @@ -1485,7 +1485,7 @@ def update_entity(id): # Handle linkages update via `after_update_trigger` methods if has_direct_ancestor_uuids or has_associated_collection_uuid or has_updated_status: - after_update(normalized_entity_type, user_token, merged_updated_dict) + after_update(normalized_entity_type, request, user_token, merged_updated_dict) elif normalized_entity_type == 'Upload': has_dataset_uuids_to_link = False if ('dataset_uuids_to_link' in json_data_dict) and (json_data_dict['dataset_uuids_to_link']): @@ -1500,13 +1500,13 @@ def update_entity(id): # Handle linkages update via `after_update_trigger` methods if has_dataset_uuids_to_link or has_dataset_uuids_to_unlink or has_updated_status: - after_update(normalized_entity_type, user_token, merged_updated_dict) + after_update(normalized_entity_type, request, user_token, merged_updated_dict) elif schema_manager.entity_type_instanceof(normalized_entity_type, 'Collection'): # Generate 'before_update_trigger' data and update the entity details in Neo4j merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) # Handle linkages update via `after_update_trigger` methods - after_update(normalized_entity_type, user_token, merged_updated_dict) + after_update(normalized_entity_type, request, user_token, merged_updated_dict) else: # Generate 'before_update_trigger' data and update the entity details in Neo4j merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) diff --git 
a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 236caf8d..80d879db 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -2566,14 +2566,20 @@ def _sync_component_dataset_status(property_key, normalized_type, request, user_ # When the parent dataset status update disables reindex via query string '?reindex=false' # We'll also disable the reindex call to search-api upon each subsequent child dataset update + reindex = 'followed' if schema_manager.suppress_reindex(request): url += '?reindex=false' + reindex = 'suppressed' + + logger.info(f"Parent Multi-Assay Split dataset {existing_data_dict['uuid']} status update to {status}, with reindex {reindex}.") + logger.info(f'Internal PUT call to update child component dataset {child_uuid} status to {status}, with reindex {reindex}.') request_headers = { - 'Authorization': f'Bearer {user_token}' + 'Authorization': f'Bearer {user_token}', + SchemaConstants.HUBMAP_APP_HEADER: SchemaConstants.INGEST_API_APP, + SchemaConstants.INTERNAL_TRIGGER: SchemaConstants.COMPONENT_DATASET } - request_headers[SchemaConstants.HUBMAP_APP_HEADER] = SchemaConstants.INGEST_API_APP - request_headers[SchemaConstants.INTERNAL_TRIGGER] = SchemaConstants.COMPONENT_DATASET + response = requests.put(url=url, headers=request_headers, json=status_body) From 77067610df3b97ee4de5c786ccaaf41478de1ef9 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Tue, 23 Sep 2025 19:39:54 -0400 Subject: [PATCH 04/17] Fix missing arg for POST --- src/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app.py b/src/app.py index 99a1bb7f..7f58c4d5 100644 --- a/src/app.py +++ b/src/app.py @@ -1204,7 +1204,7 @@ def create_entity(entity_type): # For Dataset: link to direct ancestors # For Collection: link to member Datasets # For Upload: link to parent Lab node - after_create(normalized_entity_type, user_token, merged_dict) + after_create(normalized_entity_type, request, user_token, merged_dict) # By default 
we'll return all the properties but skip these time-consuming ones # Donor doesn't need to skip any From ff30cf1128a658ea598a5c0103023bc9d2b698ab Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Tue, 23 Sep 2025 19:58:29 -0400 Subject: [PATCH 05/17] Add request to Activity data generation --- src/app.py | 6 +++--- src/schema/schema_manager.py | 26 +++++++++++++++++--------- src/schema/schema_triggers.py | 8 ++++---- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/app.py b/src/app.py index 7f58c4d5..bd2daafa 100644 --- a/src/app.py +++ b/src/app.py @@ -4863,7 +4863,7 @@ def create_multiple_samples_details(request, normalized_entity_type, user_token, samples_dict_list.append(sample_dict) # Generate property values for the only one Activity node - activity_data_dict = schema_manager.generate_activity_data(normalized_entity_type, user_token, user_info_dict) + activity_data_dict = schema_manager.generate_activity_data(normalized_entity_type, request, user_token, user_info_dict) # Create new sample nodes and needed relationships as well as activity node in one transaction try: @@ -4985,7 +4985,7 @@ def create_multiple_component_details(request, normalized_entity_type, user_toke dataset_dict['dataset_link_abs_dir'] = dataset_link_abs_dir datasets_dict_list.append(dataset_dict) - activity_data_dict = schema_manager.generate_activity_data(normalized_entity_type, user_token, user_info_dict) + activity_data_dict = schema_manager.generate_activity_data(normalized_entity_type, request, user_token, user_info_dict) activity_data_dict['creation_action'] = creation_action try: created_datasets = app_neo4j_queries.create_multiple_datasets(neo4j_driver_instance, datasets_dict_list, activity_data_dict, direct_ancestor) @@ -5510,7 +5510,7 @@ def _get_metadata_by_id(entity_id:str=None, metadata_scope:MetadataScopeEnum=Met normalized_entity_type = entity_dict['entity_type'] excluded_fields = schema_manager.get_fields_to_exclude(normalized_entity_type) # Get the entity 
result of the indexable dictionary from cache if exists, otherwise regenerate and cache - metadata_dict = schema_manager.get_index_metadata(token, entity_dict) \ + metadata_dict = schema_manager.get_index_metadata(request, token, entity_dict) \ if metadata_scope==MetadataScopeEnum.INDEX \ else schema_manager.get_complete_entity_result(request, token, entity_dict) diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 16ceca56..e692630f 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -701,6 +701,8 @@ def get_complete_entity_result(request, token, entity_dict, properties_to_skip = Parameters ---------- +request: Flask request object + The instance of Flask request passed in from application request token: str Either the user's globus nexus token or the internal token entity_dict : dict @@ -713,11 +715,12 @@ def get_complete_entity_result(request, token, entity_dict, properties_to_skip = dict A dictionary of metadata to be included in an OpenSearch index document for the entity. 
""" -def get_index_metadata(token, entity_dict, properties_to_skip=[]): - metadata_dict = _get_metadata_result( token=token - ,entity_dict=entity_dict - ,metadata_scope=MetadataScopeEnum.INDEX - ,properties_to_skip=properties_to_skip) +def get_index_metadata(request, token, entity_dict, properties_to_skip=[]): + metadata_dict = _get_metadata_result(request=request + , token=token + , entity_dict=entity_dict + , metadata_scope=MetadataScopeEnum.INDEX + , properties_to_skip=properties_to_skip) return metadata_dict @@ -1837,6 +1840,8 @@ def get_entity_group_name(group_uuid): ---------- normalized_entity_type : str One of the entity types defined in the schema yaml: Donor, Sample, Dataset +request: Flask request object + The instance of Flask request passed in from application request user_token: str The user's globus nexus token user_info_dict : dict @@ -1848,7 +1853,7 @@ def get_entity_group_name(group_uuid): ------- dict: A dict of gnerated Activity data """ -def generate_activity_data(normalized_entity_type, user_token, user_info_dict): +def generate_activity_data(normalized_entity_type, request, user_token, user_info_dict): # Activity is not an Entity normalized_activity_type = 'Activity' @@ -1864,6 +1869,7 @@ def generate_activity_data(normalized_entity_type, user_token, user_info_dict): # Generate property values for Activity node generated_activity_data_dict = generate_triggered_data( trigger_type=TriggerTypeEnum.BEFORE_CREATE , normalized_class=normalized_activity_type + , request=request , user_token=user_token , existing_data_dict={} , new_data_dict=data_dict_for_activity) @@ -2164,6 +2170,8 @@ def suppress_reindex(request) -> bool: Parameters ---------- +request: Flask request object + The instance of Flask request passed in from application request token: str Either the user's globus nexus token or the internal token entity_dict : dict @@ -2179,7 +2187,7 @@ def suppress_reindex(request) -> bool: dict A dictionary of metadata appropriate for the 
metadata_scope argument value. """ -def _get_metadata_result(token, entity_dict, metadata_scope:MetadataScopeEnum, properties_to_skip=[]): +def _get_metadata_result(request, token, entity_dict, metadata_scope:MetadataScopeEnum, properties_to_skip=[]): global _memcached_client global _memcached_prefix @@ -2209,10 +2217,9 @@ def _get_metadata_result(token, entity_dict, metadata_scope:MetadataScopeEnum, p # No error handling here since if a 'on_read_trigger' method fails, # the property value will be the error message # Pass {} since no new_data_dict for 'on_read_trigger' - #generated_on_read_trigger_data_dict = generate_triggered_data('on_read_trigger', entity_type, token, - # entity_dict, {}, properties_to_skip) generated_on_read_trigger_data_dict = generate_triggered_data( trigger_type=TriggerTypeEnum.ON_READ , normalized_class=entity_type + , request=request , user_token=token , existing_data_dict=entity_dict , new_data_dict={} @@ -2229,6 +2236,7 @@ def _get_metadata_result(token, entity_dict, metadata_scope:MetadataScopeEnum, p # Pass {} since no new_data_dict for 'on_index_trigger' generated_on_index_trigger_data_dict = generate_triggered_data( trigger_type=TriggerTypeEnum.ON_INDEX , normalized_class=entity_type + , request=request , user_token=token , existing_data_dict=entity_dict , new_data_dict={} diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 80d879db..49702ff6 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -855,7 +855,7 @@ def link_dataset_to_direct_ancestors(property_key, normalized_type, request, use direct_ancestor_uuids = existing_data_dict['direct_ancestor_uuids'] # Generate property values for Activity node - activity_data_dict = schema_manager.generate_activity_data(normalized_type, user_token, existing_data_dict) + activity_data_dict = schema_manager.generate_activity_data(normalized_type, request, user_token, existing_data_dict) try: # Create a linkage (via one Activity node) 
between the dataset node and its direct ancestors in neo4j @@ -1781,7 +1781,7 @@ def link_donor_to_lab(property_key, normalized_type, request, user_token, existi direct_ancestor_uuids = [existing_data_dict['group_uuid']] # Generate property values for Activity node - activity_data_dict = schema_manager.generate_activity_data(normalized_type, user_token, existing_data_dict) + activity_data_dict = schema_manager.generate_activity_data(normalized_type, request, user_token, existing_data_dict) try: # Create a linkage (via Activity node) @@ -1917,7 +1917,7 @@ def link_sample_to_direct_ancestor(property_key, normalized_type, request, user_ direct_ancestor_uuids = [existing_data_dict['direct_ancestor_uuid']] # Generate property values for Activity node - activity_data_dict = schema_manager.generate_activity_data(normalized_type, user_token, existing_data_dict) + activity_data_dict = schema_manager.generate_activity_data(normalized_type, request, user_token, existing_data_dict) try: # Create a linkage (via Activity node) @@ -2129,7 +2129,7 @@ def link_upload_to_lab(property_key, normalized_type, request, user_token, exist direct_ancestor_uuids = [existing_data_dict['group_uuid']] # Generate property values for Activity node - activity_data_dict = schema_manager.generate_activity_data(normalized_type, user_token, existing_data_dict) + activity_data_dict = schema_manager.generate_activity_data(normalized_type, request, user_token, existing_data_dict) try: # Create a linkage (via Activity node) From 193f933d1d61ac3d36757ef07a2b671857610601 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Wed, 24 Sep 2025 00:27:46 -0400 Subject: [PATCH 06/17] Optimize comp dataset status sync --- src/schema/schema_neo4j_queries.py | 70 ++++++++++++++++++++-- src/schema/schema_triggers.py | 93 ++++++++++++------------------ 2 files changed, 104 insertions(+), 59 deletions(-) diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 80b68be5..b519dcfd 100644 --- 
a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -139,7 +139,6 @@ def filter_ancestors_by_type(neo4j_driver, direct_ancestor_uuids, entity_type): records = session.run(query).data() return records if records else None - """ @@ -406,6 +405,7 @@ def get_ancestors(neo4j_driver, uuid, property_key = None): return results + """ Get all descendants by uuid @@ -507,7 +507,6 @@ def get_collections(neo4j_driver, uuid, property_key = None): return results - """ Get all uploads by uuid @@ -602,6 +601,7 @@ def get_dataset_direct_ancestors(neo4j_driver, uuid, property_key = None): return results + """ For every Sample organ associated with the given dataset_uuid, retrieve the organ information and organ Donor information for use in composing a title for the Dataset. @@ -639,6 +639,22 @@ def get_dataset_donor_organs_info(neo4j_driver, dataset_uuid): return record['donorOrganSet'] if record and record['donorOrganSet'] else None + +""" +Get entity type for a given uuid + +Parameters +---------- +neo4j_driver : neo4j.Driver object + The neo4j database connection pool +entity_uuid : str + The uuid of target entity + +Returns +------- +str + The entity_type string +""" def get_entity_type(neo4j_driver, entity_uuid: str) -> str: query: str = f"Match (ent {{uuid: '{entity_uuid}'}}) return ent.entity_type" @@ -653,6 +669,21 @@ def get_entity_type(neo4j_driver, entity_uuid: str) -> str: return None +""" +Get Activity.creation_action for a given collection + +Parameters +---------- +neo4j_driver : neo4j.Driver object + The neo4j database connection pool +entity_uuid : str + The uuid of given entity + +Returns +------- +str + The creation action string +""" def get_entity_creation_action_activity(neo4j_driver, entity_uuid: str) -> str: query: str = f"MATCH (ds:Dataset {{uuid:'{entity_uuid}'}})<-[:ACTIVITY_OUTPUT]-(a:Activity) RETURN a.creation_action" @@ -913,8 +944,6 @@ def get_previous_revision_uuids(neo4j_driver, uuid): return result - - """ Get the uuid 
of next revision entity for a given entity @@ -1189,6 +1218,7 @@ def get_collection_datasets(neo4j_driver, uuid): return results + """ Get a dictionary with an entry for each Dataset in a Collection. The dictionary is keyed by Dataset uuid and contains the Dataset data_access_level. @@ -1441,6 +1471,38 @@ def get_found_dataset_uuids(neo4j_driver, uuids): return uuids_list +""" +Get the component dataset uuids for a given parent dataset uuid + +Parameters +---------- +neo4j_driver : neo4j.Driver object + The neo4j database connection pool +uuid : str + The uuid of target parent dataset + +Returns +------- +list + A list of component dataset uuids +""" +def get_component_dataset_uuids(neo4j_driver, uuid): + query = ( + f"MATCH (c:Dataset)<-[:ACTIVITY_OUTPUT]-(a:Activity)<-[:ACTIVITY_INPUT]-(p:Dataset) " + f"WHERE p.uuid='{uuid}' AND a.creation_action='Multi-Assay Split' " + f"RETURN COLLECT(c.uuid) AS {record_field_name}") + + logger.info("======get_component_dataset_uuids() query======") + logger.debug(query) + + with neo4j_driver.session() as session: + record = session.read_transaction(execute_readonly_tx, query) + + uuids_list = record[record_field_name] + + return uuids_list + + """ Get count of published Dataset in the provenance hierarchy for a given Sample/Donor diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 49702ff6..7a8a8885 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1723,7 +1723,9 @@ def delete_thumbnail_file(property_key, normalized_type, request, user_token, ex """ -Trigger event method that calls related functions involved with updating the status value +Trigger event method that updates the status value of the target dataset +If the dataset is a parent Multi-Assay Split dataset, will also sync the status update +to all the child component datasets Parameters ---------- @@ -1742,8 +1744,42 @@ def delete_thumbnail_file(property_key, normalized_type, request, user_token, ex """ def 
update_status(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): set_status_history(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict) - _sync_component_dataset_status(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict) + + if 'uuid' not in existing_data_dict: + raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'update_status()' trigger method.") + uuid = existing_data_dict['uuid'] + if 'status' not in existing_data_dict: + raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'update_status()' trigger method.") + status = existing_data_dict['status'] + + # Only apply to non-published parent datasets + if status.lower() != "published": + # Only sync the child component datasets status for Multi-Assay Split + component_dataset_uuids = schema_neo4j_queries.get_component_dataset_uuids(schema_manager.get_neo4j_driver_instance(), uuid) + + for comp_uuid in component_dataset_uuids: + url = schema_manager.get_entity_api_url() + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT + '/' + comp_uuid + + # When the parent dataset status update disables reindex via query string '?reindex=false' + # We'll also disable the reindex call to search-api upon each subsequent child component dataset update + reindex = 'followed' + if schema_manager.suppress_reindex(request): + url += '?reindex=false' + reindex = 'suppressed' + + logger.info(f"Update parent Multi-Assay Split dataset {uuid} status to {status}, with re-indexing {reindex}.") + logger.info(f'Update child component dataset {comp_uuid} status to {status}, with re-indexing {reindex}.') + + request_headers = { + 'Authorization': f'Bearer {user_token}', + SchemaConstants.HUBMAP_APP_HEADER: SchemaConstants.INGEST_API_APP, + SchemaConstants.INTERNAL_TRIGGER: SchemaConstants.COMPONENT_DATASET + } + + status_body = {"status": status} + + response = requests.put(url=url, 
headers=request_headers, json=status_body) #################################################################################################### @@ -2530,56 +2566,3 @@ def _delete_files(target_property_key, property_key, normalized_type, request, u return generated_dict - -""" -Function that syncs the status of component datasets when their parent multi-assay dataset's status changes - -Parameters ----------- -property_key : str - The target property key -normalized_type : str - One of the types defined in the schema yaml: Dataset -request: Flask request object - The instance of Flask request passed in from application request -user_token: str - The user's globus nexus token -existing_data_dict : dict - A dictionary that contains all existing entity properties -new_data_dict : dict - A merged dictionary that contains all possible input data to be used -""" -def _sync_component_dataset_status(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): - if 'uuid' not in existing_data_dict: - raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") - uuid = existing_data_dict['uuid'] - if 'status' not in existing_data_dict: - raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") - status = existing_data_dict['status'] - if status.lower() != "published": - children_uuids_list = schema_neo4j_queries.get_children(schema_manager.get_neo4j_driver_instance(), uuid, property_key='uuid') - status_body = {"status": status} - for child_uuid in children_uuids_list: - creation_action = schema_neo4j_queries.get_entity_creation_action_activity(schema_manager.get_neo4j_driver_instance(), child_uuid) - if creation_action == 'Multi-Assay Split': - url = schema_manager.get_entity_api_url() + SchemaConstants.ENTITY_API_UPDATE_ENDPOINT + '/' + child_uuid - - # When the parent dataset status update disables reindex 
via query string '?reindex=false' - # We'll also disable the reindex call to search-api upon each subsequent child dataset update - reindex = 'followed' - if schema_manager.suppress_reindex(request): - url += '?reindex=false' - reindex = 'suppressed' - - logger.info(f"Parent Multi-Assay Split dataset {existing_data_dict['uuid']} status update to {status}, with reindex {reindex}.") - logger.info(f'Internal PUT call to update child component dataset {child_uuid} status to {status}, with reindex {reindex}.') - - request_headers = { - 'Authorization': f'Bearer {user_token}', - SchemaConstants.HUBMAP_APP_HEADER: SchemaConstants.INGEST_API_APP, - SchemaConstants.INTERNAL_TRIGGER: SchemaConstants.COMPONENT_DATASET - } - - response = requests.put(url=url, headers=request_headers, json=status_body) - - From 9b4e21c4c4338dbf7d2d9f5583f9a5676ef51a20 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Wed, 24 Sep 2025 00:35:38 -0400 Subject: [PATCH 07/17] Add log for failed comp status update --- src/schema/schema_triggers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 7a8a8885..52185322 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1754,7 +1754,7 @@ def update_status(property_key, normalized_type, request, user_token, existing_d status = existing_data_dict['status'] # Only apply to non-published parent datasets - if status.lower() != "published": + if status.lower() != 'published': # Only sync the child component datasets status for Multi-Assay Split component_dataset_uuids = schema_neo4j_queries.get_component_dataset_uuids(schema_manager.get_neo4j_driver_instance(), uuid) @@ -1781,6 +1781,9 @@ def update_status(property_key, normalized_type, request, user_token, existing_d response = requests.put(url=url, headers=request_headers, json=status_body) + if response.status_code != 200: + logger.error(f'Failed to update child component dataset {child_uuid} 
status: {response.text}') + #################################################################################################### ## Trigger methods specific to Donor - DO NOT RENAME From 73ba21598e484c9eca348898ebdfde354ca698e0 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Wed, 24 Sep 2025 15:45:14 -0400 Subject: [PATCH 08/17] Bump version to 2.6.0 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index d9ba762b..e70b4523 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.5.13 +2.6.0 From 92d4522f4f94447f413932123e732336b724ac2c Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Wed, 24 Sep 2025 16:15:16 -0400 Subject: [PATCH 09/17] Fix var name --- src/schema/schema_triggers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 52185322..e5978275 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1782,7 +1782,7 @@ def update_status(property_key, normalized_type, request, user_token, existing_d response = requests.put(url=url, headers=request_headers, json=status_body) if response.status_code != 200: - logger.error(f'Failed to update child component dataset {child_uuid} status: {response.text}') + logger.error(f'Failed to update child component dataset {comp_uuid} status: {response.text}') #################################################################################################### From 39326a7d92202c3972c486532db26784c4f11527 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Wed, 24 Sep 2025 19:20:29 -0400 Subject: [PATCH 10/17] Don't run after_create and after_update triggers if not specified --- src/app.py | 44 +++++++++++++++++------------- src/schema/schema_manager.py | 6 ++--- src/schema/schema_triggers.py | 50 +++++++++++++++++------------------ 3 files changed, 52 insertions(+), 48 deletions(-) diff --git a/src/app.py b/src/app.py index bd2daafa..8e360ea5 100644 --- a/src/app.py +++ b/src/app.py @@ 
-1204,7 +1204,7 @@ def create_entity(entity_type): # For Dataset: link to direct ancestors # For Collection: link to member Datasets # For Upload: link to parent Lab node - after_create(normalized_entity_type, request, user_token, merged_dict) + after_create(normalized_entity_type, request, user_token, merged_dict, json_data_dict) # By default we'll return all the properties but skip these time-consuming ones # Donor doesn't need to skip any @@ -1451,12 +1451,12 @@ def update_entity(id): if direct_ancestor_dict['entity_type'] not in ['Donor', 'Sample']: bad_request_error(f"The uuid: {direct_ancestor_uuid} is not a Donor neither a Sample, cannot be used as the direct ancestor of this Sample") - # Generate 'before_update_triiger' data and update the entity details in Neo4j + # Generate 'before_update_trigger' data and update the entity details in Neo4j merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) # Handle linkages update via `after_update_trigger` methods if has_direct_ancestor_uuid: - after_update(normalized_entity_type, request, user_token, merged_updated_dict) + after_update(normalized_entity_type, request, user_token, merged_updated_dict, json_data_dict) # 2/17/23 - Adding direct ancestor checks to publication as well as dataset. 
elif normalized_entity_type in ['Dataset', 'Publication']: # A bit more validation if `direct_ancestor_uuids` provided @@ -1482,10 +1482,15 @@ def update_entity(id): # Generate 'before_update_trigger' data and update the entity details in Neo4j merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) + + print("==========DEBUG Begin") + print(merged_updated_dict) + print("==========DEBUG End") + # Handle linkages update via `after_update_trigger` methods if has_direct_ancestor_uuids or has_associated_collection_uuid or has_updated_status: - after_update(normalized_entity_type, request, user_token, merged_updated_dict) + after_update(normalized_entity_type, request, user_token, merged_updated_dict, json_data_dict) elif normalized_entity_type == 'Upload': has_dataset_uuids_to_link = False if ('dataset_uuids_to_link' in json_data_dict) and (json_data_dict['dataset_uuids_to_link']): @@ -1500,13 +1505,13 @@ def update_entity(id): # Handle linkages update via `after_update_trigger` methods if has_dataset_uuids_to_link or has_dataset_uuids_to_unlink or has_updated_status: - after_update(normalized_entity_type, request, user_token, merged_updated_dict) + after_update(normalized_entity_type, request, user_token, merged_updated_dict, json_data_dict) elif schema_manager.entity_type_instanceof(normalized_entity_type, 'Collection'): # Generate 'before_update_trigger' data and update the entity details in Neo4j merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) # Handle linkages update via `after_update_trigger` methods - after_update(normalized_entity_type, request, user_token, merged_updated_dict) + after_update(normalized_entity_type, request, user_token, merged_updated_dict, json_data_dict) else: # Generate 'before_update_trigger' data and update the entity details in Neo4j merged_updated_dict = update_entity_details(request, normalized_entity_type, 
user_token, json_data_dict, entity_dict) @@ -4580,7 +4585,7 @@ def _get_dataset_associated_metadata(dataset_dict, dataset_visibility, valid_use """ -Generate 'before_create_triiger' data and create the entity details in Neo4j +Generate 'before_create_trigger' data and create the entity details in Neo4j Parameters ---------- @@ -5000,7 +5005,7 @@ def create_multiple_component_details(request, normalized_entity_type, user_toke """ -Execute 'after_create_triiger' methods +Execute 'after_create_trigger' methods Parameters ---------- @@ -5013,8 +5018,10 @@ def create_multiple_component_details(request, normalized_entity_type, user_toke merged_data_dict: dict The merged dict that contains the entity dict newly created and information from user request json that are not stored in Neo4j +json_data_dict: dict + The json request dict """ -def after_create(normalized_entity_type, request, user_token, merged_data_dict): +def after_create(normalized_entity_type, request, user_token, merged_data_dict, json_data_dict): try: # 'after_create_trigger' and 'after_update_trigger' don't generate property values # It just returns the empty dict, no need to assign value @@ -5024,7 +5031,7 @@ def after_create(normalized_entity_type, request, user_token, merged_data_dict): , request=request , user_token=user_token , existing_data_dict=merged_data_dict - , new_data_dict={}) + , new_data_dict=json_data_dict) except schema_errors.AfterCreateTriggerException: # Log the full stack trace, prepend a line with our message msg = "The entity has been created, but failed to execute one of the 'after_create_trigger' methods" @@ -5036,7 +5043,7 @@ def after_create(normalized_entity_type, request, user_token, merged_data_dict): """ -Generate 'before_create_triiger' data and create the entity details in Neo4j +Generate 'before_create_trigger' data and create the entity details in Neo4j Parameters ---------- @@ -5133,20 +5140,19 @@ def update_entity_details(request, normalized_entity_type, user_token, 
json_data The instance of Flask request passed in from application request user_token: str The user's globus groups token -entity_dict: dict - The entity dict newly updated +merged_updated_dict: dict + The merged entity dict containing newly updated values and existing values +json_data_dict: dict + The data dict containing new values """ -def after_update(normalized_entity_type, request, user_token, entity_dict): +def after_update(normalized_entity_type, request, user_token, merged_updated_dict, json_data_dict): try: - # 'after_create_trigger' and 'after_update_trigger' don't generate property values - # It just returns the empty dict, no need to assign value - # Use {} sicne no new dict schema_manager.generate_triggered_data( trigger_type=TriggerTypeEnum.AFTER_UPDATE , normalized_class=normalized_entity_type , request=request , user_token=user_token - , existing_data_dict=entity_dict - , new_data_dict={}) + , existing_data_dict=merged_updated_dict + , new_data_dict=json_data_dict) except schema_errors.AfterUpdateTriggerException: # Log the full stack trace, prepend a line with our message msg = "The entity information has been updated, but failed to execute one of the 'after_update_trigger' methods" diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index e692630f..2ebe8ebb 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -410,8 +410,7 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, req if trigger_type in [TriggerTypeEnum.AFTER_CREATE, TriggerTypeEnum.AFTER_UPDATE]: # Only call the triggers if the propery key presents from the incoming data # E.g., 'direct_ancestor_uuid' for Sample, 'dataset_uuids' for Collection - # This `existing_data_dict` is the newly created or updated entity dict - if key in existing_data_dict: + if key in new_data_dict: trigger_method_name = properties[key][trigger_type.value] try: @@ -423,8 +422,7 @@ def generate_triggered_data(trigger_type: 
TriggerTypeEnum, normalized_class, req # No return values for 'after_create_trigger' and 'after_update_trigger' # because the property value is already set and stored in neo4j # Normally it's building linkages between entity nodes - # Use {} since no incoming new_data_dict - trigger_method_to_call(key, normalized_class, request, user_token, existing_data_dict, {}) + trigger_method_to_call(key, normalized_class, request, user_token, existing_data_dict, new_data_dict) except Exception: msg = f"Failed to call the {trigger_type.value} method: {trigger_method_name}" # Log the full stack trace, prepend a line with our message diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index e5978275..102e6665 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -848,11 +848,11 @@ def link_dataset_to_direct_ancestors(property_key, normalized_type, request, use if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") - if 'direct_ancestor_uuids' not in existing_data_dict: - raise KeyError("Missing 'direct_ancestor_uuids' key in 'existing_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") + if 'direct_ancestor_uuids' not in new_data_dict: + raise KeyError("Missing 'direct_ancestor_uuids' key in 'new_data_dict' during calling 'link_dataset_to_direct_ancestors()' trigger method.") dataset_uuid = existing_data_dict['uuid'] - direct_ancestor_uuids = existing_data_dict['direct_ancestor_uuids'] + direct_ancestor_uuids = new_data_dict['direct_ancestor_uuids'] # Generate property values for Activity node activity_data_dict = schema_manager.generate_activity_data(normalized_type, request, user_token, existing_data_dict) @@ -891,11 +891,11 @@ def link_collection_to_datasets(property_key, normalized_type, request, user_tok if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 
'existing_data_dict' during calling 'link_collection_to_datasets()' trigger method.") - if 'dataset_uuids' not in existing_data_dict: - raise KeyError("Missing 'dataset_uuids' key in 'existing_data_dict' during calling 'link_collection_to_datasets()' trigger method.") + if 'dataset_uuids' not in new_data_dict: + raise KeyError("Missing 'dataset_uuids' key in 'new_data_dict' during calling 'link_collection_to_datasets()' trigger method.") collection_uuid = existing_data_dict['uuid'] - dataset_uuids = existing_data_dict['dataset_uuids'] + dataset_uuids = new_data_dict['dataset_uuids'] try: # Create a linkage (without an Activity node) between the Collection node and each Dataset it contains. @@ -1022,14 +1022,14 @@ def link_to_previous_revision(property_key, normalized_type, request, user_token if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_to_previous_revision()' trigger method.") - if 'previous_revision_uuid' not in existing_data_dict: - raise KeyError("Missing 'previous_revision_uuid' key in 'existing_data_dict' during calling 'link_to_previous_revision()' trigger method.") + if 'previous_revision_uuid' not in new_data_dict: + raise KeyError("Missing 'previous_revision_uuid' key in 'new_data_dict' during calling 'link_to_previous_revision()' trigger method.") entity_uuid = existing_data_dict['uuid'] - if isinstance(existing_data_dict['previous_revision_uuid'], list): - previous_uuid = existing_data_dict['previous_revision_uuid'] + if isinstance(new_data_dict['previous_revision_uuid'], list): + previous_uuid = new_data_dict['previous_revision_uuid'] else: - previous_uuid = [existing_data_dict['previous_revision_uuid']] + previous_uuid = [new_data_dict['previous_revision_uuid']] # Create a revision reltionship from this new Dataset node and its previous revision of dataset node in neo4j try: @@ -1743,8 +1743,6 @@ def delete_thumbnail_file(property_key, normalized_type, request, user_token, ex A 
merged dictionary that contains all possible input data to be used """ def update_status(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict): - set_status_history(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict) - if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'update_status()' trigger method.") uuid = existing_data_dict['uuid'] @@ -1753,6 +1751,8 @@ def update_status(property_key, normalized_type, request, user_token, existing_d raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'update_status()' trigger method.") status = existing_data_dict['status'] + set_status_history(property_key, normalized_type, request, user_token, existing_data_dict, new_data_dict) + # Only apply to non-published parent datasets if status.lower() != 'published': # Only sync the child component datasets status for Multi-Assay Split @@ -1946,14 +1946,14 @@ def link_sample_to_direct_ancestor(property_key, normalized_type, request, user_ if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_sample_to_direct_ancestor()' trigger method.") - if 'direct_ancestor_uuid' not in existing_data_dict: - raise KeyError("Missing 'direct_ancestor_uuid' key in 'existing_data_dict' during calling 'link_sample_to_direct_ancestor()' trigger method.") + if 'direct_ancestor_uuid' not in new_data_dict: + raise KeyError("Missing 'direct_ancestor_uuid' key in 'new_data_dict' during calling 'link_sample_to_direct_ancestor()' trigger method.") sample_uuid = existing_data_dict['uuid'] # Build a list of direct ancestor uuids # Only one uuid in the list in this case - direct_ancestor_uuids = [existing_data_dict['direct_ancestor_uuid']] + direct_ancestor_uuids = [new_data_dict['direct_ancestor_uuid']] # Generate property values for Activity node activity_data_dict = 
schema_manager.generate_activity_data(normalized_type, request, user_token, existing_data_dict) @@ -1992,10 +1992,10 @@ def link_publication_to_associated_collection(property_key, normalized_type, req if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_publication_to_associated_collection()' trigger method.") - if 'associated_collection_uuid' not in existing_data_dict: - raise KeyError("Missing 'associated_collection_uuid' key in 'existing_data_dict' during calling 'link_publication_to_associated_collection()' trigger method.") + if 'associated_collection_uuid' not in new_data_dict: + raise KeyError("Missing 'associated_collection_uuid' key in 'new_data_dict' during calling 'link_publication_to_associated_collection()' trigger method.") - associated_collection_uuid = existing_data_dict['associated_collection_uuid'] + associated_collection_uuid = new_data_dict['associated_collection_uuid'] # No activity node. We are creating a direct link to the associated collection @@ -2203,11 +2203,11 @@ def link_datasets_to_upload(property_key, normalized_type, request, user_token, if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_datasets_to_upload()' trigger method.") - if 'dataset_uuids_to_link' not in existing_data_dict: - raise KeyError("Missing 'dataset_uuids_to_link' key in 'existing_data_dict' during calling 'link_datasets_to_upload()' trigger method.") + if 'dataset_uuids_to_link' not in new_data_dict: + raise KeyError("Missing 'dataset_uuids_to_link' key in 'new_data_dict' during calling 'link_datasets_to_upload()' trigger method.") upload_uuid = existing_data_dict['uuid'] - dataset_uuids = existing_data_dict['dataset_uuids_to_link'] + dataset_uuids = new_data_dict['dataset_uuids_to_link'] try: # Create a direct linkage (Dataset) - [:IN_UPLOAD] -> (Submission) for each dataset @@ -2244,11 +2244,11 @@ def 
unlink_datasets_from_upload(property_key, normalized_type, request, user_tok if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'unlink_datasets_from_upload()' trigger method.") - if 'dataset_uuids_to_unlink' not in existing_data_dict: - raise KeyError("Missing 'dataset_uuids_to_unlink' key in 'existing_data_dict' during calling 'unlink_datasets_from_upload()' trigger method.") + if 'dataset_uuids_to_unlink' not in new_data_dict: + raise KeyError("Missing 'dataset_uuids_to_unlink' key in 'new_data_dict' during calling 'unlink_datasets_from_upload()' trigger method.") upload_uuid = existing_data_dict['uuid'] - dataset_uuids = existing_data_dict['dataset_uuids_to_unlink'] + dataset_uuids = new_data_dict['dataset_uuids_to_unlink'] try: # Delete the linkage (Dataset) - [:IN_UPLOAD] -> (Upload) for each dataset From 89dfd353722cb25c388fb6a77126313d9ed94f63 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Wed, 24 Sep 2025 19:57:13 -0400 Subject: [PATCH 11/17] Add trigger type comments --- src/app.py | 5 - src/schema/schema_triggers.py | 331 ++++++++++++++++++++++------------ 2 files changed, 215 insertions(+), 121 deletions(-) diff --git a/src/app.py b/src/app.py index 8e360ea5..bfdc493a 100644 --- a/src/app.py +++ b/src/app.py @@ -1482,11 +1482,6 @@ def update_entity(id): # Generate 'before_update_trigger' data and update the entity details in Neo4j merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) - - print("==========DEBUG Begin") - print(merged_updated_dict) - print("==========DEBUG End") - # Handle linkages update via `after_update_trigger` methods if has_direct_ancestor_uuids or has_associated_collection_uuid or has_updated_status: diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 102e6665..5ac47b2c 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -26,6 +26,8 @@ 
#################################################################################################### """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method of generating current timestamp Parameters @@ -56,6 +58,8 @@ def set_timestamp(property_key, normalized_type, request, user_token, existing_d """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method of setting the entity type of a given entity Parameters @@ -83,6 +87,8 @@ def set_entity_type(property_key, normalized_type, request, user_token, existing """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method of getting user sub Parameters @@ -113,6 +119,8 @@ def set_user_sub(property_key, normalized_type, request, user_token, existing_da """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method of getting user email Parameters @@ -143,6 +151,8 @@ def set_user_email(property_key, normalized_type, request, user_token, existing_ """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method of getting user name Parameters @@ -173,6 +183,8 @@ def set_user_displayname(property_key, normalized_type, request, user_token, exi """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method of getting uuid, hubmap_id for a new entity to be created Parameters @@ -203,6 +215,8 @@ def set_uuid(property_key, normalized_type, request, user_token, existing_data_d """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method of getting uuid, hubmap_id for a new entity to be created Parameters @@ -238,6 +252,8 @@ def set_hubmap_id(property_key, normalized_type, request, user_token, existing_d """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method of generating data access level Parameters @@ -295,6 +311,8 @@ def set_data_access_level(property_key, normalized_type, request, user_token, ex """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method of setting the group_uuid Parameters @@ -357,6 +375,8 @@ def set_group_uuid(property_key, normalized_type, request, user_token, existing_ """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method 
of setting the group_name Parameters @@ -409,6 +429,8 @@ def set_group_name(property_key, normalized_type, request, user_token, existing_ #################################################################################################### """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method of getting the submission_id No submission_id for Dataset, Collection, and Upload @@ -440,6 +462,8 @@ def set_submission_id(property_key, normalized_type, request, user_token, existi """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method to commit files saved that were previously uploaded with UploadFileHelper.save_file The information, filename and optional description is saved in the field with name specified by `target_property_key` @@ -489,6 +513,8 @@ def commit_image_files(property_key, normalized_type, request, user_token, exist """ +TriggerTypeEnum.BEFORE_UPDATE + Trigger event methods for removing files from an entity during update Files are stored in a json encoded text field with property name 'target_property_key' in the entity dict @@ -526,6 +552,8 @@ def delete_image_files(property_key, normalized_type, request, user_token, exist """ +TriggerTypeEnum.BEFORE_UPDATE + Trigger event method to ONLY update descriptions of existing files Parameters @@ -597,6 +625,8 @@ def update_file_descriptions(property_key, normalized_type, request, user_token, #################################################################################################### """ +TriggerTypeEnum.AFTER_CREATE and TriggerTypeEnum.AFTER_UPDATE + Trigger event method of tracking status change events Parameters @@ -649,6 +679,8 @@ def set_status_history(property_key, normalized_type, request, user_token, exist #################################################################################################### """ +TriggerTypeEnum.ON_READ + Trigger event method of getting a list of associated datasets for a given collection Parameters @@ -689,6 +721,8 @@ def get_collection_datasets(property_key, 
normalized_type, request, user_token, #################################################################################################### """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method of setting the default "New" status for this new Dataset Parameters @@ -717,6 +751,8 @@ def set_dataset_status_new(property_key, normalized_type, request, user_token, e """ +TriggerTypeEnum.ON_READ + Trigger event method of getting a list of collections for this new Dataset Parameters @@ -753,6 +789,8 @@ def get_dataset_collections(property_key, normalized_type, request, user_token, """ +TriggerTypeEnum.ON_READ + Trigger event method of getting the associated collection for this publication Parameters @@ -789,6 +827,8 @@ def get_publication_associated_collection(property_key, normalized_type, request """ +TriggerTypeEnum.ON_READ + Trigger event method of getting the associated Upload for this Dataset Parameters @@ -827,6 +867,8 @@ def get_dataset_upload(property_key, normalized_type, request, user_token, exist """ +TriggerTypeEnum.AFTER_CREATE and TriggerTypeEnum.AFTER_UPDATE + Trigger event method of creating or recreating linkages between this new Dataset and its direct ancestors Parameters @@ -870,6 +912,8 @@ def link_dataset_to_direct_ancestors(property_key, normalized_type, request, use """ +TriggerTypeEnum.AFTER_CREATE and TriggerTypeEnum.AFTER_UPDATE + Trigger event method for creating or recreating linkages between this new Collection and the Datasets it contains Parameters @@ -913,6 +957,8 @@ def link_collection_to_datasets(property_key, normalized_type, request, user_tok """ +TriggerTypeEnum.ON_READ + Trigger event method of getting a list of direct ancestors for a given dataset or publication Parameters @@ -949,6 +995,8 @@ def get_dataset_direct_ancestors(property_key, normalized_type, request, user_to """ +TriggerTypeEnum.ON_READ + Trigger event method of getting the relative directory path of a given dataset Parameters @@ -1000,6 +1048,8 @@ def 
get_local_directory_rel_path(property_key, normalized_type, request, user_to """ +TriggerTypeEnum.AFTER_CREATE + Trigger event method of building linkage from this new Dataset to the dataset of its previous revision Parameters @@ -1051,123 +1101,8 @@ def link_to_previous_revision(property_key, normalized_type, request, user_token """ -Given a string which contains multiple items, each separated by the substring specified by -the 'separator' argument, and possibly also ending with 'separator', -- remove the last instance of 'separator' -- replaced the remaining last instance of 'separator' with ", and" -- replace all remaining instances of 'separator' with the substring specified in the 'new_separator' argument - -Parameters ----------- -separated_phrase : str - A string which contains multiple items, each separated by the substring specified by - the 'separator' argument, and possibly also ending with 'separator' -separator : str - A string which is used to separate items during computation. This should be something which - is statistically improbable to occur within items, such as a comma or a common word. -new_separator: str - The replacement for occurrences of 'separator', such as a comma or a comma followed by a space. 
- -Returns -------- -str: A version of the 'separated_phase' argument revised per the method description -""" -def _make_phrase_from_separator_delineated_str(separated_phrase:str, separator:str, new_separator=', ')->str: - # Remove the last separator - if re.search(rf"{separator}$", separated_phrase): - separated_phrase = re.sub( pattern=rf"(.*)({separator})$" - , repl=r"\1" - , string=separated_phrase) - # Replace the last separator with the word 'and' for inclusion in the Dataset title - separated_phrase = re.sub( pattern=rf"(.*)({separator})(.*?)$" - , repl=r"\1, and \3" - , string=separated_phrase) - # Replace all remaining separator with commas - descriptions = separated_phrase.rsplit(separator) - return new_separator.join(descriptions) - - -""" -Given a string of metadata for a Donor which was returned from Neo4j, and a list of desired attribute names to -extract from that metadata, return a dictionary containing lower-case version of each attribute found. - -Parameters ----------- -neo4j_donor_metadata : str - A string representation of a Python dict returned from Neo4j, containing metadata for a Donor. -attribute_key_list : list[str] - A list of strings, each of which may be the name of a key found in the Donor metadata. +TriggerTypeEnum.ON_READ -Returns -------- -dict: A dict keyed using elements of attribute_key_list which were found in the Donor metadata, containing - a lower-case version of the value stored in Neo4j -""" -def _get_attributes_from_donor_metadata(neo4j_donor_metadata: str, attribute_key_list: list[str]) -> dict: - # Note: The donor_metadata is stored in Neo4j as a string representation of the Python dict - # It's not stored in Neo4j as a json string! And we can't store it as a json string - # due to the way that Cypher handles single/double quotes. 
- donor_metadata_dict = schema_manager.convert_str_literal(neo4j_donor_metadata) - - # Since either 'organ_donor_data' or 'living_donor_data' can be present in donor_metadata_dict, but not - # both, just grab the first element. If neither are present, use the empty list - data_list = [] - if donor_metadata_dict: - data_list = list(donor_metadata_dict.values())[0] - - donor_grouping_concepts_dict = dict() - for data in data_list: - if 'grouping_concept_preferred_term' in data: - if data['grouping_concept_preferred_term'].lower() == 'age': - # The actual value of age stored in 'data_value' instead of 'preferred_term' - donor_grouping_concepts_dict['age'] = data['data_value'] - donor_grouping_concepts_dict['age_units'] = data['units'][0:-1].lower() - elif data['grouping_concept_preferred_term'].lower() == 'race': - donor_grouping_concepts_dict['race'] = data['preferred_term'].lower() - elif data['grouping_concept_preferred_term'].lower() == 'sex': - donor_grouping_concepts_dict['sex'] = data['preferred_term'].lower() - else: - pass - return donor_grouping_concepts_dict - - -""" -Given a age, race, and sex metadata for a Donor which was returned from Neo4j, generate an appropriate and -consistent string phrase. - -Parameters ----------- -age : str - A age value found in the metadata for the Donor returned from Neo4j. -race : str - A race value found in the metadata for the Donor returned from Neo4j. -sex : str - A sex value found in the metadata for the Donor returned from Neo4j. 
- -Returns -------- -str: A consistent string phrase appropriate for the Donor's metadata -""" -def _get_age_age_units_race_sex_phrase(age:str=None, age_units:str='units', race:str=None, sex:str=None)->str: - if age is None and race is not None and sex is not None: - return f"{race} {sex} of unknown age" - elif race is None and age is not None and sex is not None: - return f"{age}-{age_units}-old {sex} of unknown race" - elif sex is None and age is not None and race is not None: - return f"{age}-{age_units}-old {race} donor of unknown sex" - elif age is None and race is None and sex is not None: - return f"{sex} donor of unknown age and race" - elif age is None and sex is None and race is not None: - return f"{race} donor of unknown age and sex" - elif race is None and sex is None and age is not None: - return f"{age}-{age_units}-old donor of unknown race and sex" - elif age is None and race is None and sex is None: - return "donor of unknown age, race and sex" - else: - return f"{age}-{age_units}-old {race} {sex}" - - -""" Trigger event method of auto generating the dataset title Parameters @@ -1359,6 +1294,8 @@ def get_dataset_title(property_key, normalized_type, request, user_token, existi """ +TriggerTypeEnum.ON_READ + Trigger event method of getting the uuid of the previous revision dataset if exists Parameters @@ -1393,6 +1330,8 @@ def get_previous_revision_uuid(property_key, normalized_type, request, user_toke """ +TriggerTypeEnum.ON_READ + Trigger event method of getting the uuids of the previous revision datasets if they exist Parameters @@ -1429,6 +1368,8 @@ def get_previous_revision_uuids(property_key, normalized_type, request, user_tok """ +TriggerTypeEnum.ON_READ + Trigger event method of getting the uuid of the next version dataset if exists Parameters @@ -1463,6 +1404,8 @@ def get_next_revision_uuid(property_key, normalized_type, request, user_token, e """ +TriggerTypeEnum.ON_READ + Trigger event method of generating `creation_action` Parameters @@ 
-1500,6 +1443,8 @@ def get_creation_action_activity(property_key, normalized_type, request, user_to """ +TriggerTypeEnum.ON_READ + Trigger event method of getting the uuids of the next version dataset if they exist Parameters @@ -1536,6 +1481,8 @@ def get_next_revision_uuids(property_key, normalized_type, request, user_token, """ +TriggerTypeEnum.BEFORE_CREATE and TriggerTypeEnum.BEFORE_UPDATE + Trigger event method to commit thumbnail file saved that were previously uploaded via ingest-api The information, filename is saved in the field with name specified by `target_property_key` @@ -1627,6 +1574,8 @@ def commit_thumbnail_file(property_key, normalized_type, request, user_token, ex """ +TriggerTypeEnum.BEFORE_UPDATE + Trigger event method for removing the thumbnail file from a dataset during update File is stored in a json encoded text field with property name 'target_property_key' in the entity dict @@ -1723,6 +1672,8 @@ def delete_thumbnail_file(property_key, normalized_type, request, user_token, ex """ +TriggerTypeEnum.AFTER_UPDATE + Trigger event method that updates the status value of the target dataset If the dataset is a parent Multi-Assay Split dataset, will also sync the status update to all the child component datasets @@ -1791,6 +1742,8 @@ def update_status(property_key, normalized_type, request, user_token, existing_d """ +TriggerTypeEnum.AFTER_UPDATE + Trigger event method of building linkage between this new Donor and Lab Parameters @@ -1812,6 +1765,7 @@ def link_donor_to_lab(property_key, normalized_type, request, user_token, existi if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_donor_to_lab()' trigger method.") + # Use `existing_data_dict` for `group_uuid` lookup as it may not be provided in the user request if 'group_uuid' not in existing_data_dict: raise KeyError("Missing 'group_uuid' key in 'existing_data_dict' during calling 'link_donor_to_lab()' trigger method.") @@ -1839,6 
+1793,8 @@ def link_donor_to_lab(property_key, normalized_type, request, user_token, existi #################################################################################################### """ +TriggerTypeEnum.BEFORE_CREATE and TriggerTypeEnum.BEFORE_UPDATE + Trigger event method to commit files saved that were previously uploaded with UploadFileHelper.save_file The information, filename and optional description is saved in the field with name specified by `target_property_key` @@ -1888,6 +1844,8 @@ def commit_metadata_files(property_key, normalized_type, request, user_token, ex """ +TriggerTypeEnum.BEFORE_UPDATE + Trigger event methods for removing files from an entity during update Files are stored in a json encoded text field with property name 'target_property_key' in the entity dict @@ -1925,6 +1883,8 @@ def delete_metadata_files(property_key, normalized_type, request, user_token, ex """ +TriggerTypeEnum.BEFORE_CREATE and TriggerTypeEnum.BEFORE_UPDATE + Trigger event method of creating or recreating linkages between this new Sample and its direct ancestor Parameters @@ -1971,6 +1931,8 @@ def link_sample_to_direct_ancestor(property_key, normalized_type, request, user_ raise """ +TriggerTypeEnum.BEFORE_CREATE and TriggerTypeEnum.BEFORE_UPDATE + Trigger event method of creating or recreating linkages between this new publication and its associated_collection Parameters @@ -2011,6 +1973,8 @@ def link_publication_to_associated_collection(property_key, normalized_type, req """ +TriggerTypeEnum.ON_READ + Trigger event method of getting the parent of a Sample, which is a Donor Parameters @@ -2052,6 +2016,8 @@ def get_sample_direct_ancestor(property_key, normalized_type, request, user_toke #################################################################################################### """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method of truncating the time part of publication_date if provided by users Parameters @@ -2081,6 +2047,8 @@ def 
set_publication_date(property_key, normalized_type, request, user_token, exi """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method setting the dataset_type immutable property for a Publication. Parameters @@ -2113,6 +2081,8 @@ def set_publication_dataset_type(property_key, normalized_type, request, user_to #################################################################################################### """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method of setting the Upload initial status - "New" Parameters @@ -2140,6 +2110,8 @@ def set_upload_status_new(property_key, normalized_type, request, user_token, ex """ +TriggerTypeEnum.AFTER_CREATE + Trigger event method of building linkage between this new Upload and Lab Parameters ---------- @@ -2160,6 +2132,7 @@ def link_upload_to_lab(property_key, normalized_type, request, user_token, exist if 'uuid' not in existing_data_dict: raise KeyError("Missing 'uuid' key in 'existing_data_dict' during calling 'link_upload_to_lab()' trigger method.") + # Use `existing_data_dict` for `group_uuid` lookup as it may not be provided in the user request if 'group_uuid' not in existing_data_dict: raise KeyError("Missing 'group_uuid' key in 'existing_data_dict' during calling 'link_upload_to_lab()' trigger method.") @@ -2182,6 +2155,8 @@ def link_upload_to_lab(property_key, normalized_type, request, user_token, exist """ +TriggerTypeEnum.AFTER_UPDATE + Trigger event method of building linkages between this Submission and the given datasets Parameters @@ -2223,6 +2198,8 @@ def link_datasets_to_upload(property_key, normalized_type, request, user_token, """ +TriggerTypeEnum.AFTER_UPDATE + Trigger event method of deleting linkages between this target Submission and the given datasets Parameters @@ -2264,6 +2241,8 @@ def unlink_datasets_from_upload(property_key, normalized_type, request, user_tok """ +TriggerTypeEnum.ON_READ + Trigger event method of getting a list of associated datasets for a given Upload Parameters @@ -2303,6 
+2282,8 @@ def get_upload_datasets(property_key, normalized_type, request, user_token, exis #################################################################################################### """ +TriggerTypeEnum.BEFORE_CREATE + Trigger event method of getting creation_action for Activity Lab->Activity->Donor (Not needed for now) @@ -2569,3 +2550,121 @@ def _delete_files(target_property_key, property_key, normalized_type, request, u return generated_dict + +""" +Given a string which contains multiple items, each separated by the substring specified by +the 'separator' argument, and possibly also ending with 'separator', +- remove the last instance of 'separator' +- replace the remaining last instance of 'separator' with ", and" +- replace all remaining instances of 'separator' with the substring specified in the 'new_separator' argument + +Parameters +---------- +separated_phrase : str + A string which contains multiple items, each separated by the substring specified by + the 'separator' argument, and possibly also ending with 'separator' +separator : str + A string which is used to separate items during computation. This should be something which + is statistically improbable to occur within items, such as a comma or a common word. +new_separator: str + The replacement for occurrences of 'separator', such as a comma or a comma followed by a space. 
+ +Returns +------- +str: A version of the 'separated_phrase' argument revised per the method description +""" +def _make_phrase_from_separator_delineated_str(separated_phrase:str, separator:str, new_separator=', ')->str: + # Remove the last separator + if re.search(rf"{separator}$", separated_phrase): + separated_phrase = re.sub( pattern=rf"(.*)({separator})$" + , repl=r"\1" + , string=separated_phrase) + # Replace the last separator with the word 'and' for inclusion in the Dataset title + separated_phrase = re.sub( pattern=rf"(.*)({separator})(.*?)$" + , repl=r"\1, and \3" + , string=separated_phrase) + # Replace all remaining separator with commas + descriptions = separated_phrase.rsplit(separator) + return new_separator.join(descriptions) + + +""" +Given a string of metadata for a Donor which was returned from Neo4j, and a list of desired attribute names to +extract from that metadata, return a dictionary containing lower-case version of each attribute found. + +Parameters +---------- +neo4j_donor_metadata : str + A string representation of a Python dict returned from Neo4j, containing metadata for a Donor. +attribute_key_list : list[str] + A list of strings, each of which may be the name of a key found in the Donor metadata. + +Returns +------- +dict: A dict keyed using elements of attribute_key_list which were found in the Donor metadata, containing + a lower-case version of the value stored in Neo4j +""" +def _get_attributes_from_donor_metadata(neo4j_donor_metadata: str, attribute_key_list: list[str]) -> dict: + # Note: The donor_metadata is stored in Neo4j as a string representation of the Python dict + # It's not stored in Neo4j as a json string! And we can't store it as a json string + # due to the way that Cypher handles single/double quotes. 
+ donor_metadata_dict = schema_manager.convert_str_literal(neo4j_donor_metadata) + + # Since either 'organ_donor_data' or 'living_donor_data' can be present in donor_metadata_dict, but not + # both, just grab the first element. If neither are present, use the empty list + data_list = [] + if donor_metadata_dict: + data_list = list(donor_metadata_dict.values())[0] + + donor_grouping_concepts_dict = dict() + for data in data_list: + if 'grouping_concept_preferred_term' in data: + if data['grouping_concept_preferred_term'].lower() == 'age': + # The actual value of age stored in 'data_value' instead of 'preferred_term' + donor_grouping_concepts_dict['age'] = data['data_value'] + donor_grouping_concepts_dict['age_units'] = data['units'][0:-1].lower() + elif data['grouping_concept_preferred_term'].lower() == 'race': + donor_grouping_concepts_dict['race'] = data['preferred_term'].lower() + elif data['grouping_concept_preferred_term'].lower() == 'sex': + donor_grouping_concepts_dict['sex'] = data['preferred_term'].lower() + else: + pass + return donor_grouping_concepts_dict + + +""" +Given age, race, and sex metadata for a Donor which was returned from Neo4j, generate an appropriate and +consistent string phrase. + +Parameters +---------- +age : str + An age value found in the metadata for the Donor returned from Neo4j. +race : str + A race value found in the metadata for the Donor returned from Neo4j. +sex : str + A sex value found in the metadata for the Donor returned from Neo4j. 
+ +Returns +------- +str: A consistent string phrase appropriate for the Donor's metadata +""" +def _get_age_age_units_race_sex_phrase(age:str=None, age_units:str='units', race:str=None, sex:str=None)->str: + if age is None and race is not None and sex is not None: + return f"{race} {sex} of unknown age" + elif race is None and age is not None and sex is not None: + return f"{age}-{age_units}-old {sex} of unknown race" + elif sex is None and age is not None and race is not None: + return f"{age}-{age_units}-old {race} donor of unknown sex" + elif age is None and race is None and sex is not None: + return f"{sex} donor of unknown age and race" + elif age is None and sex is None and race is not None: + return f"{race} donor of unknown age and sex" + elif race is None and sex is None and age is not None: + return f"{age}-{age_units}-old donor of unknown race and sex" + elif age is None and race is None and sex is None: + return "donor of unknown age, race and sex" + else: + return f"{age}-{age_units}-old {race} {sex}" + + From 53f2778ce025badda31889f5250f52a234e90026 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Wed, 24 Sep 2025 20:21:05 -0400 Subject: [PATCH 12/17] Log trigger type with uuid when available --- src/schema/schema_manager.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 2ebe8ebb..d8e92bf4 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -417,7 +417,10 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, req # Get the target trigger method defined in the schema_triggers.py module trigger_method_to_call = getattr(schema_triggers, trigger_method_name) - logger.info(f"To run {trigger_type.value}: {trigger_method_name} defined for {normalized_class}") + if 'uuid' in existing_data_dict: + logger.info(f"To run {trigger_type.value}: {trigger_method_name} defined for {normalized_class} 
{existing_data_dict['uuid']}") + else: + logger.info(f"To run {trigger_type.value}: {trigger_method_name} defined for {normalized_class}") # No return values for 'after_create_trigger' and 'after_update_trigger' # because the property value is already set and stored in neo4j @@ -441,7 +444,10 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, req try: trigger_method_to_call = getattr(schema_triggers, trigger_method_name) - logger.info(f"To run {trigger_type.value}: {trigger_method_name} defined for {normalized_class}") + if 'uuid' in existing_data_dict: + logger.info(f"To run {trigger_type.value}: {trigger_method_name} defined for {normalized_class} {existing_data_dict['uuid']}") + else: + logger.info(f"To run {trigger_type.value}: {trigger_method_name} defined for {normalized_class}") # Will set the trigger return value as the property value by default # Unless the return value is to be assigned to another property different target key @@ -488,7 +494,10 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, req try: trigger_method_to_call = getattr(schema_triggers, trigger_method_name) - logger.info(f"To run {trigger_type.value}: {trigger_method_name} defined for {normalized_class}") + if 'uuid' in existing_data_dict: + logger.info(f"To run {trigger_type.value}: {trigger_method_name} defined for {normalized_class} {existing_data_dict['uuid']}") + else: + logger.info(f"To run {trigger_type.value}: {trigger_method_name} defined for {normalized_class}") # Will set the trigger return value as the property value by default # Unless the return value is to be assigned to another property different target key From 3f7dc9c6be09fe0e8e43e3d32efc2538d7cff264 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Wed, 24 Sep 2025 21:39:23 -0400 Subject: [PATCH 13/17] Logging tweaks --- src/schema/schema_manager.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git 
a/src/schema/schema_manager.py b/src/schema/schema_manager.py index d8e92bf4..a36f6ed1 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -418,9 +418,9 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, req trigger_method_to_call = getattr(schema_triggers, trigger_method_name) if 'uuid' in existing_data_dict: - logger.info(f"To run {trigger_type.value}: {trigger_method_name} defined for {normalized_class} {existing_data_dict['uuid']}") + logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class} {existing_data_dict['uuid']}") else: - logger.info(f"To run {trigger_type.value}: {trigger_method_name} defined for {normalized_class}") + logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class}") # No return values for 'after_create_trigger' and 'after_update_trigger' # because the property value is already set and stored in neo4j @@ -445,9 +445,9 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, req trigger_method_to_call = getattr(schema_triggers, trigger_method_name) if 'uuid' in existing_data_dict: - logger.info(f"To run {trigger_type.value}: {trigger_method_name} defined for {normalized_class} {existing_data_dict['uuid']}") + logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class} {existing_data_dict['uuid']}") else: - logger.info(f"To run {trigger_type.value}: {trigger_method_name} defined for {normalized_class}") + logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class}") # Will set the trigger return value as the property value by default # Unless the return value is to be assigned to another property different target key @@ -495,9 +495,9 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, req trigger_method_to_call = getattr(schema_triggers, trigger_method_name) if 'uuid' in existing_data_dict: - logger.info(f"To run 
{trigger_type.value}: {trigger_method_name} defined for {normalized_class} {existing_data_dict['uuid']}") + logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class} {existing_data_dict['uuid']}") else: - logger.info(f"To run {trigger_type.value}: {trigger_method_name} defined for {normalized_class}") + logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class}") # Will set the trigger return value as the property value by default # Unless the return value is to be assigned to another property different target key @@ -1041,8 +1041,11 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque try: # Get the target validator method defined in the schema_validators.py module validator_method_to_call = getattr(schema_validators, validator_method_name) - - logger.info(f"To run {validator_type}: {validator_method_name} defined for entity {normalized_entity_type}") + + if 'uuid' in existing_entity_dict: + logger.info(f"To run {validator_type}: {validator_method_name} for {normalized_entity_type} {existing_entity_dict['uuid']}") + else: + logger.info(f"To run {validator_type}: {validator_method_name} for {normalized_entity_type}") # Create a dictionary to hold data need by any entity validator, which must be populated # with validator specific requirements when the method to be called is determined. 
@@ -1062,7 +1065,7 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque except schema_errors.LockedEntityUpdateException as leue: raise leue except Exception as e: - msg = f"Failed to call the {validator_type} method: {validator_method_name} defined for entity {normalized_entity_type}" + msg = f"Failed to call the {validator_type} method: {validator_method_name} for {normalized_entity_type}" # Log the full stack trace, prepend a line with our message logger.exception(msg) raise e @@ -1107,7 +1110,10 @@ def execute_property_level_validators(validator_type, normalized_entity_type, re # Get the target validator method defined in the schema_validators.py module validator_method_to_call = getattr(schema_validators, validator_method_name) - logger.info(f"To run {validator_type}: {validator_method_name} defined for entity {normalized_entity_type} on property {key}") + if 'uuid' in existing_data_dict: + logger.info(f"To run {validator_type}: {validator_method_name} for {normalized_entity_type} {existing_data_dict['uuid']} on property {key}") + else: + logger.info(f"To run {validator_type}: {validator_method_name} for {normalized_entity_type} on property {key}") validator_method_to_call(key, normalized_entity_type, request, existing_data_dict, new_data_dict) except schema_errors.MissingApplicationHeaderException as e: @@ -1117,12 +1123,12 @@ def execute_property_level_validators(validator_type, normalized_entity_type, re except ValueError as ve: raise ValueError(ve) except schema_errors.UnimplementedValidatorException as uve: - msg = f"Failed to call the {validator_type} method: {validator_method_name} defined for entity {normalized_entity_type} on property {key}" + msg = f"Failed to call the {validator_type} method: {validator_method_name} for {normalized_entity_type} on property {key}" # Log the full stack trace, prepend a line with our message logger.exception(f"{msg}. 
{str(uve)}") raise uve except Exception as e: - msg = f"Unexpected exception calling {validator_type} method: {validator_method_name} defined for entity {normalized_entity_type} on property {key}" + msg = f"Unexpected exception calling {validator_type} method: {validator_method_name} for {normalized_entity_type} on property {key}" # Log the full stack trace, prepend a line with our message logger.exception(f"{msg}. {str(e)}") raise e From d4244744b9e87f51de3b55b8e40bce48070ee892 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Wed, 24 Sep 2025 22:28:24 -0400 Subject: [PATCH 14/17] Add log and comment for internal _commit_files() to avoid confusion --- src/schema/schema_manager.py | 1 + src/schema/schema_triggers.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index a36f6ed1..a55d1888 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -512,6 +512,7 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, req # within this dictionary and return it so it can be saved in the scope of this loop and # passed to other 'updated_peripherally' triggers if 'updated_peripherally' in properties[key] and properties[key]['updated_peripherally']: + # Such trigger methods get executed but really do nothing internally trigger_generated_data_dict = trigger_method_to_call(key, normalized_class, request, user_token, existing_data_dict, new_data_dict, trigger_generated_data_dict) else: target_key, target_value = trigger_method_to_call(key, normalized_class, request, user_token, existing_data_dict, new_data_dict) diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 5ac47b2c..4445718e 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -2379,6 +2379,7 @@ def _commit_files(target_property_key, property_key, normalized_type, request, u # For metadata files the property name is "metadata_files_to_add" # But other may be 
used in the future if (not property_key in new_data_dict) or (not new_data_dict[property_key]): + logger.info(f"Do nothing with the internal trigger method _commit_files() because {property_key} not found in request payload") return generated_dict #If POST or PUT where the target doesn't exist create the file info array From 89e65c16854df34d7c6f2cf33c9f15c1e9076b8b Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Thu, 25 Sep 2025 13:57:44 -0400 Subject: [PATCH 15/17] Better none and exist check --- src/schema/schema_manager.py | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index a55d1888..a1dff4ad 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -417,10 +417,8 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, req # Get the target trigger method defined in the schema_triggers.py module trigger_method_to_call = getattr(schema_triggers, trigger_method_name) - if 'uuid' in existing_data_dict: - logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class} {existing_data_dict['uuid']}") - else: - logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class}") + target_uuid = existing_entity_dict['uuid'] if existing_entity_dict and 'uuid' in existing_entity_dict else ''; + logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class} {target_uuid}") # No return values for 'after_create_trigger' and 'after_update_trigger' # because the property value is already set and stored in neo4j @@ -443,11 +441,9 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, req trigger_method_name = properties[key][trigger_type.value] try: trigger_method_to_call = getattr(schema_triggers, trigger_method_name) - - if 'uuid' in existing_data_dict: - logger.info(f"To run {trigger_type.value}: {trigger_method_name} for 
{normalized_class} {existing_data_dict['uuid']}") - else: - logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class}") + + target_uuid = existing_entity_dict['uuid'] if existing_entity_dict and 'uuid' in existing_entity_dict else ''; + logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class} {target_uuid}") # Will set the trigger return value as the property value by default # Unless the return value is to be assigned to another property different target key @@ -494,10 +490,8 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, req try: trigger_method_to_call = getattr(schema_triggers, trigger_method_name) - if 'uuid' in existing_data_dict: - logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class} {existing_data_dict['uuid']}") - else: - logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class}") + target_uuid = existing_entity_dict['uuid'] if existing_entity_dict and 'uuid' in existing_entity_dict else ''; + logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class} {target_uuid}") # Will set the trigger return value as the property value by default # Unless the return value is to be assigned to another property different target key @@ -1043,10 +1037,8 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque # Get the target validator method defined in the schema_validators.py module validator_method_to_call = getattr(schema_validators, validator_method_name) - if 'uuid' in existing_entity_dict: - logger.info(f"To run {validator_type}: {validator_method_name} for {normalized_entity_type} {existing_entity_dict['uuid']}") - else: - logger.info(f"To run {validator_type}: {validator_method_name} for {normalized_entity_type}") + target_uuid = existing_entity_dict['uuid'] if existing_entity_dict and 'uuid' in existing_entity_dict else ''; + logger.info(f"To run 
{validator_type}: {validator_method_name} for {normalized_entity_type} {target_uuid}") # Create a dictionary to hold data need by any entity validator, which must be populated # with validator specific requirements when the method to be called is determined. @@ -1111,10 +1103,8 @@ def execute_property_level_validators(validator_type, normalized_entity_type, re # Get the target validator method defined in the schema_validators.py module validator_method_to_call = getattr(schema_validators, validator_method_name) - if 'uuid' in existing_data_dict: - logger.info(f"To run {validator_type}: {validator_method_name} for {normalized_entity_type} {existing_data_dict['uuid']} on property {key}") - else: - logger.info(f"To run {validator_type}: {validator_method_name} for {normalized_entity_type} on property {key}") + target_uuid = existing_entity_dict['uuid'] if existing_entity_dict and 'uuid' in existing_entity_dict else ''; + logger.info(f"To run {validator_type}: {validator_method_name} for {normalized_entity_type} {target_uuid} on property {key}") validator_method_to_call(key, normalized_entity_type, request, existing_data_dict, new_data_dict) except schema_errors.MissingApplicationHeaderException as e: From f991e2db409ef8584243a30bfd0527a0d3f00a23 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Thu, 25 Sep 2025 14:02:13 -0400 Subject: [PATCH 16/17] Fix var name error --- src/schema/schema_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index a1dff4ad..e9d2c644 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -1103,7 +1103,7 @@ def execute_property_level_validators(validator_type, normalized_entity_type, re # Get the target validator method defined in the schema_validators.py module validator_method_to_call = getattr(schema_validators, validator_method_name) - target_uuid = existing_entity_dict['uuid'] if existing_entity_dict and 'uuid' in 
existing_entity_dict else ''; + target_uuid = existing_data_dict['uuid'] if existing_data_dict and 'uuid' in existing_data_dict else '' logger.info(f"To run {validator_type}: {validator_method_name} for {normalized_entity_type} {target_uuid} on property {key}") validator_method_to_call(key, normalized_entity_type, request, existing_data_dict, new_data_dict) From 0014e4635949e003c3c904ddc6815c074cf67966 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Thu, 25 Sep 2025 14:06:13 -0400 Subject: [PATCH 17/17] Fix var name error again --- src/schema/schema_manager.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index e9d2c644..7be00fb3 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -417,7 +417,7 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, req # Get the target trigger method defined in the schema_triggers.py module trigger_method_to_call = getattr(schema_triggers, trigger_method_name) - target_uuid = existing_entity_dict['uuid'] if existing_entity_dict and 'uuid' in existing_entity_dict else ''; + target_uuid = existing_data_dict['uuid'] if existing_data_dict and 'uuid' in existing_data_dict else '' logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class} {target_uuid}") # No return values for 'after_create_trigger' and 'after_update_trigger' @@ -442,7 +442,7 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, req try: trigger_method_to_call = getattr(schema_triggers, trigger_method_name) - target_uuid = existing_entity_dict['uuid'] if existing_entity_dict and 'uuid' in existing_entity_dict else ''; + target_uuid = existing_data_dict['uuid'] if existing_data_dict and 'uuid' in existing_data_dict else '' logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class} {target_uuid}") # Will set the trigger return value as the property
value by default @@ -490,7 +490,7 @@ def generate_triggered_data(trigger_type: TriggerTypeEnum, normalized_class, req try: trigger_method_to_call = getattr(schema_triggers, trigger_method_name) - target_uuid = existing_entity_dict['uuid'] if existing_entity_dict and 'uuid' in existing_entity_dict else ''; + target_uuid = existing_data_dict['uuid'] if existing_data_dict and 'uuid' in existing_data_dict else '' logger.info(f"To run {trigger_type.value}: {trigger_method_name} for {normalized_class} {target_uuid}") # Will set the trigger return value as the property value by default