From f9d294dc4090e50a7658e1e716aea4c56f7bcdee Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Tue, 29 Jul 2025 18:22:24 -0400 Subject: [PATCH 1/9] Remove no-longer used property filtering on entity update --- src/app.py | 41 ++--------------------------------------- 1 file changed, 2 insertions(+), 39 deletions(-) diff --git a/src/app.py b/src/app.py index b554734f..92c8019a 100644 --- a/src/app.py +++ b/src/app.py @@ -1313,10 +1313,6 @@ def create_multiple_samples(count): """ Update the properties of a given entity -Response result filtering is supported based on query string -For example: /entities/?return_all_properties=true -Default to skip those time-consuming properties - Parameters ---------- entity_type : str @@ -1326,8 +1322,8 @@ def create_multiple_samples(count): Returns ------- -json - All the updated properties of the target entity +str + A successful message """ @app.route('/entities/', methods = ['PUT']) def update_entity(id): @@ -1504,39 +1500,6 @@ def update_entity(id): # Generate 'before_update_trigger' data and update the entity details in Neo4j merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) - # By default we'll return all the properties but skip these time-consuming ones - # Donor doesn't need to skip any - properties_to_skip = [] - - if normalized_entity_type == 'Sample': - properties_to_skip = [ - 'direct_ancestor' - ] - # 2/17/23 - Also adding publication for skipping properties ~Derek Furst - elif schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): - properties_to_skip = [ - 'direct_ancestors', - 'collections', - 'upload', - 'title', - 'previous_revision_uuid', - 'next_revision_uuid' - ] - elif normalized_entity_type in ['Upload', 'Collection', 'Epicollection']: - properties_to_skip = [ - 'datasets' - ] - - # Result filtering based on query string - # Will return all properties by running all the read triggers - # If the reuqest specifies `/entities/?return_all_properties=true` - if bool(request.args): - # The parased query string value is a string 'true' - return_all_properties = request.args.get('return_all_properties') - - if (return_all_properties is not None) and (return_all_properties.lower() == 'true'): - properties_to_skip = [] - # Remove the cached entities if Memcached is being used # DO NOT update the cache with new entity dict because the returned dict from PUT (some properties maybe skipped) # can be different from the one generated by GET call From e6e06dca6e4c4c94cc544066bbb078d481543316 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Tue, 29 Jul 2025 19:26:00 -0400 Subject: [PATCH 2/9] Update resulting message --- src/app.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/app.py b/src/app.py index 92c8019a..644d0507 100644 --- a/src/app.py +++ b/src/app.py @@ -1521,10 +1521,13 @@ def update_entity(id): reindex_entity(entity_dict['uuid'], user_token) # Do not return the updated dict to avoid computing overhead - 7/14/2023 by Zhou - # return jsonify(normalized_complete_dict) + message_returned = f"The update request on {normalized_entity_type} of {id} has been accepted, the backend may still be processing" + if lockout_overridden: + message_returned = f"Lockout overridden on {normalized_entity_type} of {id}" - override_msg = 'Lockout overridden. ' if lockout_overridden else '' - return jsonify({'message': f"{override_msg}{normalized_entity_type} of {id} has been updated"}) + # Here we use 200 status code instead of 202 mainly for compatibility + # so the API consumers don't need to update their implementations + return jsonify({'message': message_returned}) """ From 3006bc9a8439792822fd4a4ce88a75395325fe1d Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Wed, 30 Jul 2025 13:32:07 -0400 Subject: [PATCH 3/9] Switch back to the cache delete on all descendants --- src/app.py | 132 ++++++++++++++++++----------------------------------- 1 file changed, 44 insertions(+), 88 deletions(-) diff --git a/src/app.py b/src/app.py index 644d0507..e6d84b50 100644 --- a/src/app.py +++ b/src/app.py @@ -1513,11 +1513,7 @@ def update_entity(id): # # Will also filter the result based on schema # normalized_complete_dict = schema_manager.normalize_entity_result_for_response(complete_dict) - # Also reindex the updated entity node in elasticsearch via search-api - logger.log(logging.INFO - ,f"Re-indexing for modification of {entity_dict['entity_type']}" - f" with UUID {entity_dict['uuid']}") - + # Also reindex the updated entity in elasticsearch via search-api reindex_entity(entity_dict['uuid'], user_token) # Do not return the updated dict to avoid computing overhead - 7/14/2023 by Zhou @@ -5184,7 +5180,7 @@ def update_entity_details(request, normalized_entity_type, user_token, json_data """ -Execute 'after_update_triiger' methods +Execute 'after_update_trigger' methods Parameters ---------- @@ -5308,94 +5304,54 @@ def require_json(request): """ def delete_cache(id): if MEMCACHED_MODE: - # ========================================================================= - # Commented out on 7/28/2025 and replaced with the original implementation - # in the hope that this may reduce the 504 timeout rate - Zhou - # ========================================================================= - # entity_dict = query_target_entity(id, get_internal_token()) - # entity_uuid = entity_dict['uuid'] - # entity_type = entity_dict['entity_type'] - # descendant_uuids = [] - # collection_dataset_uuids = [] - # upload_dataset_uuids = [] - # collection_uuids = [] - # dataset_upload_dict = {} - # publication_collection_dict = {} - - # # Determine the associated cache keys based on the entity type - # # To reduce unnecessary Neo4j lookups that may cause timeout on the PUT call - - # # For Donor/Datasets/Sample/Publication, delete the cache of all the descendants - # if entity_type in ['Donor', 'Sample', 'Dataset', 'Publication']: - # descendant_uuids = schema_neo4j_queries.get_descendants(neo4j_driver_instance, entity_uuid , 'uuid') - - # # For Collection/Epicollection, delete the cache for each of its associated datasets (via [:IN_COLLECTION]) - # if schema_manager.entity_type_instanceof(entity_type, 'Collection'): - # collection_dataset_uuids = schema_neo4j_queries.get_collection_associated_datasets(neo4j_driver_instance, entity_uuid , 'uuid') - - # # For Upload, delete the cache for each of its associated Datasets (via [:IN_UPLOAD]) - # if entity_type == 'Upload': - # upload_dataset_uuids = schema_neo4j_queries.get_upload_datasets(neo4j_driver_instance, entity_uuid , 'uuid') - - # # For Dataset, delete the associated Collections cache and single Upload cache - # if entity_type == 'Dataset': - # collection_uuids = schema_neo4j_queries.get_dataset_collections(neo4j_driver_instance, entity_uuid , 'uuid') - # dataset_upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid) - - # # For Publication, delete cache of the associated collection - # # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. - # # Still keep it in the code until further decision - Zhou - # if entity_type == 'Publication': - # publication_collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid) - - # # We only use uuid in the cache key acorss all the cache types - # uuids_list = [entity_uuid] + descendant_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids - - # # It's possible the target dataset has no linked upload - # if dataset_upload_dict: - # uuids_list.append(dataset_upload_dict['uuid']) - - # # It's possible the target publicaiton has no associated collection - # if publication_collection_dict: - # uuids_list.append(publication_collection_dict['uuid']) - - # # Final batch delete - # schema_manager.delete_memcached_cache(uuids_list) - - - # ========================================================================= - # The original implementation - # ========================================================================= - # First delete the target entity cache entity_dict = query_target_entity(id, get_internal_token()) entity_uuid = entity_dict['uuid'] - - # If the target entity is Sample (`direct_ancestor`) or Dataset/Publication (`direct_ancestors`) - # Delete the cache of all the direct descendants (children) - child_uuids = schema_neo4j_queries.get_children(neo4j_driver_instance, entity_uuid , 'uuid') - - # If the target entity is Collection, delete the cache for each of its associated - # Datasets and Publications (via [:IN_COLLECTION] relationship) as well as just Publications (via [:USES_DATA] relationship) - collection_dataset_uuids = schema_neo4j_queries.get_collection_associated_datasets(neo4j_driver_instance, entity_uuid , 'uuid') - - # If the target entity is Upload, delete the cache for each of its associated Datasets (via [:IN_UPLOAD] relationship) - upload_dataset_uuids = schema_neo4j_queries.get_upload_datasets(neo4j_driver_instance, entity_uuid , 'uuid') - - # If the target entity is Datasets/Publication, delete the associated Collections cache, Upload cache - collection_uuids = schema_neo4j_queries.get_dataset_collections(neo4j_driver_instance, entity_uuid , 'uuid') - collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid) - upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid) - + entity_type = entity_dict['entity_type'] + descendant_uuids = [] + collection_dataset_uuids = [] + upload_dataset_uuids = [] + collection_uuids = [] + dataset_upload_dict = {} + publication_collection_dict = {} + + # Determine the associated cache keys based on the entity type + # To reduce unnecessary Neo4j lookups that may cause timeout on the PUT call + + # For Donor/Datasets/Sample/Publication, delete the cache of all the descendants + if entity_type in ['Donor', 'Sample', 'Dataset', 'Publication']: + descendant_uuids = schema_neo4j_queries.get_descendants(neo4j_driver_instance, entity_uuid , 'uuid') + + # For Collection/Epicollection, delete the cache for each of its associated datasets (via [:IN_COLLECTION]) + if schema_manager.entity_type_instanceof(entity_type, 'Collection'): + collection_dataset_uuids = schema_neo4j_queries.get_collection_associated_datasets(neo4j_driver_instance, entity_uuid , 'uuid') + + # For Upload, delete the cache for each of its associated Datasets (via [:IN_UPLOAD]) + if entity_type == 'Upload': + upload_dataset_uuids = schema_neo4j_queries.get_upload_datasets(neo4j_driver_instance, entity_uuid , 'uuid') + + # For Dataset, delete the associated Collections cache and single Upload cache + if entity_type == 'Dataset': + collection_uuids = schema_neo4j_queries.get_dataset_collections(neo4j_driver_instance, entity_uuid , 'uuid') + dataset_upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid) + + # For Publication, delete cache of the associated collection + # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. + # Still keep it in the code until further decision - Zhou + if entity_type == 'Publication': + publication_collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid) + # We only use uuid in the cache key acorss all the cache types - uuids_list = [entity_uuid] + child_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids + uuids_list = [entity_uuid] + descendant_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids - # It's possible no linked collection or upload - if collection_dict: - uuids_list.append(collection_dict['uuid']) + # It's possible the target dataset has no linked upload + if dataset_upload_dict: + uuids_list.append(dataset_upload_dict['uuid']) - if upload_dict: - uuids_list.append(upload_dict['uuid']) + # It's possible the target publicaiton has no associated collection + if publication_collection_dict: + uuids_list.append(publication_collection_dict['uuid']) + # Final batch delete schema_manager.delete_memcached_cache(uuids_list) From e2a3912470aeb643db99056d85e8fad48af76454 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Mon, 4 Aug 2025 12:56:57 -0400 Subject: [PATCH 4/9] Bump version to 2.5.11 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index ff3b2cd9..398d0dac 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.5.10 +2.5.11 From 2729872fdd01e386e89755356c3ea7e4ab760bbf Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Mon, 4 Aug 2025 13:34:50 -0400 Subject: [PATCH 5/9] Update comments --- src/app.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/app.py b/src/app.py index e6d84b50..f09b0e51 100644 --- a/src/app.py +++ b/src/app.py @@ -1506,13 +1506,6 @@ def update_entity(id): if MEMCACHED_MODE: delete_cache(id) - # Do not return the updated dict to avoid computing overhead - 7/14/2023 by Zhou - # # Generate the complete entity dict - # complete_dict = schema_manager.get_complete_entity_result(user_token, merged_updated_dict, properties_to_skip) - - # # Will also filter the result based on schema - # normalized_complete_dict = schema_manager.normalize_entity_result_for_response(complete_dict) - # Also reindex the updated entity in elasticsearch via search-api reindex_entity(entity_dict['uuid'], user_token) @@ -5315,8 +5308,6 @@ def delete_cache(id): publication_collection_dict = {} # Determine the associated cache keys based on the entity type - # To reduce unnecessary Neo4j lookups that may cause timeout on the PUT call - # For Donor/Datasets/Sample/Publication, delete the cache of all the descendants if entity_type in ['Donor', 'Sample', 'Dataset', 'Publication']: descendant_uuids = schema_neo4j_queries.get_descendants(neo4j_driver_instance, entity_uuid , 'uuid') @@ -5329,12 +5320,12 @@ def delete_cache(id): if entity_type == 'Upload': upload_dataset_uuids = schema_neo4j_queries.get_upload_datasets(neo4j_driver_instance, entity_uuid , 'uuid') - # For Dataset, delete the associated Collections cache and single Upload cache + # For Dataset, also delete the cache of associated Collections and Upload if entity_type == 'Dataset': collection_uuids = schema_neo4j_queries.get_dataset_collections(neo4j_driver_instance, entity_uuid , 'uuid') dataset_upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid) - # For Publication, delete cache of the associated collection + # For Publication, also delete cache of the associated collection # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. # Still keep it in the code until further decision - Zhou if entity_type == 'Publication': @@ -5343,11 +5334,11 @@ def delete_cache(id): # We only use uuid in the cache key acorss all the cache types uuids_list = [entity_uuid] + descendant_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids - # It's possible the target dataset has no linked upload + # Add to the list if the target dataset has linked upload if dataset_upload_dict: uuids_list.append(dataset_upload_dict['uuid']) - # It's possible the target publicaiton has no associated collection + # Add to the list if the target publicaiton has associated collection if publication_collection_dict: uuids_list.append(publication_collection_dict['uuid']) From 408513e9a13b486922735a6dc0405eaa64d9fd75 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Mon, 4 Aug 2025 13:48:16 -0400 Subject: [PATCH 6/9] Introduce configurable debug mode --- src/app.py | 7 +++++-- src/instance/app.cfg.example | 6 +++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/app.py b/src/app.py index f09b0e51..fcb6a28e 100644 --- a/src/app.py +++ b/src/app.py @@ -50,8 +50,11 @@ # Root logger configuration global logger -# Set logging format and level (default is warning) -logging.basicConfig(format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s', level=logging.DEBUG, datefmt='%Y-%m-%d %H:%M:%S') +# Set logging format and level +if app.config['DEBUG_MODE']: + logging.basicConfig(format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s', level=logging.DEBUG, datefmt='%Y-%m-%d %H:%M:%S') +else: + logging.basicConfig(format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s', level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S') # Use `getLogger()` instead of `getLogger(__name__)` to apply the config to the root logger # will be inherited by the sub-module loggers diff --git a/src/instance/app.cfg.example b/src/instance/app.cfg.example index 595ee728..174fda33 100644 --- a/src/instance/app.cfg.example +++ b/src/instance/app.cfg.example @@ -1,4 +1,8 @@ -# Set to True to disable the PUT and POST calls, used on STAGE to make entity-api READ-ONLY +# Set to False for PROD deployment to use INFO logging level +# Default to DEBUG for DEV/TEST +DEBUG_MODE = True + +# Set to True to disable the PUT and POST calls to make entity-api READ-ONLY READ_ONLY_MODE = False # File path of schema yaml file, DO NOT MODIFY From 892c7ed05b7a0d627055d44091dd93e7f945534b Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Mon, 4 Aug 2025 16:13:56 -0400 Subject: [PATCH 7/9] Optimize delete_cache() --- src/app.py | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/src/app.py b/src/app.py index fcb6a28e..f2b1aa72 100644 --- a/src/app.py +++ b/src/app.py @@ -434,7 +434,8 @@ def flush_cache(id): msg = '' if MEMCACHED_MODE: - delete_cache(id) + entity_dict = query_target_entity(id, get_internal_token()) + delete_cache(entity_dict['uuid'], entity_dict['entity_type']) msg = f'The cached data has been deleted from Memcached for entity {id}' else: msg = 'No caching is being used because Memcached mode is not enabled at all' @@ -1319,7 +1320,7 @@ def create_multiple_samples(count): Parameters ---------- entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication id : str The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of target entity @@ -1359,6 +1360,7 @@ def update_entity(id): # Get the entity dict from cache if exists # Otherwise query against uuid-api and neo4j to get the entity dict if the id exists entity_dict = query_target_entity(id, user_token) + entity_uuid = entity_dict['uuid'] # Check that the user has the correct access to modify this entity validate_user_update_privilege(entity_dict, user_token) @@ -1382,7 +1384,7 @@ def update_entity(id): locked_entity_update_header = request.headers.get(SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER) if locked_entity_update_header and (LOCKED_ENTITY_UPDATE_OVERRIDE_KEY == locked_entity_update_header): lockout_overridden = True - logger.info(f"For {entity_dict['entity_type']} {entity_dict['uuid']}" + logger.info(f"For {normalized_entity_type} {entity_uuid}" f" update prohibited due to {str(leue)}," f" but being overridden by valid {SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER} in request.") else: @@ -1507,10 +1509,10 @@ def update_entity(id): # DO NOT update the cache with new entity dict because the returned dict from PUT (some properties maybe skipped) # can be different from the one generated by GET call if MEMCACHED_MODE: - delete_cache(id) + delete_cache(entity_uuid, normalized_entity_type) # Also reindex the updated entity in elasticsearch via search-api - reindex_entity(entity_dict['uuid'], user_token) + reindex_entity(entity_uuid, user_token) # Do not return the updated dict to avoid computing overhead - 7/14/2023 by Zhou message_returned = f"The update request on {normalized_entity_type} of {id} has been accepted, the backend may still be processing" @@ -4489,7 +4491,7 @@ def get_internal_token(): entity_dict : dict A Python dictionary retrieved for the entity normalized_entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor, Publication, Upload + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication Returns ------- @@ -4647,7 +4649,7 @@ def _get_dataset_associated_metadata(dataset_dict, dataset_visibility, valid_use request : flask.Request object The incoming request normalized_entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication user_token: str The user's globus groups token json_data_dict: dict @@ -4796,7 +4798,7 @@ def create_entity_details(request, normalized_entity_type, user_token, json_data request : flask.Request object The incoming request normalized_entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor + Must be "Sample" in this case user_token: str The user's globus groups token json_data_dict: dict @@ -4946,7 +4948,7 @@ def create_multiple_samples_details(request, normalized_entity_type, user_token, request : flask.Request object The incoming request normalized_entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor + Must be "Dataset" in this case user_token: str The user's globus groups token json_data_dict_list: list @@ -5062,7 +5064,7 @@ def create_multiple_component_details(request, normalized_entity_type, user_toke Parameters ---------- normalized_entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication user_token: str The user's globus groups token merged_data_dict: dict @@ -5097,7 +5099,7 @@ def after_create(normalized_entity_type, user_token, merged_data_dict): request : flask.Request object The incoming request normalized_entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication user_token: str The user's globus groups token json_data_dict: dict @@ -5181,7 +5183,7 @@ def update_entity_details(request, normalized_entity_type, user_token, json_data Parameters ---------- normalized_entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication user_token: str The user's globus groups token entity_dict: dict @@ -5291,18 +5293,20 @@ def require_json(request): """ -Delete the cached data of all possible keys used for the given entity id +Delete the cached data of all possible keys used for the given entity_uuid and entity_type +By taking entity_uuid and entity_type as input, it eliminates the need to call query_target_entity() +which is more useful when the input id could be either UUID or HuBMAP ID. Parameters ---------- -id : str - The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of target entity (Donor/Dataset/Sample/Upload/Collection/Publication) +entity_uuid : str + The UUID of target entity Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication + +entity_type : str + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication """ -def delete_cache(id): +def delete_cache(entity_uuid, entity_type): if MEMCACHED_MODE: - entity_dict = query_target_entity(id, get_internal_token()) - entity_uuid = entity_dict['uuid'] - entity_type = entity_dict['entity_type'] descendant_uuids = [] collection_dataset_uuids = [] upload_dataset_uuids = [] From 2000c2b8343ac3e6f32441ee7fea8edb46c7a673 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Mon, 4 Aug 2025 16:26:02 -0400 Subject: [PATCH 8/9] Reindex against single search-api instance --- src/app.py | 23 ++++++++++------------- src/instance/app.cfg.example | 6 +++--- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/app.py b/src/app.py index f2b1aa72..03c2fbee 100644 --- a/src/app.py +++ b/src/app.py @@ -68,7 +68,7 @@ app.config['UUID_API_URL'] = app.config['UUID_API_URL'].strip('/') app.config['INGEST_API_URL'] = app.config['INGEST_API_URL'].strip('/') app.config['ONTOLOGY_API_URL'] = app.config['ONTOLOGY_API_URL'].strip('/') -app.config['SEARCH_API_URL_LIST'] = [url.strip('/') for url in app.config['SEARCH_API_URL_LIST']] +app.config['SEARCH_API_URL'] = app.config['SEARCH_API_URL'].strip('/') S3_settings_dict = {'large_response_threshold': app.config['LARGE_RESPONSE_THRESHOLD'] , 'aws_access_key_id': app.config['AWS_ACCESS_KEY_ID'] @@ -5301,7 +5301,6 @@ def require_json(request): ---------- entity_uuid : str The UUID of target entity Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication - entity_type : str One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication """ @@ -5354,7 +5353,7 @@ def delete_cache(entity_uuid, entity_type): """ -Make a call to each search-api instance to reindex this entity node in elasticsearch +Make a call to search-api to trigger reindex of this entity document in elasticsearch Parameters ---------- @@ -5368,18 +5367,16 @@ def reindex_entity(uuid, user_token): 'Authorization': f'Bearer {user_token}' } - # Reindex the target entity against each configured search-api instance - for search_api_url in app.config['SEARCH_API_URL_LIST']: - logger.info(f"Making a call to search-api instance of {search_api_url} to reindex uuid: {uuid}") + logger.info(f"Making a call to search-api to reindex uuid: {uuid}") - response = requests.put(f"{search_api_url}/reindex/{uuid}", headers = headers) + response = requests.put(f"{app.config['SEARCH_API_URL']}/reindex/{uuid}", headers = headers) - # The reindex takes time, so 202 Accepted response status code indicates that - # the request has been accepted for processing, but the processing has not been completed - if response.status_code == 202: - logger.info(f"The search-api instance of {search_api_url} has accepted the reindex request for uuid: {uuid}") - else: - logger.error(f"The search-api instance of {search_api_url} failed to initialize the reindex for uuid: {uuid}") + # The reindex takes time, so 202 Accepted response status code indicates that + # the request has been accepted for processing, but the processing has not been completed + if response.status_code == 202: + logger.info(f"The search-api has accepted the reindex request for uuid: {uuid}") + else: + logger.error(f"The search-api failed to initialize the reindex for uuid: {uuid}") """ diff --git a/src/instance/app.cfg.example b/src/instance/app.cfg.example index 174fda33..9cab5ad9 100644 --- a/src/instance/app.cfg.example +++ b/src/instance/app.cfg.example @@ -41,7 +41,7 @@ MEMCACHED_SERVER = 'host:11211' # Change prefix based on deployment environment, default for DEV MEMCACHED_PREFIX = 'hm_entity_dev_' -# URL for talking to UUID API (default value used for docker deployment, no token needed) +# URL for talking to UUID API (default value used for docker deployment) # Works regardless of the trailing slash / UUID_API_URL = 'http://uuid-api:8080' @@ -58,9 +58,9 @@ ONTOLOGY_API_URL = 'https://ontology-api.dev.hubmapconsortium.org' # necessitates subsequent calls for other entities. ENTITY_API_URL = 'http://localhost:5002' -# A list of URLs for talking to multiple Search API instances (default value used for docker deployment, no token needed) +# URL for talking to Search API (default value used for docker deployment) # Works regardless of the trailing slash / -SEARCH_API_URL_LIST = ['http://search-api:8080'] +SEARCH_API_URL = 'http://search-api:8080' #The Base URL to the Globus transfer application site GLOBUS_APP_BASE_URL = 'https://app.globus.org' From 4b0fce930a232ec1b864df25f86ad1ca009b81f3 Mon Sep 17 00:00:00 2001 From: yuanzhou Date: Sun, 10 Aug 2025 19:35:34 -0400 Subject: [PATCH 9/9] Logging cleanups --- src/app.py | 6 +- src/app_neo4j_queries.py | 45 ++++++------- src/schema/schema_manager.py | 8 +-- src/schema/schema_neo4j_queries.py | 101 +++++++++++++---------------- src/schema/schema_triggers.py | 10 +-- 5 files changed, 80 insertions(+), 90 deletions(-) diff --git a/src/app.py b/src/app.py index 03c2fbee..e358111c 100644 --- a/src/app.py +++ b/src/app.py @@ -3623,7 +3623,7 @@ def get_prov_info_for_dataset(id): # Get provenance non-organ Samples for the Dataset all the way back to each Donor, to supplement # the "first sample" data stashed in internal_dict in the previous section. dataset_samples = app_neo4j_queries.get_all_dataset_samples(neo4j_driver_instance, uuid) - logger.debug(f"dataset_samples={str(dataset_samples)}") + if 'all' in include_samples: internal_dict[HEADER_DATASET_SAMPLES] = dataset_samples else: @@ -4171,7 +4171,6 @@ def bulk_update_entities( if idx < len(entity_updates) - 1: time.sleep(throttle) - logger.info(f"bulk_update_entities() results: {results}") return results @@ -5248,7 +5247,7 @@ def query_target_entity(id, user_token): # The uuid exists via uuid-api doesn't mean it also exists in Neo4j if not entity_dict: - logger.debug(f"Entity of uuid: {uuid} not found in Neo4j") + logger.info(f"Entity of uuid: {uuid} not found in Neo4j") # Still use the user provided id, especially when it's a hubmap_id, for error message not_found_error(f"Entity of id: {id} not found in Neo4j") @@ -5261,7 +5260,6 @@ def query_target_entity(id, user_token): memcached_client_instance.set(cache_key, entity_dict, expire = SchemaConstants.MEMCACHED_TTL) else: logger.info(f'Using neo4j entity cache of UUID {uuid} at time {datetime.now()}') - logger.debug(cache_result) entity_dict = cache_result except requests.exceptions.RequestException as e: diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index 220df6b4..ac3fb441 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -44,7 +44,7 @@ def check_connection(neo4j_driver): logger.info("Neo4j is connected :)") return True - logger.info("Neo4j is NOT connected :(") + logger.error("Neo4j is NOT connected :(") return False @@ -83,7 +83,7 @@ def get_entities_by_type(neo4j_driver, entity_type, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}") logger.info("======get_entities_by_type() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -148,7 +148,7 @@ def get_ancestor_organs(neo4j_driver, entity_uuid): f"RETURN apoc.coll.toSet(COLLECT(organ)) AS {record_field_name}") logger.info("======get_ancestor_organs() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -199,7 +199,7 @@ def create_multiple_samples(neo4j_driver, samples_dict_list, activity_data_dict, f"CREATE (a)-[:ACTIVITY_OUTPUT]->(e)") logger.info("======create_multiple_samples() individual query======") - logger.info(query) + logger.debug(query) result = tx.run(query) @@ -211,7 +211,7 @@ def create_multiple_samples(neo4j_driver, samples_dict_list, activity_data_dict, logger.exception(msg) if tx.closed() == False: - logger.info("Failed to commit create_multiple_samples() transaction, rollback") + logger.error("Failed to commit create_multiple_samples() transaction, rollback") tx.rollback() @@ -262,7 +262,7 @@ def create_multiple_datasets(neo4j_driver, datasets_dict_list, activity_data_dic f"RETURN e AS {record_field_name}") logger.info("======create_multiple_samples() individual query======") - logger.info(query) + logger.debug(query) result = tx.run(query) record = result.single() @@ -279,7 +279,7 @@ def create_multiple_datasets(neo4j_driver, datasets_dict_list, activity_data_dic logger.exception(msg) if tx.closed() == False: - logger.info("Failed to commit create_multiple_samples() transaction, rollback") + logger.error("Failed to commit create_multiple_samples() transaction, rollback") tx.rollback() @@ -314,7 +314,7 @@ def get_sorted_revisions(neo4j_driver, uuid): f"RETURN COLLECT(node) AS {record_field_name}") logger.info("======get_sorted_revisions() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -364,7 +364,7 @@ def get_sorted_multi_revisions(neo4j_driver, uuid, fetch_all=True, property_key= ) logger.info("======get_sorted_revisions() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -420,7 +420,7 @@ def get_previous_revisions(neo4j_driver, uuid, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(prev)) AS {record_field_name}") logger.info("======get_previous_revisions() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -470,7 +470,7 @@ def get_next_revisions(neo4j_driver, uuid, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(next)) AS {record_field_name}") logger.info("======get_next_revisions() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -511,7 +511,7 @@ def is_next_revision_latest(neo4j_driver, uuid): f"RETURN apoc.coll.toSet(COLLECT(next.uuid)) AS {record_field_name}") logger.info("======is_next_revision_latest() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -547,7 +547,7 @@ def nested_previous_revisions(neo4j_driver, previous_revision_list): "RETURN connectedUUID1, connectedUUID2 ") logger.info("======nested_previous_revisions() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -598,7 +598,7 @@ def get_provenance(neo4j_driver, uuid, depth): f"RETURN json") logger.info("======get_provenance() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: return session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -634,7 +634,7 @@ def get_dataset_latest_revision(neo4j_driver, uuid, public = False): f"RETURN latest AS {record_field_name}") logger.info("======get_dataset_latest_revision() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -667,7 +667,7 @@ def get_dataset_revision_number(neo4j_driver, uuid): f"RETURN COUNT(prev) AS {record_field_name}") logger.info("======get_dataset_revision_number() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -698,7 +698,7 @@ def get_associated_organs_from_dataset(neo4j_driver, dataset_uuid): f"RETURN apoc.coll.toSet(COLLECT(organ)) AS {record_field_name}") logger.info("======get_associated_organs_from_dataset() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -717,7 +717,7 @@ def get_associated_samples_from_dataset(neo4j_driver, dataset_uuid): f"RETURN apoc.coll.toSet(COLLECT(sample)) AS {record_field_name}") logger.info("======get_associated_samples_from_dataset() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -736,7 +736,7 @@ def get_associated_donors_from_dataset(neo4j_driver, dataset_uuid): f"RETURN apoc.coll.toSet(COLLECT(donor)) AS {record_field_name}") logger.info("======get_associated_donors_from_dataset() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -779,7 +779,7 @@ def get_individual_prov_info(neo4j_driver, dataset_uuid): f" ds.group_uuid, ds.created_timestamp, ds.created_by_user_email, ds.last_modified_timestamp, " f" ds.last_modified_user_email, ds.lab_dataset_id, ds.dataset_type, METASAMPLE, PROCESSED_DATASET") logger.info("======get_prov_info() query======") - logger.info(query) + logger.debug(query) record_contents = [] record_dict = {} @@ -849,7 +849,7 @@ def get_individual_prov_info(neo4j_driver, dataset_uuid): def get_all_dataset_samples(neo4j_driver, dataset_uuid): query = f"MATCH p = (ds:Dataset {{uuid: '{dataset_uuid}'}})<-[*]-(dn:Donor) return p" logger.info("======get_all_dataset_samples() query======") - logger.info(query) + logger.debug(query) # Dictionary of Dictionaries, keyed by UUID, containing each Sample returned in the Neo4j Path dataset_sample_list = {} @@ -890,7 +890,8 @@ def get_sankey_info(neo4j_driver, public_only): f"ORDER BY ds.group_name") logger.info("======get_sankey_info() query======") - logger.info(query) + logger.debug(query) + with neo4j_driver.session() as session: # Because we're returning multiple things, we use session.run rather than session.read_transaction result = session.run(query) diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 59b11e3a..10ffa360 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -1524,7 +1524,7 @@ def get_user_info(request): user_info = _auth_helper.getUserInfoUsingRequest(request, True) logger.info("======get_user_info()======") - logger.info(user_info) + logger.debug(user_info) # For debugging purposes try: @@ -1533,7 +1533,7 @@ def get_user_info(request): groups_list = auth_helper_instance.get_user_groups_deprecated(token) logger.info("======Groups using get_user_groups_deprecated()======") - logger.info(groups_list) + logger.debug(groups_list) except Exception: msg = "For debugging purposes, failed to parse the Authorization token by calling commons.auth_helper.getAuthorizationTokens()" # Log the full stack trace, prepend a line with our message @@ -1819,7 +1819,7 @@ def create_hubmap_ids(normalized_class, json_data_dict, user_token, user_info_di query_parms = {'entity_count': count} logger.info("======create_hubmap_ids() json_to_post to uuid-api======") - logger.info(json_to_post) + logger.debug(json_to_post) # Disable ssl certificate verification target_url = _uuid_api_url + SchemaConstants.UUID_API_ID_ENDPOINT @@ -1857,7 +1857,7 @@ def create_hubmap_ids(normalized_class, json_data_dict, user_token, user_info_di d.pop('hubmap_base_id', None) logger.info("======create_hubmap_ids() generated ids from uuid-api======") - logger.info(ids_list) + logger.debug(ids_list) return ids_list else: diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 3d1d9ca7..bbff230e 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -45,7 +45,7 @@ def create_entity(neo4j_driver, entity_type, entity_data_dict, superclass = None f"RETURN e AS {record_field_name}") logger.info("======create_entity() query======") - logger.info(query) + logger.debug(query) try: with neo4j_driver.session() as session: @@ -59,9 +59,6 @@ def create_entity(neo4j_driver, entity_type, entity_data_dict, superclass = None entity_dict = node_to_dict(entity_node) - # logger.info("======create_entity() resulting entity_dict======") - # logger.info(entity_dict) - tx.commit() return entity_dict @@ -71,7 +68,7 @@ def create_entity(neo4j_driver, entity_type, entity_data_dict, superclass = None logger.exception(msg) if tx.closed() == False: - logger.info("Failed to commit create_entity() transaction, rollback") + logger.error("Failed to commit create_entity() transaction, rollback") tx.rollback() @@ -101,7 +98,7 @@ def get_entity(neo4j_driver, uuid): f"RETURN e AS {record_field_name}") logger.info("======get_entity() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -136,7 +133,7 @@ def filter_ancestors_by_type(neo4j_driver, direct_ancestor_uuids, entity_type): f"WHERE e.uuid in {direct_ancestor_uuids} AND toLower(e.entity_type) <> '{entity_type.lower()}' " f"RETURN e.entity_type AS entity_type, collect(e.uuid) AS uuids") logger.info("======filter_ancestors_by_type======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: records = session.run(query).data() @@ -180,7 +177,7 @@ def get_children(neo4j_driver, uuid, property_key = None): f"RETURN [a IN uniqueChildren | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") logger.info("======get_children() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -231,7 +228,7 @@ def get_parents(neo4j_driver, uuid, property_key = None): f"RETURN [a IN uniqueParents | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") logger.info("======get_parents() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -288,7 +285,7 @@ def get_siblings(neo4j_driver, uuid, property_key=None): logger.info("======get_siblings() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -344,7 +341,7 @@ def get_tuplets(neo4j_driver, uuid, property_key=None): logger.info("======get_tuplets() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -394,7 +391,7 @@ def get_ancestors(neo4j_driver, uuid, property_key = None): f"RETURN [a IN uniqueAncestors | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") logger.info("======get_ancestors() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -444,7 +441,7 @@ def get_descendants(neo4j_driver, uuid, property_key = None): f"RETURN [a IN uniqueDescendants | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") logger.info("======get_descendants() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -494,7 +491,7 @@ def get_collections(neo4j_driver, uuid, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(c)) AS {record_field_name}") logger.info("======get_collections() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -544,7 +541,7 @@ def get_uploads(neo4j_driver, uuid, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(u)) AS {record_field_name}") logger.info("======get_uploads() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -589,7 +586,7 @@ def get_dataset_direct_ancestors(neo4j_driver, uuid, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(s)) AS {record_field_name}") logger.info("======get_dataset_direct_ancestors() query======") - logger.info(query) + logger.debug(query) # Sessions will often be created and destroyed using a with block context with neo4j_driver.session() as session: @@ -634,7 +631,7 @@ def get_dataset_donor_organs_info(neo4j_driver, dataset_uuid): f" , organ_type: org.organ}})) AS donorOrganSet") logger.info("======get_dataset_donor_organs_info() ds_donors_organs_query======") - logger.info(ds_donors_organs_query) + logger.debug(ds_donors_organs_query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx @@ -646,7 +643,7 @@ def get_entity_type(neo4j_driver, entity_uuid: str) -> str: query: str = f"Match (ent {{uuid: '{entity_uuid}'}}) return ent.entity_type" logger.info("======get_entity_type() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -660,7 +657,7 @@ def get_entity_creation_action_activity(neo4j_driver, entity_uuid: str) -> str: query: str = f"MATCH (ds:Dataset {{uuid:'{entity_uuid}'}})<-[:ACTIVITY_OUTPUT]-(a:Activity) RETURN a.creation_action" logger.info("======get_entity_creation_action() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -717,7 +714,7 @@ def link_entity_to_direct_ancestors(neo4j_driver, entity_uuid, direct_ancestor_u if tx.closed() == False: # Log the full stack trace, prepend a line with our message - logger.info("Failed to commit link_entity_to_direct_ancestors() transaction, rollback") + logger.error("Failed to commit link_entity_to_direct_ancestors() transaction, rollback") tx.rollback() raise TransactionError(msg) @@ -757,7 +754,7 @@ def link_publication_to_associated_collection(neo4j_driver, entity_uuid, associa if tx.closed() == False: # Log the full stack trace, prepend a line with our message - logger.info("Failed to commit link_publication_to_associated_collection() transaction, rollback") + logger.error("Failed to commit link_publication_to_associated_collection() transaction, rollback") tx.rollback() raise TransactionError(msg) @@ -805,7 +802,7 @@ def link_collection_to_datasets(neo4j_driver, collection_uuid, dataset_uuid_list if tx.closed() == False: # Log the full stack trace, prepend a line with our message - logger.info("Failed to commit link_collection_to_datasets() transaction, rollback") + logger.error("Failed to commit link_collection_to_datasets() transaction, rollback") tx.rollback() raise TransactionError(msg) @@ -838,7 +835,7 @@ def link_entity_to_previous_revision(neo4j_driver, entity_uuid, previous_revisio if tx.closed() == False: # Log the full stack trace, prepend a line with our message - logger.info("Failed to commit link_entity_to_previous_revision() transaction, rollback") + logger.error("Failed to commit link_entity_to_previous_revision() transaction, rollback") tx.rollback() raise TransactionError(msg) @@ -869,7 +866,7 @@ def get_previous_revision_uuid(neo4j_driver, uuid): f"RETURN previous_revision.uuid AS {record_field_name}") logger.info("======get_previous_revision_uuid() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -905,7 +902,7 @@ def get_previous_revision_uuids(neo4j_driver, uuid): f"RETURN COLLECT(previous_revision.uuid) AS {record_field_name}") logger.info("======get_previous_revision_uuids() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -943,7 +940,7 @@ def get_next_revision_uuid(neo4j_driver, uuid): f"RETURN next_revision.uuid AS {record_field_name}") logger.info("======get_next_revision_uuid() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -979,7 +976,7 @@ def get_next_revision_uuids(neo4j_driver, uuid): f"RETURN COLLECT(next_revision.uuid) AS {record_field_name}") logger.info("======get_next_revision_uuids() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1022,7 +1019,7 @@ def get_collection_associated_datasets(neo4j_driver, uuid, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}") logger.info("======get_collection_associated_datasets() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1068,7 +1065,7 @@ def get_dataset_collections(neo4j_driver, uuid, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(c)) AS {record_field_name}") logger.info("======get_dataset_collections() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1110,7 +1107,7 @@ def get_publication_associated_collection(neo4j_driver, uuid): f"RETURN c as {record_field_name}") logger.info("=====get_publication_associated_collection() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1144,7 +1141,7 @@ def get_dataset_upload(neo4j_driver, uuid): f"RETURN s AS {record_field_name}") logger.info("======get_dataset_upload() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1181,7 +1178,7 @@ def get_collection_datasets(neo4j_driver, uuid): f"RETURN [a IN uniqueDataset | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") logger.info("======get_collection_datasets() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1217,7 +1214,7 @@ def get_collection_datasets_data_access_levels(neo4j_driver, uuid): f"RETURN COLLECT(DISTINCT d.data_access_level) AS {record_field_name}") logger.info("======get_collection_datasets_data_access_levels() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1253,7 +1250,7 @@ def get_collection_datasets_statuses(neo4j_driver, uuid): f"RETURN COLLECT(DISTINCT d.status) AS {record_field_name}") logger.info("======get_collection_datasets_statuses() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1298,7 +1295,7 @@ def link_datasets_to_upload(neo4j_driver, upload_uuid, dataset_uuids_list): f"MERGE (s)<-[r:IN_UPLOAD]-(d)") logger.info("======link_datasets_to_upload() query======") - logger.info(query) + logger.debug(query) tx.run(query) tx.commit() @@ -1308,7 +1305,7 @@ def link_datasets_to_upload(neo4j_driver, upload_uuid, dataset_uuids_list): logger.exception(msg) if tx.closed() == False: - logger.info("Failed to commit link_datasets_to_upload() transaction, rollback") + logger.error("Failed to commit link_datasets_to_upload() transaction, rollback") tx.rollback() @@ -1345,7 +1342,7 @@ def unlink_datasets_from_upload(neo4j_driver, upload_uuid, dataset_uuids_list): f"DELETE r") logger.info("======unlink_datasets_from_upload() query======") - logger.info(query) + logger.debug(query) tx.run(query) tx.commit() @@ -1355,7 +1352,7 @@ def unlink_datasets_from_upload(neo4j_driver, upload_uuid, dataset_uuids_list): logger.exception(msg) if tx.closed() == False: - logger.info("Failed to commit unlink_datasets_from_upload() transaction, rollback") + logger.error("Failed to commit unlink_datasets_from_upload() transaction, rollback") tx.rollback() @@ -1395,7 +1392,7 @@ def get_upload_datasets(neo4j_driver, uuid, property_key = None): f"RETURN [a IN uniqueUploads | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") logger.info("======get_upload_datasets() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1438,16 +1435,13 @@ def count_attached_published_datasets(neo4j_driver, entity_type, uuid): f"RETURN COUNT(d) AS {record_field_name}") logger.info("======count_attached_published_datasets() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) count = record[record_field_name] - # logger.info("======count_attached_published_datasets() resulting count======") - # logger.info(count) - return count @@ -1487,7 +1481,7 @@ def get_sample_direct_ancestor(neo4j_driver, uuid, property_key = None): f"RETURN parent AS {record_field_name}") logger.info("======get_sample_direct_ancestor() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1530,7 +1524,7 @@ def update_entity(neo4j_driver, entity_type, entity_data_dict, uuid): f"RETURN e AS {record_field_name}") logger.info("======update_entity() query======") - logger.info(query) + logger.debug(query) try: with neo4j_driver.session() as session: @@ -1546,9 +1540,6 @@ def update_entity(neo4j_driver, entity_type, entity_data_dict, uuid): entity_dict = node_to_dict(entity_node) - # logger.info("======update_entity() resulting entity_dict======") - # logger.info(entity_dict) - return entity_dict except TransactionError as te: msg = f"TransactionError from calling create_entity(): {te.value}" @@ -1556,7 +1547,7 @@ def update_entity(neo4j_driver, entity_type, entity_data_dict, uuid): logger.exception(msg) if tx.closed() == False: - logger.info("Failed to commit update_entity() transaction, rollback") + logger.error("Failed to commit update_entity() transaction, rollback") tx.rollback() @@ -1586,7 +1577,7 @@ def create_activity_tx(tx, activity_data_dict): f"RETURN e AS {record_field_name}") logger.info("======create_activity_tx() query======") - logger.info(query) + logger.debug(query) result = tx.run(query) record = result.single() @@ -1724,7 +1715,7 @@ def create_relationship_tx(tx, source_node_uuid, target_node_uuid, relationship, f"RETURN type(r) AS {record_field_name}") logger.info("======create_relationship_tx() query======") - logger.info(query) + logger.debug(query) result = tx.run(query) @@ -1748,7 +1739,7 @@ def create_outgoing_activity_relationships_tx(tx, source_node_uuids:list, activi f" CREATE (e) - [r:ACTIVITY_INPUT]->(a)") logger.info("======create_outgoing_activity_relationships_tx() query======") - logger.info(query) + logger.debug(query) result = tx.run(query) @@ -1793,7 +1784,7 @@ def _delete_activity_node_and_linkages_tx(tx, uuid): f"DELETE in, a, out") logger.info("======_delete_activity_node_and_linkages_tx() query======") - logger.info(query) + logger.debug(query) result = tx.run(query) @@ -1815,7 +1806,7 @@ def _delete_publication_associated_collection_linkages_tx(tx, uuid): f"DELETE r") logger.info("======_delete_publication_associated_collection_linkages_tx() query======") - logger.info(query) + logger.debug(query) result = tx.run(query) @@ -1835,7 +1826,7 @@ def _delete_collection_linkages_tx(tx, uuid): f" DELETE in") logger.info("======_delete_collection_linkages_tx() query======") - logger.info(query) + logger.debug(query) result = tx.run(query) diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index e1ca669f..c5d83de1 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1484,7 +1484,7 @@ def commit_thumbnail_file(property_key, normalized_type, user_token, existing_da 'user_token': user_token } - logger.info(f"Commit the uploaded thumbnail file of tmp file id {tmp_file_id} for entity {entity_uuid} via ingest-api call...") + logger.info(f"Commit the uploaded thumbnail file of tmp_file_id {tmp_file_id} for entity {entity_uuid} via ingest-api call...") request_headers = { 'Authorization': f'Bearer {user_token}' @@ -1494,7 +1494,7 @@ def commit_thumbnail_file(property_key, normalized_type, user_token, existing_da response = requests.post(url = ingest_api_target_url, headers = request_headers, json = json_to_post, verify = False) if response.status_code != 200: - msg = f"Failed to commit the thumbnail file of tmp file id {tmp_file_id} via ingest-api for entity uuid: {entity_uuid}" + msg = f"Failed to commit the thumbnail file of tmp_file_id {tmp_file_id} via ingest-api for entity uuid: {entity_uuid}" logger.error(msg) raise schema_errors.FileUploadException(msg) @@ -1588,7 +1588,7 @@ def delete_thumbnail_file(property_key, normalized_type, user_token, existing_da 'files_info_list': [file_info_dict] } - logger.info(f"Remove the uploaded thumbnail file {file_uuid} for entity {entity_uuid} via ingest-api call...") + logger.debug(f"Remove the uploaded thumbnail file {file_uuid} for entity {entity_uuid} via ingest-api call...") request_headers = { 'Authorization': f'Bearer {user_token}' @@ -2290,7 +2290,7 @@ def _commit_files(target_property_key, property_key, normalized_type, user_token 'user_token': user_token } - logger.info(f"Commit the uploaded file of temp_file_id {temp_file_id} for entity {entity_uuid} via ingest-api call...") + logger.debug(f"Commit the uploaded file of temp_file_id {temp_file_id} for entity {entity_uuid} via ingest-api call...") request_headers = { 'Authorization': f'Bearer {user_token}' @@ -2401,7 +2401,7 @@ def _delete_files(target_property_key, property_key, normalized_type, user_token 'files_info_list': files_info_list } - logger.info(f"Remove the uploaded files for entity {entity_uuid} via ingest-api call...") + logger.debug(f"Remove the uploaded files for entity {entity_uuid} via ingest-api call...") request_headers = { 'Authorization': f'Bearer {user_token}'