diff --git a/src/app.py b/src/app.py index b554734f..36bff7ab 100644 --- a/src/app.py +++ b/src/app.py @@ -47,25 +47,28 @@ from hubmap_commons.exceptions import HTTPException from hubmap_commons.S3_worker import S3Worker +# Specify the absolute path of the instance folder and use the config file relative to the instance path +app = Flask(__name__, instance_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'instance'), instance_relative_config = True) +app.config.from_pyfile('app.cfg') + # Root logger configuration global logger -# Set logging format and level (default is warning) -logging.basicConfig(format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s', level=logging.DEBUG, datefmt='%Y-%m-%d %H:%M:%S') +# Set logging format and level +if app.config['DEBUG_MODE']: + logging.basicConfig(format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s', level=logging.DEBUG, datefmt='%Y-%m-%d %H:%M:%S') +else: + logging.basicConfig(format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s', level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S') # Use `getLogger()` instead of `getLogger(__name__)` to apply the config to the root logger # will be inherited by the sub-module loggers logger = logging.getLogger() -# Specify the absolute path of the instance folder and use the config file relative to the instance path -app = Flask(__name__, instance_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'instance'), instance_relative_config = True) -app.config.from_pyfile('app.cfg') - # Remove trailing slash / from URL base to avoid "//" caused by config with trailing slash app.config['UUID_API_URL'] = app.config['UUID_API_URL'].strip('/') app.config['INGEST_API_URL'] = app.config['INGEST_API_URL'].strip('/') app.config['ONTOLOGY_API_URL'] = app.config['ONTOLOGY_API_URL'].strip('/') -app.config['SEARCH_API_URL_LIST'] = [url.strip('/') for url in app.config['SEARCH_API_URL_LIST']] +app.config['SEARCH_API_URL'] = 
app.config['SEARCH_API_URL'].strip('/') S3_settings_dict = {'large_response_threshold': app.config['LARGE_RESPONSE_THRESHOLD'] , 'aws_access_key_id': app.config['AWS_ACCESS_KEY_ID'] @@ -140,11 +143,11 @@ def http_internal_server_error(e): auth_helper_instance = AuthHelper.create(app.config['APP_CLIENT_ID'], app.config['APP_CLIENT_SECRET']) - logger.info('Initialized AuthHelper class successfully :)') + logger.info('Initialized auth_helper_instance successfully :)') else: auth_helper_instance = AuthHelper.instance() except Exception: - msg = 'Failed to initialize the AuthHelper class :(' + msg = 'Failed to initialize the auth_helper_instance :(' # Log the full stack trace, prepend a line with our message logger.exception(msg) @@ -160,9 +163,9 @@ def http_internal_server_error(e): neo4j_driver_instance = neo4j_driver.instance(app.config['NEO4J_URI'], app.config['NEO4J_USERNAME'], app.config['NEO4J_PASSWORD']) - logger.info('Initialized neo4j_driver module successfully :)') + logger.info('Initialized neo4j_driver_instance successfully :)') except Exception: - msg = 'Failed to initialize the neo4j_driver module :(' + msg = 'Failed to initialize the neo4j_driver_instance :(' # Log the full stack trace, prepend a line with our message logger.exception(msg) @@ -194,9 +197,9 @@ def http_internal_server_error(e): # memcached_client_instance can be instantiated without connecting to the Memcached server # A version() call will throw error (e.g., timeout) when failed to connect to server # Need to convert the version in bytes to string - logger.info(f'Connected to Memcached server {memcached_client_instance.version().decode()} successfully :)') + logger.info('Initialized memcached_client_instance successfully :)') except Exception: - msg = 'Failed to connect to the Memcached server :(' + msg = 'Failed to initialize memcached_client_instance :(' # Log the full stack trace, prepend a line with our message logger.exception(msg) @@ -227,10 +230,9 @@ def 
http_internal_server_error(e): memcached_client_instance, MEMCACHED_PREFIX) - logger.info('Initialized schema_manager module successfully :)') -# Use a broad catch-all here + logger.info('Initialized schema_manager successfully :)') except Exception: - msg = f"Failed to initialize the schema_manager module with" \ + msg = f"Failed to initialize the schema_manager with" \ f" _schema_yaml_file={_schema_yaml_file}." # Log the full stack trace, prepend a line with our message logger.exception(msg) @@ -247,9 +249,11 @@ def http_internal_server_error(e): , S3_OBJECT_URL_EXPIRATION_IN_SECS=S3_settings_dict['aws_object_url_expiration_in_secs'] , LARGE_RESPONSE_THRESHOLD=S3_settings_dict['large_response_threshold'] , SERVICE_S3_OBJ_PREFIX=S3_settings_dict['service_configured_obj_prefix']) - logger.info("anS3Worker initialized") -except Exception as s3exception: - logger.critical(s3exception, exc_info=True) + logger.info('Initialized anS3Worker successfully :)') +except Exception: + msg = 'Failed to initialize anS3Worker :(' + # Log the full stack trace, prepend a line with our message + logger.exception(msg) #################################################################################################### @@ -364,9 +368,10 @@ def get_status(): try: # If can't connect, won't be able to get the Memcached version memcached_client_instance.version() + logger.info(f'Connected to Memcached server {memcached_client_instance.version().decode()} :)') status_data['memcached_connection'] = True except Exception: - logger.error('Failed to connect to Memcached server') + logger.error('Failed to connect to Memcached server :(') return jsonify(status_data) @@ -431,7 +436,8 @@ def flush_cache(id): msg = '' if MEMCACHED_MODE: - delete_cache(id) + entity_dict = query_target_entity(id, get_internal_token()) + delete_cache(entity_dict['uuid'], entity_dict['entity_type']) msg = f'The cached data has been deleted from Memcached for entity {id}' else: msg = 'No caching is being used because 
Memcached mode is not enabled at all' @@ -1313,21 +1319,17 @@ def create_multiple_samples(count): """ Update the properties of a given entity -Response result filtering is supported based on query string -For example: /entities/?return_all_properties=true -Default to skip those time-consuming properties - Parameters ---------- entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication id : str The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of target entity Returns ------- -json - All the updated properties of the target entity +str + A successful message """ @app.route('/entities/', methods = ['PUT']) def update_entity(id): @@ -1360,6 +1362,7 @@ def update_entity(id): # Get the entity dict from cache if exists # Otherwise query against uuid-api and neo4j to get the entity dict if the id exists entity_dict = query_target_entity(id, user_token) + entity_uuid = entity_dict['uuid'] # Check that the user has the correct access to modify this entity validate_user_update_privilege(entity_dict, user_token) @@ -1383,7 +1386,7 @@ def update_entity(id): locked_entity_update_header = request.headers.get(SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER) if locked_entity_update_header and (LOCKED_ENTITY_UPDATE_OVERRIDE_KEY == locked_entity_update_header): lockout_overridden = True - logger.info(f"For {entity_dict['entity_type']} {entity_dict['uuid']}" + logger.info(f"For {normalized_entity_type} {entity_uuid}" f" update prohibited due to {str(leue)}," f" but being overridden by valid {SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER} in request.") else: @@ -1504,64 +1507,23 @@ def update_entity(id): # Generate 'before_update_trigger' data and update the entity details in Neo4j merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) - # By default we'll return all the properties but skip these 
time-consuming ones - # Donor doesn't need to skip any - properties_to_skip = [] - - if normalized_entity_type == 'Sample': - properties_to_skip = [ - 'direct_ancestor' - ] - # 2/17/23 - Also adding publication for skipping properties ~Derek Furst - elif schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): - properties_to_skip = [ - 'direct_ancestors', - 'collections', - 'upload', - 'title', - 'previous_revision_uuid', - 'next_revision_uuid' - ] - elif normalized_entity_type in ['Upload', 'Collection', 'Epicollection']: - properties_to_skip = [ - 'datasets' - ] - - # Result filtering based on query string - # Will return all properties by running all the read triggers - # If the reuqest specifies `/entities/?return_all_properties=true` - if bool(request.args): - # The parased query string value is a string 'true' - return_all_properties = request.args.get('return_all_properties') - - if (return_all_properties is not None) and (return_all_properties.lower() == 'true'): - properties_to_skip = [] - # Remove the cached entities if Memcached is being used # DO NOT update the cache with new entity dict because the returned dict from PUT (some properties maybe skipped) # can be different from the one generated by GET call if MEMCACHED_MODE: - delete_cache(id) + delete_cache(entity_uuid, normalized_entity_type) - # Do not return the updated dict to avoid computing overhead - 7/14/2023 by Zhou - # # Generate the complete entity dict - # complete_dict = schema_manager.get_complete_entity_result(user_token, merged_updated_dict, properties_to_skip) - - # # Will also filter the result based on schema - # normalized_complete_dict = schema_manager.normalize_entity_result_for_response(complete_dict) - - # Also reindex the updated entity node in elasticsearch via search-api - logger.log(logging.INFO - ,f"Re-indexing for modification of {entity_dict['entity_type']}" - f" with UUID {entity_dict['uuid']}") - - reindex_entity(entity_dict['uuid'], user_token) + # 
Also reindex the updated entity in elasticsearch via search-api + reindex_entity(entity_uuid, user_token) # Do not return the updated dict to avoid computing overhead - 7/14/2023 by Zhou - # return jsonify(normalized_complete_dict) + message_returned = f"The update request on {normalized_entity_type} of {id} has been accepted, the backend may still be processing" + if lockout_overridden: + message_returned = f"Lockout overridden on {normalized_entity_type} of {id}" - override_msg = 'Lockout overridden. ' if lockout_overridden else '' - return jsonify({'message': f"{override_msg}{normalized_entity_type} of {id} has been updated"}) + # Here we use 200 status code instead of 202 mainly for compatibility + # so the API consumers don't need to update their implementations + return jsonify({'message': message_returned}) """ @@ -3663,7 +3625,7 @@ def get_prov_info_for_dataset(id): # Get provenance non-organ Samples for the Dataset all the way back to each Donor, to supplement # the "first sample" data stashed in internal_dict in the previous section. 
dataset_samples = app_neo4j_queries.get_all_dataset_samples(neo4j_driver_instance, uuid) - logger.debug(f"dataset_samples={str(dataset_samples)}") + if 'all' in include_samples: internal_dict[HEADER_DATASET_SAMPLES] = dataset_samples else: @@ -4211,7 +4173,6 @@ def bulk_update_entities( if idx < len(entity_updates) - 1: time.sleep(throttle) - logger.info(f"bulk_update_entities() results: {results}") return results @@ -4531,7 +4492,7 @@ def get_internal_token(): entity_dict : dict A Python dictionary retrieved for the entity normalized_entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor, Publication, Upload + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication Returns ------- @@ -4577,7 +4538,7 @@ def _get_entity_visibility(normalized_entity_type, entity_dict): return entity_visibility -''' +""" Retrieve the organ, donor, or sample metadata information associated with a Dataset, based up the user's authorization to access the Dataset. @@ -4600,7 +4561,7 @@ def _get_entity_visibility(normalized_entity_type, entity_dict): ------- list A dictionary containing the metadata properties the Dataset associated data. -''' +""" def _get_dataset_associated_metadata(dataset_dict, dataset_visibility, valid_user_token, request, associated_data: str): # Confirm the associated data requested is supported by this method. 
@@ -4689,7 +4650,7 @@ def _get_dataset_associated_metadata(dataset_dict, dataset_visibility, valid_use request : flask.Request object The incoming request normalized_entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication user_token: str The user's globus groups token json_data_dict: dict @@ -4838,7 +4799,7 @@ def create_entity_details(request, normalized_entity_type, user_token, json_data request : flask.Request object The incoming request normalized_entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor + Must be "Sample" in this case user_token: str The user's globus groups token json_data_dict: dict @@ -4988,7 +4949,7 @@ def create_multiple_samples_details(request, normalized_entity_type, user_token, request : flask.Request object The incoming request normalized_entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor + Must be "Dataset" in this case user_token: str The user's globus groups token json_data_dict_list: list @@ -5104,7 +5065,7 @@ def create_multiple_component_details(request, normalized_entity_type, user_toke Parameters ---------- normalized_entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication user_token: str The user's globus groups token merged_data_dict: dict @@ -5139,7 +5100,7 @@ def after_create(normalized_entity_type, user_token, merged_data_dict): request : flask.Request object The incoming request normalized_entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication user_token: str The user's globus groups token json_data_dict: dict @@ -5218,12 +5179,12 @@ def 
update_entity_details(request, normalized_entity_type, user_token, json_data """ -Execute 'after_update_triiger' methods +Execute 'after_update_trigger' methods Parameters ---------- normalized_entity_type : str - One of the normalized entity types: Dataset, Collection, Sample, Donor + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication user_token: str The user's globus groups token entity_dict: dict @@ -5288,7 +5249,7 @@ def query_target_entity(id, user_token): # The uuid exists via uuid-api doesn't mean it also exists in Neo4j if not entity_dict: - logger.debug(f"Entity of uuid: {uuid} not found in Neo4j") + logger.info(f"Entity of uuid: {uuid} not found in Neo4j") # Still use the user provided id, especially when it's a hubmap_id, for error message not_found_error(f"Entity of id: {id} not found in Neo4j") @@ -5301,7 +5262,6 @@ def query_target_entity(id, user_token): memcached_client_instance.set(cache_key, entity_dict, expire = SchemaConstants.MEMCACHED_TTL) else: logger.info(f'Using neo4j entity cache of UUID {uuid} at time {datetime.now()}') - logger.debug(cache_result) entity_dict = cache_result except requests.exceptions.RequestException as e: @@ -5333,108 +5293,67 @@ def require_json(request): """ -Delete the cached data of all possible keys used for the given entity id +Delete the cached data of all possible keys used for the given entity_uuid and entity_type +By taking entity_uuid and entity_type as input, it eliminates the need to call query_target_entity() +which is more useful when the input id could be either UUID or HuBMAP ID. Parameters ---------- -id : str - The HuBMAP ID (e.g. 
HBM123.ABCD.456) or UUID of target entity (Donor/Dataset/Sample/Upload/Collection/Publication) +entity_uuid : str + The UUID of target entity Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication +entity_type : str + One of the normalized entity types: Donor/Dataset/Sample/Upload/Collection/EPICollection/Publication """ -def delete_cache(id): +def delete_cache(entity_uuid, entity_type): if MEMCACHED_MODE: - # ========================================================================= - # Commented out on 7/28/2025 and replaced with the original implementation - # in the hope that this may reduce the 504 timeout rate - Zhou - # ========================================================================= - # entity_dict = query_target_entity(id, get_internal_token()) - # entity_uuid = entity_dict['uuid'] - # entity_type = entity_dict['entity_type'] - # descendant_uuids = [] - # collection_dataset_uuids = [] - # upload_dataset_uuids = [] - # collection_uuids = [] - # dataset_upload_dict = {} - # publication_collection_dict = {} - - # # Determine the associated cache keys based on the entity type - # # To reduce unnecessary Neo4j lookups that may cause timeout on the PUT call - - # # For Donor/Datasets/Sample/Publication, delete the cache of all the descendants - # if entity_type in ['Donor', 'Sample', 'Dataset', 'Publication']: - # descendant_uuids = schema_neo4j_queries.get_descendants(neo4j_driver_instance, entity_uuid , 'uuid') - - # # For Collection/Epicollection, delete the cache for each of its associated datasets (via [:IN_COLLECTION]) - # if schema_manager.entity_type_instanceof(entity_type, 'Collection'): - # collection_dataset_uuids = schema_neo4j_queries.get_collection_associated_datasets(neo4j_driver_instance, entity_uuid , 'uuid') - - # # For Upload, delete the cache for each of its associated Datasets (via [:IN_UPLOAD]) - # if entity_type == 'Upload': - # upload_dataset_uuids = schema_neo4j_queries.get_upload_datasets(neo4j_driver_instance, 
entity_uuid , 'uuid') - - # # For Dataset, delete the associated Collections cache and single Upload cache - # if entity_type == 'Dataset': - # collection_uuids = schema_neo4j_queries.get_dataset_collections(neo4j_driver_instance, entity_uuid , 'uuid') - # dataset_upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid) - - # # For Publication, delete cache of the associated collection - # # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. - # # Still keep it in the code until further decision - Zhou - # if entity_type == 'Publication': - # publication_collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid) + descendant_uuids = [] + collection_dataset_uuids = [] + upload_dataset_uuids = [] + collection_uuids = [] + dataset_upload_dict = {} + publication_collection_dict = {} + + # Determine the associated cache keys based on the entity type + # For Donor/Datasets/Sample/Publication, delete the cache of all the descendants + if entity_type in ['Donor', 'Sample', 'Dataset', 'Publication']: + descendant_uuids = schema_neo4j_queries.get_descendants(neo4j_driver_instance, entity_uuid , 'uuid') + + # For Collection/Epicollection, delete the cache for each of its associated datasets (via [:IN_COLLECTION]) + if schema_manager.entity_type_instanceof(entity_type, 'Collection'): + collection_dataset_uuids = schema_neo4j_queries.get_collection_associated_datasets(neo4j_driver_instance, entity_uuid , 'uuid') + + # For Upload, delete the cache for each of its associated Datasets (via [:IN_UPLOAD]) + if entity_type == 'Upload': + upload_dataset_uuids = schema_neo4j_queries.get_upload_datasets(neo4j_driver_instance, entity_uuid , 'uuid') + + # For Dataset, also delete the cache of associated Collections and Upload + if entity_type == 'Dataset': + collection_uuids = schema_neo4j_queries.get_dataset_collections(neo4j_driver_instance, entity_uuid , 'uuid') + 
dataset_upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid) + + # For Publication, also delete cache of the associated collection + # NOTE: As of 5/30/2025, the [:USES_DATA] workaround has been deprecated. + # Still keep it in the code until further decision - Zhou + if entity_type == 'Publication': + publication_collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid) - # # We only use uuid in the cache key acorss all the cache types - # uuids_list = [entity_uuid] + descendant_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids - - # # It's possible the target dataset has no linked upload - # if dataset_upload_dict: - # uuids_list.append(dataset_upload_dict['uuid']) - - # # It's possible the target publicaiton has no associated collection - # if publication_collection_dict: - # uuids_list.append(publication_collection_dict['uuid']) - - # # Final batch delete - # schema_manager.delete_memcached_cache(uuids_list) - - - # ========================================================================= - # The original implementation - # ========================================================================= - # First delete the target entity cache - entity_dict = query_target_entity(id, get_internal_token()) - entity_uuid = entity_dict['uuid'] - - # If the target entity is Sample (`direct_ancestor`) or Dataset/Publication (`direct_ancestors`) - # Delete the cache of all the direct descendants (children) - child_uuids = schema_neo4j_queries.get_children(neo4j_driver_instance, entity_uuid , 'uuid') - - # If the target entity is Collection, delete the cache for each of its associated - # Datasets and Publications (via [:IN_COLLECTION] relationship) as well as just Publications (via [:USES_DATA] relationship) - collection_dataset_uuids = schema_neo4j_queries.get_collection_associated_datasets(neo4j_driver_instance, entity_uuid , 'uuid') - - # If the target 
entity is Upload, delete the cache for each of its associated Datasets (via [:IN_UPLOAD] relationship) - upload_dataset_uuids = schema_neo4j_queries.get_upload_datasets(neo4j_driver_instance, entity_uuid , 'uuid') - - # If the target entity is Datasets/Publication, delete the associated Collections cache, Upload cache - collection_uuids = schema_neo4j_queries.get_dataset_collections(neo4j_driver_instance, entity_uuid , 'uuid') - collection_dict = schema_neo4j_queries.get_publication_associated_collection(neo4j_driver_instance, entity_uuid) - upload_dict = schema_neo4j_queries.get_dataset_upload(neo4j_driver_instance, entity_uuid) - # We only use uuid in the cache key acorss all the cache types - uuids_list = [entity_uuid] + child_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids + uuids_list = [entity_uuid] + descendant_uuids + collection_dataset_uuids + upload_dataset_uuids + collection_uuids - # It's possible no linked collection or upload - if collection_dict: - uuids_list.append(collection_dict['uuid']) + # Add to the list if the target dataset has linked upload + if dataset_upload_dict: + uuids_list.append(dataset_upload_dict['uuid']) - if upload_dict: - uuids_list.append(upload_dict['uuid']) + # Add to the list if the target publication has associated collection + if publication_collection_dict: + uuids_list.append(publication_collection_dict['uuid']) + # Final batch delete schema_manager.delete_memcached_cache(uuids_list) """ -Make a call to each search-api instance to reindex this entity node in elasticsearch +Make a call to search-api to trigger reindex of this entity document in elasticsearch Parameters ---------- @@ -5448,18 +5367,16 @@ def reindex_entity(uuid, user_token): 'Authorization': f'Bearer {user_token}' } - # Reindex the target entity against each configured search-api instance - for search_api_url in app.config['SEARCH_API_URL_LIST']: - logger.info(f"Making a call to search-api instance of {search_api_url} to reindex 
uuid: {uuid}") + logger.info(f"Making a call to search-api to reindex uuid: {uuid}") - response = requests.put(f"{search_api_url}/reindex/{uuid}", headers = headers) + response = requests.put(f"{app.config['SEARCH_API_URL']}/reindex/{uuid}", headers = headers) - # The reindex takes time, so 202 Accepted response status code indicates that - # the request has been accepted for processing, but the processing has not been completed - if response.status_code == 202: - logger.info(f"The search-api instance of {search_api_url} has accepted the reindex request for uuid: {uuid}") - else: - logger.error(f"The search-api instance of {search_api_url} failed to initialize the reindex for uuid: {uuid}") + # The reindex takes time, so 202 Accepted response status code indicates that + # the request has been accepted for processing, but the processing has not been completed + if response.status_code == 202: + logger.info(f"The search-api has accepted the reindex request for uuid: {uuid}") + else: + logger.error(f"The search-api failed to initialize the reindex for uuid: {uuid}") """ diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index 220df6b4..ac3fb441 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -44,7 +44,7 @@ def check_connection(neo4j_driver): logger.info("Neo4j is connected :)") return True - logger.info("Neo4j is NOT connected :(") + logger.error("Neo4j is NOT connected :(") return False @@ -83,7 +83,7 @@ def get_entities_by_type(neo4j_driver, entity_type, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}") logger.info("======get_entities_by_type() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -148,7 +148,7 @@ def get_ancestor_organs(neo4j_driver, entity_uuid): f"RETURN apoc.coll.toSet(COLLECT(organ)) AS {record_field_name}") 
logger.info("======get_ancestor_organs() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -199,7 +199,7 @@ def create_multiple_samples(neo4j_driver, samples_dict_list, activity_data_dict, f"CREATE (a)-[:ACTIVITY_OUTPUT]->(e)") logger.info("======create_multiple_samples() individual query======") - logger.info(query) + logger.debug(query) result = tx.run(query) @@ -211,7 +211,7 @@ def create_multiple_samples(neo4j_driver, samples_dict_list, activity_data_dict, logger.exception(msg) if tx.closed() == False: - logger.info("Failed to commit create_multiple_samples() transaction, rollback") + logger.error("Failed to commit create_multiple_samples() transaction, rollback") tx.rollback() @@ -262,7 +262,7 @@ def create_multiple_datasets(neo4j_driver, datasets_dict_list, activity_data_dic f"RETURN e AS {record_field_name}") logger.info("======create_multiple_samples() individual query======") - logger.info(query) + logger.debug(query) result = tx.run(query) record = result.single() @@ -279,7 +279,7 @@ def create_multiple_datasets(neo4j_driver, datasets_dict_list, activity_data_dic logger.exception(msg) if tx.closed() == False: - logger.info("Failed to commit create_multiple_samples() transaction, rollback") + logger.error("Failed to commit create_multiple_samples() transaction, rollback") tx.rollback() @@ -314,7 +314,7 @@ def get_sorted_revisions(neo4j_driver, uuid): f"RETURN COLLECT(node) AS {record_field_name}") logger.info("======get_sorted_revisions() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -364,7 +364,7 @@ def get_sorted_multi_revisions(neo4j_driver, uuid, fetch_all=True, property_key= ) logger.info("======get_sorted_revisions() query======") - logger.info(query) + logger.debug(query) with 
neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -420,7 +420,7 @@ def get_previous_revisions(neo4j_driver, uuid, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(prev)) AS {record_field_name}") logger.info("======get_previous_revisions() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -470,7 +470,7 @@ def get_next_revisions(neo4j_driver, uuid, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(next)) AS {record_field_name}") logger.info("======get_next_revisions() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -511,7 +511,7 @@ def is_next_revision_latest(neo4j_driver, uuid): f"RETURN apoc.coll.toSet(COLLECT(next.uuid)) AS {record_field_name}") logger.info("======is_next_revision_latest() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -547,7 +547,7 @@ def nested_previous_revisions(neo4j_driver, previous_revision_list): "RETURN connectedUUID1, connectedUUID2 ") logger.info("======nested_previous_revisions() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -598,7 +598,7 @@ def get_provenance(neo4j_driver, uuid, depth): f"RETURN json") logger.info("======get_provenance() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: return session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -634,7 +634,7 @@ def get_dataset_latest_revision(neo4j_driver, uuid, public = False): f"RETURN latest AS 
{record_field_name}") logger.info("======get_dataset_latest_revision() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -667,7 +667,7 @@ def get_dataset_revision_number(neo4j_driver, uuid): f"RETURN COUNT(prev) AS {record_field_name}") logger.info("======get_dataset_revision_number() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -698,7 +698,7 @@ def get_associated_organs_from_dataset(neo4j_driver, dataset_uuid): f"RETURN apoc.coll.toSet(COLLECT(organ)) AS {record_field_name}") logger.info("======get_associated_organs_from_dataset() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -717,7 +717,7 @@ def get_associated_samples_from_dataset(neo4j_driver, dataset_uuid): f"RETURN apoc.coll.toSet(COLLECT(sample)) AS {record_field_name}") logger.info("======get_associated_samples_from_dataset() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -736,7 +736,7 @@ def get_associated_donors_from_dataset(neo4j_driver, dataset_uuid): f"RETURN apoc.coll.toSet(COLLECT(donor)) AS {record_field_name}") logger.info("======get_associated_donors_from_dataset() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) @@ -779,7 +779,7 @@ def get_individual_prov_info(neo4j_driver, dataset_uuid): f" ds.group_uuid, ds.created_timestamp, ds.created_by_user_email, ds.last_modified_timestamp, " f" ds.last_modified_user_email, ds.lab_dataset_id, 
ds.dataset_type, METASAMPLE, PROCESSED_DATASET") logger.info("======get_prov_info() query======") - logger.info(query) + logger.debug(query) record_contents = [] record_dict = {} @@ -849,7 +849,7 @@ def get_individual_prov_info(neo4j_driver, dataset_uuid): def get_all_dataset_samples(neo4j_driver, dataset_uuid): query = f"MATCH p = (ds:Dataset {{uuid: '{dataset_uuid}'}})<-[*]-(dn:Donor) return p" logger.info("======get_all_dataset_samples() query======") - logger.info(query) + logger.debug(query) # Dictionary of Dictionaries, keyed by UUID, containing each Sample returned in the Neo4j Path dataset_sample_list = {} @@ -890,7 +890,8 @@ def get_sankey_info(neo4j_driver, public_only): f"ORDER BY ds.group_name") logger.info("======get_sankey_info() query======") - logger.info(query) + logger.debug(query) + with neo4j_driver.session() as session: # Because we're returning multiple things, we use session.run rather than session.read_transaction result = session.run(query) diff --git a/src/instance/app.cfg.example b/src/instance/app.cfg.example index 595ee728..37b4f792 100644 --- a/src/instance/app.cfg.example +++ b/src/instance/app.cfg.example @@ -1,4 +1,7 @@ -# Set to True to disable the PUT and POST calls, used on STAGE to make entity-api READ-ONLY +# Set to False to use INFO logging level +DEBUG_MODE = True + +# Set to True to disable the PUT and POST calls to make entity-api READ-ONLY READ_ONLY_MODE = False # File path of schema yaml file, DO NOT MODIFY @@ -37,7 +40,7 @@ MEMCACHED_SERVER = 'host:11211' # Change prefix based on deployment environment, default for DEV MEMCACHED_PREFIX = 'hm_entity_dev_' -# URL for talking to UUID API (default value used for docker deployment, no token needed) +# URL for talking to UUID API (default value used for docker deployment) # Works regardless of the trailing slash / UUID_API_URL = 'http://uuid-api:8080' @@ -54,9 +57,9 @@ ONTOLOGY_API_URL = 'https://ontology-api.dev.hubmapconsortium.org' # necessitates subsequent calls for other 
entities. ENTITY_API_URL = 'http://localhost:5002' -# A list of URLs for talking to multiple Search API instances (default value used for docker deployment, no token needed) +# URL for talking to Search API (default value used for docker deployment) # Works regardless of the trailing slash / -SEARCH_API_URL_LIST = ['http://search-api:8080'] +SEARCH_API_URL = 'http://search-api:8080' #The Base URL to the Globus transfer application site GLOBUS_APP_BASE_URL = 'https://app.globus.org' diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 59b11e3a..faa583d2 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -192,7 +192,7 @@ def get_all_entity_types(): """ -Get the superclass (if defined) of the given entity class +Get the optional superclass (if defined) of the given entity class Parameters ---------- @@ -209,32 +209,40 @@ def get_entity_superclass(normalized_entity_class): all_entity_types = get_all_entity_types() - if normalized_entity_class in all_entity_types: - if 'superclass' in _schema['ENTITIES'][normalized_entity_class]: - normalized_superclass = normalize_entity_type(_schema['ENTITIES'][normalized_entity_class]['superclass']) + if normalized_entity_class not in all_entity_types: + msg = f"Unrecognized value of 'normalized_entity_class': {normalized_entity_class}" + logger.error(msg) + raise ValueError(msg) - if normalized_superclass not in all_entity_types: - msg = f"Invalid 'superclass' value defined for {normalized_entity_class}: {normalized_superclass}" - logger.error(msg) - raise ValueError(msg) - else: - # Since the 'superclass' property is optional, we just log the warning message, no need to bubble up - msg = f"The 'superclass' property is not defined for entity class: {normalized_entity_class}" - logger.warning(msg) + if 'superclass' in _schema['ENTITIES'][normalized_entity_class]: + normalized_superclass = normalize_entity_type(_schema['ENTITIES'][normalized_entity_class]['superclass']) + + # 
Additional check to ensure no schema yaml mistake + if normalized_superclass not in all_entity_types: + msg = f"Invalid 'superclass' value defined for {normalized_entity_class}: {normalized_superclass}" + logger.error(msg) + raise ValueError(msg) return normalized_superclass -def entity_type_instanceof(entity_type: str, entity_class: str) -> bool: - """ - Determine if the Entity type with 'entity_type' is an instance of 'entity_class'. - Use this function if you already have the Entity type. Use entity_instanceof(uuid, class) - if you just have the Entity uuid. +""" +Determine if the Entity type with 'entity_type' is an instance of 'entity_class'. +Use this function if you already have the Entity type. Use `entity_instanceof(uuid, class)` +if you just have the Entity uuid. - :param entity_type: from Entity - :param entity_class: found in .yaml file - :return: True or False - """ +Parameters +---------- +entity_type : str + The entity type of the given entity (the subclass) +entity_class : str + The class defined in the schema yaml to check against (the superclass) + +Returns +------- +bool +""" +def entity_type_instanceof(entity_type: str, entity_class: str) -> bool: if entity_type is None: return False @@ -247,14 +255,21 @@ def entity_type_instanceof(entity_type: str, entity_class: str) -> bool: return False -def entity_instanceof(entity_uuid: str, entity_class: str) -> bool: - """ - Determine if the Entity with 'entity_uuid' is an instance of 'entity_class'. 
- :param entity_uuid: from Entity - :param entity_class: found in .yaml file - :return: True or False - """ +Parameters +---------- +entity_uuid : str + The uuid of the given entity +entity_class : str + The superclass + +Returns +------- +bool +""" +def entity_instanceof(entity_uuid: str, entity_class: str) -> bool: entity_type: str =\ schema_neo4j_queries.get_entity_type(get_neo4j_driver_instance(), entity_uuid.strip()) return entity_type_instanceof(entity_type, entity_class) @@ -1524,7 +1539,7 @@ def get_user_info(request): user_info = _auth_helper.getUserInfoUsingRequest(request, True) logger.info("======get_user_info()======") - logger.info(user_info) + logger.debug(user_info) # For debugging purposes try: @@ -1533,7 +1548,7 @@ def get_user_info(request): groups_list = auth_helper_instance.get_user_groups_deprecated(token) logger.info("======Groups using get_user_groups_deprecated()======") - logger.info(groups_list) + logger.debug(groups_list) except Exception: msg = "For debugging purposes, failed to parse the Authorization token by calling commons.auth_helper.getAuthorizationTokens()" # Log the full stack trace, prepend a line with our message @@ -1819,7 +1834,7 @@ def create_hubmap_ids(normalized_class, json_data_dict, user_token, user_info_di query_parms = {'entity_count': count} logger.info("======create_hubmap_ids() json_to_post to uuid-api======") - logger.info(json_to_post) + logger.debug(json_to_post) # Disable ssl certificate verification target_url = _uuid_api_url + SchemaConstants.UUID_API_ID_ENDPOINT @@ -1857,7 +1872,7 @@ def create_hubmap_ids(normalized_class, json_data_dict, user_token, user_info_di d.pop('hubmap_base_id', None) logger.info("======create_hubmap_ids() generated ids from uuid-api======") - logger.info(ids_list) + logger.debug(ids_list) return ids_list else: diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 3d1d9ca7..9d6e28d4 100644 --- a/src/schema/schema_neo4j_queries.py +++ 
b/src/schema/schema_neo4j_queries.py @@ -21,10 +21,10 @@ The neo4j database connection pool entity_type : str One of the normalized entity types: Dataset, Collection, Sample, Donor -superclass : str - The normalized entity superclass type if defined, None by default entity_data_dict : dict The target Entity node to be created +superclass : str + The normalized entity superclass type if defined, None by default Returns ------- @@ -45,7 +45,7 @@ def create_entity(neo4j_driver, entity_type, entity_data_dict, superclass = None f"RETURN e AS {record_field_name}") logger.info("======create_entity() query======") - logger.info(query) + logger.debug(query) try: with neo4j_driver.session() as session: @@ -59,9 +59,6 @@ def create_entity(neo4j_driver, entity_type, entity_data_dict, superclass = None entity_dict = node_to_dict(entity_node) - # logger.info("======create_entity() resulting entity_dict======") - # logger.info(entity_dict) - tx.commit() return entity_dict @@ -71,7 +68,7 @@ def create_entity(neo4j_driver, entity_type, entity_data_dict, superclass = None logger.exception(msg) if tx.closed() == False: - logger.info("Failed to commit create_entity() transaction, rollback") + logger.error("Failed to commit create_entity() transaction, rollback") tx.rollback() @@ -101,7 +98,7 @@ def get_entity(neo4j_driver, uuid): f"RETURN e AS {record_field_name}") logger.info("======get_entity() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -136,7 +133,7 @@ def filter_ancestors_by_type(neo4j_driver, direct_ancestor_uuids, entity_type): f"WHERE e.uuid in {direct_ancestor_uuids} AND toLower(e.entity_type) <> '{entity_type.lower()}' " f"RETURN e.entity_type AS entity_type, collect(e.uuid) AS uuids") logger.info("======filter_ancestors_by_type======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: records = session.run(query).data() @@ 
-180,7 +177,7 @@ def get_children(neo4j_driver, uuid, property_key = None): f"RETURN [a IN uniqueChildren | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") logger.info("======get_children() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -231,7 +228,7 @@ def get_parents(neo4j_driver, uuid, property_key = None): f"RETURN [a IN uniqueParents | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") logger.info("======get_parents() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -288,7 +285,7 @@ def get_siblings(neo4j_driver, uuid, property_key=None): logger.info("======get_siblings() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -344,7 +341,7 @@ def get_tuplets(neo4j_driver, uuid, property_key=None): logger.info("======get_tuplets() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -394,7 +391,7 @@ def get_ancestors(neo4j_driver, uuid, property_key = None): f"RETURN [a IN uniqueAncestors | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") logger.info("======get_ancestors() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -444,7 +441,7 @@ def get_descendants(neo4j_driver, uuid, property_key = None): f"RETURN [a IN uniqueDescendants | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") 
logger.info("======get_descendants() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -494,7 +491,7 @@ def get_collections(neo4j_driver, uuid, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(c)) AS {record_field_name}") logger.info("======get_collections() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -544,7 +541,7 @@ def get_uploads(neo4j_driver, uuid, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(u)) AS {record_field_name}") logger.info("======get_uploads() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -589,7 +586,7 @@ def get_dataset_direct_ancestors(neo4j_driver, uuid, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(s)) AS {record_field_name}") logger.info("======get_dataset_direct_ancestors() query======") - logger.info(query) + logger.debug(query) # Sessions will often be created and destroyed using a with block context with neo4j_driver.session() as session: @@ -634,7 +631,7 @@ def get_dataset_donor_organs_info(neo4j_driver, dataset_uuid): f" , organ_type: org.organ}})) AS donorOrganSet") logger.info("======get_dataset_donor_organs_info() ds_donors_organs_query======") - logger.info(ds_donors_organs_query) + logger.debug(ds_donors_organs_query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx @@ -646,7 +643,7 @@ def get_entity_type(neo4j_driver, entity_uuid: str) -> str: query: str = f"Match (ent {{uuid: '{entity_uuid}'}}) return ent.entity_type" logger.info("======get_entity_type() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -660,7 
+657,7 @@ def get_entity_creation_action_activity(neo4j_driver, entity_uuid: str) -> str: query: str = f"MATCH (ds:Dataset {{uuid:'{entity_uuid}'}})<-[:ACTIVITY_OUTPUT]-(a:Activity) RETURN a.creation_action" logger.info("======get_entity_creation_action() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -717,7 +714,7 @@ def link_entity_to_direct_ancestors(neo4j_driver, entity_uuid, direct_ancestor_u if tx.closed() == False: # Log the full stack trace, prepend a line with our message - logger.info("Failed to commit link_entity_to_direct_ancestors() transaction, rollback") + logger.error("Failed to commit link_entity_to_direct_ancestors() transaction, rollback") tx.rollback() raise TransactionError(msg) @@ -757,7 +754,7 @@ def link_publication_to_associated_collection(neo4j_driver, entity_uuid, associa if tx.closed() == False: # Log the full stack trace, prepend a line with our message - logger.info("Failed to commit link_publication_to_associated_collection() transaction, rollback") + logger.error("Failed to commit link_publication_to_associated_collection() transaction, rollback") tx.rollback() raise TransactionError(msg) @@ -805,7 +802,7 @@ def link_collection_to_datasets(neo4j_driver, collection_uuid, dataset_uuid_list if tx.closed() == False: # Log the full stack trace, prepend a line with our message - logger.info("Failed to commit link_collection_to_datasets() transaction, rollback") + logger.error("Failed to commit link_collection_to_datasets() transaction, rollback") tx.rollback() raise TransactionError(msg) @@ -838,7 +835,7 @@ def link_entity_to_previous_revision(neo4j_driver, entity_uuid, previous_revisio if tx.closed() == False: # Log the full stack trace, prepend a line with our message - logger.info("Failed to commit link_entity_to_previous_revision() transaction, rollback") + logger.error("Failed to commit link_entity_to_previous_revision() 
transaction, rollback") tx.rollback() raise TransactionError(msg) @@ -869,7 +866,7 @@ def get_previous_revision_uuid(neo4j_driver, uuid): f"RETURN previous_revision.uuid AS {record_field_name}") logger.info("======get_previous_revision_uuid() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -905,7 +902,7 @@ def get_previous_revision_uuids(neo4j_driver, uuid): f"RETURN COLLECT(previous_revision.uuid) AS {record_field_name}") logger.info("======get_previous_revision_uuids() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -943,7 +940,7 @@ def get_next_revision_uuid(neo4j_driver, uuid): f"RETURN next_revision.uuid AS {record_field_name}") logger.info("======get_next_revision_uuid() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -979,7 +976,7 @@ def get_next_revision_uuids(neo4j_driver, uuid): f"RETURN COLLECT(next_revision.uuid) AS {record_field_name}") logger.info("======get_next_revision_uuids() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1022,7 +1019,7 @@ def get_collection_associated_datasets(neo4j_driver, uuid, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}") logger.info("======get_collection_associated_datasets() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1068,7 +1065,7 @@ def get_dataset_collections(neo4j_driver, uuid, property_key = None): f"RETURN apoc.coll.toSet(COLLECT(c)) AS {record_field_name}") logger.info("======get_dataset_collections() 
query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1110,7 +1107,7 @@ def get_publication_associated_collection(neo4j_driver, uuid): f"RETURN c as {record_field_name}") logger.info("=====get_publication_associated_collection() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1144,7 +1141,7 @@ def get_dataset_upload(neo4j_driver, uuid): f"RETURN s AS {record_field_name}") logger.info("======get_dataset_upload() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1181,7 +1178,7 @@ def get_collection_datasets(neo4j_driver, uuid): f"RETURN [a IN uniqueDataset | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") logger.info("======get_collection_datasets() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1217,7 +1214,7 @@ def get_collection_datasets_data_access_levels(neo4j_driver, uuid): f"RETURN COLLECT(DISTINCT d.data_access_level) AS {record_field_name}") logger.info("======get_collection_datasets_data_access_levels() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1253,7 +1250,7 @@ def get_collection_datasets_statuses(neo4j_driver, uuid): f"RETURN COLLECT(DISTINCT d.status) AS {record_field_name}") logger.info("======get_collection_datasets_statuses() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1298,7 +1295,7 @@ def 
link_datasets_to_upload(neo4j_driver, upload_uuid, dataset_uuids_list): f"MERGE (s)<-[r:IN_UPLOAD]-(d)") logger.info("======link_datasets_to_upload() query======") - logger.info(query) + logger.debug(query) tx.run(query) tx.commit() @@ -1308,7 +1305,7 @@ def link_datasets_to_upload(neo4j_driver, upload_uuid, dataset_uuids_list): logger.exception(msg) if tx.closed() == False: - logger.info("Failed to commit link_datasets_to_upload() transaction, rollback") + logger.error("Failed to commit link_datasets_to_upload() transaction, rollback") tx.rollback() @@ -1345,7 +1342,7 @@ def unlink_datasets_from_upload(neo4j_driver, upload_uuid, dataset_uuids_list): f"DELETE r") logger.info("======unlink_datasets_from_upload() query======") - logger.info(query) + logger.debug(query) tx.run(query) tx.commit() @@ -1355,7 +1352,7 @@ def unlink_datasets_from_upload(neo4j_driver, upload_uuid, dataset_uuids_list): logger.exception(msg) if tx.closed() == False: - logger.info("Failed to commit unlink_datasets_from_upload() transaction, rollback") + logger.error("Failed to commit unlink_datasets_from_upload() transaction, rollback") tx.rollback() @@ -1395,7 +1392,7 @@ def get_upload_datasets(neo4j_driver, uuid, property_key = None): f"RETURN [a IN uniqueUploads | apoc.create.vNode(labels(a), apoc.map.removeKeys(properties(a), {fields_to_omit}))] AS {record_field_name}") logger.info("======get_upload_datasets() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1438,16 +1435,13 @@ def count_attached_published_datasets(neo4j_driver, entity_type, uuid): f"RETURN COUNT(d) AS {record_field_name}") logger.info("======count_attached_published_datasets() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) count = record[record_field_name] - # 
logger.info("======count_attached_published_datasets() resulting count======") - # logger.info(count) - return count @@ -1487,7 +1481,7 @@ def get_sample_direct_ancestor(neo4j_driver, uuid, property_key = None): f"RETURN parent AS {record_field_name}") logger.info("======get_sample_direct_ancestor() query======") - logger.info(query) + logger.debug(query) with neo4j_driver.session() as session: record = session.read_transaction(execute_readonly_tx, query) @@ -1530,7 +1524,7 @@ def update_entity(neo4j_driver, entity_type, entity_data_dict, uuid): f"RETURN e AS {record_field_name}") logger.info("======update_entity() query======") - logger.info(query) + logger.debug(query) try: with neo4j_driver.session() as session: @@ -1546,9 +1540,6 @@ def update_entity(neo4j_driver, entity_type, entity_data_dict, uuid): entity_dict = node_to_dict(entity_node) - # logger.info("======update_entity() resulting entity_dict======") - # logger.info(entity_dict) - return entity_dict except TransactionError as te: msg = f"TransactionError from calling create_entity(): {te.value}" @@ -1556,7 +1547,7 @@ def update_entity(neo4j_driver, entity_type, entity_data_dict, uuid): logger.exception(msg) if tx.closed() == False: - logger.info("Failed to commit update_entity() transaction, rollback") + logger.error("Failed to commit update_entity() transaction, rollback") tx.rollback() @@ -1586,7 +1577,7 @@ def create_activity_tx(tx, activity_data_dict): f"RETURN e AS {record_field_name}") logger.info("======create_activity_tx() query======") - logger.info(query) + logger.debug(query) result = tx.run(query) record = result.single() @@ -1724,7 +1715,7 @@ def create_relationship_tx(tx, source_node_uuid, target_node_uuid, relationship, f"RETURN type(r) AS {record_field_name}") logger.info("======create_relationship_tx() query======") - logger.info(query) + logger.debug(query) result = tx.run(query) @@ -1748,7 +1739,7 @@ def create_outgoing_activity_relationships_tx(tx, source_node_uuids:list, activi f" 
CREATE (e) - [r:ACTIVITY_INPUT]->(a)") logger.info("======create_outgoing_activity_relationships_tx() query======") - logger.info(query) + logger.debug(query) result = tx.run(query) @@ -1793,7 +1784,7 @@ def _delete_activity_node_and_linkages_tx(tx, uuid): f"DELETE in, a, out") logger.info("======_delete_activity_node_and_linkages_tx() query======") - logger.info(query) + logger.debug(query) result = tx.run(query) @@ -1815,7 +1806,7 @@ def _delete_publication_associated_collection_linkages_tx(tx, uuid): f"DELETE r") logger.info("======_delete_publication_associated_collection_linkages_tx() query======") - logger.info(query) + logger.debug(query) result = tx.run(query) @@ -1835,7 +1826,7 @@ def _delete_collection_linkages_tx(tx, uuid): f" DELETE in") logger.info("======_delete_collection_linkages_tx() query======") - logger.info(query) + logger.debug(query) result = tx.run(query) diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index e1ca669f..c5d83de1 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1484,7 +1484,7 @@ def commit_thumbnail_file(property_key, normalized_type, user_token, existing_da 'user_token': user_token } - logger.info(f"Commit the uploaded thumbnail file of tmp file id {tmp_file_id} for entity {entity_uuid} via ingest-api call...") + logger.info(f"Commit the uploaded thumbnail file of tmp_file_id {tmp_file_id} for entity {entity_uuid} via ingest-api call...") request_headers = { 'Authorization': f'Bearer {user_token}' @@ -1494,7 +1494,7 @@ def commit_thumbnail_file(property_key, normalized_type, user_token, existing_da response = requests.post(url = ingest_api_target_url, headers = request_headers, json = json_to_post, verify = False) if response.status_code != 200: - msg = f"Failed to commit the thumbnail file of tmp file id {tmp_file_id} via ingest-api for entity uuid: {entity_uuid}" + msg = f"Failed to commit the thumbnail file of tmp_file_id {tmp_file_id} via ingest-api for entity 
uuid: {entity_uuid}" logger.error(msg) raise schema_errors.FileUploadException(msg) @@ -1588,7 +1588,7 @@ def delete_thumbnail_file(property_key, normalized_type, user_token, existing_da 'files_info_list': [file_info_dict] } - logger.info(f"Remove the uploaded thumbnail file {file_uuid} for entity {entity_uuid} via ingest-api call...") + logger.debug(f"Remove the uploaded thumbnail file {file_uuid} for entity {entity_uuid} via ingest-api call...") request_headers = { 'Authorization': f'Bearer {user_token}' @@ -2290,7 +2290,7 @@ def _commit_files(target_property_key, property_key, normalized_type, user_token 'user_token': user_token } - logger.info(f"Commit the uploaded file of temp_file_id {temp_file_id} for entity {entity_uuid} via ingest-api call...") + logger.debug(f"Commit the uploaded file of temp_file_id {temp_file_id} for entity {entity_uuid} via ingest-api call...") request_headers = { 'Authorization': f'Bearer {user_token}' @@ -2401,7 +2401,7 @@ def _delete_files(target_property_key, property_key, normalized_type, user_token 'files_info_list': files_info_list } - logger.info(f"Remove the uploaded files for entity {entity_uuid} via ingest-api call...") + logger.debug(f"Remove the uploaded files for entity {entity_uuid} via ingest-api call...") request_headers = { 'Authorization': f'Bearer {user_token}'