Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
249 changes: 196 additions & 53 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@
from schema.schema_constants import TriggerTypeEnum
from metadata_constraints import get_constraints, constraints_json_is_valid
# from lib.ontology import initialize_ubkg, init_ontology, Ontology, UbkgSDK
from dev_entity_worker import EntityWorker
import dev_entity_exceptions as entityEx

# HuBMAP commons
from hubmap_commons import string_helper
Expand Down Expand Up @@ -248,23 +246,6 @@ def http_internal_server_error(e):
except Exception as s3exception:
logger.critical(s3exception, exc_info=True)

####################################################################################################
## Initialize a "worker" for the service.
## For initial transition to "worker" usage, pass in globals of app.py which would eventually
## be only in the worker and not in app.py.
####################################################################################################
entity_worker = None
try:
entity_worker = EntityWorker( app_config=app.config
, schema_mgr = schema_manager
, memcached_client_instance = memcached_client_instance
, neo4j_driver_instance = neo4j_driver_instance)
if not isinstance(entity_worker, EntityWorker):
raise Exception("Error instantiating a EntityWorker during startup.")
logger.info("EntityWorker instantiated using app.cfg setting.")
except Exception as e:
logger.critical(f"Unable to instantiate a EntityWorker during startup.")
logger.error(e, exc_info=True)

####################################################################################################
## REFERENCE DOI Redirection
Expand Down Expand Up @@ -632,6 +613,112 @@ def _get_entity_visibility(normalized_entity_type, entity_dict):
entity_visibility = DataVisibilityEnum.PUBLIC
return entity_visibility

'''
Retrieve the metadata for the entities associated with a Dataset. The associations which
can be retrieved are the Dataset's organs, samples, or donors.

The associated entities returned depend upon the user's authorization. For a public Dataset,
the properties of each associated entity are filtered down if the request does not show the
user to be in the HuBMAP-READ Globus Group. For a non-public Dataset, the request is rejected
unless a token is presented and the user is in that group.

Parameters
----------
dataset_dict : dict
    A dictionary containing the properties of the target Dataset. The 'entity_type' and
    'uuid' entries are read by this method.
dataset_visibility : DataVisibilityEnum
    An indication of whether the Dataset itself is public or not, so the associated data can
    be filtered to match the entity dictionary before being returned.
valid_user_token : str
    Either the valid current token for an authenticated user or None.
request : request object
    The request being served. It is passed to user_in_hubmap_read_group() to determine
    the user's authorization.
associated_data : str
    A string indicating the associated property to be retrieved, which must be one of
    'organs', 'samples', or 'donors'. The comparison is case-insensitive.

Returns
-------
list
    A list of dictionaries, one for each associated entity, after normalization for the
    response. The list is empty when the Dataset has no associated entities of the
    requested kind.
'''
def _get_dataset_associated_data(dataset_dict, dataset_visibility, valid_user_token, request, associated_data: str):

    # Normalize the requested association once, so every comparison below uses the same value.
    association = associated_data.lower()

    # Confirm the associated data requested is supported by this method.
    retrievable_associations = ['organs', 'samples', 'donors']
    if association not in retrievable_associations:
        bad_request_error(  f"Dataset associated data cannot be retrieved for"
                            f" {associated_data}, only"
                            f" {COMMA_SEPARATOR.join(retrievable_associations)}.")

    # Confirm the dictionary passed in is for a Dataset entity.
    if not schema_manager.entity_type_instanceof(dataset_dict['entity_type'], 'Dataset'):
        bad_request_error(  f"'{dataset_dict['entity_type']}' for"
                            f" uuid={dataset_dict['uuid']} is not a Dataset or Publication,"
                            f" so '{associated_data}' can not be retrieved for it.")

    # Set up fields to be excluded when retrieving the entities associated with
    # the Dataset. Organs are one kind of Sample.
    if association in ['organs', 'samples']:
        fields_to_exclude = schema_manager.get_fields_to_exclude('Sample')
    elif association in ['donors']:
        fields_to_exclude = schema_manager.get_fields_to_exclude('Donor')
    else:
        # Defensive branch, in case retrievable_associations is extended without
        # a matching entry being added here.
        logger.error(   f"Expected associated data type to be verified, but got"
                        f" associated_data.lower()={association}.")
        internal_server_error(f"Unexpected error retrieving '{associated_data}' for a Dataset")

    public_entity = (dataset_visibility is DataVisibilityEnum.PUBLIC)

    # Set a variable reflecting the user's authorization by being in the HuBMAP-READ Globus Group
    user_authorized = user_in_hubmap_read_group(request=request)

    # For non-public documents, reject the request if the user is not authorized
    if not public_entity:
        if valid_user_token is None:
            forbidden_error(    f"{dataset_dict['entity_type']} for"
                                f" {dataset_dict['uuid']} is not"
                                f" accessible without presenting a token.")
        if not user_authorized:
            forbidden_error(    f"The requested Dataset has non-public data."
                                f" A Globus token with access permission is required.")

    # By now, either the entity is public accessible or the user has the correct access level
    if association == 'organs':
        associated_entities = app_neo4j_queries.get_associated_organs_from_dataset(neo4j_driver_instance,
                                                                                   dataset_dict['uuid'])
    elif association == 'samples':
        associated_entities = app_neo4j_queries.get_associated_samples_from_dataset(neo4j_driver_instance,
                                                                                    dataset_dict['uuid'])
    elif association == 'donors':
        associated_entities = app_neo4j_queries.get_associated_donors_from_dataset(neo4j_driver_instance,
                                                                                   dataset_dict['uuid'])
    else:
        # Defensive branch, mirroring the one used when selecting fields_to_exclude.
        logger.error(   f"Expected associated data type to be verified, but got"
                        f" associated_data.lower()={association} while retrieving from Neo4j.")
        internal_server_error(f"Unexpected error retrieving '{associated_data}' from the data store")

    # If nothing is associated with the Dataset, return an empty list rather than
    # asking the schema manager to complete and normalize zero entities.
    if not associated_entities:
        return []

    # Use the internal token to query the target entity to assure it is returned. This way public
    # entities can be accessed even if valid_user_token is None.
    internal_token = auth_helper_instance.getProcessSecret()
    complete_entities_list = schema_manager.get_complete_entities_list( token=internal_token
                                                                        , entities_list=associated_entities)
    # Final result after normalization
    final_result = schema_manager.normalize_entities_list_for_response(entities_list=complete_entities_list)

    # For public entities, limit the fields in the response unless the authorization presented in the
    # Request allows the user to see all properties.
    if public_entity and not user_authorized:
        final_result = [
            schema_manager.exclude_properties_from_response(excluded_fields=fields_to_exclude
                                                            , output_dict=entity)
            for entity in final_result
        ]

    return final_result

'''
Retrieve the full provenance metadata information of a given entity by id, as
produced for metadata.json files.
Expand All @@ -644,11 +731,11 @@ def _get_entity_visibility(normalized_entity_type, entity_dict):

An HTTP 400 Response is returned for reasons described in the error message, such as
requesting data for a non-Dataset.

An HTTP 401 Response is returned when a token is presented that is not valid.

An HTTP 403 Response is returned if user is not authorized to access the Dataset, as described above.

An HTTP 404 Response is returned if the requested Dataset is not found.

Parameters
Expand All @@ -661,39 +748,95 @@ def _get_entity_visibility(normalized_entity_type, entity_dict):
json
Valid JSON for the full provenance metadata of the requested Dataset
'''
@app.route('/datasets/<id>/prov-metadata', methods = ['GET'])
def get_provenance_metadata_by_id_for_auth_level(id:Annotated[str, 32]) -> str:
@app.route('/datasets/<id>/prov-metadata', methods=['GET'])
def get_provenance_metadata_by_id_for_auth_level(id):
# Token is not required, but if an invalid token provided,
# we need to tell the client with a 401 error
validate_token_if_auth_header_exists(request)

try:
# Get the user's token from the Request for later authorization to access non-public entities.
# If an invalid token is presented, reject with an HTTP 401 Response.
# N.B. None is a "valid" user_token which may be adequate for access to public data.
user_token = entity_worker.get_request_auth_token(request=request)

# Get the user's token from the Request for later authorization to access non-public entities.
user_info = entity_worker.get_request_user_info_with_groups(request=request)

# Retrieve the expanded metadata for the entity. If authorization of token or group membership
# does not allow access to the entity, exceptions will be raised describing the problem.
expanded_entity_metadata = entity_worker.get_expanded_dataset_metadata( dataset_id=id
, valid_user_token=user_token
, user_info=user_info)
return jsonify(expanded_entity_metadata)
except entityEx.EntityBadRequestException as e_400:
return jsonify({'error': e_400.message}), 400
except entityEx.EntityUnauthorizedException as e_401:
return jsonify({'error': e_401.message}), 401
except entityEx.EntityForbiddenException as e_403:
return jsonify({'error': e_403.message}), 403
except entityEx.EntityNotFoundException as e_404:
return jsonify({'error': e_404.message}), 404
except entityEx.EntityServerErrorException as e_500:
logger.exception(f"An unexpected error occurred during provenance metadata retrieval.")
return jsonify({'error': e_500.message}), 500
except Exception as e:
default_msg = 'An unexpected error occurred retrieving provenance metadata'
logger.exception(default_msg)
return jsonify({'error': default_msg}), 500
# Use the internal token to query the target entity
# since public entities don't require user token
token = get_internal_token()

# The argument id that shadows Python's built-in id should be an identifier for a Dataset.
# Get the entity dict from cache if exists
# Otherwise query against uuid-api and neo4j to get the entity dict if the id exists
dataset_dict = query_target_entity(id, token)
normalized_entity_type = dataset_dict['entity_type']

# Validate that the entity type is a Dataset before doing any further work
if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'):
bad_request_error(f"Unable to get the provenance metatdata for this: {normalized_entity_type},"
" supported entity types: Dataset, Publication")

# Get the generated complete entity result from cache if exists
# Otherwise re-generate on the fly
complete_dict = schema_manager.get_complete_entity_result(token=token
, entity_dict=dataset_dict)

# Determine if the entity is publicly visible based on its data only.
# To verify if a Collection is public, it is necessary to have its Datasets, which
# are populated as triggered data. So pull back the complete entity for
# _get_entity_visibility() to check.
entity_scope = _get_entity_visibility( normalized_entity_type=normalized_entity_type
,entity_dict=complete_dict)
public_entity = (entity_scope is DataVisibilityEnum.PUBLIC)

# Set a variable reflecting the user's authorization by being in the HuBMAP-READ Globus Group
user_authorized = user_in_hubmap_read_group(request=request)

# Get user token from Authorization header
user_token = get_user_token(request)

# For non-public documents, reject the request if the user is not authorized
if not public_entity:
if user_token is None:
forbidden_error( f"{normalized_entity_type} for {complete_dict['uuid']} is not"
f" accessible without presenting a token.")
if not user_authorized:
forbidden_error( f"The requested {normalized_entity_type} has non-public data."
f" A Globus token with access permission is required.")

# We'll need to return all the properties including those generated by
# `on_read_trigger` to have a complete result e.g., the 'next_revision_uuid' and
# 'previous_revision_uuid' being used below.
# Collections, however, will filter out only public properties for return.

# Also normalize the result based on schema
final_result = schema_manager.normalize_entity_result_for_response(complete_dict)

# Identify fields to exclude from non-authorized responses for the entity type.
fields_to_exclude = schema_manager.get_fields_to_exclude(normalized_entity_type)

# Response with the dict
if public_entity and not user_authorized:
final_result = schema_manager.exclude_properties_from_response(fields_to_exclude, final_result)

# Retrieve the associated data for the entity, and add it to the expanded dictionary.
associated_organ_list = _get_dataset_associated_data( dataset_dict=final_result
, dataset_visibility=entity_scope
, valid_user_token=user_token
, request=request
, associated_data='Organs')
final_result['organs'] = associated_organ_list

associated_sample_list = _get_dataset_associated_data( dataset_dict=final_result
, dataset_visibility=entity_scope
, valid_user_token=user_token
, request=request
, associated_data='Samples')
final_result['samples'] = associated_sample_list

associated_donor_list = _get_dataset_associated_data( dataset_dict=final_result
, dataset_visibility=entity_scope
, valid_user_token=user_token
, request=request
, associated_data='Donors')

final_result['donors'] = associated_donor_list

# Return JSON for the dictionary containing the entity metadata as well as metadata for the associated data.
return jsonify(final_result)

"""
Retrieve the metadata information of a given entity by id
Expand Down
44 changes: 0 additions & 44 deletions src/dev_entity_exceptions.py

This file was deleted.

Loading
Loading