From 5032d9c56a1682fa8e660311be336da66e38fda9 Mon Sep 17 00:00:00 2001 From: kburke <209327+kburke@users.noreply.github.com> Date: Thu, 27 Mar 2025 11:57:31 -0400 Subject: [PATCH 1/5] Add second entity validator to check if entities are "locked" by some criteria before allowing an update. --- src/app.py | 13 +++++++ src/schema/provenance_schema.yaml | 35 +++++++++++++++++-- src/schema/schema_manager.py | 52 ++++++++++++++++----------- src/schema/schema_validators.py | 58 ++++++++++++++++++++++++------- 4 files changed, 124 insertions(+), 34 deletions(-) diff --git a/src/app.py b/src/app.py index 23f16fc1..0532823e 100644 --- a/src/app.py +++ b/src/app.py @@ -1357,6 +1357,19 @@ def update_entity(id): # Note, we don't support entity level validators on entity update via PUT # Only entity create via POST is supported at the entity level + # KBKBKB...or do we? + # Execute entity level validator defined in schema yaml before entity modification. + try: + schema_manager.execute_entity_level_validator(validator_type='before_entity_update_validator' + , normalized_entity_type=normalized_entity_type + , request=request + , existing_entity_dict=entity_dict) + except schema_errors.MissingApplicationHeaderException as e: + bad_request_error(e) + except schema_errors.InvalidApplicationHeaderException as e: + bad_request_error(e) + except schema_errors.SchemaValidationException as sve: + bad_request_error(sve) # Validate request json against the yaml schema # Pass in the entity_dict for missing required key check, this is different from creating new entity diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 43c53dad..b94b1e42 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -10,8 +10,9 @@ # - trigger types: before_create_trigger|after_create_trigger|before_update_trigger|after_update_trigger|on_read_trigger|on_index_trigger, one property can have none (default) or more than one triggers # - updated_peripherally: a temporary measure to correctly handle any attributes which are potentially updated by multiple triggers -# Entity level validator: -# - types: before_entity_create_validator, a single validation method needed for creating or updating the entity +# Entity level validators: +# - types: before_entity_create_validator - validation method needed for creating an entity. +# before_entity_update_validator - validation method needed for updating an entity. # Property level validators: # - types: before_property_create_validators|before_property_update_validators, a list of validation methods @@ -192,6 +193,11 @@ shared_entity_properties: &shared_entity_properties ENTITIES: ############################################# Collection ############################################# Collection: + before_entity_update_validator: + # Only allowed applications can modify an existing Collection via PUT # KBKBKB @TODO confirm introducing this + - validate_application_header_before_entity_create + # Halt modification of entities which are "locked", such as a Dataset with status == 'Published' + - validate_entity_not_locked_before_update excluded_properties_from_public_response: - datasets: - lab_dataset_id @@ -304,6 +310,11 @@ ENTITIES: Dataset: # Only allowed applications can create new Dataset via POST before_entity_create_validator: validate_application_header_before_entity_create + before_entity_update_validator: + # Only allowed applications can modify an existing Dataset via PUT + - validate_application_header_before_entity_create + # Halt modification of entities which are "locked", such as a Dataset with status == 'Published' + - validate_entity_not_locked_before_update # Dataset can be either derivation source or target excluded_properties_from_public_response: - lab_dataset_id @@ -659,6 +670,11 @@ ENTITIES: superclass: Dataset # Only allowed applications can create new Publication via POST before_entity_create_validator: validate_application_header_before_entity_create + before_entity_update_validator: + # Only allowed applications can modify an existing Collection via PUT # KBKBKB @TODO confirm introducing this + - validate_application_header_before_entity_create + # Halt modification of entities which are "locked", such as a Dataset with status == 'Published' + - validate_entity_not_locked_before_update # Publications can be either derivation source or target derivation: source: true @@ -763,6 +779,11 @@ ENTITIES: - lab_donor_id - submission_id - label + before_entity_update_validator: + # Only allowed applications can modify an existing Collection via PUT # KBKBKB @TODO confirm introducing this + - validate_application_header_before_entity_create + # Halt modification of entities which are "locked", such as a Dataset with status == 'Published' + - validate_entity_not_locked_before_update properties: <<: *shared_properties <<: *shared_entity_properties @@ -896,6 +917,11 @@ ENTITIES: - lab_id # Both Sample and Donor ancestors of a Sample must have these fields removed - submission_id + before_entity_update_validator: + # Only allowed applications can modify an existing Collection via PUT # KBKBKB @TODO confirm introducing this + - validate_application_header_before_entity_create + # Halt modification of entities which are "locked", such as a Dataset with status == 'Published' + - validate_entity_not_locked_before_update properties: <<: *shared_properties <<: *shared_entity_properties @@ -1250,5 +1276,10 @@ ENTITIES: derivation: source: false target: false + before_entity_update_validator: + # Only allowed applications can modify an existing Collection via PUT # KBKBKB @TODO confirm introducing this + - validate_application_header_before_entity_create + # Halt modification of entities which are "locked", such as a Dataset with status == 'Published' + - validate_entity_not_locked_before_update properties: <<: *shared_collection_properties diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index a14a75fc..8740141e 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -1166,13 +1166,13 @@ def validate_json_data_against_schema(json_data_dict, normalized_entity_type, ex Parameters ---------- validator_type : str - One of the validator types: before_entity_create_validator + One of the validator types recognized by the validate_entity_level_validator_type() method. normalized_entity_type : str One of the normalized entity types defined in the schema yaml: Donor, Sample, Dataset, Upload, Upload, Publication request: Flask request object The instance of Flask request passed in from application request """ -def execute_entity_level_validator(validator_type, normalized_entity_type, request): +def execute_entity_level_validator(validator_type, normalized_entity_type, request, existing_entity_dict=None): global _schema # A bit validation @@ -1183,23 +1183,35 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque for key in entity: if validator_type == key: - validator_method_name = entity[validator_type] + if isinstance(entity[validator_type], str): + validator_method_names = [entity[validator_type]] + else: + # default to expecting a list when not a str + validator_method_names = entity[validator_type] - try: - # Get the target validator method defined in the schema_validators.py module - validator_method_to_call = getattr(schema_validators, validator_method_name) - - logger.info(f"To run {validator_type}: {validator_method_name} defined for entity {normalized_entity_type}") - - validator_method_to_call(normalized_entity_type, request) - except schema_errors.MissingApplicationHeaderException as e: - raise schema_errors.MissingApplicationHeaderException(e) - except schema_errors.InvalidApplicationHeaderException as e: - raise schema_errors.InvalidApplicationHeaderException(e) - except Exception as e: - msg = f"Failed to call the {validator_type} method: {validator_method_name} defined for entity {normalized_entity_type}" - # Log the full stack trace, prepend a line with our message - logger.exception(msg) + for validator_method_name in validator_method_names: + try: + # Get the target validator method defined in the schema_validators.py module + validator_method_to_call = getattr(schema_validators, validator_method_name) + + logger.info(f"To run {validator_type}: {validator_method_name} defined for entity {normalized_entity_type}") + + if existing_entity_dict is None: + # Execute the entity-level validation for create/POST + validator_method_to_call(normalized_entity_type, request) + else: + # Execute the entity-level validation for update/PUT + validator_method_to_call(normalized_entity_type, request, existing_entity_dict) + except schema_errors.MissingApplicationHeaderException as e: + raise schema_errors.MissingApplicationHeaderException(e) + except schema_errors.InvalidApplicationHeaderException as e: + raise schema_errors.InvalidApplicationHeaderException(e) + except schema_errors.SchemaValidationException as sve: + raise sve + except Exception as e: + msg = f"Failed to call the {validator_type} method: {validator_method_name} defined for entity {normalized_entity_type}" + # Log the full stack trace, prepend a line with our message + logger.exception(msg) """ @@ -1360,10 +1372,10 @@ def validate_trigger_type(trigger_type:TriggerTypeEnum): Parameters ---------- validator_type : str - One of the validator types: before_entity_create_validator + Name of an entity-level validator type, which must be listed in accepted_validator_types and found in this schema manager module. """ def validate_entity_level_validator_type(validator_type): - accepted_validator_types = ['before_entity_create_validator'] + accepted_validator_types = ['before_entity_create_validator', 'before_entity_update_validator'] separator = ', ' if validator_type.lower() not in accepted_validator_types: diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index 5a6fd5e1..bbfd0c93 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -14,14 +14,13 @@ logger = logging.getLogger(__name__) - #################################################################################################### ## Entity Level Validators #################################################################################################### """ Validate the application specified in the custom HTTP header -for creating a new entity via POST or updating via PUT +for creating a new entity via POST. Parameters ---------- @@ -30,7 +29,7 @@ request: Flask request The instance of Flask request passed in from application request """ -def validate_application_header_before_entity_create(normalized_entity_type, request): +def validate_application_header_before_entity_create(normalized_entity_type, request, existing_entity_id): # A list of applications allowed to create this new entity or update Dataset and Upload # Use lowercase for comparison applications_allowed = [ @@ -41,6 +40,18 @@ def validate_application_header_before_entity_create(normalized_entity_type, req _validate_application_header(applications_allowed, request.headers) +""" +Validate required conditions prior to allowing update of an existing entity via PUT. + +Parameters +---------- +normalized_type : str + One of the types defined in the schema yaml: Dataset, Upload +request: Flask request + The instance of Flask request passed in from application request +""" +def validate_entity_not_locked_before_update(normalized_entity_type, request, existing_entity_dict): + _is_entity_locked_against_update(existing_entity_dict) ############################################################################################## ## Property Level Validators @@ -74,7 +85,6 @@ def validate_recognized_dataset_type(property_key, normalized_entity_type, reque raise ValueError(f"Proposed Dataset dataset_type '{proposed_dataset_type_prefix}'" f" is not recognized in the existing ontology." f" Valid values are: {str(target_list)}.") - """ Validate the specified value for an Upload's intended_dataset_type is in the valueset UBKG recognizes. @@ -103,7 +113,6 @@ def validate_intended_dataset_type(property_key, normalized_entity_type, request f" is not recognized in the existing ontology." f" Valid values are: {str(target_list)}.") - """ Validate the target list has no duplicated items @@ -126,7 +135,6 @@ def validate_no_duplicates_in_list(property_key, normalized_entity_type, request if len(set(target_list)) != len(target_list): raise ValueError(f"The {property_key} field must only contain unique items") - """ Validate that a given dataset is not a component of a multi-assay split parent dataset fore allowing status to be updated. If a component dataset needs to be updated, update it via its parent multi-assay dataset @@ -144,8 +152,6 @@ def validate_no_duplicates_in_list(property_key, normalized_entity_type, request new_data_dict : dict The json data in request body, already after the regular validations """ - - def validate_dataset_not_component(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict): headers = request.headers if not headers.get(SchemaConstants.INTERNAL_TRIGGER) == SchemaConstants.COMPONENT_DATASET: @@ -157,7 +163,6 @@ def validate_dataset_not_component(property_key, normalized_entity_type, request f" {existing_data_dict['uuid']}. Can not change status on component datasets directly. Status" f"change must occur on parent multi-assay split dataset") - """ If the provided previous revision is already a revision of another dataset, disallow """ @@ -169,7 +174,6 @@ def validate_if_revision_is_unique(property_key, normalized_entity_type, request raise ValueError(f"Dataset marked as previous revision is already the previous revision of another dataset. " f"Each dataset may only be the previous revision of one other dataset") - """ If an entity has a DOI, do not allow it to be updated """ @@ -827,9 +831,39 @@ def _validate_application_header(applications_allowed, request_headers): if not app_header: msg = f"Unable to proceed due to missing {SchemaConstants.HUBMAP_APP_HEADER} header from request" - raise schema_errors.MissingApplicationHeaderException(msg) + raise MissingApplicationHeaderException(msg) # Use lowercase for comparing the application header value against the yaml if app_header.lower() not in applications_allowed: msg = f"Unable to proceed due to invalid {SchemaConstants.HUBMAP_APP_HEADER} header value: {app_header}" - raise schema_errors.InvalidApplicationHeaderException(msg) + raise InvalidApplicationHeaderException(msg) + +""" +Indicate if the entity meets a criteria to lock out modification updates + +Parameters +---------- +request_headers: Flask request.headers object, behaves like a dict + The instance of Flask request.headers passed in from application request +""" +def _is_entity_locked_against_update(existing_entity_dict): + entity_type = existing_entity_dict['entity_type'] + if entity_type in ['Publication','Dataset']: + if 'status' in existing_entity_dict and existing_entity_dict['status'] == 'Published': + raise schema_errors.SchemaValidationException(f"{entity_type} cannot be modified, due to" + f" status={existing_entity_dict['status']}.") + elif entity_type in ['Donor','Sample']: + if 'data_access_level' in existing_entity_dict and existing_entity_dict['data_access_level'] == 'public': + raise schema_errors.SchemaValidationException(f"{entity_type} cannot be modified, due to" + f" data_access_level={existing_entity_dict['data_access_level']}.") + elif entity_type in ['Collection','Epicollection']: + if 'doi_url' in existing_entity_dict and existing_entity_dict['doi_url']: + raise schema_errors.SchemaValidationException( f"{entity_type} cannot be modified, due to" + f" doi_url={existing_entity_dict['doi_url']}.") + # Probably never get here, since doi_url and registered_doi must be set as a pair. + if 'registered_doi' in existing_entity_dict and existing_entity_dict['registered_doi']: + raise schema_errors.SchemaValidationException( f"{entity_type} cannot be modified, due to" + f" registered_doi={existing_entity_dict['registered_doi']}.") + else: + entity_uuid = existing_entity_dict['uuid'] + raise schema_errors.SchemaValidationException(f'Unable to check if {entity_type} for {entity_uuid} is locked!') From 01aff4e989475cdeb60045f5388ba12af92d1600 Mon Sep 17 00:00:00 2001 From: kburke <209327+kburke@users.noreply.github.com> Date: Thu, 27 Mar 2025 16:29:34 -0400 Subject: [PATCH 2/5] Introduce LockedEntityUpdateException and return HTTP 403 rather than HTTP 400 when requested to update a locked entity. --- src/app.py | 4 ++-- src/schema/schema_errors.py | 3 +++ src/schema/schema_manager.py | 4 ++-- src/schema/schema_validators.py | 14 +++++++------- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/app.py b/src/app.py index 0532823e..29e1d89a 100644 --- a/src/app.py +++ b/src/app.py @@ -1368,8 +1368,8 @@ def update_entity(id): bad_request_error(e) except schema_errors.InvalidApplicationHeaderException as e: bad_request_error(e) - except schema_errors.SchemaValidationException as sve: - bad_request_error(sve) + except schema_errors.LockedEntityUpdateException as sve: + forbidden_error(sve) # Validate request json against the yaml schema # Pass in the entity_dict for missing required key check, this is different from creating new entity diff --git a/src/schema/schema_errors.py b/src/schema/schema_errors.py index 90a900a0..edb4ac63 100644 --- a/src/schema/schema_errors.py +++ b/src/schema/schema_errors.py @@ -39,4 +39,7 @@ class MissingApplicationHeaderException(Exception): pass class InvalidApplicationHeaderException(Exception): + pass + +class LockedEntityUpdateException(Exception): pass \ No newline at end of file diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 8740141e..38efa83a 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -1206,8 +1206,8 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque raise schema_errors.MissingApplicationHeaderException(e) except schema_errors.InvalidApplicationHeaderException as e: raise schema_errors.InvalidApplicationHeaderException(e) - except schema_errors.SchemaValidationException as sve: - raise sve + except schema_errors.LockedEntityUpdateException as leue: + raise leue except Exception as e: msg = f"Failed to call the {validator_type} method: {validator_method_name} defined for entity {normalized_entity_type}" # Log the full stack trace, prepend a line with our message diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index bbfd0c93..b7de47a3 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -850,20 +850,20 @@ def _is_entity_locked_against_update(existing_entity_dict): entity_type = existing_entity_dict['entity_type'] if entity_type in ['Publication','Dataset']: if 'status' in existing_entity_dict and existing_entity_dict['status'] == 'Published': - raise schema_errors.SchemaValidationException(f"{entity_type} cannot be modified, due to" - f" status={existing_entity_dict['status']}.") + raise schema_errors.LockedEntityUpdateException(f"{entity_type} cannot be modified, due to" + f" status={existing_entity_dict['status']}.") elif entity_type in ['Donor','Sample']: if 'data_access_level' in existing_entity_dict and existing_entity_dict['data_access_level'] == 'public': - raise schema_errors.SchemaValidationException(f"{entity_type} cannot be modified, due to" - f" data_access_level={existing_entity_dict['data_access_level']}.") + raise schema_errors.LockedEntityUpdateException(f"{entity_type} cannot be modified, due to" + f" data_access_level={existing_entity_dict['data_access_level']}.") elif entity_type in ['Collection','Epicollection']: if 'doi_url' in existing_entity_dict and existing_entity_dict['doi_url']: - raise schema_errors.SchemaValidationException( f"{entity_type} cannot be modified, due to" + raise schema_errors.LockedEntityUpdateException(f"{entity_type} cannot be modified, due to" f" doi_url={existing_entity_dict['doi_url']}.") # Probably never get here, since doi_url and registered_doi must be set as a pair. if 'registered_doi' in existing_entity_dict and existing_entity_dict['registered_doi']: - raise schema_errors.SchemaValidationException( f"{entity_type} cannot be modified, due to" + raise schema_errors.LockedEntityUpdateException(f"{entity_type} cannot be modified, due to" f" registered_doi={existing_entity_dict['registered_doi']}.") else: entity_uuid = existing_entity_dict['uuid'] - raise schema_errors.SchemaValidationException(f'Unable to check if {entity_type} for {entity_uuid} is locked!') + raise schema_errors.LockedEntityUpdateException(f'Unable to check if {entity_type} for {entity_uuid} is locked!') From 602ce8951bdd742a890783dc1ab077a401590f58 Mon Sep 17 00:00:00 2001 From: kburke <209327+kburke@users.noreply.github.com> Date: Fri, 28 Mar 2025 09:46:10 -0400 Subject: [PATCH 3/5] Introduce handling X-HuBMAP-Update-Override in the HTTP Request Header. --- src/app.py | 25 ++++++++++++++++++++++--- src/instance/app.cfg.example | 5 +++++ src/schema/schema_constants.py | 1 + 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/app.py b/src/app.py index 29e1d89a..627f47ad 100644 --- a/src/app.py +++ b/src/app.py @@ -89,6 +89,15 @@ MEMCACHED_MODE = False MEMCACHED_PREFIX = 'NONE' +# Read the secret key which may be submitted in HTTP Request Headers to override the lockout of +# updates to entities with characteristics prohibiting their modification. +LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = app.config['LOCKED_ENTITY_UPDATE_OVERRIDE_KEY'] + +# Compile the only accept regular expression pattern for the lockout override key in an HTTP Request Header. +# N.B. This is case-insensitive matching "Override-Key", but case-sensitive matching the value. +LOCKED_ENTITY_UPDATE_OVERRIDE_PATTERN = rf"^\s*(?i:override-key)\s+{re.escape(LOCKED_ENTITY_UPDATE_OVERRIDE_KEY)}\s*$" +LOCKED_ENTITY_UPDATE_OVERRIDE_REGEX = re.compile(pattern=LOCKED_ENTITY_UPDATE_OVERRIDE_PATTERN) + # Suppress InsecureRequestWarning warning when requesting status on https with ssl cert verify disabled requests.packages.urllib3.disable_warnings(category = InsecureRequestWarning) @@ -1359,6 +1368,7 @@ def update_entity(id): # Only entity create via POST is supported at the entity level # KBKBKB...or do we? # Execute entity level validator defined in schema yaml before entity modification. + lockout_overridden = False try: schema_manager.execute_entity_level_validator(validator_type='before_entity_update_validator' , normalized_entity_type=normalized_entity_type @@ -1368,8 +1378,16 @@ def update_entity(id): bad_request_error(e) except schema_errors.InvalidApplicationHeaderException as e: bad_request_error(e) - except schema_errors.LockedEntityUpdateException as sve: - forbidden_error(sve) + except schema_errors.LockedEntityUpdateException as leue: + # HTTP header names are case-insensitive, and request.headers.get() returns None if the header doesn't exist + locked_entity_update_header = request.headers.get(SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER) + if locked_entity_update_header and bool(LOCKED_ENTITY_UPDATE_OVERRIDE_REGEX.match(locked_entity_update_header)): + lockout_overridden = True + logger.info(f"For {entity_dict['entity_type']} {entity_dict['uuid']}" + f" update prohibited due to {str(leue)}," + f" but being overridden by valid {SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER} in request.") + else: + forbidden_error(leue) # Validate request json against the yaml schema # Pass in the entity_dict for missing required key check, this is different from creating new entity @@ -1544,7 +1562,8 @@ def update_entity(id): # Do not return the updated dict to avoid computing overhead - 7/14/2023 by Zhou # return jsonify(normalized_complete_dict) - return jsonify({'message': f"{normalized_entity_type} of {id} has been updated"}) + override_msg = 'Lockout overridden. ' if lockout_overridden else '' + return jsonify({'message': f"{override_msg}{normalized_entity_type} of {id} has been updated"}) """ diff --git a/src/instance/app.cfg.example b/src/instance/app.cfg.example index c0ae7302..a01a7260 100644 --- a/src/instance/app.cfg.example +++ b/src/instance/app.cfg.example @@ -26,6 +26,11 @@ NEO4J_URI = 'bolt://hubmap-neo4j-localhost:7687' NEO4J_USERNAME = 'neo4j' NEO4J_PASSWORD = '123' +# Secret value presented with the request header value named by +# SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER, expected to be off the form +# X-HuBMAP-Update-Override: Override-Key +LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = 'set during deployment' + # Set MEMCACHED_MODE to False to disable the caching for local development MEMCACHED_MODE = True MEMCACHED_SERVER = 'host:11211' diff --git a/src/schema/schema_constants.py b/src/schema/schema_constants.py index 30cc0279..e3cfd1a5 100644 --- a/src/schema/schema_constants.py +++ b/src/schema/schema_constants.py @@ -7,6 +7,7 @@ class SchemaConstants(object): COMPONENT_DATASET = 'component-dataset' INGEST_PIPELINE_APP = 'ingest-pipeline' HUBMAP_APP_HEADER = 'X-Hubmap-Application' + LOCKED_ENTITY_UPDATE_HEADER = 'X-HuBMAP-Update-Override' INTERNAL_TRIGGER = 'X-Internal-Trigger' DATASET_STATUS_PUBLISHED = 'published' From f12f91e765465074fb2ea8fe4a75d71141ad0309 Mon Sep 17 00:00:00 2001 From: kburke <209327+kburke@users.noreply.github.com> Date: Thu, 3 Apr 2025 13:22:01 -0400 Subject: [PATCH 4/5] Implementation of Code Review requests. New format for override key. Strip validate_application_header_before_entity_create from all before_entity_update_validator entries. Comments added for clarity. Consolidate entity validator args to one dict. Remove block on modifying public Collections from previous Issue, so now blocked by new validator and block can be overridden. --- src/app.py | 22 ++++++---------------- src/instance/app.cfg.example | 2 +- src/schema/provenance_schema.yaml | 17 ++++------------- src/schema/schema_manager.py | 12 ++++++++++-- src/schema/schema_validators.py | 16 ++++++++++++++-- 5 files changed, 35 insertions(+), 34 deletions(-) diff --git a/src/app.py b/src/app.py index e9d584bb..e0abe58e 100644 --- a/src/app.py +++ b/src/app.py @@ -93,11 +93,6 @@ # updates to entities with characteristics prohibiting their modification. LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = app.config['LOCKED_ENTITY_UPDATE_OVERRIDE_KEY'] -# Compile the only accept regular expression pattern for the lockout override key in an HTTP Request Header. -# N.B. This is case-insensitive matching "Override-Key", but case-sensitive matching the value. -LOCKED_ENTITY_UPDATE_OVERRIDE_PATTERN = rf"^\s*(?i:override-key)\s+{re.escape(LOCKED_ENTITY_UPDATE_OVERRIDE_KEY)}\s*$" -LOCKED_ENTITY_UPDATE_OVERRIDE_REGEX = re.compile(pattern=LOCKED_ENTITY_UPDATE_OVERRIDE_PATTERN) - # Suppress InsecureRequestWarning warning when requesting status on https with ssl cert verify disabled requests.packages.urllib3.disable_warnings(category = InsecureRequestWarning) @@ -1372,9 +1367,6 @@ def update_entity(id): # Normalize user provided entity_type normalized_entity_type = schema_manager.normalize_entity_type(entity_dict['entity_type']) - # Note, we don't support entity level validators on entity update via PUT - # Only entity create via POST is supported at the entity level - # KBKBKB...or do we? # Execute entity level validator defined in schema yaml before entity modification. lockout_overridden = False try: @@ -1389,13 +1381,15 @@ def update_entity(id): except schema_errors.LockedEntityUpdateException as leue: # HTTP header names are case-insensitive, and request.headers.get() returns None if the header doesn't exist locked_entity_update_header = request.headers.get(SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER) - if locked_entity_update_header and bool(LOCKED_ENTITY_UPDATE_OVERRIDE_REGEX.match(locked_entity_update_header)): + if locked_entity_update_header and (LOCKED_ENTITY_UPDATE_OVERRIDE_KEY == locked_entity_update_header): lockout_overridden = True logger.info(f"For {entity_dict['entity_type']} {entity_dict['uuid']}" f" update prohibited due to {str(leue)}," f" but being overridden by valid {SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER} in request.") else: forbidden_error(leue) + except Exception as e: + internal_server_error(e) # Validate request json against the yaml schema # Pass in the entity_dict for missing required key check, this is different from creating new entity @@ -1414,6 +1408,9 @@ def update_entity(id): ValueError) as e: bad_request_error(e) + # Proceed with per-entity updates after passing any entity-level or property-level validations which + # would have locked out updates. + # # Sample, Dataset, and Upload: additional validation, update entity, after_update_trigger # Collection and Donor: update entity if normalized_entity_type == 'Sample': @@ -1498,13 +1495,6 @@ def update_entity(id): if has_dataset_uuids_to_link or has_dataset_uuids_to_unlink or has_updated_status: after_update(normalized_entity_type, user_token, merged_updated_dict) elif schema_manager.entity_type_instanceof(normalized_entity_type, 'Collection'): - entity_visibility = _get_entity_visibility( normalized_entity_type=normalized_entity_type - ,entity_dict=entity_dict) - # Prohibit update of an existing Collection if it meets criteria of being visible to public e.g. has DOI. - if entity_visibility == DataVisibilityEnum.PUBLIC: - logger.info(f"Attempt to update {normalized_entity_type} with id={id} which has visibility {entity_visibility}.") - bad_request_error(f"Cannot update {normalized_entity_type} due '{entity_visibility.value}' visibility.") - # Generate 'before_update_trigger' data and update the entity details in Neo4j merged_updated_dict = update_entity_details(request, normalized_entity_type, user_token, json_data_dict, entity_dict) diff --git a/src/instance/app.cfg.example b/src/instance/app.cfg.example index a01a7260..595ee728 100644 --- a/src/instance/app.cfg.example +++ b/src/instance/app.cfg.example @@ -28,7 +28,7 @@ NEO4J_PASSWORD = '123' # Secret value presented with the request header value named by # SchemaConstants.LOCKED_ENTITY_UPDATE_HEADER, expected to be off the form -# X-HuBMAP-Update-Override: Override-Key +# X-HuBMAP-Update-Override: LOCKED_ENTITY_UPDATE_OVERRIDE_KEY = 'set during deployment' # Set MEMCACHED_MODE to False to disable the caching for local development diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index c9ed7bad..dbb3351b 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -194,8 +194,6 @@ ENTITIES: ############################################# Collection ############################################# Collection: before_entity_update_validator: - # Only allowed applications can modify an existing Collection via PUT # KBKBKB @TODO confirm introducing this - - validate_application_header_before_entity_create # Halt modification of entities which are "locked", such as a Dataset with status == 'Published' - validate_entity_not_locked_before_update excluded_properties_from_public_response: @@ -311,8 +309,6 @@ ENTITIES: # Only allowed applications can create new Dataset via POST before_entity_create_validator: validate_application_header_before_entity_create before_entity_update_validator: - # Only allowed applications can modify an existing Dataset via PUT - - validate_application_header_before_entity_create # Halt modification of entities which are "locked", such as a Dataset with status == 'Published' - validate_entity_not_locked_before_update # Dataset can be either derivation source or target @@ -671,8 +667,6 @@ ENTITIES: # Only allowed applications can create new Publication via POST before_entity_create_validator: validate_application_header_before_entity_create before_entity_update_validator: - # Only allowed applications can modify an existing Collection via PUT # KBKBKB @TODO confirm introducing this - - validate_application_header_before_entity_create # Halt modification of entities which are "locked", such as a Dataset with status == 'Published' - validate_entity_not_locked_before_update # Publications can be either derivation source or target @@ -780,8 +774,6 @@ ENTITIES: - submission_id - label before_entity_update_validator: - # Only allowed applications can modify an existing Collection via PUT # KBKBKB @TODO confirm introducing this - - validate_application_header_before_entity_create # Halt modification of entities which are "locked", such as a Dataset with status == 'Published' - validate_entity_not_locked_before_update properties: @@ -918,8 +910,6 @@ ENTITIES: # Both Sample and Donor ancestors of a Sample must have these fields removed - submission_id before_entity_update_validator: - # Only allowed applications can modify an existing Collection via PUT # KBKBKB @TODO confirm introducing this - - validate_application_header_before_entity_create # Halt modification of entities which are "locked", such as a Dataset with status == 'Published' - validate_entity_not_locked_before_update properties: @@ -1146,7 +1136,10 @@ ENTITIES: Upload: # Only allowed applications can create new Upload via POST before_entity_create_validator: validate_application_header_before_entity_create - # Upload requires an ancestor of Lab, and and has no allowed decesndents + # No before_entity_update_validator needed for Upload because the entity is + # always considered "non-public", and therefore not blocked from update/PUT. + # + # Upload requires a Lab entity as an ancestor, and has no allowed descendants derivation: source: false target: false # Set to false since the schema doesn't handle Lab currently @@ -1295,8 +1288,6 @@ ENTITIES: source: false target: false before_entity_update_validator: - # Only allowed applications can modify an existing Collection via PUT # KBKBKB @TODO confirm introducing this - - validate_application_header_before_entity_create # Halt modification of entities which are "locked", such as a Dataset with status == 'Published' - validate_entity_not_locked_before_update properties: diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 38efa83a..59b11e3a 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -1171,6 +1171,8 @@ def validate_json_data_against_schema(json_data_dict, normalized_entity_type, ex One of the normalized entity types defined in the schema yaml: Donor, Sample, Dataset, Upload, Upload, Publication request: Flask request object The instance of Flask request passed in from application request +existing_entity_dict : dict + The dictionary for an entity, retrieved from Neo4j, for use during update/PUT validations """ def execute_entity_level_validator(validator_type, normalized_entity_type, request, existing_entity_dict=None): global _schema @@ -1196,12 +1198,17 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque logger.info(f"To run {validator_type}: {validator_method_name} defined for entity {normalized_entity_type}") + # Create a dictionary to hold data need by any entity validator, which must be populated + # with validator specific requirements when the method to be called is determined. + options_dict = {} if existing_entity_dict is None: # Execute the entity-level validation for create/POST - validator_method_to_call(normalized_entity_type, request) + options_dict['http_request'] = request + validator_method_to_call(options_dict) else: # Execute the entity-level validation for update/PUT - validator_method_to_call(normalized_entity_type, request, existing_entity_dict) + options_dict['existing_entity_dict']= existing_entity_dict + validator_method_to_call(options_dict) except schema_errors.MissingApplicationHeaderException as e: raise schema_errors.MissingApplicationHeaderException(e) except schema_errors.InvalidApplicationHeaderException as e: @@ -1212,6 +1219,7 @@ def execute_entity_level_validator(validator_type, normalized_entity_type, reque msg = f"Failed to call the {validator_type} method: {validator_method_name} defined for entity {normalized_entity_type}" # Log the full stack trace, prepend a line with our message logger.exception(msg) + raise e """ diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index a64d5560..75b96b5c 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -29,7 +29,13 @@ request: Flask request The instance of Flask request passed in from application request """ -def validate_application_header_before_entity_create(normalized_entity_type, request, existing_entity_id): +def validate_application_header_before_entity_create(options_dict): + if 'http_request' in options_dict: + request = options_dict['http_request'] + else: + logger.error(f"validate_application_header_before_entity_create() expected 'http_request' in" + f" options_dict, but it was missing in {str(options_dict)}.") + raise KeyError("Entity validator internal misconfiguration.") # A list of applications allowed to create this new entity or update Dataset and Upload # Use lowercase for comparison applications_allowed = [ @@ -50,7 +56,13 @@ def validate_application_header_before_entity_create(normalized_entity_type, req request: Flask request The instance of Flask request passed in from application request """ -def validate_entity_not_locked_before_update(normalized_entity_type, request, existing_entity_dict): +def validate_entity_not_locked_before_update(options_dict): + if 'existing_entity_dict' in options_dict: + existing_entity_dict = options_dict['existing_entity_dict'] + else: + logger.error(f"validate_entity_not_locked_before_update() expected 'existing_entity_dict' in" + f" options_dict, but it was missing in {str(options_dict)}.") + raise KeyError("Entity validator internal misconfiguration.") _is_entity_locked_against_update(existing_entity_dict) ############################################################################################## From 6d4dc4843599fdf1f6c5c5013f111d36bb8d0557 Mon Sep 17 00:00:00 2001 From: kburke <209327+kburke@users.noreply.github.com> Date: Fri, 4 Apr 2025 15:29:13 -0400 Subject: [PATCH 5/5] Code review request - revised error message when updates locked out for entity. --- src/schema/schema_validators.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index 75b96b5c..a3c56492 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -925,20 +925,16 @@ def _is_entity_locked_against_update(existing_entity_dict): entity_type = existing_entity_dict['entity_type'] if entity_type in ['Publication','Dataset']: if 'status' in existing_entity_dict and existing_entity_dict['status'] == 'Published': - raise schema_errors.LockedEntityUpdateException(f"{entity_type} cannot be modified, due to" - f" status={existing_entity_dict['status']}.") + raise schema_errors.LockedEntityUpdateException(f"Permission denied to change a published/public {entity_type}.") elif entity_type in ['Donor','Sample']: if 'data_access_level' in existing_entity_dict and existing_entity_dict['data_access_level'] == 'public': - raise schema_errors.LockedEntityUpdateException(f"{entity_type} cannot be modified, due to" - f" data_access_level={existing_entity_dict['data_access_level']}.") + raise schema_errors.LockedEntityUpdateException(f"Permission denied to change a published/public {entity_type}.") elif entity_type in ['Collection','Epicollection']: if 'doi_url' in existing_entity_dict and existing_entity_dict['doi_url']: - raise schema_errors.LockedEntityUpdateException(f"{entity_type} cannot be modified, due to" - f" doi_url={existing_entity_dict['doi_url']}.") + raise schema_errors.LockedEntityUpdateException(f"Permission denied to change a published/public {entity_type}.") # Probably never get here, since doi_url and registered_doi must be set as a pair. if 'registered_doi' in existing_entity_dict and existing_entity_dict['registered_doi']: - raise schema_errors.LockedEntityUpdateException(f"{entity_type} cannot be modified, due to" - f" registered_doi={existing_entity_dict['registered_doi']}.") + raise schema_errors.LockedEntityUpdateException(f"Permission denied to change a published/public {entity_type}.") else: entity_uuid = existing_entity_dict['uuid'] raise schema_errors.LockedEntityUpdateException(f'Unable to check if {entity_type} for {entity_uuid} is locked!')